#include "formrule.hpp"
#include "appostr.hpp"
#include <typedet_exp.h>
#include <openfile.h>
#include <stdio.h>


// Built-in table of noun form codes, laid out as consecutive pairs:
// element 2*n is a short mnemonic (e.g. "SgN") and element 2*n + 1 is the
// matching 8-character positional code (e.g. "------0N").  Every string fits
// in 9 bytes including the terminating NUL.
// The mnemonics presumably abbreviate number + case (Sg/Pl + case name) --
// TODO confirm against the code file.
// NOTE(review): the only mention of this table in the code visible here is a
// commented-out debug printf in RuleSet::convert; the codes actually used are
// read from file by FormGenerator::loadCodes.
static char gNounCodeStrings[][9] = {
"SgN",   "------0N",
"SgG",   "------0G",
"SgP",   "------0P",
"SgAdt", "------0D",
"PlG",   "------1G",
"Rpl",   "------1-",
"PlP",   "------1P",
"SgIll", "------01",
"SgIn",  "------02",
"SgEl",  "------03",
"SgAll", "------04",
"SgAd",  "------05",
"SgAbl", "------06",
"SgTr",  "------0T",
"SgTer", "------0R",
"SgEs",  "------0E",
"SgAb",  "------0A",
"SgKom", "------0K",
"PlN",   "------1N",
"PlIll", "------11",
"PlIn",  "------12",
"PlEl",  "------13",
"PlAll", "------14",
"PlAd",  "------15",
"PlAbl", "------16",
"PlTr",  "------1T",
"PlTer", "------1R",
"PlEs",  "------1E",
"PlAb",  "------1A",
"PlKom", "------1K"
};

// Built-in table of verb form codes, laid out as consecutive pairs:
// element 2*n is a short mnemonic (e.g. "MA") and element 2*n + 1 is the
// matching 8-character positional code (e.g. "30------").  Every string fits
// in 9 bytes including the terminating NUL.
// NOTE(review): this table is not referenced by any code visible here; the
// codes actually used are read from file by FormGenerator::loadCodes.
static char gVerbCodeStrings[][9] = {
"MA",      "30------",
"MATA",    "30-----A",
"V",       "400---0N",
"S",       "-02031--",
"SIN",     "-02011--",
"SIME",    "-02041--",
"DA",      "1-------",
"GE",      "-00350--",
"NUD",     "401--0--",
"B",       "-00031--",
"ME",      "-00041--",
"TUD",     "411--0--",
"TAKSE",   "-100-1--",
"VAT",     "-00100--",
"MAS",     "30-----2",
"MAST",    "30-----3",
"MAKS",    "30-----T",
"SID2",    "-02021--",
"SID6",    "-02061--",
"SITE",    "-02051--",
"DES",     "2-------",
"GU3",     "-00330--",
"GEM",     "-00340--",
"GU6",     "-00360--",
"NUKSIN",  "-01211--",
"NUKSID2", "-01221--",
"NUKS",    "-01230--",
"NUKSIME", "-01241--",
"NUKSITE", "-01251--",
"NUKSID6", "-01261--",
"NUVAT",   "-01100--",
"N",       "-00011--",
"D",       "-00021--",
"0N",      "-00002--",
"02",      "-00320--",
"KSIN",    "-00211--",
"KSID2",   "-00221--",
"KS",      "-00230--",
"KSIME",   "-00241--",
"KSITE",   "-00251--",
"KSID6",   "-00261--",
"TE",      "-00051--",
"VAD",     "-00061--",
"TI",      "-120-1--",
"TAKS",    "-102-0--",
"TUKS",    "-112-0--",
"TAVAT",   "-101-0--",
"TUVAT",   "-111-0--",
"TAGU",    "-103-0--",
"TAMA",    "31------",
"TA",      "-100-2--",
"TAV",     "410---0N"
};


char* findBaseForm(const char* baseCode, PureFormRecord *pBaseFormArr,
                   int start, int end)
{
  for(int i = start; i < end; i++)
    if(!strcmp(baseCode, pBaseFormArr[i].code))
      return pBaseFormArr[i].form;
  return 0;
}


// Applies this rule to the base forms in pBaseFormArr[start .. end).
//
// Result:
//  - "#" with stem length 0 when the rule's base code is "X", or when the
//    base form is unusable and the rule has no alternative;
//  - "?" with stem length 0 when the base form is unusable but the rule has
//    an alternative (RuleSet::convert then tries the following rule);
//  - otherwise base form + fEnd, with the stem length set to the length of
//    the base form.
// A base form is unusable when it is missing, empty, or starts with '#' or '0'.
AForm FormRule::convert(PureFormRecord *pBaseFormArr, int start, int end) const
{
  if(fBaseCode == "X")
    return AForm("#", 0);

  const char* stem = findBaseForm(fBaseCode.c_str(), pBaseFormArr, start, end);
  const bool usable = stem && *stem != '\0' && *stem != '#' && *stem != '0';

  if(!usable) {
    if(fAltern)
      return AForm("?", 0);
    return AForm("#", 0);
  }

  return AForm(std_string(stem) + fEnd, strlen(stem));
}



// Appends `rule` to the list of rules for form index `code` and grows
// fNumForms so that it always covers the highest index that has a rule.
// Indexes outside [0, kMaxForms) are ignored; the lower bound matters because
// a negative index would otherwise write in front of fRules.
void RuleSet::addRule(int code, const FormRule &rule)
{
  if(code < 0 || code >= kMaxForms)
    return;

  fRules[code].push_back(rule);
  if(code >= fNumForms)
    fNumForms = code + 1;
}

void RuleSet::convert(const FormGenerator &gen,
                      const char *lemma, PureFormRecord *pBaseFormArr, int start, int end,
                      int nType, const string &kind,
                      int nVariant, int withAppostr, int codeType,
                      SynthFormRecord *pOutBuf, int bufLength, int &nForms) const
{
  if(nForms < bufLength)
    fpExcReg->findExceptionsStart(lemma, nType);

  int nMaxForms = nType < kNumNounTypes? gen.nounForms() : gen.verbForms();

  const CodeToIndex &rCodeToIndex = gen.codeToIndex();

  int count = 0;

  //peame itereerima standardkoodide alfabeetilises ja"rjestuses
  //muidu me ei saa erandeid ka"tte (kuna nad on sorteeritud standardkoodide ja"rgi)
  for(CodeToIndex::const_iterator mapItr = rCodeToIndex.begin();
      mapItr != rCodeToIndex.end() && count < nMaxForms;
      mapItr++) {
    char standardCode[10];
    strcpy(standardCode, (*mapItr).first.c_str());
    int i = (*mapItr).second; //sellest saab reeglimassiivi indeks

    if(nType < kNumNounTypes) {
      if(i >= 100) //verbivorm, ei huvita praegu
        continue;
    }
    else {
      if(i >= 100)
        i -= 100;
      else //noomenivorm, ei huvita praegu
        continue;
    }

    count++;

    int nOutForm = nForms + i;

    int nParal = 0;

    //printf("getting %s\n", gNounCodeStrings[i*2 + codeType]);
    int done = 0;
    if(nOutForm < bufLength) {
      pOutBuf[nOutForm].fType = nType;
      strncpy(pOutBuf[nOutForm].fKind, kind.c_str(), kSynthKindLength - 1);
      pOutBuf[nOutForm].fKind[kSynthKindLength - 1] = '\0';
      pOutBuf[nOutForm].fVariant = nVariant;
      if(nType < kNumNounTypes)
        strcpy(pOutBuf[nOutForm].fCode, gen.nounCode(i, codeType));
      else
        strcpy(pOutBuf[nOutForm].fCode, gen.verbCode(i, codeType));

      fpExcReg->getException(nType, lemma, standardCode, pOutBuf[nOutForm].fForms,
                             pBaseFormArr, start, end, withAppostr, 0, nParal, done);
    }

    bool firstAlternFound = false;

    if(!done && i < fNumForms) {
      for(int k = 0; k < fRules[i].size(); k++) {

        if(firstAlternFound)
          firstAlternFound = false;
        else {
          AForm form = fRules[i][k].convert(pBaseFormArr, start, end);
          if(form.getForm() != "?") {
            if(nOutForm < bufLength) {
              strcpy(pOutBuf[nOutForm].fForms[nParal].fForm, form.getForm().c_str());
              pOutBuf[nOutForm].fForms[nParal].fStemLength = form.getStemLength();
            }
            nParal++;

            if(fRules[i][k].hasAlternative())
              firstAlternFound = true;
          }
        }
      }

      if(nOutForm < bufLength)
        fpExcReg->getException(nType, lemma, standardCode, pOutBuf[nOutForm].fForms,
                               pBaseFormArr, start, end, withAppostr, 1, nParal, done);
    }

    if(nOutForm < bufLength)
       pOutBuf[nOutForm].fNumForms = nParal;
    //nForms++;
  }

  nForms += nMaxForms;
}


// Builds a generator from three data files: the form-code file, the rule
// file and the exception file (the latter is handed to the exception
// registry).
FormGenerator::FormGenerator(const char *codeFileName,
                             const char *ruleFileName,
                             const char *excFileName)
  : fExcReg(excFileName),
    fnNounForms(0),
    fnVerbForms(0)
{
  // Codes go first: initialise() resolves the form codes of the rule file
  // through fCodeToIndex, which loadCodes() fills.
  loadCodes(codeFileName);
  initialise(ruleFileName);
  fExcReg.createIndex();

  // Every rule set shares the one exception registry owned by this object.
  int setNr = 0;
  while(setNr < kNumTypes) {
    fSets[setNr].setExcReg(&fExcReg);
    ++setNr;
  }
}



void FormGenerator::initialise(const char *fileName)
{
  FILE *ruleFile = openAtLoc(fileName, "r");
  if(ruleFile) {
    char linebuf[80];
    char formCode[10], basecode[10], end[10];
    int typeNr = 0;
    while(fgets(linebuf, 80, ruleFile)) {
      if( ! (*linebuf >= '0' && *linebuf <= '9' || *linebuf >= 'A' && *linebuf <= 'Z' ||
             *linebuf >= 'a' && *linebuf <= 'z' || *linebuf == '-') )
        continue;
      if(!strncmp(linebuf, "tyyp=", 5)) {
        typeNr = atoi(linebuf + 5);
      }
      else {
        char *pBegin = strrchr(linebuf, ':');
        strncpy(formCode, linebuf, pBegin - linebuf);
        formCode[8] = 0;
        CodeToIndex::iterator place = fCodeToIndex.find(formCode);
        int formNr = -1;
        if(place != fCodeToIndex.end()) {
          formNr = (*place).second;
          if(formNr >= 100)
            //ilmselt oli verbivormikood, teeme saadud numbrist massiivi indeksi
            formNr -= 100;  
        }
        else
          continue;

        if(*(pBegin + 1) == 'X')
          fSets[typeNr].addRule(formNr, FormRule("X", "", false));
        else {
          do {
            pBegin++;
            char *pOpenBracket = strchr(pBegin, '[');
            int baseLength = pOpenBracket - pBegin;
            strncpy(basecode, pBegin, baseLength);
            basecode[baseLength] = '\0';
            char *pCloseBracket = strchr(pOpenBracket, ']');
            int endLength = pCloseBracket - pOpenBracket - 1;
            strncpy(end, pOpenBracket + 1, endLength);
            end[endLength] = '\0';
            bool hasAltern = false;
            if(*(pCloseBracket + 1) == '|')
              hasAltern = true;
            fSets[typeNr].addRule(formNr, FormRule(basecode, end, hasAltern));
            pBegin = pCloseBracket + 1;
          }
          while(*pBegin == '|' || *pBegin == '&');
        }
      }
    }
    fclose(ruleFile);
  }
}

int FormGenerator::convert(const char *lemma, int withApp, int codeType,
                           SynthFormRecord *pOutBuf , int bufLength)
{
  int nTypes, types[10];
  string kinds[10];
  int nForms = 0;
  getTypes(lemma, withApp, types, kinds, nTypes);
  for(int i = 0; i < nTypes; i++)
    if(types[i] < kNumTypes) {
//      printf("Type %d:\n", types[i]);
      PureFormRecord baseFormArr[40];
      char lemma2[40];
      strcpy(lemma2, lemma);
      //if verb then remove 'ma'
      if(types[i] >= kNumNounTypes)
        lemma2[strlen(lemma2) - 2] = '\0';
      int numBaseForms = CreateFormsOnArray(lemma2, types[i], baseFormArr,
                                            40, withApp);
      int nVariant = 1;
      int varStart = 0;
      for(int k = 0; k <= numBaseForms; k++) {
        //if(k < numBaseForms)
          //printf("BaseForm: %s %s\n", baseFormArr[k].code, baseFormArr[k].form);
        if(k > 0 && !strcmp(baseFormArr[k].code, baseFormArr[varStart].code)
           || k == numBaseForms) {
          fSets[types[i]].convert(*this, lemma2, baseFormArr, varStart, k, types[i],
                                  kinds[i], nVariant, withApp, codeType,
                                  pOutBuf, bufLength, nForms);
          nVariant++;
          varStart = k;
        }
      }
    }
    else {
      //muutumatu tyyp
      pOutBuf[nForms].fType = types[i];
      strncpy(pOutBuf[nForms].fKind, kinds[i].c_str(), kSynthKindLength - 1);
      pOutBuf[nForms].fKind[kSynthKindLength - 1] = '\0';
      pOutBuf[nForms].fVariant = 1;
      pOutBuf[nForms].fNumForms = 1;
      pOutBuf[nForms].fCode[0] = '\0';
      switch(codeType) {
      case 0:
        strcpy(pOutBuf[nForms].fCode, "ID");
        break;
      case 1:
        strcpy(pOutBuf[nForms].fCode, "_");
        break;
      case 2:
        strcpy(pOutBuf[nForms].fCode, "--------");
        break;
      default:
        pOutBuf[nForms].fCode[0] = '\0';
      }
      strcpy(pOutBuf[nForms].fForms[0].fForm, lemma);
      pOutBuf[nForms].fForms[0].fStemLength = strlen(lemma);
      nForms++;
    }

  return nForms;
}


void FormGenerator::getTypes(const char *lemma, int withApp,
                             int *typeNumbers, string *kinds,
                             int &nTypes)
{
  char typebuf[256];
  FindTypes((char*)lemma, typebuf, 256, withApp);
  nTypes = 0;
  char *typestr = typebuf;
//  printf("tyyp = %s\n", typebuf);
  typestr = strtok(typebuf, "|?~");
  while(typestr) {
    if(isdigit(*typestr)) {
      typeNumbers[nTypes] = atoi(typestr);
      kinds[nTypes] = strchr(typestr, '_') + 1;
//    printf("typeNumbers[%d] = %d\n", nTypes, typeNumbers[nTypes]);
      nTypes++;
    }
    typestr = strtok(NULL, "|?~");
  }
}


// Loads the form codes of one section of the code file.
//
//  fileName    code file, opened through openAtLoc()
//  section     section header to look for; a line matches when it starts
//              with this text
//  codeArray   receives the code strings: line number n of the section fills
//              slots n * kMaxCodeStrings .. n * kMaxCodeStrings + kMaxCodeStrings - 1
//  nBeginning  offset added to the line number when the code is registered in
//              fCodeToIndex
//  nCodes      out: number of code lines read (0 if the file or the section
//              is missing)
//
// The section ends at the next line starting with '@' or at end of file.
// Within it only lines starting with a digit, an upper-case letter or '-'
// carry codes; they are split at commas, blanks and newlines.  The third
// string on a line (index 2) is the key under which the line is registered in
// fCodeToIndex.
//
// Strings longer than a slot are truncated and strings beyond the
// kMaxCodeStrings-th on a line are not stored, so one line cannot spill into
// the slots of the next.
// NOTE: the number of entries codeArray can hold is not passed in and is
// therefore not checked here.
void FormGenerator::loadCodesFromSection(const char *fileName,
                                         const char *section,
                                         char codeArray[][kSynthCodeLength],
                                         int nBeginning,
                                         int &nCodes)
{
  nCodes = 0;
  FILE *codeFile = openAtLoc(fileName, "r");
  if(!codeFile)
    return;

  char linebuf[80];
  const size_t sectionLength = strlen(section);

  bool sectionFound = false;
  while(!sectionFound && fgets(linebuf, sizeof(linebuf), codeFile))
    sectionFound = strncmp(linebuf, section, sectionLength) == 0;

  if(!sectionFound) {
    fclose(codeFile); // do not leak the handle when the section is absent
    return;
  }

  while(fgets(linebuf, sizeof(linebuf), codeFile)) {
    const char first = *linebuf;
    if(first == '@') // start of the next section
      break;

    const bool hasCodes = (first >= '0' && first <= '9') ||
                          (first >= 'A' && first <= 'Z') ||
                          first == '-';
    if(!hasCodes) // the line does not contain form codes
      continue;

    int nStr = 0;
    for(char *codeStr = strtok(linebuf, ", \n");
        codeStr;
        codeStr = strtok(0, ", \n"), nStr++) {
      if(nStr < kMaxCodeStrings) {
        char *slot = codeArray[nCodes * kMaxCodeStrings + nStr];
        strncpy(slot, codeStr, kSynthCodeLength - 1);
        slot[kSynthCodeLength - 1] = '\0';
      }
      if(nStr == 2)
        // value_type is the map's own element type, so this builds on every
        // compiler without a compiler-specific branch.
        fCodeToIndex.insert(CodeToIndex::value_type(std::string(codeStr),
                                                    nCodes + nBeginning));
    }
    if(nStr)
      nCodes++;
  }

  fclose(codeFile);
}

// Loads the noun and the verb form codes from the code file: section
// "@noomen" goes to fNounCodeStrings / fnNounForms, section "@verb" to
// fVerbCodeStrings / fnVerbForms.
void FormGenerator::loadCodes(const char *fileName)
{
  loadCodesFromSection(fileName, "@noomen", fNounCodeStrings, 0, fnNounForms);
  // the indexes of all verb form codes start at 100;
  // later 100 is subtracted again to get the index into the array
  loadCodesFromSection(fileName, "@verb", fVerbCodeStrings, 100, fnVerbForms);


// Disabled code: a different loader that reads numbered lines
// ("<index>, <code>, <code>, ...") and is excluded from compilation.
#if 0
  memset(fNounCodeStrings, '\0', kNounForms * kMaxCodeStrings * kSynthCodeLength);
  memset(fVerbCodeStrings, '\0', kVerbForms * kMaxCodeStrings * kSynthCodeLength);

  FILE *codeFile = openAtLoc(fileName, "r");
  if(!codeFile)
    return;

  char linebuf[80];
  int nAssigned = 0;
  while(fgets(linebuf, 80, codeFile) && nAssigned < kNounForms)
    if(linebuf[0] >= '0' && linebuf[0] <= '9') {
      char *nrStr = strtok(linebuf, ", \n");
      int index = atoi(nrStr);
      for(int i = 0; i < kMaxCodeStrings; i++) {
        char *codeStr = strtok(0, ", \n");
        if(codeStr)
          strcpy(fNounCodeStrings[(index - 1) * kMaxCodeStrings + i], codeStr);
      }
      nAssigned++;
    }

  nAssigned = 0;
  while(fgets(linebuf, 80, codeFile) && nAssigned < kVerbForms)
    if(linebuf[0] >= '0' && linebuf[0] <= '9') {
      char *nrStr = strtok(linebuf, ", \n");
      int index = atoi(nrStr);
      for(int i = 0; i < kMaxCodeStrings; i++) {
        char *codeStr = strtok(0, ", \n");
        if(codeStr)
          strcpy(fVerbCodeStrings[(index - 1) * kMaxCodeStrings + i], codeStr);
      }
      nAssigned++;
    }

  fclose(codeFile);
#endif
}

