#include "synthexc.hpp"
#include "appostr.hpp"
#include <stdio.h>


#ifdef LINUX_PORT
#include <sys/stat.h>
#else
#include <sys\stat.h>
#endif

#include <algorithm>
#include <openfile.h>

extern char* findBaseForm(const char* baseCode, PureFormRecord *pBaseFormArr,
                          int start, int end);


// Removes a bracketed part marker from a form in place.
//
// For "stem[ending]rest" the string becomes "stemending" (everything from the
// closing ']' onwards is dropped) and stemLength receives the number of
// characters that preceded '['. If the closing ']' is missing, the text after
// '[' is kept up to the end of the string.
//
// stemLength is 0 when the string is empty, starts with '#', or contains no
// '['; in those cases the string is left untouched.
inline void resolveBrackets(char *s, int &stemLength)
{
  stemLength = 0;
  if(s[0] == '\0' || s[0] == '#')
    return;

  // Locate the opening bracket (or the terminator if there is none).
  char *open = s;
  while(*open != '\0' && *open != '[')
    ++open;

  if(*open != '[')
    return;

  stemLength = open - s;

  // Shift the bracket content one position to the left, over the '['.
  char *dst = open;
  for(const char *src = open + 1; *src != '\0' && *src != ']'; ++src, ++dst)
    *dst = *src;
  *dst = '\0';
}


// Stores the name of the exception file and starts with an empty registry:
// no records, no cursor (fCurrent == -1) and, for every type, a null index
// with size 0. Nothing is read from disk here.
SynthExceptionRegistry::SynthExceptionRegistry(const char *exceptFileName)
  : fExceptFileName(exceptFileName),
    fNrLines(0),
    fpData(0),
    fCurrent(-1)
{
  for(int typeNr = 0; typeNr < kNumTypes; ++typeNr) {
    fpIndex[typeNr] = 0;
    fTypeSizes[typeNr] = 0;
  }
}

// Releases the per-type index arrays and the record array.
//
// Both are obtained with malloc()/realloc() in createIndex(), so they must be
// returned with free(); releasing them with delete [] is undefined behaviour.
// free() accepts null pointers, so never-populated slots need no special case.
SynthExceptionRegistry::~SynthExceptionRegistry()
{
  for(int j=0; j<kNumTypes; j++){
    free(fpIndex[j]);
    fpIndex[j]=0;
  }

  free(fpData);
  fpData=0;
}



struct LemmaComp
{
  LemmaComp(SynthExceptionData *pData):fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
#if 0
    printf("comparing %d and %d\n", i, j);
    printf("comparing (%d,%s,%s,%s,%c) & (%d,%s,%s,%s,%c)\n",
           fpData[i].nr, fpData[i].lemma, fpData[i].form, fpData[i].formCode,
           fpData[i].excType,
           fpData[j].nr, fpData[j].lemma, fpData[j].form, fpData[j].formCode,
           fpData[j].excType);
#endif
    int res = strAppstrCmp(fpData[i].lemma, fpData[j].lemma);
    if(res == 0) {
      int codeCmpRes = 0;
      if((codeCmpRes = strcmp(fpData[i].formCode, fpData[j].formCode)) == 0){
        if(fpData[i].excType != fpData[j].excType &&
           (fpData[i].excType == '%' || fpData[j].excType == '+'))
          return true;
        else
          return false;
      }
      else
        return codeCmpRes < 0;
    }
    else
      return res < 0;
  }

  SynthExceptionData *fpData;
};

// Predicate for binary-searching an index for a lemma.
//
// Arguments are positions in the record array, except that the value 0xFFFF
// stands for the searched word itself. When the first argument is 0xFFFF the
// second one is always treated as a record position.
struct LemmaSrchComp
{
  LemmaSrchComp(SynthExceptionData *pData, const char *w):word(w), fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
    const bool leftIsWord = (i == 0xFFFF);
    const bool rightIsWord = !leftIsWord && (j == 0xFFFF);

    const char *lhs = leftIsWord ? word : fpData[i].lemma;
    const char *rhs = rightIsWord ? word : fpData[j].lemma;

//    printf("### comparing %s & %s\n", lhs, rhs);
    return strAppstrCmp(lhs, rhs) < 0;
  }

  // The lemma being searched for (not owned).
  const char *word;
  // Record array the compared indices refer to (not owned).
  SynthExceptionData *fpData;
};


// Loads the exception file into memory and prepares the per-type indexes.
//
// Every record line must start with a digit and carry seven fields separated
// by commas, spaces or tabs; they are handed to SynthExceptionData::set() in
// this order: type number, lemma, form, form code, homonym code, homonym
// form, exception type (first character only). A homonym code or form that
// starts with '-' is stored as an empty string. Lines that do not start with
// a digit, lack a field, or carry a type number outside [0, kNumTypes) are
// skipped.
//
// After loading, each type gets an array of record positions. If the index
// file is missing or older than the exception file, those arrays are filled,
// sorted with LemmaComp and written to the index file; otherwise they are
// read back from the index file.
//
// The registry is left empty when the exception file cannot be opened or when
// memory runs out. All buffers are allocated with malloc()/realloc().
//
// Returns *this.
SynthExceptionRegistry& SynthExceptionRegistry::createIndex()
{

  FILE *excFile=openAtLoc(fExceptFileName.c_str(), "r");
  if(!excFile)
    return *this;

  // NOTE: a line longer than 79 characters is delivered by fgets in pieces.
  char buf[80];

  int allocated = 200;
  fpData=(SynthExceptionData*)malloc(sizeof(SynthExceptionData) * allocated);
  if(!fpData){
    fclose(excFile);
    return *this;
  }

  // Index entries are unsigned short and 0xFFFF is reserved as the "searched
  // word" marker of LemmaSrchComp, so at most 0xFFFF records (positions
  // 0..0xFFFE) can be addressed; reading stops there.
  while(fNrLines < 0xFFFF && fgets(buf, sizeof(buf), excFile)){
    char *n=strtok(buf, ", \t\n");
    if(!n || *n < '0' || *n > '9') // the line must begin with a number!
      continue;
    char *l=strtok(0, ", \t\n");
    char *f=strtok(0, ", \t\n");
    // Space is not a delimiter for the form code, so it may contain spaces.
    char *fcode = strtok(0, ",\t\n");
    char *hc=strtok(0, ", \t\n");
    char *hf=strtok(0, ", \t\n");
    char *et=strtok(0, ", \t\n");

    // strtok returns null once the line runs out of fields.
    if(!l || !f || !fcode || !hc || !hf || !et)
      continue;

    // The type number indexes fTypeSizes/fpIndex, so it has to be in range
    // and must survive the conversion to char unchanged.
    const int parsedType = atoi(n);
    const char nType = static_cast<char>(parsedType);
    if(parsedType < 0 || parsedType >= kNumTypes || nType != parsedType)
      continue;

    fTypeSizes[parsedType]++;

    if(*hc == '-') *hc = '\0';
    if(*hf == '-') *hf = '\0';

    fpData[fNrLines].set(nType, l, f, fcode, hc, hf, *et);

    fNrLines++;
    if(fNrLines==allocated){
      // Grow through a temporary so the records read so far survive a
      // failed realloc.
      SynthExceptionData *grown=
        (SynthExceptionData*)realloc(fpData, sizeof(SynthExceptionData) * allocated * 2);
      if(!grown)
        break;
      fpData=grown;
      allocated*=2;
    }
  }

  fclose(excFile);

  //final adjustment
  if(fNrLines==0){
    // realloc(ptr, 0) is not portable; release the buffer explicitly.
    free(fpData);
    fpData=0;
  }
  else{
    SynthExceptionData *shrunk=
      (SynthExceptionData*)realloc(fpData, sizeof(SynthExceptionData) * fNrLines);
    if(shrunk)
      fpData=shrunk;
  }

  for(int i=0; i<kNumTypes; i++)
    if(fTypeSizes[i]){
      fpIndex[i]=(unsigned short*)malloc(sizeof(unsigned short)*fTypeSizes[i]);
      if(!fpIndex[i]){
        // Out of memory: fall back to an empty registry instead of leaving a
        // type with a non-zero size and no index behind.
        for(int j=0; j<kNumTypes; j++){
          free(fpIndex[j]);
          fpIndex[j]=0;
          fTypeSizes[j]=0;
        }
        free(fpData);
        fpData=0;
        fNrLines=0;
        return *this;
      }
    }

  if(isIndexFileOutOfDate())
  {
    // Next free slot in each type's index.
    unsigned short indexCur[kNumTypes];
    memset(indexCur, '\0', kNumTypes*sizeof(unsigned short));

    // Distribute record positions over the per-type indexes in file order.
    for(int i=0; i<fNrLines; i++){
      int typeNr=fpData[i].nr;
      fpIndex[typeNr][indexCur[typeNr]]=i;
      indexCur[typeNr]++;
    }

    for(int i=0; i<kNumTypes; i++)
      if(fTypeSizes[i]){
//        printf("%d: nr. of exceptions - %d\n", i, fTypeSizes[i]);
#ifdef __BORLANDC__
        std::sort(fpIndex[i], fpIndex[i]+fTypeSizes[i], LemmaComp(fpData));
#else
        sort(fpIndex[i], fpIndex[i]+fTypeSizes[i], LemmaComp(fpData));
#endif
      }

    saveIndexToFile();
  }
  else
    readIndexFromFile();

  return *this;
}

// Returns a copy of `original` whose file extension is replaced by `ext`
// (given without the leading dot). If the file name has no extension,
// "." + ext is appended.
//
// Only a dot that comes after the last path separator ('/' or '\') counts as
// the start of an extension, so a dot in a directory name (for example
// "./data/file" or "dir.v2/file") does not truncate the path.
inline std_string replaceExtension(const std_string &original, const std_string &ext)
{
   std_string outstr(original);
   const std_string::size_type sepPos=outstr.find_last_of("/\\");
   const std_string::size_type dotPos=outstr.rfind(".");
   const bool hasExtension=
      dotPos!=std_string::npos &&
      (sepPos==std_string::npos || dotPos>sepPos);

   if(hasExtension)
      outstr.replace(dotPos+1, outstr.length()-dotPos-1, ext);
   else
      outstr+="."+ext;
   return outstr;
}

// Tells whether the index file (the exception file name with its extension
// replaced by "ndx") has to be regenerated.
//
// Returns 1 when either file cannot be stat'ed or when the index file's
// modification time is older than the exception file's; returns 0 otherwise.
// A failed stat() on the exception file is reported as "out of date" because
// its timestamp would otherwise be read from an uninitialized buffer.
int SynthExceptionRegistry::isIndexFileOutOfDate()
{
   std_string fileNameInd=replaceExtension(fExceptFileName, "ndx");
   struct stat statbufExc, statbufInd;

   if(stat(fExceptFileName.c_str(), &statbufExc)!=0)
      return 1;
   if(stat(fileNameInd.c_str(), &statbufInd)!=0)
      return 1;

   return statbufInd.st_mtime<statbufExc.st_mtime ? 1 : 0;
}


void SynthExceptionRegistry::readIndexFromFile()
{
  std_string indexFileName=replaceExtension(fExceptFileName, "ndx");
  FILE *indexFile=openAtLoc(indexFileName.c_str(), "rb");
  if(indexFile){
    while(1){
      int nr;
      int numItems=fread(&nr, sizeof(int), 1, indexFile);
      if(numItems==0 || nr<0 || nr>=kNumTypes)
        break;
      numItems=fread(fpIndex[nr], sizeof(unsigned short), fTypeSizes[nr],
                     indexFile);
      if(numItems<fTypeSizes[nr])
        break;
    }
    fclose(indexFile);
  }
}

// Writes the per-type index arrays to the binary index file (the exception
// file name with its extension replaced by "ndx").
//
// For every type that has an index, an int type number is written followed
// by fTypeSizes[type] unsigned short entries. If the index file cannot be
// opened for writing, nothing is saved and the in-memory index stays as is.
void SynthExceptionRegistry::saveIndexToFile()
{
  std_string indexFileName=replaceExtension(fExceptFileName, "ndx");
  FILE *indexFile=openAtLoc(indexFileName.c_str(), "wb");
  if(!indexFile)
    return; // e.g. read-only location; fwrite/fclose on NULL would crash

  for(int i=0; i<kNumTypes; i++){
    if(fpIndex[i]){
      fwrite(&i, sizeof(int), 1, indexFile);
      fwrite(fpIndex[i], sizeof(unsigned short), fTypeSizes[i], indexFile);
    }
  }
  fclose(indexFile);
}

int SynthExceptionRegistry::findExceptionsStart(const char *lemma, int typeId)
{
  fCurrent = -1;

  unsigned short *pStart;

  printf("  findExceptionsStart: looking for %s\n", lemma);

#if 0
  printf("Exc: looking for %s %d\n", lemma, typeId);
  printf("number of exceptions: %d\n", fTypeSizes[typeId]);

  for(int i = 0; i < fTypeSizes[typeId]; i++){
    printf(" index in fpData: %d\n", fpIndex[typeId][i]);
    //printf("? %s %s\n", fpData[fpIndex[typeId][i]].formCode, fpData[fpIndex[typeId][i]].lemma);
  }
#endif

#ifdef __BORLANDC__
  pStart=std::lower_bound(fpIndex[typeId],
                       fpIndex[typeId]+fTypeSizes[typeId],
                       0xFFFF,
                       LemmaSrchComp(fpData, lemma));
#else
  pStart=lower_bound(fpIndex[typeId],
                       fpIndex[typeId]+fTypeSizes[typeId],
                       0xFFFF,
                       LemmaSrchComp(fpData, lemma));
#endif

  if(pStart < fpIndex[typeId] + fTypeSizes[typeId]) {
    printf("found\n");
    fCurrent = pStart - fpIndex[typeId];
    return 1;
  }
  else {
   printf("NOT found\n");
   fCurrent = -1;
    return 0;
  }
}

int SynthExceptionRegistry::getException(int typeId, const char *lemma, const char *formCode,
                                         SynthForm *pRecords,
                                         PureFormRecord *pBaseFormArr, int start, int end,
                                         int withAppostr, int mode,
                                         int &nParal, int &done)
{
  printf("  getException: looking for %s (%s)\n", lemma, formCode);

  if(fCurrent != -1) {
    char *curLemma = fpData[fpIndex[typeId][fCurrent]].lemma;
    char *curFormCode = fpData[fpIndex[typeId][fCurrent]].formCode;
    char startExcType = fpData[fpIndex[typeId][fCurrent]].excType;
    char curExcType = startExcType;

    //ja"tame vahele vormid, millest me ilmselt huvitatud pole
    //st. nende koode pole fcodes.ini-s
    while(fCurrent < fTypeSizes[typeId] &&
          strAppstrCmp(lemma, curLemma) == 0 &&
          strcmp(curFormCode, formCode) < 0) {
      fCurrent++;
      if(fCurrent < fTypeSizes[typeId]) {
        curLemma = fpData[fpIndex[typeId][fCurrent]].lemma;
        curFormCode = fpData[fpIndex[typeId][fCurrent]].formCode;
        curExcType = fpData[fpIndex[typeId][fCurrent]].excType;
      }
    }

    while(fCurrent < fTypeSizes[typeId] &&
          strAppstrCmp(lemma, curLemma) == 0 &&
          strcmp(curFormCode, formCode) == 0 &&
          curExcType == startExcType &&
          (mode == 0 && curExcType !='+' ||
           mode == 1 && curExcType =='+') ) {

      char *curForm = fpData[fpIndex[typeId][fCurrent]].form;
      char *homoCode = fpData[fpIndex[typeId][fCurrent]].homoCode;
      char *homoForm = fpData[fpIndex[typeId][fCurrent]].homoForm;

      printf("  getException: got %s\n", curForm);

      bool useIt = true;
      if(*homoCode && *homoForm) {
        char *exForm = findBaseForm(homoCode, pBaseFormArr, start, end);
        if(!exForm || strAppstrCmp(exForm, homoForm))
          useIt = false;
      }

      if(useIt) {
        strcpy(pRecords[nParal].fForm, curForm);
        if(!withAppostr)
          deAppostrophise(pRecords[nParal].fForm);
        resolveBrackets(pRecords[nParal].fForm, pRecords[nParal].fStemLength);
        nParal++;
      }

      if(curExcType == '%')
        done = 0;
      else
        done = 1;

      fCurrent++;

      if(fCurrent < fTypeSizes[typeId]) {
        curLemma = fpData[fpIndex[typeId][fCurrent]].lemma;
        curFormCode = fpData[fpIndex[typeId][fCurrent]].formCode;
        curExcType = fpData[fpIndex[typeId][fCurrent]].excType;
      }
    }

    if(fCurrent >= fTypeSizes[typeId] || strAppstrCmp(lemma, curLemma) != 0) {
      fCurrent = -1;
      return 0;
    }
    return 1;
  }
  return 0;
}



