#include "grammar.hpp"

#ifdef LINUX_PORT
#include <sys/stat.h>
#else
#include <sys\stat.h>
#endif

#include <set>
#include <algorithm>

#include <openfile.h>

// Debug tracing helpers: append one printf-style formatted record (taking
// one, two or three arguments) to the file "dbg.aaa", which is opened relative
// to the current working directory. The file is reopened and closed on every
// use. When it cannot be opened the record is dropped instead of handing a
// null FILE* to fprintf/fclose.
#define DBGAAA1(s, x) { FILE *f=fopen("dbg.aaa", "a"); \
                       if(f){ fprintf(f, s, x); fclose(f); } }

#define DBGAAA2(s, x, y) { FILE *f=fopen("dbg.aaa", "a"); \
                           if(f){ fprintf(f, s, x, y); fclose(f); } }

#define DBGAAA3(s, x, y, z) { FILE *f=fopen("dbg.aaa", "a"); \
                              if(f){ fprintf(f, s, x, y, z); fclose(f); } }

// Ordered set of strings; findExceptions() uses it to remember which target
// strings it has already reported. The Borland branch spells out std::, the
// other branch uses the unqualified names (presumably made visible by an
// included header -- TODO confirm).
#ifdef __BORLANDC__
typedef std::set<std_string, std::less<std_string> > StringSet;
#else
typedef set<std_string, less<std_string> > StringSet;
#endif


// Creates an empty registry bound to the exception file `exceptFileName`.
// Nothing is read here; createIndices() loads the file and builds the indices.
// Every index slot and every per-type counter starts out cleared.
ExceptionRegistry::ExceptionRegistry(const char *exceptFileName):
fExceptFileName(exceptFileName), fNrLines(0), fIndexMode(kBoth),
fpData(0)
{
  // Two index tables (one per search side) with one slot per type number.
  for(int side=0; side<2; side++)
    for(int type=0; type<40; type++)
      fpIndex[side][type]=0;

  // createIndices() increments these counters, so they need a defined
  // starting value.
  for(int type=0; type<40; type++)
    fTypeSizes[type]=0;
}

// Releases the record table and all index arrays.
// These buffers are obtained with malloc()/realloc() in createIndices(), so
// they have to be returned with free(); releasing them with delete[] is a
// mismatched deallocation. free() accepts null pointers, so unused slots need
// no special casing.
ExceptionRegistry::~ExceptionRegistry()
{
   for(int i=0; i<2; i++)
    for(int j=0; j<40; j++){
      free(fpIndex[i][j]);
      fpIndex[i][j]=0;
    }

   free(fpData);
   fpData=0;
}


// Stores the requested index mode and hands back the registry itself so that
// calls can be chained.
ExceptionRegistry& ExceptionRegistry::setIndexMode(ExceptionRegistry::IndexMode iMode)
{
   ExceptionRegistry &self=*this;
   self.fIndexMode=iMode;
   return self;
}


// Three-way string comparison that steps over apostrophes.
// At each position at most ONE apostrophe is skipped in either string before
// the characters are compared, so a doubled apostrophe still contributes one
// apostrophe to the comparison.
// Returns -1 if s1 orders before s2, 1 if after, 0 if they compare equal.
int strAppstrCmp(const char *s1, const char *s2)
{
  const char *lhs=s1;
  const char *rhs=s2;

  for(;;){
    if(*lhs=='\'')
      ++lhs;
    if(*rhs=='\'')
      ++rhs;

    const char a=*lhs;
    const char b=*rhs;
    if(a!=b)
      return a<b ? -1 : 1;
    if(a=='\0')          // both strings ended together
      return 0;

    ++lhs;
    ++rhs;
  }
}

// Removes apostrophes from `s` in place, compacting the remaining characters
// towards the front. At most one apostrophe is dropped per copied character,
// so of two adjacent apostrophes the second one survives.
void deAppostrophise(char *s)
{
  const char *src=s;   // next character to read
  char *dst=s;         // next position to write

  for(;;){
    if(*src=='\'')
      ++src;
    *dst=*src;
    if(*dst=='\0')
      break;
    ++dst;
    ++src;
  }
}

// Ordering predicate over record numbers: record i goes before record j when
// its lemma compares lower under strAppstrCmp(). Used to sort the lemma index.
struct LemmaComp
{
  LemmaComp(ExceptionData *pData):fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
    const char *lhs=fpData[i].lemma;
    const char *rhs=fpData[j].lemma;
    return strAppstrCmp(lhs, rhs)<0;
  }

  ExceptionData *fpData;   // record table the numbers refer to (not owned)
};

// Ordering predicate over record numbers: record i goes before record j when
// its form compares lower under strAppstrCmp(). Used to sort the form index.
struct FormComp
{
  FormComp(ExceptionData *pData):fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
    const char *lhs=fpData[i].form;
    const char *rhs=fpData[j].form;
    return strAppstrCmp(lhs, rhs)<0;
  }

  ExceptionData *fpData;   // record table the numbers refer to (not owned)
};

// Search predicate for the lemma index. The record number 0xFFFF is a
// placeholder that stands for the searched word itself, which lets the same
// functor compare "record vs. word" and "record vs. record".
// When both arguments are 0xFFFF only the left one is treated as the word.
struct LemmaSrchComp
{
  LemmaSrchComp(ExceptionData *pData, const char *w):word(w), fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
    const bool leftIsWord=(i==0xFFFF);
    const bool rightIsWord=!leftIsWord && (j==0xFFFF);

    const char *lhs=word;
    if(!leftIsWord)
      lhs=fpData[i].lemma;

    const char *rhs=word;
    if(!rightIsWord)
      rhs=fpData[j].lemma;

    return strAppstrCmp(lhs, rhs)<0;
  }

  const char *word;        // the string being looked up (not owned)
  ExceptionData *fpData;   // record table the numbers refer to (not owned)
};

// Search predicate for the form index. The record number 0xFFFF is a
// placeholder that stands for the searched word itself, which lets the same
// functor compare "record vs. word" and "record vs. record".
// When both arguments are 0xFFFF only the left one is treated as the word.
struct FormSrchComp
{
  FormSrchComp(ExceptionData *pData, const char *w):word(w), fpData(pData) {}

  bool operator()(unsigned short i, unsigned short j) const
  {
    const bool leftIsWord=(i==0xFFFF);
    const bool rightIsWord=!leftIsWord && (j==0xFFFF);

    const char *lhs=word;
    if(!leftIsWord)
      lhs=fpData[i].form;

    const char *rhs=word;
    if(!rightIsWord)
      rhs=fpData[j].form;

    return strAppstrCmp(lhs, rhs)<0;
  }

  const char *word;        // the string being looked up (not owned)
  ExceptionData *fpData;   // record table the numbers refer to (not owned)
};


// Loads the exception file and prepares the per-type lookup indices.
//
// A usable line has at least four whitespace-separated fields: a type number
// given as two decimal digits (00..39) followed by three strings and an
// optional fifth one. The fields are handed to ExceptionData::set() unchanged
// (presumably lemma, form, exception code and a rule marker -- TODO confirm
// against ExceptionData). Lines that do not fit this shape are skipped. Lines
// are read through an 80-byte buffer, so a longer line is seen as several
// pieces.
//
// For each type two arrays of record numbers are kept: fpIndex[0][type],
// ordered by lemma, and fpIndex[1][type], ordered by form. They are taken from
// the ".ndx" companion file when isIndexFileOutOfDate() reports it as current
// and its contents are plausible; otherwise they are rebuilt by sorting and
// written back with saveIndicesToFile().
//
// Returns *this. If the exception file cannot be opened the registry is left
// untouched. If the record table cannot grow, loading stops with the records
// read so far; if the initial table or an index array cannot be allocated the
// registry is left empty.
ExceptionRegistry& ExceptionRegistry::createIndices()
{

  FILE *excFile=openAtLoc(fExceptFileName.c_str(), "r");
  if(!excFile)
    return *this;

  // Start from a clean slate: the counters below are incremented while
  // reading, and buffers from an earlier call must not leak.
  free(fpData);
  fpData=0;
  fNrLines=0;
  for(int t=0; t<40; t++){
    free(fpIndex[0][t]);
    free(fpIndex[1][t]);
    fpIndex[0][t]=fpIndex[1][t]=0;
    fTypeSizes[t]=0;
  }

  int allocated = 200;
  fpData=(ExceptionData*)malloc(sizeof(ExceptionData) * allocated);
  if(!fpData){
    fclose(excFile);
    return *this;
  }

  char buf[80];

  // Record numbers are stored as unsigned short and 0xFFFF is reserved as the
  // "searched word" placeholder of the search comparators, so at most 0xFFFF
  // records (numbers 0..0xFFFE) can be indexed.
  while(fNrLines<0xFFFF && fgets(buf, 80, excFile)){
    char *n=strtok(buf, " \t\n");
    if(!n)
      continue;
    char *l=strtok(0, " \t\n");
    char *f=strtok(0, " \t\n");
    char *c=strtok(0, " \t\n");
    char *r=strtok(0, " \t\n");

    if(!(l && f && c))
      continue;

    // The type number indexes fTypeSizes and fpIndex, so it must really be
    // two digits and stay below 40.
    if(n[0]<'0' || n[0]>'9' || n[1]<'0' || n[1]>'9')
      continue;
    char nType = (n[0]-'0') * 10 + (n[1]-'0');
    if(nType>=40)
      continue;

    // Make room before storing; on failure keep what has been read so far.
    if(fNrLines==allocated){
      ExceptionData *grown=
        (ExceptionData*)realloc(fpData, sizeof(ExceptionData) * allocated * 2);
      if(!grown)
        break;
      fpData=grown;
      allocated*=2;
    }

    fTypeSizes[nType]++;
    fpData[fNrLines].set(nType, l, f, c, r);
    fNrLines++;
  }

  fclose(excFile);

  //final adjustment: give back the unused tail of the table
  if(fNrLines>0){
    ExceptionData *fitted=
      (ExceptionData*)realloc(fpData, sizeof(ExceptionData) * fNrLines);
    if(fitted)
      fpData=fitted;
  }
  else{
    // realloc() with size 0 is not portable; release explicitly instead.
    free(fpData);
    fpData=0;
  }

  bool indexOk=true;
  for(int i=0; i<40 && indexOk; i++)
    if(fTypeSizes[i]){
      const size_t bytes=sizeof(unsigned short)*fTypeSizes[i];
      fpIndex[0][i]=(unsigned short*)malloc(bytes);
      fpIndex[1][i]=(unsigned short*)malloc(bytes);
      if(fpIndex[0][i] && fpIndex[1][i]){
        // Pre-fill with a value that is never a valid record number, so that
        // slots an incomplete index file leaves unread are detected below.
        memset(fpIndex[0][i], 0xFF, bytes);
        memset(fpIndex[1][i], 0xFF, bytes);
      }
      else
        indexOk=false;
    }

  if(!indexOk){
    for(int t=0; t<40; t++){
      free(fpIndex[0][t]);
      free(fpIndex[1][t]);
      fpIndex[0][t]=fpIndex[1][t]=0;
      fTypeSizes[t]=0;
    }
    free(fpData);
    fpData=0;
    fNrLines=0;
    return *this;
  }

  int rebuild=isIndexFileOutOfDate();
  if(!rebuild){
    readIndicesFromFile();

    // The time stamp alone does not prove the index matches the data: every
    // stored number must name an existing record of the right type.
    for(int side=0; side<2 && !rebuild; side++)
      for(int i=0; i<40 && !rebuild; i++)
        for(int k=0; k<(int)fTypeSizes[i] && !rebuild; k++){
          const unsigned short idx=fpIndex[side][i][k];
          if(idx>=fNrLines || fpData[idx].nr!=i)
            rebuild=1;
        }
  }

  if(rebuild)
  {
    // Distribute the record numbers over their types in file order ...
    unsigned short indexCur[40];
    memset(indexCur, '\0', 40*sizeof(unsigned short));

    for(int i=0; i<fNrLines; i++){
      int typeNr=fpData[i].nr;
      (fpIndex[0][typeNr])[indexCur[typeNr]]=
        (fpIndex[1][typeNr])[indexCur[typeNr]]=(unsigned short)i;
      indexCur[typeNr]++;
    }

    // ... then order one copy by lemma and the other by form.
    for(int i=0; i<40; i++)
      if(fTypeSizes[i]){
#ifdef __BORLANDC__
        std::sort(fpIndex[0][i], fpIndex[0][i]+fTypeSizes[i], LemmaComp(fpData));
        std::sort(fpIndex[1][i], fpIndex[1][i]+fTypeSizes[i], FormComp(fpData));
#else
        sort(fpIndex[0][i], fpIndex[0][i]+fTypeSizes[i], LemmaComp(fpData));
        sort(fpIndex[1][i], fpIndex[1][i]+fTypeSizes[i], FormComp(fpData));
#endif
      }

    saveIndicesToFile();
  }

   return *this;
}

// Returns `original` with its file-name extension replaced by `ext` (given
// without the leading dot). When the file name has no extension, "." + ext is
// appended instead.
// Only a dot that comes after the last path separator ('/' or '\') counts as
// the start of an extension, so dots in directory names ("../data/exc",
// "dir.v1/exc") are left alone.
inline std_string replaceExtension(const std_string &original, const std_string &ext)
{
   std_string outstr(original);
   const std_string::size_type dot=outstr.rfind('.');
   const std_string::size_type sep=outstr.find_last_of("/\\");
   const bool hasExtension=
      dot!=std_string::npos && (sep==std_string::npos || dot>sep);

   if(!hasExtension){
      outstr+=".";
      outstr+=ext;
   }
   else
      outstr.replace(dot+1, outstr.length()-dot-1, ext);
   return outstr;
}

// Tells whether the ".ndx" companion of the exception file has to be rebuilt.
// Returns 1 when either file cannot be stat()'ed or when the index file's
// modification time is older than the exception file's, 0 otherwise.
// A failed stat() leaves its buffer unset, so no time stamp is compared in
// that case; rebuilding is the safe answer.
int ExceptionRegistry::isIndexFileOutOfDate()
{
   std_string fileNameInd=replaceExtension(fExceptFileName, "ndx");
   struct stat statbufExc, statbufInd;

   if(stat(fExceptFileName.c_str(), &statbufExc)!=0)
      return 1;
   if(stat(fileNameInd.c_str(), &statbufInd)!=0)
      return 1;

   return statbufInd.st_mtime<statbufExc.st_mtime ? 1 : 0;
}


void ExceptionRegistry::readIndicesFromFile()
{
  std_string indexFileName=replaceExtension(fExceptFileName, "ndx");
  FILE *indexFile=openAtLoc(indexFileName.c_str(), "rb");
  if(indexFile){
    while(1){
      int nr;
      int numItems=fread(&nr, sizeof(int), 1, indexFile);
      if(numItems==0 || nr<0 || nr>=40)
        break;
      numItems=fread(fpIndex[0][nr], sizeof(unsigned short), fTypeSizes[nr],
                     indexFile);
      if(numItems<fTypeSizes[nr])
        break;
      numItems=fread(fpIndex[1][nr], sizeof(unsigned short), fTypeSizes[nr],
                     indexFile);
      if(numItems<fTypeSizes[nr])
        break;
    }
    fclose(indexFile);
  }
}

// Writes the index arrays to the ".ndx" companion of the exception file.
// For every type that has an index, the int type number is written, followed
// by the fTypeSizes[type] record numbers of fpIndex[0] and then those of
// fpIndex[1] -- the layout readIndicesFromFile() expects.
// If the file cannot be opened for writing nothing is saved; the in-memory
// indices stay usable.
void ExceptionRegistry::saveIndicesToFile()
{
  std_string indexFileName=replaceExtension(fExceptFileName, "ndx");
  FILE *indexFile=openAtLoc(indexFileName.c_str(), "wb");
  if(!indexFile)
    return;

  for(int i=0; i<40; i++){
    if(fpIndex[0][i]){
      fwrite(&i, sizeof(int), 1, indexFile);
      fwrite(fpIndex[0][i], sizeof(unsigned short), fTypeSizes[i], indexFile);
      fwrite(fpIndex[1][i], sizeof(unsigned short), fTypeSizes[i], indexFile);
    }
  }
  fclose(indexFile);
}

// Looks up the value of `pInput` among the exceptions of type `typeId` and
// appends one result per distinct counterpart string to `pResult`.
//
//   pResult            receives the results created with pInput->newResult().
//   pInput             supplies the searched string through value().
//   start              side to search on: 0 uses the lemma index, 1 the form
//                      index; the string of the other side is what is
//                      returned.
//   typeId             type number, 0..39.
//   grammarName        a record only produces a result when its exception code
//                      contains this text.
//   withAppostr        when 0, apostrophes are stripped from the stored
//                      strings before they are compared and returned.
//   pHasRuleBasedAlso  set to 1 when a matching record carries the marker '>'
//                      (start==kLeft) or '<' (start==kRight), otherwise 0.
//   detectedToken      receives the exception code of the last record that
//                      matched the string but not grammarName, otherwise "".
//
// Returns the number of results appended. A result whose value is "0" is
// additionally flagged with setIgnored(). The two out-parameters are reset on
// every call, including the calls that return early.
int ExceptionRegistry::findExceptions(ResultCollection *pResult,
                                      DerivationResult *pInput,
                                      int start, int typeId, std_string grammarName,
                                      int withAppostr, int *pHasRuleBasedAlso,
                                      std_string &detectedToken)
{
  //  fprintf(stderr, "looking for %s %d %s\n", pInput->value().c_str(), typeId,
  //          grammarName.c_str());

  *pHasRuleBasedAlso=0;
  detectedToken="";

  // Both values index fpIndex directly.
  if(start<0 || start>1 || typeId<0 || typeId>=40)
    return 0;
  if(!fpIndex[start][typeId])
    return 0;

  // Keep a private copy of the searched string: a pointer obtained from
  // value().c_str() would dangle if value() returns a temporary.
  const std_string inputValue(pInput->value().c_str());
  const char *input=inputValue.c_str();

  unsigned short *const pEnd=fpIndex[start][typeId]+fTypeSizes[typeId];

  // 0xFFFF is the placeholder the search comparators replace by `input`.
  unsigned short *pStart;
#ifdef __BORLANDC__
  if(start)
    pStart=std::lower_bound(fpIndex[start][typeId], pEnd, 0xFFFF,
                            FormSrchComp(fpData, input));
  else
    pStart=std::lower_bound(fpIndex[start][typeId], pEnd, 0xFFFF,
                            LemmaSrchComp(fpData, input));
#else
  if(start)
    pStart=lower_bound(fpIndex[start][typeId], pEnd, 0xFFFF,
                       FormSrchComp(fpData, input));
  else
    pStart=lower_bound(fpIndex[start][typeId], pEnd, 0xFFFF,
                       LemmaSrchComp(fpData, input));
#endif

  int nrExceptions=0;
  StringSet foundStrings;
  for(; pStart<pEnd; pStart++){
    const int cur=*pStart;
//     fprintf(stderr, "testing %s %s -> cmp(%s,%s)=%d\n",
//             fpData[cur].lemma, fpData[cur].form,
//             fpData[cur].elem(start), input,
//             strAppstrCmp(fpData[cur].elem(start), input));

    // Bounded copies: the stored strings are not guaranteed to fit, so never
    // write past the local buffers.
    char sourceBuf[80], targetBuf[80];
    strncpy(sourceBuf, fpData[cur].elem(start), sizeof(sourceBuf)-1);
    sourceBuf[sizeof(sourceBuf)-1]='\0';
    strncpy(targetBuf, fpData[cur].elem(flip(start)), sizeof(targetBuf)-1);
    targetBuf[sizeof(targetBuf)-1]='\0';

    if(!withAppostr){
      deAppostrophise(sourceBuf);
      deAppostrophise(targetBuf);
    }

    if(strcmp(sourceBuf, input)==0){
      if(strstr(fpData[cur].excCode, grammarName.c_str())){
        if((start==kLeft && fpData[cur].ruleToo=='>') ||
           (start==kRight && fpData[cur].ruleToo=='<'))
          *pHasRuleBasedAlso=1;
        if(!foundStrings.count(targetBuf)){
          DerivationResult *pOutput=pInput->newResult(targetBuf);
          pOutput->setExceptionCode(fpData[cur].excCode);
          if(pOutput->value()=="0")
            pOutput->setIgnored();
          pResult->push_back(pOutput);
          nrExceptions++;
          foundStrings.insert(targetBuf);
//           fprintf(stderr, "  YES: %s\n", targetBuf);
        }
      }
      else
        detectedToken=fpData[cur].excCode;
    }
    // The index is ordered ignoring apostrophes. With apostrophes significant,
    // an entry that differs from the input only by apostrophes is skipped and
    // the scan goes on; any other mismatch ends the run of candidates.
    else if(!withAppostr || strAppstrCmp(sourceBuf, input)!=0)
      break;
  }

  return nrExceptions;
}

// Paradigm-exception lookup -- currently disabled.
// The whole implementation sits inside "#if 0", so the function ignores all
// of its arguments and always returns a null pointer; *pHasRuleBasedAlso is
// not written. The disabled code is a file-seeking implementation that uses
// helpers (findAddress, decomposeLine, removeAppostrophes, fFileHandle) which
// are not defined in the visible part of this file.
StringArray* ExceptionRegistry::findPdgmExceptions(std_string sInput, int start, int typeId,
                                                   std_string grammarName, int withAppostr,
                                                   int *pHasRuleBasedAlso)
{
#if 0
   StringArray *pOutputArray=new StringArray(3, 0, 3);
   if(fIndexMode!=kBoth && fIndexMode!=start)
      return pOutputArray;
   *pHasRuleBasedAlso=0;
   std_string firstSearchString=sInput;
   std_string searchString=sInput;
   if(!withAppostr)
      removeAppostrophes(searchString);
   removeAppostrophes(firstSearchString); //new
   pgFile=fFileHandle;
   //the last argument is 0. That's because the file is sorted without appostrophes
   unsigned long* pPos=findAddress(start, typeId, firstSearchString, 0);
   if(pPos)
   {
      int different=0;
      while(pPos>fpIndex[start] && !different)
      {
         pPos--;
         fseek(fFileHandle, *pPos, SEEK_SET);
         char line[80];
         fgets(line, 80, fFileHandle);
         int arrow, aType;
         char lemmaBuf[20], formBuf[20];
         //don't take appostrophes into account while stepping back.
         //previous line may be the needed one
         decomposeLine(line, &aType, lemmaBuf, formBuf, 0, &arrow, 0, 0, 0);
         std_string comp[2];
         comp[kLeft]=lemmaBuf; comp[kRight]=formBuf;

         if(aType!=typeId || comp[start]!=firstSearchString)
         {
            pPos++;
            different=1;
         }
      }

      different=0;
      while(pPos-fpIndex[start]<fNrLines && !different)
      {
         fseek(fFileHandle, *pPos, SEEK_SET);
         char line[80];
         fgets(line, 80, fFileHandle);
         int arrow, aType;
         char lemmaBuf[20], formBuf[20], tokenBuf[10], hsCodeBuf[20], hsFormBuf[20];
         decomposeLine(line, &aType, lemmaBuf, formBuf, tokenBuf, &arrow, hsCodeBuf, hsFormBuf, withAppostr);
         std_string comp[2];
         comp[kLeft]=lemmaBuf; comp[kRight]=formBuf;
         if(aType==typeId && comp[start]==searchString)
         {
            std_string tokenString(tokenBuf);
            if(tokenString.contains(grammarName))
            {
               if(start==kLeft && arrow==1 || start==kRight && arrow==-1)
                  *pHasRuleBasedAlso=1;
               pOutputArray->Add(comp[flip(start)]);
               pOutputArray->Add(hsCodeBuf);
               pOutputArray->Add(hsFormBuf);
            }
            pPos++;
         }
         else
         {
            //if appostrophes are enabled it may happen that the difference is
            //caused by appostrophes, and hence, we may not reach to line we are
            //looking for
            different=1;
            if(aType==typeId)
            {
               removeAppostrophes(comp[start]);
               if(comp[start]==firstSearchString)
               {
                  different=0;
                  pPos++;
               }
            }
         }
      }
   }
   return pOutputArray;
#else
  // Live code path: no lookup is performed.
  return 0;
#endif
}

