postgresql-tsearch2-compound_word_support_20031210.patch

   1 diff -uNr postgresql-7.4/contrib/tsearch2/dict_ispell.c postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c
   2 --- postgresql-7.4/contrib/tsearch2/dict_ispell.c       2003-08-04 02:43:11.000000000 +0200
   3 +++ postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c 2003-12-18 17:46:03.000000000 +0100
   4 @@ -27,7 +27,7 @@
   5  static void
   6  freeDictISpell(DictISpell * d)
   7  {
   8 -       FreeIspell(&(d->obj));
   9 +       NIFree(&(d->obj));
  10         freestoplist(&(d->stoplist));
  11         free(d);
  12  }
  13 @@ -71,7 +71,7 @@
  14                                           (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  15                                            errmsg("dictionary already loaded")));
  16                         }
  17 -                       if (ImportDictionary(&(d->obj), pcfg->value))
  18 +                       if (NIImportDictionary(&(d->obj), pcfg->value))
  19                         {
  20                                 freeDictISpell(d);
  21                                 ereport(ERROR,
  22 @@ -90,7 +90,7 @@
  23                                           (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  24                                            errmsg("affixes already loaded")));
  25                         }
  26 -                       if (ImportAffixes(&(d->obj), pcfg->value))
  27 +                       if (NIImportAffixes(&(d->obj), pcfg->value))
  28                         {
  29                                 freeDictISpell(d);
  30                                 ereport(ERROR,
  31 @@ -132,8 +132,8 @@
  32
  33         if (affloaded && dictloaded)
  34         {
  35 -               SortDictionary(&(d->obj));
  36 -               SortAffixes(&(d->obj));
  37 +               NISortDictionary(&(d->obj));
  38 +               NISortAffixes(&(d->obj));
  39         }
  40         else if (!affloaded)
  41         {
  42 @@ -168,7 +168,7 @@
  43
  44         res = palloc(sizeof(char *) * 2);
  45         txt = pnstrdup(in, PG_GETARG_INT32(2));
  46 -       res = NormalizeWord(&(d->obj), txt);
  47 +       res = NINormalizeWord(&(d->obj), txt);
  48         pfree(txt);
  49
  50         if (res == NULL)
  51 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.c postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c
  52 --- postgresql-7.4/contrib/tsearch2/ispell/spell.c      2003-08-04 02:43:11.000000000 +0200
  53 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c        2003-12-18 17:46:03.000000000 +0100
  54 @@ -7,15 +7,26 @@
  55
  56  #include "spell.h"
  57
  58 -#define MAXNORMLEN 56
  59 +#define MAX_NORM 1024
  60 +#define MAXNORMLEN 256
  61
  62  #define STRNCASECMP(x,y)               (strncasecmp(x,y,strlen(y)))
  63 +/* N-th byte of W (length L): counted from the front when T is 'p', from the back otherwise */
  64 +#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
  65 +#define GETCHAR(A,N,T)   GETWCHAR( (A)->repl, (A)->replen, N, T )
  66 +/* Report ERROR "out of memory" when X is false; do/while keeps it a single statement */
  67 +#define MEMOUT(X)  do { if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); } while (0)
  68
  69  static int
  70  cmpspell(const void *s1, const void *s2)
  71  {
  72         return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
  73  }
  74 +static int
  75 +cmpspellaffix(const void *s1, const void *s2)
  76 +{      /* qsort() comparator: orders SPELL entries by their p.flag strings via strcmp() */
  77 +       return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
  78 +}
  79
  80  static void
  81  strlower(char *str)
  82 @@ -29,6 +40,13 @@
  83         }
  84  }
  85
  86 +static char*
  87 +strnduplicate(char *s, int len) { /* return a palloc'd copy of the first len bytes of s */
  88 +       char *d=(char*)palloc( len + 1 ); /* +1 for the terminator written below */
  89 +       memcpy(d, s, len ); /* copies exactly len bytes; assumes s holds at least that many */
  90 +       d[len]='\0';
  91 +       return d;
  92 +}
  93  /* backward string compaire for suffix tree operations */
  94  static int
  95  strbcmp(const char *s1, const char *s2)
  96 @@ -92,7 +110,7 @@
  97  }
  98
  99  int
 100 -AddSpell(IspellDict * Conf, const char *word, const char *flag)
 101 +NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
 102  {
 103         if (Conf->nspell >= Conf->mspell)
 104         {
 105 @@ -106,24 +124,18 @@
 106                         Conf->mspell = 1024 * 20;
 107                         Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
 108                 }
 109 -               if (Conf->Spell == NULL)
 110 -                       ereport(ERROR,
 111 -                                       (errcode(ERRCODE_OUT_OF_MEMORY),
 112 -                                        errmsg("out of memory")));
 113 +               MEMOUT(Conf->Spell);
 114         }
 115         Conf->Spell[Conf->nspell].word = strdup(word);
 116 -       if (!Conf->Spell[Conf->nspell].word)
 117 -               ereport(ERROR,
 118 -                               (errcode(ERRCODE_OUT_OF_MEMORY),
 119 -                                errmsg("out of memory")));
 120 -       strncpy(Conf->Spell[Conf->nspell].flag, flag, 10);
 121 +       MEMOUT(Conf->Spell[Conf->nspell].word);
 122 +       strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
 123         Conf->nspell++;
 124         return (0);
 125  }
 126
 127
 128  int
 129 -ImportDictionary(IspellDict * Conf, const char *filename)
 130 +NIImportDictionary(IspellDict * Conf, const char *filename)
 131  {
 132         unsigned char str[BUFSIZ];
 133         FILE       *dict;
 134 @@ -143,7 +155,7 @@
 135                         flag = s;
 136                         while (*s)
 137                         {
 138 -                               if (((*s >= 'A') && (*s <= 'Z')) || ((*s >= 'a') && (*s <= 'z')))
 139 +                               if (isprint(*s) && !isspace(*s))
 140                                         s++;
 141                                 else
 142                                 {
 143 @@ -166,65 +178,49 @@
 144                                 *s = 0;
 145                         s++;
 146                 }
 147 -               AddSpell(Conf, str, flag);
 148 +               NIAddSpell(Conf, str, flag);
 149         }
 150         fclose(dict);
 151         return (0);
 152  }
 153
 154
 155 -static SPELL *
 156 -FindWord(IspellDict * Conf, const char *word, int affixflag)
 157 +static int
 158 +FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
 159  {
 160 -       int                     l,
 161 -                               c,
 162 -                               r,
 163 -                               resc,
 164 -                               resl,
 165 -                               resr,
 166 -                               i;
 167 -
 168 -       i = (int) (*word) & 255;
 169 -       l = Conf->SpellTree.Left[i];
 170 -       r = Conf->SpellTree.Right[i];
 171 -       if (l == -1)
 172 -               return (NULL);
 173 -       while (l <= r)
 174 -       {
 175 -               c = (l + r) >> 1;
 176 -               resc = strcmp(Conf->Spell[c].word, word);
 177 -               if ((resc == 0) &&
 178 -                       ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)))
 179 -                       return (&Conf->Spell[c]);
 180 -               resl = strcmp(Conf->Spell[l].word, word);
 181 -               if ((resl == 0) &&
 182 -                       ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)))
 183 -                       return (&Conf->Spell[l]);
 184 -               resr = strcmp(Conf->Spell[r].word, word);
 185 -               if ((resr == 0) &&
 186 -                       ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)))
 187 -                       return (&Conf->Spell[r]);
 188 -               if (resc < 0)
 189 -               {
 190 -                       l = c + 1;
 191 -                       r--;
 192 -               }
 193 -               else if (resc > 0)
 194 -               {
 195 -                       r = c - 1;
 196 -                       l++;
 197 -               }
 198 -               else
 199 -               {
 200 -                       l++;
 201 -                       r--;
 202 +       SPNode *node = Conf->Dictionary; /* walk starts at the root of the dictionary trie */
 203 +       SPNodeData *StopLow, *StopHigh, *StopMiddle;
 204 +       int level=0, wrdlen=strlen(word); /* level: index of the byte of word being matched */
 205 +       /* Returns 1 if word is stored with affixflag (0 = any flag); 0 otherwise, or if compoundonly is set and the word lacks compoundallow. */
 206 +       while( node && level<wrdlen) {
 207 +               StopLow = node->data;
 208 +               StopHigh = node->data+node->length;
 209 +               while (StopLow < StopHigh) { /* binary search of this node's entries for word[level] */
 210 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
 211 +                       if ( StopMiddle->val == ((uint8*)(word))[level] ) {
 212 +                               if ( wrdlen==level+1 && StopMiddle->isword ) { /* last byte matched and a word ends here */
 213 +                                       if ( compoundonly && !StopMiddle->compoundallow )
 214 +                                               return 0;
 215 +                                       if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
 216 +                                               return 1;
 217 +                               }
 218 +                               node=StopMiddle->node; /* descend to the child node for the next byte */
 219 +                               level++;
 220 +                               break;
 221 +                       } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
 222 +                               StopLow = StopMiddle + 1;
 223 +                       } else {
 224 +                               StopHigh = StopMiddle;
 225 +                       }
 226                 }
 227 +               if ( StopLow >= StopHigh ) /* search exhausted without a match: word is not stored */
 228 +                       break;
 229         }
 230 -       return (NULL);
 231 +       return 0;
 232  }
 233
 234  int
 235 -AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type)
 236 +NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
 237  {
 238         if (Conf->naffixes >= Conf->maffixes)
 239         {
 240 @@ -238,16 +234,14 @@
 241                         Conf->maffixes = 16;
 242                         Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));
 243                 }
 244 -               if (Conf->Affix == NULL)
 245 -                       ereport(ERROR,
 246 -                                       (errcode(ERRCODE_OUT_OF_MEMORY),
 247 -                                        errmsg("out of memory")));
 248 +               MEMOUT(Conf->Affix);
 249         }
 250         if (type == 's')
 251                 sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
 252         else
 253                 sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
 254         Conf->Affix[Conf->naffixes].compile = 1;
 255 +       Conf->Affix[Conf->naffixes].flagflags = flagflags;
 256         Conf->Affix[Conf->naffixes].flag = flag;
 257         Conf->Affix[Conf->naffixes].type = type;
 258
 259 @@ -281,7 +275,7 @@
 260
 261
 262  int
 263 -ImportAffixes(IspellDict * Conf, const char *filename)
 264 +NIImportAffixes(IspellDict * Conf, const char *filename)
 265  {
 266         unsigned char str[BUFSIZ];
 267         unsigned char flag = 0;
 268 @@ -292,13 +286,24 @@
 269         int                     i;
 270         int                     suffixes = 0;
 271         int                     prefixes = 0;
 272 +       unsigned char flagflags = 0;
 273         FILE       *affix;
 274
 275         if (!(affix = fopen(filename, "r")))
 276                 return (1);
 277 +       Conf->compoundcontrol='\t';
 278
 279         while (fgets(str, sizeof(str), affix))
 280         {
 281 +               if (STRNCASECMP(str, "compoundwords")==0) { /* expected form: "compoundwords controlled <flag>" */
 282 +                       s=strchr(str, 'l');
 283 +                       if ( s ) {
 284 +                               while( *s && *s!=' ' ) s++; /* stop at NUL: the line may contain no blank */
 285 +                               while( *s==' ' ) s++;
 286 +                               if ( *s ) Conf->compoundcontrol = *s; /* keep the default when no flag follows */
 287 +                               continue;
 288 +                       }
 289 +               }
 290                 if (!STRNCASECMP(str, "suffixes"))
 291                 {
 292                         suffixes = 1;
 293 @@ -314,8 +319,18 @@
 294                 if (!STRNCASECMP(str, "flag "))
 295                 {
 296                         s = str + 5;
 297 -                       while (strchr("* ", *s))
 298 +                       flagflags=0;
 299 +                       while( *s==' ' ) s++;
 300 +                       if ( *s=='*' ) {
 301 +                               flagflags|=FF_CROSSPRODUCT;
 302 +                               s++;
 303 +                       } else if ( *s=='~' ) {
 304 +                               flagflags|=FF_COMPOUNDONLYAFX;
 305                                 s++;
 306 +                       }
 307 +
 308 +                       if ( *s=='\\' ) s++;
 309 +
 310                         flag = *s;
 311                         continue;
 312                 }
 313 @@ -351,7 +366,7 @@
 314                                 continue;
 315                 }
 316
 317 -               AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');
 318 +               NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
 319
 320         }
 321         fclose(affix);
 322 @@ -359,87 +374,266 @@
 323         return (0);
 324  }
 325
 326 +static int
 327 +MergeAffix(IspellDict *Conf, int a1, int a2) {
 328 +       int naffix=0;
 329 +       char **ptr=Conf->AffixData;
 330 +       /* Append "AffixData[a1] AffixData[a2]" as a new AffixData entry and return its index. */
 331 +       while(*ptr) { /* count the entries in front of the NULL terminator */
 332 +               naffix++;
 333 +               ptr++;
 334 +       }
 335 +       /* grow by one: room for the new entry plus the NULL terminator */
 336 +       Conf->AffixData=(char**)realloc( Conf->AffixData, (naffix+2)*sizeof(char*) );
 337 +       MEMOUT(Conf->AffixData);
 338 +       ptr = Conf->AffixData + naffix;
 339 +       *ptr=malloc( strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );
 340 +       MEMOUT(*ptr); /* check the string just allocated, not the slot address (never NULL) */
 341 +       sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
 342 +       ptr++;
 343 +       *ptr=NULL; /* re-terminate the pointer array */
 344 +       return naffix;
 345 +}
 346 +
 347 +
 348 +static SPNode*
 349 +mkSPNode(IspellDict *Conf, int low, int high, int level) {
 350 +       int i;
 351 +       int nchar=0;
 352 +       char lastchar='\0';
 353 +       SPNode *rs;
 354 +       SPNodeData *data;
 355 +       int lownew=low;
 356 +       /* Build the trie node for byte `level` of Spell[low..high); first count runs of equal bytes. */
 357 +       for(i=low; i<high; i++)
 358 +               if ( Conf->Spell[i].p.d.len>level && lastchar!=Conf->Spell[i].word[level] ) {
 359 +                       nchar++;
 360 +                       lastchar=Conf->Spell[i].word[level];
 361 +               }
 362 +       /* no word in the range is longer than `level`: there is no node at this depth */
 363 +       if (!nchar)
 364 +               return NULL;
 365 +       /* node header plus one zeroed SPNodeData slot per counted byte */
 366 +       rs=(SPNode*)malloc(SPNHRDSZ+nchar*sizeof(SPNodeData));
 367 +       MEMOUT(rs);
 368 +       memset(rs,0,SPNHRDSZ+nchar*sizeof(SPNodeData));
 369 +       rs->length = nchar;
 370 +       data=rs->data;
 371 +       /* second pass: fill the slots; a byte change closes the previous slot and builds its child */
 372 +       lastchar='\0';
 373 +       for(i=low; i<high; i++)
 374 +               if ( Conf->Spell[i].p.d.len>level ) {
 375 +                       if ( lastchar!=Conf->Spell[i].word[level] ) {
 376 +                               if ( lastchar ) { /* not the first slot: finish the previous one */
 377 +                                       data->node = mkSPNode(Conf, lownew, i, level+1);
 378 +                                       lownew=i;
 379 +                                       data++;
 380 +                               }
 381 +                               lastchar=Conf->Spell[i].word[level];
 382 +                       }
 383 +                       data->val=((uint8*)(Conf->Spell[i].word))[level];
 384 +                       if ( Conf->Spell[i].p.d.len == level+1 ) { /* this word ends at the current byte */
 385 +                               if ( data->isword && data->affix!=Conf->Spell[i].p.d.affix) {
 386 +                                       /*
 387 +                                       fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n",
 388 +                                               Conf->Spell[i].word,
 389 +                                               Conf->AffixData[data->affix],
 390 +                                               Conf->AffixData[Conf->Spell[i].p.d.affix]
 391 +                                       );
 392 +                                       */
 393 +                                       /* MergeAffix called a few times */
 394 +                                       data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
 395 +                               } else
 396 +                                       data->affix = Conf->Spell[i].p.d.affix;
 397 +                               data->isword=1;
 398 +                               if ( strchr( Conf->AffixData[ data->affix ], Conf->compoundcontrol ) )
 399 +                                       data->compoundallow=1; /* affix string carries the compound-control flag */
 400 +                       }
 401 +               }
 402 +       /* the last slot's child is built from whatever range remains */
 403 +       data->node = mkSPNode(Conf, lownew, high, level+1);
 404 +
 405 +       return rs;
 406 +}
 407 +
 408 +
 409 +
 410  void
 411 -SortDictionary(IspellDict * Conf)
 412 +NISortDictionary(IspellDict * Conf)
 413  {
 414 -       int                     CurLet = -1,
 415 -                               Let;
 416         size_t          i;
 417 -
 418 +       int     naffix=3; /* slots for AffixData[0] (""), the first flag string and the NULL terminator */
 419 +       /* Replace per-word flag strings by indexes into Conf->AffixData, then build the trie and free Spell. */
 420 +       /* compress affixes */
 421 +       qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
 422 +       for (i = 1; i < Conf->nspell; i++)
 423 +               if ( strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag) )
 424 +                       naffix++;
 425 +       /* zero-filled, so the slot after the last stored string stays NULL */
 426 +       Conf->AffixData=(char**)malloc( naffix*sizeof(char*) );
 427 +       MEMOUT(Conf->AffixData);
 428 +       memset(Conf->AffixData, 0, naffix*sizeof(char*));
 429 +       naffix=1;
 430 +       Conf->AffixData[0]=strdup("");
 431 +       MEMOUT(Conf->AffixData[0]);
 432 +       Conf->AffixData[1]=strdup( Conf->Spell[0].p.flag ); /* NOTE(review): reads Spell[0] unconditionally; presumably needs nspell >= 1 - confirm */
 433 +       MEMOUT(Conf->AffixData[1]);
 434 +       Conf->Spell[0].p.d.affix = 1;
 435 +       Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
 436 +       for (i = 1; i < Conf->nspell; i++) {
 437 +               if ( strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]) ) { /* new distinct flag string */
 438 +                       naffix++;
 439 +                       Conf->AffixData[naffix] = strdup( Conf->Spell[i].p.flag );
 440 +                       MEMOUT(Conf->AffixData[naffix]);
 441 +               }
 442 +               Conf->Spell[i].p.d.affix = naffix; /* flag is compared/copied above before p.d is written; presumably they share storage - confirm in spell.h */
 443 +               Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
 444 +       }
 445 +       /* re-sort by word so that mkSPNode sees equal leading bytes as contiguous runs */
 446         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
 447 +       Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
 448 +       /* the flat word list is released once the trie exists */
 449 +       for (i = 0; i < Conf->nspell; i++)
 450 +               free( Conf->Spell[i].word );
 451 +       free( Conf->Spell );
 452 +       Conf->Spell=NULL;
 453 +}
 454 +
 455 +static AffixNode*
 456 +mkANode(IspellDict *Conf, int low, int high, int level, int type) {
 457 +       int i;
 458 +       int nchar=0;
 459 +       uint8 lastchar='\0';
 460 +       AffixNode *rs;
 461 +       AffixNodeData *data;
 462 +       int lownew=low;
 463 +       /* Build the trie node for byte `level` of the repl strings of Affix[low..high), read via GETCHAR per `type`. */
 464 +       for(i=low; i<high; i++)
 465 +               if ( Conf->Affix[i].replen>level && lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
 466 +                       nchar++;
 467 +                       lastchar=GETCHAR( Conf->Affix + i, level, type );
 468 +               }
 469
 470 -       for (i = 0; i < 256; i++)
 471 -               Conf->SpellTree.Left[i] = -1;
 472 +       if (!nchar)
 473 +               return NULL;
 474
 475 -       for (i = 0; i < Conf->nspell; i++)
 476 -       {
 477 -               Let = (int) (*(Conf->Spell[i].word)) & 255;
 478 -               if (CurLet != Let)
 479 -               {
 480 -                       Conf->SpellTree.Left[Let] = i;
 481 -                       CurLet = Let;
 482 +       rs=(AffixNode*)malloc(ANHRDSZ+nchar*sizeof(AffixNodeData));
 483 +       MEMOUT(rs);
 484 +       memset(rs,0,ANHRDSZ+nchar*sizeof(AffixNodeData));
 485 +       rs->length = nchar;
 486 +       data=rs->data;
 487 +
 488 +       lastchar='\0';
 489 +       for(i=low; i<high; i++)
 490 +               if ( Conf->Affix[i].replen>level ) {
 491 +                       if ( lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
 492 +                               if ( lastchar ) { /* not the first slot: build the previous slot's child */
 493 +                                       data->node = mkANode(Conf, lownew, i, level+1, type);
 494 +                                       lownew=i;
 495 +                                       data++;
 496 +                               }
 497 +                               lastchar=GETCHAR( Conf->Affix + i, level, type );
 498 +                       }
 499 +                       data->val=GETCHAR( Conf->Affix + i, level, type );
 500 +                       if ( Conf->Affix[i].replen == level+1 ) { /* affix stopped */
 501 +                               if ( !data->naff ) /* first affix ending here: size the list for all remaining candidates */
 502 +                                       data->aff=(AFFIX**)malloc(sizeof(AFFIX*)*(high-i+1));
 503 +                               MEMOUT(data->aff); /* check the affix list itself; `data` can never be NULL here */
 504 +                               data->aff[ data->naff ] = Conf->Affix + i;
 505 +                               data->naff++;
 506 +                       }
 507                 }
 508 -               Conf->SpellTree.Right[Let] = i;
 509 -       }
 510 +       /* the last slot's child is built from whatever range remains */
 511 +       data->node = mkANode(Conf, lownew, high, level+1, type);
 512 +
 513 +       return rs;
 514  }
 515
 516  void
 517 -SortAffixes(IspellDict * Conf)
 518 +NISortAffixes(IspellDict * Conf)
 519  {
 520 -       int                     CurLetP = -1,
 521 -                               CurLetS = -1,
 522 -                               Let;
 523         AFFIX      *Affix;
 524         size_t          i;
 525 +       CMPDAffix* ptr;
 526 +       int     firstsuffix=Conf->naffixes; /* index of the first suffix; stays naffixes when there is none */
 527
 528         if (Conf->naffixes > 1)
 529                 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
 530 -       for (i = 0; i < 256; i++)
 531 -       {
 532 -               Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
 533 -               Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
 534 -       }
 535
 536 -       for (i = 0; i < Conf->naffixes; i++)
 537 -       {
 538 +       Conf->CompoundAffix = ptr = (CMPDAffix*)malloc( sizeof(CMPDAffix) * (Conf->naffixes + 1) ); /* +1: terminator entry */
 539 +       MEMOUT(Conf->CompoundAffix);
 540 +       ptr->affix=NULL;
 541 +       /* Collect compound-only suffixes; the tree split below assumes cmpaffix puts prefixes first - confirm. */
 542 +       for (i = 0; i < Conf->naffixes; i++) {
 543                 Affix = &(((AFFIX *) Conf->Affix)[i]);
 544 -               if (Affix->type == 'p')
 545 -               {
 546 -                       Let = (int) (*(Affix->repl)) & 255;
 547 -                       if (CurLetP != Let)
 548 -                       {
 549 -                               Conf->PrefixTree.Left[Let] = i;
 550 -                               CurLetP = Let;
 551 +               if ( Affix->type == 's' ) {
 552 +                       if ( firstsuffix>(int)i ) firstsuffix=i;
 553 +                       if ( Affix->flagflags & FF_COMPOUNDONLYAFX ) {
 554 +                               if ( ptr==Conf->CompoundAffix || strbncmp((ptr-1)->affix, Affix->repl, (ptr-1)->len) ) {
 555 +                                       /* leave only unique and minimals suffixes */
 556 +                                       ptr->affix=Affix->repl;
 557 +                                       ptr->len=Affix->replen;
 558 +                                       ptr++;
 559 +                               }
 560                         }
 561 -                       Conf->PrefixTree.Right[Let] = i;
 562                 }
 563 -               else
 564 -               {
 565 -                       Let = (Affix->replen) ? (int) (Affix->repl[Affix->replen - 1]) & 255 : 0;
 566 -                       if (CurLetS != Let)
 567 -                       {
 568 -                               Conf->SuffixTree.Left[Let] = i;
 569 -                               CurLetS = Let;
 570 +       }
 571 +       ptr->affix = NULL;
 572 +       Conf->CompoundAffix = (CMPDAffix*)realloc( Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr-Conf->CompoundAffix+1) );
 573 +       MEMOUT(Conf->CompoundAffix);
 574 +       Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
 575 +       Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
 576 +}
 577 +
 578 +static AffixNodeData*
 579 +FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) {
 580 +       AffixNodeData *StopLow, *StopHigh, *StopMiddle;
 581 +       uint8 symbol;
 582 +       /* Walk the affix trie from `node` along word; return the first entry holding affixes, or NULL. */
 583 +       while( node && *level<wrdlen) {
 584 +               StopLow = node->data;
 585 +               StopHigh = node->data+node->length;
 586 +               while (StopLow < StopHigh) { /* binary search of this node's entries */
 587 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
 588 +                       symbol = GETWCHAR(word,wrdlen,*level,type); /* byte *level from the front ('p') or from the back */
 589 +                       if ( StopMiddle->val == symbol ) {
 590 +                               if ( StopMiddle->naff )
 591 +                                       return StopMiddle; /* *level is left on the matched byte; the caller advances it */
 592 +                               node=StopMiddle->node;
 593 +                               (*level)++;
 594 +                               break;
 595 +                       } else if ( StopMiddle->val < symbol ) {
 596 +                               StopLow = StopMiddle + 1;
 597 +                       } else {
 598 +                               StopHigh = StopMiddle;
 599                         }
 600 -                       Conf->SuffixTree.Right[Let] = i;
 601                 }
 602 +               if ( StopLow >= StopHigh ) /* search exhausted without a match at this level */
 603 +                       break;
 604         }
 605 +       return NULL;
 606  }
 607
 608  static char *
 609 -CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict * Conf)
 610 -{
 611 +CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
 612         regmatch_t      subs[2];                /* workaround for apache&linux */
 613 -       char            newword[2 * MAXNORMLEN] = "";
 614         int                     err;
 615
 616 -       *res = strbncmp(word, Affix->repl, Affix->replen);
 617 -       if (*res < 0)
 618 -               return NULL;
 619 -       if (*res > 0)
 620 -               return NULL;
 621 -       strcpy(newword, word);
 622 -       strcpy(newword + len - Affix->replen, Affix->find);
 623 +       if ( flagflags & FF_COMPOUNDONLYAFX ) {
 624 +               if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
 625 +                       return NULL;
 626 +       } else {
 627 +               if ( Affix->flagflags & FF_COMPOUNDONLYAFX )
 628 +                       return NULL;
 629 +       }
 630 +
 631 +       if ( Affix->type=='s' ) {
 632 +               strcpy(newword, word);
 633 +               strcpy(newword + len - Affix->replen, Affix->find);
 634 +       } else {
 635 +               strcpy(newword, Affix->find);
 636 +               strcat(newword, word + Affix->replen);
 637 +       }
 638
 639         if (Affix->compile)
 640         {
 641 @@ -452,205 +646,364 @@
 642                 }
 643                 Affix->compile = 0;
 644         }
 645 -       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
 646 -       {
 647 -               if (FindWord(Conf, newword, Affix->flag))
 648 -                       return pstrdup(newword);
 649 -       }
 650 +       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
 651 +                       return newword;
 652         return NULL;
 653  }
 654
 655 -#define NS 1
 656 -#define MAX_NORM 512
 657 -static int
 658 -CheckPrefix(const char *word, size_t len, AFFIX * Affix, IspellDict * Conf, int pi,
 659 -                       char **forms, char ***cur)
 660 -{
 661 -       regmatch_t      subs[NS * 2];
 662 +
 663 +static char      **
 664 +NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
 665 +       AffixNodeData   *suffix=NULL, *prefix=NULL;
 666 +       int     slevel=0, plevel=0;
 667 +       int wrdlen = strlen(word), swrdlen;
 668 +       char      **forms;
 669 +       char      **cur;
 670         char            newword[2 * MAXNORMLEN] = "";
 671 -       int                     err,
 672 -                               ls,
 673 -                               res,
 674 -                               lres;
 675 -       size_t          newlen;
 676 -       AFFIX      *CAffix = Conf->Affix;
 677 -
 678 -       res = strncmp(word, Affix->repl, Affix->replen);
 679 -       if (res != 0)
 680 -               return res;
 681 -       strcpy(newword, Affix->find);
 682 -       strcat(newword, word + Affix->replen);
 683 +       char            pnewword[2 * MAXNORMLEN] = "";
 684 +       AffixNode *snode = Conf->Suffix, *pnode;
 685 +       int i,j;
 686 +
 687 +       if (wrdlen > MAXNORMLEN) return NULL;
 688 +       strlower(word);
 689 +       cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
 690 +       *cur = NULL;
 691
 692 -       if (Affix->compile)
 693 -       {
 694 -               err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
 695 -               if (err)
 696 -               {
 697 -                       /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
 698 -                       regfree(&(Affix->reg));
 699 -                       return (0);
 700 -               }
 701 -               Affix->compile = 0;
 702 +
 703 +       /* Check that the word itself is normal form */
 704 +       if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) {
 705 +               *cur = pstrdup(word);
 706 +               cur++;
 707 +               *cur = NULL;
 708         }
 709 -       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
 710 -       {
 711 -               SPELL      *curspell;
 712
 713 -               if ((curspell = FindWord(Conf, newword, Affix->flag)))
 714 -               {
 715 -                       if ((*cur - forms) < (MAX_NORM - 1))
 716 -                       {
 717 -                               **cur = pstrdup(newword);
 718 -                               (*cur)++;
 719 -                               **cur = NULL;
 720 +       /* Find all other NORMAL forms of the 'word' (check only prefix)*/
 721 +       pnode=Conf->Prefix;
 722 +       plevel=0;
 723 +       while(pnode) {
 724 +               prefix=FinfAffixes(pnode, word, wrdlen, &plevel,'p');
 725 +               if (!prefix) break;
 726 +               for(j=0;j<prefix->naff;j++) {
 727 +                       if ( CheckAffix(word,wrdlen,prefix->aff[j], flag, newword) ) {
 728 +                               /* prefix success */
 729 +                               if ( FindWord(Conf, newword, prefix->aff[j]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
 730 +                                       /* word search success */
 731 +                                       *cur = pstrdup(newword);
 732 +                                       cur++;
 733 +                                       *cur=NULL;
 734 +                               }
 735                         }
 736                 }
 737 -               newlen = strlen(newword);
 738 -               ls = Conf->SuffixTree.Left[pi];
 739 -               if (ls >= 0 && ((*cur - forms) < (MAX_NORM - 1)))
 740 -               {
 741 -                       **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
 742 -                       if (**cur)
 743 -                       {
 744 -                               (*cur)++;
 745 -                               **cur = NULL;
 746 +               pnode = prefix->node;
 747 +               plevel++;
 748 +       }
 749 +
 750 +       /* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
 751 +       while( snode ) {
 752 +               /* find possible suffix */
 753 +               suffix = FinfAffixes(snode, word, wrdlen, &slevel, 's');
 754 +               if (!suffix) break;
 755 +               /* foreach suffix check affix */
 756 +               for(i=0;i<suffix->naff;i++) {
 757 +                       if ( CheckAffix(word, wrdlen, suffix->aff[i], flag, newword) ) {
 758 +                               /* suffix success */
 759 +                               if ( FindWord(Conf, newword, suffix->aff[i]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
 760 +                                       /* word search success */
 761 +                                       *cur = pstrdup(newword);
 762 +                                       cur++;
 763 +                                       *cur=NULL;
 764 +                               }
 765 +                               /* now we will check the changed word against the prefixes */
 766 +                               pnode=Conf->Prefix;
 767 +                               plevel=0;
 768 +                               swrdlen=strlen(newword);
 769 +                               while(pnode) {
 770 +                                       prefix=FinfAffixes(pnode, newword, swrdlen, &plevel,'p');
 771 +                                       if (!prefix) break;
 772 +                                       for(j=0;j<prefix->naff;j++) {
 773 +                                               if ( CheckAffix(newword,swrdlen,prefix->aff[j], flag, pnewword) ) {
 774 +                                                       /* prefix success */
 775 +                                                       int ff=( prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT ) ?
 776 +                                                                0 : prefix->aff[j]->flag;
 777 +                                                       if ( FindWord(Conf, pnewword, ff, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
 778 +                                                               /* word search success */
 779 +                                                               *cur = pstrdup(pnewword);
 780 +                                                               cur++;
 781 +                                                               *cur=NULL;
 782 +                                                       }
 783 +                                               }
 784 +                                       }
 785 +                                       pnode = prefix->node;
 786 +                                       plevel++;
 787 +                               }
 788                         }
 789                 }
 790 -       }
 791 -       return 0;
 792 -}
 793
 794 +               snode=suffix->node;
 795 +               slevel++;
 796 +       }
 797
 798 -char     **
 799 -NormalizeWord(IspellDict * Conf, char *word)
 800 -{
 801 -/*regmatch_t subs[NS];*/
 802 -       size_t          len;
 803 -       char      **forms;
 804 -       char      **cur;
 805 -       AFFIX      *Affix;
 806 -       int                     ri,
 807 -                               pi,
 808 -                               ipi,
 809 -                               lp,
 810 -                               rp,
 811 -                               cp,
 812 -                               ls,
 813 -                               rs;
 814 -       int                     lres,
 815 -                               rres,
 816 -                               cres = 0;
 817 -       SPELL      *spell;
 818 -
 819 -       len = strlen(word);
 820 -       if (len > MAXNORMLEN)
 821 +       if (cur == forms) {
 822 +               pfree(forms);
 823                 return (NULL);
 824 +       }
 825 +       return (forms);
 826 +}
 827
 828 -       strlower(word);
 829 +typedef struct SplitVar {
 830 +       int     nstem;
 831 +       char    **stem;
 832 +       struct  SplitVar *next;
 833 +} SplitVar;
 834 +
 835 +static int
 836 +CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len) {
 837 +       while( (*ptr)->affix ) {
 838 +               if ( len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len)==0 ) {
 839 +                       len = (*ptr)->len;
 840 +                       (*ptr)++;
 841 +                       return len;
 842 +               }
 843 +               (*ptr)++;
 844 +       }
 845 +       return 0;
 846 +}
 847
 848 -       forms = (char **) palloc(MAX_NORM * sizeof(char **));
 849 -       cur = forms;
 850 -       *cur = NULL;
 851 +static SplitVar*
 852 +CopyVar(SplitVar *s, int makedup) {
 853 +       SplitVar *v = (SplitVar*)palloc(sizeof(SplitVar));
 854 +
 855 +       v->stem=(char**)palloc( sizeof(char*) * (MAX_NORM) );
 856 +       v->next=NULL;
 857 +       if ( s ) {
 858 +               int i;
 859 +               v->nstem = s->nstem;
 860 +               for(i=0;i<s->nstem;i++)
 861 +                       v->stem[i] = (makedup) ? pstrdup( s->stem[i] ) : s->stem[i];
 862 +       } else {
 863 +               v->nstem=0;
 864 +       }
 865 +       return v;
 866 +}
 867
 868 -       ri = (int) (*word) & 255;
 869 -       pi = (int) (word[strlen(word) - 1]) & 255;
 870 -       Affix = (AFFIX *) Conf->Affix;
 871
 872 -       /* Check that the word itself is normal form */
 873 -       if ((spell = FindWord(Conf, word, 0)))
 874 -       {
 875 -               *cur = pstrdup(word);
 876 -               cur++;
 877 -               *cur = NULL;
 878 -       }
 879 +static SplitVar*
 880 +SplitToVariants( IspellDict * Conf, SPNode *snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos ) {
 881 +       SplitVar *var=NULL;
 882 +       SPNodeData *StopLow, *StopHigh, *StopMiddle;
 883 +       SPNode *node = (snode) ? snode : Conf->Dictionary;
 884 +       int level=(snode) ? minpos : startpos; /* recursive minpos==level*/
 885 +       int lenaff;
 886 +       CMPDAffix *caff;
 887 +       char    notprobed[wordlen];
 888 +
 889 +       memset(notprobed,1,wordlen);
 890 +       var = CopyVar(orig,1);
 891 +
 892 +       while( node && level<wordlen) {
 893 +               StopLow = node->data;
 894 +               StopHigh = node->data+node->length;
 895 +               while (StopLow < StopHigh) {
 896 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
 897 +                       if ( StopMiddle->val == ((uint8*)(word))[level] ) {
 898 +                               break;
 899 +                       } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
 900 +                               StopLow = StopMiddle + 1;
 901 +                       } else {
 902 +                               StopHigh = StopMiddle;
 903 +                       }
 904 +               }
 905 +               if ( StopLow >= StopHigh )
 906 +                       break;
 907
 908 -       /* Find all other NORMAL forms of the 'word' */
 909 +               /* find word with epenthetic */
 910 +               caff = Conf->CompoundAffix;
 911 +               while ( level>startpos && (lenaff=CheckCompoundAffixes( &caff, word + level, wordlen - level ))>0 ) {
 912 +                       /* one of the compound affixes matched here, so check whether the word exists */
 913 +                       char buf[MAXNORMLEN];
 914 +                       char **subres;
 915 +
 916 +                       lenaff=level-startpos+lenaff;
 917 +
 918 +                       if ( !notprobed[startpos+lenaff-1] )
 919 +                               continue;
 920 +
 921 +                       if ( level+lenaff-1 <= minpos )
 922 +                               continue;
 923
 924 -       for (ipi = 0; ipi <= pi; ipi += pi)
 925 -       {
 926 +                       memcpy(buf, word+startpos, lenaff);
 927 +                       buf[lenaff]='\0';
 928
 929 -               /* check prefix */
 930 -               lp = Conf->PrefixTree.Left[ri];
 931 -               rp = Conf->PrefixTree.Right[ri];
 932 -               while (lp >= 0 && lp <= rp)
 933 -               {
 934 -                       cp = (lp + rp) >> 1;
 935 -                       cres = 0;
 936 -                       if ((cur - forms) < (MAX_NORM - 1))
 937 -                               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
 938 -                       if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
 939 -                               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
 940 -                       if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
 941 -                               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
 942 -                       if (cres < 0)
 943 -                       {
 944 -                               rp = cp - 1;
 945 -                               lp++;
 946 -                       }
 947 -                       else if (cres > 0)
 948 -                       {
 949 -                               lp = cp + 1;
 950 -                               rp--;
 951 -                       }
 952 -                       else
 953 -                       {
 954 -                               lp++;
 955 -                               rp--;
 956 +                       subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);
 957 +                       if ( subres ) {
 958 +                               /* Yes, it was a word from dictionary */
 959 +                               SplitVar *new=CopyVar(var,0);
 960 +                               SplitVar *ptr=var;
 961 +                               char **sptr=subres;
 962 +
 963 +                               notprobed[startpos+lenaff-1]=0;
 964 +
 965 +                               while(*sptr) {
 966 +                                       new->stem[ new->nstem ] = *sptr;
 967 +                                       new->nstem++;
 968 +                                       sptr++;
 969 +                               }
 970 +                               pfree(subres);
 971 +
 972 +                               while( ptr->next )
 973 +                                       ptr = ptr->next;
 974 +                               ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos+lenaff, startpos+lenaff);
 975 +
 976 +                               pfree(new->stem);
 977 +                               pfree(new);
 978                         }
 979                 }
 980
 981 -               /* check suffix */
 982 -               ls = Conf->SuffixTree.Left[ipi];
 983 -               rs = Conf->SuffixTree.Right[ipi];
 984 -               while (ls >= 0 && ls <= rs)
 985 -               {
 986 -                       if (((cur - forms) < (MAX_NORM - 1)))
 987 -                       {
 988 -                               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
 989 -                               if (*cur)
 990 -                               {
 991 -                                       cur++;
 992 -                                       *cur = NULL;
 993 +               /* find infinitive */
 994 +               if ( StopMiddle->isword && StopMiddle->compoundallow && notprobed[level] ) {
 995 +                       /* ok, we found full compoundallowed word*/
 996 +                       if ( level>minpos ) {
 997 +                               /* and it is longer than the minimum length */
 998 +                               if ( wordlen==level+1 ) {
 999 +                                       /* well, it was last word */
1000 +                                       var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1001 +                                       var->nstem++;
1002 +                                       return var;
1003 +                               } else {
1004 +                                       /* then we will search for a longer word at the same point */
1005 +                                       SplitVar *ptr=var;
1006 +                                       while( ptr->next )
1007 +                                               ptr = ptr->next;
1008 +                                       ptr->next=SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1009 +                                       /* we can find next word */
1010 +                                       level++;
1011 +                                       var->stem[ var->nstem ] = strnduplicate(word + startpos, level - startpos);
1012 +                                       var->nstem++;
1013 +                                       node = Conf->Dictionary;
1014 +                                       startpos=level;
1015 +                                       continue;
1016                                 }
1017                         }
1018 -                       if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
1019 -                       {
1020 -                               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
1021 -                               if (*cur)
1022 -                               {
1023 -                                       cur++;
1024 -                                       *cur = NULL;
1025 +               }
1026 +               level++;
1027 +               node=StopMiddle->node;
1028 +       }
1029 +
1030 +       var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1031 +       var->nstem++;
1032 +       return var;
1033 +}
1034 +
1035 +char  **
1036 +NINormalizeWord(IspellDict * Conf, char *word) {
1037 +       char **res= NormalizeSubWord(Conf, word, 0);
1038 +
1039 +       if ( Conf->compoundcontrol != '\t' ) {
1040 +               int wordlen=strlen(word);
1041 +               SplitVar *ptr, *var = SplitToVariants(Conf,NULL,NULL, word, wordlen, 0, -1);
1042 +               char **cur=res;
1043 +               int i;
1044 +
1045 +               while(var) {
1046 +                       if ( var->nstem > 1 ) {
1047 +                               char **subres = NormalizeSubWord(Conf, var->stem[ var->nstem-1 ], FF_COMPOUNDWORD);
1048 +                               if ( subres ) {
1049 +                                       char **ptr=subres;
1050 +
1051 +                                       if ( cur ) {
1052 +                                               while(*cur)
1053 +                                                       cur++;
1054 +                                       } else {
1055 +                                               res=cur=(char **) palloc(MAX_NORM * sizeof(char *));
1056 +                                       }
1057 +
1058 +                                       for(i=0;i<var->nstem-1;i++) {
1059 +                                               *cur=var->stem[ i ];
1060 +                                               cur++;
1061 +                                       }
1062 +                                       while(*ptr) {
1063 +                                               *cur=*ptr;
1064 +                                               cur++; ptr++;
1065 +                                       }
1066 +                                       *cur=NULL;
1067 +                                       pfree(subres);
1068 +                                       var->stem[ 0 ] = NULL;
1069                                 }
1070                         }
1071 -                       ls++;
1072 -                       rs--;
1073 -               }                                               /* end while */
1074 +
1075 +                       for(i=0;i<var->nstem && var->stem[ i ];i++)
1076 +                               pfree( var->stem[i] );
1077 +                       ptr = var->next;
1078 +                       pfree(var->stem);
1079 +                       pfree(var);
1080 +                       var=ptr;
1081 +               }
1082 +       }
1083 +       return res;
1084 +}
1085
1086 -       }                                                       /* for ipi */
1087
1088 -       if (cur == forms)
1089 -       {
1090 -               pfree(forms);
1091 -               return (NULL);
1092 +static void freeSPNode(SPNode *node) {
1093 +       SPNodeData *data;
1094 +
1095 +       if (!node) return;
1096 +       data=node->data;
1097 +       while( node->length ) {
1098 +               freeSPNode(data->node);
1099 +               data++;
1100 +               node->length--;
1101         }
1102 -       return (forms);
1103 +       free(node);
1104  }
1105 +
1106 +static void freeANode(AffixNode *node) {
1107 +       AffixNodeData *data;
1108 +
1109 +       if (!node) return;
1110 +       data=node->data;
1111 +       while( node->length ) {
1112 +               freeANode(data->node);
1113 +               if (data->naff)
1114 +                       free(data->aff);
1115 +               data++;
1116 +               node->length--;
1117 +       }
1118 +       free(node);
1119 +}
1120 +
1121
1122  void
1123 -FreeIspell(IspellDict * Conf)
1124 +NIFree(IspellDict * Conf)
1125  {
1126         int                     i;
1127         AFFIX      *Affix = (AFFIX *) Conf->Affix;
1128 +       char**     aff = Conf->AffixData;
1129 +
1130 +       if ( aff ) {
1131 +               while(*aff) {
1132 +                       free(*aff);
1133 +                       aff++;
1134 +               }
1135 +               free(Conf->AffixData);
1136 +       }
1137
1138 +
1139         for (i = 0; i < Conf->naffixes; i++)
1140         {
1141                 if (Affix[i].compile == 0)
1142                         regfree(&(Affix[i].reg));
1143         }
1144 -       for (i = 0; i < Conf->naffixes; i++)
1145 -               free(Conf->Spell[i].word);
1146 -       free(Conf->Affix);
1147 -       free(Conf->Spell);
1148 +       if (Conf->Spell) {
1149 +               for (i = 0; i < Conf->nspell; i++)
1150 +                       free(Conf->Spell[i].word);
1151 +               free(Conf->Spell);
1152 +       }
1153 +
1154 +       if (Conf->Affix) free(Conf->Affix);
1155 +       if ( Conf->CompoundAffix ) free(Conf->CompoundAffix);
1156 +       freeSPNode(Conf->Dictionary);
1157 +       freeANode(Conf->Suffix);
1158 +       freeANode(Conf->Prefix);
1159         memset((void *) Conf, 0, sizeof(IspellDict));
1160         return;
1161  }
1162 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.h postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h
1163 --- postgresql-7.4/contrib/tsearch2/ispell/spell.h      2003-08-04 02:43:11.000000000 +0200
1164 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h        2003-12-18 17:46:03.000000000 +0100
1165 @@ -3,16 +3,44 @@
1166
1167  #include <sys/types.h>
1168  #include <regex.h>
1169 +#include "c.h"
1170 +
1171 +struct SPNode;
1172 +
1173 +
1174 +typedef struct {
1175 +       uint32
1176 +               val:8,
1177 +               isword:1,
1178 +               compoundallow:1,
1179 +               affix:22;
1180 +       struct SPNode *node;
1181 +} SPNodeData;
1182 +
1183 +typedef struct SPNode {
1184 +       uint32  length;
1185 +       SPNodeData      data[1];
1186 +} SPNode;
1187 +
1188 +#define SPNHRDSZ       (sizeof(uint32))
1189 +
1190
1191  typedef struct spell_struct
1192  {
1193         char       *word;
1194 -       char            flag[10];
1195 +       union {
1196 +               char            flag[16];
1197 +               struct {
1198 +                       int             affix;
1199 +                       int             len;
1200 +               } d;
1201 +       } p;
1202  }      SPELL;
1203
1204  typedef struct aff_struct
1205  {
1206         char            flag;
1207 +       char            flagflags;
1208         char            type;
1209         char            mask[33];
1210         char            find[16];
1211 @@ -22,35 +50,66 @@
1212         char            compile;
1213  }      AFFIX;
1214
1215 +#define FF_CROSSPRODUCT        0x01
1216 +#define FF_COMPOUNDWORD        0x02
1217 +#define FF_COMPOUNDONLYAFX      0x04
1218 +
1219 +struct AffixNode;
1220 +
1221 +typedef struct {
1222 +       uint32
1223 +               val:8,
1224 +               naff:24;
1225 +       AFFIX   **aff;
1226 +       struct AffixNode *node;
1227 +} AffixNodeData;
1228 +
1229 +typedef struct AffixNode {
1230 +       uint32 length;
1231 +       AffixNodeData   data[1];
1232 +} AffixNode;
1233 +
1234 +#define ANHRDSZ        (sizeof(uint32))
1235 +
1236  typedef struct Tree_struct
1237  {
1238         int                     Left[256],
1239                                 Right[256];
1240  }      Tree_struct;
1241
1242 +typedef struct {
1243 +       char *affix;
1244 +       int len;
1245 +} CMPDAffix;
1246 +
1247  typedef struct
1248  {
1249         int                     maffixes;
1250         int                     naffixes;
1251         AFFIX      *Affix;
1252 +       char                    compoundcontrol;
1253
1254         int                     nspell;
1255         int                     mspell;
1256         SPELL      *Spell;
1257 -       Tree_struct SpellTree;
1258 -       Tree_struct PrefixTree;
1259 -       Tree_struct SuffixTree;
1260 +
1261 +       AffixNode       *Suffix;
1262 +       AffixNode       *Prefix;
1263 +
1264 +       SPNode  *Dictionary;
1265 +       char    **AffixData;
1266 +       CMPDAffix    *CompoundAffix;
1267
1268  }      IspellDict;
1269
1270 -char     **NormalizeWord(IspellDict * Conf, char *word);
1271 -int                    ImportAffixes(IspellDict * Conf, const char *filename);
1272 -int                    ImportDictionary(IspellDict * Conf, const char *filename);
1273 -
1274 -int                    AddSpell(IspellDict * Conf, const char *word, const char *flag);
1275 -int                    AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type);
1276 -void           SortDictionary(IspellDict * Conf);
1277 -void           SortAffixes(IspellDict * Conf);
1278 -void           FreeIspell(IspellDict * Conf);
1279 +char     **NINormalizeWord(IspellDict * Conf, char *word);
1280 +int                    NIImportAffixes(IspellDict * Conf, const char *filename);
1281 +int                    NIImportDictionary(IspellDict * Conf, const char *filename);
1282 +
1283 +int                    NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
1284 +int                    NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type);
1285 +void           NISortDictionary(IspellDict * Conf);
1286 +void           NISortAffixes(IspellDict * Conf);
1287 +void           NIFree(IspellDict * Conf);
1288
1289  #endif
1290 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/Makefile postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile
1291 --- postgresql-7.4/contrib/tsearch2/my2ispell/Makefile  1970-01-01 01:00:00.000000000 +0100
1292 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile    2003-12-18 17:46:03.000000000 +0100
1293 @@ -0,0 +1,47 @@
1294 +ZIPFILE=nb_NO
1295 +LANGUAGE=norsk
1296 +
1297 +
1298 +UNZIP=unzip -o
1299 +
1300 +
1301 +all: $(LANGUAGE).dict $(LANGUAGE).aff
1302 +
1303 +$(ZIPFILE).aff: $(ZIPFILE).zip
1304 +       $(UNZIP) $? $@
1305 +       touch $@
1306 +
1307 +
1308 +# 1 Clean up the dictionary
1309 +# 2 Remove the " symbol
1310 +# 3 Add the compoundwords controlled flag to words that lack it but
1311 +#   have compound-only suffixes
1312 +
1313 +$(LANGUAGE).dict: $(ZIPFILE).zip
1314 +       $(UNZIP) $? $(ZIPFILE).dic
1315 +       grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
1316 +        | grep -v '\.' \
1317 +        | sed -e 's/"//g' \
1318 +        | perl -pi -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
1319 +        | sort \
1320 +       > $@
1321 +
1322 +# Just convert the affix file
1323 +
1324 +$(LANGUAGE).aff: $(ZIPFILE).aff
1325 +       grep -v -i zyzyzy $(ZIPFILE).aff \
1326 +        | grep -v -i zyzyzy \
1327 +        | perl -pi \
1328 +               -e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
1329 +               -e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
1330 +               -e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
1331 +               -e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc("   $$3    > $$2")/e;' \
1332 +               -e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
1333 +               -e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc("   $$3    > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
1334 +               -e 's/^(SET|TRY)/#$$1/' \
1335 +       > $@
1336 +
1337 +clean:
1338 +       rm -rf $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff
1339 +
1340 +
1341 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/README postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README
1342 --- postgresql-7.4/contrib/tsearch2/my2ispell/README    1970-01-01 01:00:00.000000000 +0100
1343 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README      2003-12-18 17:46:03.000000000 +0100
1344 @@ -0,0 +1,12 @@
1345 +Utility for converting a MySpell dictionary and affix file
1346 +from MySpell format to ispell format.
1347 +The utility was tested on nb_NO.zip and nn_NO.zip from
1348 +OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html)
1349 +
1350 +Usage:
1351 +For example, to build the Norwegian dictionary and affix files:
1352 +% cp nb_NO.zip my2ispell
1353 +% cd my2ispell
1354 +% gmake ZIPFILE=nb_NO LANGUAGE=norsk
1355 +
1356 +Author: Teodor Sigaev <teodor@sigaev.ru>