git.pld-linux.org Git - packages/postgresql.git/blob - postgresql-tsearch2-compound_word_support_20031210.patch
- oops, jdbc is separate project, now
[packages/postgresql.git] / postgresql-tsearch2-compound_word_support_20031210.patch
1 diff -uNr postgresql-7.4/contrib/tsearch2/dict_ispell.c postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c
2 --- postgresql-7.4/contrib/tsearch2/dict_ispell.c       2003-08-04 02:43:11.000000000 +0200
3 +++ postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c 2003-12-18 17:46:03.000000000 +0100
4 @@ -27,7 +27,7 @@
5  static void
6  freeDictISpell(DictISpell * d)
7  {
8 -       FreeIspell(&(d->obj));
9 +       NIFree(&(d->obj));
10         freestoplist(&(d->stoplist));
11         free(d);
12  }
13 @@ -71,7 +71,7 @@
14                                           (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
15                                            errmsg("dictionary already loaded")));
16                         }
17 -                       if (ImportDictionary(&(d->obj), pcfg->value))
18 +                       if (NIImportDictionary(&(d->obj), pcfg->value))
19                         {
20                                 freeDictISpell(d);
21                                 ereport(ERROR,
22 @@ -90,7 +90,7 @@
23                                           (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
24                                            errmsg("affixes already loaded")));
25                         }
26 -                       if (ImportAffixes(&(d->obj), pcfg->value))
27 +                       if (NIImportAffixes(&(d->obj), pcfg->value))
28                         {
29                                 freeDictISpell(d);
30                                 ereport(ERROR,
31 @@ -132,8 +132,8 @@
32  
33         if (affloaded && dictloaded)
34         {
35 -               SortDictionary(&(d->obj));
36 -               SortAffixes(&(d->obj));
37 +               NISortDictionary(&(d->obj));
38 +               NISortAffixes(&(d->obj));
39         }
40         else if (!affloaded)
41         {
42 @@ -168,7 +168,7 @@
43  
44         res = palloc(sizeof(char *) * 2);
45         txt = pnstrdup(in, PG_GETARG_INT32(2));
46 -       res = NormalizeWord(&(d->obj), txt);
47 +       res = NINormalizeWord(&(d->obj), txt);
48         pfree(txt);
49  
50         if (res == NULL)
51 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.c postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c
52 --- postgresql-7.4/contrib/tsearch2/ispell/spell.c      2003-08-04 02:43:11.000000000 +0200
53 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c        2003-12-18 17:46:03.000000000 +0100
54 @@ -7,15 +7,26 @@
55  
56  #include "spell.h"
57  
58 -#define MAXNORMLEN 56
59 +#define MAX_NORM 1024
60 +#define MAXNORMLEN 256
61  
62  #define STRNCASECMP(x,y)               (strncasecmp(x,y,strlen(y)))
63 +#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
64 +#define GETCHAR(A,N,T)   GETWCHAR( (A)->repl, (A)->replen, N, T )
65 +
66 +
67 +#define MEMOUT(X)  if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
68  
69  static int
70  cmpspell(const void *s1, const void *s2)
71  {
72         return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
73  }
74 +static int /* qsort() comparator: strcmp() order of the two SPELL entries' p.flag strings */
75 +cmpspellaffix(const void *s1, const void *s2)
76 +{
77 +       return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
78 +}
79  
80  static void
81  strlower(char *str)
82 @@ -29,6 +40,13 @@
83         }
84  }
85  
86 +/* Copy the first len bytes of s into a fresh palloc()ed, NUL-terminated buffer. */
87 +static char *
88 +strnduplicate(char *s, int len) {
89 +       char *copy = (char *) palloc(len + 1);
90 +       copy[len] = '\0';
91 +       return (char *) memcpy(copy, s, len);
92 +}
93  /* backward string compaire for suffix tree operations */
94  static int
95  strbcmp(const char *s1, const char *s2)
96 @@ -92,7 +110,7 @@
97  }
98  
99  int
100 -AddSpell(IspellDict * Conf, const char *word, const char *flag)
101 +NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
102  {
103         if (Conf->nspell >= Conf->mspell)
104         {
105 @@ -106,24 +124,18 @@
106                         Conf->mspell = 1024 * 20;
107                         Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
108                 }
109 -               if (Conf->Spell == NULL)
110 -                       ereport(ERROR,
111 -                                       (errcode(ERRCODE_OUT_OF_MEMORY),
112 -                                        errmsg("out of memory")));
113 +               MEMOUT(Conf->Spell);
114         }
115         Conf->Spell[Conf->nspell].word = strdup(word);
116 -       if (!Conf->Spell[Conf->nspell].word)
117 -               ereport(ERROR,
118 -                               (errcode(ERRCODE_OUT_OF_MEMORY),
119 -                                errmsg("out of memory")));
120 -       strncpy(Conf->Spell[Conf->nspell].flag, flag, 10);
121 +       MEMOUT(Conf->Spell[Conf->nspell].word);
122 +       strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
123         Conf->nspell++;
124         return (0);
125  }
126  
127  
128  int
129 -ImportDictionary(IspellDict * Conf, const char *filename)
130 +NIImportDictionary(IspellDict * Conf, const char *filename)
131  {
132         unsigned char str[BUFSIZ];
133         FILE       *dict;
134 @@ -143,7 +155,7 @@
135                         flag = s;
136                         while (*s)
137                         {
138 -                               if (((*s >= 'A') && (*s <= 'Z')) || ((*s >= 'a') && (*s <= 'z')))
139 +                               if (isprint(*s) && !isspace(*s))
140                                         s++;
141                                 else
142                                 {
143 @@ -166,65 +178,49 @@
144                                 *s = 0;
145                         s++;
146                 }
147 -               AddSpell(Conf, str, flag);
148 +               NIAddSpell(Conf, str, flag);
149         }
150         fclose(dict);
151         return (0);
152  }
153  
154  
155 -static SPELL *
156 -FindWord(IspellDict * Conf, const char *word, int affixflag)
157 +static int
158 +FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
159  {
160 -       int                     l,
161 -                               c,
162 -                               r,
163 -                               resc,
164 -                               resl,
165 -                               resr,
166 -                               i;
167 -
168 -       i = (int) (*word) & 255;
169 -       l = Conf->SpellTree.Left[i];
170 -       r = Conf->SpellTree.Right[i];
171 -       if (l == -1)
172 -               return (NULL);
173 -       while (l <= r)
174 -       {
175 -               c = (l + r) >> 1;
176 -               resc = strcmp(Conf->Spell[c].word, word);
177 -               if ((resc == 0) &&
178 -                       ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)))
179 -                       return (&Conf->Spell[c]);
180 -               resl = strcmp(Conf->Spell[l].word, word);
181 -               if ((resl == 0) &&
182 -                       ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)))
183 -                       return (&Conf->Spell[l]);
184 -               resr = strcmp(Conf->Spell[r].word, word);
185 -               if ((resr == 0) &&
186 -                       ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)))
187 -                       return (&Conf->Spell[r]);
188 -               if (resc < 0)
189 -               {
190 -                       l = c + 1;
191 -                       r--;
192 -               }
193 -               else if (resc > 0)
194 -               {
195 -                       r = c - 1;
196 -                       l++;
197 -               }
198 -               else
199 -               {
200 -                       l++;
201 -                       r--;
202 +       SPNode *node = Conf->Dictionary;
203 +       SPNodeData *StopLow, *StopHigh, *StopMiddle;
204 +       int level=0, wrdlen=strlen(word);
205 +
206 +       while( node && level<wrdlen) {
207 +               StopLow = node->data;
208 +               StopHigh = node->data+node->length;
209 +               while (StopLow < StopHigh) {
210 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
211 +                       if ( StopMiddle->val == ((uint8*)(word))[level] ) {
212 +                               if ( wrdlen==level+1 && StopMiddle->isword ) {
213 +                                       if ( compoundonly && !StopMiddle->compoundallow )
214 +                                               return 0;
215 +                                       if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
216 +                                               return 1;
217 +                               }
218 +                               node=StopMiddle->node;
219 +                               level++;
220 +                               break;
221 +                       } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
222 +                               StopLow = StopMiddle + 1;
223 +                       } else {
224 +                               StopHigh = StopMiddle;
225 +                       }
226                 }
227 +               if ( StopLow >= StopHigh )
228 +                       break; 
229         }
230 -       return (NULL);
231 +       return 0;
232  }
233  
234  int
235 -AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type)
236 +NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
237  {
238         if (Conf->naffixes >= Conf->maffixes)
239         {
240 @@ -238,16 +234,14 @@
241                         Conf->maffixes = 16;
242                         Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));
243                 }
244 -               if (Conf->Affix == NULL)
245 -                       ereport(ERROR,
246 -                                       (errcode(ERRCODE_OUT_OF_MEMORY),
247 -                                        errmsg("out of memory")));
248 +               MEMOUT(Conf->Affix);
249         }
250         if (type == 's')
251                 sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
252         else
253                 sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
254         Conf->Affix[Conf->naffixes].compile = 1;
255 +       Conf->Affix[Conf->naffixes].flagflags = flagflags;
256         Conf->Affix[Conf->naffixes].flag = flag;
257         Conf->Affix[Conf->naffixes].type = type;
258  
259 @@ -281,7 +275,7 @@
260  
261  
262  int
263 -ImportAffixes(IspellDict * Conf, const char *filename)
264 +NIImportAffixes(IspellDict * Conf, const char *filename)
265  {
266         unsigned char str[BUFSIZ];
267         unsigned char flag = 0;
268 @@ -292,13 +286,24 @@
269         int                     i;
270         int                     suffixes = 0;
271         int                     prefixes = 0;
272 +       unsigned char flagflags = 0;
273         FILE       *affix;
274  
275         if (!(affix = fopen(filename, "r")))
276                 return (1);
277 +       Conf->compoundcontrol='\t';
278  
279         while (fgets(str, sizeof(str), affix))
280         {
281 +               if (STRNCASECMP(str, "compoundwords")==0) {
282 +                       s=strchr(str, 'l');
283 +                       if ( s ) {
284 +                               while( *s!=' ' ) s++;
285 +                               while( *s==' ' ) s++;
286 +                               Conf->compoundcontrol = *s;
287 +                               continue; 
288 +                       }
289 +               }
290                 if (!STRNCASECMP(str, "suffixes"))
291                 {
292                         suffixes = 1;
293 @@ -314,8 +319,18 @@
294                 if (!STRNCASECMP(str, "flag "))
295                 {
296                         s = str + 5;
297 -                       while (strchr("* ", *s))
298 +                       flagflags=0;
299 +                       while( *s==' ' ) s++;
300 +                       if ( *s=='*' ) {
301 +                               flagflags|=FF_CROSSPRODUCT;
302 +                               s++;
303 +                       } else if ( *s=='~' ) {
304 +                               flagflags|=FF_COMPOUNDONLYAFX;
305                                 s++;
306 +                       }
307 +
308 +                       if ( *s=='\\' ) s++;
309 +               
310                         flag = *s;
311                         continue;
312                 }
313 @@ -351,7 +366,7 @@
314                                 continue;
315                 }
316  
317 -               AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');
318 +               NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
319  
320         }
321         fclose(affix);
322 @@ -359,87 +374,266 @@
323         return (0);
324  }
325  
326 +/* Append "AffixData[a1] AffixData[a2]" to the NULL-terminated AffixData list; returns the new entry's index. */
327 +static int 
328 +MergeAffix(IspellDict *Conf, int a1, int a2) {
329 +       int naffix=0;
330 +       char **ptr=Conf->AffixData;
331 +
332 +       while(*ptr) { /* count the existing entries up to the NULL terminator */
333 +               naffix++;
334 +               ptr++;
335 +       }
336 +       /* grow by one: new entry + terminator; realloc may move the array, so re-derive ptr below */
337 +       Conf->AffixData=(char**)realloc( Conf->AffixData, (naffix+2)*sizeof(char*) );
338 +       MEMOUT(Conf->AffixData);
339 +       ptr = Conf->AffixData + naffix;
340 +       *ptr=malloc( strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );
341 +       MEMOUT(*ptr); /* test the malloc result itself; the slot address ptr is never NULL */
342 +       sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
343 +       ptr[1]=NULL; /* keep the list NULL-terminated (pointer, not a character constant) */
344 +       return naffix; 
345 +}
346 +
347 +/* Recursively build the trie node for Spell[low..high) keyed on the character at position 'level'. */
348 +static SPNode* 
349 +mkSPNode(IspellDict *Conf, int low, int high, int level) {
350 +       int i;
351 +       int nchar=0;
352 +       char lastchar='\0';
353 +       SPNode *rs;
354 +       SPNodeData *data;
355 +       int lownew=low;
356 +       /* pass 1: count character changes at 'level' (NISortDictionary qsorts Spell by word before calling) */
357 +       for(i=low; i<high; i++)
358 +               if ( Conf->Spell[i].p.d.len>level && lastchar!=Conf->Spell[i].word[level] ) {
359 +                       nchar++;
360 +                       lastchar=Conf->Spell[i].word[level];
361 +               }
362 +       /* no word in the range is longer than 'level': no node at this depth */
363 +       if (!nchar)
364 +               return NULL;
365 +       /* zero-filled node: header plus one SPNodeData slot per counted character */
366 +       rs=(SPNode*)malloc(SPNHRDSZ+nchar*sizeof(SPNodeData));
367 +       MEMOUT(rs);
368 +       memset(rs,0,SPNHRDSZ+nchar*sizeof(SPNodeData));
369 +       rs->length = nchar;
370 +       data=rs->data;
371 +       /* pass 2: fill the slots; when the character changes, recurse over the finished run [lownew, i) */
372 +       lastchar='\0';
373 +       for(i=low; i<high; i++)
374 +               if ( Conf->Spell[i].p.d.len>level ) {
375 +                       if ( lastchar!=Conf->Spell[i].word[level] ) {
376 +                               if ( lastchar ) {
377 +                                       data->node = mkSPNode(Conf, lownew, i, level+1);
378 +                                       lownew=i;
379 +                                       data++;
380 +                               }
381 +                               lastchar=Conf->Spell[i].word[level];
382 +                       }
383 +                       data->val=((uint8*)(Conf->Spell[i].word))[level];
384 +                       if ( Conf->Spell[i].p.d.len == level+1 ) { /* this word ends at the current slot */
385 +                               if ( data->isword && data->affix!=Conf->Spell[i].p.d.affix) {
386 +                                       /* 
387 +                                       fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n", 
388 +                                               Conf->Spell[i].word, 
389 +                                               Conf->AffixData[data->affix],
390 +                                               Conf->AffixData[Conf->Spell[i].p.d.affix]
391 +                                       ); 
392 +                                       */
393 +                                       /* same word already stored with another affix index: merge both into a new AffixData entry */
394 +                                       data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
395 +                               } else
396 +                                       data->affix = Conf->Spell[i].p.d.affix;
397 +                               data->isword=1;
398 +                               if ( strchr( Conf->AffixData[ data->affix ], Conf->compoundcontrol ) )
399 +                                       data->compoundallow=1;
400 +                       }
401 +               }
402 +       /* children of the last run of equal characters */
403 +       data->node = mkSPNode(Conf, lownew, high, level+1);
404 +
405 +       return rs;
406 +}
407 +
408 +
409 +
410  void
411 -SortDictionary(IspellDict * Conf)
412 +NISortDictionary(IspellDict * Conf)
413  {
414 -       int                     CurLet = -1,
415 -                               Let;
416         size_t          i;
417 -
418 +       int     naffix=3;
419 +       
420 +       /* compress affixes */
421 +       qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
422 +       for (i = 1; i < Conf->nspell; i++)
423 +               if ( strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag) )
424 +                       naffix++;
425 +
426 +       Conf->AffixData=(char**)malloc( naffix*sizeof(char*) );
427 +       MEMOUT(Conf->AffixData);
428 +       memset(Conf->AffixData, 0, naffix*sizeof(char*));
429 +       naffix=1;
430 +       Conf->AffixData[0]=strdup("");
431 +       MEMOUT(Conf->AffixData[0]);
432 +       Conf->AffixData[1]=strdup( Conf->Spell[0].p.flag );
433 +       MEMOUT(Conf->AffixData[1]);
434 +       Conf->Spell[0].p.d.affix = 1;
435 +       Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
436 +       for (i = 1; i < Conf->nspell; i++) {
437 +               if ( strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]) ) {
438 +                       naffix++;
439 +                       Conf->AffixData[naffix] = strdup( Conf->Spell[i].p.flag );
440 +                       MEMOUT(Conf->AffixData[naffix]);
441 +               }
442 +               Conf->Spell[i].p.d.affix = naffix;
443 +               Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
444 +       }
445 +       
446         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
447 +       Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
448 +       
449 +       for (i = 0; i < Conf->nspell; i++) 
450 +               free( Conf->Spell[i].word );
451 +       free( Conf->Spell );
452 +       Conf->Spell=NULL;
453 +}
454 +
455 +static AffixNode*
456 +mkANode(IspellDict *Conf, int low, int high, int level, int type) {
457 +       int i;
458 +       int nchar=0;
459 +       uint8 lastchar='\0';
460 +       AffixNode *rs;
461 +       AffixNodeData *data;
462 +       int lownew=low;
463 +
464 +       for(i=low; i<high; i++)
465 +               if ( Conf->Affix[i].replen>level && lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
466 +                       nchar++;
467 +                       lastchar=GETCHAR( Conf->Affix + i, level, type );
468 +               }
469  
470 -       for (i = 0; i < 256; i++)
471 -               Conf->SpellTree.Left[i] = -1;
472 +       if (!nchar)
473 +               return NULL;
474  
475 -       for (i = 0; i < Conf->nspell; i++)
476 -       {
477 -               Let = (int) (*(Conf->Spell[i].word)) & 255;
478 -               if (CurLet != Let)
479 -               {
480 -                       Conf->SpellTree.Left[Let] = i;
481 -                       CurLet = Let;
482 +       rs=(AffixNode*)malloc(ANHRDSZ+nchar*sizeof(AffixNodeData));
483 +       MEMOUT(rs);
484 +       memset(rs,0,ANHRDSZ+nchar*sizeof(AffixNodeData));
485 +       rs->length = nchar;
486 +       data=rs->data;
487 +
488 +       lastchar='\0';
489 +       for(i=low; i<high; i++)
490 +               if ( Conf->Affix[i].replen>level ) {
491 +                       if ( lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
492 +                               if ( lastchar ) {
493 +                                       data->node = mkANode(Conf, lownew, i, level+1, type);
494 +                                       lownew=i;
495 +                                       data++;
496 +                               }
497 +                               lastchar=GETCHAR( Conf->Affix + i, level, type );
498 +                       }
499 +                       data->val=GETCHAR( Conf->Affix + i, level, type );
500 +                       if ( Conf->Affix[i].replen == level+1 ) { /* affix stopped */
501 +                               if ( !data->naff )
502 +                                       data->aff=(AFFIX**)malloc(sizeof(AFFIX*)*(high-i+1));
503 +                                       MEMOUT(data);
504 +                               data->aff[ data->naff ] = Conf->Affix + i;
505 +                               data->naff++;
506 +                       }
507                 }
508 -               Conf->SpellTree.Right[Let] = i;
509 -       }
510 +               
511 +       data->node = mkANode(Conf, lownew, high, level+1, type);
512 +
513 +       return rs;
514  }
515  
516  void
517 -SortAffixes(IspellDict * Conf)
518 +NISortAffixes(IspellDict * Conf)
519  {
520 -       int                     CurLetP = -1,
521 -                               CurLetS = -1,
522 -                               Let;
523         AFFIX      *Affix;
524         size_t          i;
525 +       CMPDAffix* ptr;
526 +       int     firstsuffix=-1;
527  
528         if (Conf->naffixes > 1)
529                 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
530 -       for (i = 0; i < 256; i++)
531 -       {
532 -               Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
533 -               Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
534 -       }
535  
536 -       for (i = 0; i < Conf->naffixes; i++)
537 -       {
538 +       Conf->CompoundAffix = ptr = (CMPDAffix*)malloc( sizeof(CMPDAffix) * Conf->naffixes );
539 +       MEMOUT(Conf->CompoundAffix);
540 +       ptr->affix=NULL;
541 +
542 +       for (i = 0; i < Conf->naffixes; i++) {
543                 Affix = &(((AFFIX *) Conf->Affix)[i]);
544 -               if (Affix->type == 'p')
545 -               {
546 -                       Let = (int) (*(Affix->repl)) & 255;
547 -                       if (CurLetP != Let)
548 -                       {
549 -                               Conf->PrefixTree.Left[Let] = i;
550 -                               CurLetP = Let;
551 +               if ( Affix->type == 's' ) {
552 +                       if ( firstsuffix<0 ) firstsuffix=i;
553 +                       if ( Affix->flagflags & FF_COMPOUNDONLYAFX ) {
554 +                               if ( !ptr->affix || strbncmp((ptr-1)->affix, Affix->repl, (ptr-1)->len) ) {
555 +                                       /* leave only unique and minimal suffixes */
556 +                                       ptr->affix=Affix->repl;
557 +                                       ptr->len=Affix->replen;
558 +                                       ptr++;
559 +                               }
560                         }
561 -                       Conf->PrefixTree.Right[Let] = i;
562                 }
563 -               else
564 -               {
565 -                       Let = (Affix->replen) ? (int) (Affix->repl[Affix->replen - 1]) & 255 : 0;
566 -                       if (CurLetS != Let)
567 -                       {
568 -                               Conf->SuffixTree.Left[Let] = i;
569 -                               CurLetS = Let;
570 +       }
571 +       ptr->affix = NULL;
572 +       Conf->CompoundAffix = (CMPDAffix*)realloc( Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr-Conf->CompoundAffix+1) );
573 +
574 +       Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p'); 
575 +       Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
576 +}
577 +
578 +static AffixNodeData*
579 +FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) {
580 +       AffixNodeData *StopLow, *StopHigh, *StopMiddle;
581 +       uint8 symbol;
582 +
583 +       while( node && *level<wrdlen) {
584 +               StopLow = node->data;
585 +               StopHigh = node->data+node->length;
586 +               while (StopLow < StopHigh) {
587 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
588 +                       symbol = GETWCHAR(word,wrdlen,*level,type);
589 +                       if ( StopMiddle->val == symbol ) {
590 +                               if ( StopMiddle->naff ) 
591 +                                       return StopMiddle;
592 +                               node=StopMiddle->node;
593 +                               (*level)++;
594 +                               break;
595 +                       } else if ( StopMiddle->val < symbol ) {
596 +                               StopLow = StopMiddle + 1;
597 +                       } else {
598 +                               StopHigh = StopMiddle;
599                         }
600 -                       Conf->SuffixTree.Right[Let] = i;
601                 }
602 +               if ( StopLow >= StopHigh )
603 +                       break; 
604         }
605 +       return NULL;
606  }
607  
608  static char *
609 -CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict * Conf)
610 -{
611 +CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
612         regmatch_t      subs[2];                /* workaround for apache&linux */
613 -       char            newword[2 * MAXNORMLEN] = "";
614         int                     err;
615  
616 -       *res = strbncmp(word, Affix->repl, Affix->replen);
617 -       if (*res < 0)
618 -               return NULL;
619 -       if (*res > 0)
620 -               return NULL;
621 -       strcpy(newword, word);
622 -       strcpy(newword + len - Affix->replen, Affix->find);
623 +       if ( flagflags & FF_COMPOUNDONLYAFX ) {
624 +               if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
625 +                       return NULL;
626 +       } else {
627 +               if ( Affix->flagflags & FF_COMPOUNDONLYAFX )
628 +                       return NULL;
629 +       } 
630 +
631 +       if ( Affix->type=='s' ) {
632 +               strcpy(newword, word);
633 +               strcpy(newword + len - Affix->replen, Affix->find);
634 +       } else {
635 +               strcpy(newword, Affix->find);
636 +               strcat(newword, word + Affix->replen);
637 +       }
638  
639         if (Affix->compile)
640         {
641 @@ -452,205 +646,364 @@
642                 }
643                 Affix->compile = 0;
644         }
645 -       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
646 -       {
647 -               if (FindWord(Conf, newword, Affix->flag))
648 -                       return pstrdup(newword);
649 -       }
650 +       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0))) 
651 +                       return newword;
652         return NULL;
653  }
654  
655 -#define NS 1
656 -#define MAX_NORM 512
657 -static int
658 -CheckPrefix(const char *word, size_t len, AFFIX * Affix, IspellDict * Conf, int pi,
659 -                       char **forms, char ***cur)
660 -{
661 -       regmatch_t      subs[NS * 2];
662 +
663 +static char      **
664 +NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
665 +       AffixNodeData   *suffix=NULL, *prefix=NULL;
666 +       int     slevel=0, plevel=0;
667 +       int wrdlen = strlen(word), swrdlen;
668 +       char      **forms;
669 +       char      **cur;
670         char            newword[2 * MAXNORMLEN] = "";
671 -       int                     err,
672 -                               ls,
673 -                               res,
674 -                               lres;
675 -       size_t          newlen;
676 -       AFFIX      *CAffix = Conf->Affix;
677 -
678 -       res = strncmp(word, Affix->repl, Affix->replen);
679 -       if (res != 0)
680 -               return res;
681 -       strcpy(newword, Affix->find);
682 -       strcat(newword, word + Affix->replen);
683 +       char            pnewword[2 * MAXNORMLEN] = "";
684 +       AffixNode *snode = Conf->Suffix, *pnode;
685 +       int i,j;
686 +
687 +       if (wrdlen > MAXNORMLEN) return NULL;
688 +       strlower(word); 
689 +       cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
690 +       *cur = NULL;
691  
692 -       if (Affix->compile)
693 -       {
694 -               err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
695 -               if (err)
696 -               {
697 -                       /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
698 -                       regfree(&(Affix->reg));
699 -                       return (0);
700 -               }
701 -               Affix->compile = 0;
702 +
703 +       /* Check that the word itself is normal form */
704 +       if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) {
705 +               *cur = pstrdup(word);
706 +               cur++;
707 +               *cur = NULL;
708         }
709 -       if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
710 -       {
711 -               SPELL      *curspell;
712  
713 -               if ((curspell = FindWord(Conf, newword, Affix->flag)))
714 -               {
715 -                       if ((*cur - forms) < (MAX_NORM - 1))
716 -                       {
717 -                               **cur = pstrdup(newword);
718 -                               (*cur)++;
719 -                               **cur = NULL;
720 +       /* Find all other NORMAL forms of the 'word' (check only prefix)*/
721 +       pnode=Conf->Prefix;
722 +       plevel=0;
723 +       while(pnode) {
724 +               prefix=FinfAffixes(pnode, word, wrdlen, &plevel,'p');
725 +               if (!prefix) break;
726 +               for(j=0;j<prefix->naff;j++) {   
727 +                       if ( CheckAffix(word,wrdlen,prefix->aff[j], flag, newword) ) {
728 +                               /* prefix success */
729 +                               if ( FindWord(Conf, newword, prefix->aff[j]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
730 +                                       /* word search success */
731 +                                       *cur = pstrdup(newword);
732 +                                       cur++;
733 +                                       *cur=NULL;
734 +                               }
735                         }
736                 }
737 -               newlen = strlen(newword);
738 -               ls = Conf->SuffixTree.Left[pi];
739 -               if (ls >= 0 && ((*cur - forms) < (MAX_NORM - 1)))
740 -               {
741 -                       **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
742 -                       if (**cur)
743 -                       {
744 -                               (*cur)++;
745 -                               **cur = NULL;
746 +               pnode = prefix->node;
747 +               plevel++;
748 +       }
749
750 +       /* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
751 +       while( snode ) {
752 +               /* find possible suffix */
753 +               suffix = FinfAffixes(snode, word, wrdlen, &slevel, 's');
754 +               if (!suffix) break;
755 +               /* foreach suffix check affix */
756 +               for(i=0;i<suffix->naff;i++) {
757 +                       if ( CheckAffix(word, wrdlen, suffix->aff[i], flag, newword) ) {
758 +                               /* suffix success */
759 +                               if ( FindWord(Conf, newword, suffix->aff[i]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
760 +                                       /* word search success */
761 +                                       *cur = pstrdup(newword);
762 +                                       cur++;
763 +                                       *cur=NULL;
764 +                               }
765 +                               /* now we will look changed word with prefixes */
766 +                               pnode=Conf->Prefix;
767 +                               plevel=0;
768 +                               swrdlen=strlen(newword);
769 +                               while(pnode) {
770 +                                       prefix=FinfAffixes(pnode, newword, swrdlen, &plevel,'p');
771 +                                       if (!prefix) break;
772 +                                       for(j=0;j<prefix->naff;j++) {   
773 +                                               if ( CheckAffix(newword,swrdlen,prefix->aff[j], flag, pnewword) ) {
774 +                                                       /* prefix success */
775 +                                                       int ff=( prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT ) ?
776 +                                                                0 : prefix->aff[j]->flag; 
777 +                                                       if ( FindWord(Conf, pnewword, ff, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
778 +                                                               /* word search success */
779 +                                                               *cur = pstrdup(pnewword);
780 +                                                               cur++;
781 +                                                               *cur=NULL;
782 +                                                       }
783 +                                               }
784 +                                       }
785 +                                       pnode = prefix->node;
786 +                                       plevel++;
787 +                               } 
788                         }
789                 }
790 -       }
791 -       return 0;
792 -}
793  
794 +               snode=suffix->node;
795 +               slevel++;
796 +       }
797  
798 -char     **
799 -NormalizeWord(IspellDict * Conf, char *word)
800 -{
801 -/*regmatch_t subs[NS];*/
802 -       size_t          len;
803 -       char      **forms;
804 -       char      **cur;
805 -       AFFIX      *Affix;
806 -       int                     ri,
807 -                               pi,
808 -                               ipi,
809 -                               lp,
810 -                               rp,
811 -                               cp,
812 -                               ls,
813 -                               rs;
814 -       int                     lres,
815 -                               rres,
816 -                               cres = 0;
817 -       SPELL      *spell;
818 -
819 -       len = strlen(word);
820 -       if (len > MAXNORMLEN)
821 +       if (cur == forms) {
822 +               pfree(forms);
823                 return (NULL);
824 +       }
825 +       return (forms);
826 +}
827  
828 -       strlower(word);
829 +typedef struct SplitVar {
830 +       int     nstem;
831 +       char    **stem; 
832 +       struct  SplitVar *next;
833 +} SplitVar;
834 +
835 +static int 
836 +CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len) {
837 +       while( (*ptr)->affix ) {
838 +               if ( len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len)==0 ) {
839 +                       len = (*ptr)->len;
840 +                       (*ptr)++;
841 +                       return len;
842 +               }
843 +               (*ptr)++;
844 +       }
845 +       return 0;
846 +}
847  
848 -       forms = (char **) palloc(MAX_NORM * sizeof(char **));
849 -       cur = forms;
850 -       *cur = NULL;
851 +static SplitVar*
852 +CopyVar(SplitVar *s, int makedup) {
853 +       SplitVar *v = (SplitVar*)palloc(sizeof(SplitVar));
854 +
855 +       v->stem=(char**)palloc( sizeof(char*) * (MAX_NORM) );
856 +       v->next=NULL;
857 +       if ( s ) {
858 +               int i;
859 +               v->nstem = s->nstem;
860 +               for(i=0;i<s->nstem;i++)
861 +                       v->stem[i] = (makedup) ? pstrdup( s->stem[i] ) : s->stem[i];
862 +       } else {
863 +               v->nstem=0;
864 +       }
865 +       return v;
866 +}
867  
868 -       ri = (int) (*word) & 255;
869 -       pi = (int) (word[strlen(word) - 1]) & 255;
870 -       Affix = (AFFIX *) Conf->Affix;
871  
872 -       /* Check that the word itself is normal form */
873 -       if ((spell = FindWord(Conf, word, 0)))
874 -       {
875 -               *cur = pstrdup(word);
876 -               cur++;
877 -               *cur = NULL;
878 -       }
879 +static SplitVar*
880 +SplitToVariants( IspellDict * Conf, SPNode *snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos ) {
881 +       SplitVar *var=NULL;
882 +       SPNodeData *StopLow, *StopHigh, *StopMiddle;
883 +       SPNode *node = (snode) ? snode : Conf->Dictionary;
884 +       int level=(snode) ? minpos : startpos; /* recursive minpos==level*/
885 +       int lenaff;
886 +       CMPDAffix *caff;
887 +       char    notprobed[wordlen];
888 +
889 +       memset(notprobed,1,wordlen);
890 +       var = CopyVar(orig,1);
891 +
892 +       while( node && level<wordlen) {
893 +               StopLow = node->data;
894 +               StopHigh = node->data+node->length;
895 +               while (StopLow < StopHigh) {
896 +                       StopMiddle = StopLow + (StopHigh - StopLow) / 2;
897 +                       if ( StopMiddle->val == ((uint8*)(word))[level] ) {
898 +                               break;
899 +                       } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
900 +                               StopLow = StopMiddle + 1;
901 +                       } else {
902 +                               StopHigh = StopMiddle;
903 +                       }
904 +               }
905 +               if ( StopLow >= StopHigh )
906 +                       break;
907  
908 -       /* Find all other NORMAL forms of the 'word' */
909 +               /* find word with epenthetic */
910 +               caff = Conf->CompoundAffix;
911 +               while ( level>startpos && (lenaff=CheckCompoundAffixes( &caff, word + level, wordlen - level ))>0 ) {
912 +                       /* there is one of compound suffixes, so check word for existings */
913 +                       char buf[MAXNORMLEN];
914 +                       char **subres;
915 +
916 +                       lenaff=level-startpos+lenaff;
917 +               
918 +                       if ( !notprobed[startpos+lenaff-1] )
919 +                               continue;
920 +                               
921 +                       if ( level+lenaff-1 <= minpos )
922 +                               continue;
923  
924 -       for (ipi = 0; ipi <= pi; ipi += pi)
925 -       {
926 +                       memcpy(buf, word+startpos, lenaff);
927 +                       buf[lenaff]='\0';
928  
929 -               /* check prefix */
930 -               lp = Conf->PrefixTree.Left[ri];
931 -               rp = Conf->PrefixTree.Right[ri];
932 -               while (lp >= 0 && lp <= rp)
933 -               {
934 -                       cp = (lp + rp) >> 1;
935 -                       cres = 0;
936 -                       if ((cur - forms) < (MAX_NORM - 1))
937 -                               cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
938 -                       if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
939 -                               lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
940 -                       if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
941 -                               rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
942 -                       if (cres < 0)
943 -                       {
944 -                               rp = cp - 1;
945 -                               lp++;
946 -                       }
947 -                       else if (cres > 0)
948 -                       {
949 -                               lp = cp + 1;
950 -                               rp--;
951 -                       }
952 -                       else
953 -                       {
954 -                               lp++;
955 -                               rp--;
956 +                       subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);
957 +                       if ( subres ) {
958 +                               /* Yes, it was a word from dictionary */
959 +                               SplitVar *new=CopyVar(var,0);
960 +                               SplitVar *ptr=var;
961 +                               char **sptr=subres;
962 +                       
963 +                               notprobed[startpos+lenaff-1]=0;
964 +       
965 +                               while(*sptr) {
966 +                                       new->stem[ new->nstem ] = *sptr;
967 +                                       new->nstem++;
968 +                                       sptr++;
969 +                               }
970 +                               pfree(subres);
971 +
972 +                               while( ptr->next ) 
973 +                                       ptr = ptr->next;
974 +                               ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos+lenaff, startpos+lenaff);
975
976 +                               pfree(new->stem);
977 +                               pfree(new);
978                         }
979                 }
980  
981 -               /* check suffix */
982 -               ls = Conf->SuffixTree.Left[ipi];
983 -               rs = Conf->SuffixTree.Right[ipi];
984 -               while (ls >= 0 && ls <= rs)
985 -               {
986 -                       if (((cur - forms) < (MAX_NORM - 1)))
987 -                       {
988 -                               *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
989 -                               if (*cur)
990 -                               {
991 -                                       cur++;
992 -                                       *cur = NULL;
993 +               /* find infinitive */
994 +               if ( StopMiddle->isword && StopMiddle->compoundallow && notprobed[level] ) {
995 +                       /* ok, we found full compoundallowed word*/
996 +                       if ( level>minpos ) {
997 +                               /* and its length more than minimal */
998 +                               if ( wordlen==level+1 ) {
999 +                                       /* well, it was last word */
1000 +                                       var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1001 +                                       var->nstem++;
1002 +                                       return var;
1003 +                               } else {
1004 +                                       /* then we will search more big word at the same point */
1005 +                                       SplitVar *ptr=var;
1006 +                                       while( ptr->next ) 
1007 +                                               ptr = ptr->next;
1008 +                                       ptr->next=SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1009 +                                       /* we can find next word */
1010 +                                       level++;
1011 +                                       var->stem[ var->nstem ] = strnduplicate(word + startpos, level - startpos);
1012 +                                       var->nstem++;
1013 +                                       node = Conf->Dictionary;
1014 +                                       startpos=level;
1015 +                                       continue;
1016                                 }
1017                         }
1018 -                       if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
1019 -                       {
1020 -                               *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
1021 -                               if (*cur)
1022 -                               {
1023 -                                       cur++;
1024 -                                       *cur = NULL;
1025 +               }
1026 +               level++;
1027 +               node=StopMiddle->node;
1028 +       }
1029 +
1030 +       var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1031 +       var->nstem++;
1032 +       return var;
1033 +} 
1034 +
1035 +char  **
1036 +NINormalizeWord(IspellDict * Conf, char *word) {
1037 +       char **res= NormalizeSubWord(Conf, word, 0);
1038 +
1039 +       if ( Conf->compoundcontrol != '\t' ) {
1040 +               int wordlen=strlen(word);
1041 +               SplitVar *ptr, *var = SplitToVariants(Conf,NULL,NULL, word, wordlen, 0, -1);
1042 +               char **cur=res;
1043 +               int i;
1044 +       
1045 +               while(var) {
1046 +                       if ( var->nstem > 1 ) {
1047 +                               char **subres = NormalizeSubWord(Conf, var->stem[ var->nstem-1 ], FF_COMPOUNDWORD);
1048 +                               if ( subres ) {
1049 +                                       char **ptr=subres;
1050 +       
1051 +                                       if ( cur ) {
1052 +                                               while(*cur) 
1053 +                                                       cur++;
1054 +                                       } else {
1055 +                                               res=cur=(char **) palloc(MAX_NORM * sizeof(char *));
1056 +                                       }
1057 +       
1058 +                                       for(i=0;i<var->nstem-1;i++) {
1059 +                                               *cur=var->stem[ i ];
1060 +                                               cur++;
1061 +                                       }
1062 +                                       while(*ptr) {
1063 +                                               *cur=*ptr;
1064 +                                               cur++; ptr++;
1065 +                                       }
1066 +                                       *cur=NULL;
1067 +                                       pfree(subres);
1068 +                                       var->stem[ 0 ] = NULL;
1069                                 }
1070                         }
1071 -                       ls++;
1072 -                       rs--;
1073 -               }                                               /* end while */
1074 +       
1075 +                       for(i=0;i<var->nstem && var->stem[ i ];i++)
1076 +                               pfree( var->stem[i] );  
1077 +                       ptr = var->next;
1078 +                       pfree(var->stem);
1079 +                       pfree(var);     
1080 +                       var=ptr;
1081 +               }
1082 +       }
1083 +       return res;
1084 +}
1085  
1086 -       }                                                       /* for ipi */
1087  
1088 -       if (cur == forms)
1089 -       {
1090 -               pfree(forms);
1091 -               return (NULL);
1092 +static void freeSPNode(SPNode *node) {
1093 +       SPNodeData *data;
1094 +
1095 +       if (!node) return;
1096 +       data=node->data;
1097 +       while( node->length ) {
1098 +               freeSPNode(data->node);
1099 +               data++;
1100 +               node->length--;
1101         }
1102 -       return (forms);
1103 +       free(node);
1104  }
1105 +       
1106 +static void freeANode(AffixNode *node) {
1107 +       AffixNodeData *data;
1108 +
1109 +       if (!node) return;
1110 +       data=node->data;
1111 +       while( node->length ) {
1112 +               freeANode(data->node);
1113 +               if (data->naff)
1114 +                       free(data->aff);        
1115 +               data++;
1116 +               node->length--;
1117 +       }
1118 +       free(node);
1119 +}
1120 +       
1121  
1122  void
1123 -FreeIspell(IspellDict * Conf)
1124 +NIFree(IspellDict * Conf)
1125  {
1126         int                     i;
1127         AFFIX      *Affix = (AFFIX *) Conf->Affix;
1128 +       char**     aff = Conf->AffixData;
1129 +
1130 +       if ( aff ) {
1131 +               while(*aff) {
1132 +                       free(*aff);
1133 +                       aff++;
1134 +               }
1135 +               free(Conf->AffixData);
1136 +       }
1137  
1138 +       
1139         for (i = 0; i < Conf->naffixes; i++)
1140         {
1141                 if (Affix[i].compile == 0)
1142                         regfree(&(Affix[i].reg));
1143         }
1144 -       for (i = 0; i < Conf->naffixes; i++)
1145 -               free(Conf->Spell[i].word);
1146 -       free(Conf->Affix);
1147 -       free(Conf->Spell);
1148 +       if (Conf->Spell) {
1149 +               for (i = 0; i < Conf->nspell; i++)
1150 +                       free(Conf->Spell[i].word);
1151 +               free(Conf->Spell);
1152 +       }
1153 +
1154 +       if (Conf->Affix) free(Conf->Affix);
1155 +       if ( Conf->CompoundAffix ) free(Conf->CompoundAffix);
1156 +       freeSPNode(Conf->Dictionary);
1157 +       freeANode(Conf->Suffix);
1158 +       freeANode(Conf->Prefix);
1159         memset((void *) Conf, 0, sizeof(IspellDict));
1160         return;
1161  }
1162 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.h postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h
1163 --- postgresql-7.4/contrib/tsearch2/ispell/spell.h      2003-08-04 02:43:11.000000000 +0200
1164 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h        2003-12-18 17:46:03.000000000 +0100
1165 @@ -3,16 +3,44 @@
1166  
1167  #include <sys/types.h>
1168  #include <regex.h>
1169 +#include "c.h"
1170 +
1171 +struct SPNode;
1172 +
1173 +
1174 +typedef struct {
1175 +       uint32 
1176 +               val:8,
1177 +               isword:1,
1178 +               compoundallow:1,
1179 +               affix:22;
1180 +       struct SPNode *node; 
1181 +} SPNodeData;
1182 +
1183 +typedef struct SPNode {
1184 +       uint32  length;
1185 +       SPNodeData      data[1];        
1186 +} SPNode;
1187 +
1188 +#define SPNHRDSZ       (sizeof(uint32))
1189 +
1190  
1191  typedef struct spell_struct
1192  {
1193         char       *word;
1194 -       char            flag[10];
1195 +       union {
1196 +               char            flag[16];
1197 +               struct {
1198 +                       int             affix;
1199 +                       int             len;
1200 +               } d;
1201 +       } p;
1202  }      SPELL;
1203  
1204  typedef struct aff_struct
1205  {
1206         char            flag;
1207 +       char            flagflags;
1208         char            type;
1209         char            mask[33];
1210         char            find[16];
1211 @@ -22,35 +50,66 @@
1212         char            compile;
1213  }      AFFIX;
1214  
1215 +#define FF_CROSSPRODUCT        0x01
1216 +#define FF_COMPOUNDWORD        0x02
1217 +#define FF_COMPOUNDONLYAFX      0x04
1218 +
1219 +struct AffixNode;
1220 +
1221 +typedef struct {
1222 +       uint32
1223 +               val:8,
1224 +               naff:24;
1225 +       AFFIX   **aff;
1226 +       struct AffixNode *node;
1227 +} AffixNodeData;
1228 +
1229 +typedef struct AffixNode {
1230 +       uint32 length;
1231 +       AffixNodeData   data[1];
1232 +} AffixNode;
1233 +
1234 +#define ANHRDSZ        (sizeof(uint32))
1235 +
1236  typedef struct Tree_struct
1237  {
1238         int                     Left[256],
1239                                 Right[256];
1240  }      Tree_struct;
1241  
1242 +typedef struct {
1243 +       char *affix;
1244 +       int len;
1245 +} CMPDAffix;
1246 +
1247  typedef struct
1248  {
1249         int                     maffixes;
1250         int                     naffixes;
1251         AFFIX      *Affix;
1252 +       char                    compoundcontrol;
1253  
1254         int                     nspell;
1255         int                     mspell;
1256         SPELL      *Spell;
1257 -       Tree_struct SpellTree;
1258 -       Tree_struct PrefixTree;
1259 -       Tree_struct SuffixTree;
1260 +
1261 +       AffixNode       *Suffix;
1262 +       AffixNode       *Prefix;
1263 +
1264 +       SPNode  *Dictionary;
1265 +       char    **AffixData;
1266 +       CMPDAffix    *CompoundAffix;
1267  
1268  }      IspellDict;
1269  
1270 -char     **NormalizeWord(IspellDict * Conf, char *word);
1271 -int                    ImportAffixes(IspellDict * Conf, const char *filename);
1272 -int                    ImportDictionary(IspellDict * Conf, const char *filename);
1273 -
1274 -int                    AddSpell(IspellDict * Conf, const char *word, const char *flag);
1275 -int                    AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type);
1276 -void           SortDictionary(IspellDict * Conf);
1277 -void           SortAffixes(IspellDict * Conf);
1278 -void           FreeIspell(IspellDict * Conf);
1279 +char     **NINormalizeWord(IspellDict * Conf, char *word);
1280 +int                    NIImportAffixes(IspellDict * Conf, const char *filename);
1281 +int                    NIImportDictionary(IspellDict * Conf, const char *filename);
1282 +
1283 +int                    NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
1284 +int                    NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type);
1285 +void           NISortDictionary(IspellDict * Conf);
1286 +void           NISortAffixes(IspellDict * Conf);
1287 +void           NIFree(IspellDict * Conf);
1288  
1289  #endif
1290 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/Makefile postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile
1291 --- postgresql-7.4/contrib/tsearch2/my2ispell/Makefile  1970-01-01 01:00:00.000000000 +0100
1292 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile    2003-12-18 17:46:03.000000000 +0100
1293 @@ -0,0 +1,47 @@
1294 +ZIPFILE=nb_NO
1295 +LANGUAGE=norsk
1296 +
1297 +
1298 +UNZIP=unzip -o
1299 +
1300 +
1301 +all: $(LANGUAGE).dict $(LANGUAGE).aff
1302 +
1303 +$(ZIPFILE).aff: $(ZIPFILE).zip
1304 +       $(UNZIP) $? $@
1305 +       touch $@ 
1306 +
1307 +
1308 +# 1 Clean up the dictionary
1309 +# 2 Remove the " symbol
1310 +# 3 Add the compoundwords controlled flag to words that lack it but
1311 +#   have compound-only suffixes
1312 +
1313 +$(LANGUAGE).dict: $(ZIPFILE).zip
1314 +       $(UNZIP) $? $(ZIPFILE).dic
1315 +       grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
1316 +        | grep -v '\.' \
1317 +        | sed -e 's/"//g' \
1318 +        | perl -pi -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
1319 +        | sort \
1320 +       > $@
1321 +
1322 +# Just convert the affix file
1323 +
1324 +$(LANGUAGE).aff: $(ZIPFILE).aff 
1325 +       grep -v -i zyzyzy $(ZIPFILE).aff \
1326 +        | grep -v -i zyzyzy \
1327 +        | perl -pi \
1328 +               -e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
1329 +               -e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
1330 +               -e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
1331 +               -e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc("   $$3    > $$2")/e;' \
1332 +               -e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
1333 +               -e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc("   $$3    > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
1334 +               -e 's/^(SET|TRY)/#$$1/' \
1335 +       > $@ 
1336 +
1337 +clean:
1338 +       rm -rf $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff 
1339 +
1340 +
1341 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/README postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README
1342 --- postgresql-7.4/contrib/tsearch2/my2ispell/README    1970-01-01 01:00:00.000000000 +0100
1343 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README      2003-12-18 17:46:03.000000000 +0100
1344 @@ -0,0 +1,12 @@
1345 +Utility for converting a MySpell dictionary and affix file from
1346 +myspell to ispell format.
1347 +The utility was tested on nb_NO.zip and nn_NO.zip from
1348 +OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html)
1349 +
1350 +Usage:
1351 +For example, to build the Norwegian dictionary and affix files:
1352 +% cp nb_NO.zip my2ispell
1353 +% cd my2ispell
1354 +% gmake ZIPFILE=nb_NO LANGUAGE=norsk 
1355 +
1356 +Author: Teodor Sigaev <teodor@sigaev.ru>
This page took 3.701211 seconds and 3 git commands to generate.