1 diff -uNr postgresql-7.4/contrib/tsearch2/dict_ispell.c postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c
2 --- postgresql-7.4/contrib/tsearch2/dict_ispell.c 2003-08-04 02:43:11.000000000 +0200
3 +++ postgresql-7.4.fixed/contrib/tsearch2/dict_ispell.c 2003-12-18 17:46:03.000000000 +0100
6 freeDictISpell(DictISpell * d)
8 - FreeIspell(&(d->obj));
10 freestoplist(&(d->stoplist));
14 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
15 errmsg("dictionary already loaded")));
17 - if (ImportDictionary(&(d->obj), pcfg->value))
18 + if (NIImportDictionary(&(d->obj), pcfg->value))
23 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
24 errmsg("affixes already loaded")));
26 - if (ImportAffixes(&(d->obj), pcfg->value))
27 + if (NIImportAffixes(&(d->obj), pcfg->value))
33 if (affloaded && dictloaded)
35 - SortDictionary(&(d->obj));
36 - SortAffixes(&(d->obj));
37 + NISortDictionary(&(d->obj));
38 + NISortAffixes(&(d->obj));
44 res = palloc(sizeof(char *) * 2);
45 txt = pnstrdup(in, PG_GETARG_INT32(2));
46 - res = NormalizeWord(&(d->obj), txt);
47 + res = NINormalizeWord(&(d->obj), txt);
51 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.c postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c
52 --- postgresql-7.4/contrib/tsearch2/ispell/spell.c 2003-08-04 02:43:11.000000000 +0200
53 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.c 2003-12-18 17:46:03.000000000 +0100
58 -#define MAXNORMLEN 56
59 +#define MAX_NORM 1024
60 +#define MAXNORMLEN 256
62 #define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y)))
63 +#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
64 +#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
67 +#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
70 cmpspell(const void *s1, const void *s2)
72 return (strcmp(((const SPELL *) s1)->word, ((const SPELL *) s2)->word));
75 +cmpspellaffix(const void *s1, const void *s2)
77 + return (strcmp(((const SPELL *) s1)->p.flag, ((const SPELL *) s2)->p.flag));
87 +strnduplicate(char *s, int len) {
88 + char *d=(char*)palloc( len + 1 );
93 /* backward string compaire for suffix tree operations */
95 strbcmp(const char *s1, const char *s2)
100 -AddSpell(IspellDict * Conf, const char *word, const char *flag)
101 +NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
103 if (Conf->nspell >= Conf->mspell)
105 @@ -106,24 +124,18 @@
106 Conf->mspell = 1024 * 20;
107 Conf->Spell = (SPELL *) malloc(Conf->mspell * sizeof(SPELL));
109 - if (Conf->Spell == NULL)
111 - (errcode(ERRCODE_OUT_OF_MEMORY),
112 - errmsg("out of memory")));
113 + MEMOUT(Conf->Spell);
115 Conf->Spell[Conf->nspell].word = strdup(word);
116 - if (!Conf->Spell[Conf->nspell].word)
118 - (errcode(ERRCODE_OUT_OF_MEMORY),
119 - errmsg("out of memory")));
120 - strncpy(Conf->Spell[Conf->nspell].flag, flag, 10);
121 + MEMOUT(Conf->Spell[Conf->nspell].word);
122 + strncpy(Conf->Spell[Conf->nspell].p.flag, flag, 16);
129 -ImportDictionary(IspellDict * Conf, const char *filename)
130 +NIImportDictionary(IspellDict * Conf, const char *filename)
132 unsigned char str[BUFSIZ];
138 - if (((*s >= 'A') && (*s <= 'Z')) || ((*s >= 'a') && (*s <= 'z')))
139 + if (isprint(*s) && !isspace(*s))
143 @@ -166,65 +178,49 @@
147 - AddSpell(Conf, str, flag);
148 + NIAddSpell(Conf, str, flag);
156 -FindWord(IspellDict * Conf, const char *word, int affixflag)
158 +FindWord(IspellDict * Conf, const char *word, int affixflag, char compoundonly)
168 - i = (int) (*word) & 255;
169 - l = Conf->SpellTree.Left[i];
170 - r = Conf->SpellTree.Right[i];
176 - resc = strcmp(Conf->Spell[c].word, word);
178 - ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)))
179 - return (&Conf->Spell[c]);
180 - resl = strcmp(Conf->Spell[l].word, word);
182 - ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)))
183 - return (&Conf->Spell[l]);
184 - resr = strcmp(Conf->Spell[r].word, word);
186 - ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)))
187 - return (&Conf->Spell[r]);
202 + SPNode *node = Conf->Dictionary;
203 + SPNodeData *StopLow, *StopHigh, *StopMiddle;
204 + int level=0, wrdlen=strlen(word);
206 + while( node && level<wrdlen) {
207 + StopLow = node->data;
208 + StopHigh = node->data+node->length;
209 + while (StopLow < StopHigh) {
210 + StopMiddle = StopLow + (StopHigh - StopLow) / 2;
211 + if ( StopMiddle->val == ((uint8*)(word))[level] ) {
212 + if ( wrdlen==level+1 && StopMiddle->isword ) {
213 + if ( compoundonly && !StopMiddle->compoundallow )
215 + if ( (affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
218 + node=StopMiddle->node;
221 + } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
222 + StopLow = StopMiddle + 1;
224 + StopHigh = StopMiddle;
227 + if ( StopLow >= StopHigh )
235 -AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type)
236 +NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
238 if (Conf->naffixes >= Conf->maffixes)
240 @@ -238,16 +234,14 @@
242 Conf->Affix = (AFFIX *) malloc(Conf->maffixes * sizeof(AFFIX));
244 - if (Conf->Affix == NULL)
246 - (errcode(ERRCODE_OUT_OF_MEMORY),
247 - errmsg("out of memory")));
248 + MEMOUT(Conf->Affix);
251 sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
253 sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
254 Conf->Affix[Conf->naffixes].compile = 1;
255 + Conf->Affix[Conf->naffixes].flagflags = flagflags;
256 Conf->Affix[Conf->naffixes].flag = flag;
257 Conf->Affix[Conf->naffixes].type = type;
263 -ImportAffixes(IspellDict * Conf, const char *filename)
264 +NIImportAffixes(IspellDict * Conf, const char *filename)
266 unsigned char str[BUFSIZ];
267 unsigned char flag = 0;
268 @@ -292,13 +286,24 @@
272 + unsigned char flagflags = 0;
275 if (!(affix = fopen(filename, "r")))
277 + Conf->compoundcontrol='\t';
279 while (fgets(str, sizeof(str), affix))
281 + if (STRNCASECMP(str, "compoundwords")==0) {
282 + s=strchr(str, 'l');
284 + while( *s!=' ' ) s++;
285 + while( *s==' ' ) s++;
286 + Conf->compoundcontrol = *s;
290 if (!STRNCASECMP(str, "suffixes"))
294 if (!STRNCASECMP(str, "flag "))
297 - while (strchr("* ", *s))
299 + while( *s==' ' ) s++;
301 + flagflags|=FF_CROSSPRODUCT;
303 + } else if ( *s=='~' ) {
304 + flagflags|=FF_COMPOUNDONLYAFX;
308 + if ( *s=='\\' ) s++;
317 - AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');
318 + NIAddAffix(Conf, (int) flag, (char) flagflags, mask, find, repl, suffixes ? 's' : 'p');
322 @@ -359,87 +374,266 @@
327 +MergeAffix(IspellDict *Conf, int a1, int a2) {
329 + char **ptr=Conf->AffixData;
336 + Conf->AffixData=(char**)realloc( Conf->AffixData, (naffix+2)*sizeof(char*) );
337 + MEMOUT(Conf->AffixData);
338 + ptr = Conf->AffixData + naffix;
339 + *ptr=malloc( strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + 1 /* space */ + 1 /* \0 */ );
341 + sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
349 +mkSPNode(IspellDict *Conf, int low, int high, int level) {
352 + char lastchar='\0';
357 + for(i=low; i<high; i++)
358 + if ( Conf->Spell[i].p.d.len>level && lastchar!=Conf->Spell[i].word[level] ) {
360 + lastchar=Conf->Spell[i].word[level];
366 + rs=(SPNode*)malloc(SPNHRDSZ+nchar*sizeof(SPNodeData));
368 + memset(rs,0,SPNHRDSZ+nchar*sizeof(SPNodeData));
369 + rs->length = nchar;
373 + for(i=low; i<high; i++)
374 + if ( Conf->Spell[i].p.d.len>level ) {
375 + if ( lastchar!=Conf->Spell[i].word[level] ) {
377 + data->node = mkSPNode(Conf, lownew, i, level+1);
381 + lastchar=Conf->Spell[i].word[level];
383 + data->val=((uint8*)(Conf->Spell[i].word))[level];
384 + if ( Conf->Spell[i].p.d.len == level+1 ) {
385 + if ( data->isword && data->affix!=Conf->Spell[i].p.d.affix) {
387 + fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n",
388 + Conf->Spell[i].word,
389 + Conf->AffixData[data->affix],
390 + Conf->AffixData[Conf->Spell[i].p.d.affix]
393 + /* MergeAffix called a few times */
394 + data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i].p.d.affix);
396 + data->affix = Conf->Spell[i].p.d.affix;
398 + if ( strchr( Conf->AffixData[ data->affix ], Conf->compoundcontrol ) )
399 + data->compoundallow=1;
403 + data->node = mkSPNode(Conf, lownew, high, level+1);
411 -SortDictionary(IspellDict * Conf)
412 +NISortDictionary(IspellDict * Conf)
420 + /* compress affixes */
421 + qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspellaffix);
422 + for (i = 1; i < Conf->nspell; i++)
423 + if ( strcmp(Conf->Spell[i].p.flag,Conf->Spell[i-1].p.flag) )
426 + Conf->AffixData=(char**)malloc( naffix*sizeof(char*) );
427 + MEMOUT(Conf->AffixData);
428 + memset(Conf->AffixData, 0, naffix*sizeof(char*));
430 + Conf->AffixData[0]=strdup("");
431 + MEMOUT(Conf->AffixData[0]);
432 + Conf->AffixData[1]=strdup( Conf->Spell[0].p.flag );
433 + MEMOUT(Conf->AffixData[1]);
434 + Conf->Spell[0].p.d.affix = 1;
435 + Conf->Spell[0].p.d.len = strlen(Conf->Spell[0].word);
436 + for (i = 1; i < Conf->nspell; i++) {
437 + if ( strcmp(Conf->Spell[i].p.flag, Conf->AffixData[naffix]) ) {
439 + Conf->AffixData[naffix] = strdup( Conf->Spell[i].p.flag );
440 + MEMOUT(Conf->AffixData[naffix]);
442 + Conf->Spell[i].p.d.affix = naffix;
443 + Conf->Spell[i].p.d.len = strlen(Conf->Spell[i].word);
446 qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);
447 + Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
449 + for (i = 0; i < Conf->nspell; i++)
450 + free( Conf->Spell[i].word );
451 + free( Conf->Spell );
456 +mkANode(IspellDict *Conf, int low, int high, int level, int type) {
459 + uint8 lastchar='\0';
461 + AffixNodeData *data;
464 + for(i=low; i<high; i++)
465 + if ( Conf->Affix[i].replen>level && lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
467 + lastchar=GETCHAR( Conf->Affix + i, level, type );
470 - for (i = 0; i < 256; i++)
471 - Conf->SpellTree.Left[i] = -1;
475 - for (i = 0; i < Conf->nspell; i++)
477 - Let = (int) (*(Conf->Spell[i].word)) & 255;
480 - Conf->SpellTree.Left[Let] = i;
482 + rs=(AffixNode*)malloc(ANHRDSZ+nchar*sizeof(AffixNodeData));
484 + memset(rs,0,ANHRDSZ+nchar*sizeof(AffixNodeData));
485 + rs->length = nchar;
489 + for(i=low; i<high; i++)
490 + if ( Conf->Affix[i].replen>level ) {
491 + if ( lastchar!=GETCHAR( Conf->Affix + i, level, type ) ) {
493 + data->node = mkANode(Conf, lownew, i, level+1, type);
497 + lastchar=GETCHAR( Conf->Affix + i, level, type );
499 + data->val=GETCHAR( Conf->Affix + i, level, type );
500 + if ( Conf->Affix[i].replen == level+1 ) { /* affix stopped */
502 + data->aff=(AFFIX**)malloc(sizeof(AFFIX*)*(high-i+1));
504 + data->aff[ data->naff ] = Conf->Affix + i;
508 - Conf->SpellTree.Right[Let] = i;
511 + data->node = mkANode(Conf, lownew, high, level+1, type);
517 -SortAffixes(IspellDict * Conf)
518 +NISortAffixes(IspellDict * Conf)
526 + int firstsuffix=-1;
528 if (Conf->naffixes > 1)
529 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
530 - for (i = 0; i < 256; i++)
532 - Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1;
533 - Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1;
536 - for (i = 0; i < Conf->naffixes; i++)
538 + Conf->CompoundAffix = ptr = (CMPDAffix*)malloc( sizeof(CMPDAffix) * Conf->naffixes );
539 + MEMOUT(Conf->CompoundAffix);
542 + for (i = 0; i < Conf->naffixes; i++) {
543 Affix = &(((AFFIX *) Conf->Affix)[i]);
544 - if (Affix->type == 'p')
546 - Let = (int) (*(Affix->repl)) & 255;
547 - if (CurLetP != Let)
549 - Conf->PrefixTree.Left[Let] = i;
551 + if ( Affix->type == 's' ) {
552 + if ( firstsuffix<0 ) firstsuffix=i;
553 + if ( Affix->flagflags & FF_COMPOUNDONLYAFX ) {
554 + if ( !ptr->affix || strbncmp((ptr-1)->affix, Affix->repl, (ptr-1)->len) ) {
555 + /* leave only unique and minimals suffixes */
556 + ptr->affix=Affix->repl;
557 + ptr->len=Affix->replen;
561 - Conf->PrefixTree.Right[Let] = i;
565 - Let = (Affix->replen) ? (int) (Affix->repl[Affix->replen - 1]) & 255 : 0;
566 - if (CurLetS != Let)
568 - Conf->SuffixTree.Left[Let] = i;
572 + Conf->CompoundAffix = (CMPDAffix*)realloc( Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr-Conf->CompoundAffix+1) );
574 + Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, 'p');
575 + Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, 's');
578 +static AffixNodeData*
579 +FinfAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) {
580 + AffixNodeData *StopLow, *StopHigh, *StopMiddle;
583 + while( node && *level<wrdlen) {
584 + StopLow = node->data;
585 + StopHigh = node->data+node->length;
586 + while (StopLow < StopHigh) {
587 + StopMiddle = StopLow + (StopHigh - StopLow) / 2;
588 + symbol = GETWCHAR(word,wrdlen,*level,type);
589 + if ( StopMiddle->val == symbol ) {
590 + if ( StopMiddle->naff )
592 + node=StopMiddle->node;
595 + } else if ( StopMiddle->val < symbol ) {
596 + StopLow = StopMiddle + 1;
598 + StopHigh = StopMiddle;
600 - Conf->SuffixTree.Right[Let] = i;
602 + if ( StopLow >= StopHigh )
609 -CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict * Conf)
611 +CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *newword) {
612 regmatch_t subs[2]; /* workaround for apache&linux */
613 - char newword[2 * MAXNORMLEN] = "";
616 - *res = strbncmp(word, Affix->repl, Affix->replen);
621 - strcpy(newword, word);
622 - strcpy(newword + len - Affix->replen, Affix->find);
623 + if ( flagflags & FF_COMPOUNDONLYAFX ) {
624 + if ( (Affix->flagflags & FF_COMPOUNDONLYAFX) == 0 )
627 + if ( Affix->flagflags & FF_COMPOUNDONLYAFX )
631 + if ( Affix->type=='s' ) {
632 + strcpy(newword, word);
633 + strcpy(newword + len - Affix->replen, Affix->find);
635 + strcpy(newword, Affix->find);
636 + strcat(newword, word + Affix->replen);
641 @@ -452,205 +646,364 @@
645 - if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
647 - if (FindWord(Conf, newword, Affix->flag))
648 - return pstrdup(newword);
650 + if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
656 -#define MAX_NORM 512
658 -CheckPrefix(const char *word, size_t len, AFFIX * Affix, IspellDict * Conf, int pi,
659 - char **forms, char ***cur)
661 - regmatch_t subs[NS * 2];
664 +NormalizeSubWord(IspellDict * Conf, char *word, char flag) {
665 + AffixNodeData *suffix=NULL, *prefix=NULL;
666 + int slevel=0, plevel=0;
667 + int wrdlen = strlen(word), swrdlen;
670 char newword[2 * MAXNORMLEN] = "";
676 - AFFIX *CAffix = Conf->Affix;
678 - res = strncmp(word, Affix->repl, Affix->replen);
681 - strcpy(newword, Affix->find);
682 - strcat(newword, word + Affix->replen);
683 + char pnewword[2 * MAXNORMLEN] = "";
684 + AffixNode *snode = Conf->Suffix, *pnode;
687 + if (wrdlen > MAXNORMLEN) return NULL;
689 + cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
692 - if (Affix->compile)
694 - err = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
697 - /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
698 - regfree(&(Affix->reg));
701 - Affix->compile = 0;
703 + /* Check that the word itself is normal form */
704 + if (FindWord(Conf, word, 0, flag & FF_COMPOUNDWORD)) {
705 + *cur = pstrdup(word);
709 - if (!(err = regexec(&(Affix->reg), newword, 1, subs, 0)))
713 - if ((curspell = FindWord(Conf, newword, Affix->flag)))
715 - if ((*cur - forms) < (MAX_NORM - 1))
717 - **cur = pstrdup(newword);
720 + /* Find all other NORMAL forms of the 'word' (check only prefix)*/
721 + pnode=Conf->Prefix;
724 + prefix=FinfAffixes(pnode, word, wrdlen, &plevel,'p');
725 + if (!prefix) break;
726 + for(j=0;j<prefix->naff;j++) {
727 + if ( CheckAffix(word,wrdlen,prefix->aff[j], flag, newword) ) {
728 + /* prefix success */
729 + if ( FindWord(Conf, newword, prefix->aff[j]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
730 + /* word search success */
731 + *cur = pstrdup(newword);
737 - newlen = strlen(newword);
738 - ls = Conf->SuffixTree.Left[pi];
739 - if (ls >= 0 && ((*cur - forms) < (MAX_NORM - 1)))
741 - **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
746 + pnode = prefix->node;
750 + /* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
752 + /* find possible suffix */
753 + suffix = FinfAffixes(snode, word, wrdlen, &slevel, 's');
754 + if (!suffix) break;
755 + /* foreach suffix check affix */
756 + for(i=0;i<suffix->naff;i++) {
757 + if ( CheckAffix(word, wrdlen, suffix->aff[i], flag, newword) ) {
758 + /* suffix success */
759 + if ( FindWord(Conf, newword, suffix->aff[i]->flag, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
760 + /* word search success */
761 + *cur = pstrdup(newword);
765 + /* now we will look changed word with prefixes */
766 + pnode=Conf->Prefix;
768 + swrdlen=strlen(newword);
770 + prefix=FinfAffixes(pnode, newword, swrdlen, &plevel,'p');
771 + if (!prefix) break;
772 + for(j=0;j<prefix->naff;j++) {
773 + if ( CheckAffix(newword,swrdlen,prefix->aff[j], flag, pnewword) ) {
774 + /* prefix success */
775 + int ff=( prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT ) ?
776 + 0 : prefix->aff[j]->flag;
777 + if ( FindWord(Conf, pnewword, ff, flag&FF_COMPOUNDWORD) && (cur - forms) < (MAX_NORM-1) ) {
778 + /* word search success */
779 + *cur = pstrdup(pnewword);
785 + pnode = prefix->node;
794 + snode=suffix->node;
799 -NormalizeWord(IspellDict * Conf, char *word)
801 -/*regmatch_t subs[NS];*/
819 - len = strlen(word);
820 - if (len > MAXNORMLEN)
821 + if (cur == forms) {
829 +typedef struct SplitVar {
832 + struct SplitVar *next;
836 +CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len) {
837 + while( (*ptr)->affix ) {
838 + if ( len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len)==0 ) {
848 - forms = (char **) palloc(MAX_NORM * sizeof(char **));
852 +CopyVar(SplitVar *s, int makedup) {
853 + SplitVar *v = (SplitVar*)palloc(sizeof(SplitVar));
855 + v->stem=(char**)palloc( sizeof(char*) * (MAX_NORM) );
859 + v->nstem = s->nstem;
860 + for(i=0;i<s->nstem;i++)
861 + v->stem[i] = (makedup) ? pstrdup( s->stem[i] ) : s->stem[i];
868 - ri = (int) (*word) & 255;
869 - pi = (int) (word[strlen(word) - 1]) & 255;
870 - Affix = (AFFIX *) Conf->Affix;
872 - /* Check that the word itself is normal form */
873 - if ((spell = FindWord(Conf, word, 0)))
875 - *cur = pstrdup(word);
880 +SplitToVariants( IspellDict * Conf, SPNode *snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos ) {
881 + SplitVar *var=NULL;
882 + SPNodeData *StopLow, *StopHigh, *StopMiddle;
883 + SPNode *node = (snode) ? snode : Conf->Dictionary;
884 + int level=(snode) ? minpos : startpos; /* recursive minpos==level*/
887 + char notprobed[wordlen];
889 + memset(notprobed,1,wordlen);
890 + var = CopyVar(orig,1);
892 + while( node && level<wordlen) {
893 + StopLow = node->data;
894 + StopHigh = node->data+node->length;
895 + while (StopLow < StopHigh) {
896 + StopMiddle = StopLow + (StopHigh - StopLow) / 2;
897 + if ( StopMiddle->val == ((uint8*)(word))[level] ) {
899 + } else if ( StopMiddle->val < ((uint8*)(word))[level] ) {
900 + StopLow = StopMiddle + 1;
902 + StopHigh = StopMiddle;
905 + if ( StopLow >= StopHigh )
908 - /* Find all other NORMAL forms of the 'word' */
909 + /* find word with epenthetic */
910 + caff = Conf->CompoundAffix;
911 + while ( level>startpos && (lenaff=CheckCompoundAffixes( &caff, word + level, wordlen - level ))>0 ) {
912 + /* there is one of compound suffixes, so check word for existings */
913 + char buf[MAXNORMLEN];
916 + lenaff=level-startpos+lenaff;
918 + if ( !notprobed[startpos+lenaff-1] )
921 + if ( level+lenaff-1 <= minpos )
924 - for (ipi = 0; ipi <= pi; ipi += pi)
926 + memcpy(buf, word+startpos, lenaff);
930 - lp = Conf->PrefixTree.Left[ri];
931 - rp = Conf->PrefixTree.Right[ri];
932 - while (lp >= 0 && lp <= rp)
934 - cp = (lp + rp) >> 1;
936 - if ((cur - forms) < (MAX_NORM - 1))
937 - cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
938 - if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
939 - lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
940 - if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
941 - rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
956 + subres = NormalizeSubWord(Conf, buf, FF_COMPOUNDWORD | FF_COMPOUNDONLYAFX);
958 + /* Yes, it was a word from dictionary */
959 + SplitVar *new=CopyVar(var,0);
961 + char **sptr=subres;
963 + notprobed[startpos+lenaff-1]=0;
966 + new->stem[ new->nstem ] = *sptr;
974 + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos+lenaff, startpos+lenaff);
982 - ls = Conf->SuffixTree.Left[ipi];
983 - rs = Conf->SuffixTree.Right[ipi];
984 - while (ls >= 0 && ls <= rs)
986 - if (((cur - forms) < (MAX_NORM - 1)))
988 - *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
993 + /* find infinitive */
994 + if ( StopMiddle->isword && StopMiddle->compoundallow && notprobed[level] ) {
995 + /* ok, we found full compoundallowed word*/
996 + if ( level>minpos ) {
997 + /* and its length more than minimal */
998 + if ( wordlen==level+1 ) {
999 + /* well, it was last word */
1000 + var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1004 + /* then we will search more big word at the same point */
1005 + SplitVar *ptr=var;
1006 + while( ptr->next )
1008 + ptr->next=SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1009 + /* we can find next word */
1011 + var->stem[ var->nstem ] = strnduplicate(word + startpos, level - startpos);
1013 + node = Conf->Dictionary;
1018 - if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
1020 - *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
1027 + node=StopMiddle->node;
1030 + var->stem[ var->nstem ] = strnduplicate(word + startpos, wordlen - startpos);
1036 +NINormalizeWord(IspellDict * Conf, char *word) {
1037 + char **res= NormalizeSubWord(Conf, word, 0);
1039 + if ( Conf->compoundcontrol != '\t' ) {
1040 + int wordlen=strlen(word);
1041 + SplitVar *ptr, *var = SplitToVariants(Conf,NULL,NULL, word, wordlen, 0, -1);
1046 + if ( var->nstem > 1 ) {
1047 + char **subres = NormalizeSubWord(Conf, var->stem[ var->nstem-1 ], FF_COMPOUNDWORD);
1049 + char **ptr=subres;
1055 + res=cur=(char **) palloc(MAX_NORM * sizeof(char *));
1058 + for(i=0;i<var->nstem-1;i++) {
1059 + *cur=var->stem[ i ];
1068 + var->stem[ 0 ] = NULL;
1075 + for(i=0;i<var->nstem && var->stem[ i ];i++)
1076 + pfree( var->stem[i] );
1092 +static void freeSPNode(SPNode *node) {
1095 + if (!node) return;
1097 + while( node->length ) {
1098 + freeSPNode(data->node);
1106 +static void freeANode(AffixNode *node) {
1107 + AffixNodeData *data;
1109 + if (!node) return;
1111 + while( node->length ) {
1112 + freeANode(data->node);
1123 -FreeIspell(IspellDict * Conf)
1124 +NIFree(IspellDict * Conf)
1127 AFFIX *Affix = (AFFIX *) Conf->Affix;
1128 + char** aff = Conf->AffixData;
1135 + free(Conf->AffixData);
1139 for (i = 0; i < Conf->naffixes; i++)
1141 if (Affix[i].compile == 0)
1142 regfree(&(Affix[i].reg));
1144 - for (i = 0; i < Conf->naffixes; i++)
1145 - free(Conf->Spell[i].word);
1146 - free(Conf->Affix);
1147 - free(Conf->Spell);
1148 + if (Conf->Spell) {
1149 + for (i = 0; i < Conf->nspell; i++)
1150 + free(Conf->Spell[i].word);
1151 + free(Conf->Spell);
1154 + if (Conf->Affix) free(Conf->Affix);
1155 + if ( Conf->CompoundAffix ) free(Conf->CompoundAffix);
1156 + freeSPNode(Conf->Dictionary);
1157 + freeANode(Conf->Suffix);
1158 + freeANode(Conf->Prefix);
1159 memset((void *) Conf, 0, sizeof(IspellDict));
1162 diff -uNr postgresql-7.4/contrib/tsearch2/ispell/spell.h postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h
1163 --- postgresql-7.4/contrib/tsearch2/ispell/spell.h 2003-08-04 02:43:11.000000000 +0200
1164 +++ postgresql-7.4.fixed/contrib/tsearch2/ispell/spell.h 2003-12-18 17:46:03.000000000 +0100
1167 #include <sys/types.h>
1180 + struct SPNode *node;
1183 +typedef struct SPNode {
1185 + SPNodeData data[1];
1188 +#define SPNHRDSZ (sizeof(uint32))
1191 typedef struct spell_struct
1204 typedef struct aff_struct
1215 +#define FF_CROSSPRODUCT 0x01
1216 +#define FF_COMPOUNDWORD 0x02
1217 +#define FF_COMPOUNDONLYAFX 0x04
1226 + struct AffixNode *node;
1229 +typedef struct AffixNode {
1231 + AffixNodeData data[1];
1234 +#define ANHRDSZ (sizeof(uint32))
1236 typedef struct Tree_struct
1252 + char compoundcontrol;
1257 - Tree_struct SpellTree;
1258 - Tree_struct PrefixTree;
1259 - Tree_struct SuffixTree;
1261 + AffixNode *Suffix;
1262 + AffixNode *Prefix;
1264 + SPNode *Dictionary;
1266 + CMPDAffix *CompoundAffix;
1270 -char **NormalizeWord(IspellDict * Conf, char *word);
1271 -int ImportAffixes(IspellDict * Conf, const char *filename);
1272 -int ImportDictionary(IspellDict * Conf, const char *filename);
1274 -int AddSpell(IspellDict * Conf, const char *word, const char *flag);
1275 -int AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type);
1276 -void SortDictionary(IspellDict * Conf);
1277 -void SortAffixes(IspellDict * Conf);
1278 -void FreeIspell(IspellDict * Conf);
1279 +char **NINormalizeWord(IspellDict * Conf, char *word);
1280 +int NIImportAffixes(IspellDict * Conf, const char *filename);
1281 +int NIImportDictionary(IspellDict * Conf, const char *filename);
1283 +int NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
1284 +int NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type);
1285 +void NISortDictionary(IspellDict * Conf);
1286 +void NISortAffixes(IspellDict * Conf);
1287 +void NIFree(IspellDict * Conf);
1290 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/Makefile postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile
1291 --- postgresql-7.4/contrib/tsearch2/my2ispell/Makefile 1970-01-01 01:00:00.000000000 +0100
1292 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/Makefile 2003-12-18 17:46:03.000000000 +0100
1301 +all: $(LANGUAGE).dict $(LANGUAGE).aff
1303 +$(ZIPFILE).aff: $(ZIPFILE).zip
1308 +# 1 Clean up the dictionary
1309 +# 2 Remove the " symbol
1310 +# 3 Add the compoundwords controlled flag to words that lack it but
1311 +# have compound-only suffixes
1313 +$(LANGUAGE).dict: $(ZIPFILE).zip
1314 + $(UNZIP) $? $(ZIPFILE).dic
1315 + grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
1317 + | sed -e 's/"//g' \
1318 + | perl -pi -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
1322 +# Just convert the affix file
1324 +$(LANGUAGE).aff: $(ZIPFILE).aff
1325 + grep -v -i zyzyzy $(ZIPFILE).aff \
1326 + | grep -v -i zyzyzy \
1328 + -e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
1329 + -e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
1330 + -e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
1331 + -e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \
1332 + -e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
1333 + -e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
1334 + -e 's/^(SET|TRY)/#$$1/' \
1338 + rm -rf $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff
1341 diff -uNr postgresql-7.4/contrib/tsearch2/my2ispell/README postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README
1342 --- postgresql-7.4/contrib/tsearch2/my2ispell/README 1970-01-01 01:00:00.000000000 +0100
1343 +++ postgresql-7.4.fixed/contrib/tsearch2/my2ispell/README 2003-12-18 17:46:03.000000000 +0100
1345 +Utility for converting a MySpell dictionary and affix file from
1346 +MySpell to ispell format.
1347 +The utility has been tested on nb_NO.zip and nn_NO.zip from
1348 +OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html)
1351 +For example, to build a Norwegian dictionary and affix file:
1352 +% cp nb_NO.zip my2ispell
1354 +% gmake ZIPFILE=nb_NO LANGUAGE=norsk
1356 +Author: Teodor Sigaev <teodor@sigaev.ru>