]>
Commit | Line | Data |
---|---|---|
08e2ccbb AM |
1 | --- mysql-4.0.30/sql/gen_lex_hash.cc.org 2015-06-25 08:55:26.955242963 +0200 |
2 | +++ mysql-4.0.30/sql/gen_lex_hash.cc 2015-06-25 08:55:40.382241154 +0200 | |
3 | @@ -14,11 +14,68 @@ | |
4 | along with this program; if not, write to the Free Software | |
5 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ | |
6 | ||
7 | +/* | |
8 | + | |
9 | +The idea of presented algorithm see in | |
10 | +"The Art of Computer Programming" by Donald E. Knuth | |
11 | +Volume 3 "Sorting and searching" | |
12 | +(chapter 6.3 "Digital searching" - name and number of chapter | |
13 | + is back translation from Russian edition :)) | |
14 | + | |
15 | +as illustration of data structures, imagine next table: | |
16 | + | |
17 | +static SYMBOL symbols[] = { | |
18 | + { "ADD", SYM(ADD),0,0}, | |
19 | + { "AND", SYM(AND),0,0}, | |
20 | + { "DAY", SYM(DAY_SYM),0,0}, | |
21 | +}; | |
22 | + | |
23 | +for this structure, presented program generate next searching-structure: | |
24 | + | |
25 | ++-----------+-+-+-+ | |
26 | +| len |1|2|3| | |
27 | ++-----------+-+-+-+ | |
28 | +|first_char |0|0|a| | |
29 | +|last_char |0|0|d| | |
30 | +|link |0|0|+| | |
31 | + | | |
32 | + V | |
33 | + +----------+-+-+-+--+ | |
34 | + | 1 char|a|b|c|d | | |
35 | + +----------+-+-+-+--+ | |
36 | + |first_char|b|0|0|0 | | |
37 | + |last_char |n|0|0|-1| | |
38 | + |link |+|0|0|+ | | |
39 | + | | | |
40 | + | V | |
41 | + | symbols[2] ( "DAY" ) | |
42 | + V | |
43 | ++----------+--+-+-+-+-+-+-+-+-+-+--+ | |
44 | +| 2 char|d |e|f|j|h|i|j|k|l|m|n | | |
45 | ++----------+--+-+-+-+-+-+-+-+-+-+--+ | |
46 | +|first_char|0 |0|0|0|0|0|0|0|0|0|0 | | |
47 | +|last_char |-1|0|0|0|0|0|0|0|0|0|-1| | |
48 | +|link |+ |0|0|0|0|0|0|0|0|0|+ | | |
49 | + | | | |
50 | + V V | |
51 | + symbols[0] ( "ADD" ) symbols[1] ( "AND" ) | |
52 | + | |
53 | +for optimization, link is the 16-bit index in 'symbols' or 'sql_functions' | |
54 | +or search-array.. | |
55 | + | |
56 | +So, we can read full search-structure as 32-bit word | |
57 | + | |
58 | +TODO: | |
59 | +1. use instead to_upper_lex, special array | |
60 | + (substitute chars) without skip codes.. | |
61 | +2. try use reverse order of comparing.. | |
62 | + | |
63 | +*/ | |
64 | ||
65 | #define NO_YACC_SYMBOLS | |
66 | -#include <my_global.h> | |
67 | -#include <my_sys.h> | |
68 | -#include <m_string.h> | |
69 | +#include "my_global.h" | |
70 | +#include "my_sys.h" | |
71 | +#include "m_string.h" | |
72 | #ifndef __GNU_LIBRARY__ | |
73 | #define __GNU_LIBRARY__ // Skip warnings in getopt.h | |
74 | #endif | |
75 | @@ -26,324 +83,228 @@ | |
76 | #include "mysql_version.h" | |
77 | #include "lex.h" | |
78 | ||
79 | -my_bool opt_search; | |
80 | -int opt_verbose; | |
81 | -ulong opt_count; | |
82 | - | |
83 | -#define max_allowed_array 8000 // Don't generate bigger arrays than this | |
84 | -#define max_symbol 32767 // Use this for 'not found' | |
85 | -#define how_much_for_plus 8 // 2-8 | |
86 | -#define type_count 1 // 1-5 | |
87 | -#define char_table_count 5 | |
88 | -#define total_symbols (sizeof(symbols)/sizeof(SYMBOL) +\ | |
89 | - sizeof(sql_functions)/sizeof(SYMBOL)) | |
90 | - | |
91 | -#define how_much_and INT_MAX24 | |
92 | - | |
93 | -/* | |
94 | - The following only have to work with characters in the set | |
95 | - used by SQL commands | |
96 | -*/ | |
97 | - | |
98 | -#undef tolower | |
99 | -#define tolower(a) ((a) >= 'A' && (a) <= 'Z') ? ((a)- 'A' + 'a') : (a) | |
100 | - | |
101 | -static uint how_long_symbols,function_plus,function_mod,function_type; | |
102 | -static uint char_table[256]; | |
103 | -static uchar unique_length[256]; | |
104 | -static uchar bits[how_much_and/8+1]; | |
105 | -static uint primes[max_allowed_array+1]; | |
106 | -static ulong hash_results[type_count][how_much_for_plus+1][total_symbols]; | |
107 | -static ulong start_value=0; | |
108 | -static uint best_type; | |
109 | -static ulong best_t1,best_t2, best_start_value; | |
110 | - | |
111 | static struct my_option my_long_options[] = | |
112 | { | |
113 | {"help", '?', "Display help and exit", | |
114 | 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, | |
115 | - {"count", 'c', "Try count times to find a optimal hash table", | |
116 | - (gptr*) &opt_count, (gptr*) &opt_count, 0, GET_ULONG, REQUIRED_ARG, | |
117 | - 100000, 0, 0, 0, 0, 0}, | |
118 | - {"search", 'S', "Search after good rnd1 and rnd2 values", | |
119 | - (gptr*) &opt_search, (gptr*) &opt_search, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, | |
120 | - 0, 0}, | |
121 | - {"verbose", 'v', "Write some information while the program executes", | |
122 | - (gptr*) &opt_verbose, (gptr*) &opt_verbose, 0, GET_INT, NO_ARG, 0, 0, 0, | |
123 | - 0, 0, 0}, | |
124 | {"version", 'V', "Output version information and exit", | |
125 | 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, | |
126 | - { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} | |
127 | }; | |
128 | ||
129 | -struct rand_struct { | |
130 | - unsigned long seed1,seed2,max_value; | |
131 | - double max_value_dbl; | |
132 | +struct hash_lex_struct | |
133 | +{ | |
134 | + char first_char; | |
135 | + char last_char; | |
136 | + union{ | |
137 | + hash_lex_struct *char_tails; | |
138 | + int iresult; | |
139 | + }; | |
140 | + int ithis; | |
141 | }; | |
142 | - | |
143 | -void randominit(struct rand_struct *rand_st,ulong seed1, ulong seed2) | |
144 | -{ /* For mysql 3.21.# */ | |
145 | - rand_st->max_value= 0x3FFFFFFFL; | |
146 | - rand_st->max_value_dbl=(double) rand_st->max_value; | |
147 | - rand_st->seed1=seed1%rand_st->max_value ; | |
148 | - rand_st->seed2=seed2%rand_st->max_value; | |
149 | -} | |
150 | - | |
151 | -double rnd(struct rand_struct *rand_st) | |
152 | + | |
153 | +hash_lex_struct *get_hash_struct_by_len(hash_lex_struct **root_by_len, | |
154 | + int len, int *max_len) | |
155 | { | |
156 | - rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value; | |
157 | - rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value; | |
158 | - return (((double) rand_st->seed1)/rand_st->max_value_dbl); | |
159 | + if (*max_len<len){ | |
160 | + *root_by_len= (hash_lex_struct *)realloc((char*)*root_by_len, | |
161 | + sizeof(hash_lex_struct)*len); | |
162 | + hash_lex_struct *cur, *end= *root_by_len + len; | |
163 | + for (cur= *root_by_len + *max_len; cur<end; cur++) | |
164 | + cur->first_char= 0; | |
165 | + *max_len= len; | |
166 | + } | |
167 | + return (*root_by_len)+(len-1); | |
168 | } | |
169 | ||
170 | - | |
171 | -static void make_char_table(ulong t1,ulong t2,int type) | |
172 | +void insert_into_hash(hash_lex_struct *root, const char *name, | |
173 | + int len_from_begin, int index, int function) | |
174 | { | |
175 | - uint i; | |
176 | - struct rand_struct rand_st; | |
177 | - randominit(&rand_st,t1,t2); | |
178 | + hash_lex_struct *end, *cur, *tails; | |
179 | ||
180 | - for (i=0 ; i < 256 ; i++) | |
181 | - { | |
182 | - switch (type) { | |
183 | - case 0: char_table[i]= i + (i << 8); break; | |
184 | - case 1: char_table[i]= i + ((i ^255 ) << 8); break; | |
185 | - case 2: char_table[i]= i; break; | |
186 | - case 3: char_table[i]= i + ((uint) (rnd(&rand_st)*255) << 8); break; | |
187 | - case 4: char_table[i]= (uint) (rnd(&rand_st)*255) + (i << 8); break; | |
188 | - } | |
189 | - } | |
190 | - char_table[0]|=1+257; // Avoid problems with 0 | |
191 | - for (i=0 ; i < 256 ; i++) | |
192 | - { | |
193 | - uint tmp=(uint) (rnd(&rand_st)*255); | |
194 | - swap(uint,char_table[i],char_table[tmp]); | |
195 | - } | |
196 | - /* lower characters should be mapped to upper */ | |
197 | - for (i= 'a' ; i <= 'z' ; i++) | |
198 | - { | |
199 | - /* This loop is coded with extra variables to avoid a bug in gcc 2.96 */ | |
200 | - uchar tmp= (uchar) (i - 'a'); // Assume ascii | |
201 | - tmp+='A'; | |
202 | - char_table[i]=char_table[tmp]; | |
203 | + if (!root->first_char){ | |
204 | + root->first_char= -1; | |
205 | + root->iresult= index; | |
206 | + return; | |
207 | } | |
208 | -} | |
209 | ||
210 | -/* Fill array primes with primes between start and 'max_allowed_array' */ | |
211 | + if (root->first_char==-1){ | |
212 | + int index2= root->iresult; | |
213 | + const char *name2= | |
214 | + (index2<0 ? sql_functions[-index2-1] : symbols[index2]).name + len_from_begin; | |
215 | + root->first_char= name2[0]; | |
216 | + root->last_char= root->first_char; | |
217 | + tails= (hash_lex_struct*)malloc(sizeof(hash_lex_struct)); | |
218 | + root->char_tails= tails; | |
219 | + tails->first_char= -1; | |
220 | + tails->iresult= index2; | |
221 | + } | |
222 | ||
223 | -static void make_prime_array(uint start) | |
224 | -{ | |
225 | - uint i,j,*to; | |
226 | - uint max_index=(uint) sqrt((double) max_allowed_array); | |
227 | + size_t real_size= (root->last_char-root->first_char+1); | |
228 | ||
229 | - bzero((char*) primes,sizeof(primes[0])*max_allowed_array); | |
230 | + if (root->first_char>(*name)){ | |
231 | + size_t new_size= root->last_char-(*name)+1; | |
232 | + if (new_size<real_size) printf("error!!!!\n"); | |
233 | + tails= root->char_tails; | |
234 | + tails= (hash_lex_struct*)realloc((char*)tails, | |
235 | + sizeof(hash_lex_struct)*new_size); | |
236 | + root->char_tails= tails; | |
237 | + memmove(tails+(new_size-real_size),tails,real_size*sizeof(hash_lex_struct)); | |
238 | + end= tails + new_size - real_size; | |
239 | + for (cur= tails; cur<end; cur++) | |
240 | + cur->first_char= 0; | |
241 | + root->first_char= (*name); | |
242 | + } | |
243 | ||
244 | - i=2; | |
245 | - while (i < max_index) | |
246 | - { | |
247 | - for (j=i+i ; j <= max_allowed_array ; j+=i) | |
248 | - primes[j]=1; | |
249 | - while (primes[++i]) ; | |
250 | + if (root->last_char<(*name)){ | |
251 | + size_t new_size= (*name)-root->first_char+1; | |
252 | + if (new_size<real_size) printf("error!!!!\n"); | |
253 | + tails= root->char_tails; | |
254 | + tails= (hash_lex_struct*)realloc((char*)tails, | |
255 | + sizeof(hash_lex_struct)*new_size); | |
256 | + root->char_tails= tails; | |
257 | + end= tails + new_size; | |
258 | + for (cur= tails+real_size; cur<end; cur++) | |
259 | + cur->first_char= 0; | |
260 | + root->last_char= (*name); | |
261 | } | |
262 | ||
263 | - to=primes; | |
264 | - for (i=start ; i <= max_allowed_array ; i++) | |
265 | - if (!primes[i]) | |
266 | - *to++=i; | |
267 | - *to=0; // end marker | |
268 | + insert_into_hash (root->char_tails+(*name)-root->first_char, | |
269 | + name+1,len_from_begin+1,index,function); | |
270 | } | |
271 | ||
272 | -#define USE_char_table | |
273 | +hash_lex_struct *root_by_len= 0; | |
274 | +int max_len=0; | |
275 | ||
276 | -static ulong tab_index_function(const char *s,uint add, uint type) | |
277 | +hash_lex_struct *root_by_len2= 0; | |
278 | +int max_len2=0; | |
279 | + | |
280 | +void insert_symbols() | |
281 | { | |
282 | - register ulong nr=start_value+char_table[(uchar) *s]; // Nice value | |
283 | - ulong pos=3; | |
284 | - uint tmp_length=unique_length[(uchar) *s]-1; | |
285 | - while (*++s && tmp_length-- > 0) | |
286 | - { | |
287 | - switch (type) { | |
288 | - case 0: | |
289 | - nr= (nr ^ (char_table[(uchar) *s] + (nr << add))); | |
290 | - break; | |
291 | - case 1: | |
292 | - nr= (nr + (char_table[(uchar) *s] + (nr << add))); | |
293 | - break; | |
294 | - case 2: | |
295 | - nr= (nr ^ (char_table[(uchar) *s] ^ (nr << add))); | |
296 | - break; | |
297 | - case 3: | |
298 | - nr= (char_table[(uchar) *s] ^ (nr << add)); | |
299 | - break; | |
300 | - case 4: | |
301 | - nr+= nr+nr+((nr & 63)+pos)*((ulong) char_table[(uchar) *s]); | |
302 | - pos+=add; | |
303 | - break; | |
304 | - } | |
305 | + size_t i= 0; | |
306 | + SYMBOL *cur; | |
307 | + for (cur= symbols; i<array_elements(symbols); cur++, i++){ | |
308 | + hash_lex_struct *root= | |
309 | + get_hash_struct_by_len(&root_by_len,cur->length,&max_len); | |
310 | + insert_into_hash(root,cur->name,0,i,0); | |
311 | } | |
312 | - return nr & INT_MAX24; | |
313 | } | |
314 | ||
315 | -static int search(bool write_warning) | |
316 | +void insert_sql_functions() | |
317 | { | |
318 | - uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); | |
319 | - uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); | |
320 | - uint size=size_symbols + size_functions; | |
321 | - uint i=0,found,*prime,type; | |
322 | - int igra[max_allowed_array],test_count=INT_MAX; | |
323 | - uint possible_plus[how_much_for_plus*type_count+type_count]; | |
324 | - | |
325 | - how_long_symbols = sizeof(symbols)/sizeof(SYMBOL); | |
326 | + size_t i= 0; | |
327 | + SYMBOL *cur; | |
328 | + for (cur= sql_functions; i<array_elements(sql_functions); cur++, i++){ | |
329 | + hash_lex_struct *root= | |
330 | + get_hash_struct_by_len(&root_by_len,cur->length,&max_len); | |
331 | + insert_into_hash(root,cur->name,0,-i-1,1); | |
332 | + } | |
333 | +} | |
334 | ||
335 | - bzero((char*) possible_plus,sizeof(possible_plus)); | |
336 | - found=0; | |
337 | +void generate_find_structs() | |
338 | +{ | |
339 | + root_by_len= 0; | |
340 | + max_len=0; | |
341 | + size_t i; | |
342 | ||
343 | - /* Check first which function_plus are possible */ | |
344 | - for (type=0 ; type < type_count ; type ++) | |
345 | - { | |
346 | - for (function_plus = 1; | |
347 | - function_plus <= how_much_for_plus; | |
348 | - function_plus++) | |
349 | - { | |
350 | - bzero((char*) bits,sizeof(bits)); | |
351 | - for (i=0; i < size; i++) | |
352 | - { | |
353 | - ulong order= tab_index_function ((i < how_long_symbols) ? | |
354 | - symbols[i].name : | |
355 | - sql_functions[i-how_long_symbols].name, | |
356 | - function_plus, type); | |
357 | - hash_results[type][function_plus][i]=order; | |
358 | - uint pos=order/8; | |
359 | - uint bit=order & 7; | |
360 | - if (bits[pos] & (1 << bit)) | |
361 | - break; | |
362 | - bits[pos]|=1 << bit; | |
363 | - } | |
364 | - if (i == size) | |
365 | - { | |
366 | - possible_plus[found++]=function_plus; | |
367 | - } | |
368 | - } | |
369 | - possible_plus[found++]=0; // End marker | |
370 | - } | |
371 | - if (found == type_count) | |
372 | - { | |
373 | - if (write_warning) | |
374 | - fprintf(stderr,"\ | |
375 | -The hash function didn't return a unique value for any parameter\n\ | |
376 | -You have to change gen_lex_code.cc, function 'tab_index_function' to\n\ | |
377 | -generate unique values for some parameter. When you have succeeded in this,\n\ | |
378 | -you have to change 'main' to print out the new function\n"); | |
379 | - return(1); | |
380 | - } | |
381 | + SYMBOL *cur, *end= symbols + array_elements(symbols); | |
382 | + for (cur= symbols; cur < end; cur++) | |
383 | + cur->length=(uchar) strlen(cur->name); | |
384 | + end= sql_functions + array_elements(sql_functions); | |
385 | + for (cur= sql_functions; cur<end; cur++) | |
386 | + cur->length=(uchar) strlen(cur->name); | |
387 | + | |
388 | + insert_symbols(); | |
389 | ||
390 | - if (opt_verbose > 1) | |
391 | - fprintf (stderr,"Info: Possible add values: %d\n",found-type_count); | |
392 | + root_by_len2= root_by_len; | |
393 | + max_len2= max_len; | |
394 | ||
395 | - for (prime=primes; (function_mod=*prime) ; prime++) | |
396 | - { | |
397 | - uint *plus_ptr=possible_plus; | |
398 | - for (type=0 ; type < type_count ; type++ ) | |
399 | - { | |
400 | - while ((function_plus= *plus_ptr++)) | |
401 | - { | |
402 | - ulong *order_pos= &hash_results[type][function_plus][0]; | |
403 | - if (test_count++ == INT_MAX) | |
404 | - { | |
405 | - test_count=1; | |
406 | - bzero((char*) igra,sizeof(igra)); | |
407 | - } | |
408 | - for (i=0; i<size ;i++) | |
409 | - { | |
410 | - ulong order; | |
411 | - order = *order_pos++ % function_mod; | |
412 | - if (igra[order] == test_count) | |
413 | - break; | |
414 | - igra[order] = test_count; | |
415 | - } | |
416 | - if (i == size) | |
417 | - { | |
418 | - *prime=0; // Mark this used | |
419 | - function_type=type; | |
420 | - return 0; // Found ok value | |
421 | - } | |
422 | - } | |
423 | - } | |
424 | - } | |
425 | + root_by_len= 0; | |
426 | + max_len= 0; | |
427 | ||
428 | - function_mod=max_allowed_array; | |
429 | - if (write_warning) | |
430 | - fprintf (stderr,"Fatal error when generating hash for symbols\n\ | |
431 | -Didn't find suitable values for perfect hashing:\n\ | |
432 | -You have to edit gen_lex_hash.cc to generate a new hashing function.\n\ | |
433 | -You can try running gen_lex_hash with --search to find a suitable value\n\ | |
434 | -Symbol array size = %d\n",function_mod); | |
435 | - return -1; | |
436 | + insert_symbols(); | |
437 | + insert_sql_functions(); | |
438 | } | |
439 | ||
440 | +char *hash_map= 0; | |
441 | +int size_hash_map= 0; | |
442 | ||
443 | -void print_arrays() | |
444 | +void add_struct_to_map(hash_lex_struct *st) | |
445 | { | |
446 | - uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); | |
447 | - uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); | |
448 | - uint size=size_symbols + size_functions; | |
449 | - uint i; | |
450 | - | |
451 | - fprintf(stderr,"Symbols: %d Functions: %d; Total: %d\nShifts per char: %d, Array size: %d\n", | |
452 | - size_symbols,size_functions,size_symbols+size_functions, | |
453 | - function_plus,function_mod); | |
454 | - | |
455 | - int *prva= (int*) my_alloca(sizeof(int)*function_mod); | |
456 | - for (i=0 ; i < function_mod; i++) | |
457 | - prva[i]= max_symbol; | |
458 | - | |
459 | - for (i=0;i<size;i++) | |
460 | + st->ithis= size_hash_map/4; | |
461 | + size_hash_map+= 4; | |
462 | + hash_map= (char*)realloc((char*)hash_map,size_hash_map); | |
463 | + hash_map[size_hash_map-4]= st->first_char==-1 ? 0 : st->first_char; | |
464 | + hash_map[size_hash_map-3]= | |
465 | + st->first_char==-1 || st->first_char==0 ? 0 : st->last_char; | |
466 | + if (st->first_char==-1) | |
467 | { | |
468 | - const char *name= ((i < how_long_symbols) ? | |
469 | - symbols[i].name : | |
470 | - sql_functions[i - how_long_symbols].name); | |
471 | - ulong order = tab_index_function(name,function_plus,function_type); | |
472 | - order %= function_mod; | |
473 | - /* This should never be true */ | |
474 | - if (prva[order] != max_symbol) | |
475 | - { | |
476 | - fprintf(stderr,"Error: Got duplicate value for symbol '%s'\n",name); | |
477 | - exit(1); | |
478 | - } | |
479 | - prva [order] = i; | |
480 | + hash_map[size_hash_map-2]= ((unsigned int)(int16)st->iresult)&255; | |
481 | + hash_map[size_hash_map-1]= ((unsigned int)(int16)st->iresult)>>8; | |
482 | } | |
483 | - | |
484 | -#ifdef USE_char_table | |
485 | - printf("static uint16 char_table[] = {\n"); | |
486 | - for (i=0; i < 255 ;i++) // < 255 is correct | |
487 | + else if (st->first_char==0) | |
488 | { | |
489 | - printf("%u,",char_table[i]); | |
490 | - if (((i+1) & 15) == 0) | |
491 | - puts(""); | |
492 | + hash_map[size_hash_map-2]= ((unsigned int)(int16)array_elements(symbols))&255; | |
493 | + hash_map[size_hash_map-1]= ((unsigned int)(int16)array_elements(symbols))>>8; | |
494 | } | |
495 | - printf("%d\n};\n\n\n",char_table[i]); | |
496 | -#endif | |
497 | +}; | |
498 | ||
499 | - printf("static uchar unique_length[] = {\n"); | |
500 | - for (i=0; i < 255 ;i++) // < 255 is correct | |
501 | - { | |
502 | - printf("%u,",unique_length[i]); | |
503 | - if (((i+1) & 15) == 0) | |
504 | - puts(""); | |
505 | - } | |
506 | - printf("%d\n};\n\n\n",unique_length[i]); | |
507 | +void add_structs_to_map(hash_lex_struct *st, int len) | |
508 | +{ | |
509 | + hash_lex_struct *cur, *end= st+len; | |
510 | + for (cur= st; cur<end; cur++) | |
511 | + add_struct_to_map(cur); | |
512 | + for (cur= st; cur<end; cur++) | |
513 | + if (cur->first_char && cur->first_char!=-1) | |
514 | + add_structs_to_map(cur->char_tails,cur->last_char-cur->first_char+1); | |
515 | +} | |
516 | + | |
517 | +void set_links(hash_lex_struct *st, int len) | |
518 | +{ | |
519 | + hash_lex_struct *cur, *end= st+len; | |
520 | + for (cur= st; cur<end; cur++) | |
521 | + if (cur->first_char!=0 && cur->first_char!=-1){ | |
522 | + int ilink= cur->char_tails->ithis; | |
523 | + hash_map[cur->ithis*4+2]= ilink%256; | |
524 | + hash_map[cur->ithis*4+3]= ilink/256; | |
525 | + set_links(cur->char_tails,cur->last_char-cur->first_char+1); | |
526 | + } | |
527 | +} | |
528 | ||
529 | - printf("static uint16 my_function_table[] = {\n"); | |
530 | - for (i=0; i < function_mod-1 ;i++) | |
531 | - { | |
532 | - printf("%d,",prva[i]); | |
533 | - if (((i+1) % 12) == 0) | |
534 | - puts(""); | |
535 | +void print_hash_map(const char *name) | |
536 | +{ | |
537 | + printf("uchar %s[%d]= {\n",name,size_hash_map); | |
538 | + char *cur; | |
539 | + int i; | |
540 | + for (i=0, cur= hash_map; i<size_hash_map; i++, cur++){ | |
541 | + switch(i%4){ | |
542 | + case 0: case 1: | |
543 | + if (!*cur) | |
544 | + printf("0, "); | |
545 | + else | |
546 | + printf("\'%c\', ",*cur); | |
547 | + break; | |
548 | + case 2: printf("%u, ",(uint)(uchar)*cur); break; | |
549 | + case 3: printf("%u,\n",(uint)(uchar)*cur); break; | |
550 | + } | |
551 | } | |
552 | - printf("%d\n};\n\n\n",prva[i]); | |
553 | - my_afree((gptr) prva); | |
554 | + printf("};\n"); | |
555 | } | |
556 | ||
557 | +void print_find_structs() | |
558 | +{ | |
559 | + add_structs_to_map(root_by_len,max_len); | |
560 | + set_links(root_by_len,max_len); | |
561 | + print_hash_map("sql_functions_map"); | |
562 | + | |
563 | + hash_map= 0; | |
564 | + size_hash_map= 0; | |
565 | + | |
566 | + printf("\n"); | |
567 | + | |
568 | + add_structs_to_map(root_by_len2,max_len2); | |
569 | + set_links(root_by_len2,max_len2); | |
570 | + print_hash_map("symbols_map"); | |
571 | +} | |
572 | ||
573 | static void usage(int version) | |
574 | { | |
575 | @@ -351,23 +312,19 @@ static void usage(int version) | |
576 | my_progname, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); | |
577 | if (version) | |
578 | return; | |
579 | - puts("Copyright (C) 2001 MySQL AB, by Sinisa and Monty"); | |
580 | - puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n"); | |
581 | + puts("Copyright (C) 2001 MySQL AB, by VVA and Monty"); | |
582 | + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\n\ | |
583 | +and you are welcome to modify and redistribute it under the GPL license\n"); | |
584 | puts("This program generates a perfect hashing function for the sql_lex.cc"); | |
585 | printf("Usage: %s [OPTIONS]\n\n", my_progname); | |
586 | my_print_help(my_long_options); | |
587 | - my_print_variables(my_long_options); | |
588 | } | |
589 | ||
590 | - | |
591 | -extern "C" my_bool | |
592 | +static my_bool | |
593 | get_one_option(int optid, const struct my_option *opt __attribute__((unused)), | |
594 | char *argument __attribute__((unused))) | |
595 | { | |
596 | - switch (optid) { | |
597 | - case 'v': | |
598 | - opt_verbose++; | |
599 | - break; | |
600 | + switch(optid) { | |
601 | case 'V': | |
602 | usage(1); | |
603 | exit(0); | |
604 | @@ -379,149 +336,28 @@ get_one_option(int optid, const struct m | |
605 | return 0; | |
606 | } | |
607 | ||
608 | - | |
609 | static int get_options(int argc, char **argv) | |
610 | { | |
611 | int ho_error; | |
612 | ||
613 | - if ((ho_error= handle_options(&argc, &argv, my_long_options, get_one_option))) | |
614 | + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) | |
615 | exit(ho_error); | |
616 | ||
617 | if (argc >= 1) | |
618 | { | |
619 | - fprintf(stderr,"%s: Too many arguments\n", my_progname); | |
620 | usage(0); | |
621 | - exit(1); | |
622 | + exit(1); | |
623 | } | |
624 | return(0); | |
625 | } | |
626 | ||
627 | -static uint max_prefix(const char *name) | |
628 | -{ | |
629 | - uint i; | |
630 | - uint max_length=1; | |
631 | - for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) | |
632 | - { | |
633 | - const char *str=symbols[i].name; | |
634 | - if (str != name) | |
635 | - { | |
636 | - const char *str2=name; | |
637 | - uint length; | |
638 | - while (*str && *str == *str2) | |
639 | - { | |
640 | - str++; | |
641 | - str2++; | |
642 | - } | |
643 | - length=(uint) (str2 - name)+1; | |
644 | - if (length > max_length) | |
645 | - max_length=length; | |
646 | - } | |
647 | - } | |
648 | - for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) | |
649 | - { | |
650 | - const char *str=sql_functions[i].name; | |
651 | - if (str != name) | |
652 | - { | |
653 | - const char *str2=name; | |
654 | - uint length; | |
655 | - while (*str && *str == *str2) | |
656 | - { | |
657 | - str++; | |
658 | - str2++; | |
659 | - } | |
660 | - length=(uint) (str2 - name)+1; | |
661 | - if (length > max_length) | |
662 | - max_length=length; | |
663 | - } | |
664 | - } | |
665 | - return max_length; | |
666 | -} | |
667 | - | |
668 | - | |
669 | -static void make_max_length_table(void) | |
670 | -{ | |
671 | - uint i; | |
672 | - for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) | |
673 | - { | |
674 | - uint length=max_prefix(symbols[i].name); | |
675 | - if (length > unique_length[(uchar) symbols[i].name[0]]) | |
676 | - { | |
677 | - unique_length[(uchar) symbols[i].name[0]]=length; | |
678 | - unique_length[(uchar) tolower(symbols[i].name[0])]=length; | |
679 | - } | |
680 | - } | |
681 | - for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) | |
682 | - { | |
683 | - uint length=max_prefix(sql_functions[i].name); | |
684 | - if (length > unique_length[(uchar) sql_functions[i].name[0]]) | |
685 | - { | |
686 | - unique_length[(uchar) sql_functions[i].name[0]]=length; | |
687 | - unique_length[(uchar) tolower(sql_functions[i].name[0])]=length; | |
688 | - } | |
689 | - } | |
690 | -} | |
691 | - | |
692 | - | |
693 | int main(int argc,char **argv) | |
694 | { | |
695 | - struct rand_struct rand_st; | |
696 | - static uint best_mod,best_add,best_functype; | |
697 | - int error; | |
698 | - | |
699 | MY_INIT(argv[0]); | |
700 | - start_value=2925024L; best_t1=654916L; best_t2=1723390L; best_type=3; /* mode=4943 add=1 type: 0 */ | |
701 | + | |
702 | if (get_options(argc,(char **) argv)) | |
703 | exit(1); | |
704 | ||
705 | - make_max_length_table(); | |
706 | - make_char_table(best_t1,best_t2,best_type); | |
707 | - make_prime_array(sizeof(symbols)/sizeof(SYMBOL) + | |
708 | - sizeof(sql_functions)/sizeof(SYMBOL)); | |
709 | - | |
710 | - if ((error=search(1)) > 0 || error && !opt_search) | |
711 | - exit(1); // This should work | |
712 | - best_mod=function_mod; best_add=function_plus; best_functype=function_type; | |
713 | - | |
714 | - if (opt_search) | |
715 | - { | |
716 | - time_t start_time=time((time_t*) 0); | |
717 | - randominit(&rand_st,start_time,start_time/2); // Some random values | |
718 | - printf("start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", | |
719 | - start_value, best_t1,best_t2,best_type,best_mod,best_add, | |
720 | - best_functype); | |
721 | - best_start_value=start_value; | |
722 | - for (uint i=1 ; i <= opt_count ; i++) | |
723 | - { | |
724 | - if (i % 10 == 0) | |
725 | - { | |
726 | - putchar('.'); | |
727 | - fflush(stdout); | |
728 | - } | |
729 | - ulong t1=(ulong) (rnd(&rand_st)*INT_MAX24); | |
730 | - ulong t2=(ulong) (rnd(&rand_st)*INT_MAX24); | |
731 | - uint type=(int) (rnd(&rand_st)*char_table_count); | |
732 | - start_value=(ulong) (rnd(&rand_st)*INT_MAX24); | |
733 | - make_char_table(t1,t2,type); | |
734 | - if (!search(0)) | |
735 | - { | |
736 | - best_mod=function_mod; best_add=function_plus; | |
737 | - best_functype=function_type; | |
738 | - best_t1=t1; best_t2=t2; best_type=type; | |
739 | - best_start_value=start_value; | |
740 | - printf("\nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", | |
741 | - best_start_value,best_t1,best_t2,best_type,best_mod,best_add, | |
742 | - best_functype); | |
743 | - } | |
744 | - if (opt_verbose && (i % 20000) == 0) | |
745 | - printf("\nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", | |
746 | - best_start_value,best_t1,best_t2,best_type,best_mod,best_add, | |
747 | - best_functype); | |
748 | - } | |
749 | - } | |
750 | - | |
751 | - function_mod=best_mod; function_plus=best_add; | |
752 | - make_char_table(best_t1,best_t2,best_type); | |
753 | - | |
754 | printf("/* Copyright (C) 2001 MySQL AB\n\ | |
755 | This program is free software; you can redistribute it and/or modify\n\ | |
756 | it under the terms of the GNU General Public License as published by\n\ | |
757 | @@ -533,38 +369,84 @@ int main(int argc,char **argv) | |
758 | GNU General Public License for more details.\n\n\ | |
759 | You should have received a copy of the GNU General Public License\n\ | |
760 | along with this program; if not, write to the Free Software\n\ | |
761 | - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */\n\n"); | |
762 | + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307\ | |
763 | + USA */\n\n"); | |
764 | ||
765 | -printf("/* This code is generated by gen_lex_hash.cc that seeks for a perfect\nhash function */\n\n"); | |
766 | + printf("/* This code is generated by gen_lex_hash.cc that seeks for\ | |
767 | + a perfect\nhash function */\n\n"); | |
768 | printf("#include \"lex.h\"\n\n"); | |
769 | ||
770 | - print_arrays(); | |
771 | + generate_find_structs(); | |
772 | + print_find_structs(); | |
773 | ||
774 | - printf("/* start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; */ /* mode=%d add=%d type: %d */\n\n", | |
775 | - best_start_value, best_t1, best_t2, best_type, | |
776 | - best_mod, best_add, best_functype); | |
777 | + printf("\nunsigned int sql_functions_max_len=%d;\n",max_len); | |
778 | + printf("\nunsigned int symbols_max_len=%d;\n\n",max_len2); | |
779 | ||
780 | - printf("inline SYMBOL *get_hash_symbol(const char *s,unsigned int length,bool function)\n\ | |
781 | + printf | |
782 | +( | |
783 | +"inline SYMBOL *get_hash_symbol(const char *s,\n\ | |
784 | + unsigned int len,bool function)\n\ | |
785 | {\n\ | |
786 | - ulong idx = %lu+char_table[(uchar) *s];\n\ | |
787 | - SYMBOL *sim;\n\ | |
788 | - const char *start=s;\n\ | |
789 | - int i=unique_length[(uchar) *s++];\n\ | |
790 | - if (i > (int) length) i=(int) length;\n\ | |
791 | - while (--i > 0)\n\ | |
792 | - idx= (idx ^ (char_table[(uchar) *s++] + (idx << %d)));\n\ | |
793 | - idx=my_function_table[(idx & %d) %% %d];\n\ | |
794 | - if (idx >= %d)\n\ | |
795 | - {\n\ | |
796 | - if (!function || idx >= %d) return (SYMBOL*) 0;\n\ | |
797 | - sim=sql_functions + (idx - %d);\n\ | |
798 | + register uchar *hash_map;\n\ | |
799 | + register const char *cur_str= s;\n\ | |
800 | + if (function){\n\ | |
801 | + if (len>sql_functions_max_len) return 0;\n\ | |
802 | + hash_map= sql_functions_map;\n\ | |
803 | + register uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\ | |
804 | +\n\ | |
805 | + for(;;){\n\ | |
806 | + register uchar first_char= (uchar)cur_struct;\n\ | |
807 | +\n\ | |
808 | + if (first_char==0){\n\ | |
809 | + register int16 ires= (int16)(cur_struct>>16);\n\ | |
810 | + if (ires==array_elements(symbols)) return 0;\n\ | |
811 | + register SYMBOL *res;\n\ | |
812 | + if (ires>=0) \n\ | |
813 | + res= symbols+ires;\n\ | |
814 | + else\n\ | |
815 | + res= sql_functions-ires-1;\n\ | |
816 | + register uint count= cur_str-s;\n\ | |
817 | + return lex_casecmp(cur_str,res->name+count,len-count) ? 0 : res;\n\ | |
818 | + }\n\ | |
819 | +\n\ | |
820 | + register uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\ | |
821 | + if (cur_char<first_char) return 0;\n\ | |
822 | + cur_struct>>=8;\n\ | |
823 | + if (cur_char>(uchar)cur_struct) return 0;\n\ | |
824 | +\n\ | |
825 | + cur_struct>>=8;\n\ | |
826 | + cur_struct= uint4korr(hash_map+\n\ | |
827 | + (((uint16)cur_struct + cur_char - first_char)*4));\n\ | |
828 | + cur_str++;\n\ | |
829 | + }\n\ | |
830 | + }else{\n\ | |
831 | + if (len>symbols_max_len) return 0;\n\ | |
832 | + hash_map= symbols_map;\n\ | |
833 | + register uint32 cur_struct= uint4korr(hash_map+((len-1)*4));\n\ | |
834 | +\n\ | |
835 | + for(;;){\n\ | |
836 | + register uchar first_char= (uchar)cur_struct;\n\ | |
837 | +\n\ | |
838 | + if (first_char==0){\n\ | |
839 | + register int16 ires= (int16)(cur_struct>>16);\n\ | |
840 | + if (ires==array_elements(symbols)) return 0;\n\ | |
841 | + register SYMBOL *res= symbols+ires;\n\ | |
842 | + register uint count= cur_str-s;\n\ | |
843 | + return lex_casecmp(cur_str,res->name+count,len-count)!=0 ? 0 : res;\n\ | |
844 | + }\n\ | |
845 | +\n\ | |
846 | + register uchar cur_char= (uchar)to_upper_lex[(uchar)*cur_str];\n\ | |
847 | + if (cur_char<first_char) return 0;\n\ | |
848 | + cur_struct>>=8;\n\ | |
849 | + if (cur_char>(uchar)cur_struct) return 0;\n\ | |
850 | +\n\ | |
851 | + cur_struct>>=8;\n\ | |
852 | + cur_struct= uint4korr(hash_map+\n\ | |
853 | + (((uint16)cur_struct + cur_char - first_char)*4));\n\ | |
854 | + cur_str++;\n\ | |
855 | + }\n\ | |
856 | }\n\ | |
857 | - else\n\ | |
858 | - sim=symbols + idx;\n\ | |
859 | - if ((length != sim->length) || lex_casecmp(start,sim->name,length))\n\ | |
860 | - return (SYMBOL *)0;\n\ | |
861 | - return sim;\n\ | |
862 | -}\n",(ulong) start_value,(int) function_plus,(int) how_much_and,function_mod,how_long_symbols,max_symbol,how_long_symbols); | |
863 | - exit(0); | |
864 | - return 0; | |
865 | +}\n" | |
866 | +); | |
867 | } | |
868 | + |