]>
Commit | Line | Data |
---|---|---|
62159205 ER |
1 | diff -r 85c04c7963d1 -r 43de4ebbe7ad src/regexp_nfa.c |
2 | --- src/regexp_nfa.c Tue Jun 04 14:23:05 2013 +0200 | |
3 | +++ src/regexp_nfa.c Tue Jun 04 17:47:05 2013 +0200 | |
4 | @@ -380,38 +380,18 @@ | |
5 | char_u *end; | |
6 | int extra_newl; | |
7 | { | |
8 | - int i; | |
9 | - /* Each of these variables takes up a char in "config[]", | |
10 | - * in the order they are here. */ | |
11 | - int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE, | |
12 | - o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE; | |
13 | +# define CLASS_not 0x80 | |
14 | +# define CLASS_af 0x40 | |
15 | +# define CLASS_AF 0x20 | |
16 | +# define CLASS_az 0x10 | |
17 | +# define CLASS_AZ 0x08 | |
18 | +# define CLASS_o7 0x04 | |
19 | +# define CLASS_o9 0x02 | |
20 | +# define CLASS_underscore 0x01 | |
21 | + | |
22 | + int newl = FALSE; | |
23 | char_u *p; | |
24 | -#define NCONFIGS 16 | |
25 | - int classid[NCONFIGS] = { | |
26 | - NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX, | |
27 | - NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD, | |
28 | - NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA, | |
29 | - NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER | |
30 | - }; | |
31 | - char_u myconfig[10]; | |
32 | - char_u config[NCONFIGS][9] = { | |
33 | - "000000100", /* digit */ | |
34 | - "100000100", /* non digit */ | |
35 | - "011000100", /* hex-digit */ | |
36 | - "111000100", /* non hex-digit */ | |
37 | - "000001000", /* octal-digit */ | |
38 | - "100001000", /* [^0-7] */ | |
39 | - "000110110", /* [0-9A-Za-z_] */ | |
40 | - "100110110", /* [^0-9A-Za-z_] */ | |
41 | - "000110010", /* head of word */ | |
42 | - "100110010", /* not head of word */ | |
43 | - "000110000", /* alphabetic char a-z */ | |
44 | - "100110000", /* non alphabetic char */ | |
45 | - "000100000", /* lowercase letter */ | |
46 | - "100100000", /* non lowercase */ | |
47 | - "000010000", /* uppercase */ | |
48 | - "100010000" /* non uppercase */ | |
49 | - }; | |
50 | + int config = 0; | |
51 | ||
52 | if (extra_newl == TRUE) | |
53 | newl = TRUE; | |
54 | @@ -421,7 +401,7 @@ | |
55 | p = start; | |
56 | if (*p == '^') | |
57 | { | |
58 | - not = TRUE; | |
59 | + config |= CLASS_not; | |
60 | p++; | |
61 | } | |
62 | ||
63 | @@ -434,37 +414,37 @@ | |
64 | case '0': | |
65 | if (*(p + 2) == '9') | |
66 | { | |
67 | - o9 = TRUE; | |
68 | + config |= CLASS_o9; | |
69 | break; | |
70 | } | |
71 | else | |
72 | if (*(p + 2) == '7') | |
73 | { | |
74 | - o7 = TRUE; | |
75 | + config |= CLASS_o7; | |
76 | break; | |
77 | } | |
78 | case 'a': | |
79 | if (*(p + 2) == 'z') | |
80 | { | |
81 | - az = TRUE; | |
82 | + config |= CLASS_az; | |
83 | break; | |
84 | } | |
85 | else | |
86 | if (*(p + 2) == 'f') | |
87 | { | |
88 | - af = TRUE; | |
89 | + config |= CLASS_af; | |
90 | break; | |
91 | } | |
92 | case 'A': | |
93 | if (*(p + 2) == 'Z') | |
94 | { | |
95 | - AZ = TRUE; | |
96 | + config |= CLASS_AZ; | |
97 | break; | |
98 | } | |
99 | else | |
100 | if (*(p + 2) == 'F') | |
101 | { | |
102 | - AF = TRUE; | |
103 | + config |= CLASS_AF; | |
104 | break; | |
105 | } | |
106 | /* FALLTHROUGH */ | |
107 | @@ -480,7 +460,7 @@ | |
108 | } | |
109 | else if (*p == '_') | |
110 | { | |
111 | - underscore = TRUE; | |
112 | + config |= CLASS_underscore; | |
113 | p ++; | |
114 | } | |
115 | else if (*p == '\n') | |
116 | @@ -495,38 +475,45 @@ | |
117 | if (p != end) | |
118 | return FAIL; | |
119 | ||
120 | - /* build the config that represents the ranges we gathered */ | |
121 | - STRCPY(myconfig, "000000000"); | |
122 | - if (not == TRUE) | |
123 | - myconfig[0] = '1'; | |
124 | - if (af == TRUE) | |
125 | - myconfig[1] = '1'; | |
126 | - if (AF == TRUE) | |
127 | - myconfig[2] = '1'; | |
128 | - if (az == TRUE) | |
129 | - myconfig[3] = '1'; | |
130 | - if (AZ == TRUE) | |
131 | - myconfig[4] = '1'; | |
132 | - if (o7 == TRUE) | |
133 | - myconfig[5] = '1'; | |
134 | - if (o9 == TRUE) | |
135 | - myconfig[6] = '1'; | |
136 | - if (underscore == TRUE) | |
137 | - myconfig[7] = '1'; | |
138 | if (newl == TRUE) | |
139 | + extra_newl = ADD_NL; | |
140 | + | |
141 | + switch (config) | |
142 | { | |
143 | - myconfig[8] = '1'; | |
144 | - extra_newl = ADD_NL; | |
145 | + case CLASS_o9: | |
146 | + return extra_newl + NFA_DIGIT; | |
147 | + case CLASS_not | CLASS_o9: | |
148 | + return extra_newl + NFA_NDIGIT; | |
149 | + case CLASS_af | CLASS_AF | CLASS_o9: | |
150 | + return extra_newl + NFA_HEX; | |
151 | + case CLASS_not | CLASS_af | CLASS_AF | CLASS_o9: | |
152 | + return extra_newl + NFA_NHEX; | |
153 | + case CLASS_o7: | |
154 | + return extra_newl + NFA_OCTAL; | |
155 | + case CLASS_not | CLASS_o7: | |
156 | + return extra_newl + NFA_NOCTAL; | |
157 | + case CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore: | |
158 | + return extra_newl + NFA_WORD; | |
159 | + case CLASS_not | CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore: | |
160 | + return extra_newl + NFA_NWORD; | |
161 | + case CLASS_az | CLASS_AZ | CLASS_underscore: | |
162 | + return extra_newl + NFA_HEAD; | |
163 | + case CLASS_not | CLASS_az | CLASS_AZ | CLASS_underscore: | |
164 | + return extra_newl + NFA_NHEAD; | |
165 | + case CLASS_az | CLASS_AZ: | |
166 | + return extra_newl + NFA_ALPHA; | |
167 | + case CLASS_not | CLASS_az | CLASS_AZ: | |
168 | + return extra_newl + NFA_NALPHA; | |
169 | + case CLASS_az: | |
170 | + return extra_newl + NFA_LOWER; | |
171 | + case CLASS_not | CLASS_az: | |
172 | + return extra_newl + NFA_NLOWER; | |
173 | + case CLASS_AZ: | |
174 | + return extra_newl + NFA_UPPER; | |
175 | + case CLASS_not | CLASS_AZ: | |
176 | + return extra_newl + NFA_NUPPER; | |
177 | } | |
178 | - /* try to recognize character classes */ | |
179 | - for (i = 0; i < NCONFIGS; i++) | |
180 | - if (STRNCMP(myconfig, config[i], 8) == 0) | |
181 | - return classid[i] + extra_newl; | |
182 | - | |
183 | - /* fallthrough => no success so far */ | |
184 | return FAIL; | |
185 | - | |
186 | -#undef NCONFIGS | |
187 | } | |
188 | ||
189 | /* | |
190 | @@ -900,7 +887,7 @@ | |
191 | EMSG_RET_FAIL(_(e_z1_not_allowed)); | |
192 | EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); | |
193 | /* No need to set nfa_has_backref, the sub-matches don't | |
194 | - * change when \z1 .. \z9 maches or not. */ | |
195 | + * change when \z1 .. \z9 matches or not. */ | |
196 | re_has_z = REX_USE; | |
197 | break; | |
198 | case '(': | |
199 | @@ -4658,7 +4645,7 @@ | |
200 | } | |
201 | else | |
202 | { | |
203 | - /* skip ofer the matched characters, set character | |
204 | + /* skip over the matched characters, set character | |
205 | * count in NFA_SKIP */ | |
206 | ll = nextlist; | |
207 | add_state = t->state->out; | |
208 | diff -r 85c04c7963d1 -r 43de4ebbe7ad src/testdir/test36.in | |
209 | --- src/testdir/test36.in Tue Jun 04 14:23:05 2013 +0200 | |
210 | +++ src/testdir/test36.in Tue Jun 04 17:47:05 2013 +0200 | |
211 | @@ -1,40 +1,105 @@ | |
212 | -Test character classes in regexp | |
213 | +Test character classes in regexp using regexpengine 0, 1, 2. | |
214 | ||
215 | STARTTEST | |
216 | -/^start-here | |
217 | -j:s/\d//g | |
218 | -j:s/\D//g | |
219 | -j:s/\o//g | |
220 | -j:s/\O//g | |
221 | -j:s/\x//g | |
222 | -j:s/\X//g | |
223 | -j:s/\w//g | |
224 | -j:s/\W//g | |
225 | -j:s/\h//g | |
226 | -j:s/\H//g | |
227 | -j:s/\a//g | |
228 | -j:s/\A//g | |
229 | -j:s/\l//g | |
230 | -j:s/\L//g | |
231 | -j:s/\u//g | |
232 | -j:s/\U//g | |
233 | +/^start-here/+1 | |
234 | +Y:s/\%#=0\d//g | |
235 | +p:s/\%#=1\d//g | |
236 | +p:s/\%#=2\d//g | |
237 | +p:s/\%#=0[0-9]//g | |
238 | +p:s/\%#=1[0-9]//g | |
239 | +p:s/\%#=2[0-9]//g | |
240 | +p:s/\%#=0\D//g | |
241 | +p:s/\%#=1\D//g | |
242 | +p:s/\%#=2\D//g | |
243 | +p:s/\%#=0[^0-9]//g | |
244 | +p:s/\%#=1[^0-9]//g | |
245 | +p:s/\%#=2[^0-9]//g | |
246 | +p:s/\%#=0\o//g | |
247 | +p:s/\%#=1\o//g | |
248 | +p:s/\%#=2\o//g | |
249 | +p:s/\%#=0[0-7]//g | |
250 | +p:s/\%#=1[0-7]//g | |
251 | +p:s/\%#=2[0-7]//g | |
252 | +p:s/\%#=0\O//g | |
253 | +p:s/\%#=1\O//g | |
254 | +p:s/\%#=2\O//g | |
255 | +p:s/\%#=0[^0-7]//g | |
256 | +p:s/\%#=1[^0-7]//g | |
257 | +p:s/\%#=2[^0-7]//g | |
258 | +p:s/\%#=0\x//g | |
259 | +p:s/\%#=1\x//g | |
260 | +p:s/\%#=2\x//g | |
261 | +p:s/\%#=0[0-9A-Fa-f]//g | |
262 | +p:s/\%#=1[0-9A-Fa-f]//g | |
263 | +p:s/\%#=2[0-9A-Fa-f]//g | |
264 | +p:s/\%#=0\X//g | |
265 | +p:s/\%#=1\X//g | |
266 | +p:s/\%#=2\X//g | |
267 | +p:s/\%#=0[^0-9A-Fa-f]//g | |
268 | +p:s/\%#=1[^0-9A-Fa-f]//g | |
269 | +p:s/\%#=2[^0-9A-Fa-f]//g | |
270 | +p:s/\%#=0\w//g | |
271 | +p:s/\%#=1\w//g | |
272 | +p:s/\%#=2\w//g | |
273 | +p:s/\%#=0[0-9A-Za-z_]//g | |
274 | +p:s/\%#=1[0-9A-Za-z_]//g | |
275 | +p:s/\%#=2[0-9A-Za-z_]//g | |
276 | +p:s/\%#=0\W//g | |
277 | +p:s/\%#=1\W//g | |
278 | +p:s/\%#=2\W//g | |
279 | +p:s/\%#=0[^0-9A-Za-z_]//g | |
280 | +p:s/\%#=1[^0-9A-Za-z_]//g | |
281 | +p:s/\%#=2[^0-9A-Za-z_]//g | |
282 | +p:s/\%#=0\h//g | |
283 | +p:s/\%#=1\h//g | |
284 | +p:s/\%#=2\h//g | |
285 | +p:s/\%#=0[A-Za-z_]//g | |
286 | +p:s/\%#=1[A-Za-z_]//g | |
287 | +p:s/\%#=2[A-Za-z_]//g | |
288 | +p:s/\%#=0\H//g | |
289 | +p:s/\%#=1\H//g | |
290 | +p:s/\%#=2\H//g | |
291 | +p:s/\%#=0[^A-Za-z_]//g | |
292 | +p:s/\%#=1[^A-Za-z_]//g | |
293 | +p:s/\%#=2[^A-Za-z_]//g | |
294 | +p:s/\%#=0\a//g | |
295 | +p:s/\%#=1\a//g | |
296 | +p:s/\%#=2\a//g | |
297 | +p:s/\%#=0[A-Za-z]//g | |
298 | +p:s/\%#=1[A-Za-z]//g | |
299 | +p:s/\%#=2[A-Za-z]//g | |
300 | +p:s/\%#=0\A//g | |
301 | +p:s/\%#=1\A//g | |
302 | +p:s/\%#=2\A//g | |
303 | +p:s/\%#=0[^A-Za-z]//g | |
304 | +p:s/\%#=1[^A-Za-z]//g | |
305 | +p:s/\%#=2[^A-Za-z]//g | |
306 | +p:s/\%#=0\l//g | |
307 | +p:s/\%#=1\l//g | |
308 | +p:s/\%#=2\l//g | |
309 | +p:s/\%#=0[a-z]//g | |
310 | +p:s/\%#=1[a-z]//g | |
311 | +p:s/\%#=2[a-z]//g | |
312 | +p:s/\%#=0\L//g | |
313 | +p:s/\%#=1\L//g | |
314 | +p:s/\%#=2\L//g | |
315 | +p:s/\%#=0[^a-z]//g | |
316 | +p:s/\%#=1[^a-z]//g | |
317 | +p:s/\%#=2[^a-z]//g | |
318 | +p:s/\%#=0\u//g | |
319 | +p:s/\%#=1\u//g | |
320 | +p:s/\%#=2\u//g | |
321 | +p:s/\%#=0[A-Z]//g | |
322 | +p:s/\%#=1[A-Z]//g | |
323 | +p:s/\%#=2[A-Z]//g | |
324 | +p:s/\%#=0\U//g | |
325 | +p:s/\%#=1\U//g | |
326 | +p:s/\%#=2\U//g | |
327 | +p:s/\%#=0[^A-Z]//g | |
328 | +p:s/\%#=1[^A-Z]//g | |
329 | +p:s/\%#=2[^A-Z]//g | |
330 | :/^start-here/+1,$wq! test.out | |
331 | ENDTEST | |
332 | ||
333 | start-here | |
334 | Content-type: text/html ]>