]>
Commit | Line | Data |
---|---|---|
913c2bc0 JB |
1 | From: Paul Eggert <eggert@cs.ucla.edu> |
2 | Date: Tue, 21 Sep 2021 14:47:45 +0000 (-0700) | |
3 | Subject: regex: copy back from Gnulib | |
4 | X-Git-Url: https://sourceware.org/git/?p=glibc.git;a=commitdiff_plain;h=0b5ca7c3e551e5502f3be3b06453324fe8604e82;hp=f3e664563361dc17530113b3205998d1f19dc4d9 | |
5 | ||
6 | regex: copy back from Gnulib | |
7 | ||
8 | Copy regex-related files back from Gnulib, to fix a problem with | |
9 | static checking of regex calls noted by Martin Sebor. This merges the | |
10 | following changes: | |
11 | ||
12 | * New macro __attribute_nonnull__ in misc/sys/cdefs.h, for use later | |
13 | when copying other files back from Gnulib. | |
14 | ||
15 | * Use __GNULIB_CDEFS instead of __GLIBC__ when deciding | |
16 | whether to include bits/wordsize.h etc. | |
17 | ||
18 | * Avoid duplicate entries in epsilon closure table. | |
19 | ||
20 | * New regex.h macro _REGEX_NELTS to let regexec say that its pmatch | |
21 | arg should contain nmatch elts. Use that for regexec, instead of | |
22 | __attr_access (which is incorrect). | |
23 | ||
24 | * New regex.h macro _Attr_access_ which is like __attr_access except | |
25 | portable to non-glibc platforms. | |
26 | ||
27 | * Add some DEBUG_ASSERTs to pacify gcc -fanalyzer and to catch | |
28 | recently-fixed performance bugs if they recur. | |
29 | ||
30 | * Add Gnulib-specific stuff to port the dynarray- and lock-using parts | |
31 | of regex code to non-glibc platforms. | |
32 | ||
33 | * Fix glibc bug 11053. | |
34 | ||
35 | * Avoid some undefined behavior when popping an empty fail stack. | |
36 | --- | |
37 | ||
38 | diff --git a/include/intprops.h b/include/intprops.h | |
39 | index 2b6e5e93ed..3fe64e82e9 100644 | |
40 | --- a/include/intprops.h | |
41 | +++ b/include/intprops.h | |
42 | @@ -132,7 +132,8 @@ | |
43 | operators might not yield numerically correct answers due to | |
44 | arithmetic overflow. They do not rely on undefined or | |
45 | implementation-defined behavior. Their implementations are simple | |
46 | - and straightforward, but they are a bit harder to use than the | |
47 | + and straightforward, but they are harder to use and may be less | |
48 | + efficient than the INT_<op>_WRAPV, INT_<op>_OK, and | |
49 | INT_<op>_OVERFLOW macros described below. | |
50 | ||
51 | Example usage: | |
52 | @@ -157,6 +158,9 @@ | |
53 | must have minimum value MIN and maximum MAX. Unsigned types should | |
54 | use a zero MIN of the proper type. | |
55 | ||
56 | + Because all arguments are subject to integer promotions, these | |
57 | + macros typically do not work on types narrower than 'int'. | |
58 | + | |
59 | These macros are tuned for constant MIN and MAX. For commutative | |
60 | operations such as A + B, they are also tuned for constant B. */ | |
61 | ||
62 | @@ -338,9 +342,15 @@ | |
63 | arguments should not have side effects. | |
64 | ||
65 | The WRAPV macros are not constant expressions. They support only | |
66 | - +, binary -, and *. Because the WRAPV macros convert the result, | |
67 | - they report overflow in different circumstances than the OVERFLOW | |
68 | - macros do. | |
69 | + +, binary -, and *. | |
70 | + | |
71 | + Because the WRAPV macros convert the result, they report overflow | |
72 | + in different circumstances than the OVERFLOW macros do. For | |
73 | + example, in the typical case with 16-bit 'short' and 32-bit 'int', | |
74 | + if A, B and R are all of type 'short' then INT_ADD_OVERFLOW (A, B) | |
75 | + returns false because the addition cannot overflow after A and B | |
76 | + are converted to 'int', whereas INT_ADD_WRAPV (A, B, &R) returns | |
77 | + true or false depending on whether the sum fits into 'short'. | |
78 | ||
79 | These macros are tuned for their last input argument being a constant. | |
80 | ||
81 | diff --git a/include/regex.h b/include/regex.h | |
82 | index 24eca2c297..34fb67d855 100644 | |
83 | --- a/include/regex.h | |
84 | +++ b/include/regex.h | |
85 | @@ -37,7 +37,8 @@ extern int __regcomp (regex_t *__preg, const char *__pattern, int __cflags); | |
86 | libc_hidden_proto (__regcomp) | |
87 | ||
88 | extern int __regexec (const regex_t *__preg, const char *__string, | |
89 | - size_t __nmatch, regmatch_t __pmatch[], int __eflags); | |
90 | + size_t __nmatch, regmatch_t __pmatch[__nmatch], | |
91 | + int __eflags); | |
92 | libc_hidden_proto (__regexec) | |
93 | ||
94 | extern size_t __regerror (int __errcode, const regex_t *__preg, | |
95 | diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h | |
96 | index e490fc1aeb..4dac9d264d 100644 | |
97 | --- a/misc/sys/cdefs.h | |
98 | +++ b/misc/sys/cdefs.h | |
99 | @@ -318,16 +318,18 @@ | |
100 | #endif | |
101 | ||
102 | /* The nonnull function attribute marks pointer parameters that | |
103 | - must not be NULL. */ | |
104 | -#ifndef __nonnull | |
105 | + must not be NULL. This has the name __nonnull in glibc, | |
106 | + and __attribute_nonnull__ in files shared with Gnulib to avoid | |
107 | + collision with a different __nonnull in DragonFlyBSD 5.9. */ | |
108 | +#ifndef __attribute_nonnull__ | |
109 | # if __GNUC_PREREQ (3,3) || __glibc_has_attribute (__nonnull__) | |
110 | -# define __nonnull(params) __attribute__ ((__nonnull__ params)) | |
111 | +# define __attribute_nonnull__(params) __attribute__ ((__nonnull__ params)) | |
112 | # else | |
113 | -# define __nonnull(params) | |
114 | +# define __attribute_nonnull__(params) | |
115 | # endif | |
116 | -#elif !defined __GLIBC__ | |
117 | -# undef __nonnull | |
118 | -# define __nonnull(params) _GL_ATTRIBUTE_NONNULL (params) | |
119 | +#endif | |
120 | +#ifndef __nonnull | |
121 | +# define __nonnull(params) __attribute_nonnull__ (params) | |
122 | #endif | |
123 | ||
124 | /* The returns_nonnull function attribute marks the return type of the function | |
125 | @@ -493,9 +495,9 @@ | |
126 | [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })] | |
127 | #endif | |
128 | ||
129 | -/* The #ifndef lets Gnulib avoid including these on non-glibc | |
130 | - platforms, where the includes typically do not exist. */ | |
131 | -#ifdef __GLIBC__ | |
132 | +/* Gnulib avoids including these, as they don't work on non-glibc or | |
133 | + older glibc platforms. */ | |
134 | +#ifndef __GNULIB_CDEFS | |
135 | # include <bits/wordsize.h> | |
136 | # include <bits/long-double.h> | |
137 | #endif | |
138 | diff --git a/posix/regcomp.c b/posix/regcomp.c | |
139 | index d93698ae78..887e5b5068 100644 | |
140 | --- a/posix/regcomp.c | |
141 | +++ b/posix/regcomp.c | |
142 | @@ -1695,12 +1695,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) | |
143 | reg_errcode_t err; | |
144 | Idx i; | |
145 | re_node_set eclosure; | |
146 | - bool ok; | |
147 | bool incomplete = false; | |
148 | err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); | |
149 | if (__glibc_unlikely (err != REG_NOERROR)) | |
150 | return err; | |
151 | ||
152 | + /* An epsilon closure includes itself. */ | |
153 | + eclosure.elems[eclosure.nelem++] = node; | |
154 | + | |
155 | /* This indicates that we are calculating this node now. | |
156 | We reference this value to avoid infinite loop. */ | |
157 | dfa->eclosures[node].nelem = -1; | |
158 | @@ -1753,10 +1755,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) | |
159 | } | |
160 | } | |
161 | ||
162 | - /* An epsilon closure includes itself. */ | |
163 | - ok = re_node_set_insert (&eclosure, node); | |
164 | - if (__glibc_unlikely (! ok)) | |
165 | - return REG_ESPACE; | |
166 | if (incomplete && !root) | |
167 | dfa->eclosures[node].nelem = 0; | |
168 | else | |
169 | diff --git a/posix/regex.c b/posix/regex.c | |
170 | index 7296be0f08..d32863972c 100644 | |
171 | --- a/posix/regex.c | |
172 | +++ b/posix/regex.c | |
173 | @@ -24,6 +24,7 @@ | |
174 | ||
175 | # if __GNUC_PREREQ (4, 6) | |
176 | # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" | |
177 | +# pragma GCC diagnostic ignored "-Wvla" | |
178 | # endif | |
179 | # if __GNUC_PREREQ (4, 3) | |
180 | # pragma GCC diagnostic ignored "-Wold-style-definition" | |
181 | diff --git a/posix/regex.h b/posix/regex.h | |
182 | index 14fb1d8364..adb69768ee 100644 | |
183 | --- a/posix/regex.h | |
184 | +++ b/posix/regex.h | |
185 | @@ -522,6 +522,30 @@ typedef struct | |
186 | \f | |
187 | /* Declarations for routines. */ | |
188 | ||
189 | +#ifndef _REGEX_NELTS | |
190 | +# if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \ | |
191 | + && !defined __STDC_NO_VLA__) | |
192 | +# define _REGEX_NELTS(n) n | |
193 | +# else | |
194 | +# define _REGEX_NELTS(n) | |
195 | +# endif | |
196 | +#endif | |
197 | + | |
198 | +#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__) | |
199 | +# pragma GCC diagnostic push | |
200 | +# pragma GCC diagnostic ignored "-Wvla" | |
201 | +#endif | |
202 | + | |
203 | +#ifndef _Attr_access_ | |
204 | +# ifdef __attr_access | |
205 | +# define _Attr_access_(arg) __attr_access (arg) | |
206 | +# elif defined __GNUC__ && 10 <= __GNUC__ | |
207 | +# define _Attr_access_(x) __attribute__ ((__access__ x)) | |
208 | +# else | |
209 | +# define _Attr_access_(x) | |
210 | +# endif | |
211 | +#endif | |
212 | + | |
213 | #ifdef __USE_GNU | |
214 | /* Sets the current default syntax to SYNTAX, and return the old syntax. | |
215 | You can also simply assign to the 're_syntax_options' variable. */ | |
216 | @@ -537,7 +561,7 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); | |
217 | 'regfree'. */ | |
218 | extern const char *re_compile_pattern (const char *__pattern, size_t __length, | |
219 | struct re_pattern_buffer *__buffer) | |
220 | - __attr_access ((__read_only__, 1, 2)); | |
221 | + _Attr_access_ ((__read_only__, 1, 2)); | |
222 | ||
223 | ||
224 | /* Compile a fastmap for the compiled pattern in BUFFER; used to | |
225 | @@ -555,7 +579,7 @@ extern regoff_t re_search (struct re_pattern_buffer *__buffer, | |
226 | const char *__String, regoff_t __length, | |
227 | regoff_t __start, regoff_t __range, | |
228 | struct re_registers *__regs) | |
229 | - __attr_access ((__read_only__, 2, 3)); | |
230 | + _Attr_access_ ((__read_only__, 2, 3)); | |
231 | ||
232 | ||
233 | /* Like 're_search', but search in the concatenation of STRING1 and | |
234 | @@ -566,8 +590,8 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, | |
235 | regoff_t __start, regoff_t __range, | |
236 | struct re_registers *__regs, | |
237 | regoff_t __stop) | |
238 | - __attr_access ((__read_only__, 2, 3)) | |
239 | - __attr_access ((__read_only__, 4, 5)); | |
240 | + _Attr_access_ ((__read_only__, 2, 3)) | |
241 | + _Attr_access_ ((__read_only__, 4, 5)); | |
242 | ||
243 | ||
244 | /* Like 're_search', but return how many characters in STRING the regexp | |
245 | @@ -575,7 +599,7 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, | |
246 | extern regoff_t re_match (struct re_pattern_buffer *__buffer, | |
247 | const char *__String, regoff_t __length, | |
248 | regoff_t __start, struct re_registers *__regs) | |
249 | - __attr_access ((__read_only__, 2, 3)); | |
250 | + _Attr_access_ ((__read_only__, 2, 3)); | |
251 | ||
252 | ||
253 | /* Relates to 're_match' as 're_search_2' relates to 're_search'. */ | |
254 | @@ -584,8 +608,8 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, | |
255 | const char *__string2, regoff_t __length2, | |
256 | regoff_t __start, struct re_registers *__regs, | |
257 | regoff_t __stop) | |
258 | - __attr_access ((__read_only__, 2, 3)) | |
259 | - __attr_access ((__read_only__, 4, 5)); | |
260 | + _Attr_access_ ((__read_only__, 2, 3)) | |
261 | + _Attr_access_ ((__read_only__, 4, 5)); | |
262 | ||
263 | ||
264 | /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | |
265 | @@ -654,16 +678,19 @@ extern int regcomp (regex_t *_Restrict_ __preg, | |
266 | ||
267 | extern int regexec (const regex_t *_Restrict_ __preg, | |
268 | const char *_Restrict_ __String, size_t __nmatch, | |
269 | - regmatch_t __pmatch[_Restrict_arr_], | |
270 | - int __eflags) | |
271 | - __attr_access ((__write_only__, 4, 3)); | |
272 | + regmatch_t __pmatch[_Restrict_arr_ | |
273 | + _REGEX_NELTS (__nmatch)], | |
274 | + int __eflags); | |
275 | ||
276 | extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, | |
277 | char *_Restrict_ __errbuf, size_t __errbuf_size) | |
278 | - __attr_access ((__write_only__, 3, 4)); | |
279 | + _Attr_access_ ((__write_only__, 3, 4)); | |
280 | ||
281 | extern void regfree (regex_t *__preg); | |
282 | ||
283 | +#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__) | |
284 | +# pragma GCC diagnostic pop | |
285 | +#endif | |
286 | ||
287 | #ifdef __cplusplus | |
288 | } | |
289 | diff --git a/posix/regex_internal.c b/posix/regex_internal.c | |
290 | index 9dd387ef85..aefcfa2f52 100644 | |
291 | --- a/posix/regex_internal.c | |
292 | +++ b/posix/regex_internal.c | |
293 | @@ -1211,6 +1211,10 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src) | |
294 | ||
295 | if (__glibc_unlikely (dest->nelem == 0)) | |
296 | { | |
297 | + /* Although we already guaranteed above that dest->alloc != 0 and | |
298 | + therefore dest->elems != NULL, add a debug assertion to pacify | |
299 | + GCC 11.2.1's -fanalyzer. */ | |
300 | + DEBUG_ASSERT (dest->elems); | |
301 | dest->nelem = src->nelem; | |
302 | memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); | |
303 | return REG_NOERROR; | |
304 | @@ -1286,7 +1290,10 @@ re_node_set_insert (re_node_set *set, Idx elem) | |
305 | ||
306 | if (__glibc_unlikely (set->nelem) == 0) | |
307 | { | |
308 | - /* We already guaranteed above that set->alloc != 0. */ | |
309 | + /* Although we already guaranteed above that set->alloc != 0 and | |
310 | + therefore set->elems != NULL, add a debug assertion to pacify | |
311 | + GCC 11.2 -fanalyzer. */ | |
312 | + DEBUG_ASSERT (set->elems); | |
313 | set->elems[0] = elem; | |
314 | ++set->nelem; | |
315 | return true; | |
316 | @@ -1314,6 +1321,7 @@ re_node_set_insert (re_node_set *set, Idx elem) | |
317 | { | |
318 | for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) | |
319 | set->elems[idx] = set->elems[idx - 1]; | |
320 | + DEBUG_ASSERT (set->elems[idx - 1] < elem); | |
321 | } | |
322 | ||
323 | /* Insert the new element. */ | |
324 | diff --git a/posix/regex_internal.h b/posix/regex_internal.h | |
325 | index edcdc07e99..1245e782ff 100644 | |
326 | --- a/posix/regex_internal.h | |
327 | +++ b/posix/regex_internal.h | |
328 | @@ -32,6 +32,10 @@ | |
329 | #include <stdbool.h> | |
330 | #include <stdint.h> | |
331 | ||
332 | +#ifndef _LIBC | |
333 | +# include <dynarray.h> | |
334 | +#endif | |
335 | + | |
336 | #include <intprops.h> | |
337 | #include <verify.h> | |
338 | ||
339 | @@ -49,14 +53,14 @@ | |
340 | # define lock_fini(lock) ((void) 0) | |
341 | # define lock_lock(lock) __libc_lock_lock (lock) | |
342 | # define lock_unlock(lock) __libc_lock_unlock (lock) | |
343 | -#elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO | |
344 | +#elif defined GNULIB_LOCK && !defined GNULIB_REGEX_SINGLE_THREAD | |
345 | # include "glthread/lock.h" | |
346 | # define lock_define(name) gl_lock_define (, name) | |
347 | # define lock_init(lock) glthread_lock_init (&(lock)) | |
348 | # define lock_fini(lock) glthread_lock_destroy (&(lock)) | |
349 | # define lock_lock(lock) glthread_lock_lock (&(lock)) | |
350 | # define lock_unlock(lock) glthread_lock_unlock (&(lock)) | |
351 | -#elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO | |
352 | +#elif defined GNULIB_PTHREAD && !defined GNULIB_REGEX_SINGLE_THREAD | |
353 | # include <pthread.h> | |
354 | # define lock_define(name) pthread_mutex_t name; | |
355 | # define lock_init(lock) pthread_mutex_init (&(lock), 0) | |
356 | diff --git a/posix/regexec.c b/posix/regexec.c | |
357 | index f7b4f9cfc3..83e9aaf8ca 100644 | |
358 | --- a/posix/regexec.c | |
359 | +++ b/posix/regexec.c | |
360 | @@ -59,7 +59,7 @@ static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, | |
361 | Idx cur_idx, Idx nmatch); | |
362 | static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, | |
363 | Idx str_idx, Idx dest_node, Idx nregs, | |
364 | - regmatch_t *regs, | |
365 | + regmatch_t *regs, regmatch_t *prevregs, | |
366 | re_node_set *eps_via_nodes); | |
367 | static reg_errcode_t set_regs (const regex_t *preg, | |
368 | const re_match_context_t *mctx, | |
369 | @@ -186,11 +186,12 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); | |
370 | REG_NOTBOL is set, then ^ does not match at the beginning of the | |
371 | string; if REG_NOTEOL is set, then $ does not match at the end. | |
372 | ||
373 | - We return 0 if we find a match and REG_NOMATCH if not. */ | |
374 | + Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if | |
375 | + EFLAGS is invalid. */ | |
376 | ||
377 | int | |
378 | regexec (const regex_t *__restrict preg, const char *__restrict string, | |
379 | - size_t nmatch, regmatch_t pmatch[], int eflags) | |
380 | + size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) | |
381 | { | |
382 | reg_errcode_t err; | |
383 | Idx start, length; | |
384 | @@ -234,7 +235,7 @@ int | |
385 | attribute_compat_text_section | |
386 | __compat_regexec (const regex_t *__restrict preg, | |
387 | const char *__restrict string, size_t nmatch, | |
388 | - regmatch_t pmatch[], int eflags) | |
389 | + regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags) | |
390 | { | |
391 | return regexec (preg, string, nmatch, pmatch, | |
392 | eflags & (REG_NOTBOL | REG_NOTEOL)); | |
393 | @@ -269,8 +270,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); | |
394 | strings.) | |
395 | ||
396 | On success, re_match* functions return the length of the match, re_search* | |
397 | - return the position of the start of the match. Return value -1 means no | |
398 | - match was found and -2 indicates an internal error. */ | |
399 | + return the position of the start of the match. They return -1 on | |
400 | + match failure, -2 on error. */ | |
401 | ||
402 | regoff_t | |
403 | re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, | |
404 | @@ -1206,27 +1207,30 @@ check_halt_state_context (const re_match_context_t *mctx, | |
405 | /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA | |
406 | corresponding to the DFA). | |
407 | Return the destination node, and update EPS_VIA_NODES; | |
408 | - return -1 in case of errors. */ | |
409 | + return -1 on match failure, -2 on error. */ | |
410 | ||
411 | static Idx | |
412 | proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |
413 | + regmatch_t *prevregs, | |
414 | Idx *pidx, Idx node, re_node_set *eps_via_nodes, | |
415 | struct re_fail_stack_t *fs) | |
416 | { | |
417 | const re_dfa_t *const dfa = mctx->dfa; | |
418 | - Idx i; | |
419 | - bool ok; | |
420 | if (IS_EPSILON_NODE (dfa->nodes[node].type)) | |
421 | { | |
422 | re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; | |
423 | re_node_set *edests = &dfa->edests[node]; | |
424 | - Idx dest_node; | |
425 | - ok = re_node_set_insert (eps_via_nodes, node); | |
426 | - if (__glibc_unlikely (! ok)) | |
427 | - return -2; | |
428 | - /* Pick up a valid destination, or return -1 if none | |
429 | - is found. */ | |
430 | - for (dest_node = -1, i = 0; i < edests->nelem; ++i) | |
431 | + | |
432 | + if (! re_node_set_contains (eps_via_nodes, node)) | |
433 | + { | |
434 | + bool ok = re_node_set_insert (eps_via_nodes, node); | |
435 | + if (__glibc_unlikely (! ok)) | |
436 | + return -2; | |
437 | + } | |
438 | + | |
439 | + /* Pick a valid destination, or return -1 if none is found. */ | |
440 | + Idx dest_node = -1; | |
441 | + for (Idx i = 0; i < edests->nelem; i++) | |
442 | { | |
443 | Idx candidate = edests->elems[i]; | |
444 | if (!re_node_set_contains (cur_nodes, candidate)) | |
445 | @@ -1244,7 +1248,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |
446 | /* Otherwise, push the second epsilon-transition on the fail stack. */ | |
447 | else if (fs != NULL | |
448 | && push_fail_stack (fs, *pidx, candidate, nregs, regs, | |
449 | - eps_via_nodes)) | |
450 | + prevregs, eps_via_nodes)) | |
451 | return -2; | |
452 | ||
453 | /* We know we are going to exit. */ | |
454 | @@ -1288,7 +1292,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |
455 | if (naccepted == 0) | |
456 | { | |
457 | Idx dest_node; | |
458 | - ok = re_node_set_insert (eps_via_nodes, node); | |
459 | + bool ok = re_node_set_insert (eps_via_nodes, node); | |
460 | if (__glibc_unlikely (! ok)) | |
461 | return -2; | |
462 | dest_node = dfa->edests[node].elems[0]; | |
463 | @@ -1317,7 +1321,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, | |
464 | static reg_errcode_t | |
465 | __attribute_warn_unused_result__ | |
466 | push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, | |
467 | - Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) | |
468 | + Idx nregs, regmatch_t *regs, regmatch_t *prevregs, | |
469 | + re_node_set *eps_via_nodes) | |
470 | { | |
471 | reg_errcode_t err; | |
472 | Idx num = fs->num++; | |
473 | @@ -1333,25 +1338,30 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, | |
474 | } | |
475 | fs->stack[num].idx = str_idx; | |
476 | fs->stack[num].node = dest_node; | |
477 | - fs->stack[num].regs = re_malloc (regmatch_t, nregs); | |
478 | + fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); | |
479 | if (fs->stack[num].regs == NULL) | |
480 | return REG_ESPACE; | |
481 | memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); | |
482 | + memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); | |
483 | err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); | |
484 | return err; | |
485 | } | |
486 | ||
487 | static Idx | |
488 | pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, | |
489 | - regmatch_t *regs, re_node_set *eps_via_nodes) | |
490 | + regmatch_t *regs, regmatch_t *prevregs, | |
491 | + re_node_set *eps_via_nodes) | |
492 | { | |
493 | + if (fs == NULL || fs->num == 0) | |
494 | + return -1; | |
495 | Idx num = --fs->num; | |
496 | - DEBUG_ASSERT (num >= 0); | |
497 | *pidx = fs->stack[num].idx; | |
498 | memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); | |
499 | + memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs); | |
500 | re_node_set_free (eps_via_nodes); | |
501 | re_free (fs->stack[num].regs); | |
502 | *eps_via_nodes = fs->stack[num].eps_via_nodes; | |
503 | + DEBUG_ASSERT (0 <= fs->stack[num].node); | |
504 | return fs->stack[num].node; | |
505 | } | |
506 | ||
507 | @@ -1407,33 +1417,32 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, | |
508 | { | |
509 | update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); | |
510 | ||
511 | - if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) | |
512 | + if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node) | |
513 | + || (fs && re_node_set_contains (&eps_via_nodes, cur_node))) | |
514 | { | |
515 | Idx reg_idx; | |
516 | + cur_node = -1; | |
517 | if (fs) | |
518 | { | |
519 | for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) | |
520 | if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) | |
521 | - break; | |
522 | - if (reg_idx == nmatch) | |
523 | - { | |
524 | - re_node_set_free (&eps_via_nodes); | |
525 | - regmatch_list_free (&prev_match); | |
526 | - return free_fail_stack_return (fs); | |
527 | - } | |
528 | - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, | |
529 | - &eps_via_nodes); | |
530 | + { | |
531 | + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, | |
532 | + prev_idx_match, &eps_via_nodes); | |
533 | + break; | |
534 | + } | |
535 | } | |
536 | - else | |
537 | + if (cur_node < 0) | |
538 | { | |
539 | re_node_set_free (&eps_via_nodes); | |
540 | regmatch_list_free (&prev_match); | |
541 | - return REG_NOERROR; | |
542 | + return free_fail_stack_return (fs); | |
543 | } | |
544 | } | |
545 | ||
546 | /* Proceed to next node. */ | |
547 | - cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, | |
548 | + cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match, | |
549 | + &idx, cur_node, | |
550 | &eps_via_nodes, fs); | |
551 | ||
552 | if (__glibc_unlikely (cur_node < 0)) | |
553 | @@ -1445,13 +1454,13 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, | |
554 | free_fail_stack_return (fs); | |
555 | return REG_ESPACE; | |
556 | } | |
557 | - if (fs) | |
558 | - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, | |
559 | - &eps_via_nodes); | |
560 | - else | |
561 | + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, | |
562 | + prev_idx_match, &eps_via_nodes); | |
563 | + if (cur_node < 0) | |
564 | { | |
565 | re_node_set_free (&eps_via_nodes); | |
566 | regmatch_list_free (&prev_match); | |
567 | + free_fail_stack_return (fs); | |
568 | return REG_NOMATCH; | |
569 | } | |
570 | } | |
571 | @@ -1495,10 +1504,10 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, | |
572 | } | |
573 | else if (type == OP_CLOSE_SUBEXP) | |
574 | { | |
575 | + /* We are at the last node of this sub expression. */ | |
576 | Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; | |
577 | if (reg_num < nmatch) | |
578 | { | |
579 | - /* We are at the last node of this sub expression. */ | |
580 | if (pmatch[reg_num].rm_so < cur_idx) | |
581 | { | |
582 | pmatch[reg_num].rm_eo = cur_idx; | |
583 | @@ -2195,6 +2204,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, | |
584 | ||
585 | /* Return the next state to which the current state STATE will transit by | |
586 | accepting the current input byte, and update STATE_LOG if necessary. | |
587 | + Return NULL on failure. | |
588 | If STATE can accept a multibyte char/collating element/back reference | |
589 | update the destination of STATE_LOG. */ | |
590 | ||
591 | @@ -2395,7 +2405,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, | |
592 | ||
593 | #if 0 | |
594 | /* Return the next state to which the current state STATE will transit by | |
595 | - accepting the current input byte. */ | |
596 | + accepting the current input byte. Return NULL on failure. */ | |
597 | ||
598 | static re_dfastate_t * | |
599 | transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, | |
600 | @@ -2817,7 +2827,8 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, | |
601 | /* Check whether the node TOP_NODE at TOP_STR can arrive to the node | |
602 | LAST_NODE at LAST_STR. We record the path onto PATH since it will be | |
603 | heavily reused. | |
604 | - Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ | |
605 | + Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, | |
606 | + REG_ESPACE if memory is exhausted. */ | |
607 | ||
608 | static reg_errcode_t | |
609 | __attribute_warn_unused_result__ | |
610 | @@ -3433,7 +3444,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) | |
611 | /* Group all nodes belonging to STATE into several destinations. | |
612 | Then for all destinations, set the nodes belonging to the destination | |
613 | to DESTS_NODE[i] and set the characters accepted by the destination | |
614 | - to DEST_CH[i]. This function return the number of destinations. */ | |
615 | + to DEST_CH[i]. Return the number of destinations if successful, | |
616 | + -1 on internal error. */ | |
617 | ||
618 | static Idx | |
619 | group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, | |
620 | @@ -4211,7 +4223,8 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) | |
621 | } | |
622 | ||
623 | /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches | |
624 | - at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ | |
625 | + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. | |
626 | + Return the new entry if successful, NULL if memory is exhausted. */ | |
627 | ||
628 | static re_sub_match_last_t * | |
629 | match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) |