]>
Commit | Line | Data |
---|---|---|
350f6bbb | 1 | diff -rc perl-5.8.8/patchlevel.h perl-5.8.8.patched/patchlevel.h |
2 | *** perl-5.8.8/patchlevel.h Tue Jan 31 16:12:10 2006 | |
3 | --- perl-5.8.8.patched/patchlevel.h Thu Nov 15 16:49:41 2007 | |
4 | *************** | |
5 | *** 124 **** | |
6 | ! ,NULL | |
7 | --- 124,125 ---- | |
8 | ! ,"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116" | |
9 | ! ,NULL | |
10 | diff -rc perl-5.8.8/regcomp.c perl-5.8.8.patched/regcomp.c | |
11 | *** perl-5.8.8/regcomp.c Sun Jan 8 20:59:27 2006 | |
12 | --- perl-5.8.8.patched/regcomp.c Thu Nov 15 16:38:53 2007 | |
13 | *************** | |
14 | *** 135,141 **** | |
15 | I32 extralen; | |
16 | I32 seen_zerolen; | |
17 | I32 seen_evals; | |
18 | ! I32 utf8; | |
19 | #if ADD_TO_REGEXEC | |
20 | char *starttry; /* -Dr: where regtry was called. */ | |
21 | #define RExC_starttry (pRExC_state->starttry) | |
22 | --- 135,144 ---- | |
23 | I32 extralen; | |
24 | I32 seen_zerolen; | |
25 | I32 seen_evals; | |
26 | ! I32 utf8; /* whether the pattern is utf8 or not */ | |
27 | ! I32 orig_utf8; /* whether the pattern was originally in utf8 */ | |
28 | ! /* XXX use this for future optimisation of case | |
29 | ! * where pattern must be upgraded to utf8. */ | |
30 | #if ADD_TO_REGEXEC | |
31 | char *starttry; /* -Dr: where regtry was called. */ | |
32 | #define RExC_starttry (pRExC_state->starttry) | |
33 | *************** | |
34 | *** 161,166 **** | |
35 | --- 164,170 ---- | |
36 | #define RExC_seen_zerolen (pRExC_state->seen_zerolen) | |
37 | #define RExC_seen_evals (pRExC_state->seen_evals) | |
38 | #define RExC_utf8 (pRExC_state->utf8) | |
39 | + #define RExC_orig_utf8 (pRExC_state->orig_utf8) | |
40 | ||
41 | #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') | |
42 | #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ | |
43 | *************** | |
44 | *** 1749,1763 **** | |
45 | if (exp == NULL) | |
46 | FAIL("NULL regexp argument"); | |
47 | ||
48 | ! RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; | |
49 | ||
50 | - RExC_precomp = exp; | |
51 | DEBUG_r({ | |
52 | if (!PL_colorset) reginitcolors(); | |
53 | PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", | |
54 | PL_colors[4],PL_colors[5],PL_colors[0], | |
55 | ! (int)(xend - exp), RExC_precomp, PL_colors[1]); | |
56 | }); | |
57 | RExC_flags = pm->op_pmflags; | |
58 | RExC_sawback = 0; | |
59 | ||
60 | --- 1753,1769 ---- | |
61 | if (exp == NULL) | |
62 | FAIL("NULL regexp argument"); | |
63 | ||
64 | ! RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; | |
65 | ||
66 | DEBUG_r({ | |
67 | if (!PL_colorset) reginitcolors(); | |
68 | PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", | |
69 | PL_colors[4],PL_colors[5],PL_colors[0], | |
70 | ! (int)(xend - exp), exp, PL_colors[1]); | |
71 | }); | |
72 | + | |
73 | + redo_first_pass: | |
74 | + RExC_precomp = exp; | |
75 | RExC_flags = pm->op_pmflags; | |
76 | RExC_sawback = 0; | |
77 | ||
78 | *************** | |
79 | *** 1782,1787 **** | |
80 | --- 1788,1812 ---- | |
81 | if (reg(pRExC_state, 0, &flags) == NULL) { | |
82 | RExC_precomp = Nullch; | |
83 | return(NULL); | |
84 | + } | |
85 | + if (RExC_utf8 && !RExC_orig_utf8) { | |
86 | + /* It's possible to write a regexp in ascii that represents unicode | |
87 | + codepoints outside of the byte range, such as via \x{100}. If we | |
88 | + detect such a sequence we have to convert the entire pattern to utf8 | |
89 | + and then recompile, as our sizing calculation will have been based | |
90 | + on 1 byte == 1 character, but we will need to use utf8 to encode | |
91 | + at least some part of the pattern, and therefore must convert the whole | |
92 | + thing. | |
93 | + XXX: somehow figure out how to make this less expensive... | |
94 | + -- dmq */ | |
95 | + STRLEN len = xend-exp; | |
96 | + DEBUG_r(PerlIO_printf(Perl_debug_log, | |
97 | + "UTF8 mismatch! Converting to utf8 for resizing and compile\n")); | |
98 | + exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len); | |
99 | + xend = exp + len; | |
100 | + RExC_orig_utf8 = RExC_utf8; | |
101 | + SAVEFREEPV(exp); | |
102 | + goto redo_first_pass; | |
103 | } | |
104 | DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size)); | |
105 | ||
106 | diff -rc perl-5.8.8/t/op/pat.t perl-5.8.8.patched/t/op/pat.t | |
107 | *** perl-5.8.8/t/op/pat.t Sat Jan 7 12:53:32 2006 | |
108 | --- perl-5.8.8.patched/t/op/pat.t Thu Nov 15 16:45:18 2007 | |
109 | *************** | |
110 | *** 6,12 **** | |
111 | ||
112 | $| = 1; | |
113 | ||
114 | ! print "1..1187\n"; | |
115 | ||
116 | BEGIN { | |
117 | chdir 't' if -d 't'; | |
118 | --- 6,12 ---- | |
119 | ||
120 | $| = 1; | |
121 | ||
122 | ! print "1..1189\n"; | |
123 | ||
124 | BEGIN { | |
125 | chdir 't' if -d 't'; | |
126 | *************** | |
127 | *** 3394,3399 **** | |
128 | --- 3394,3408 ---- | |
129 | ok($s eq 'cd', | |
130 | "# assigning to original string should not corrupt match vars"); | |
131 | } | |
132 | + | |
133 | + { | |
134 | + use warnings; | |
135 | + my @w; | |
136 | + local $SIG{__WARN__}=sub{push @w,"@_"}; | |
137 | + my $c=qq(\x{DF}); | |
138 | + ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8"); | |
139 | + ok(@w==0, "No warnings"); | |
140 | + } | |
141 | ||
142 | # last test 1187 | |
143 |