]> git.pld-linux.org Git - packages/perl.git/blame - perl-regexp-CVE-2007-5116.patch
- added h2ph-includes patch, always package asm-generic (needed for complete headers)
[packages/perl.git] / perl-regexp-CVE-2007-5116.patch
CommitLineData
350f6bbb 1diff -rc perl-5.8.8/patchlevel.h perl-5.8.8.patched/patchlevel.h
2*** perl-5.8.8/patchlevel.h Tue Jan 31 16:12:10 2006
3--- perl-5.8.8.patched/patchlevel.h Thu Nov 15 16:49:41 2007
4***************
5*** 124 ****
6! ,NULL
7--- 124,125 ----
8! ,"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116"
9! ,NULL
10diff -rc perl-5.8.8/regcomp.c perl-5.8.8.patched/regcomp.c
11*** perl-5.8.8/regcomp.c Sun Jan 8 20:59:27 2006
12--- perl-5.8.8.patched/regcomp.c Thu Nov 15 16:38:53 2007
13***************
14*** 135,141 ****
15 I32 extralen;
16 I32 seen_zerolen;
17 I32 seen_evals;
18! I32 utf8;
19 #if ADD_TO_REGEXEC
20 char *starttry; /* -Dr: where regtry was called. */
21 #define RExC_starttry (pRExC_state->starttry)
22--- 135,144 ----
23 I32 extralen;
24 I32 seen_zerolen;
25 I32 seen_evals;
26! I32 utf8; /* whether the pattern is utf8 or not */
27! I32 orig_utf8; /* whether the pattern was originally in utf8 */
28! /* XXX use this for future optimisation of case
29! * where pattern must be upgraded to utf8. */
30 #if ADD_TO_REGEXEC
31 char *starttry; /* -Dr: where regtry was called. */
32 #define RExC_starttry (pRExC_state->starttry)
33***************
34*** 161,166 ****
35--- 164,170 ----
36 #define RExC_seen_zerolen (pRExC_state->seen_zerolen)
37 #define RExC_seen_evals (pRExC_state->seen_evals)
38 #define RExC_utf8 (pRExC_state->utf8)
39+ #define RExC_orig_utf8 (pRExC_state->orig_utf8)
40
41 #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
42 #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
43***************
44*** 1749,1763 ****
45 if (exp == NULL)
46 FAIL("NULL regexp argument");
47
48! RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
49
50- RExC_precomp = exp;
51 DEBUG_r({
52 if (!PL_colorset) reginitcolors();
53 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
54 PL_colors[4],PL_colors[5],PL_colors[0],
55! (int)(xend - exp), RExC_precomp, PL_colors[1]);
56 });
57 RExC_flags = pm->op_pmflags;
58 RExC_sawback = 0;
59
60--- 1753,1769 ----
61 if (exp == NULL)
62 FAIL("NULL regexp argument");
63
64! RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
65
66 DEBUG_r({
67 if (!PL_colorset) reginitcolors();
68 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
69 PL_colors[4],PL_colors[5],PL_colors[0],
70! (int)(xend - exp), exp, PL_colors[1]);
71 });
72+
73+ redo_first_pass:
74+ RExC_precomp = exp;
75 RExC_flags = pm->op_pmflags;
76 RExC_sawback = 0;
77
78***************
79*** 1782,1787 ****
80--- 1788,1812 ----
81 if (reg(pRExC_state, 0, &flags) == NULL) {
82 RExC_precomp = Nullch;
83 return(NULL);
84+ }
85+ if (RExC_utf8 && !RExC_orig_utf8) {
86+ /* It's possible to write a regexp in ascii that represents unicode
87+ codepoints outside of the byte range, such as via \x{100}. If we
88+ detect such a sequence we have to convert the entire pattern to utf8
89+ and then recompile, as our sizing calculation will have been based
90+ on 1 byte == 1 character, but we will need to use utf8 to encode
91+ at least some part of the pattern, and therefore must convert the whole
92+ thing.
93+ XXX: somehow figure out how to make this less expensive...
94+ -- dmq */
95+ STRLEN len = xend-exp;
96+ DEBUG_r(PerlIO_printf(Perl_debug_log,
97+ "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
98+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
99+ xend = exp + len;
100+ RExC_orig_utf8 = RExC_utf8;
101+ SAVEFREEPV(exp);
102+ goto redo_first_pass;
103 }
104 DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
105
106diff -rc perl-5.8.8/t/op/pat.t perl-5.8.8.patched/t/op/pat.t
107*** perl-5.8.8/t/op/pat.t Sat Jan 7 12:53:32 2006
108--- perl-5.8.8.patched/t/op/pat.t Thu Nov 15 16:45:18 2007
109***************
110*** 6,12 ****
111
112 $| = 1;
113
114! print "1..1187\n";
115
116 BEGIN {
117 chdir 't' if -d 't';
118--- 6,12 ----
119
120 $| = 1;
121
122! print "1..1189\n";
123
124 BEGIN {
125 chdir 't' if -d 't';
126***************
127*** 3394,3399 ****
128--- 3394,3408 ----
129 ok($s eq 'cd',
130 "# assigning to original string should not corrupt match vars");
131 }
132+
133+ {
134+ use warnings;
135+ my @w;
136+ local $SIG{__WARN__}=sub{push @w,"@_"};
137+ my $c=qq(\x{DF});
138+ ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
139+ ok(@w==0, "No warnings");
140+ }
141
142 # last test 1187
143
This page took 0.062135 seconds and 4 git commands to generate.