perl-regexp-CVE-2007-5116.patch

   1 diff -rc perl-5.8.8/patchlevel.h perl-5.8.8.patched/patchlevel.h
   2 *** perl-5.8.8/patchlevel.h     Tue Jan 31 16:12:10 2006
   3 --- perl-5.8.8.patched/patchlevel.h     Thu Nov 15 16:49:41 2007
   4 ***************
   5 *** 124 ****
   6 !       ,NULL
   7 --- 124,125 ----
   8 !       ,"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116"
   9 !       ,NULL
  10 diff -rc perl-5.8.8/regcomp.c perl-5.8.8.patched/regcomp.c
  11 *** perl-5.8.8/regcomp.c        Sun Jan  8 20:59:27 2006
  12 --- perl-5.8.8.patched/regcomp.c        Thu Nov 15 16:38:53 2007
  13 ***************
  14 *** 135,141 ****
  15       I32               extralen;
  16       I32               seen_zerolen;
  17       I32               seen_evals;
  18 !     I32               utf8;
  19   #if ADD_TO_REGEXEC
  20       char      *starttry;              /* -Dr: where regtry was called. */
  21   #define RExC_starttry (pRExC_state->starttry)
  22 --- 135,144 ----
  23       I32               extralen;
  24       I32               seen_zerolen;
  25       I32               seen_evals;
  26 !     I32               utf8;           /* whether the pattern is utf8 or not */
  27 !     I32               orig_utf8;      /* whether the pattern was originally in utf8 */
  28 !                               /* XXX use this for future optimisation of case
  29 !                                * where pattern must be upgraded to utf8. */
  30   #if ADD_TO_REGEXEC
  31       char      *starttry;              /* -Dr: where regtry was called. */
  32   #define RExC_starttry (pRExC_state->starttry)
  33 ***************
  34 *** 161,166 ****
  35 --- 164,170 ----
  36   #define RExC_seen_zerolen     (pRExC_state->seen_zerolen)
  37   #define RExC_seen_evals       (pRExC_state->seen_evals)
  38   #define RExC_utf8     (pRExC_state->utf8)
  39 + #define RExC_orig_utf8        (pRExC_state->orig_utf8)
  40
  41   #define       ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
  42   #define       ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
  43 ***************
  44 *** 1749,1763 ****
  45       if (exp == NULL)
  46         FAIL("NULL regexp argument");
  47
  48 !     RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
  49
  50 -     RExC_precomp = exp;
  51       DEBUG_r({
  52          if (!PL_colorset) reginitcolors();
  53          PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
  54                        PL_colors[4],PL_colors[5],PL_colors[0],
  55 !                      (int)(xend - exp), RExC_precomp, PL_colors[1]);
  56       });
  57       RExC_flags = pm->op_pmflags;
  58       RExC_sawback = 0;
  59
  60 --- 1753,1769 ----
  61       if (exp == NULL)
  62         FAIL("NULL regexp argument");
  63
  64 !     RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
  65
  66       DEBUG_r({
  67          if (!PL_colorset) reginitcolors();
  68          PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
  69                        PL_colors[4],PL_colors[5],PL_colors[0],
  70 !                      (int)(xend - exp), exp, PL_colors[1]);
  71       });
  72 +
  73 + redo_first_pass:
  74 +     RExC_precomp = exp;
  75       RExC_flags = pm->op_pmflags;
  76       RExC_sawback = 0;
  77
  78 ***************
  79 *** 1782,1787 ****
  80 --- 1788,1812 ----
  81       if (reg(pRExC_state, 0, &flags) == NULL) {
  82         RExC_precomp = Nullch;
  83         return(NULL);
  84 +     }
  85 +     if (RExC_utf8 && !RExC_orig_utf8) {
  86 +         /* It's possible to write a regexp in ascii that represents unicode
  87 +         codepoints outside of the byte range, such as via \x{100}. If we
  88 +         detect such a sequence we have to convert the entire pattern to utf8
  89 +         and then recompile, as our sizing calculation will have been based
  90 +         on 1 byte == 1 character, but we will need to use utf8 to encode
  91 +         at least some part of the pattern, and therefore must convert the whole
  92 +         thing.
  93 +         XXX: somehow figure out how to make this less expensive...
  94 +         -- dmq */
  95 +         STRLEN len = xend-exp;
  96 +         DEBUG_r(PerlIO_printf(Perl_debug_log,
  97 +           "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
  98 +         exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
  99 +         xend = exp + len;
 100 +         RExC_orig_utf8 = RExC_utf8;
 101 +         SAVEFREEPV(exp);
 102 +         goto redo_first_pass;
 103       }
 104       DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
 105
 106 diff -rc perl-5.8.8/t/op/pat.t perl-5.8.8.patched/t/op/pat.t
 107 *** perl-5.8.8/t/op/pat.t       Sat Jan  7 12:53:32 2006
 108 --- perl-5.8.8.patched/t/op/pat.t       Thu Nov 15 16:45:18 2007
 109 ***************
 110 *** 6,12 ****
 111
 112   $| = 1;
 113
 114 ! print "1..1187\n";
 115
 116   BEGIN {
 117       chdir 't' if -d 't';
 118 --- 6,12 ----
 119
 120   $| = 1;
 121
 122 ! print "1..1189\n";
 123
 124   BEGIN {
 125       chdir 't' if -d 't';
 126 ***************
 127 *** 3394,3399 ****
 128 --- 3394,3408 ----
 129       ok($s eq 'cd',
 130          "# assigning to original string should not corrupt match vars");
 131   }
 132 +
 133 + {
 134 +     use warnings;
 135 +     my @w;
 136 +     local $SIG{__WARN__}=sub{push @w,"@_"};
 137 +     my $c=qq(\x{DF});
 138 +     ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
 139 +     ok(@w==0, "No warnings");
 140 + }
 141
 142   # last test 1187
 143