7.3.253

   1 To: vim_dev@googlegroups.com
   2 Subject: Patch 7.3.253
   3 Fcc: outbox
   4 From: Bram Moolenaar <Bram@moolenaar.net>
   5 Mime-Version: 1.0
   6 Content-Type: text/plain; charset=UTF-8
   7 Content-Transfer-Encoding: 8bit
   8 ------------
   9
  10 Patch 7.3.253
  11 Problem:    "echo 'abc' > ''" returns 0 or 1, depending on 'ignorecase'.
  12             Checks in mb_strnicmp() for illegal and truncated bytes are
  13             wrong.  Should not assume that byte length is equal before case
  14             folding.
  15 Solution:   Add utf_safe_read_char_adv() and utf_strnicmp(). Add a test for
  16             this. (Ivan Krasilnikov)
  17 Files:      src/mbyte.c src/testdir/test82.in, src/testdir/test82.ok,
  18             src/testdir/Makefile, src/testdir/Make_amiga.mak,
  19             src/testdir/Make_dos.mak, src/testdir/Make_ming.mak,
  20             src/testdir/Make_os2.mak, src/testdir/Make_vms.mms
  21
  22
  23 *** ../vim-7.3.252/src/mbyte.c  2011-07-07 15:08:53.000000000 +0200
  24 --- src/mbyte.c 2011-07-15 20:13:52.000000000 +0200
  25 ***************
  26 *** 132,137 ****
  27 --- 132,138 ----
  28   static int dbcs_char2cells __ARGS((int c));
  29   static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
  30   static int dbcs_ptr2char __ARGS((char_u *p));
  31 + static int utf_safe_read_char_adv __ARGS((char_u **s, size_t *n));
  32
  33   /*
  34    * Lookup table to quickly get the length in bytes of a UTF-8 character from
  35 ***************
  36 *** 1701,1706 ****
  37 --- 1702,1767 ----
  38   }
  39
  40   /*
  41 +  * Convert a UTF-8 byte sequence to a wide character.
  42 +  * String is assumed to be terminated by NUL or after "n" bytes, whichever
  43 +  * comes first.
  44 +  * The function is safe in the sense that it never accesses memory beyond the
  45 +  * first "n" bytes of "s".
  46 +  *
  47 +  * On success, returns decoded codepoint, advances "s" to the beginning of
  48 +  * next character and decreases "n" accordingly.
  49 +  *
  50 +  * If end of string was reached, returns 0 and, if "n" > 0, advances "s" past
  51 +  * NUL byte.
  52 +  *
  53 +  * If byte sequence is illegal or incomplete, returns -1 and does not advance
  54 +  * "s".
  55 +  */
  56 +     static int
  57 + utf_safe_read_char_adv(s, n)
  58 +     char_u      **s;
  59 +     size_t      *n;
  60 + {
  61 +     int               c, k;
  62 +
  63 +     if (*n == 0) /* end of buffer */
  64 +       return 0;
  65 +
  66 +     k = utf8len_tab_zero[**s];
  67 +
  68 +     if (k == 1)
  69 +     {
  70 +       /* ASCII character or NUL */
  71 +       (*n)--;
  72 +       return *(*s)++;
  73 +     }
  74 +
  75 +     if ((size_t)k <= *n)
  76 +     {
  77 +       /* We have a multibyte sequence and it isn't truncated by buffer
  78 +        * limits so utf_ptr2char() is safe to use. Or the first byte is
  79 +        * illegal (k=0), and it's also safe to use utf_ptr2char(). */
  80 +       c = utf_ptr2char(*s);
  81 +
  82 +       /* On failure, utf_ptr2char() returns the first byte, so here we
  83 +        * check equality with the first byte. The only non-ASCII character
  84 +        * which equals the first byte of its own UTF-8 representation is
  85 +        * U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too.
  86 +        * It's safe even if n=1, else we would have k=2 > n. */
  87 +       if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83))
  88 +       {
  89 +           /* byte sequence was successfully decoded */
  90 +           *s += k;
  91 +           *n -= k;
  92 +           return c;
  93 +       }
  94 +     }
  95 +
  96 +     /* byte sequence is incomplete or illegal */
  97 +     return -1;
  98 + }
  99 +
 100 + /*
 101    * Get character at **pp and advance *pp to the next character.
 102    * Note: composing characters are skipped!
 103    */
 104 ***************
 105 *** 2667,2673 ****
 106         {0x10400,0x10427,1,40}
 107   };
 108
 109 ! static int utf_convert(int a, convertStruct table[], int tableSize);
 110
 111   /*
 112    * Generic conversion function for case operations.
 113 --- 2728,2735 ----
 114         {0x10400,0x10427,1,40}
 115   };
 116
 117 ! static int utf_convert __ARGS((int a, convertStruct table[], int tableSize));
 118 ! static int utf_strnicmp __ARGS((char_u *s1, char_u *s2, size_t n1, size_t n2));
 119
 120   /*
 121    * Generic conversion function for case operations.
 122 ***************
 123 *** 3079,3084 ****
 124 --- 3141,3220 ----
 125       return (utf_tolower(a) != a);
 126   }
 127
 128 +     static int
 129 + utf_strnicmp(s1, s2, n1, n2)
 130 +     char_u      *s1, *s2;
 131 +     size_t      n1, n2;
 132 + {
 133 +     int               c1, c2, cdiff;
 134 +     char_u    buffer[6];
 135 +
 136 +     for (;;)
 137 +     {
 138 +       c1 = utf_safe_read_char_adv(&s1, &n1);
 139 +       c2 = utf_safe_read_char_adv(&s2, &n2);
 140 +
 141 +       if (c1 <= 0 || c2 <= 0)
 142 +           break;
 143 +
 144 +       if (c1 == c2)
 145 +           continue;
 146 +
 147 +       cdiff = utf_fold(c1) - utf_fold(c2);
 148 +       if (cdiff != 0)
 149 +           return cdiff;
 150 +     }
 151 +
 152 +     /* One of the strings ended or hit an incomplete/illegal byte sequence. */
 153 +
 154 +     if (c1 == 0 || c2 == 0)
 155 +     {
 156 +       /* One string ended first; the shorter string compares as smaller. */
 157 +       if (c1 == 0 && c2 == 0)
 158 +           return 0;
 159 +       return c1 == 0 ? -1 : 1;
 160 +     }
 161 +
 162 +     /* Continue with bytewise comparison to produce some result that
 163 +      * would make comparison operations involving this function transitive.
 164 +      *
 165 +      * If only one string had an error, comparison should be made with
 166 +      * folded version of the other string. In this case it is enough
 167 +      * to fold just one character to determine the result of comparison. */
 168 +
 169 +     if (c1 != -1 && c2 == -1)
 170 +     {
 171 +       n1 = utf_char2bytes(utf_fold(c1), buffer);
 172 +       s1 = buffer;
 173 +     }
 174 +     else if (c2 != -1 && c1 == -1)
 175 +     {
 176 +       n2 = utf_char2bytes(utf_fold(c2), buffer);
 177 +       s2 = buffer;
 178 +     }
 179 +
 180 +     while (n1 > 0 && n2 > 0 && *s1 != NUL && *s2 != NUL)
 181 +     {
 182 +       cdiff = (int)(*s1) - (int)(*s2);
 183 +       if (cdiff != 0)
 184 +           return cdiff;
 185 +
 186 +       s1++;
 187 +       s2++;
 188 +       n1--;
 189 +       n2--;
 190 +     }
 191 +
 192 +     if (n1 > 0 && *s1 == NUL)
 193 +       n1 = 0;
 194 +     if (n2 > 0 && *s2 == NUL)
 195 +       n2 = 0;
 196 +
 197 +     if (n1 == 0 && n2 == 0)
 198 +       return 0;
 199 +     return n1 == 0 ? -1 : 1;
 200 + }
 201 +
 202   /*
 203    * Version of strnicmp() that handles multi-byte characters.
 204    * Needed for Big5, Sjift-JIS and UTF-8 encoding.  Other DBCS encodings can
 205 ***************
 206 *** 3092,3140 ****
 207       char_u    *s1, *s2;
 208       size_t    nn;
 209   {
 210 !     int               i, j, l;
 211       int               cdiff;
 212 -     int               incomplete = FALSE;
 213       int               n = (int)nn;
 214
 215 !     for (i = 0; i < n; i += l)
 216       {
 217 !       if (s1[i] == NUL && s2[i] == NUL)   /* both strings end */
 218 !           return 0;
 219 !       if (enc_utf8)
 220 !       {
 221 !           l = utf_byte2len(s1[i]);
 222 !           if (l > n - i)
 223 !           {
 224 !               l = n - i;                  /* incomplete character */
 225 !               incomplete = TRUE;
 226 !           }
 227 !           /* Check directly first, it's faster. */
 228 !           for (j = 0; j < l; ++j)
 229 !           {
 230 !               if (s1[i + j] != s2[i + j])
 231 !                   break;
 232 !               if (s1[i + j] == 0)
 233 !                   /* Both stings have the same bytes but are incomplete or
 234 !                    * have illegal bytes, accept them as equal. */
 235 !                   l = j;
 236 !           }
 237 !           if (j < l)
 238 !           {
 239 !               /* If one of the two characters is incomplete return -1. */
 240 !               if (incomplete || i + utf_byte2len(s2[i]) > n)
 241 !                   return -1;
 242 !               /* Don't case-fold illegal bytes or truncated characters. */
 243 !               if (utf_ptr2len(s1 + i) < l || utf_ptr2len(s2 + i) < l)
 244 !                   return -1;
 245 !               cdiff = utf_fold(utf_ptr2char(s1 + i))
 246 !                                            - utf_fold(utf_ptr2char(s2 + i));
 247 !               if (cdiff != 0)
 248 !                   return cdiff;
 249 !           }
 250 !       }
 251 !       else
 252         {
 253             l = (*mb_ptr2len)(s1 + i);
 254             if (l <= 1)
 255             {
 256 --- 3228,3248 ----
 257       char_u    *s1, *s2;
 258       size_t    nn;
 259   {
 260 !     int               i, l;
 261       int               cdiff;
 262       int               n = (int)nn;
 263
 264 !     if (enc_utf8)
 265       {
 266 !       return utf_strnicmp(s1, s2, nn, nn);
 267 !     }
 268 !     else
 269 !     {
 270 !       for (i = 0; i < n; i += l)
 271         {
 272 +           if (s1[i] == NUL && s2[i] == NUL)   /* both strings end */
 273 +               return 0;
 274 +
 275             l = (*mb_ptr2len)(s1 + i);
 276             if (l <= 1)
 277             {
 278 *** ../vim-7.3.252/src/testdir/test82.in        2011-07-15 21:16:03.000000000 +0200
 279 --- src/testdir/test82.in       2011-07-15 18:22:46.000000000 +0200
 280 ***************
 281 *** 0 ****
 282 --- 1,93 ----
 283 + Tests for case-insensitive UTF-8 comparisons (utf_strnicmp() in mbyte.c)
 284 +
 285 + STARTTEST
 286 + :so small.vim
 287 + :if !has("multi_byte")
 288 + : e! test.ok
 289 + : w! test.out
 290 + : qa!
 291 + :endif
 292 + :set enc=utf8
 293 + ggdG
 294 + :
 295 + :function! Ch(a, op, b, expected)
 296 + :  if eval(printf('"%s" %s "%s"', a:a, a:op, a:b)) != a:expected
 297 + :    call append(line('$'), printf('"%s" %s "%s" should return %d', a:a, a:op, a:b, a:expected))
 298 + :  else
 299 + :    let b:passed += 1
 300 + :  endif
 301 + :endfunction
 302 + :
 303 + :function! Chk(a, b, result)
 304 + :  if a:result == 0
 305 + :    call Ch(a:a, '==?', a:b, 1)
 306 + :    call Ch(a:a, '!=?', a:b, 0)
 307 + :    call Ch(a:a, '<=?', a:b, 1)
 308 + :    call Ch(a:a, '>=?', a:b, 1)
 309 + :    call Ch(a:a, '<?', a:b, 0)
 310 + :    call Ch(a:a, '>?', a:b, 0)
 311 + :  elseif a:result > 0
 312 + :    call Ch(a:a, '==?', a:b, 0)
 313 + :    call Ch(a:a, '!=?', a:b, 1)
 314 + :    call Ch(a:a, '<=?', a:b, 0)
 315 + :    call Ch(a:a, '>=?', a:b, 1)
 316 + :    call Ch(a:a, '<?', a:b, 0)
 317 + :    call Ch(a:a, '>?', a:b, 1)
 318 + :  else
 319 + :    call Ch(a:a, '==?', a:b, 0)
 320 + :    call Ch(a:a, '!=?', a:b, 1)
 321 + :    call Ch(a:a, '<=?', a:b, 1)
 322 + :    call Ch(a:a, '>=?', a:b, 0)
 323 + :    call Ch(a:a, '<?', a:b, 1)
 324 + :    call Ch(a:a, '>?', a:b, 0)
 325 + :  endif
 326 + :endfunction
 327 + :
 328 + :function! Check(a, b, result)
 329 + :  call Chk(a:a, a:b, a:result)
 330 + :  call Chk(a:b, a:a, -a:result)
 331 + :endfunction
 332 + :
 333 + :function! LT(a, b)
 334 + :  call Check(a:a, a:b, -1)
 335 + :endfunction
 336 + :
 337 + :function! GT(a, b)
 338 + :  call Check(a:a, a:b, 1)
 339 + :endfunction
 340 + :
 341 + :function! EQ(a, b)
 342 + :  call Check(a:a, a:b, 0)
 343 + :endfunction
 344 + :
 345 + :let b:passed=0
 346 + :call EQ('', '')
 347 + :call LT('', 'a')
 348 + :call EQ('abc', 'abc')
 349 + :call EQ('Abc', 'abC')
 350 + :call LT('ab', 'abc')
 351 + :call LT('AB', 'abc')
 352 + :call LT('ab', 'aBc')
 353 + :call EQ('\xd0\xb9\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd', '\xd0\xb9\xd0\xa6\xd0\xa3\xd0\xba\xd0\x95\xd0\xbd')
 354 + :call LT('\xd0\xb9\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd', '\xd0\xaf\xd1\x86\xd1\x83\xd0\xba\xd0\xb5\xd0\xbd')
 355 + :call EQ('\xe2\x84\xaa', 'k')
 356 + :call LT('\xe2\x84\xaa', 'kkkkkk')
 357 + :call EQ('\xe2\x84\xaa\xe2\x84\xaa\xe2\x84\xaa', 'kkk')
 358 + :call LT('kk', '\xe2\x84\xaa\xe2\x84\xaa\xe2\x84\xaa')
 359 + :call EQ('\xe2\x84\xaa\xe2\x84\xa6k\xe2\x84\xaak\xcf\x89', 'k\xcf\x89\xe2\x84\xaakk\xe2\x84\xa6')
 360 + :call EQ('Abc\x80', 'AbC\x80')
 361 + :call LT('Abc\x80', 'AbC\x81')
 362 + :call LT('Abc', 'AbC\x80')
 363 + :call LT('abc\x80DEF', 'abc\x80def')  " case folding stops at the first bad character
 364 + :call LT('\xc3XYZ', '\xc3xyz')
 365 + :call EQ('\xef\xbc\xba', '\xef\xbd\x9a')  " FF3A (upper), FF5A (lower)
 366 + :call GT('\xef\xbc\xba', '\xef\xbc\xff')  " first string is ok and equals \xef\xbd\x9a after folding, second string is illegal and was left unchanged, then the strings were bytewise compared
 367 + :call LT('\xc3', '\xc3\x83')
 368 + :call EQ('\xc3\xa3xYz', '\xc3\x83XyZ')
 369 + :for n in range(0x60, 0xFF) | call LT(printf('xYz\x%.2X', n-1), printf('XyZ\x%.2X', n)) | endfor
 370 + :for n in range(0x80, 0xBF) | call EQ(printf('xYz\xc2\x%.2XUvW', n), printf('XyZ\xc2\x%.2XuVw', n)) | endfor
 371 + :for n in range(0xC0, 0xFF) | call LT(printf('xYz\xc2\x%.2XUvW', n), printf('XyZ\xc2\x%.2XuVw', n)) | endfor
 372 + :call append(0, printf('%d checks passed', b:passed))
 373 + :wq! test.out
 374 + ENDTEST
 375 +
 376 *** ../vim-7.3.252/src/testdir/test82.ok        2011-07-15 21:16:03.000000000 +0200
 377 --- src/testdir/test82.ok       2011-07-15 18:37:33.000000000 +0200
 378 ***************
 379 *** 0 ****
 380 --- 1,2 ----
 381 + 3732 checks passed
 382 +
 383 *** ../vim-7.3.252/src/testdir/Makefile 2011-06-26 05:36:07.000000000 +0200
 384 --- src/testdir/Makefile        2011-07-15 18:30:08.000000000 +0200
 385 ***************
 386 *** 26,32 ****
 387                 test64.out test65.out test66.out test67.out test68.out \
 388                 test69.out test70.out test71.out test72.out test73.out \
 389                 test74.out test75.out test76.out test77.out test78.out \
 390 !               test79.out test80.out test81.out
 391
 392   SCRIPTS_GUI = test16.out
 393
 394 --- 26,32 ----
 395                 test64.out test65.out test66.out test67.out test68.out \
 396                 test69.out test70.out test71.out test72.out test73.out \
 397                 test74.out test75.out test76.out test77.out test78.out \
 398 !               test79.out test80.out test81.out test82.out
 399
 400   SCRIPTS_GUI = test16.out
 401
 402 *** ../vim-7.3.252/src/testdir/Make_amiga.mak   2011-06-26 05:36:07.000000000 +0200
 403 --- src/testdir/Make_amiga.mak  2011-07-15 18:29:50.000000000 +0200
 404 ***************
 405 *** 29,35 ****
 406                 test66.out test67.out test68.out test69.out test70.out \
 407                 test71.out test72.out test73.out test74.out test75.out \
 408                 test76.out test77.out test78.out test79.out test80.out \
 409 !               test81.out
 410
 411   .SUFFIXES: .in .out
 412
 413 --- 29,35 ----
 414                 test66.out test67.out test68.out test69.out test70.out \
 415                 test71.out test72.out test73.out test74.out test75.out \
 416                 test76.out test77.out test78.out test79.out test80.out \
 417 !               test81.out test82.out
 418
 419   .SUFFIXES: .in .out
 420
 421 ***************
 422 *** 130,132 ****
 423 --- 130,133 ----
 424   test79.out: test79.in
 425   test80.out: test80.in
 426   test81.out: test81.in
 427 + test82.out: test82.in
 428 *** ../vim-7.3.252/src/testdir/Make_dos.mak     2011-06-26 05:36:07.000000000 +0200
 429 --- src/testdir/Make_dos.mak    2011-07-15 18:30:02.000000000 +0200
 430 ***************
 431 *** 29,35 ****
 432                 test42.out test52.out test65.out test66.out test67.out \
 433                 test68.out test69.out test71.out test72.out test73.out \
 434                 test74.out test75.out test76.out test77.out test78.out \
 435 !               test79.out test80.out test81.out
 436
 437   SCRIPTS32 =   test50.out test70.out
 438
 439 --- 29,35 ----
 440                 test42.out test52.out test65.out test66.out test67.out \
 441                 test68.out test69.out test71.out test72.out test73.out \
 442                 test74.out test75.out test76.out test77.out test78.out \
 443 !               test79.out test80.out test81.out test82.out
 444
 445   SCRIPTS32 =   test50.out test70.out
 446
 447 *** ../vim-7.3.252/src/testdir/Make_ming.mak    2011-06-26 05:36:07.000000000 +0200
 448 --- src/testdir/Make_ming.mak   2011-07-15 18:30:15.000000000 +0200
 449 ***************
 450 *** 49,55 ****
 451                 test42.out test52.out test65.out test66.out test67.out \
 452                 test68.out test69.out test71.out test72.out test73.out \
 453                 test74.out test75.out test76.out test77.out test78.out \
 454 !               test79.out test80.out test81.out
 455
 456   SCRIPTS32 =   test50.out test70.out
 457
 458 --- 49,55 ----
 459                 test42.out test52.out test65.out test66.out test67.out \
 460                 test68.out test69.out test71.out test72.out test73.out \
 461                 test74.out test75.out test76.out test77.out test78.out \
 462 !               test79.out test80.out test81.out test82.out
 463
 464   SCRIPTS32 =   test50.out test70.out
 465
 466 *** ../vim-7.3.252/src/testdir/Make_os2.mak     2011-06-26 05:36:07.000000000 +0200
 467 --- src/testdir/Make_os2.mak    2011-07-15 18:30:25.000000000 +0200
 468 ***************
 469 *** 29,35 ****
 470                 test66.out test67.out test68.out test69.out test70.out \
 471                 test71.out test72.out test73.out test74.out test75.out \
 472                 test76.out test77.out test78.out test79.out test80.out \
 473 !               test81.out
 474
 475   .SUFFIXES: .in .out
 476
 477 --- 29,35 ----
 478                 test66.out test67.out test68.out test69.out test70.out \
 479                 test71.out test72.out test73.out test74.out test75.out \
 480                 test76.out test77.out test78.out test79.out test80.out \
 481 !               test81.out test82.out
 482
 483   .SUFFIXES: .in .out
 484
 485 *** ../vim-7.3.252/src/testdir/Make_vms.mms     2011-06-26 05:36:07.000000000 +0200
 486 --- src/testdir/Make_vms.mms    2011-07-15 18:30:33.000000000 +0200
 487 ***************
 488 *** 4,10 ****
 489   # Authors:    Zoltan Arpadffy, <arpadffy@polarhome.com>
 490   #             Sandor Kopanyi,  <sandor.kopanyi@mailbox.hu>
 491   #
 492 ! # Last change:  2011 Jun 26
 493   #
 494   # This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
 495   # Edit the lines in the Configuration section below to select.
 496 --- 4,10 ----
 497   # Authors:    Zoltan Arpadffy, <arpadffy@polarhome.com>
 498   #             Sandor Kopanyi,  <sandor.kopanyi@mailbox.hu>
 499   #
 500 ! # Last change:  2011 Jul 15
 501   #
 502   # This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
 503   # Edit the lines in the Configuration section below to select.
 504 ***************
 505 *** 75,81 ****
 506          test61.out test62.out test63.out test64.out test65.out \
 507          test66.out test67.out test68.out test69.out \
 508          test71.out test72.out test74.out test75.out test76.out \
 509 !        test77.out test78.out test79.out test80.out test81.out
 510
 511   # Known problems:
 512   # Test 30: a problem around mac format - unknown reason
 513 --- 75,82 ----
 514          test61.out test62.out test63.out test64.out test65.out \
 515          test66.out test67.out test68.out test69.out \
 516          test71.out test72.out test74.out test75.out test76.out \
 517 !        test77.out test78.out test79.out test80.out test81.out \
 518 !        test82.out
 519
 520   # Known problems:
 521   # Test 30: a problem around mac format - unknown reason
 522 *** ../vim-7.3.252/src/version.c        2011-07-15 17:56:11.000000000 +0200
 523 --- src/version.c       2011-07-15 21:12:26.000000000 +0200
 524 ***************
 525 *** 711,712 ****
 526 --- 711,714 ----
 527   {   /* Add new patch number below this line */
 528 + /**/
 529 +     253,
 530   /**/
 531
 532 --
 533 "Intelligence has much less practical application than you'd think."
 534                   -- Scott Adams, Dilbert.
 535
 536  /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
 537 ///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
 538 \\\  an exciting new programming language -- http://www.Zimbu.org        ///
 539  \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///