4 From: Bram Moolenaar <Bram@moolenaar.net>
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
11 Problem: iconv() returns an invalid character sequence when conversion
12 fails. It should return an empty string. (Yongwei Wu)
13 Solution: Be more strict about invalid characters in the input.
17 *** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200
18 --- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100
21 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
22 static int dbcs_ptr2char __ARGS((char_u *p));
24 ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
25 ! * the first byte of a UTF-8 string. Bytes which are illegal when used as the
26 ! * first byte have a one, because these will be used separately. */
27 static char utf8len_tab[256] =
29 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
31 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
32 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
33 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
34 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
35 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
36 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
40 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
41 * in the "xim.log" file.
44 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
45 static int dbcs_ptr2char __ARGS((char_u *p));
48 ! * Lookup table to quickly get the length in bytes of a UTF-8 character from
49 ! * the first byte of a UTF-8 string.
50 ! * Bytes which are illegal when used as the first byte have a 1.
51 ! * The NUL byte has length 1.
53 static char utf8len_tab[256] =
55 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
56 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
59 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
60 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
61 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
62 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
66 + * Like utf8len_tab above, but using a zero for illegal lead bytes.
68 + static char utf8len_tab_zero[256] =
70 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
71 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
72 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
73 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
74 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
75 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
76 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
77 + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
81 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
82 * in the "xim.log" file.
86 if (size > 0 && *p >= 0x80)
88 if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
91 /* An illegal byte is displayed as <xx>. */
92 if (utf_ptr2len(p) == 1 || c == NUL)
94 if (size > 0 && *p >= 0x80)
96 if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
97 ! return 1; /* truncated */
99 /* An illegal byte is displayed as <xx>. */
100 if (utf_ptr2len(p) == 1 || c == NUL)
103 if (p[0] < 0x80) /* be quick for ASCII */
106 ! len = utf8len_tab[p[0]];
107 if (len > 1 && (p[1] & 0xc0) == 0x80)
111 if (p[0] < 0x80) /* be quick for ASCII */
114 ! len = utf8len_tab_zero[p[0]];
115 if (len > 1 && (p[1] & 0xc0) == 0x80)
122 * Return length of UTF-8 character, obtained from the first byte.
123 * "b" must be between 0 and 255!
124 + * Returns 1 for an invalid first byte value.
132 * Returns 1 for an illegal byte sequence (also within an incomplete byte sequence).
133 * Returns number > "size" for an incomplete byte sequence.
134 + * Never returns zero.
137 utf_ptr2len_len(p, size)
145 ! m = len = utf8len_tab[*p];
147 m = size; /* incomplete byte sequence. */
148 for (i = 1; i < m; ++i)
149 if ((p[i] & 0xc0) != 0x80)
155 ! len = utf8len_tab[*p];
157 ! return 1; /* NUL, ascii or illegal lead byte */
159 m = size; /* incomplete byte sequence. */
162 for (i = 1; i < m; ++i)
163 if ((p[i] & 0xc0) != 0x80)
169 * mb_head_off() function pointer.
170 * Return offset from "p" to the first byte of the character it points into.
171 + * If "p" points to the NUL at the end of the string return 0.
172 * Returns 0 when already at the first byte of a character.
178 /* It can't be a trailing byte when not using DBCS, at the start of the
179 * string or the previous byte can't start a double-byte. */
180 ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
183 /* This is slow: need to start at the base and go forward until the
186 /* It can't be a trailing byte when not using DBCS, at the start of the
187 * string or the previous byte can't start a double-byte. */
188 ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
191 /* This is slow: need to start at the base and go forward until the
194 * lead byte in the current cell. */
196 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
197 ! || MB_BYTE2LEN(p[-1]) == 1)
200 /* This is slow: need to start at the base and go forward until the
202 * lead byte in the current cell. */
204 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
205 ! || MB_BYTE2LEN(p[-1]) == 1
209 /* This is slow: need to start at the base and go forward until the
223 /* Check for illegal sequence. Do allow an illegal byte after where we
225 ! if (utf8len_tab[*q] != (int)(s - q + 1)
226 ! && utf8len_tab[*q] != (int)(p - q + 1))
232 /* Check for illegal sequence. Do allow an illegal byte after where we
234 ! len = utf8len_tab[*q];
235 ! if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
242 while (end == NULL ? *p != NUL : p < end)
244 ! if ((*p & 0xc0) == 0x80)
245 return FALSE; /* invalid lead byte */
246 - l = utf8len_tab[*p];
247 if (end != NULL && p + l > end)
248 return FALSE; /* incomplete byte sequence */
252 while (end == NULL ? *p != NUL : p < end)
254 ! l = utf8len_tab_zero[*p];
256 return FALSE; /* invalid lead byte */
257 if (end != NULL && p + l > end)
258 return FALSE; /* incomplete byte sequence */
263 for (i = 0; i < len; ++i)
265 ! l = utf_ptr2len(ptr + i);
270 ! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
272 /* Incomplete sequence at the end. */
273 *unconvlenp = len - i;
276 for (i = 0; i < len; ++i)
278 ! l = utf_ptr2len_len(ptr + i, len - i);
283 ! int l_w = utf8len_tab_zero[ptr[i]];
287 ! /* An illegal UTF-8 byte cannot be converted. */
291 ! if (unconvlenp != NULL && l_w > len - i)
293 /* Incomplete sequence at the end. */
294 *unconvlenp = len - i;
295 *** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100
296 --- src/version.c 2009-12-02 15:00:23.000000000 +0100
300 { /* Add new patch number below this line */
306 hundred-and-one symptoms of being an internet addict:
307 6. You refuse to go to a vacation spot with no electricity and no phone lines.
309 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
310 /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
311 \\\ download, build and distribute -- http://www.A-A-P.org ///
312 \\\ help me help AIDS victims -- http://ICCF-Holland.org ///