Commit | Line | Data |
---|---|---|
e40e3b19 AG |
1 | To: vim-dev@vim.org |
2 | Subject: Patch 7.2.312 | |
3 | Fcc: outbox | |
4 | From: Bram Moolenaar <Bram@moolenaar.net> | |
5 | Mime-Version: 1.0 | |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ------------ | |
9 | ||
10 | Patch 7.2.312 | |
11 | Problem: iconv() returns an invalid character sequence when conversion | |
12 | fails. It should return an empty string. (Yongwei Wu) | |
13 | Solution: Be more strict about invalid characters in the input. | |
14 | Files: src/mbyte.c | |
15 | ||
16 | ||
17 | *** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200 | |
18 | --- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100 | |
19 | *************** | |
20 | *** 133,154 **** | |
21 | static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); | |
22 | static int dbcs_ptr2char __ARGS((char_u *p)); | |
23 | ||
24 | ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from | |
25 | ! * the first byte of a UTF-8 string. Bytes which are illegal when used as the | |
26 | ! * first byte have a one, because these will be used separately. */ | |
27 | static char utf8len_tab[256] = | |
28 | { | |
29 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
30 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
31 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
32 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
33 | ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ | |
34 | ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ | |
35 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
36 | 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, | |
37 | }; | |
38 | ||
39 | /* | |
40 | * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks | |
41 | * in the "xim.log" file. | |
42 | */ | |
43 | --- 133,172 ---- | |
44 | static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); | |
45 | static int dbcs_ptr2char __ARGS((char_u *p)); | |
46 | ||
47 | ! /* | |
48 | ! * Lookup table to quickly get the length in bytes of a UTF-8 character from | |
49 | ! * the first byte of a UTF-8 string. | |
50 | ! * Bytes which are illegal when used as the first byte have a 1. | |
51 | ! * The NUL byte has length 1. | |
52 | ! */ | |
53 | static char utf8len_tab[256] = | |
54 | { | |
55 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
56 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
57 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
58 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
59 | ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
60 | ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
61 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
62 | 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, | |
63 | }; | |
64 | ||
65 | /* | |
66 | + * Like utf8len_tab above, but using a zero for illegal lead bytes. | |
67 | + */ | |
68 | + static char utf8len_tab_zero[256] = | |
69 | + { | |
70 | + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
71 | + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
72 | + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
73 | + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
74 | + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
75 | + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
76 | + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
77 | + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, | |
78 | + }; | |
79 | + | |
80 | + /* | |
81 | * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks | |
82 | * in the "xim.log" file. | |
83 | */ | |
84 | *************** | |
85 | *** 1352,1358 **** | |
86 | if (size > 0 && *p >= 0x80) | |
87 | { | |
88 | if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) | |
89 | ! return 1; | |
90 | c = utf_ptr2char(p); | |
91 | /* An illegal byte is displayed as <xx>. */ | |
92 | if (utf_ptr2len(p) == 1 || c == NUL) | |
93 | --- 1370,1376 ---- | |
94 | if (size > 0 && *p >= 0x80) | |
95 | { | |
96 | if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) | |
97 | ! return 1; /* truncated */ | |
98 | c = utf_ptr2char(p); | |
99 | /* An illegal byte is displayed as <xx>. */ | |
100 | if (utf_ptr2len(p) == 1 || c == NUL) | |
101 | *************** | |
102 | *** 1473,1479 **** | |
103 | if (p[0] < 0x80) /* be quick for ASCII */ | |
104 | return p[0]; | |
105 | ||
106 | ! len = utf8len_tab[p[0]]; | |
107 | if (len > 1 && (p[1] & 0xc0) == 0x80) | |
108 | { | |
109 | if (len == 2) | |
110 | --- 1491,1497 ---- | |
111 | if (p[0] < 0x80) /* be quick for ASCII */ | |
112 | return p[0]; | |
113 | ||
114 | ! len = utf8len_tab_zero[p[0]]; | |
115 | if (len > 1 && (p[1] & 0xc0) == 0x80) | |
116 | { | |
117 | if (len == 2) | |
118 | *************** | |
119 | *** 1723,1728 **** | |
120 | --- 1741,1747 ---- | |
121 | /* | |
122 | * Return length of UTF-8 character, obtained from the first byte. | |
123 | * "b" must be between 0 and 255! | |
124 | + * Returns 1 for an invalid first byte value. | |
125 | */ | |
126 | int | |
127 | utf_byte2len(b) | |
128 | *************** | |
129 | *** 1737,1742 **** | |
130 | --- 1756,1762 ---- | |
131 | * Returns 1 for "". | |
132 | * Returns 1 for an illegal byte sequence (also in incomplete byte seq.). | |
133 | * Returns number > "size" for an incomplete byte sequence. | |
134 | + * Never returns zero. | |
135 | */ | |
136 | int | |
137 | utf_ptr2len_len(p, size) | |
138 | *************** | |
139 | *** 1747,1757 **** | |
140 | int i; | |
141 | int m; | |
142 | ||
143 | ! if (*p == NUL) | |
144 | ! return 1; | |
145 | ! m = len = utf8len_tab[*p]; | |
146 | if (len > size) | |
147 | m = size; /* incomplete byte sequence. */ | |
148 | for (i = 1; i < m; ++i) | |
149 | if ((p[i] & 0xc0) != 0x80) | |
150 | return 1; | |
151 | --- 1767,1779 ---- | |
152 | int i; | |
153 | int m; | |
154 | ||
155 | ! len = utf8len_tab[*p]; | |
156 | ! if (len == 1) | |
157 | ! return 1; /* NUL, ascii or illegal lead byte */ | |
158 | if (len > size) | |
159 | m = size; /* incomplete byte sequence. */ | |
160 | + else | |
161 | + m = len; | |
162 | for (i = 1; i < m; ++i) | |
163 | if ((p[i] & 0xc0) != 0x80) | |
164 | return 1; | |
165 | *************** | |
166 | *** 2505,2510 **** | |
167 | --- 2527,2533 ---- | |
168 | /* | |
169 | * mb_head_off() function pointer. | |
170 | * Return offset from "p" to the first byte of the character it points into. | |
171 | + * If "p" points to the NUL at the end of the string return 0. | |
172 | * Returns 0 when already at the first byte of a character. | |
173 | */ | |
174 | int | |
175 | *************** | |
176 | *** 2524,2530 **** | |
177 | ||
178 | /* It can't be a trailing byte when not using DBCS, at the start of the | |
179 | * string or the previous byte can't start a double-byte. */ | |
180 | ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1) | |
181 | return 0; | |
182 | ||
183 | /* This is slow: need to start at the base and go forward until the | |
184 | --- 2547,2553 ---- | |
185 | ||
186 | /* It can't be a trailing byte when not using DBCS, at the start of the | |
187 | * string or the previous byte can't start a double-byte. */ | |
188 | ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) | |
189 | return 0; | |
190 | ||
191 | /* This is slow: need to start at the base and go forward until the | |
192 | *************** | |
193 | *** 2552,2558 **** | |
194 | * lead byte in the current cell. */ | |
195 | if (p <= base | |
196 | || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) | |
197 | ! || MB_BYTE2LEN(p[-1]) == 1) | |
198 | return 0; | |
199 | ||
200 | /* This is slow: need to start at the base and go forward until the | |
201 | --- 2575,2582 ---- | |
202 | * lead byte in the current cell. */ | |
203 | if (p <= base | |
204 | || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) | |
205 | ! || MB_BYTE2LEN(p[-1]) == 1 | |
206 | ! || *p == NUL) | |
207 | return 0; | |
208 | ||
209 | /* This is slow: need to start at the base and go forward until the | |
210 | *************** | |
211 | *** 2578,2583 **** | |
212 | --- 2602,2608 ---- | |
213 | char_u *q; | |
214 | char_u *s; | |
215 | int c; | |
216 | + int len; | |
217 | #ifdef FEAT_ARABIC | |
218 | char_u *j; | |
219 | #endif | |
220 | *************** | |
221 | *** 2597,2604 **** | |
222 | --q; | |
223 | /* Check for illegal sequence. Do allow an illegal byte after where we | |
224 | * started. */ | |
225 | ! if (utf8len_tab[*q] != (int)(s - q + 1) | |
226 | ! && utf8len_tab[*q] != (int)(p - q + 1)) | |
227 | return 0; | |
228 | ||
229 | if (q <= base) | |
230 | --- 2622,2629 ---- | |
231 | --q; | |
232 | /* Check for illegal sequence. Do allow an illegal byte after where we | |
233 | * started. */ | |
234 | ! len = utf8len_tab[*q]; | |
235 | ! if (len != (int)(s - q + 1) && len != (int)(p - q + 1)) | |
236 | return 0; | |
237 | ||
238 | if (q <= base) | |
239 | *************** | |
240 | *** 2810,2818 **** | |
241 | ||
242 | while (end == NULL ? *p != NUL : p < end) | |
243 | { | |
244 | ! if ((*p & 0xc0) == 0x80) | |
245 | return FALSE; /* invalid lead byte */ | |
246 | - l = utf8len_tab[*p]; | |
247 | if (end != NULL && p + l > end) | |
248 | return FALSE; /* incomplete byte sequence */ | |
249 | ++p; | |
250 | --- 2835,2843 ---- | |
251 | ||
252 | while (end == NULL ? *p != NUL : p < end) | |
253 | { | |
254 | ! l = utf8len_tab_zero[*p]; | |
255 | ! if (l == 0) | |
256 | return FALSE; /* invalid lead byte */ | |
257 | if (end != NULL && p + l > end) | |
258 | return FALSE; /* incomplete byte sequence */ | |
259 | ++p; | |
260 | *************** | |
261 | *** 6117,6128 **** | |
262 | d = retval; | |
263 | for (i = 0; i < len; ++i) | |
264 | { | |
265 | ! l = utf_ptr2len(ptr + i); | |
266 | if (l == 0) | |
267 | *d++ = NUL; | |
268 | else if (l == 1) | |
269 | { | |
270 | ! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i) | |
271 | { | |
272 | /* Incomplete sequence at the end. */ | |
273 | *unconvlenp = len - i; | |
274 | --- 6142,6161 ---- | |
275 | d = retval; | |
276 | for (i = 0; i < len; ++i) | |
277 | { | |
278 | ! l = utf_ptr2len_len(ptr + i, len - i); | |
279 | if (l == 0) | |
280 | *d++ = NUL; | |
281 | else if (l == 1) | |
282 | { | |
283 | ! int l_w = utf8len_tab_zero[ptr[i]]; | |
284 | ! | |
285 | ! if (l_w == 0) | |
286 | ! { | |
287 | ! /* Illegal utf-8 byte cannot be converted */ | |
288 | ! vim_free(retval); | |
289 | ! return NULL; | |
290 | ! } | |
291 | ! if (unconvlenp != NULL && l_w > len - i) | |
292 | { | |
293 | /* Incomplete sequence at the end. */ | |
294 | *unconvlenp = len - i; | |
295 | *** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100 | |
296 | --- src/version.c 2009-12-02 15:00:23.000000000 +0100 | |
297 | *************** | |
298 | *** 683,684 **** | |
299 | --- 683,686 ---- | |
300 | { /* Add new patch number below this line */ | |
301 | + /**/ | |
302 | + 312, | |
303 | /**/ | |
304 | ||
305 | -- | |
306 | hundred-and-one symptoms of being an internet addict: | |
307 | 6. You refuse to go to a vacation spot with no electricity and no phone lines. | |
308 | ||
309 | /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ | |
310 | /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ | |
311 | \\\ download, build and distribute -- http://www.A-A-P.org /// | |
312 | \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |