]>
Commit | Line | Data |
---|---|---|
50c86d7b AG |
1 | To: vim-dev@vim.org |
2 | Subject: Patch 7.1.310 | |
3 | Fcc: outbox | |
4 | From: Bram Moolenaar <Bram@moolenaar.net> | |
5 | Mime-Version: 1.0 | |
6 | Content-Type: text/plain; charset=ISO-8859-1 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ------------ | |
9 | ||
10 | Patch 7.1.310 | |
11 | Problem: An incomplete UTF-8 byte sequence at the end of the file is not detected. | |
12 | Memory that was never written is accessed. | |
13 | Solution: Check the last bytes in the buffer for being a valid utf-8 | |
14 | character. (mostly by Ben Schmidt) | |
15 | Also fix that the reported line number of the error was wrong. | |
16 | Files: src/fileio.c | |
17 | ||
18 | ||
19 | *** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008 | |
20 | --- src/fileio.c Wed Jun 4 18:28:48 2008 | |
21 | *************** | |
22 | *** 1288,1299 **** | |
23 | #ifdef FEAT_MBYTE | |
24 | else if (conv_restlen > 0) | |
25 | { | |
26 | ! /* Reached end-of-file but some trailing bytes could | |
27 | ! * not be converted. Truncated file? */ | |
28 | ! if (conv_error == 0) | |
29 | ! conv_error = linecnt; | |
30 | ! if (bad_char_behavior != BAD_DROP) | |
31 | { | |
32 | fio_flags = 0; /* don't convert this */ | |
33 | # ifdef USE_ICONV | |
34 | if (iconv_fd != (iconv_t)-1) | |
35 | --- 1288,1336 ---- | |
36 | #ifdef FEAT_MBYTE | |
37 | else if (conv_restlen > 0) | |
38 | { | |
39 | ! /* | |
40 | ! * Reached end-of-file but some trailing bytes could | |
41 | ! * not be converted. Truncated file? | |
42 | ! */ | |
43 | ! | |
44 | ! /* When we did a conversion report an error. */ | |
45 | ! if (fio_flags != 0 | |
46 | ! # ifdef USE_ICONV | |
47 | ! || iconv_fd != (iconv_t)-1 | |
48 | ! # endif | |
49 | ! ) | |
50 | { | |
51 | + if (conv_error == 0) | |
52 | + conv_error = curbuf->b_ml.ml_line_count | |
53 | + - linecnt + 1; | |
54 | + } | |
55 | + /* Remember the first linenr with an illegal byte */ | |
56 | + else if (illegal_byte == 0) | |
57 | + illegal_byte = curbuf->b_ml.ml_line_count | |
58 | + - linecnt + 1; | |
59 | + if (bad_char_behavior == BAD_DROP) | |
60 | + { | |
61 | + *(ptr - conv_restlen) = NUL; | |
62 | + conv_restlen = 0; | |
63 | + } | |
64 | + else | |
65 | + { | |
66 | + /* Replace the trailing bytes with the replacement | |
67 | + * character if we were converting; if we weren't, | |
68 | + * leave the UTF8 checking code to do it, as it | |
69 | + * works slightly differently. */ | |
70 | + if (bad_char_behavior != BAD_KEEP && (fio_flags != 0 | |
71 | + # ifdef USE_ICONV | |
72 | + || iconv_fd != (iconv_t)-1 | |
73 | + # endif | |
74 | + )) | |
75 | + { | |
76 | + while (conv_restlen > 0) | |
77 | + { | |
78 | + *(--ptr) = bad_char_behavior; | |
79 | + --conv_restlen; | |
80 | + } | |
81 | + } | |
82 | fio_flags = 0; /* don't convert this */ | |
83 | # ifdef USE_ICONV | |
84 | if (iconv_fd != (iconv_t)-1) | |
85 | *************** | |
86 | *** 1302,1321 **** | |
87 | iconv_fd = (iconv_t)-1; | |
88 | } | |
89 | # endif | |
90 | - if (bad_char_behavior == BAD_KEEP) | |
91 | - { | |
92 | - /* Keep the trailing bytes as-is. */ | |
93 | - size = conv_restlen; | |
94 | - ptr -= conv_restlen; | |
95 | - } | |
96 | - else | |
97 | - { | |
98 | - /* Replace the trailing bytes with the | |
99 | - * replacement character. */ | |
100 | - size = 1; | |
101 | - *--ptr = bad_char_behavior; | |
102 | - } | |
103 | - conv_restlen = 0; | |
104 | } | |
105 | } | |
106 | #endif | |
107 | --- 1339,1344 ---- | |
108 | *************** | |
109 | *** 1397,1402 **** | |
110 | --- 1420,1430 ---- | |
111 | goto retry; | |
112 | } | |
113 | } | |
114 | + | |
115 | + /* Include not converted bytes. */ | |
116 | + ptr -= conv_restlen; | |
117 | + size += conv_restlen; | |
118 | + conv_restlen = 0; | |
119 | #endif | |
120 | /* | |
121 | * Break here for a read error or end-of-file. | |
122 | *************** | |
123 | *** 1406,1416 **** | |
124 | ||
125 | #ifdef FEAT_MBYTE | |
126 | ||
127 | - /* Include not converted bytes. */ | |
128 | - ptr -= conv_restlen; | |
129 | - size += conv_restlen; | |
130 | - conv_restlen = 0; | |
131 | - | |
132 | # ifdef USE_ICONV | |
133 | if (iconv_fd != (iconv_t)-1) | |
134 | { | |
135 | --- 1434,1439 ---- | |
136 | *************** | |
137 | *** 1872,1883 **** | |
138 | size = (long)((ptr + real_size) - dest); | |
139 | ptr = dest; | |
140 | } | |
141 | ! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin) | |
142 | { | |
143 | ! /* Reading UTF-8: Check if the bytes are valid UTF-8. | |
144 | ! * Need to start before "ptr" when part of the character was | |
145 | ! * read in the previous read() call. */ | |
146 | ! for (p = ptr - utf_head_off(buffer, ptr); ; ++p) | |
147 | { | |
148 | int todo = (int)((ptr + size) - p); | |
149 | int l; | |
150 | --- 1895,1906 ---- | |
151 | size = (long)((ptr + real_size) - dest); | |
152 | ptr = dest; | |
153 | } | |
154 | ! else if (enc_utf8 && !curbuf->b_p_bin) | |
155 | { | |
156 | ! int incomplete_tail = FALSE; | |
157 | ! | |
158 | ! /* Reading UTF-8: Check if the bytes are valid UTF-8. */ | |
159 | ! for (p = ptr; ; ++p) | |
160 | { | |
161 | int todo = (int)((ptr + size) - p); | |
162 | int l; | |
163 | *************** | |
164 | *** 1891,1933 **** | |
165 | * read() will get the next bytes, we'll check it | |
166 | * then. */ | |
167 | l = utf_ptr2len_len(p, todo); | |
168 | ! if (l > todo) | |
169 | { | |
170 | ! /* Incomplete byte sequence, the next read() | |
171 | ! * should get them and check the bytes. */ | |
172 | ! p += todo; | |
173 | ! break; | |
174 | } | |
175 | ! if (l == 1) | |
176 | { | |
177 | /* Illegal byte. If we can try another encoding | |
178 | ! * do that. */ | |
179 | ! if (can_retry) | |
180 | break; | |
181 | - | |
182 | - /* Remember the first linenr with an illegal byte */ | |
183 | - if (illegal_byte == 0) | |
184 | - illegal_byte = readfile_linenr(linecnt, ptr, p); | |
185 | # ifdef USE_ICONV | |
186 | /* When we did a conversion report an error. */ | |
187 | if (iconv_fd != (iconv_t)-1 && conv_error == 0) | |
188 | conv_error = readfile_linenr(linecnt, ptr, p); | |
189 | # endif | |
190 | ||
191 | /* Drop, keep or replace the bad byte. */ | |
192 | if (bad_char_behavior == BAD_DROP) | |
193 | { | |
194 | ! mch_memmove(p, p+1, todo - 1); | |
195 | --p; | |
196 | --size; | |
197 | } | |
198 | else if (bad_char_behavior != BAD_KEEP) | |
199 | *p = bad_char_behavior; | |
200 | } | |
201 | ! p += l - 1; | |
202 | } | |
203 | } | |
204 | ! if (p < ptr + size) | |
205 | { | |
206 | /* Detected a UTF-8 error. */ | |
207 | rewind_retry: | |
208 | --- 1914,1969 ---- | |
209 | * read() will get the next bytes, we'll check it | |
210 | * then. */ | |
211 | l = utf_ptr2len_len(p, todo); | |
212 | ! if (l > todo && !incomplete_tail) | |
213 | { | |
214 | ! /* Avoid retrying with a different encoding when | |
215 | ! * a truncated file is more likely, or attempting | |
216 | ! * to read the rest of an incomplete sequence when | |
217 | ! * we have already done so. */ | |
218 | ! if (p > ptr || filesize > 0) | |
219 | ! incomplete_tail = TRUE; | |
220 | ! /* Incomplete byte sequence, move it to conv_rest[] | |
221 | ! * and try to read the rest of it, unless we've | |
222 | ! * already done so. */ | |
223 | ! if (p > ptr) | |
224 | ! { | |
225 | ! conv_restlen = todo; | |
226 | ! mch_memmove(conv_rest, p, conv_restlen); | |
227 | ! size -= conv_restlen; | |
228 | ! break; | |
229 | ! } | |
230 | } | |
231 | ! if (l == 1 || l > todo) | |
232 | { | |
233 | /* Illegal byte. If we can try another encoding | |
234 | ! * do that, unless at EOF where a truncated | |
235 | ! * file is more likely than a conversion error. */ | |
236 | ! if (can_retry && !incomplete_tail) | |
237 | break; | |
238 | # ifdef USE_ICONV | |
239 | /* When we did a conversion report an error. */ | |
240 | if (iconv_fd != (iconv_t)-1 && conv_error == 0) | |
241 | conv_error = readfile_linenr(linecnt, ptr, p); | |
242 | # endif | |
243 | + /* Remember the first linenr with an illegal byte */ | |
244 | + if (conv_error == 0 && illegal_byte == 0) | |
245 | + illegal_byte = readfile_linenr(linecnt, ptr, p); | |
246 | ||
247 | /* Drop, keep or replace the bad byte. */ | |
248 | if (bad_char_behavior == BAD_DROP) | |
249 | { | |
250 | ! mch_memmove(p, p + 1, todo - 1); | |
251 | --p; | |
252 | --size; | |
253 | } | |
254 | else if (bad_char_behavior != BAD_KEEP) | |
255 | *p = bad_char_behavior; | |
256 | } | |
257 | ! else | |
258 | ! p += l - 1; | |
259 | } | |
260 | } | |
261 | ! if (p < ptr + size && !incomplete_tail) | |
262 | { | |
263 | /* Detected a UTF-8 error. */ | |
264 | rewind_retry: | |
265 | *** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008 | |
266 | --- src/version.c Wed Jun 4 19:35:16 2008 | |
267 | *************** | |
268 | *** 668,669 **** | |
269 | --- 673,676 ---- | |
270 | { /* Add new patch number below this line */ | |
271 | + /**/ | |
272 | + 310, | |
273 | /**/ | |
274 | ||
275 | -- | |
276 | Normal people believe that if it ain't broke, don't fix it. Engineers believe | |
277 | that if it ain't broke, it doesn't have enough features yet. | |
278 | (Scott Adams - The Dilbert principle) | |
279 | ||
280 | /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ | |
281 | /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ | |
282 | \\\ download, build and distribute -- http://www.A-A-P.org /// | |
283 | \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |