4 From: Bram Moolenaar <Bram@moolenaar.net>
6 Content-Type: text/plain; charset=ISO-8859-1
7 Content-Transfer-Encoding: 8bit
11 Problem: An incomplete UTF-8 byte sequence at the end of the file is not
12 detected, and memory that was never written may be accessed.
13 Solution: Check that the last bytes in the buffer form a valid UTF-8
14 character. (mostly by Ben Schmidt)
15 Also fix the line number reported for the error, which was wrong.
19 *** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008
20 --- src/fileio.c Wed Jun 4 18:28:48 2008
24 else if (conv_restlen > 0)
26 ! /* Reached end-of-file but some trailing bytes could
27 ! * not be converted. Truncated file? */
28 ! if (conv_error == 0)
29 ! conv_error = linecnt;
30 ! if (bad_char_behavior != BAD_DROP)
32 fio_flags = 0; /* don't convert this */
34 if (iconv_fd != (iconv_t)-1)
37 else if (conv_restlen > 0)
40 ! * Reached end-of-file but some trailing bytes could
41 ! * not be converted. Truncated file?
44 ! /* When we did a conversion report an error. */
47 ! || iconv_fd != (iconv_t)-1
51 + if (conv_error == 0)
52 + conv_error = curbuf->b_ml.ml_line_count
55 + /* Remember the first linenr with an illegal byte */
56 + else if (illegal_byte == 0)
57 + illegal_byte = curbuf->b_ml.ml_line_count
59 + if (bad_char_behavior == BAD_DROP)
61 + *(ptr - conv_restlen) = NUL;
66 + /* Replace the trailing bytes with the replacement
67 + * character if we were converting; if we weren't,
68 + * leave the UTF8 checking code to do it, as it
69 + * works slightly differently. */
70 + if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
72 + || iconv_fd != (iconv_t)-1
76 + while (conv_restlen > 0)
78 + *(--ptr) = bad_char_behavior;
82 fio_flags = 0; /* don't convert this */
84 if (iconv_fd != (iconv_t)-1)
87 iconv_fd = (iconv_t)-1;
90 - if (bad_char_behavior == BAD_KEEP)
92 - /* Keep the trailing bytes as-is. */
93 - size = conv_restlen;
94 - ptr -= conv_restlen;
98 - /* Replace the trailing bytes with the
99 - * replacement character. */
101 - *--ptr = bad_char_behavior;
115 + /* Include not converted bytes. */
116 + ptr -= conv_restlen;
117 + size += conv_restlen;
121 * Break here for a read error or end-of-file.
127 - /* Include not converted bytes. */
128 - ptr -= conv_restlen;
129 - size += conv_restlen;
133 if (iconv_fd != (iconv_t)-1)
138 size = (long)((ptr + real_size) - dest);
141 ! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
143 ! /* Reading UTF-8: Check if the bytes are valid UTF-8.
144 ! * Need to start before "ptr" when part of the character was
145 ! * read in the previous read() call. */
146 ! for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
148 int todo = (int)((ptr + size) - p);
151 size = (long)((ptr + real_size) - dest);
154 ! else if (enc_utf8 && !curbuf->b_p_bin)
156 ! int incomplete_tail = FALSE;
158 ! /* Reading UTF-8: Check if the bytes are valid UTF-8. */
159 ! for (p = ptr; ; ++p)
161 int todo = (int)((ptr + size) - p);
165 * read() will get the next bytes, we'll check it
167 l = utf_ptr2len_len(p, todo);
170 ! /* Incomplete byte sequence, the next read()
171 ! * should get them and check the bytes. */
177 /* Illegal byte. If we can try another encoding
182 - /* Remember the first linenr with an illegal byte */
183 - if (illegal_byte == 0)
184 - illegal_byte = readfile_linenr(linecnt, ptr, p);
186 /* When we did a conversion report an error. */
187 if (iconv_fd != (iconv_t)-1 && conv_error == 0)
188 conv_error = readfile_linenr(linecnt, ptr, p);
191 /* Drop, keep or replace the bad byte. */
192 if (bad_char_behavior == BAD_DROP)
194 ! mch_memmove(p, p+1, todo - 1);
198 else if (bad_char_behavior != BAD_KEEP)
199 *p = bad_char_behavior;
204 ! if (p < ptr + size)
206 /* Detected a UTF-8 error. */
209 * read() will get the next bytes, we'll check it
211 l = utf_ptr2len_len(p, todo);
212 ! if (l > todo && !incomplete_tail)
214 ! /* Avoid retrying with a different encoding when
215 ! * a truncated file is more likely, or attempting
216 ! * to read the rest of an incomplete sequence when
217 ! * we have already done so. */
218 ! if (p > ptr || filesize > 0)
219 ! incomplete_tail = TRUE;
220 ! /* Incomplete byte sequence, move it to conv_rest[]
221 ! * and try to read the rest of it, unless we've
222 ! * already done so. */
225 ! conv_restlen = todo;
226 ! mch_memmove(conv_rest, p, conv_restlen);
227 ! size -= conv_restlen;
231 ! if (l == 1 || l > todo)
233 /* Illegal byte. If we can try another encoding
234 ! * do that, unless at EOF where a truncated
235 ! * file is more likely than a conversion error. */
236 ! if (can_retry && !incomplete_tail)
239 /* When we did a conversion report an error. */
240 if (iconv_fd != (iconv_t)-1 && conv_error == 0)
241 conv_error = readfile_linenr(linecnt, ptr, p);
243 + /* Remember the first linenr with an illegal byte */
244 + if (conv_error == 0 && illegal_byte == 0)
245 + illegal_byte = readfile_linenr(linecnt, ptr, p);
247 /* Drop, keep or replace the bad byte. */
248 if (bad_char_behavior == BAD_DROP)
250 ! mch_memmove(p, p + 1, todo - 1);
254 else if (bad_char_behavior != BAD_KEEP)
255 *p = bad_char_behavior;
261 ! if (p < ptr + size && !incomplete_tail)
263 /* Detected a UTF-8 error. */
265 *** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008
266 --- src/version.c Wed Jun 4 19:35:16 2008
270 { /* Add new patch number below this line */
276 Normal people believe that if it ain't broke, don't fix it. Engineers believe
277 that if it ain't broke, it doesn't have enough features yet.
278 (Scott Adams - The Dilbert principle)
280 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
281 /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
282 \\\ download, build and distribute -- http://www.A-A-P.org ///
283 \\\ help me help AIDS victims -- http://ICCF-Holland.org ///