git.pld-linux.org Git - packages/vim.git/blob - 7.1.310
- typo
[packages/vim.git] / 7.1.310
1 To: vim-dev@vim.org
2 Subject: Patch 7.1.310
3 Fcc: outbox
4 From: Bram Moolenaar <Bram@moolenaar.net>
5 Mime-Version: 1.0
6 Content-Type: text/plain; charset=ISO-8859-1
7 Content-Transfer-Encoding: 8bit
8 ------------
9
10 Patch 7.1.310
11 Problem:    Incomplete utf-8 byte sequence at end of the file is not detected.
12             Accessing memory that wasn't written.
13 Solution:   Check whether the last bytes in the buffer form a valid utf-8
14             character. (mostly by Ben Schmidt)
15             Also fix the reported line number of the error, which was wrong.
16 Files:      src/fileio.c
17
18
19 *** ../vim-7.1.309/src/fileio.c Wed May  7 19:05:55 2008
20 --- src/fileio.c        Wed Jun  4 18:28:48 2008
21 ***************
22 *** 1288,1299 ****
23   #ifdef FEAT_MBYTE
24                     else if (conv_restlen > 0)
25                     {
26 !                       /* Reached end-of-file but some trailing bytes could
27 !                        * not be converted.  Truncated file? */
28 !                       if (conv_error == 0)
29 !                           conv_error = linecnt;
30 !                       if (bad_char_behavior != BAD_DROP)
31                         {
32                             fio_flags = 0;      /* don't convert this */
33   # ifdef USE_ICONV
34                             if (iconv_fd != (iconv_t)-1)
35 --- 1288,1336 ----
36   #ifdef FEAT_MBYTE
37                     else if (conv_restlen > 0)
38                     {
39 !                       /*
40 !                        * Reached end-of-file but some trailing bytes could
41 !                        * not be converted.  Truncated file?
42 !                        */
43
44 !                       /* When we did a conversion report an error. */
45 !                       if (fio_flags != 0
46 ! # ifdef USE_ICONV
47 !                               || iconv_fd != (iconv_t)-1
48 ! # endif
49 !                          )
50                         {
51 +                           if (conv_error == 0)
52 +                               conv_error = curbuf->b_ml.ml_line_count
53 +                                                               - linecnt + 1;
54 +                       }
55 +                       /* Remember the first linenr with an illegal byte */
56 +                       else if (illegal_byte == 0)
57 +                           illegal_byte = curbuf->b_ml.ml_line_count
58 +                                                               - linecnt + 1;
59 +                       if (bad_char_behavior == BAD_DROP)
60 +                       {
61 +                           *(ptr - conv_restlen) = NUL;
62 +                           conv_restlen = 0;
63 +                       }
64 +                       else
65 +                       {
66 +                           /* Replace the trailing bytes with the replacement
67 +                            * character if we were converting; if we weren't,
68 +                            * leave the UTF8 checking code to do it, as it
69 +                            * works slightly differently. */
70 +                           if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
71 + # ifdef USE_ICONV
72 +                                   || iconv_fd != (iconv_t)-1
73 + # endif
74 +                              ))
75 +                           {
76 +                               while (conv_restlen > 0)
77 +                               {
78 +                                   *(--ptr) = bad_char_behavior;
79 +                                   --conv_restlen;
80 +                               }
81 +                           }
82                             fio_flags = 0;      /* don't convert this */
83   # ifdef USE_ICONV
84                             if (iconv_fd != (iconv_t)-1)
85 ***************
86 *** 1302,1321 ****
87                                 iconv_fd = (iconv_t)-1;
88                             }
89   # endif
90 -                           if (bad_char_behavior == BAD_KEEP)
91 -                           {
92 -                               /* Keep the trailing bytes as-is. */
93 -                               size = conv_restlen;
94 -                               ptr -= conv_restlen;
95 -                           }
96 -                           else
97 -                           {
98 -                               /* Replace the trailing bytes with the
99 -                                * replacement character. */
100 -                               size = 1;
101 -                               *--ptr = bad_char_behavior;
102 -                           }
103 -                           conv_restlen = 0;
104                         }
105                     }
106   #endif
107 --- 1339,1344 ----
108 ***************
109 *** 1397,1402 ****
110 --- 1420,1430 ----
111                     goto retry;
112                 }
113             }
114
115 +           /* Include not converted bytes. */
116 +           ptr -= conv_restlen;
117 +           size += conv_restlen;
118 +           conv_restlen = 0;
119   #endif
120             /*
121              * Break here for a read error or end-of-file.
122 ***************
123 *** 1406,1416 ****
124   
125   #ifdef FEAT_MBYTE
126   
127 -           /* Include not converted bytes. */
128 -           ptr -= conv_restlen;
129 -           size += conv_restlen;
130 -           conv_restlen = 0;
131
132   # ifdef USE_ICONV
133             if (iconv_fd != (iconv_t)-1)
134             {
135 --- 1434,1439 ----
136 ***************
137 *** 1872,1883 ****
138                 size = (long)((ptr + real_size) - dest);
139                 ptr = dest;
140             }
141 !           else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
142             {
143 !               /* Reading UTF-8: Check if the bytes are valid UTF-8.
144 !                * Need to start before "ptr" when part of the character was
145 !                * read in the previous read() call. */
146 !               for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
147                 {
148                     int  todo = (int)((ptr + size) - p);
149                     int  l;
150 --- 1895,1906 ----
151                 size = (long)((ptr + real_size) - dest);
152                 ptr = dest;
153             }
154 !           else if (enc_utf8 && !curbuf->b_p_bin)
155             {
156 !               int  incomplete_tail = FALSE;
157
158 !               /* Reading UTF-8: Check if the bytes are valid UTF-8. */
159 !               for (p = ptr; ; ++p)
160                 {
161                     int  todo = (int)((ptr + size) - p);
162                     int  l;
163 ***************
164 *** 1891,1933 ****
165                          * read() will get the next bytes, we'll check it
166                          * then. */
167                         l = utf_ptr2len_len(p, todo);
168 !                       if (l > todo)
169                         {
170 !                           /* Incomplete byte sequence, the next read()
171 !                            * should get them and check the bytes. */
172 !                           p += todo;
173 !                           break;
174                         }
175 !                       if (l == 1)
176                         {
177                             /* Illegal byte.  If we can try another encoding
178 !                            * do that. */
179 !                           if (can_retry)
180                                 break;
181
182 -                           /* Remember the first linenr with an illegal byte */
183 -                           if (illegal_byte == 0)
184 -                               illegal_byte = readfile_linenr(linecnt, ptr, p);
185   # ifdef USE_ICONV
186                             /* When we did a conversion report an error. */
187                             if (iconv_fd != (iconv_t)-1 && conv_error == 0)
188                                 conv_error = readfile_linenr(linecnt, ptr, p);
189   # endif
190   
191                             /* Drop, keep or replace the bad byte. */
192                             if (bad_char_behavior == BAD_DROP)
193                             {
194 !                               mch_memmove(p, p+1, todo - 1);
195                                 --p;
196                                 --size;
197                             }
198                             else if (bad_char_behavior != BAD_KEEP)
199                                 *p = bad_char_behavior;
200                         }
201 !                       p += l - 1;
202                     }
203                 }
204 !               if (p < ptr + size)
205                 {
206                     /* Detected a UTF-8 error. */
207   rewind_retry:
208 --- 1914,1969 ----
209                          * read() will get the next bytes, we'll check it
210                          * then. */
211                         l = utf_ptr2len_len(p, todo);
212 !                       if (l > todo && !incomplete_tail)
213                         {
214 !                           /* Avoid retrying with a different encoding when
215 !                            * a truncated file is more likely, or attempting
216 !                            * to read the rest of an incomplete sequence when
217 !                            * we have already done so. */
218 !                           if (p > ptr || filesize > 0)
219 !                               incomplete_tail = TRUE;
220 !                           /* Incomplete byte sequence, move it to conv_rest[]
221 !                            * and try to read the rest of it, unless we've
222 !                            * already done so. */
223 !                           if (p > ptr)
224 !                           {
225 !                               conv_restlen = todo;
226 !                               mch_memmove(conv_rest, p, conv_restlen);
227 !                               size -= conv_restlen;
228 !                               break;
229 !                           }
230                         }
231 !                       if (l == 1 || l > todo)
232                         {
233                             /* Illegal byte.  If we can try another encoding
234 !                            * do that, unless at EOF where a truncated
235 !                            * file is more likely than a conversion error. */
236 !                           if (can_retry && !incomplete_tail)
237                                 break;
238   # ifdef USE_ICONV
239                             /* When we did a conversion report an error. */
240                             if (iconv_fd != (iconv_t)-1 && conv_error == 0)
241                                 conv_error = readfile_linenr(linecnt, ptr, p);
242   # endif
243 +                           /* Remember the first linenr with an illegal byte */
244 +                           if (conv_error == 0 && illegal_byte == 0)
245 +                               illegal_byte = readfile_linenr(linecnt, ptr, p);
246   
247                             /* Drop, keep or replace the bad byte. */
248                             if (bad_char_behavior == BAD_DROP)
249                             {
250 !                               mch_memmove(p, p + 1, todo - 1);
251                                 --p;
252                                 --size;
253                             }
254                             else if (bad_char_behavior != BAD_KEEP)
255                                 *p = bad_char_behavior;
256                         }
257 !                       else
258 !                           p += l - 1;
259                     }
260                 }
261 !               if (p < ptr + size && !incomplete_tail)
262                 {
263                     /* Detected a UTF-8 error. */
264   rewind_retry:
265 *** ../vim-7.1.309/src/version.c        Wed Jun  4 15:27:43 2008
266 --- src/version.c       Wed Jun  4 19:35:16 2008
267 ***************
268 *** 668,669 ****
269 --- 673,676 ----
270   {   /* Add new patch number below this line */
271 + /**/
272 +     310,
273   /**/
274
275 -- 
276 Normal people believe that if it ain't broke, don't fix it.  Engineers believe
277 that if it ain't broke, it doesn't have enough features yet.
278                                 (Scott Adams - The Dilbert principle)
279
280  /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
281 ///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
282 \\\        download, build and distribute -- http://www.A-A-P.org        ///
283  \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///
This page took 0.043884 seconds and 3 git commands to generate.