[packages/vim.git] / 7.1.310

To: vim-dev@vim.org
Subject: Patch 7.1.310
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
------------

Patch 7.1.310
Problem:    Incomplete utf-8 byte sequence at end of the file is not detected.
	    Accessing memory that wasn't written.
Solution:   Check the last bytes in the buffer for being a valid utf-8
	    character. (mostly by Ben Schmidt)
	    Also fix that the reported line number of the error was wrong.
Files:	    src/fileio.c


*** ../vim-7.1.309/src/fileio.c	Wed May  7 19:05:55 2008
--- src/fileio.c	Wed Jun  4 18:28:48 2008
***************
*** 1288,1299 ****
  #ifdef FEAT_MBYTE
  		    else if (conv_restlen > 0)
  		    {
! 			/* Reached end-of-file but some trailing bytes could
! 			 * not be converted.  Truncated file? */
! 			if (conv_error == 0)
! 			    conv_error = linecnt;
! 			if (bad_char_behavior != BAD_DROP)
  			{
  			    fio_flags = 0;	/* don't convert this */
  # ifdef USE_ICONV
  			    if (iconv_fd != (iconv_t)-1)
--- 1288,1336 ----
  #ifdef FEAT_MBYTE
  		    else if (conv_restlen > 0)
  		    {
! 			/*
! 			 * Reached end-of-file but some trailing bytes could
! 			 * not be converted.  Truncated file?
! 			 */
! 
! 			/* When we did a conversion report an error. */
! 			if (fio_flags != 0
! # ifdef USE_ICONV
! 				|| iconv_fd != (iconv_t)-1
! # endif
! 			   )
  			{
+ 			    if (conv_error == 0)
+ 				conv_error = curbuf->b_ml.ml_line_count
+ 								- linecnt + 1;
+ 			}
+ 			/* Remember the first linenr with an illegal byte */
+ 			else if (illegal_byte == 0)
+ 			    illegal_byte = curbuf->b_ml.ml_line_count
+ 								- linecnt + 1;
+ 			if (bad_char_behavior == BAD_DROP)
+ 			{
+ 			    *(ptr - conv_restlen) = NUL;
+ 			    conv_restlen = 0;
+ 			}
+ 			else
+ 			{
+ 			    /* Replace the trailing bytes with the replacement
+ 			     * character if we were converting; if we weren't,
+ 			     * leave the UTF8 checking code to do it, as it
+ 			     * works slightly differently. */
+ 			    if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
+ # ifdef USE_ICONV
+ 				    || iconv_fd != (iconv_t)-1
+ # endif
+ 			       ))
+ 			    {
+ 				while (conv_restlen > 0)
+ 				{
+ 				    *(--ptr) = bad_char_behavior;
+ 				    --conv_restlen;
+ 				}
+ 			    }
  			    fio_flags = 0;	/* don't convert this */
  # ifdef USE_ICONV
  			    if (iconv_fd != (iconv_t)-1)
***************
*** 1302,1321 ****
  				iconv_fd = (iconv_t)-1;
  			    }
  # endif
- 			    if (bad_char_behavior == BAD_KEEP)
- 			    {
- 				/* Keep the trailing bytes as-is. */
- 				size = conv_restlen;
- 				ptr -= conv_restlen;
- 			    }
- 			    else
- 			    {
- 				/* Replace the trailing bytes with the
- 				 * replacement character. */
- 				size = 1;
- 				*--ptr = bad_char_behavior;
- 			    }
- 			    conv_restlen = 0;
  			}
  		    }
  #endif
--- 1339,1344 ----
***************
*** 1397,1402 ****
--- 1420,1430 ----
  		    goto retry;
  		}
  	    }
+ 
+ 	    /* Include not converted bytes. */
+ 	    ptr -= conv_restlen;
+ 	    size += conv_restlen;
+ 	    conv_restlen = 0;
  #endif
  	    /*
  	     * Break here for a read error or end-of-file.
***************
*** 1406,1416 ****
  
  #ifdef FEAT_MBYTE
  
- 	    /* Include not converted bytes. */
- 	    ptr -= conv_restlen;
- 	    size += conv_restlen;
- 	    conv_restlen = 0;
- 
  # ifdef USE_ICONV
  	    if (iconv_fd != (iconv_t)-1)
  	    {
--- 1434,1439 ----
***************
*** 1872,1883 ****
  		size = (long)((ptr + real_size) - dest);
  		ptr = dest;
  	    }
! 	    else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
  	    {
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8.
! 		 * Need to start before "ptr" when part of the character was
! 		 * read in the previous read() call. */
! 		for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
  		{
  		    int	 todo = (int)((ptr + size) - p);
  		    int	 l;
--- 1895,1906 ----
  		size = (long)((ptr + real_size) - dest);
  		ptr = dest;
  	    }
! 	    else if (enc_utf8 && !curbuf->b_p_bin)
  	    {
! 		int  incomplete_tail = FALSE;
! 
! 		/* Reading UTF-8: Check if the bytes are valid UTF-8. */
! 		for (p = ptr; ; ++p)
  		{
  		    int	 todo = (int)((ptr + size) - p);
  		    int	 l;
***************
*** 1891,1933 ****
  			 * read() will get the next bytes, we'll check it
  			 * then. */
  			l = utf_ptr2len_len(p, todo);
! 			if (l > todo)
  			{
! 			    /* Incomplete byte sequence, the next read()
! 			     * should get them and check the bytes. */
! 			    p += todo;
! 			    break;
  			}
! 			if (l == 1)
  			{
  			    /* Illegal byte.  If we can try another encoding
! 			     * do that. */
! 			    if (can_retry)
  				break;
- 
- 			    /* Remember the first linenr with an illegal byte */
- 			    if (illegal_byte == 0)
- 				illegal_byte = readfile_linenr(linecnt, ptr, p);
  # ifdef USE_ICONV
  			    /* When we did a conversion report an error. */
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
  				conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
  
  			    /* Drop, keep or replace the bad byte. */
  			    if (bad_char_behavior == BAD_DROP)
  			    {
! 				mch_memmove(p, p+1, todo - 1);
  				--p;
  				--size;
  			    }
  			    else if (bad_char_behavior != BAD_KEEP)
  				*p = bad_char_behavior;
  			}
! 			p += l - 1;
  		    }
  		}
! 		if (p < ptr + size)
  		{
  		    /* Detected a UTF-8 error. */
  rewind_retry:
--- 1914,1969 ----
  			 * read() will get the next bytes, we'll check it
  			 * then. */
  			l = utf_ptr2len_len(p, todo);
! 			if (l > todo && !incomplete_tail)
  			{
! 			    /* Avoid retrying with a different encoding when
! 			     * a truncated file is more likely, or attempting
! 			     * to read the rest of an incomplete sequence when
! 			     * we have already done so. */
! 			    if (p > ptr || filesize > 0)
! 				incomplete_tail = TRUE;
! 			    /* Incomplete byte sequence, move it to conv_rest[]
! 			     * and try to read the rest of it, unless we've
! 			     * already done so. */
! 			    if (p > ptr)
! 			    {
! 				conv_restlen = todo;
! 				mch_memmove(conv_rest, p, conv_restlen);
! 				size -= conv_restlen;
! 				break;
! 			    }
  			}
! 			if (l == 1 || l > todo)
  			{
  			    /* Illegal byte.  If we can try another encoding
! 			     * do that, unless at EOF where a truncated
! 			     * file is more likely than a conversion error. */
! 			    if (can_retry && !incomplete_tail)
  				break;
  # ifdef USE_ICONV
  			    /* When we did a conversion report an error. */
  			    if (iconv_fd != (iconv_t)-1 && conv_error == 0)
  				conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
+ 			    /* Remember the first linenr with an illegal byte */
+ 			    if (conv_error == 0 && illegal_byte == 0)
+ 				illegal_byte = readfile_linenr(linecnt, ptr, p);
  
  			    /* Drop, keep or replace the bad byte. */
  			    if (bad_char_behavior == BAD_DROP)
  			    {
! 				mch_memmove(p, p + 1, todo - 1);
  				--p;
  				--size;
  			    }
  			    else if (bad_char_behavior != BAD_KEEP)
  				*p = bad_char_behavior;
  			}
! 			else
! 			    p += l - 1;
  		    }
  		}
! 		if (p < ptr + size && !incomplete_tail)
  		{
  		    /* Detected a UTF-8 error. */
  rewind_retry:
*** ../vim-7.1.309/src/version.c	Wed Jun  4 15:27:43 2008
--- src/version.c	Wed Jun  4 19:35:16 2008
***************
*** 668,669 ****
--- 673,676 ----
  {   /* Add new patch number below this line */
+ /**/
+     310,
  /**/

-- 
Normal people believe that if it ain't broke, don't fix it.  Engineers believe
that if it ain't broke, it doesn't have enough features yet.
				(Scott Adams - The Dilbert principle)

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\        download, build and distribute -- http://www.A-A-P.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///
Commit	Line	Data
50c86d7b AG	1	To: vim-dev@vim.org
	2	Subject: Patch 7.1.310
	3	Fcc: outbox
	4	From: Bram Moolenaar <Bram@moolenaar.net>
	5	Mime-Version: 1.0
	6	Content-Type: text/plain; charset=ISO-8859-1
	7	Content-Transfer-Encoding: 8bit
	8	------------
	9
	10	Patch 7.1.310
	11	Problem: Incomplete utf-8 byte sequence at end of the file is not detected.
	12	Accessing memory that wasn't written.
	13	Solution: Check the last bytes in the buffer for being a valid utf-8
	14	character. (mostly by Ben Schmidt)
	15	Also fix that the reported line number of the error was wrong.
	16	Files: src/fileio.c
	17
	18
	19	*** ../vim-7.1.309/src/fileio.c Wed May 7 19:05:55 2008
	20	--- src/fileio.c Wed Jun 4 18:28:48 2008
	21	***************
	22	*** 1288,1299 ****
	23	#ifdef FEAT_MBYTE
	24	else if (conv_restlen > 0)
	25	{
	26	! /* Reached end-of-file but some trailing bytes could
	27	! * not be converted. Truncated file? */
	28	! if (conv_error == 0)
	29	! conv_error = linecnt;
	30	! if (bad_char_behavior != BAD_DROP)
	31	{
	32	fio_flags = 0; /* don't convert this */
	33	# ifdef USE_ICONV
	34	if (iconv_fd != (iconv_t)-1)
	35	--- 1288,1336 ----
	36	#ifdef FEAT_MBYTE
	37	else if (conv_restlen > 0)
	38	{
	39	! /*
	40	! * Reached end-of-file but some trailing bytes could
	41	! * not be converted. Truncated file?
	42	! */
	43	!
	44	! /* When we did a conversion report an error. */
	45	! if (fio_flags != 0
	46	! # ifdef USE_ICONV
	47	! || iconv_fd != (iconv_t)-1
	48	! # endif
	49	! )
	50	{
	51	+ if (conv_error == 0)
	52	+ conv_error = curbuf->b_ml.ml_line_count
	53	+ - linecnt + 1;
	54	+ }
	55	+ /* Remember the first linenr with an illegal byte */
	56	+ else if (illegal_byte == 0)
	57	+ illegal_byte = curbuf->b_ml.ml_line_count
	58	+ - linecnt + 1;
	59	+ if (bad_char_behavior == BAD_DROP)
	60	+ {
	61	+ *(ptr - conv_restlen) = NUL;
	62	+ conv_restlen = 0;
	63	+ }
	64	+ else
65	+ {
66	+ /* Replace the trailing bytes with the replacement
67	+ * character if we were converting; if we weren't,
68	+ * leave the UTF8 checking code to do it, as it
69	+ * works slightly differently. */
70	+ if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
71	+ # ifdef USE_ICONV
	72	+ || iconv_fd != (iconv_t)-1
73	+ # endif
74	+ ))
75	+ {
76	+ while (conv_restlen > 0)
77	+ {
78	+ *(--ptr) = bad_char_behavior;
79	+ --conv_restlen;
80	+ }
81	+ }
82	fio_flags = 0; /* don't convert this */
83	# ifdef USE_ICONV
84	if (iconv_fd != (iconv_t)-1)
85	***************
	86	*** 1302,1321 ****
87	iconv_fd = (iconv_t)-1;
88	}
89	# endif
90	- if (bad_char_behavior == BAD_KEEP)
91	- {
92	- /* Keep the trailing bytes as-is. */
93	- size = conv_restlen;
94	- ptr -= conv_restlen;
95	- }
96	- else
97	- {
98	- /* Replace the trailing bytes with the
99	- * replacement character. */
100	- size = 1;
101	- *--ptr = bad_char_behavior;
102	- }
103	- conv_restlen = 0;
104	}
105	}
106	#endif
107	--- 1339,1344 ----
108	***************
	109	*** 1397,1402 ****
110	--- 1420,1430 ----
111	goto retry;
112	}
113	}
114	+
115	+ /* Include not converted bytes. */
116	+ ptr -= conv_restlen;
117	+ size += conv_restlen;
118	+ conv_restlen = 0;
119	#endif
120	/*
121	* Break here for a read error or end-of-file.
122	***************
	123	*** 1406,1416 ****
124
125	#ifdef FEAT_MBYTE
126
127	- /* Include not converted bytes. */
128	- ptr -= conv_restlen;
129	- size += conv_restlen;
130	- conv_restlen = 0;
131	-
132	# ifdef USE_ICONV
133	if (iconv_fd != (iconv_t)-1)
134	{
135	--- 1434,1439 ----
136	***************
	137	*** 1872,1883 ****
138	size = (long)((ptr + real_size) - dest);
139	ptr = dest;
140	}
141	! else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
142	{
143	! /* Reading UTF-8: Check if the bytes are valid UTF-8.
144	! * Need to start before "ptr" when part of the character was
145	! * read in the previous read() call. */
146	! for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
147	{
148	int todo = (int)((ptr + size) - p);
149	int l;
150	--- 1895,1906 ----
151	size = (long)((ptr + real_size) - dest);
152	ptr = dest;
153	}
154	! else if (enc_utf8 && !curbuf->b_p_bin)
155	{
156	! int incomplete_tail = FALSE;
157	!
158	! /* Reading UTF-8: Check if the bytes are valid UTF-8. */
159	! for (p = ptr; ; ++p)
160	{
161	int todo = (int)((ptr + size) - p);
162	int l;
163	***************
	164	*** 1891,1933 ****
165	* read() will get the next bytes, we'll check it
166	* then. */
167	l = utf_ptr2len_len(p, todo);
168	! if (l > todo)
169	{
170	! /* Incomplete byte sequence, the next read()
171	! * should get them and check the bytes. */
172	! p += todo;
173	! break;
174	}
175	! if (l == 1)
176	{
177	/* Illegal byte. If we can try another encoding
178	! * do that. */
179	! if (can_retry)
180	break;
181	-
182	- /* Remember the first linenr with an illegal byte */
183	- if (illegal_byte == 0)
184	- illegal_byte = readfile_linenr(linecnt, ptr, p);
185	# ifdef USE_ICONV
186	/* When we did a conversion report an error. */
187	if (iconv_fd != (iconv_t)-1 && conv_error == 0)
188	conv_error = readfile_linenr(linecnt, ptr, p);
189	# endif
190
191	/* Drop, keep or replace the bad byte. */
192	if (bad_char_behavior == BAD_DROP)
193	{
194	! mch_memmove(p, p+1, todo - 1);
195	--p;
196	--size;
197	}
198	else if (bad_char_behavior != BAD_KEEP)
199	*p = bad_char_behavior;
200	}
201	! p += l - 1;
202	}
203	}
204	! if (p < ptr + size)
205	{
206	/* Detected a UTF-8 error. */
207	rewind_retry:
208	--- 1914,1969 ----
209	* read() will get the next bytes, we'll check it
210	* then. */
211	l = utf_ptr2len_len(p, todo);
212	! if (l > todo && !incomplete_tail)
213	{
214	! /* Avoid retrying with a different encoding when
215	! * a truncated file is more likely, or attempting
216	! * to read the rest of an incomplete sequence when
217	! * we have already done so. */
	218	! if (p > ptr || filesize > 0)
219	! incomplete_tail = TRUE;
220	! /* Incomplete byte sequence, move it to conv_rest[]
221	! * and try to read the rest of it, unless we've
222	! * already done so. */
223	! if (p > ptr)
224	! {
225	! conv_restlen = todo;
226	! mch_memmove(conv_rest, p, conv_restlen);
227	! size -= conv_restlen;
228	! break;
229	! }
230	}
	231	! if (l == 1 || l > todo)
232	{
233	/* Illegal byte. If we can try another encoding
234	! * do that, unless at EOF where a truncated
235	! * file is more likely than a conversion error. */
236	! if (can_retry && !incomplete_tail)
237	break;
238	# ifdef USE_ICONV
239	/* When we did a conversion report an error. */
240	if (iconv_fd != (iconv_t)-1 && conv_error == 0)
241	conv_error = readfile_linenr(linecnt, ptr, p);
242	# endif
243	+ /* Remember the first linenr with an illegal byte */
244	+ if (conv_error == 0 && illegal_byte == 0)
245	+ illegal_byte = readfile_linenr(linecnt, ptr, p);
246
247	/* Drop, keep or replace the bad byte. */
248	if (bad_char_behavior == BAD_DROP)
249	{
250	! mch_memmove(p, p + 1, todo - 1);
251	--p;
252	--size;
253	}
254	else if (bad_char_behavior != BAD_KEEP)
255	*p = bad_char_behavior;
256	}
257	! else
258	! p += l - 1;
259	}
260	}
261	! if (p < ptr + size && !incomplete_tail)
262	{
263	/* Detected a UTF-8 error. */
264	rewind_retry:
265	*** ../vim-7.1.309/src/version.c Wed Jun 4 15:27:43 2008
266	--- src/version.c Wed Jun 4 19:35:16 2008
267	***************
	268	*** 668,669 ****
269	--- 673,676 ----
270	{ /* Add new patch number below this line */
271	+ /**/
272	+ 310,
273	/**/
274
275	--
276	Normal people believe that if it ain't broke, don't fix it. Engineers believe
277	that if it ain't broke, it doesn't have enough features yet.
278	(Scott Adams - The Dilbert principle)
279
280	/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
281	/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
282	\\\ download, build and distribute -- http://www.A-A-P.org ///
283	\\\ help me help AIDS victims -- http://ICCF-Holland.org ///