]>
Commit | Line | Data |
---|---|---|
64f9f7bb JR |
1 | #! /bin/sh -e |
2 | ||
3 | if [ $# -eq 3 -a "$2" = '-d' ]; then | |
4 | pdir="-d $3" | |
5 | elif [ $# -ne 1 ]; then | |
6 | echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" | |
7 | exit 1 | |
8 | fi | |
9 | case "$1" in | |
10 | -patch) patch $pdir -f --no-backup-if-mismatch -p0 -l < $0;; | |
11 | -unpatch) patch $pdir -f --no-backup-if-mismatch -R -p0 -l < $0;; | |
12 | *) | |
13 | echo >&2 "`basename $0`: script expects -patch|-unpatch as argument" | |
14 | exit 1 | |
15 | esac | |
16 | exit 0 | |
17 | ||
18 | Patch: gcj -vs- iconv | |
19 | ||
20 | To: Gcc Patch List <gcc-patches at gcc dot gnu dot org> | |
21 | Subject: Patch: gcj -vs- iconv | |
22 | From: Tom Tromey <tromey at cygnus dot com> | |
23 | Date: 06 Mar 2000 14:39:01 -0700 | |
24 | Reply-To: tromey at cygnus dot com | |
25 | ||
26 | # DP: This patch changes gcj to use iconv(), when available, to read Java | |
27 | # DP: source files. It adds a new `--encoding' option that lets the user | |
28 | # DP: choose what encoding to use. For systems without iconv(), gcj still | |
29 | # DP: assumes that the input is UTF-8, but it no longer ignores encoding | |
30 | # DP: errors. | |
31 | # DP: | |
32 | # DP: This patch does have one minor problem, which is that if --encoding is | |
33 | # DP: not specified we default to UTF-8 instead of the encoding the user has | |
34 | # DP: chosen (as part of his locale). I don't know how to find that | |
35 | # DP: information. Anyway, that is an addition which shouldn't affect | |
36 | # DP: whether or not this patch goes in, since this patch doesn't make the | |
37 | # DP: situation any worse than it is right now. | |
38 | ||
39 | Alex, I'm not sure I really understand how the parser context stack | |
40 | works, so it is possible that some of my changes there are wrong. | |
41 | Could you look at it? Is this ok to check in? | |
42 | # DP: | |
43 | # DP: This fixes PR gcj/33; I can now compile a Latin-1 encoded file on my | |
44 | # DP: PPC Linux box with `gcj --encoding=Latin1 ...'. | |
45 | ||
46 | 2000-03-06 Tom Tromey <tromey@cygnus.com> | |
47 | ||
48 | Fix for PR gcj/33: | |
49 | * jv-scan.c (help): Document --encoding. | |
50 | (options): Added `encoding' entry. | |
51 | (OPT_ENCODING): New define. | |
52 | (main): Handle --encoding. | |
53 | * lang-options.h: Document --classpath, --CLASSPATH, --main, and | |
54 | --encoding. | |
55 | * jcf-parse.c (parse_source_file): Correctly call java_init_lex. | |
56 | Added `finput' argument. | |
57 | * java-tree.h (current_encoding): Declare. | |
58 | * parse.y (java_parser_context_restore_global): Don't restore | |
59 | `finput'. | |
60 | (java_parser_context_save_global): Don't set `finput' field. | |
61 | (java_pop_parser_context): Don't restore `finput'. Free old lexer | |
62 | if required. | |
63 | * lang.c (current_encoding): New global. | |
64 | (lang_decode_option): Recognize `-fencoding='. | |
65 | (finish_parse): Don't close finput. | |
66 | * parse.h (struct parser_ctxt): Removed `finput' and | |
67 | `unget_utf8_value' fields. Added `lexer' field. | |
68 | (java_init_lex): Fixed declaration. | |
69 | * lex.c (java_new_lexer): New function. | |
70 | (java_destroy_lexer): Likewise. | |
71 | (java_read_char): Added `lex' argument. Handle iconv case. | |
72 | (java_read_unicode): Added `lex' argument. Count backslashes in | |
73 | lexer structure. | |
74 | (java_init_lex): Added `finput' and `encoding' arguments. Set | |
75 | `lexer' field in ctxp. | |
76 | (BAD_UTF8_VALUE): Removed. | |
77 | * lex.h: Include <iconv.h> if HAVE_ICONV defined. | |
78 | (java_lexer): New structure. | |
79 | (UNGETC): Removed. | |
80 | (GETC): Removed. | |
81 | (DEFAULT_ENCODING): New define. | |
82 | (java_destroy_lexer): Declare. | |
83 | ||
84 | Tom | |
85 | ||
86 | --- gcc/java/java-tree.h.orig Sat Apr 8 23:03:03 2000 | |
87 | +++ gcc/java/java-tree.h Sat Apr 8 23:03:56 2000 | |
88 | @@ -141,6 +141,9 @@ | |
89 | extern int flag_not_overriding; | |
90 | extern int flag_static_local_jdk1_1; | |
91 | ||
92 | +/* Encoding used for source files. */ | |
93 | +extern char *current_encoding; | |
94 | + | |
95 | /* The Java .class file that provides main_class; the main input file. */ | |
96 | extern struct JCF *current_jcf; | |
97 | ||
98 | --- gcc/java/jcf-parse.c.orig Sat Apr 8 23:03:03 2000 | |
99 | +++ gcc/java/jcf-parse.c Sat Apr 8 23:05:39 2000 | |
100 | @@ -84,7 +84,7 @@ | |
101 | static tree give_name_to_class PROTO ((JCF *jcf, int index)); | |
102 | static void parse_zip_file_entries PROTO ((void)); | |
103 | static void process_zip_dir PROTO ((void)); | |
104 | -static void parse_source_file PROTO ((tree)); | |
105 | +static void parse_source_file PARAMS ((tree, FILE *)); | |
106 | static void jcf_parse_source PROTO ((void)); | |
107 | static int jcf_figure_file_type PROTO ((JCF *)); | |
108 | static int find_in_current_zip PROTO ((char *, struct JCF **)); | |
109 | @@ -570,6 +570,7 @@ | |
110 | jcf_parse_source () | |
111 | { | |
112 | tree file; | |
113 | + FILE *finput; | |
114 | ||
115 | java_parser_context_save_global (); | |
116 | java_push_parser_context (); | |
117 | @@ -580,7 +581,7 @@ | |
118 | if (!(finput = fopen (input_filename, "r"))) | |
119 | fatal ("input file `%s' just disappeared - jcf_parse_source", | |
120 | input_filename); | |
121 | - parse_source_file (file); | |
122 | + parse_source_file (file, finput); | |
123 | if (fclose (finput)) | |
124 | fatal ("can't close input file `%s' stream - jcf_parse_source", | |
125 | input_filename); | |
126 | @@ -741,8 +742,9 @@ | |
127 | /* Parse a source file, as pointed by the current value of INPUT_FILENAME. */ | |
128 | ||
129 | static void | |
130 | -parse_source_file (file) | |
131 | +parse_source_file (file, finput) | |
132 | tree file; | |
133 | + FILE *finput; | |
134 | { | |
135 | int save_error_count = java_error_count; | |
136 | /* Mark the file as parsed */ | |
137 | @@ -750,7 +752,9 @@ | |
138 | ||
139 | lang_init_source (1); /* Error msgs have no method prototypes */ | |
140 | ||
141 | - java_init_lex (); /* Initialize the parser */ | |
142 | + /* Initialize the parser */ | |
143 | + java_init_lex (finput, | |
144 | + current_encoding ? current_encoding : DEFAULT_ENCODING); | |
145 | java_parse_abort_on_error (); | |
146 | ||
147 | java_parse (); /* Parse and build partial tree nodes. */ | |
148 | @@ -778,6 +782,7 @@ | |
149 | int several_files = 0; | |
150 | char *list = strdup (input_filename), *next; | |
151 | tree node, current_file_list = NULL_TREE; | |
152 | + FILE *finput; | |
153 | ||
154 | do | |
155 | { | |
156 | @@ -888,7 +893,7 @@ | |
157 | case JCF_SOURCE: | |
158 | java_push_parser_context (); | |
159 | java_parser_context_save_global (); | |
160 | - parse_source_file (name); | |
161 | + parse_source_file (name, finput); | |
162 | java_parser_context_restore_global (); | |
163 | java_pop_parser_context (1); | |
164 | break; | |
165 | --- gcc/java/jv-scan.c.orig Sat Apr 8 23:03:03 2000 | |
166 | +++ gcc/java/jv-scan.c Sat Apr 8 23:11:41 2000 | |
167 | @@ -53,6 +53,7 @@ | |
168 | { | |
169 | int i = 1; | |
170 | char *output_file = NULL; | |
171 | + char *encoding = NULL; | |
172 | long ft; | |
173 | ||
174 | exec_name = argv[0]; | |
175 | @@ -73,6 +74,14 @@ | |
176 | argv [i] = NULL; | |
177 | } | |
178 | ||
179 | + /* file encoding */ | |
180 | + else if (!strcmp (argv [i], "--encoding") && i+1 < argc) | |
181 | + { | |
182 | + argv [i] = NULL; | |
183 | + encoding = argv [++i]; | |
184 | + argv [i] = NULL; | |
185 | + } | |
186 | + | |
187 | /* Print the name of the class that contains main */ | |
188 | else if (!strcmp (argv [i], "--print-main")) | |
189 | flag_find_main = 1; | |
190 | @@ -116,7 +125,7 @@ | |
191 | input_filename = argv [i]; | |
192 | if ( (finput = fopen (argv [i], "r")) ) | |
193 | { | |
194 | - java_init_lex (); | |
195 | + java_init_lex (finput, encoding ? encoding : DEFAULT_ENCODING); | |
196 | yyparse (); | |
197 | if (ftell (out) != ft) | |
198 | fputc ('\n', out); | |
199 | --- gcc/java/lang-options.h.orig Sat Apr 8 23:03:03 2000 | |
200 | +++ gcc/java/lang-options.h Sat Apr 8 23:03:56 2000 | |
201 | @@ -39,8 +39,10 @@ | |
202 | { "-M", "Print dependencies to stdout" }, | |
203 | { "-MM", "Print dependencies to stdout" }, | |
204 | #endif /* ! USE_CPPLIB */ | |
205 | - { "-fclasspath", "Set class path and suppress system path" }, | |
206 | - { "-fCLASSPATH", "Set class path" }, | |
207 | + { "--classpath", "Set class path and suppress system path" }, | |
208 | + { "--CLASSPATH", "Set class path" }, | |
209 | + { "--main", "Choose class whose main method should be used" }, | |
210 | + { "--encoding", "Choose input encoding (default is UTF-8)" }, | |
211 | { "-I", "Add directory to class path" }, | |
212 | { "-foutput-class-dir", "Directory where class files should be written" }, | |
213 | { "-Wredundant-modifiers", | |
214 | --- gcc/java/lang.c.orig Sat Apr 8 23:03:03 2000 | |
215 | +++ gcc/java/lang.c Sat Apr 8 23:03:56 2000 | |
216 | @@ -105,6 +105,9 @@ | |
217 | /* When non zero, warns that final local are treated as non final. */ | |
218 | int flag_static_local_jdk1_1 = 0; | |
219 | ||
220 | +/* The encoding of the source file. */ | |
221 | +char *current_encoding = NULL; | |
222 | + | |
223 | /* From gcc/flags.h, and indicates if exceptions are turned on or not. */ | |
224 | ||
225 | extern int flag_new_exceptions; | |
226 | @@ -172,6 +175,13 @@ | |
227 | return 1; | |
228 | } | |
229 | #undef ARG | |
230 | +#define ARG "-fencoding=" | |
231 | + if (strncmp (p, ARG, sizeof (ARG) - 1) == 0) | |
232 | + { | |
233 | + current_encoding = p + sizeof (ARG) - 1; | |
234 | + return 1; | |
235 | + } | |
236 | +#undef ARG | |
237 | ||
238 | if (p[0] == '-' && p[1] == 'f') | |
239 | { | |
240 | @@ -252,7 +262,9 @@ | |
241 | return 0; | |
242 | } | |
243 | ||
244 | +/* Global open file. */ | |
245 | FILE *finput; | |
246 | + | |
247 | char * | |
248 | init_parse (filename) | |
249 | char *filename; | |
250 | @@ -313,7 +326,6 @@ | |
251 | void | |
252 | finish_parse () | |
253 | { | |
254 | - fclose (finput); | |
255 | jcf_dependency_write (); | |
256 | } | |
257 | ||
258 | --- gcc/java/lex.c.orig Sat Apr 8 23:03:03 2000 | |
259 | +++ gcc/java/lex.c Sat Apr 8 23:14:41 2000 | |
260 | @@ -24,15 +24,15 @@ | |
261 | The Free Software Foundation is independent of Sun Microsystems, Inc. */ | |
262 | ||
263 | /* It defines java_lex (yylex) that reads a Java ASCII source file | |
264 | -possibly containing Unicode escape sequence or utf8 encoded characters | |
265 | -and returns a token for everything found but comments, white spaces | |
266 | -and line terminators. When necessary, it also fills the java_lval | |
267 | -(yylval) union. It's implemented to be called by a re-entrant parser | |
268 | -generated by Bison. | |
269 | - | |
270 | -The lexical analysis conforms to the Java grammar described in "The | |
271 | -Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele. | |
272 | -Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ | |
273 | + possibly containing Unicode escape sequence or utf8 encoded | |
274 | + characters and returns a token for everything found but comments, | |
275 | + white spaces and line terminators. When necessary, it also fills | |
276 | + the java_lval (yylval) union. It's implemented to be called by a | |
277 | + re-entrant parser generated by Bison. | |
278 | + | |
279 | + The lexical analysis conforms to the Java grammar described in "The | |
280 | + Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele. | |
281 | + Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ | |
282 | ||
283 | #include "keyword.h" | |
284 | ||
285 | @@ -55,15 +55,18 @@ | |
286 | static int java_parse_doc_section PROTO ((unicode_t)); | |
287 | static void java_parse_end_comment PROTO ((unicode_t)); | |
288 | static unicode_t java_get_unicode PROTO (()); | |
289 | -static unicode_t java_read_unicode PROTO ((int, int *)); | |
290 | -static void java_store_unicode PROTO ((struct java_line *, unicode_t, int)); | |
291 | -static unicode_t java_read_char PROTO (()); | |
292 | +static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *)); | |
293 | +static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); | |
294 | +static unicode_t java_read_char PARAMS ((java_lexer *)); | |
295 | static void java_allocate_new_line PROTO (()); | |
296 | static void java_unget_unicode PROTO (()); | |
297 | static unicode_t java_sneak_unicode PROTO (()); | |
298 | +java_lexer *java_new_lexer PARAMS ((FILE *, const char *)); | |
299 | ||
300 | void | |
301 | -java_init_lex () | |
302 | +java_init_lex (finput, encoding) | |
303 | + FILE *finput; | |
304 | + const char *encoding; | |
305 | { | |
306 | #ifndef JC1_LITE | |
307 | int java_lang_imported = 0; | |
308 | @@ -108,9 +111,9 @@ | |
309 | ctxp->lineno = lineno = 0; | |
310 | ctxp->p_line = NULL; | |
311 | ctxp->c_line = NULL; | |
312 | - ctxp->unget_utf8_value = 0; | |
313 | ctxp->minus_seen = 0; | |
314 | ctxp->java_error_flag = 0; | |
315 | + ctxp->lexer = java_new_lexer (finput, encoding); | |
316 | } | |
317 | ||
318 | static char * | |
319 | @@ -188,22 +191,142 @@ | |
320 | ctxp->c_line->white_space_only = 1; | |
321 | } | |
322 | ||
323 | -#define BAD_UTF8_VALUE 0xFFFE | |
324 | +/* Create a new lexer object. */ | |
325 | +java_lexer * | |
326 | +java_new_lexer (finput, encoding) | |
327 | + FILE *finput; | |
328 | + const char *encoding; | |
329 | +{ | |
330 | + java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer)); | |
331 | + int enc_error = 0; | |
332 | + | |
333 | + lex->finput = finput; | |
334 | + lex->bs_count = 0; | |
335 | + lex->unget_value = 0; | |
336 | + | |
337 | +#ifdef HAVE_ICONV | |
338 | + lex->handle = iconv_open ("UCS-2", encoding); | |
339 | + if (lex->handle == (iconv_t) -1) | |
340 | + { | |
341 | + /* FIXME: we should give a nice error based on errno here. */ | |
342 | + enc_error = 1; | |
343 | + } | |
344 | + lex->first = -1; | |
345 | + lex->last = -1; | |
346 | +#else /* HAVE_ICONV */ | |
347 | + if (strcmp (encoding, DEFAULT_ENCODING)) | |
348 | + enc_error = 1; | |
349 | +#endif /* HAVE_ICONV */ | |
350 | + | |
351 | + if (enc_error) | |
352 | + fatal ("unknown encoding: `%s'", encoding); | |
353 | + | |
354 | + return lex; | |
355 | +} | |
356 | + | |
357 | +void | |
358 | +java_destroy_lexer (lex) | |
359 | + java_lexer *lex; | |
360 | +{ | |
361 | + fclose (lex->finput); | |
362 | +#ifdef HAVE_ICONV | |
363 | + iconv_close (lex->handle); | |
364 | +#endif | |
365 | + free (lex); | |
366 | +} | |
367 | ||
368 | static unicode_t | |
369 | -java_read_char () | |
370 | +java_read_char (lex) | |
371 | + java_lexer *lex; | |
372 | { | |
373 | - int c; | |
374 | - int c1, c2; | |
375 | + if (lex->unget_value) | |
376 | + { | |
377 | + unicode_t r = lex->unget_value; | |
378 | + lex->unget_value = 0; | |
379 | + return r; | |
380 | + } | |
381 | + | |
382 | +#ifdef HAVE_ICONV | |
383 | + { | |
384 | + char out[2]; | |
385 | + size_t ir, inbytesleft, in_save, out_count; | |
386 | + char *inp, *outp; | |
387 | ||
388 | - if (ctxp->unget_utf8_value) | |
389 | + while (1) | |
390 | { | |
391 | - int to_return = ctxp->unget_utf8_value; | |
392 | - ctxp->unget_utf8_value = 0; | |
393 | - return (to_return); | |
394 | + /* See if we need to read more data. If FIRST == 0 then the | |
395 | + previous conversion attempt ended in the middle of a | |
396 | + character at the end of the buffer. Otherwise we only have | |
397 | + to read if the buffer is empty. */ | |
398 | + if (lex->first == 0 || lex->first >= lex->last) | |
399 | + { | |
400 | + int r; | |
401 | + | |
402 | + if (lex->first >= lex->last) | |
403 | + { | |
404 | + lex->first = 0; | |
405 | + lex->last = 0; | |
406 | + } | |
407 | + if (feof (lex->finput)) | |
408 | + return UEOF; | |
409 | + r = fread (&lex->buffer[lex->last], 1, | |
410 | + sizeof (lex->buffer) - lex->last, | |
411 | + lex->finput); | |
412 | + lex->last += r; | |
413 | } | |
414 | ||
415 | - c = GETC (); | |
416 | + inbytesleft = lex->last - lex->first; | |
417 | + | |
418 | + if (inbytesleft == 0) | |
419 | + { | |
420 | + /* We've tried to read and there is nothing left. */ | |
421 | + return UEOF; | |
422 | + } | |
423 | + | |
424 | + in_save = inbytesleft; | |
425 | + out_count = 2; | |
426 | + inp = &lex->buffer[lex->first]; | |
427 | + outp = out; | |
428 | + ir = iconv (lex->handle, (const char **) &inp, &inbytesleft, | |
429 | + &outp, &out_count); | |
430 | + lex->first += in_save - inbytesleft; | |
431 | + | |
432 | + if (out_count == 0) | |
433 | + { | |
434 | + /* Success. We assume that UCS-2 is big-endian. This | |
435 | + appears to be an ok assumption. */ | |
436 | + unicode_t result; | |
437 | + result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1]; | |
438 | + return result; | |
439 | + } | |
440 | + | |
441 | + if (ir == (size_t) -1) | |
442 | + { | |
443 | + if (errno == EINVAL) | |
444 | + { | |
445 | + /* This is ok. This means that the end of our buffer | |
446 | + is in the middle of a character sequence. We just | |
447 | + move the valid part of the buffer to the beginning | |
448 | + to force a read. */ | |
449 | + /* We use bcopy() because it should work for | |
450 | + overlapping strings. FIXME: use memmove() instead. */ | |
451 | + bcopy (&lex->buffer[lex->first], &lex->buffer[0], | |
452 | + lex->last - lex->first); | |
453 | + lex->last -= lex->first; | |
454 | + lex->first = 0; | |
455 | + } | |
456 | + else | |
457 | + { | |
458 | + /* A more serious error. */ | |
459 | + java_lex_error ("unrecognized character in input stream", 0); | |
460 | + } | |
461 | + } | |
462 | + } | |
463 | + } | |
464 | +#else /* HAVE_ICONV */ | |
465 | + { | |
466 | + int c, c1, c2; | |
467 | + c = getc (lex->finput); | |
468 | ||
469 | if (c < 128) | |
470 | return (unicode_t)c; | |
471 | @@ -213,17 +336,17 @@ | |
472 | { | |
473 | if ((c & 0xe0) == 0xc0) | |
474 | { | |
475 | - c1 = GETC (); | |
476 | + c1 = getc (lex->finput); | |
477 | if ((c1 & 0xc0) == 0x80) | |
478 | return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f)); | |
479 | c = c1; | |
480 | } | |
481 | else if ((c & 0xf0) == 0xe0) | |
482 | { | |
483 | - c1 = GETC (); | |
484 | + c1 = getc (lex->finput); | |
485 | if ((c1 & 0xc0) == 0x80) | |
486 | { | |
487 | - c2 = GETC (); | |
488 | + c2 = getc (lex->finput); | |
489 | if ((c2 & 0xc0) == 0x80) | |
490 | return (unicode_t)(((c & 0xf) << 12) + | |
491 | (( c1 & 0x3f) << 6) + (c2 & 0x3f)); | |
492 | @@ -233,14 +356,15 @@ | |
493 | else | |
494 | c = c1; | |
495 | } | |
496 | - /* We looked for a UTF8 multi-byte sequence (since we saw an initial | |
497 | - byte with the high bit set), but found invalid bytes instead. | |
498 | - If the most recent byte was Ascii (and not EOF), we should | |
499 | - unget it, in case it was a comment terminator or other delimitor. */ | |
500 | - if ((c & 0x80) == 0) | |
501 | - UNGETC (c); | |
502 | - return BAD_UTF8_VALUE; | |
503 | + | |
504 | + /* We simply don't support invalid characters. */ | |
505 | + java_lex_error ("malformed UTF-8 character", 0); | |
506 | + } | |
507 | } | |
508 | +#endif /* HAVE_ICONV */ | |
509 | + | |
510 | + /* We only get here on error. */ | |
511 | + return UEOF; | |
512 | } | |
513 | ||
514 | static void | |
515 | @@ -261,56 +385,54 @@ | |
516 | } | |
517 | ||
518 | static unicode_t | |
519 | -java_read_unicode (term_context, unicode_escape_p) | |
520 | +java_read_unicode (lex, term_context, unicode_escape_p) | |
521 | + java_lexer *lex; | |
522 | int term_context; | |
523 | int *unicode_escape_p; | |
524 | { | |
525 | unicode_t c; | |
526 | - long i, base; | |
527 | ||
528 | - c = java_read_char (); | |
529 | + c = java_read_char (lex); | |
530 | *unicode_escape_p = 0; | |
531 | ||
532 | if (c != '\\') | |
533 | - return ((term_context ? c : | |
534 | - java_lineterminator (c) ? '\n' : (unicode_t)c)); | |
535 | - | |
536 | - /* Count the number of preceeding '\' */ | |
537 | - for (base = ftell (finput), i = base-2; c == '\\';) | |
538 | { | |
539 | - fseek (finput, i--, SEEK_SET); | |
540 | - c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */ | |
541 | + lex->bs_count = 0; | |
542 | + return (term_context ? c : (java_lineterminator (c) | |
543 | + ? '\n' | |
544 | + : (unicode_t) c)); | |
545 | } | |
546 | - fseek (finput, base, SEEK_SET); | |
547 | - if ((base-i-3)%2 == 0) /* If odd number of \ seen */ | |
548 | + | |
549 | + ++lex->bs_count; | |
550 | + if ((lex->bs_count) % 2 == 1) | |
551 | { | |
552 | - c = java_read_char (); | |
553 | + /* Odd number of \ seen. */ | |
554 | + c = java_read_char (lex); | |
555 | if (c == 'u') | |
556 | { | |
557 | - unsigned short unicode = 0; | |
558 | + unicode_t unicode = 0; | |
559 | int shift = 12; | |
560 | /* Next should be 4 hex digits, otherwise it's an error. | |
561 | The hex value is converted into the unicode, pushed into | |
562 | the Unicode stream. */ | |
563 | for (shift = 12; shift >= 0; shift -= 4) | |
564 | { | |
565 | - if ((c = java_read_char ()) == UEOF) | |
566 | + if ((c = java_read_char (lex)) == UEOF) | |
567 | return UEOF; | |
568 | if (c >= '0' && c <= '9') | |
569 | unicode |= (unicode_t)((c-'0') << shift); | |
570 | else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) | |
571 | unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift); | |
572 | else | |
573 | - java_lex_error | |
574 | - ("Non hex digit in Unicode escape sequence", 0); | |
575 | + java_lex_error ("Non hex digit in Unicode escape sequence", 0); | |
576 | } | |
577 | *unicode_escape_p = 1; | |
578 | - return (term_context ? unicode : | |
579 | - (java_lineterminator (c) ? '\n' : unicode)); | |
580 | + return (term_context | |
581 | + ? unicode : (java_lineterminator (c) ? '\n' : unicode)); | |
582 | } | |
583 | - ctxp->unget_utf8_value = c; | |
584 | + lex->unget_value = c; | |
585 | } | |
586 | - return (unicode_t)'\\'; | |
587 | + return (unicode_t) '\\'; | |
588 | } | |
589 | ||
590 | static unicode_t | |
591 | @@ -325,7 +447,7 @@ | |
592 | for (;;) | |
593 | { | |
594 | int unicode_escape_p; | |
595 | - c = java_read_unicode (0, &unicode_escape_p); | |
596 | + c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p); | |
597 | java_store_unicode (ctxp->c_line, c, unicode_escape_p); | |
598 | if (ctxp->c_line->white_space_only | |
599 | && !JAVA_WHITE_SPACE_P (c) && c!='\n') | |
600 | @@ -346,7 +468,7 @@ | |
601 | int unicode_escape_p; | |
602 | if (c == '\n') /* CR */ | |
603 | { | |
604 | - if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r') | |
605 | + if ((c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p)) != '\r') | |
606 | { | |
607 | ctxp->c_line->ahead [0] = c; | |
608 | ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p; | |
609 | @@ -355,7 +477,7 @@ | |
610 | } | |
611 | else if (c == '\r') /* LF */ | |
612 | { | |
613 | - if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n') | |
614 | + if ((c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p)) != '\n') | |
615 | { | |
616 | ctxp->c_line->ahead [0] = c; | |
617 | ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p; | |
618 | --- gcc/java/lex.h.orig Sat Apr 8 23:03:03 2000 | |
619 | +++ gcc/java/lex.h Sat Apr 8 23:03:56 2000 | |
620 | @@ -35,6 +35,13 @@ | |
621 | /* A Unicode character, as read from the input file */ | |
622 | typedef unsigned short unicode_t; | |
623 | ||
624 | +#ifdef HAVE_ICONV | |
625 | +#include <iconv.h> | |
626 | +#endif /* HAVE_ICONV */ | |
627 | + | |
628 | +/* Default encoding to use if no encoding is specified. */ | |
629 | +#define DEFAULT_ENCODING "UTF-8" | |
630 | + | |
631 | /* Debug macro to print-out what we match */ | |
632 | #ifdef JAVA_LEX_DEBUG | |
633 | #ifdef JAVA_LEX_DEBUG_CHAR | |
634 | @@ -96,12 +103,38 @@ | |
635 | int col; | |
636 | } java_lc; | |
637 | ||
638 | +typedef struct java_lexer | |
639 | +{ | |
640 | + /* The file from which we're reading. */ | |
641 | + FILE *finput; | |
642 | + | |
643 | + /* Number of consecutive backslashes we've read. */ | |
644 | + int bs_count; | |
645 | + | |
646 | + /* If nonzero, a value that was pushed back. */ | |
647 | + unicode_t unget_value; | |
648 | + | |
649 | +#ifdef HAVE_ICONV | |
650 | + /* The handle for the iconv converter we're using. */ | |
651 | + iconv_t handle; | |
652 | + | |
653 | + /* Bytes we've read from the file but have not sent to iconv. */ | |
654 | + char buffer[1024]; | |
655 | + | |
656 | + /* Index of first valid character in buffer, -1 if no valid | |
657 | + characters. */ | |
658 | + int first; | |
659 | + | |
660 | + /* Index of last valid character in buffer, plus one. -1 if no | |
661 | + valid characters in buffer. */ | |
662 | + int last; | |
663 | +#endif /* HAVE_ICONV */ | |
664 | +} java_lexer; | |
665 | ||
666 | -#define JAVA_LINE_MAX 80 | |
667 | +/* Destroy a lexer object. */ | |
668 | +extern void java_destroy_lexer PARAMS ((java_lexer *)); | |
669 | ||
670 | -/* Macro to read and unread bytes */ | |
671 | -#define UNGETC(c) ungetc(c, finput) | |
672 | -#define GETC() getc(finput) | |
673 | +#define JAVA_LINE_MAX 80 | |
674 | ||
675 | /* Build a location compound integer */ | |
676 | #define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff)) | |
677 | --- gcc/java/parse.h.orig Sat Apr 8 23:03:03 2000 | |
678 | +++ gcc/java/parse.h Sat Apr 8 23:15:36 2000 | |
679 | @@ -586,12 +586,11 @@ | |
680 | struct parser_ctxt { | |
681 | ||
682 | char *filename; /* Current filename */ | |
683 | - FILE *finput; /* Current file input stream */ | |
684 | struct parser_ctxt *next; | |
685 | ||
686 | + java_lexer *lexer; /* Current lexer state */ | |
687 | struct java_line *p_line, *c_line; /* Previous and current line */ | |
688 | java_lc elc; /* Error's line column info */ | |
689 | - unicode_t unget_utf8_value; /* An unget utf8 value */ | |
690 | int ccb_indent; /* Keep track of {} indent, lexer */ | |
691 | int first_ccb_indent1; /* First { at ident level 1 */ | |
692 | int last_ccb_indent1; /* Last } at ident level 1 */ | |
693 | @@ -668,7 +667,7 @@ | |
694 | /* Always in use, no matter what you compile */ | |
695 | void java_push_parser_context PROTO ((void)); | |
696 | void java_pop_parser_context PROTO ((int)); | |
697 | -void java_init_lex PROTO ((void)); | |
698 | +void java_init_lex PARAMS ((FILE *, const char *)); | |
699 | extern void java_parser_context_save_global PROTO ((void)); | |
700 | extern void java_parser_context_restore_global PROTO ((void)); | |
701 | int yyparse PROTO ((void)); | |
702 | --- gcc/java/parse.y.orig Sat Apr 8 23:03:03 2000 | |
703 | +++ gcc/java/parse.y Sat Apr 8 23:03:56 2000 | |
704 | @@ -2347,7 +2347,6 @@ | |
705 | java_push_parser_context (); | |
706 | extra_ctxp_pushed_p = 1; | |
707 | } | |
708 | - ctxp->finput = finput; | |
709 | ctxp->lineno = lineno; | |
710 | ctxp->current_class = current_class; | |
711 | ctxp->filename = input_filename; | |
712 | @@ -2357,7 +2356,6 @@ | |
713 | void | |
714 | java_parser_context_restore_global () | |
715 | { | |
716 | - finput = ctxp->finput; | |
717 | lineno = ctxp->lineno; | |
718 | current_class = ctxp->current_class; | |
719 | input_filename = ctxp->filename; | |
720 | @@ -2386,9 +2384,12 @@ | |
721 | next->incomplete_class = ctxp->incomplete_class; | |
722 | next->gclass_list = ctxp->gclass_list; | |
723 | lineno = ctxp->lineno; | |
724 | - finput = ctxp->finput; | |
725 | current_class = ctxp->current_class; | |
726 | } | |
727 | + | |
728 | + /* If the old and new lexers differ, then free the old one. */ | |
729 | + if (ctxp->lexer && next && ctxp->lexer != next->lexer) | |
730 | + java_destroy_lexer (ctxp->lexer); | |
731 | ||
732 | /* Set the single import class file flag to 0 for the current list | |
733 | of imported things */ |