gcc-gcj-vs-iconv.patch

   1 #! /bin/sh -e
   2
   3 if [ $# -eq 3 -a "$2" = '-d' ]; then
   4     pdir="-d $3"
   5 elif [ $# -ne 1 ]; then
   6     echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
   7     exit 1
   8 fi
   9 case "$1" in
  10     -patch) patch $pdir -f --no-backup-if-mismatch -p0 -l < $0;;
  11     -unpatch) patch $pdir -f --no-backup-if-mismatch -R -p0 -l < $0;;
  12     *)
  13         echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
  14         exit 1
  15 esac
  16 exit 0
  17
  18 Patch: gcj -vs- iconv
  19
  20 To: Gcc Patch List <gcc-patches at gcc dot gnu dot org>
  21 Subject: Patch: gcj -vs- iconv
  22 From: Tom Tromey <tromey at cygnus dot com>
  23 Date: 06 Mar 2000 14:39:01 -0700
  24 Reply-To: tromey at cygnus dot com
  25
  26 # DP: This patch changes gcj to use iconv(), when available, to read Java
  27 # DP: source files.  It adds a new `--encoding' option that lets the user
  28 # DP: choose what encoding to use.  For systems without iconv(), gcj still
  29 # DP: assumes that the input is UTF-8, but it no longer ignores encoding
  30 # DP: errors.
  31 # DP:
  32 # DP: This patch does have one minor problem, which is that if --encoding is
  33 # DP: not specified we default to UTF-8 instead of the encoding the user has
  34 # DP: chosen (as part of his locale).  I don't know how to find that
  35 # DP: information.  Anyway, that is an addition which shouldn't affect
  36 # DP: whether or not this patch goes in, since this patch doesn't make the
  37 # DP: situation any worse than it is right now.
  38
  39 Alex, I'm not sure I really understand how the parser context stack
  40 works, so it is possible that some of my changes there are wrong.
  41 Could you look at it?  Is this ok to check in?
  42 # DP:
  43 # DP: This fixes PR gcj/33; I can now compile a Latin-1 encoded file on my
  44 # DP: PPC Linux box with `gcj --encoding=Latin1 ...'.
  45
  46 2000-03-06  Tom Tromey  <tromey@cygnus.com>
  47
  48         Fix for PR gcj/33:
  49         * jv-scan.c (help): Document --encoding.
  50         (options): Added `encoding' entry.
  51         (OPT_ENCODING): New define.
  52         (main): Handle --encoding.
  53         * lang-options.h: Document --classpath, --CLASSPATH, --main, and
  54         --encoding.
  55         * jcf-parse.c (parse_source_file): Correctly call java_init_lex.
  56         Added `finput' argument.
  57         * java-tree.h (current_encoding): Declare.
  58         * parse.y (java_parser_context_restore_global): Don't restore
  59         `finput'.
  60         (java_parser_context_save_global): Don't set `finput' field.
  61         (java_pop_parser_context): Don't restore `finput'.  Free old lexer
  62         if required.
  63         * lang.c (current_encoding): New global.
  64         (lang_decode_option): Recognize `-fencoding='.
  65         (finish_parse): Don't close finput.
  66         * parse.h (struct parser_ctxt): Removed `finput' and
  67         `unget_utf8_value' fields.  Added `lexer' field.
  68         (java_init_lex): Fixed declaration.
  69         * lex.c (java_new_lexer): New function.
  70         (java_destroy_lexer): Likewise.
  71         (java_read_char): Added `lex' argument.  Handle iconv case.
  72         (java_read_unicode): Added `lex' argument.  Count backslashes in
  73         lexer structure.
  74         (java_init_lex): Added `finput' and `encoding' arguments.  Set
  75         `lexer' field in ctxp.
  76         (BAD_UTF8_VALUE): Removed.
  77         * lex.h: Include <iconv.h> if HAVE_ICONV defined.
  78         (java_lexer): New structure.
  79         (UNGETC): Removed.
  80         (GETC): Removed.
  81         (DEFAULT_ENCODING): New define.
  82         (java_destroy_lexer): Declare.
  83
  84 Tom
  85
  86 --- gcc/java/java-tree.h.orig   Sat Apr  8 23:03:03 2000
  87 +++ gcc/java/java-tree.h        Sat Apr  8 23:03:56 2000
  88 @@ -141,6 +141,9 @@
  89  extern int flag_not_overriding;
  90  extern int flag_static_local_jdk1_1;
  91
  92 +/* Encoding used for source files.  */
  93 +extern char *current_encoding;
  94 +
  95  /* The Java .class file that provides main_class;  the main input file. */
  96  extern struct JCF *current_jcf;
  97
  98 --- gcc/java/jcf-parse.c.orig   Sat Apr  8 23:03:03 2000
  99 +++ gcc/java/jcf-parse.c        Sat Apr  8 23:05:39 2000
 100 @@ -84,7 +84,7 @@
 101  static tree give_name_to_class PROTO ((JCF *jcf, int index));
 102  static void parse_zip_file_entries PROTO ((void));
 103  static void process_zip_dir PROTO ((void));
 104 -static void parse_source_file PROTO ((tree));
 105 +static void parse_source_file PARAMS ((tree, FILE *));
 106  static void jcf_parse_source PROTO ((void));
 107  static int jcf_figure_file_type PROTO ((JCF *));
 108  static int find_in_current_zip PROTO ((char *, struct JCF **));
 109 @@ -570,6 +570,7 @@
 110  jcf_parse_source ()
 111  {
 112    tree file;
 113 +  FILE *finput;
 114
 115    java_parser_context_save_global ();
 116    java_push_parser_context ();
 117 @@ -580,7 +581,7 @@
 118        if (!(finput = fopen (input_filename, "r")))
 119         fatal ("input file `%s' just disappeared - jcf_parse_source",
 120                input_filename);
 121 -      parse_source_file (file);
 122 +      parse_source_file (file, finput);
 123        if (fclose (finput))
 124         fatal ("can't close input file `%s' stream - jcf_parse_source",
 125                input_filename);
 126 @@ -741,8 +742,9 @@
 127  /* Parse a source file, as pointed by the current value of INPUT_FILENAME. */
 128
 129  static void
 130 -parse_source_file (file)
 131 +parse_source_file (file, finput)
 132       tree file;
 133 +     FILE *finput;
 134  {
 135    int save_error_count = java_error_count;
 136    /* Mark the file as parsed */
 137 @@ -750,7 +752,9 @@
 138
 139    lang_init_source (1);                    /* Error msgs have no method prototypes */
 140
 141 -  java_init_lex ();                /* Initialize the parser */
 142 +  /* Initialize the parser */
 143 +  java_init_lex (finput,
 144 +                current_encoding ? current_encoding : DEFAULT_ENCODING);
 145    java_parse_abort_on_error ();
 146
 147    java_parse ();                   /* Parse and build partial tree nodes. */
 148 @@ -778,6 +782,7 @@
 149    int several_files = 0;
 150    char *list = strdup (input_filename), *next;
 151    tree node, current_file_list = NULL_TREE;
 152 +  FILE *finput;
 153
 154    do
 155      {
 156 @@ -888,7 +893,7 @@
 157         case JCF_SOURCE:
 158           java_push_parser_context ();
 159           java_parser_context_save_global ();
 160 -         parse_source_file (name);
 161 +         parse_source_file (name, finput);
 162           java_parser_context_restore_global ();
 163           java_pop_parser_context (1);
 164           break;
 165 --- gcc/java/jv-scan.c.orig     Sat Apr  8 23:03:03 2000
 166 +++ gcc/java/jv-scan.c  Sat Apr  8 23:11:41 2000
 167 @@ -53,6 +53,7 @@
 168  {
 169    int i = 1;
 170    char *output_file = NULL;
 171 +  char *encoding = NULL;
 172    long ft;
 173
 174    exec_name = argv[0];
 175 @@ -73,6 +74,14 @@
 176               argv [i] = NULL;
 177             }
 178
 179 +         /* file encoding */
 180 +         else if (!strcmp (argv [i], "--encoding") && i+1 < argc)
 181 +           {
 182 +             argv [i] = NULL;
 183 +             encoding = argv [++i];
 184 +             argv [i] = NULL;
 185 +           }
 186 +
 187           /* Print the name of the class that contains main */
 188           else if (!strcmp (argv [i], "--print-main"))
 189             flag_find_main = 1;
 190 @@ -116,7 +125,7 @@
 191         input_filename = argv [i];
 192         if ( (finput = fopen (argv [i], "r")) )
 193           {
 194 -           java_init_lex ();
 195 +           java_init_lex (finput, encoding ? encoding : DEFAULT_ENCODING);
 196             yyparse ();
 197             if (ftell (out) != ft)
 198               fputc ('\n', out);
 199 --- gcc/java/lang-options.h.orig        Sat Apr  8 23:03:03 2000
 200 +++ gcc/java/lang-options.h     Sat Apr  8 23:03:56 2000
 201 @@ -39,8 +39,10 @@
 202    { "-M", "Print dependencies to stdout" },
 203    { "-MM", "Print dependencies to stdout" },
 204  #endif /* ! USE_CPPLIB */
 205 -  { "-fclasspath", "Set class path and suppress system path" },
 206 -  { "-fCLASSPATH", "Set class path" },
 207 +  { "--classpath", "Set class path and suppress system path" },
 208 +  { "--CLASSPATH", "Set class path" },
 209 +  { "--main", "Choose class whose main method should be used" },
 210 +  { "--encoding", "Choose input encoding (default is UTF-8)" },
 211    { "-I", "Add directory to class path" },
 212    { "-foutput-class-dir", "Directory where class files should be written" },
 213    { "-Wredundant-modifiers",
 214 --- gcc/java/lang.c.orig        Sat Apr  8 23:03:03 2000
 215 +++ gcc/java/lang.c     Sat Apr  8 23:03:56 2000
 216 @@ -105,6 +105,9 @@
 217  /* When non zero, warns that final local are treated as non final.  */
 218  int flag_static_local_jdk1_1 = 0;
 219
 220 +/* The encoding of the source file.  */
 221 +char *current_encoding = NULL;
 222 +
 223  /* From gcc/flags.h, and indicates if exceptions are turned on or not.  */
 224
 225  extern int flag_new_exceptions;
 226 @@ -172,6 +175,13 @@
 227        return 1;
 228      }
 229  #undef ARG
 230 +#define ARG "-fencoding="
 231 +  if (strncmp (p, ARG, sizeof (ARG) - 1) == 0)
 232 +    {
 233 +      current_encoding = p + sizeof (ARG) - 1;
 234 +      return 1;
 235 +    }
 236 +#undef ARG
 237
 238    if (p[0] == '-' && p[1] == 'f')
 239      {
 240 @@ -252,7 +262,9 @@
 241    return 0;
 242  }
 243
 244 +/* Global open file.  */
 245  FILE *finput;
 246 +
 247  char *
 248  init_parse (filename)
 249       char *filename;
 250 @@ -313,7 +326,6 @@
 251  void
 252  finish_parse ()
 253  {
 254 -  fclose (finput);
 255    jcf_dependency_write ();
 256  }
 257
 258 --- gcc/java/lex.c.orig Sat Apr  8 23:03:03 2000
 259 +++ gcc/java/lex.c      Sat Apr  8 23:14:41 2000
 260 @@ -24,15 +24,15 @@
 261  The Free Software Foundation is independent of Sun Microsystems, Inc.  */
 262
 263  /* It defines java_lex (yylex) that reads a Java ASCII source file
 264 -possibly containing Unicode escape sequence or utf8 encoded characters
 265 -and returns a token for everything found but comments, white spaces
 266 -and line terminators. When necessary, it also fills the java_lval
 267 -(yylval) union. It's implemented to be called by a re-entrant parser
 268 -generated by Bison.
 269 -
 270 -The lexical analysis conforms to the Java grammar described in "The
 271 -Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
 272 -Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */
  273 +   possibly containing Unicode escape sequences or utf8 encoded
 274 +   characters and returns a token for everything found but comments,
 275 +   white spaces and line terminators. When necessary, it also fills
 276 +   the java_lval (yylval) union. It's implemented to be called by a
 277 +   re-entrant parser generated by Bison.
 278 +
 279 +   The lexical analysis conforms to the Java grammar described in "The
 280 +   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
 281 +   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
 282
 283  #include "keyword.h"
 284
 285 @@ -55,15 +55,18 @@
 286  static int java_parse_doc_section PROTO ((unicode_t));
 287  static void java_parse_end_comment PROTO ((unicode_t));
 288  static unicode_t java_get_unicode PROTO (());
 289 -static unicode_t java_read_unicode PROTO ((int, int *));
 290 -static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
 291 -static unicode_t java_read_char PROTO (());
 292 +static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
 293 +static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
 294 +static unicode_t java_read_char PARAMS ((java_lexer *));
 295  static void java_allocate_new_line PROTO (());
 296  static void java_unget_unicode PROTO (());
 297  static unicode_t java_sneak_unicode PROTO (());
 298 +java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
 299
 300  void
 301 -java_init_lex ()
 302 +java_init_lex (finput, encoding)
 303 +     FILE *finput;
 304 +     const char *encoding;
 305  {
 306  #ifndef JC1_LITE
 307    int java_lang_imported = 0;
 308 @@ -108,9 +111,9 @@
 309    ctxp->lineno = lineno = 0;
 310    ctxp->p_line = NULL;
 311    ctxp->c_line = NULL;
 312 -  ctxp->unget_utf8_value = 0;
 313    ctxp->minus_seen = 0;
 314    ctxp->java_error_flag = 0;
 315 +  ctxp->lexer = java_new_lexer (finput, encoding);
 316  }
 317
 318  static char *
 319 @@ -188,22 +191,142 @@
 320    ctxp->c_line->white_space_only = 1;
 321  }
 322
 323 -#define BAD_UTF8_VALUE 0xFFFE
 324 +/* Create a new lexer object.  */
 325 +java_lexer *
 326 +java_new_lexer (finput, encoding)
 327 +     FILE *finput;
 328 +     const char *encoding;
 329 +{
 330 +  java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 331 +  int enc_error = 0;
 332 +
 333 +  lex->finput = finput;
 334 +  lex->bs_count = 0;
 335 +  lex->unget_value = 0;
 336 +
 337 +#ifdef HAVE_ICONV
 338 +  lex->handle = iconv_open ("UCS-2", encoding);
 339 +  if (lex->handle == (iconv_t) -1)
 340 +    {
 341 +      /* FIXME: we should give a nice error based on errno here.  */
 342 +      enc_error = 1;
 343 +    }
 344 +  lex->first = -1;
 345 +  lex->last = -1;
 346 +#else /* HAVE_ICONV */
 347 +  if (strcmp (encoding, DEFAULT_ENCODING))
 348 +    enc_error = 1;
 349 +#endif /* HAVE_ICONV */
 350 +
 351 +  if (enc_error)
 352 +    fatal ("unknown encoding: `%s'", encoding);
 353 +
 354 +  return lex;
 355 +}
 356 +
 357 +void
 358 +java_destroy_lexer (lex)
 359 +     java_lexer *lex;
 360 +{
 361 +  fclose (lex->finput);
 362 +#ifdef HAVE_ICONV
 363 +  iconv_close (lex->handle);
 364 +#endif
 365 +  free (lex);
 366 +}
 367
 368  static unicode_t
 369 -java_read_char ()
 370 +java_read_char (lex)
 371 +     java_lexer *lex;
 372  {
 373 -  int c;
 374 -  int c1, c2;
 375 +  if (lex->unget_value)
 376 +    {
 377 +      unicode_t r = lex->unget_value;
 378 +      lex->unget_value = 0;
 379 +      return r;
 380 +    }
 381 +
 382 +#ifdef HAVE_ICONV
 383 +  {
 384 +    char out[2];
 385 +    size_t ir, inbytesleft, in_save, out_count;
 386 +    char *inp, *outp;
 387
 388 -  if (ctxp->unget_utf8_value)
 389 +    while (1)
 390      {
 391 -      int to_return = ctxp->unget_utf8_value;
 392 -      ctxp->unget_utf8_value = 0;
 393 -      return (to_return);
 394 +       /* See if we need to read more data.  If FIRST == 0 then the
 395 +          previous conversion attempt ended in the middle of a
 396 +          character at the end of the buffer.  Otherwise we only have
 397 +          to read if the buffer is empty.  */
 398 +       if (lex->first == 0 || lex->first >= lex->last)
 399 +         {
 400 +           int r;
 401 +
 402 +           if (lex->first >= lex->last)
 403 +             {
 404 +               lex->first = 0;
 405 +               lex->last = 0;
 406 +             }
 407 +           if (feof (lex->finput))
 408 +             return UEOF;
 409 +           r = fread (&lex->buffer[lex->last], 1,
 410 +                      sizeof (lex->buffer) - lex->last,
 411 +                      lex->finput);
 412 +           lex->last += r;
 413      }
 414
 415 -  c = GETC ();
 416 +       inbytesleft = lex->last - lex->first;
 417 +
 418 +       if (inbytesleft == 0)
 419 +         {
 420 +           /* We've tried to read and there is nothing left.  */
 421 +           return UEOF;
 422 +         }
 423 +
 424 +       in_save = inbytesleft;
 425 +       out_count = 2;
 426 +       inp = &lex->buffer[lex->first];
 427 +       outp = out;
 428 +       ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
 429 +                   &outp, &out_count);
 430 +       lex->first += in_save - inbytesleft;
 431 +
 432 +       if (out_count == 0)
 433 +         {
 434 +           /* Success.  We assume that UCS-2 is big-endian.  This
 435 +              appears to be an ok assumption.  */
 436 +           unicode_t result;
 437 +           result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
 438 +           return result;
 439 +         }
 440 +
 441 +       if (ir == (size_t) -1)
 442 +         {
 443 +           if (errno == EINVAL)
 444 +             {
 445 +               /* This is ok.  This means that the end of our buffer
 446 +                  is in the middle of a character sequence.  We just
 447 +                  move the valid part of the buffer to the beginning
 448 +                  to force a read.  */
  449 +               /* We use bcopy() because it should handle overlapping
  450 +                  regions.  FIXME: use memmove() instead.  */
 451 +               bcopy (&lex->buffer[lex->first], &lex->buffer[0],
 452 +                      lex->last - lex->first);
 453 +               lex->last -= lex->first;
 454 +               lex->first = 0;
 455 +             }
 456 +           else
 457 +             {
 458 +               /* A more serious error.  */
 459 +               java_lex_error ("unrecognized character in input stream", 0);
 460 +             }
 461 +         }
 462 +      }
 463 +  }
 464 +#else /* HAVE_ICONV */
 465 +  {
 466 +    int c, c1, c2;
 467 +    c = getc (lex->finput);
 468
 469    if (c < 128)
 470      return (unicode_t)c;
 471 @@ -213,17 +336,17 @@
 472      {
 473        if ((c & 0xe0) == 0xc0)
 474          {
 475 -          c1 = GETC ();
 476 +           c1 = getc (lex->finput);
 477           if ((c1 & 0xc0) == 0x80)
 478             return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 479           c = c1;
 480         }
 481        else if ((c & 0xf0) == 0xe0)
 482          {
 483 -          c1 = GETC ();
 484 +           c1 = getc (lex->finput);
 485           if ((c1 & 0xc0) == 0x80)
 486             {
 487 -             c2 = GETC ();
 488 +               c2 = getc (lex->finput);
 489               if ((c2 & 0xc0) == 0x80)
 490                 return (unicode_t)(((c & 0xf) << 12) +
 491                                    (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 492 @@ -233,14 +356,15 @@
 493           else
 494             c = c1;
 495         }
 496 -      /* We looked for a UTF8 multi-byte sequence (since we saw an initial
 497 -        byte with the high bit set), but found invalid bytes instead.
 498 -        If the most recent byte was Ascii (and not EOF), we should
 499 -        unget it, in case it was a comment terminator or other delimitor. */
 500 -      if ((c & 0x80) == 0)
 501 -       UNGETC (c);
 502 -      return BAD_UTF8_VALUE;
 503 +
 504 +       /* We simply don't support invalid characters.  */
 505 +       java_lex_error ("malformed UTF-8 character", 0);
 506 +      }
 507      }
 508 +#endif /* HAVE_ICONV */
 509 +
 510 +  /* We only get here on error.  */
 511 +  return UEOF;
 512  }
 513
 514  static void
 515 @@ -261,56 +385,54 @@
 516  }
 517
 518  static unicode_t
 519 -java_read_unicode (term_context, unicode_escape_p)
 520 +java_read_unicode (lex, term_context, unicode_escape_p)
 521 +     java_lexer *lex;
 522      int term_context;
 523      int *unicode_escape_p;
 524  {
 525    unicode_t c;
 526 -  long i, base;
 527
 528 -  c = java_read_char ();
 529 +  c = java_read_char (lex);
 530    *unicode_escape_p = 0;
 531
 532    if (c != '\\')
 533 -    return ((term_context ? c :
 534 -            java_lineterminator (c) ? '\n' : (unicode_t)c));
 535 -
 536 -  /* Count the number of preceeding '\' */
 537 -  for (base = ftell (finput), i = base-2; c == '\\';)
 538      {
 539 -      fseek (finput, i--, SEEK_SET);
 540 -      c = java_read_char ();   /* Will fail if reading utf8 stream. FIXME */
 541 +      lex->bs_count = 0;
 542 +      return (term_context ? c : (java_lineterminator (c)
 543 +                                 ? '\n'
 544 +                                 : (unicode_t) c));
 545      }
 546 -  fseek (finput, base, SEEK_SET);
 547 -  if ((base-i-3)%2 == 0)       /* If odd number of \ seen */
 548 +
 549 +  ++lex->bs_count;
 550 +  if ((lex->bs_count) % 2 == 1)
 551      {
 552 -      c = java_read_char ();
 553 +      /* Odd number of \ seen.  */
 554 +      c = java_read_char (lex);
 555        if (c == 'u')
 556          {
 557 -         unsigned short unicode = 0;
 558 +         unicode_t unicode = 0;
 559           int shift = 12;
 560           /* Next should be 4 hex digits, otherwise it's an error.
 561              The hex value is converted into the unicode, pushed into
 562              the Unicode stream.  */
 563           for (shift = 12; shift >= 0; shift -= 4)
 564             {
 565 -             if ((c = java_read_char ()) == UEOF)
 566 +             if ((c = java_read_char (lex)) == UEOF)
 567                 return UEOF;
 568               if (c >= '0' && c <= '9')
 569                 unicode |= (unicode_t)((c-'0') << shift);
 570               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 571                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 572               else
 573 -                 java_lex_error
 574 -                   ("Non hex digit in Unicode escape sequence", 0);
 575 +               java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 576             }
 577           *unicode_escape_p = 1;
 578 -         return (term_context ? unicode :
 579 -                 (java_lineterminator (c) ? '\n' : unicode));
 580 +         return (term_context
 581 +                 ? unicode : (java_lineterminator (c) ? '\n' : unicode));
 582         }
 583 -      ctxp->unget_utf8_value = c;
 584 +      lex->unget_value = c;
 585      }
 586 -  return (unicode_t)'\\';
 587 +  return (unicode_t) '\\';
 588  }
 589
 590  static unicode_t
 591 @@ -325,7 +447,7 @@
 592         for (;;)
 593           {
 594             int unicode_escape_p;
 595 -           c = java_read_unicode (0, &unicode_escape_p);
 596 +           c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
 597             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 598             if (ctxp->c_line->white_space_only
 599                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 600 @@ -346,7 +468,7 @@
 601    int unicode_escape_p;
 602    if (c == '\n')               /* CR */
 603      {
 604 -      if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
 605 +      if ((c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p)) != '\r')
 606         {
 607           ctxp->c_line->ahead [0] = c;
 608           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 609 @@ -355,7 +477,7 @@
 610      }
 611    else if (c == '\r')          /* LF */
 612      {
 613 -      if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
 614 +      if ((c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p)) != '\n')
 615         {
 616           ctxp->c_line->ahead [0] = c;
 617           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 618 --- gcc/java/lex.h.orig Sat Apr  8 23:03:03 2000
 619 +++ gcc/java/lex.h      Sat Apr  8 23:03:56 2000
 620 @@ -35,6 +35,13 @@
 621  /* A Unicode character, as read from the input file  */
 622  typedef unsigned short unicode_t;
 623
 624 +#ifdef HAVE_ICONV
 625 +#include <iconv.h>
 626 +#endif /* HAVE_ICONV */
 627 +
 628 +/* Default encoding to use if no encoding is specified.  */
 629 +#define DEFAULT_ENCODING "UTF-8"
 630 +
 631  /* Debug macro to print-out what we match  */
 632  #ifdef JAVA_LEX_DEBUG
 633  #ifdef JAVA_LEX_DEBUG_CHAR
 634 @@ -96,12 +103,38 @@
 635    int col;
 636  } java_lc;
 637
 638 +typedef struct java_lexer
 639 +{
 640 +  /* The file from which we're reading.  */
 641 +  FILE *finput;
 642 +
 643 +  /* Number of consecutive backslashes we've read.  */
 644 +  int bs_count;
 645 +
 646 +  /* If nonzero, a value that was pushed back.  */
 647 +  unicode_t unget_value;
 648 +
 649 +#ifdef HAVE_ICONV
 650 +  /* The handle for the iconv converter we're using.  */
 651 +  iconv_t handle;
 652 +
 653 +  /* Bytes we've read from the file but have not sent to iconv.  */
 654 +  char buffer[1024];
 655 +
 656 +  /* Index of first valid character in buffer, -1 if no valid
 657 +     characters.  */
 658 +  int first;
 659 +
 660 +  /* Index of last valid character in buffer, plus one.  -1 if no
 661 +     valid characters in buffer.  */
 662 +  int last;
 663 +#endif /* HAVE_ICONV */
 664 +} java_lexer;
 665
 666 -#define JAVA_LINE_MAX 80
 667 +/* Destroy a lexer object.  */
 668 +extern void java_destroy_lexer PARAMS ((java_lexer *));
 669
 670 -/* Macro to read and unread bytes */
 671 -#define UNGETC(c) ungetc(c, finput)
 672 -#define GETC()    getc(finput)
 673 +#define JAVA_LINE_MAX 80
 674
 675  /* Build a location compound integer */
 676  #define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))
 677 --- gcc/java/parse.h.orig       Sat Apr  8 23:03:03 2000
 678 +++ gcc/java/parse.h    Sat Apr  8 23:15:36 2000
 679 @@ -586,12 +586,11 @@
 680  struct parser_ctxt {
 681
 682    char *filename;                  /* Current filename */
 683 -  FILE *finput;                            /* Current file input stream */
 684    struct parser_ctxt *next;
 685
 686 +  java_lexer *lexer;                /* Current lexer state */
 687    struct java_line *p_line, *c_line; /* Previous and current line */
 688    java_lc elc;                      /* Error's line column info */
 689 -  unicode_t unget_utf8_value;        /* An unget utf8 value */
 690    int ccb_indent;                   /* Keep track of {} indent, lexer */
 691    int first_ccb_indent1;            /* First { at ident level 1 */
 692    int last_ccb_indent1;                     /* Last } at ident level 1 */
 693 @@ -668,7 +667,7 @@
 694  /* Always in use, no matter what you compile */
 695  void java_push_parser_context PROTO ((void));
 696  void java_pop_parser_context PROTO ((int));
 697 -void java_init_lex PROTO ((void));
 698 +void java_init_lex PARAMS ((FILE *, const char *));
 699  extern void java_parser_context_save_global PROTO ((void));
 700  extern void java_parser_context_restore_global PROTO ((void));
 701  int yyparse PROTO ((void));
 702 --- gcc/java/parse.y.orig       Sat Apr  8 23:03:03 2000
 703 +++ gcc/java/parse.y    Sat Apr  8 23:03:56 2000
 704 @@ -2347,7 +2347,6 @@
 705        java_push_parser_context ();
 706        extra_ctxp_pushed_p = 1;
 707      }
 708 -  ctxp->finput = finput;
 709    ctxp->lineno = lineno;
 710    ctxp->current_class = current_class;
 711    ctxp->filename = input_filename;
 712 @@ -2357,7 +2356,6 @@
 713  void
 714  java_parser_context_restore_global ()
 715  {
 716 -  finput = ctxp->finput;
 717    lineno = ctxp->lineno;
 718    current_class = ctxp->current_class;
 719    input_filename = ctxp->filename;
 720 @@ -2386,9 +2384,12 @@
 721        next->incomplete_class = ctxp->incomplete_class;
 722        next->gclass_list = ctxp->gclass_list;
 723        lineno = ctxp->lineno;
 724 -      finput = ctxp->finput;
 725        current_class = ctxp->current_class;
 726      }
 727 +
 728 +  /* If the old and new lexers differ, then free the old one.  */
 729 +  if (ctxp->lexer && next && ctxp->lexer != next->lexer)
 730 +    java_destroy_lexer (ctxp->lexer);
 731
 732    /* Set the single import class file flag to 0 for the current list
 733       of imported things */