8sa1-gcc/gcc/java/lex.c

/* Language lexer for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

/* This file defines java_lex (yylex), which reads a Java source file
that may contain Unicode escape sequences or UTF-8 encoded characters
and returns a token for everything it finds except comments, white
space and line terminators. When necessary, it also fills the java_lval
(yylval) union. It is implemented to be called by a re-entrant parser
generated by Bison.

The lexical analysis conforms to the Java grammar described in "The
Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */

#include <stdio.h>
#include <string.h>
#include <setjmp.h>

#ifdef JAVA_LEX_DEBUG
#include <ctype.h>
#endif

#ifdef inline			/* javaop.h redefines inline as static */
#undef inline
#endif
#include "keyword.h"

#ifndef SEEK_SET
#include <unistd.h>
#endif

#ifndef JC1_LITE
extern struct obstack *expression_obstack;
#endif

/* Prepare the lexer for a new input.  Unless JC1_LITE is defined, this
   pushes an on-demand import of "java.lang" on the context's import
   list, lazily creates the shared wfl/identifier nodes and clears the
   per-context parsing state.  In all configurations it then resets the
   line bookkeeping stored in CTXP.  */
void
java_init_lex ()
{
  /* Plain local, cleared on every call: the import below is therefore
     performed each time this function runs.  */
  int java_lang_imported = 0;

#ifndef JC1_LITE
  if (java_lang_imported == 0)
    {
      tree java_lang_wfl
        = build_expr_wfl (get_identifier ("java.lang"), NULL, 0, 0);
      tree node = build_tree_list (java_lang_wfl, NULL_TREE);

      read_import_dir (TREE_PURPOSE (node));
      /* Link the new entry in front of the existing demand list.  */
      TREE_CHAIN (node) = ctxp->import_demand_list;
      ctxp->import_demand_list = node;
      java_lang_imported = 1;
    }

  /* Nodes created once and reused afterwards.  Note that wfl_operator
     is built from ctxp->filename as it stands before the assignment
     further down.  */
  if (wfl_operator == NULL_TREE)
    {
      wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
    }
  if (label_id == NULL_TREE)
    {
      label_id = get_identifier ("$L");
    }
  if (wfl_append == NULL_TREE)
    {
      wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
    }
  if (wfl_string_buffer == NULL_TREE)
    {
      wfl_string_buffer
        = build_expr_wfl (get_identifier ("java.lang.StringBuffer"),
                          NULL, 0, 0);
    }
  if (wfl_to_string == NULL_TREE)
    {
      wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
    }

  ctxp->incomplete_class = NULL_TREE;
  ctxp->non_static_initialized = NULL_TREE;
  ctxp->static_initialized = NULL_TREE;

  bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
  classpath = NULL;
  bzero (current_jcf, sizeof (JCF));
  ctxp->current_parsed_class = NULL;
  ctxp->package = NULL_TREE;
#endif

  /* Line/position bookkeeping, reset in every configuration.  */
  ctxp->filename = input_filename;
  lineno = 0;
  ctxp->lineno = 0;
  ctxp->p_line = NULL;
  ctxp->c_line = NULL;
  ctxp->unget_utf8_value = 0;
  ctxp->minus_seen = 0;
  ctxp->java_error_flag = 0;
}

/* Return a printable form of the character at index I of LINE.  A
   character that came from a Unicode escape, or that lies outside the
   7-bit ASCII range, is rendered as a \uXXXX escape; anything else is
   returned as a one-character string.

   The result lives in a static buffer that is overwritten by the next
   call.  */
static char *
java_sprint_unicode (line, i)
    struct java_line *line;
    int i;
{
  static char buffer [10];
  /* Everything above 127 is non-ASCII.  The previous test (> 128) let
     the value 128 through as a raw byte.  */
  if (line->unicode_escape_p [i] || line->line [i] > 127)
    sprintf (buffer, "\\u%04x", line->line [i]);
  else
    {
      buffer [0] = line->line [i];
      buffer [1] = '\0';
    }
  return buffer;
}

static unicode_t
java_sneak_unicode ()
{
  return (ctxp->c_line->line [ctxp->c_line->current]);
}

/* Step the read position of the current line back by one character and
   adjust the column counter accordingly.  The argument C is not used;
   the character is simply re-read from the line buffer.  Stepping back
   from the very first position of a line is a fatal error.  */
static void
java_unget_unicode (c)
     unicode_t c;
{
  if (ctxp->c_line->current == 0)
    fatal ("can't unget unicode - java_unget_unicode");

  ctxp->c_line->current -= 1;
  /* The delta is computed after the position has been moved back.  */
  ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
}

void
java_allocate_new_line ()
{
  int i;
  unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
  char ahead_escape_p = (ctxp->c_line ?
			 ctxp->c_line->unicode_escape_ahead_p : 0);

  if (ctxp->c_line && !ctxp->c_line->white_space_only)
    {
      if (ctxp->p_line)
	{
	  free (ctxp->p_line->unicode_escape_p);
	  free (ctxp->p_line->line);
	  free (ctxp->p_line);
	}
      ctxp->p_line = ctxp->c_line;
      ctxp->c_line = NULL;		/* Reallocated */
    }

  if (!ctxp->c_line)
    {
      ctxp->c_line = (struct java_line *)malloc (sizeof (struct java_line));
      ctxp->c_line->max = JAVA_LINE_MAX;
      ctxp->c_line->line = (unicode_t *)malloc
	  (sizeof (unicode_t)*ctxp->c_line->max);
      ctxp->c_line->unicode_escape_p =
	  (char *)malloc (sizeof (char)*ctxp->c_line->max);
      ctxp->c_line->white_space_only = 0;
    }

  ctxp->c_line->line [0] = ctxp->c_line->size = 0;
  ctxp->c_line->char_col = ctxp->c_line->current = 0;
  if (ahead)
    {
      ctxp->c_line->line [ctxp->c_line->size] = ahead;
      ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
      ctxp->c_line->size++;
    }
  ctxp->c_line->ahead [0] = 0;
  ctxp->c_line->unicode_escape_ahead_p = 0;
  ctxp->c_line->lineno = ++lineno;
  ctxp->c_line->white_space_only = 1;
}

/* Read one character from the input and return it as a unicode_t.

   A value previously stored in ctxp->unget_utf8_value is returned
   first.  Otherwise one byte is fetched with GETC: end of file yields
   UEOF, a byte below 128 is returned unchanged, and 2- or 3-byte UTF-8
   sequences are decoded.  Any other byte pattern is reported as
   "Bad utf8 encoding" and 0 is returned.  */
static unicode_t
java_read_char ()
{
  int c;
  int c1, c2;

  if (ctxp->unget_utf8_value)
    {
      int to_return = ctxp->unget_utf8_value;
      ctxp->unget_utf8_value = 0;
      return (to_return);
    }

  c = GETC ();

  /* Test for EOF before the ASCII shortcut: EOF is negative and would
     otherwise be caught by the `c < 128' test.  */
  if (c == EOF)
    return UEOF;
  if (c < 128)
    return (unicode_t)c;

  /* `==' binds tighter than `&', so each mask test needs its own
     parentheses; without them every test below is always false.  */
  if ((c & 0xe0) == 0xc0)
    {
      /* 110xxxxx 10xxxxxx  */
      c1 = GETC ();
      if ((c1 & 0xc0) == 0x80)
        return (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
    }
  else if ((c & 0xf0) == 0xe0)
    {
      /* 1110xxxx 10xxxxxx 10xxxxxx  */
      c1 = GETC ();
      if ((c1 & 0xc0) == 0x80)
        {
          c2 = GETC ();
          if ((c2 & 0xc0) == 0x80)
            return (unicode_t)(((c & 0xf) << 12) +
                               ((c1 & 0x3f) << 6) + (c2 & 0x3f));
        }
    }

  java_lex_error ("Bad utf8 encoding", 0);
  /* Always hand a defined value back to the caller instead of falling
     off the end of a non-void function.  */
  return 0;
}

/* Append character C to line L, together with the flag telling whether
   it was written as a Unicode escape.  Both parallel arrays of L are
   grown by JAVA_LINE_MAX entries when the line is full.  */
static void
java_store_unicode (l, c, unicode_escape_p)
    struct java_line *l;
    unicode_t c;
    int unicode_escape_p;
{
  int slot;

  if (l->size == l->max)
    {
      /* NOTE(review): the realloc results are not checked.  */
      l->max += JAVA_LINE_MAX;
      l->line = (unicode_t *) realloc (l->line, sizeof (unicode_t) * l->max);
      l->unicode_escape_p
        = (char *) realloc (l->unicode_escape_p, sizeof (char) * l->max);
    }

  slot = l->size;
  l->line [slot] = c;
  l->unicode_escape_p [slot] = unicode_escape_p;
  l->size = slot + 1;
}

/* Read the next character, translating a \uXXXX escape when one is
   present.

   TERM_CONTEXT is non-zero when the caller is itself examining a line
   terminator; in that case the character is returned as is.  Otherwise
   a character recognised by java_lineterminator is returned as '\n'.
   *UNICODE_ESCAPE_P is set to 1 when the returned character came from
   a Unicode escape and to 0 otherwise.  Returns UEOF if the input ends
   inside an escape.  */
static unicode_t
java_read_unicode (term_context, unicode_escape_p)
    int term_context;
    int *unicode_escape_p;
{
  unicode_t c;
  long i, base;

  c = java_read_char ();
  *unicode_escape_p = 0;

  if (c != '\\')
    return ((term_context ? c :
	     java_lineterminator (c) ? '\n' : (unicode_t)c));

  /* Count the number of preceding '\' by seeking backwards in the
     file, then restore the read position.  */
  for (base = ftell (finput), i = base-2; c == '\\';)
    {
      fseek (finput, i--, SEEK_SET);
      c = java_read_char ();	/* Will fail if reading utf8 stream. FIXME */
    }
  fseek (finput, base, SEEK_SET);
  if ((base-i-3)%2 == 0)	/* If odd number of \ seen */
    {
      c = java_read_char ();
      if (c == 'u')
        {
	  unsigned short unicode = 0;
	  int shift = 12;
	  /* Next should be 4 hex digits, otherwise it's an error.
	     The hex value is converted into the unicode, pushed into
	     the Unicode stream.  */
	  for (shift = 12; shift >= 0; shift -= 4)
	    {
	      if ((c = java_read_char ()) == UEOF)
	        return UEOF;
	      if (c >= '0' && c <= '9')
		unicode |= (unicode_t)((c-'0') << shift);
	      else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
	        unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
	      else
		  java_lex_error
		    ("Non hex digit in Unicode escape sequence", 0);
	    }
	  *unicode_escape_p = 1;
	  /* The line terminator test must look at the decoded value.
	     At this point C only holds the last hex digit read, which
	     can never be a line terminator.  */
	  return (term_context ? unicode :
		  (java_lineterminator (unicode) ? '\n' : unicode));
	}
      UNGETC (c);
    }
  return (unicode_t)'\\';
}

/* Return the next character of the current line and advance the read
   position.  When there is no current line, or it has been entirely
   consumed, a new line is first read from the input, up to and
   including its '\n' or UEOF.  */
static unicode_t
java_get_unicode ()
{
  int need_line = (!ctxp->c_line
                   || ctxp->c_line->current == ctxp->c_line->size);

  if (need_line)
    {
      unicode_t ch;

      java_allocate_new_line ();
      /* A carried-over '\n' already makes a complete (empty) line.  */
      if (ctxp->c_line->line[0] != '\n')
        do
          {
            int escaped;

            ch = java_read_unicode (0, &escaped);
            java_store_unicode (ctxp->c_line, ch, escaped);
            /* The first non-blank character clears the flag.  */
            if (ctxp->c_line->white_space_only
                && !JAVA_WHITE_SPACE_P (ch) && ch != '\n')
              ctxp->c_line->white_space_only = 0;
          }
        while (ch != '\n' && ch != UEOF);
    }

  ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
  JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
  return ctxp->c_line->line [ctxp->c_line->current++];
}

/* Return 1 if C starts a line terminator, 0 otherwise.

   When C is '\n' (LF) or '\r' (CR), the following character is read.
   If it is the other member of the pair it is swallowed, so that the
   two characters count as a single terminator; otherwise it is saved
   as the current line's look-ahead character along with its
   Unicode-escape flag.  */
static int
java_lineterminator (c)
     unicode_t c;
{
  int escaped;
  unicode_t partner;
  unicode_t next;

  if (c == '\n')		/* LF: may be followed by CR */
    partner = '\r';
  else if (c == '\r')		/* CR: may be followed by LF */
    partner = '\n';
  else
    return 0;

  next = java_read_unicode (1, &escaped);
  if (next != partner)
    {
      /* Not part of the terminator: keep it for the next line.  */
      ctxp->c_line->ahead [0] = next;
      ctxp->c_line->unicode_escape_ahead_p = escaped;
    }
  return 1;
}

/* Consume characters up to and including the closing star-slash
   sequence of a C style comment.  If the input ends first, the error
   "Comment not terminated at end of input" is reported once and the
   function returns.  */
static void
java_parse_end_comment ()
{
  unicode_t c;

  for (c = java_get_unicode ();; c = java_get_unicode ())
    {
      switch (c)
	{
	case UEOF:
	  java_lex_error ("Comment not terminated at end of input", 0);
	  /* Stop here rather than falling into the '*' case, which
	     would read again and report the error a second time.  */
	  return;
	case '*':
	  switch (c = java_get_unicode ())
	    {
	    case UEOF:
	      java_lex_error ("Comment not terminated at end of input", 0);
	      return;
	    case '/':
	      return;
	    case '*':	/* reparse only '*' */
	      java_unget_unicode (c);
	    }
	}
    }
}

/* Function wrapper around the _JAVA_LETTER_OR_DIGIT_P macro.  It is
   meant to be called through JAVA_ID_CHAR_P () only; used in any other
   way it will return a wrong result.  */
static int
java_letter_or_digit_p (c)
     unicode_t c;
{
  int letter_or_digit = _JAVA_LETTER_OR_DIGIT_P (c);

  return letter_or_digit;
}

/* Parse the part of an escape sequence that follows the backslash and
   return the character it denotes.

   Recognised forms are the single-letter escapes b, t, n, f and r, the
   escaped double quote, single quote and backslash, and an octal
   escape of one to three digits in the range 0-7.  A '\n' is returned
   unchanged so that the caller can report the unterminated literal.
   On error a message is issued and JAVA_CHAR_ERROR is returned.  */
static unicode_t
java_parse_escape_sequence ()
{
  unicode_t char_lit;
  unicode_t c;

  switch (c = java_get_unicode ())
    {
    case 'b':
      return (unicode_t)0x8;
    case 't':
      return (unicode_t)0x9;
    case 'n':
      return (unicode_t)0xa;
    case 'f':
      return (unicode_t)0xc;
    case 'r':
      return (unicode_t)0xd;
    case '"':
      return (unicode_t)0x22;
    case '\'':
      return (unicode_t)0x27;
    case '\\':
      return (unicode_t)0x5c;
    /* Only 0-7 are octal digits; 8 and 9 now reach the default case
       and are reported as illegal.  */
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7':
      {
	int octal_escape[3];
	int octal_escape_index = 0;

	/* Collect at most three octal digits.  */
	for (; octal_escape_index < 3 && RANGE (c, '0', '7');
	     c = java_get_unicode ())
	  octal_escape [octal_escape_index++] = c;

	/* The character that ended the loop is not part of the escape.  */
	java_unget_unicode (c);

	/* A three digit escape may not start with a digit above 3.  */
	if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
	  {
	    java_lex_error ("Literal octal escape out of range", 0);
	    return JAVA_CHAR_ERROR;
	  }
	else
	  {
	    int i, shift;
	    for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
		 i < octal_escape_index; i++, shift -= 3)
	      char_lit |= (octal_escape [i] - '0') << shift;

	    return (char_lit);
	  }
      }
    case '\n':
      return '\n';		/* ULT, caught later as a specific error */
    default:
      java_lex_error ("Illegal character in escape sequence", 0);
      return JAVA_CHAR_ERROR;
    }
}

/* Lexer entry point (named yylex when JC1_LITE is defined, java_lex
   otherwise).  Skips white space, line terminators and comments, then
   returns the code of the next token, storing its semantic value
   through JAVA_LVAL when the token has one.  Returns 0 at end of
   input.

   The token categories are tried in this order: numeric literals,
   character literals, string literals, separators, operators,
   keywords (including the boolean and null literals) and identifiers.
   Anything else is reported as an invalid character.  */
int
#ifdef JC1_LITE
yylex (java_lval)
#else
java_lex (java_lval)
#endif
     YYSTYPE *java_lval;
{
  unicode_t c, first_unicode;
  int line_terminator;
  int ascii_index, all_ascii;
  char *string;

  /* Translation of the Unicode escape in the raw stream of Unicode
     characters. Takes care of line terminator.  */
 step1:
  /* Skip white spaces: SP, TAB and FF or ULT */
  for (c = java_get_unicode ();
       c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
    if (c == '\n')
      {
	ctxp->elc.line = ctxp->c_line->lineno;
	ctxp->elc.col  = ctxp->c_line->char_col-2;
      }

  ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);

  if (c == 0x1a)		/* CTRL-Z */
    {
      if ((c = java_get_unicode ()) == UEOF)
	return 0;		/* Ok here */
      else
	java_unget_unicode (c);	/* Caught later at the end the function */
    }
  /* Handle EOF here */
  if (c == UEOF)	/* Should probably do something here... */
    return 0;

  /* Take care of eventual comments.  */
  if (c == '/')
    {
      switch (c = java_get_unicode ())
	{
	case '/':
	  for (c = java_get_unicode ();;c = java_get_unicode ())
	    {
	      if (c == UEOF)
		{
		  java_lex_error ("Comment not terminated at end of input", 0);
		  /* Nothing can follow the end of input.  Without this
		     return the loop keeps reading UEOF forever.  */
		  return 0;
		}
	      if (c == '\n')	/* ULT */
		goto step1;
	    }
	  break;

	case '*':
	  if ((c = java_get_unicode ()) == '*')
	    {
	      if ((c = java_get_unicode ()) == '/')
		goto step1;	/* Empty documentation comment  */

	      else
		/* Parsing the documentation section. We're looking
		 for the @deprecated pseudo keyword.  the @deprecated
		 tag must be at the beginning of a doc comment line
		 (ignoring white space and any * character)  */

		{
		  /* SEEN_STAR is initialized so that it is never read
		     while indeterminate when the loop below does not
		     run.  */
		  int valid_tag = 0, seen_star = 0;

		  while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
		    {
		      switch (c)
			{
			case '*':
			  seen_star = 1;
			  break;
			case '\n': /* ULT */
			  valid_tag = 1;
			  break;
			default:
			  seen_star = 0;
			}
		      c = java_get_unicode();
		    }

		  if (c == UEOF)
		    java_lex_error
		      ("Comment not terminated at end of input", 0);

		  if (seen_star && (c == '/'))
		    goto step1;	/* End of documentation */

		  if (valid_tag && (c == '@'))
		    {
		      /* Room for the ten characters collected below
			 plus the terminating NUL.  */
		      char deprecated [11];
		      int  deprecated_index = 0;

		      for (deprecated_index = 0, c = java_get_unicode ();
			   deprecated_index < 10 && c != UEOF;
			   c = java_get_unicode ())
			deprecated [deprecated_index++] = c;

		      if (c == UEOF)
			java_lex_error
		          ("Comment not terminated at end of input", 0);

		      java_unget_unicode (c);
		      deprecated [deprecated_index] = '\0';
		      if (!strcmp (deprecated, "deprecated"))
			{
			  /* Set global flag to be checked by class. FIXME  */
			  warning ("deprecated implementation found");
			}
		    }
		}
	    }
	  else
	    java_unget_unicode (c);

	  java_parse_end_comment ();
	  goto step1;
	  break;
	default:
	  java_unget_unicode (c);
	  c = '/';
	  break;
	}
    }

  /* Record where the token starts, for error reporting and for the
     nodes built by the BUILD_OPERATOR macros.  */
  ctxp->elc.line = ctxp->c_line->lineno;
  ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
  if (ctxp->elc.col < 0)
    fatal ("ctxp->elc.col < 0 - java_lex");

  /* Numeric literals */
  if (JAVA_ASCII_DIGIT (c) || (c == '.'))
    {
      unicode_t peep;
      /* This section of code is borrowed from gcc/c-lex.c  */
#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
      int parts[TOTAL_PARTS];
      HOST_WIDE_INT high, low;
      /* End borrowed section  */
      /* NOTE(review): nothing below checks LITERAL_INDEX against the
	 size of this buffer; a very long literal would overrun it.  */
      char literal_token [256];
      int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
      int  i;
      int  number_beginning = ctxp->c_line->current;

      /* We might have a . separator instead of a FP like .[0-9]* */
      if (c == '.')
	{
	  unicode_t peep = java_sneak_unicode ();

	  if (!JAVA_ASCII_DIGIT (peep))
	    {
	      JAVA_LEX_SEP('.');
	      BUILD_OPERATOR (DOT_TK);
	    }
	}

      for (i = 0; i < TOTAL_PARTS; i++)
	parts [i] = 0;

      if (c == '0')
	{
	  c = java_get_unicode ();
	  if (c == 'x' || c == 'X')
	    {
	      radix = 16;
	      c = java_get_unicode ();
	    }
	  else if (JAVA_ASCII_DIGIT (c))
	    radix = 8;
	  else if (c == '.')
	    {
	      /* Push the '.' back and prepare for a FP parsing... */
	      java_unget_unicode (c);
	      c = '0';
	    }
	  else
	    {
	      /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
	      JAVA_LEX_LIT ("0", 10);
              switch (c)
		{
		case 'L': case 'l':
		  SET_LVAL_NODE_TYPE (integer_zero_node, long_type_node);
		  return (INT_LIT_TK);
		case 'f': case 'F':
		  SET_LVAL_NODE_TYPE (build_real (float_type_node, dconst0),
					float_type_node);
		  return (FP_LIT_TK);
		case 'd': case 'D':
		  SET_LVAL_NODE_TYPE (build_real (double_type_node, dconst0),
					double_type_node);
		  return (FP_LIT_TK);
		default:
		  java_unget_unicode (c);
		  SET_LVAL_NODE_TYPE (integer_zero_node, int_type_node);
		  return (INT_LIT_TK);
		}
	    }
	}
      /* Parse the first part of the literal, until we find something
	 which is not a number.  */
      while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
	     (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
	     (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
	{
	  /* We store in a string (in case it turns out to be a FP) and in
	     PARTS if we have to process a integer literal.  */
	  int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
	  int count;

	  literal_token [literal_index++] = c;
	  /* This section of code is borrowed from gcc/c-lex.c  */
	  for (count = 0; count < TOTAL_PARTS; count++)
	    {
	      parts[count] *= radix;
	      if (count)
		{
		  parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
		  parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
		}
	      else
		parts[0] += numeric;
	    }
	  if (parts [TOTAL_PARTS-1] != 0)
	    overflow = 1;
	  /* End borrowed section.  */
	  c = java_get_unicode ();
	}

      /* If we have something from the FP char set but not a digit, parse
	 a FP literal.  */
      if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
	{
	  int stage = 0;
	  int seen_digit = (literal_index ? 1 : 0);
	  int seen_exponent = 0;
	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
				   double unless specified. */
	  if (radix != 10)
	    java_lex_error ("Can't express non-decimal FP literal", 0);

	  for (;;)
	    {
	      if (c == '.')
		{
		  if (stage < 1)
		    {
		      stage = 1;
		      literal_token [literal_index++ ] = c;
		      c = java_get_unicode ();
		    }
		  else
		    java_lex_error ("Invalid character in FP literal", 0);
		}

	      if (c == 'e' || c == 'E')
		{
		  if (stage < 2)
		    {
		      /* {E,e} must have seen at least a digit */
		      if (!seen_digit)
			java_lex_error ("Invalid FP literal", 0);
		      seen_digit = 0;
		      seen_exponent = 1;
		      stage = 2;
		      literal_token [literal_index++] = c;
		      c = java_get_unicode ();
		    }
		  else
		    java_lex_error ("Invalid character in FP literal", 0);
		}
	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
		{
		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
		  stage = 4;	/* So we fall through */
		}

	      if ((c=='-' || c =='+') && stage < 3)
		{
		  stage = 3;
		  literal_token [literal_index++] = c;
		  c = java_get_unicode ();
		}

	      if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
		  (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
		  (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
		  (stage == 3 && JAVA_ASCII_DIGIT (c)))
		{
		  if (JAVA_ASCII_DIGIT (c))
		    seen_digit = 1;
		  literal_token [literal_index++ ] = c;
		  c = java_get_unicode ();
		}
	      else
		{
		  jmp_buf handler;
		  REAL_VALUE_TYPE value;
#ifndef JC1_LITE
		  tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
#endif

		  if (stage != 4) /* Don't push back fF/dD */
		    java_unget_unicode (c);

		  /* An exponent (if any) must have seen a digit.  */
		  if (seen_exponent && !seen_digit)
		    java_lex_error ("Invalid FP literal", 0);

		  literal_token [literal_index] = '\0';
		  JAVA_LEX_LIT (literal_token, radix);

		  if (setjmp (handler))
		    {
		      JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
		      value = DCONST0;
		    }
		  else
		    {
		      SET_FLOAT_HANDLER (handler);
		      SET_REAL_VALUE_ATOF
		        (value, REAL_VALUE_ATOF (literal_token,
						 TYPE_MODE (type)));

		      if (REAL_VALUE_ISINF (value))
			JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));

		      if (REAL_VALUE_ISNAN (value))
			JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));

		      SET_LVAL_NODE_TYPE (build_real (type, value), type);
		      SET_FLOAT_HANDLER (NULL_PTR);
		      return FP_LIT_TK;
		    }
		}
	    }
	} /* JAVA_ASCII_FPCHAR (c) */

      /* Here we get back to converting the integral literal.  */
      if (c == 'L' || c == 'l')
	long_suffix = 1;
      else if (radix == 16 && JAVA_ASCII_LETTER (c))
	java_lex_error ("Digit out of range in hexadecimal literal", 0);
      else if (radix == 8  && JAVA_ASCII_DIGIT (c))
	java_lex_error ("Digit out of range in octal literal", 0);
      else if (radix == 16 && !literal_index)
	java_lex_error ("No digit specified for hexadecimal literal", 0);
      else
	java_unget_unicode (c);

#ifdef JAVA_LEX_DEBUG
      literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
      JAVA_LEX_LIT (literal_token, radix);
#endif
      /* This section of code is borrowed from gcc/c-lex.c  */
      if (!overflow)
	{
	  bytes = GET_TYPE_PRECISION (long_type_node);
	  for (i = bytes; i < TOTAL_PARTS; i++)
	    if (parts [i])
	      {
	        overflow = 1;
		break;
	      }
	}
      high = low = 0;
      for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
	{
	  high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
					      / HOST_BITS_PER_CHAR)]
		   << (i * HOST_BITS_PER_CHAR));
	  low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
	}
      /* End borrowed section.  */

      /* Range checking */
      if (long_suffix)
	{
	  /* 9223372036854775808L is valid if operand of a '-'. Otherwise
	     9223372036854775807L is the biggest `long' literal that can be
	     expressed using a 10 radix. For other radixes, everything that
	     fits within 64 bits is OK. */
	  int hb = (high >> 31);
	  if (overflow || (hb && low && radix == 10) ||
	      (hb && high & 0x7fffffff && radix == 10) ||
	      (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
	}
      else
	{
	  /* 2147483648 is valid if operand of a '-'. Otherwise,
	     2147483647 is the biggest `int' literal that can be
	     expressed using a 10 radix. For other radixes, everything
	     that fits within 32 bits is OK. */
	  int hb = (low >> 31) & 0x1;
	  if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
	      (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
	}
      ctxp->minus_seen = 0;
      SET_LVAL_NODE_TYPE (build_int_2 (low, high),
			  (long_suffix ? long_type_node : int_type_node));
      return INT_LIT_TK;
    }

  ctxp->minus_seen = 0;
  /* Character literals */
  if (c == '\'')
    {
      unicode_t char_lit;
      if ((c = java_get_unicode ()) == '\\')
	char_lit = java_parse_escape_sequence ();
      else
	char_lit = c;

      c = java_get_unicode ();

      if ((c == '\n') || (c == UEOF))
	java_lex_error ("Character literal not terminated at end of line", 0);
      if (c != '\'')
	java_lex_error ("Syntax error in character literal", 0);

      /* The error marker is carried by the literal's value, not by the
	 closing quote character held in C.  */
      if (char_lit == JAVA_CHAR_ERROR)
        char_lit = 0;		/* We silently convert it to zero */

      JAVA_LEX_CHAR_LIT (char_lit);
      SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
      return CHAR_LIT_TK;
    }

  /* String literals */
  if (c == '"')
    {
      int no_error;
      char *string;

      /* Stop at end of input too; otherwise an unterminated string on
	 the last line would make this loop run forever.  */
      for (no_error = 1, c = java_get_unicode ();
	   c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
	{
	  if (c == '\\')
	    c = java_parse_escape_sequence ();
	  no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
	  if (c)
	    java_unicode_2_utf8 (c);
	}
      if (c == '\n' || c == UEOF) /* ULT */
	{
	  lineno--;		/* Refer to the line the terminator was seen */
	  java_lex_error ("String not terminated at end of line.", 0);
	  lineno++;
	}

      obstack_1grow (&temporary_obstack, '\0');
      string = obstack_finish (&temporary_obstack);
#ifndef JC1_LITE
      if (!no_error || (c != '"'))
	java_lval->node = error_mark_node; /* Requires further testing FIXME */
      else
	{
	  tree s = make_node (STRING_CST);
	  TREE_STRING_LENGTH (s) = strlen (string);
	  TREE_STRING_POINTER (s) =
	    obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
	  strcpy (TREE_STRING_POINTER (s), string);
	  java_lval->node = s;
	}
#endif
      return STRING_LIT_TK;
    }

  /* Separator */
  switch (c)
    {
    case '(':
      JAVA_LEX_SEP (c);
      BUILD_OPERATOR (OP_TK);
    case ')':
      JAVA_LEX_SEP (c);
      return CP_TK;
    case '{':
      JAVA_LEX_SEP (c);
      if (ctxp->ccb_indent == 1)
	ctxp->first_ccb_indent1 = lineno;
      ctxp->ccb_indent++;
      return OCB_TK;
    case '}':
      JAVA_LEX_SEP (c);
      ctxp->ccb_indent--;
      if (ctxp->ccb_indent == 1)
        ctxp->last_ccb_indent1 = lineno;
      return CCB_TK;
    case '[':
      JAVA_LEX_SEP (c);
      BUILD_OPERATOR (OSB_TK);
    case ']':
      JAVA_LEX_SEP (c);
      return CSB_TK;
    case ';':
      JAVA_LEX_SEP (c);
      return SC_TK;
    case ',':
      JAVA_LEX_SEP (c);
      return C_TK;
    case '.':
      JAVA_LEX_SEP (c);
      BUILD_OPERATOR (DOT_TK);
      /*      return DOT_TK; */
    }

  /* Operators */
  switch (c)
    {
    case '=':
      if ((c = java_get_unicode ()) == '=')
	{
	  BUILD_OPERATOR (EQ_TK);
	}
      else
	{
	  /* Equals is used in two different locations. In the
	     variable_declarator: rule, it has to be seen as '=' as opposed
	     to being seen as an ordinary assignment operator in
	     assignment_operators: rule.  */
	  java_unget_unicode (c);
	  BUILD_OPERATOR (ASSIGN_TK);
	}

    case '>':
      switch ((c = java_get_unicode ()))
	{
	case '=':
	  BUILD_OPERATOR (GTE_TK);
	case '>':
	  switch ((c = java_get_unicode ()))
	    {
	    case '>':
	      if ((c = java_get_unicode ()) == '=')
		{
		  BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
		}
	      else
		{
		  java_unget_unicode (c);
		  BUILD_OPERATOR (ZRS_TK);
		}
	    case '=':
	      BUILD_OPERATOR2 (SRS_ASSIGN_TK);
	    default:
	      java_unget_unicode (c);
	      BUILD_OPERATOR (SRS_TK);
	    }
	default:
	  java_unget_unicode (c);
	  BUILD_OPERATOR (GT_TK);
	}

    case '<':
      switch ((c = java_get_unicode ()))
	{
	case '=':
	  BUILD_OPERATOR (LTE_TK);
	case '<':
	  if ((c = java_get_unicode ()) == '=')
	    {
	      BUILD_OPERATOR2 (LS_ASSIGN_TK);
	    }
	  else
	    {
	      java_unget_unicode (c);
	      BUILD_OPERATOR (LS_TK);
	    }
	default:
	  java_unget_unicode (c);
	  BUILD_OPERATOR (LT_TK);
	}

    case '&':
      switch ((c = java_get_unicode ()))
	{
	case '&':
	  BUILD_OPERATOR (BOOL_AND_TK);
	case '=':
	  BUILD_OPERATOR2 (AND_ASSIGN_TK);
	default:
	  java_unget_unicode (c);
	  BUILD_OPERATOR (AND_TK);
	}

    case '|':
      switch ((c = java_get_unicode ()))
	{
	case '|':
	  BUILD_OPERATOR (BOOL_OR_TK);
	case '=':
	  BUILD_OPERATOR2 (OR_ASSIGN_TK);
	default:
	  java_unget_unicode (c);
	  BUILD_OPERATOR (OR_TK);
	}

    case '+':
      switch ((c = java_get_unicode ()))
	{
	case '+':
	  BUILD_OPERATOR (INCR_TK);
	case '=':
	  BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
	default:
	  java_unget_unicode (c);
	  BUILD_OPERATOR (PLUS_TK);
	}

    case '-':
      switch ((c = java_get_unicode ()))
	{
	case '-':
	  BUILD_OPERATOR (DECR_TK);
	case '=':
	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
	default:
	  java_unget_unicode (c);
	  ctxp->minus_seen = 1;
	  BUILD_OPERATOR (MINUS_TK);
	}

    case '*':
      if ((c = java_get_unicode ()) == '=')
	{
	  BUILD_OPERATOR2 (MULT_ASSIGN_TK);
	}
      else
	{
	  java_unget_unicode (c);
	  BUILD_OPERATOR (MULT_TK);
	}

    case '/':
      if ((c = java_get_unicode ()) == '=')
	{
	  BUILD_OPERATOR2 (DIV_ASSIGN_TK);
	}
      else
	{
	  java_unget_unicode (c);
	  BUILD_OPERATOR (DIV_TK);
	}

    case '^':
      if ((c = java_get_unicode ()) == '=')
	{
	  BUILD_OPERATOR2 (XOR_ASSIGN_TK);
	}
      else
	{
	  java_unget_unicode (c);
	  BUILD_OPERATOR (XOR_TK);
	}

    case '%':
      if ((c = java_get_unicode ()) == '=')
	{
	  BUILD_OPERATOR2 (REM_ASSIGN_TK);
	}
      else
	{
	  java_unget_unicode (c);
	  BUILD_OPERATOR (REM_TK);
	}

    case '!':
      if ((c = java_get_unicode()) == '=')
	{
	  BUILD_OPERATOR (NEQ_TK);
	}
      else
	{
	  java_unget_unicode (c);
	  BUILD_OPERATOR (NEG_TK);
	}

    case '?':
      JAVA_LEX_OP ("?");
      BUILD_OPERATOR (REL_QM_TK);
    case ':':
      JAVA_LEX_OP (":");
      BUILD_OPERATOR (REL_CL_TK);
    case '~':
      BUILD_OPERATOR (NOT_TK);
    }

  /* Keyword, boolean literal or null literal */
  for (first_unicode = c, all_ascii = 1, ascii_index = 0;
       JAVA_ID_CHAR_P (c); c = java_get_unicode ())
    {
      java_unicode_2_utf8 (c);
      if (all_ascii && c >= 128)
        all_ascii = 0;
      ascii_index++;
    }

  obstack_1grow (&temporary_obstack, '\0');
  string = obstack_finish (&temporary_obstack);
  java_unget_unicode (c);

  /* If we have something all ascii, we consider a keyword, a boolean
     literal, a null literal or an all ASCII identifier.  Otherwise,
     this is an identifier (possibly not respecting formation rule).  */
  if (all_ascii)
    {
      struct java_keyword *kw;
      if ((kw=java_keyword (string, ascii_index)))
	{
	  JAVA_LEX_KW (string);
	  switch (kw->token)
	    {
	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
	    case PRIVATE_TK:
	      SET_MODIFIER_CTX (kw->token);
	      return MODIFIER_TK;
	    case FLOAT_TK:
	      SET_LVAL_NODE (float_type_node);
	      return FP_TK;
	    case DOUBLE_TK:
	      SET_LVAL_NODE (double_type_node);
	      return FP_TK;
	    case BOOLEAN_TK:
	      SET_LVAL_NODE (boolean_type_node);
	      return BOOLEAN_TK;
	    case BYTE_TK:
	      SET_LVAL_NODE (byte_type_node);
	      return INTEGRAL_TK;
	    case SHORT_TK:
	      SET_LVAL_NODE (short_type_node);
	      return INTEGRAL_TK;
	    case INT_TK:
	      SET_LVAL_NODE (int_type_node);
	      return INTEGRAL_TK;
	    case LONG_TK:
	      SET_LVAL_NODE (long_type_node);
	      return INTEGRAL_TK;
	    case CHAR_TK:
	      SET_LVAL_NODE (char_type_node);
	      return INTEGRAL_TK;

	      /* Keyword based literals */
	    case TRUE_TK:
	    case FALSE_TK:
	      SET_LVAL_NODE ((kw->token == TRUE_TK ?
			      boolean_true_node : boolean_false_node));
	      return BOOL_LIT_TK;
	    case NULL_TK:
	      SET_LVAL_NODE (null_pointer_node);
	      return NULL_TK;

	      /* Some keywords for which we want to retain information
		 on the location where they were found */
	    case CASE_TK:
	    case DEFAULT_TK:
	    case SUPER_TK:
	    case THIS_TK:
	    case RETURN_TK:
	    case BREAK_TK:
	    case CONTINUE_TK:
	    case TRY_TK:
	    case CATCH_TK:
	    case THROW_TK:
	      BUILD_OPERATOR (kw->token);

	    default:
	      return kw->token;
	    }
	}
    }

  /* We may have an ID here */
  if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
    {
      JAVA_LEX_ID (string);
      java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
      return ID_TK;
    }

  /* Everything else is an invalid character in the input */
  {
    char lex_error_buffer [128];
    sprintf (lex_error_buffer, "Invalid character '%s' in input",
	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
    java_lex_error (lex_error_buffer, 1);
  }
  return 0;
}

/* Append the UTF-8 encoding of UNICODE to temporary_obstack.

   0x01-0x7f is emitted as a single byte, 0x80-0x7ff as two bytes and
   everything else as three bytes.  The value 0 is also emitted in the
   two-byte form (0xc0 0x80), so no NUL byte is ever written into the
   string being built.  */
static void
java_unicode_2_utf8 (unicode)
    unicode_t unicode;
{
  if (RANGE (unicode, 0x01, 0x7f))
    obstack_1grow (&temporary_obstack, (char)unicode);
  else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
    {
      obstack_1grow (&temporary_obstack,
		     (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
      obstack_1grow (&temporary_obstack,
		     (unsigned char)(0x80 | (unicode & 0x3f)));
    }
  else				/* Range 0x800-0xffff */
    {
      obstack_1grow (&temporary_obstack,
		     (unsigned char)(0xe0 | ((unicode & 0xf000) >> 12)));
      obstack_1grow (&temporary_obstack,
		     (unsigned char)(0x80 | ((unicode & 0x0fc0) >> 6)));
      /* The last byte carries the low six bits unshifted.  Shifting
	 them right by 12 always gave 0, i.e. a constant 0x80 byte.  */
      obstack_1grow (&temporary_obstack,
		     (unsigned char)(0x80 | (unicode & 0x003f)));
    }
}

#ifndef JC1_LITE
/* Wrap NODE with build_expr_wfl, using the file name and the line and
   column currently recorded in the parser context (ctxp->elc).  */
static tree
build_wfl_node (node)
     tree node;
{
  tree wfl = build_expr_wfl (node, ctxp->filename,
                             ctxp->elc.line, ctxp->elc.col);

  return wfl;
}
#endif

/* Report the lexical error MSG.  The error location is taken from the
   current line; FORWARD is added to the reported column.  When
   JC1_LITE is defined this function does nothing.  */
static void
java_lex_error (msg, forward)
     char *msg;
     int forward;
{
#ifndef JC1_LITE
  int error_col = ctxp->c_line->char_col - 1 + forward;

  ctxp->elc.line = ctxp->c_line->lineno;
  ctxp->elc.col = error_col;

  /* Might be caught in the middle of some error report, so clear the
     flag and call java_error with NULL before the real message.  */
  ctxp->java_error_flag = 0;
  java_error (NULL);
  java_error (msg);
#endif
}

/* Return 1 if C, a character just read from FP, starts a line
   terminator, and 0 otherwise.

   A two-character terminator is consumed as a whole: after '\n' a
   following '\r' is swallowed, and after '\r' a following '\n' is
   swallowed.  Any other following character is pushed back on FP.
   CR LF therefore counts as a single end of line, the same way
   java_lineterminator treats it.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int partner;
  int next;

  switch (c)
    {
    case '\n':
      partner = '\r';
      break;
    case '\r':
      partner = '\n';
      break;
    default:
      return 0;
    }

  /* Look one character ahead for the second half of the terminator.  */
  next = getc (fp);
  if (next != partner && next != EOF)
    ungetc (next, fp);
  return 1;
}

/* Build, on temporary_obstack, a two-line string made of line number
   LINE of file FILENAME followed by a line of spaces ending in a '^'
   marker, and return it.  The marker is preceded by COL spaces; a
   negative COL selects the length of the source line instead.  The
   first line of the file is line 1.

   If the file is shorter than LINE lines, a fixed "file too short"
   text stands in for the source line.  Failure to open the file is
   fatal.  When JC1_LITE is defined the function just returns 0.  */
char *
java_get_line_col (filename, line, col)
     char *filename;
     int line, col;
{
#ifdef JC1_LITE
  return 0;
#else
  /* Dumb implementation. Doesn't try to cache or optimize things. */
  FILE *fp;
  int ch, pad;
  int lines_seen = 1;
  int line_length = 0;
  int hit_eof = 0;

  fp = fopen (filename, "r");
  if (!fp)
    fatal ("Can't open file - java_display_line_col");

  /* Skip everything that comes before the requested line.  */
  while (lines_seen != line && !hit_eof)
    {
      ch = getc (fp);
      if (ch < 0)
        {
          static char msg[] = "<<file too short - unexpected EOF>>";
          obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
          hit_eof = 1;
        }
      else if (java_is_eol (fp, ch))
        lines_seen++;
    }

  /* Copy the characters of the requested line.  */
  if (!hit_eof)
    {
      while ((ch = getc (fp)) >= 0 && !java_is_eol (fp, ch))
        {
          obstack_1grow (&temporary_obstack, ch);
          line_length++;
        }
    }

  obstack_1grow (&temporary_obstack, '\n');

  if (col < 0)
    col = line_length;

  /* Place the '^' at the right position.  */
  for (pad = 0; pad < col; pad++)
    obstack_1grow (&temporary_obstack, ' ');
  obstack_grow0 (&temporary_obstack, "^", 1);

  fclose (fp);
  return obstack_finish (&temporary_obstack);
#endif
}