/*GNU Pascal compiler lexical analyzer

  Copyright (C) 1989-2004, Free Software Foundation, Inc.

  Authors: Jukka Virtanen <jtv@hut.fi>
           Peter Gerwinski <peter@gerwinski.de>
           Frank Heckenbach <frank@pascal.gnu.de>

  This file is part of GNU Pascal.

  GNU Pascal is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 1, or (at your
  option) any later version.

  GNU Pascal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GNU Pascal; see the file COPYING. If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA. */

#define FLEX_SCANNER  /* @@ depends on flex version, not necessary with 2.5.27 */
#include "gpc.h"
#undef FLEX_SCANNER

#ifdef MULTIBYTE_CHARS
#include <locale.h>
#endif

#define LEX_SEMANTIC_VALUES 1
#define LEX_INVALID (-1)
#define LEX_WHITESPACE (-1)
#define LEX_COMMENT (-1)
#define LEX_COMMENT_BEGIN (-1)
#define LEX_COMMENT_CONTENT (-1)
#define LEX_COMMENT_END (-1)
#define LEX_DIRECTIVE_BEGIN (MIN_EXTRA_SYMBOL - 1)
#define LEX_DIRECTIVE_CONTENT (MIN_EXTRA_SYMBOL - 2)
#define LEX_DIRECTIVE_END (MIN_EXTRA_SYMBOL - 3)
int LEX_LINE_DIRECTIVE = MIN_EXTRA_SYMBOL - 4;
#define BITS_PER_BYTES 8
#define BYTES_PER_INTEGER 8
#define lex_malloc xmalloc
#define YY_NO_FLEX_ALLOC
#define yyalloc xmalloc
#define YY_NO_FLEX_REALLOC
#define yyrealloc xrealloc
#define YY_TYPEDEF_YY_SIZE_T
typedef size_t yy_size_t;
#include "pascal-lex.c"

filename_t lexer_filename = NULL, compiler_filename = NULL;
int column = 0;
int lexer_lineno = 0, lexer_column = 0, compiler_lineno = 0, compiler_column = 0;
int syntax_errors = 0;

/* Determines what the lexer currently returns for `=':
   < 0 means: `='
   = 0 means: LEX_CONST_EQUAL
   > 0 means: `=', but after that many closing parentheses/brackets, return LEX_CONST_EQUAL */
int lex_const_equal = -1;

#ifdef EGCS
FILE *finput;
#endif

/* Newlines encountered in the preprocessed input (all files). */
static int preprocessed_lineno = 0;

#ifndef HAVE_SIGALRM
#ifdef SIGALRM
#define HAVE_SIGALRM 1
#endif
#endif

static void handle_progress_messages PARAMS ((int));
static void do_directive PARAMS ((char *, int));

#ifdef HAVE_SIGALRM
/* Triggers for periodic progress output; set every
   PROGRESS_TIME_INTERVAL microseconds. */
#define PROGRESS_TIME_INTERVAL 200000  /* 5 Hz */
static volatile int progress_message_alarm = 0;
static void alarm_handler PARAMS ((int));

/* Called periodically for outputting status messages. */
static void alarm_handler (sig)
     int sig;
{
  progress_message_alarm = 1;
  signal (sig, &alarm_handler);
#ifdef linux
  siginterrupt (sig, 0);
#endif
}
#endif

static void
handle_progress_messages (force)
     int force;
{
  preprocessed_lineno++;
#ifdef HAVE_SIGALRM
  if (force || progress_message_alarm)
    {
      if (flag_progress_messages)
        fprintf (stderr, "\001#progress# %s (%d)\n", input_filename, lineno);
      if (flag_progress_bar)
        fprintf (stderr, "\001#progress-bar# %d\n", preprocessed_lineno);
      progress_message_alarm = 0;
    }
#else
  if (flag_progress_messages && (force || (lineno % 16 == 0 && lineno > 0)))
    fprintf (stderr, "\001#progress# %s (%d)\n", input_filename, lineno);
  if (flag_progress_bar && (force || preprocessed_lineno % 16 == 0))
    fprintf (stderr, "\001#progress-bar# %d\n", preprocessed_lineno);
#endif
}

void lex_error (const char *Msg)
{
  lineno = LexPos.Line;
  column = LexPos.Column;
  error (Msg);
}

void ExtraUserAction (buf, length)
     const char *buf;
     unsigned int length;
{
  if (co->debug_source && length > 0)
    fwrite (buf, 1, length, stderr);
  if (flag_progress_messages || flag_progress_bar)
    while (length--)
      if (*buf++ == '\n')
        handle_progress_messages (0);
}

int CheckFeature (TLexFeatureIndex Feature, int Message)
{
  lineno = LexPos.Line;
  column = LexPos.Column;
  switch (Feature)
  {
    case DoubleQuotedStrings:
      if (!co->double_quoted_strings)
        error ("double quoted strings are a GNU Pascal extension");
      break;
    case MultilineStrings:
      chk_dialect ("line breaks in char and string constants are", GNU_PASCAL);
      break;
    case IntegersWithoutSeparator:
      if (PEDANTIC (B_D_M_PASCAL) || !co->pascal_dialect)
        error_or_warning (PEDANTIC (B_D_M_PASCAL), "missing white space after decimal integer constant");
      break;
    case IntegersBase:
      chk_dialect_name ("radix#value", E_O_PASCAL);
      break;
    case IntegersHex:
      chk_dialect ("hexadecimal numbers with `$' are", B_D_M_PASCAL);
      break;
    case RealsWithoutSeparator:
      if (PEDANTIC (B_D_M_PASCAL) || !co->pascal_dialect)
        error_or_warning (PEDANTIC (B_D_M_PASCAL), "missing white space after decimal real constant");
      break;
    case RealsWithDotOnly:
      if (!(co->pascal_dialect & B_D_PASCAL))
        warning ("ISO Pascal requires a digit after the decimal point");
      break;
    case RealsWithoutExpDigits:
      error_or_warning (PEDANTIC (B_D_PASCAL), "real constant exponent has no digits");
      break;
    case CharConstantsHash:
      chk_dialect ("char constants with `#' are", B_D_M_PASCAL);
      break;
    case MixedComments:
      if (!co->mixed_comments)
        {
          assert (!Message);
          return 0;
        }
      else if (Message && co->warn_mixed_comments)
        {
          warning ("comments starting with `(*' and ending with `}' or starting with");
          warning (" `{' and ending with `*)' are an obscure ISO Pascal feature");
        }
      break;
    case NestedComments:
      if (!co->nested_comments)
        {
          assert (!Message);
          return 0;
        }
      else if (Message && co->warn_nested_comments)
        warning ("nested comments are a GPC extension");
      break;
    case DelphiComments:
      if (!co->delphi_comments)
        error ("`//' comments are a Borland Delphi extension");
      break;
    case LF_MAX:
      /* nothing */ ;
  }
  return 1;
}

void
discard_input ()
{
  while (getc (finput) != EOF) ;
}

/* Initialize the lexical analyzer. */
void
init_gpc_lex (filename)
     const char *filename;
{
#ifdef MULTIBYTE_CHARS
  /* Change to the native locale for multibyte conversions. */
  setlocale (LC_CTYPE, "");
#endif

#ifdef HAVE_SIGALRM
  /* Periodically trigger the output of progress messages. */
  if (flag_progress_messages || flag_progress_bar)
    {
      static struct itimerval timerval = { { 0, PROGRESS_TIME_INTERVAL },
                                           { 0, PROGRESS_TIME_INTERVAL } };
      signal (SIGALRM, &alarm_handler);
#ifdef linux
      siginterrupt (SIGALRM, 0);
#endif
      setitimer (ITIMER_REAL, &timerval, 0);
    }
#endif

  lineno = column = 1;
  assert (filename);
  InitLex (filename, finput, 0);
}

static void
do_directive (s, l)
     char *s;
     int l;
{
  int is_whole_directive = 0, n;
  char in_string = 0, *p, *q;
  co = (struct options *) xmalloc (sizeof (struct options));
  memcpy (co, lexer_options, sizeof (struct options));
  lexer_options->next = co;
  lexer_options = co;
  co->counter++;
  if (l >= (n = strlen ("local")) && !strncmp (s, "local", n)
      && (s[n] == ' ' || s[n] == '\t' || s[n] == '\n'))
    {
      s[n] = 0;
      process_pascal_directive (s, n);
      s += n + 1;
      l -= n + 1;
    }
  for (p = q = s; p < s + l; p++)
    {
      int c = *p, is_white = c == ' ' || c == '\t' || c == '\n';
      if (in_string && c == in_string)
        in_string = 0;
      else if ((c == '"' || c == '\'') && !in_string && (p == s || p[-1] != '^'))
        in_string = c;
      else if (q == s && is_white)
        /* NOTHING */;
      else if (in_string || is_whole_directive || c != ',')
        {
          if (!in_string && !is_whole_directive && c >= 'A' && c <= 'Z')
            c += 'a' - 'A';
          *q++ = c;
          if (q - s == 2)
            is_whole_directive = is_white && (s[0] == 'm' || s[0] == 'l' || s[0] == 'r');
        }
      else
        {
          *q = 0;
          if (!process_pascal_directive (s, q - s))
            return;
          q = s;
        }
    }
  if (in_string)
    warning ("unterminated string in compiler directive");
  *q = 0;
  process_pascal_directive (s, q - s);
}

void
SetFileName (v)
     int v;
{
  input_filename = NewPos.SrcName;
  if (!main_input_filename)
    main_input_filename = input_filename;
  if (v == 1)
    {
      /* Pushing to a new file. */
      struct file_stack *p = (struct file_stack *) xmalloc (sizeof (struct file_stack));
      input_file_stack->line = LexPos.Line;
      p->next = input_file_stack;
      p->name = input_filename;
      input_file_stack = p;
      input_file_stack_tick++;
#ifdef EGCS97
      (*debug_hooks->start_source_file) (LexPos.Line, input_filename);
#else
      debug_start_source_file (input_filename);
#endif
    }
  else if (v == 2)
    {
      /* Popping out of a file. */
      if (input_file_stack->next)
        {
          struct file_stack *p = input_file_stack;
          input_file_stack = p->next;
          free (p);
          input_file_stack_tick++;
#ifdef EGCS97
          (*debug_hooks->end_source_file) (input_file_stack->line);
#else
          debug_end_source_file (input_file_stack->line);
#endif
        }
      else
        error ("#-lines for entering and leaving files don't match");
    }

  /* Now that we've pushed or popped the input stack,
     update the name in the top element. */
  if (input_file_stack)
    input_file_stack->name = input_filename;
}

/* Hooks for parse.y: error handling. */
void
yyerror (string)
     const char *string;
{
  const char *s = LexSem.TokenString;
  char buf[200];
  syntax_errors++;
  strcpy (buf, string);
  if (!s)
    strcat (buf, " at end of input");
  else if (s[0] < 0x20 || (unsigned char) s[0] >= 0x7f)
    sprintf (buf + strlen (buf), " before character #%i", (unsigned char) s[0]);
  else
    strcat (buf, " before `%s'");
  error_with_file_and_line (lexer_filename, lexer_lineno, buf, s);
}

void
yyerror_id (id, location)
     tree id;
     const YYLTYPE *location;
{
  syntax_errors++;
  error_with_file_and_line (location->last_file, location->last_line,
                            "syntax error before `%s'", IDENTIFIER_NAME (id));
}

static int get_token PARAMS ((int));
static int get_token (update_pos)
     int update_pos;
{
  int value;
  while ((value = lexscan ()) == LEX_DIRECTIVE_BEGIN)
    {
      /* Directives can be fragmented by nested comments. Reassemble them now. */
      int l = 0;
      char *d = NULL;
      while ((value = lexscan ()) == LEX_DIRECTIVE_CONTENT)
        {
          int n = LexSem.TokenStringLength;
          char *dn = alloca (l + n + 2);
          if (d)
            {
              memcpy (dn, d, l);
              dn[l++] = ' ';
            }
          memcpy (dn + l, LexSem.TokenString, n + 1);
          l += n;
          d = dn;
        }
      if (!d)
        error ("empty compiler directive");
      else
        do_directive (d, l);
      assert (value == LEX_DIRECTIVE_END);
      if (update_pos)  /* @@ kludge, until peek_token is removed */
        {
          yylloc.first_file = NewPos.SrcName;
          yylloc.first_line = NewPos.Line;
          yylloc.first_column = NewPos.Column;
        }
    }
  return value;
}

static int next_token = 0;

int
peek_token (do_dir)
     int do_dir;
{
  if (!do_dir)
    {
      int value = get_token (0);
      if (value != LEX_LINE_DIRECTIVE)
        next_token = value;
      LEX_LINE_DIRECTIVE = -1;
      return 0;
    }
  if (!next_token)
    next_token = get_token (0);
  return next_token;
}

/* The main function of the lexical analyzer, as called from the parser. */
int
yylex ()
{
#ifndef EGCS97
  int old_momentary = suspend_momentary ();
#endif
  static int last_token = 0;
  int value;

  input_filename = lexer_filename;
  lineno = lexer_lineno;
  column = lexer_column;
  activate_options (lexer_options, 1);

  yylloc.first_file = input_filename;
  yylloc.first_line = lineno;
  yylloc.first_column = column;

  if (next_token)
    {
      value = next_token;
      next_token = 0;
    }
  else
    value = get_token (1);

  lineno = NewPos.Line;
  column = NewPos.Column;

  switch (value)
  {
    case 0:
      LexSem.TokenString = NULL;
      handle_progress_messages (1);
      break;

    case LEX_STRCONST:
    case LEX_CARET_WHITE:
      yylval.ttype = build_string_constant (LexSem.StringValue, LexSem.StringValueLength, 1);
      break;

    case LEX_INTCONST:
    case LEX_INTCONST_BASE:
      {
        tree t, type;
        HOST_WIDE_INT *v = LexSem.IntegerValueBytes;
        /* This is simplified by the fact that our constant is always positive. */
#if HOST_BITS_PER_WIDE_INT <= 32
        t = build_int_2 ((v[3] << 24) + (v[2] << 16) + (v[1] << 8) + v[0],
                         (v[7] << 24) + (v[6] << 16) + (v[5] << 8) + v[4]);
#else
        t = build_int_2 ((v[7] << 56) + (v[6] << 48) + (v[5] << 40) + (v[4] << 32)
                       + (v[3] << 24) + (v[2] << 16) + (v[1] << 8) + v[0], 0);
#endif
        /* This integer is marked as being input by the user program, so its
           type does not really matter. However, assign it something reasonable. */
        if (!INT_CST_LT_UNSIGNED (TYPE_MAX_VALUE (pascal_integer_type_node), t))
          type = pascal_integer_type_node;
        else if (!INT_CST_LT_UNSIGNED (TYPE_MAX_VALUE (pascal_cardinal_type_node), t))
          type = pascal_cardinal_type_node;
        else if (!INT_CST_LT_UNSIGNED (TYPE_MAX_VALUE (long_integer_type_node), t))
          type = long_long_integer_type_node;
        else
          type = long_long_unsigned_type_node;
        TREE_TYPE (t) = type;
        TREE_UNSIGNED (t) = TREE_UNSIGNED (type);
        PASCAL_TREE_FRESH_CST (t) = 1;
        yylval.ttype = t;
        break;
      }

    case LEX_REALCONST:
      {
        tree t, type = long_double_type_node;
        int esign = 1, expon = 0, adjust_exp = 0, zero = 0;
        const char *p = LexSem.TokenString;
        char *d = (char *) alloca (LexSem.TokenStringLength + 64), *q = d;
        REAL_VALUE_TYPE rval;
        while (*p == '0')
          p++;
        if (*p == '.')
          {
            /* Compress out the leading zeros by adjusting the exponent */
            do adjust_exp--; while (*++p == '0');
            if (!isdigit (*p))
              {
                *q++ = '0';  /* A zero */
                zero = 1;
              }
            else
              {
                *q++ = *p++;
                *q++ = '.';
              }
          }
        if (!zero)
          {
            while (isdigit (*p) || *p == '.')
              *q++ = *p++;
            if (q > d && q[-1] == '.')
              *q++ = '0';
            /* Only valid numbers should get here. */
            while (*p)
              {
                if (isdigit (*p))
                  expon = 10 * expon + *p - '0';
                else if (*p == '-')
                  esign = -1;
                p++;
              }
            expon = esign * expon + adjust_exp;
            if (expon)
              {
                char *r;
                int e;
                *q++ = 'E';
                if (expon < 0)
                  {
                    *q++ = '-';
                    expon = -expon;
                  }
                e = expon;
                do q++, e /= 10; while (e);
                r = q;
                do *--r = expon % 10 + '0', expon /= 10; while (expon);
              }
          }
        *q = 0;
        rval = REAL_VALUE_ATOF (d, TYPE_MODE (type));
        t = build_real (type, rval);
        if (REAL_VALUE_ISINF (rval))
          error ("real constant out of range");
        TREE_TYPE (t) = type;
        PASCAL_TREE_FRESH_CST (t) = 1;
        yylval.ttype = t;
        break;
      }

    default:
      yylval.itype = LexSem.TokenString[0];  /* for `^(' etc. */
  }

  /* Don't look for built-in keywords in LEX_CARET_LETTER (i.e., after `^').
     The only one-letter one is `c' which makes no sense there, anyway, and
     will be removed soon. (And I hope we'll never add a new one-letter one!
     It's bad style, and will take extra work here.) */
  if (value == LEX_ID)
    {
      tree id;
      struct predef *pd;
      yylval.ttype = id = make_identifier (LexSem.TokenString, LexSem.TokenStringLength);

      /* See internals.texi. Fortunately, this can't occur after `^' either. */
      if (current_structor_object_type)
        {
          tree t = build_component_ref (current_structor_object_type, id);
          current_structor_object_type = NULL_TREE;
          if (t && !EM (t)
              && (current_structor_object_type_constructor
                  ? PASCAL_CONSTRUCTOR_METHOD (TREE_OPERAND (t, 1))
                  : PASCAL_DESTRUCTOR_METHOD (TREE_OPERAND (t, 1))))
            {
              value = LEX_STRUCTOR;
              yylval.ttype = t;
            }
        }

      pd = IDENTIFIER_BUILT_IN_VALUE (id);

      /* With `-pedantic', warn about any dialect specific keyword
         encountered. At this point we don't know yet if it will be used as a
         keyword or an identifier, but it doesn't matter. Both usages are not
         completely portable. (That's `-pedantic' at its best! ;-) */
      if (pd && pedantic && (pd->kind == bk_none || pd->kind == bk_keyword) && pd->dialect != ANY_PASCAL)
        warn_about_keyword_redeclaration (id, 0);

      if (PD_ACTIVE (pd) && pd->kind == bk_keyword && !(pd->attributes & KW_WEAK))
        value = pd->symbol;
      else if (value == LEX_ID && PD_ACTIVE (pd) && !lookup_name (id) && !PASCAL_PENDING_DECLARATION (id))
        {
          /* lookup_name resolves built-in constants and types (because it's
             called from many places). Built-in interfaces are handled in
             module.c (only relevant there because interface names are in a
             diffrent scope than all other identifiers). Other built-ins
             (including built-in variables because some of them need special
             handling in get_builtin_variable after they are parsed as
             variables) are resolved here (only once during lexing, and never
             passed around). */
          switch (pd->kind)
          {
            case bk_none:
            case bk_interface:
              break;

            case bk_keyword:
            case bk_special_syntax:
              value = pd->symbol;
              break;

            case bk_var:
              value = LEX_BUILTIN_VARIABLE;
              break;

            case bk_routine:
              if (pd->signature[0] == '-')
                value = LEX_BUILTIN_PROCEDURE;
              else if (pd->signature[0] == '>')
                value = LEX_BUILTIN_PROCEDURE_WRITE;
              else if (pd->signature[1] == '#')
                value = LEX_BUILTIN_FUNCTION_VT;
              else
                value = LEX_BUILTIN_FUNCTION;
              break;

            default:
              assert (0);
          }
          if (value != LEX_ID && !pd->user_disabled < 0)
            chk_dialect_name (IDENTIFIER_NAME (id), pd->dialect);
        }
    }

  /* `+' and `-' have different precedence in BP than in Pascal.
     To handle this we have to use different tokens. */
  if (co->pascal_dialect & B_D_PASCAL)
    switch (value)
    {
      case '+': value = LEX_BPPLUS;  break;
      case '-': value = LEX_BPMINUS; break;
    }

  if (value == ';' && co->warn_semicolon)
    switch (last_token)
    {
      case p_then: warning ("`;' after `then'"); break;
      case p_else: warning ("`;' after `else'"); break;
      case p_do:   warning ("`;' after `do'");   break;
    }
  last_token = value;

  if (value == '=' && lex_const_equal == 0)
    {
      lex_const_equal = -1;
      value = LEX_CONST_EQUAL;
    }
  if (value == LEX_ASSIGN)
    lex_const_equal = -1;
  if (lex_const_equal >= 0)
    switch (value)
    {
      case '(': case '[': lex_const_equal++; break;
      case ')': case ']': lex_const_equal--; break;
    }

  /* Set the location and options here before doing possible read-ahead below. */
  yylloc.last_file = NewPos.SrcName;
  yylloc.last_line = NewPos.Line;
  yylloc.last_column = NewPos.Column;
  yylloc.option_id = lexer_options->counter;

  /* About the following cases, see internals.texi. */

  /* `attribute' can only be a keyword if followed by `('. */
  if (value == p_attribute && peek_token (1) != '(')
    value = LEX_ID;

  /* These can never be keywords if followed by `,', `:', `=' or `('.
     This is sufficient since in constant, type or variable declarations
     and export lists (where the conflicts are) only `,' and `:' and `='
     can follow. Exception: `operator ='. This is a problem (see
     internals.texi). (And `asmname (' which is resolved in the parser
     using new_identifier_limited_par.) */
  if ((value == p_asmname
       || value == p_constructor
       || value == p_destructor
       || value == p_external
       || value == p_implementation
       || value == p_import
       || value == p_initialization
       || value == p_operator
       || value == p_uses)
      && (peek_token (1) == ','
          || next_token == ':'
          || (next_token == '(' && value != p_asmname)
          || (next_token == '=' && value != p_operator)))
    value = LEX_ID;

  lexer_filename = input_filename;
  lexer_lineno = lineno;
  lexer_column = column;
  input_filename = compiler_filename;
  lineno = compiler_lineno;
  column = compiler_column;
  activate_options (compiler_options, 1);

#ifndef EGCS97
  resume_momentary (old_momentary);
#endif
  return value;
}
