// lexerint.h            see license.txt for copyright and terms of use
// LexerInterface, the interface the GLR parser uses
// to access the lexer's token stream

#ifndef LEXERINT_H
#define LEXERINT_H

#include "useract.h"      // SemanticValue
#include "srcloc.h"       // SourceLoc
#include "str.h"          // string

// LexerInterface: the parser's view of the current token.
//
// Instead of exposing the token through a set of pure-virtual
// accessors, the token's attributes are plain data members.  All of
// this is touched in the parser's inner loop, and the cost of a
// virtual call per access is what this design is avoiding.
class LexerInterface {
public:     // types
  // Value that the constructor stores into 'sval'.  Seeing it later
  // is a heuristic sign that the user never primed the lexer with
  // its first token.  The particular number is arbitrary; it just
  // must not be 0, nor anything else that could plausibly be a
  // legitimate sval for the EOF token.
  enum { DEFAULT_UNPRIMED_SVAL = 0x34D9423E };

  // Signature of the "advance to the next token" routine.  When
  // called, the lexer overwrites the data fields below so that they
  // describe the next token, and signals end of file by storing 0
  // in 'type'.  The fields must already describe the first token at
  // the time the LexerInterface object is handed to the parser; the
  // parser's first call to this routine happens only *after* it has
  // processed that first token.
  typedef void (*NextTokenFunc)(LexerInterface *);

public:     // data
  // NOTE: The lexer is the *writer* of every field in this section,
  // and the parser is the *reader*.

  // Token classification, which drives the parser's decisions.  The
  // code has to match something declared in the grammar's
  // 'terminals' section.  A value of 0 marks the final (end-of-file)
  // token.  The parser is permitted to modify this field for its own
  // purposes; at present it does so for token reclassification.
  int type;

  // Semantic value.  Reduction actions receive this whenever the
  // token appears on the right hand side of a rule.
  SemanticValue sval;

  // Where the token came from in the source.  Consulted only when
  // the parser was compiled to propagate locations automatically.
  SourceLoc loc;

public:     // funcs
  // Start out as an unprimed EOF token: 'type' is 0, 'sval' holds
  // the sentinel DEFAULT_UNPRIMED_SVAL, and the location is unknown.
  LexerInterface()
    : type(0), sval((SemanticValue)DEFAULT_UNPRIMED_SVAL), loc(SL_UNKNOWN)
  {
  }

  virtual ~LexerInterface()
  {
  }

  // Return the routine that the parser will call to fetch each
  // subsequent token.
  //
  // The lookup is split from the call on purpose.  A virtual method
  // call costs more than a plain indirect function call, and token
  // fetching is part of the inner parsing loop.  This two-step
  // arrangement would be unnecessary if C++ offered a way to
  // explicitly cache the result of a method lookup.
  virtual NextTokenFunc getTokenFunc() const = 0;

  // The remaining functions support diagnostic reports.  Each is
  // expected to produce more-or-less human-readable text.

  // Describe the current token, i.e. the one the data fields above
  // refer to.  For token kinds that have many spellings (identifiers,
  // for example), the actual spelling should be included if possible.
  // Because the parser may have reclassified the token and thereby
  // changed 'type', the description should ideally reflect the new
  // type.
  virtual string tokenDesc() const = 0;

  // Describe the token kind 'kind'.  Unlike tokenDesc(), the kind
  // asked about need not be the kind of the token just yielded, so
  // no associated lexeme data can be assumed to be available.  Used
  // during error diagnosis.
  virtual string tokenKindDesc(int kind) const = 0;
};

#endif // LEXERINT_H