/* grammar.y            see license.txt for copyright and terms of use
 * parser for grammar files with new ast format */

/* NOTE(review): the copy of this file that was reviewed had lost its line
 * structure and every piece of text written between angle brackets (the
 * system header names, the bison semantic type tags on %token/%type, and
 * what are presumably the template arguments of ASTList and Owner).  They
 * have been restored here from the %union member names, the "yields:"
 * comments and the way each semantic value is used in the actions.
 * Confirm them against the project headers before relying on them. */


/* C declarations */
%{

#include "grampar.h"          // yylex, etc.
#include "gramast.ast.gen.h"  // grammar syntax AST definition
#include "gramlex.h"          // GrammarLexer
#include "owner.h"            // Owner

#include <stdlib.h>           // malloc, free
#include <iostream>           // cout  (NOTE(review): header name restored; confirm)

// enable debugging the parser
#ifndef NDEBUG
  #define YYDEBUG 1
#endif

// name of extra parameter to yylex
#define YYLEX_PARAM parseParam

// make it call my yylex
#define yylex(lv, param) grampar_yylex(lv, param)

// Bison calls yyerror(msg) on error; we need the extra
// parameter too, so the macro shoehorns it in there
#define yyerror(msg) grampar_yyerror(msg, YYPARSE_PARAM)

// rename the externally-visible parsing routine to make it
// specific to this instance, so multiple bison-generated
// parsers can coexist
#define yyparse grampar_yyparse


// grab the parameter
#define PARAM ((ParseParams*)parseParam)

// return a locstring for 'str' with no location information
#define noloc(str)                                                    \
  new LocString(SL_UNKNOWN,      /* unknown location */               \
                PARAM->lexer.strtable.add(str))

// locstring for NULL, with no location
#define nolocNULL()                                                   \
  new LocString(SL_UNKNOWN, NULL)

// return a locstring with same location info as something else
// (passed as a pointer to a SourceLocation); the argument is
// parenthesized so any pointer-valued expression can be passed
#define sameloc(otherLoc, str)                                        \
  new LocString((otherLoc)->loc, PARAM->lexer.strtable.add(str))


// interpret the word into an associativity kind specification
AssocKind whichKind(LocString * /*owner*/ kind);

%}


/* ================== bison declarations =================== */
// don't use globals
%pure_parser


/* ===================== tokens ============================ */
/* tokens that have many lexical spellings */
%token <num> TOK_INTEGER
%token <str> TOK_NAME
%token <str> TOK_STRING
%token <str> TOK_LIT_CODE

/* punctuators */
%token TOK_LBRACE "{"
%token TOK_RBRACE "}"
%token TOK_COLON ":"
%token TOK_SEMICOLON ";"
/* the arrow carries a source location: Production passes it to ProdDecl */
%token <loc> TOK_ARROW "->"
%token TOK_LPAREN "("
%token TOK_RPAREN ")"
%token TOK_COMMA ","

/* keywords */
%token TOK_TERMINALS "terminals"
%token TOK_TOKEN "token"
%token TOK_NONTERM "nonterm"
%token TOK_FUN "fun"
%token TOK_VERBATIM "verbatim"
%token TOK_IMPL_VERBATIM "impl_verbatim"
%token TOK_PRECEDENCE "precedence"
%token TOK_OPTION "option"
%token TOK_EXPECT "expect"
%token TOK_CONTEXT_CLASS "context_class"
%token TOK_SUBSETS "subsets"
%token TOK_DELETE "delete"
%token TOK_REPLACE "replace"
%token TOK_FORBID_NEXT "forbid_next"

// left, right, nonassoc: they're not keywords, since "left" and "right"
// are common names for RHS elements; instead, we parse them as names
// and interpret them after lexing (see whichKind)


/* ===================== types ============================ */
/* all pointers are owner pointers */
%union {
  int num;
  LocString *str;
  SourceLoc loc;

  ASTList<TopForm> *topFormList;
  TopForm *topForm;

  ASTList<TermDecl> *termDecls;
  TermDecl *termDecl;
  ASTList<TermType> *termTypes;
  TermType *termType;
  ASTList<PrecSpec> *precSpecs;

  ASTList<SpecFunc> *specFuncs;
  SpecFunc *specFunc;
  ASTList<LocString> *stringList;

  ASTList<ProdDecl> *prodDecls;
  ProdDecl *prodDecl;
  ASTList<RHSElt> *rhsList;
  RHSElt *rhsElt;
}

%type <num> StartSymbol
%type <str> Type Action

%type <topFormList> TopFormList
%type <topForm> TopForm ContextClass Verbatim Option Terminals Nonterminal

%type <termDecls> TermDecls
%type <termDecl> TerminalDecl
%type <termTypes> TermTypes
%type <termType> TermType
%type <precSpecs> Precedence PrecSpecs
%type <stringList> NameOrStringList
%type <str> NameOrString

%type <specFuncs> SpecFuncs
%type <specFunc> SpecFunc
%type <stringList> FormalsOpt Formals Subsets

%type <prodDecls> Productions
%type <prodDecl> Production
%type <rhsList> RHS
%type <rhsElt> RHSElt


/* ===================== productions ======================= */
%%

/* The actions in this file simply build an Abstract Syntax Tree (AST)
 * for later processing. */


/* start symbol */
/* yields: int (dummy value); the tree itself is stored in PARAM->treeTop */
StartSymbol: TopFormList
               { PARAM->treeTop = new GrammarAST($1); $$=0; }
           ;

/* yields: ASTList<TopForm> */
TopFormList: /*empty*/              { $$ = new ASTList<TopForm>; }
           | TopFormList TopForm    { ($$=$1)->append($2); }
           ;

/* yields: TopForm */
TopForm: ContextClass               { $$ = $1; }
       | Verbatim                   { $$ = $1; }
       | Option                     { $$ = $1; }
       | Terminals                  { $$ = $1; }
       | Nonterminal                { $$ = $1; }
       ;

/* yields: TopForm (always TF_context) */
ContextClass: "context_class" TOK_LIT_CODE ";"
                { $$ = new TF_context($2); }
            ;

/* yields: TopForm (always TF_verbatim) */
/* the boolean is false for "verbatim" and true for "impl_verbatim" */
Verbatim: "verbatim" TOK_LIT_CODE
            { $$ = new TF_verbatim(false, $2); }
        | "impl_verbatim" TOK_LIT_CODE
            { $$ = new TF_verbatim(true, $2); }
        ;

/* yields: TopForm (always TF_option) */
/* options without specified values default to a value of 1 */
Option: "option" TOK_NAME ";"
          { $$ = new TF_option($2, 1); }
      | "option" TOK_NAME TOK_INTEGER ";"
          { $$ = new TF_option($2, $3); }
      ;


/* ------ terminals ------ */
/*
 * the terminals are the grammar symbols that appear only on the RHS of
 * forms; they are the output of the lexer; the Terminals list declares
 * all of the terminals that will appear in the rules
 */
/* yields: TopForm (always TF_terminals) */
Terminals: "terminals" "{" TermDecls TermTypes Precedence "}"
             { $$ = new TF_terminals($3, $4, $5); }
         ;

/* yields: ASTList<TermDecl> */
TermDecls: /* empty */              { $$ = new ASTList<TermDecl>; }
         | TermDecls TerminalDecl   { ($$=$1)->append($2); }
         ;

/* each terminal has an integer code which is the integer value the
 * lexer uses to represent that terminal.  it is followed by a
 * canonical name, and an optional alias; the name/alias appears in
 * the forms, rather than the integer code itself */
/* yields: TermDecl */
/* when no alias is given, an empty alias located at the name is used */
TerminalDecl: TOK_INTEGER ":" TOK_NAME ";"
                { $$ = new TermDecl($1, $3, sameloc($3, "")); }
            | TOK_INTEGER ":" TOK_NAME TOK_STRING ";"
                { $$ = new TermDecl($1, $3, $4); }
            ;

/* yields: LocString (a NULL-string LocString when the type is omitted) */
Type: TOK_LIT_CODE                  { $$ = $1; }
    | /* empty */                   { $$ = nolocNULL(); }
    ;

/* yields: ASTList<TermType> */
TermTypes: /* empty */              { $$ = new ASTList<TermType>; }
         | TermTypes TermType       { ($$=$1)->append($2); }
         ;

/* yields: TermType */
/* the ";" form has no specification functions, so it gets an empty list */
TermType: "token" Type TOK_NAME ";"
            { $$ = new TermType($3, $2, new ASTList<SpecFunc>); }
        | "token" Type TOK_NAME "{" SpecFuncs "}"
            { $$ = new TermType($3, $2, $5); }
        ;

/* yields: ASTList<PrecSpec> */
Precedence: /* empty */                      { $$ = new ASTList<PrecSpec>; }
          | "precedence" "{" PrecSpecs "}"   { $$ = $3; }
          ;

/* yields: ASTList<PrecSpec> */
/* whichKind takes ownership of the TOK_NAME value ($2) and deletes it */
PrecSpecs: /* empty */
             { $$ = new ASTList<PrecSpec>; }
         | PrecSpecs TOK_NAME TOK_INTEGER NameOrStringList ";"
             { ($$=$1)->append(new PrecSpec(whichKind($2), $3, $4)); }
         ;

/* yields: ASTList<LocString> */
NameOrStringList: /* empty */                     { $$ = new ASTList<LocString>; }
                | NameOrStringList NameOrString   { ($$=$1)->append($2); }
                ;

/* yields: LocString */
NameOrString: TOK_NAME              { $$ = $1; }
            | TOK_STRING            { $$ = $1; }
            ;


/* ------ specification functions ------ */
/* yields: ASTList<SpecFunc> */
SpecFuncs: /* empty */              { $$ = new ASTList<SpecFunc>; }
         | SpecFuncs SpecFunc       { ($$=$1)->append($2); }
         ;

/* yields: SpecFunc */
SpecFunc: TOK_FUN TOK_NAME "(" FormalsOpt ")" TOK_LIT_CODE
            { $$ = new SpecFunc($2, $4, $6); }
        ;

/* yields: ASTList<LocString> */
FormalsOpt: /* empty */             { $$ = new ASTList<LocString>; }
          | Formals                 { $$ = $1; }
          ;

/* yields: ASTList<LocString> */
Formals: TOK_NAME                   { $$ = new ASTList<LocString>($1); }
       | Formals "," TOK_NAME       { ($$=$1)->append($3); }
       ;


/* ------ nonterminals ------ */
/*
 * a nonterminal is a grammar symbol that appears on the LHS of forms;
 * the body of the Nonterminal declaration specifies the RHS forms,
 * attribute info, etc.
 */
/* yields: TopForm (always TF_nonterm) */
/* the short form has exactly one production, no specification functions
 * and no subsets */
Nonterminal: "nonterm" Type TOK_NAME Production
               { $$ = new TF_nonterm($3, $2, new ASTList<SpecFunc>,
                                     new ASTList<ProdDecl>($4), NULL); }
           | "nonterm" Type TOK_NAME "{" SpecFuncs Productions Subsets "}"
               { $$ = new TF_nonterm($3, $2, $5, $6, $7); }
           ;

/* yields: ASTList<ProdDecl> */
Productions: /* empty */              { $$ = new ASTList<ProdDecl>; }
           | Productions Production   { ($$=$1)->append($2); }
           ;

/* yields: ProdDecl */
/* the first ProdDecl argument is the semantic value of the "->" token */
Production: "->" RHS Action
              { $$ = new ProdDecl($1, PDK_NEW, $2, $3); }
          | "replace" "->" RHS Action
              { $$ = new ProdDecl($2, PDK_REPLACE, $3, $4); }
          | "delete" "->" RHS ";"
              { $$ = new ProdDecl($2, PDK_DELETE, $3, nolocNULL()); }
          ;

/* yields: LocString (a NULL-string LocString when the action is just ";") */
Action: TOK_LIT_CODE                { $$ = $1; }
      | ";"                         { $$ = nolocNULL(); }
      ;

/* yields: ASTList<RHSElt> */
RHS: /* empty */                    { $$ = new ASTList<RHSElt>; }
   | RHS RHSElt                     { ($$=$1)->append($2); }
   ;

/*
 * each element on the RHS of a form can have a tag, which appears before a
 * colon (':') if present; the tag is required if that symbol's attributes
 * are to be referenced anywhere in the actions or conditions for the form
 */
/* yields: RHSElt */
/* untagged elements get an empty tag located at the element itself */
RHSElt: TOK_NAME
          { $$ = new RH_name(sameloc($1, ""), $1); }
      | TOK_NAME ":" TOK_NAME
          { $$ = new RH_name($1, $3); }
      | TOK_STRING
          { $$ = new RH_string(sameloc($1, ""), $1); }
      | TOK_NAME ":" TOK_STRING
          { $$ = new RH_string($1, $3); }
      | "precedence" "(" NameOrString ")"
          { $$ = new RH_prec($3); }
      | "forbid_next" "(" NameOrString ")"
          { $$ = new RH_forbid($3); }
      ;

/* yields: ASTList<LocString>, or NULL when there is no "subsets" clause */
Subsets: /*empty*/                  { $$ = NULL; }
       | "subsets" Formals ";"      { $$ = $2; }
       ;


%%
/* ------------------ extra C code ------------------ */

/*
 * Map the spelling of an associativity keyword to its AssocKind.
 *
 * 'kind' is an owner pointer: this function takes it over and it is
 * released by the local Owner object on every exit path.
 *
 * Recognized spellings: "left", "right", "nonassoc", "prec" and
 * "assoc_split".  Any other spelling is reported through xbase() with a
 * message that includes the location of 'kind'.
 * NOTE(review): no value is returned after the xbase() call, so this
 * relies on xbase() not returning -- confirm against its declaration.
 */
AssocKind whichKind(LocString * /*owner*/ kind)
{
  // delete 'kind' however we exit
  Owner<LocString> killer(kind);

  #define CHECK(syntax, value)   \
    if (kind->equals(syntax)) {  \
      return value;              \
    }
  CHECK("left", AK_LEFT);
  CHECK("right", AK_RIGHT);
  CHECK("nonassoc", AK_NONASSOC);
  CHECK("prec", AK_NEVERASSOC);
  CHECK("assoc_split", AK_SPLIT);
  #undef CHECK

  xbase(stringc << kind->locString()
                << ": invalid associativity kind: " << *kind);
}