aboutsummaryrefslogtreecommitdiff
path: root/src/libexpr
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2004-01-30 15:21:42 +0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2004-01-30 15:21:42 +0000
commitc5baaafae69394082817ede9e6eb3910c4601a72 (patch)
treebfc2599717c4274e16ba02959617254873c0c007 /src/libexpr
parentabd1878b26200ba3fa75592637aa87e04f52100d (diff)
* Replaced the SDF parser by a substantially faster Bison/Flex
parser (roughly 80x faster). The absolutely latest version of Bison (1.875c) is required for reentrant GLR support, as well as a recent version of Flex (say, 2.5.31). Note that most Unix distributions ship with the prehistoric Flex 2.5.4, which doesn't support reentrancy.
Diffstat (limited to 'src/libexpr')
-rw-r--r--src/libexpr/Makefile.am18
-rw-r--r--src/libexpr/eval.cc2
-rw-r--r--src/libexpr/lexer.l78
-rw-r--r--src/libexpr/nix.sdf131
-rw-r--r--src/libexpr/parser.cc154
-rw-r--r--src/libexpr/parser.y128
6 files changed, 260 insertions, 251 deletions
diff --git a/src/libexpr/Makefile.am b/src/libexpr/Makefile.am
index a11dbbda6..66a3008ed 100644
--- a/src/libexpr/Makefile.am
+++ b/src/libexpr/Makefile.am
@@ -1,20 +1,22 @@
noinst_LIBRARIES = libexpr.a
libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \
- eval.cc eval.hh primops.cc primops.hh nix.sdf
+ eval.cc eval.hh primops.cc primops.hh \
+ lexer-tab.c lexer-tab.h parser-tab.c parser-tab.h
AM_CXXFLAGS = \
-I.. -I../../externals/inst/include -I../libutil -I../libstore
+AM_CFLAGS = \
+ -I../../externals/inst/include
-# Parse table generation.
+# Parser generation.
-parser.o: parse-table.h
+parser-tab.c parser-tab.h: parser.y
+ ../grammartest/inst/bin/bison -v -o parser-tab.c parser.y -d
-parse-table.h: nix.tbl
- ../bin2c/bin2c nixParseTable < $< > $@ || (rm $@ && exit 1)
+lexer-tab.c lexer-tab.h: lexer.l
+ flex --outfile lexer-tab.c --header-file=lexer-tab.h lexer.l
-%.tbl: %.sdf
- ../../externals/inst/bin/sdf2table -s -i $< -o $@
-CLEANFILES = parse-table.h nix.tbl
+CLEANFILES =
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 0470deee9..77cab55d0 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -137,6 +137,8 @@ Expr evalExpr2(EvalState & state, Expr e)
/* Any encountered variables must be undeclared or primops. */
if (atMatch(m, e) >> "Var" >> s1) {
if (s1 == "null") return primNull(state);
+ if (s1 == "true") return ATmake("Bool(True)");
+ if (s1 == "false") return ATmake("Bool(False)");
return e;
}
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
new file mode 100644
index 000000000..705b31b41
--- /dev/null
+++ b/src/libexpr/lexer.l
@@ -0,0 +1,78 @@
+%option reentrant bison-bridge bison-locations
+%option noyywrap
+%option never-interactive
+
+
+%{
+#include <string.h>
+#include <aterm2.h>
+#include "parser-tab.h"
+
+static void initLoc(YYLTYPE * loc)
+{
+ loc->first_line = 1;
+ loc->first_column = 1;
+}
+
+static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
+{
+ while (len--) {
+ switch (*s++) {
+ case '\n':
+ ++loc->first_line;
+ loc->first_column = 1;
+ break;
+ default:
+ ++loc->first_column;
+ }
+ }
+}
+
+#define YY_USER_INIT initLoc(yylloc)
+#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
+
+%}
+
+
+ID [a-zA-Z\_][a-zA-Z0-9\_\']*
+INT [0-9]+
+STR \"[^\n\"]*\"
+PATH [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+
+URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']*
+
+
+%%
+
+
+if { return IF; }
+then { return THEN; }
+else { return ELSE; }
+assert { return ASSERT; }
+let { return LET; }
+rec { return REC; }
+
+\=\= { return EQ; }
+\!\= { return NEQ; }
+\&\& { return AND; }
+\|\| { return OR; }
+\-\> { return IMPL; }
+
+{ID} { yylval->t = ATmake("<str>", yytext); return ID; /* !!! alloc */ }
+{INT} { return INT; }
+{STR} { int len = strlen(yytext);
+ yytext[len - 1] = 0;
+ yylval->t = ATmake("<str>", yytext + 1);
+ yytext[len - 1] = '\"';
+ return STR; /* !!! alloc */
+ }
+{PATH} { yylval->t = ATmake("<str>", yytext); return PATH; /* !!! alloc */ }
+{URI} { yylval->t = ATmake("<str>", yytext); return URI; /* !!! alloc */ }
+
+[ \t\n]+ /* eat up whitespace */
+\#[^\n]* /* single-line comments */
+\/\*(.|\n)*\*\/ /* long comments */
+
+. return yytext[0];
+
+
+%%
diff --git a/src/libexpr/nix.sdf b/src/libexpr/nix.sdf
deleted file mode 100644
index 36fbef39c..000000000
--- a/src/libexpr/nix.sdf
+++ /dev/null
@@ -1,131 +0,0 @@
-definition
-
-module Main
-imports Fix
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Top level syntax.
-
-module Fix
-imports Fix-Exprs Fix-Layout
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Expressions.
-
-module Fix-Exprs
-imports Fix-Lexicals
-exports
- sorts Expr Formal Bind Binds ExprList
- context-free syntax
-
- Id -> Expr {cons("Var")}
-
- Int -> Expr {cons("Int")}
-
- Str -> Expr {cons("Str")}
-
- Uri -> Expr {cons("Uri")}
-
- Path -> Expr {cons("Path")}
-
- "(" Expr ")" -> Expr {bracket}
-
- Expr Expr -> Expr {cons("Call"), left}
-
- "{" {Formal ","}* "}" ":" Expr -> Expr {cons("Function")}
- Id -> Formal {cons("NoDefFormal")}
- Id "?" Expr -> Formal {cons("DefFormal")}
-
- "assert" Expr ";" Expr -> Expr {cons("Assert")}
-
- "rec" "{" Binds "}" -> Expr {cons("Rec")}
- "let" "{" Binds "}" -> Expr {cons("LetRec")}
- "{" Binds "}" -> Expr {cons("Attrs")}
-
- Bind* -> Binds
- Id "=" Expr ";" -> Bind {cons("Bind")}
-
- "[" ExprList "]" -> Expr {cons("List")}
- "" -> ExprList {cons("ExprNil")}
- Expr ExprList -> ExprList {cons("ExprCons")}
-
- Expr "." Id -> Expr {cons("Select")}
-
- "if" Expr "then" Expr "else" Expr -> Expr {cons("If")}
-
- Expr "==" Expr -> Expr {cons("OpEq"), non-assoc}
- Expr "!=" Expr -> Expr {cons("OpNEq"), non-assoc}
-
- "!" Expr -> Expr {cons("OpNot")}
- Expr "&&" Expr -> Expr {cons("OpAnd"), right}
- Expr "||" Expr -> Expr {cons("OpOr"), right}
- Expr "->" Expr -> Expr {cons("OpImpl"), right}
-
- context-free priorities
-
- Expr "." Id -> Expr
- > Expr ExprList -> ExprList
- > Expr Expr -> Expr
- > "!" Expr -> Expr
- > Expr "==" Expr -> Expr
- > Expr "!=" Expr -> Expr
- > Expr "&&" Expr -> Expr
- > Expr "||" Expr -> Expr
- > Expr "->" Expr -> Expr
- > "assert" Expr ";" Expr -> Expr
- > "{" {Formal ","}* "}" ":" Expr -> Expr
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Lexical syntax.
-
-module Fix-Lexicals
-exports
- sorts Id Int Str Path PathComp Uri
- lexical syntax
- [a-zA-Z\_][a-zA-Z0-9\_\']* -> Id
- "rec" -> Id {reject}
- "let" -> Id {reject}
- "if" -> Id {reject}
- "then" -> Id {reject}
- "else" -> Id {reject}
- "assert" -> Id {reject}
-
- [0-9]+ -> Int
-
- "\"" ~[\n\"]* "\"" -> Str
-
- "." ("/" PathComp)+ -> Path
- ".." ("/" PathComp)+ -> Path
- ("/" PathComp)+ -> Path
- [a-zA-Z0-9\.\_\-\+]+ -> PathComp
-
- [a-zA-Z] [a-zA-Z0-9\+\-\.]* ":" [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* -> Uri
-
- lexical restrictions
- Id -/- [a-zA-Z0-9\_\']
- Int -/- [0-9]
- Path -/- [a-zA-Z0-9\.\_\-\+\/]
- Uri -/- [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Layout.
-
-module Fix-Layout
-exports
- sorts HashComment Asterisk Comment
- lexical syntax
- [\ \t\n] -> LAYOUT
- HashComment -> LAYOUT
- Comment -> LAYOUT
- "#" ~[\n]* -> HashComment
- "/*" ( ~[\*] | Asterisk )* "*/" -> Comment
- [\*] -> Asterisk
- lexical restrictions
- Asterisk -/- [\/]
- HashComment -/- ~[\n]
- context-free restrictions
- LAYOUT? -/- [\ \t\n]
diff --git a/src/libexpr/parser.cc b/src/libexpr/parser.cc
index 1e0ef0c45..167c34bd8 100644
--- a/src/libexpr/parser.cc
+++ b/src/libexpr/parser.cc
@@ -5,133 +5,63 @@
#include <fcntl.h>
#include <unistd.h>
-extern "C" {
-#include <sglr.h>
-#include <asfix2.h>
-}
-
#include "aterm.hh"
#include "parser.hh"
-#include "parse-table.h"
-/* Cleanup cleans up an imploded parse tree into an actual abstract
- syntax tree that we can evaluate. It removes quotes around
- strings, converts integer literals into actual integers, and
- absolutises paths relative to the directory containing the input
- file. */
-struct Cleanup : TermFun
+struct ParseData
{
+ Expr result;
string basePath;
+ string location;
+ string error;
+};
- virtual ATerm operator () (ATerm e)
- {
- checkInterrupt();
-
- ATMatcher m;
- string s;
-
- if (atMatch(m, e) >> "Str" >> s)
- return ATmake("Str(<str>)",
- string(s, 1, s.size() - 2).c_str());
-
- if (atMatch(m, e) >> "Path" >> s)
- return ATmake("Path(<str>)", absPath(s, basePath).c_str());
-
- if (atMatch(m, e) >> "Int" >> s) {
- istringstream s2(s);
- int n;
- s2 >> n;
- return ATmake("Int(<int>)", n);
- }
-
- if (atMatch(m, e) >> "Var" >> "true")
- return ATmake("Bool(True)");
-
- if (atMatch(m, e) >> "Var" >> "false")
- return ATmake("Bool(False)");
-
- if (atMatch(m, e) >> "ExprNil")
- return (ATerm) ATempty;
+extern "C" {
- ATerm e1;
- ATermList e2;
- if (atMatch(m, e) >> "ExprCons" >> e1 >> e2)
- return (ATerm) ATinsert(e2, e1);
+#include "parser-tab.h"
+#include "lexer-tab.h"
+
+ /* Callbacks for getting from C to C++. Due to a (small) bug in the
+ GLR code of Bison we cannot currently compile the parser as C++
+ code. */
+
+ void setParseResult(ParseData * data, ATerm t)
+ {
+ data->result = t;
+ }
- return e;
+ ATerm absParsedPath(ParseData * data, ATerm t)
+ {
+ return string2ATerm(absPath(aterm2String(t), data->basePath).c_str());
}
-};
+
+ void parseError(ParseData * data, char * error, int line, int column)
+ {
+ data->error = (format("%1%, at line %2%, column %3%, of %4%")
+ % error % line % column % data->location).str();
+ }
+
+ int yyparse(yyscan_t scanner, ParseData * data);
+}
static Expr parse(const char * text, const string & location,
const Path & basePath)
{
- /* Initialise the SDF libraries. */
- static bool initialised = false;
- static ATerm parseTable = 0;
- static language lang = 0;
-
- if (!initialised) {
- PT_initMEPTApi();
- PT_initAsFix2Api();
- SGinitParser(ATfalse);
-
- ATprotect(&parseTable);
- parseTable = ATreadFromBinaryString(
- (char *) nixParseTable, sizeof nixParseTable);
- if (!parseTable)
- throw Error(format("cannot construct parse table term"));
-
- ATprotect(&lang);
- lang = ATmake("Nix");
- if (!SGopenLanguageFromTerm("nix-parse", lang, parseTable))
- throw Error(format("cannot open language"));
-
- SG_STARTSYMBOL_ON();
- SG_OUTPUT_ON();
- SG_ASFIX2ME_ON();
- SG_AMBIGUITY_ERROR_ON();
- SG_FILTER_OFF();
-
- initialised = true;
- }
-
- /* Parse it. */
- ATerm result = SGparseString(lang, "Expr", (char *) text);
- if (!result)
- throw SysError(format("parse failed in `%1%'") % location);
- if (SGisParseError(result))
- throw Error(format("parse error in `%1%': %2%")
- % location % result);
-
- /* Implode it. */
- PT_ParseTree tree = PT_makeParseTreeFromTerm(result);
- if (!tree)
- throw Error(format("cannot create parse tree"));
+ yyscan_t scanner;
+ ParseData data;
+ data.basePath = basePath;
+ data.location = location;
+
+ yylex_init(&scanner);
+ yy_scan_string(text, scanner);
+ int res = yyparse(scanner, &data);
+ yylex_destroy(scanner);
- ATerm imploded = PT_implodeParseTree(tree,
- ATtrue,
- ATtrue,
- ATtrue,
- ATtrue,
- ATtrue,
- ATtrue,
- ATfalse,
- ATtrue,
- ATtrue,
- ATtrue,
- ATfalse);
- if (!imploded)
- throw Error(format("cannot implode parse tree"));
-
- printMsg(lvlVomit, format("imploded parse tree of `%1%': %2%")
- % location % imploded);
-
- /* Finally, clean it up. */
- Cleanup cleanup;
- cleanup.basePath = basePath;
- return bottomupRewrite(cleanup, imploded);
+ if (res) throw Error(data.error);
+
+ return data.result;
}
@@ -171,7 +101,7 @@ Expr parseExprFromFile(Path path)
readFull(fd, (unsigned char *) text, st.st_size);
text[st.st_size] = 0;
- return parse(text, path, dirOf(path));
+ return parse(text, "`" + path + "'", dirOf(path));
}
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
new file mode 100644
index 000000000..45e6a98e8
--- /dev/null
+++ b/src/libexpr/parser.y
@@ -0,0 +1,128 @@
+%glr-parser
+%pure-parser
+%locations
+%error-verbose
+%parse-param { yyscan_t scanner }
+%parse-param { void * data }
+%lex-param { yyscan_t scanner }
+
+%{
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <aterm2.h>
+
+#include "parser-tab.h"
+#include "lexer-tab.h"
+
+void setParseResult(void * data, ATerm t);
+void parseError(void * data, char * error, int line, int column);
+ATerm absParsedPath(void * data, ATerm t);
+
+void yyerror(YYLTYPE * loc, yyscan_t scanner, void * data, char * s)
+{
+ parseError(data, s, loc->first_line, loc->first_column);
+}
+
+%}
+
+%union {
+ ATerm t;
+ ATermList ts;
+}
+
+%type <t> start expr expr_function expr_assert expr_op
+%type <t> expr_app expr_select expr_simple bind formal
+%type <ts> binds expr_list formals
+%token <t> ID INT STR PATH URI
+%token IF THEN ELSE ASSERT LET REC EQ NEQ AND OR IMPL
+
+%nonassoc IMPL
+%left OR
+%left AND
+%nonassoc EQ NEQ
+%left NEG
+
+%%
+
+start: expr { setParseResult(data, $1); };
+
+expr: expr_function;
+
+expr_function
+ : '{' formals '}' ':' expr_function
+ { $$ = ATmake("Function(<term>, <term>)", $2, $5); }
+ | expr_assert
+ ;
+
+expr_assert
+ : ASSERT expr ';' expr_assert
+ { $$ = ATmake("Assert(<term>, <term>)", $2, $4); }
+ | expr_op
+ ;
+
+expr_op
+ : '!' expr_op %prec NEG { $$ = ATmake("OpNot(<term>)", $2); }
+ | expr_op EQ expr_op { $$ = ATmake("OpEq(<term>, <term>)", $1, $3); }
+ | expr_op NEQ expr_op { $$ = ATmake("OpNEq(<term>, <term>)", $1, $3); }
+ | expr_op AND expr_op { $$ = ATmake("OpAnd(<term>, <term>)", $1, $3); }
+ | expr_op OR expr_op { $$ = ATmake("OpOr(<term>, <term>)", $1, $3); }
+ | expr_op IMPL expr_op { $$ = ATmake("OpImpl(<term>, <term>)", $1, $3); }
+ | expr_app
+ ;
+
+expr_app
+ : expr_app expr_select
+ { $$ = ATmake("Call(<term>, <term>)", $1, $2); }
+ | expr_select { $$ = $1; }
+ ;
+
+expr_select
+ : expr_select '.' ID
+ { $$ = ATmake("Select(<term>, <term>)", $1, $3); }
+ | expr_simple { $$ = $1; }
+ ;
+
+expr_simple
+ : ID { $$ = ATmake("Var(<term>)", $1); }
+ | STR { $$ = ATmake("Str(<term>)", $1); }
+ | PATH { $$ = ATmake("Path(<term>)", absParsedPath(data, $1)); }
+ | URI { $$ = ATmake("Uri(<term>)", $1); }
+ | '(' expr ')' { $$ = $2; }
+ | LET '{' binds '}' { $$ = ATmake("LetRec(<term>)", $3); }
+ | REC '{' binds '}' { $$ = ATmake("Rec(<term>)", $3); }
+ | '{' binds '}' { $$ = ATmake("Attrs(<term>)", $2); }
+ | '[' expr_list ']' { $$ = ATmake("List(<term>)", $2); }
+ | IF expr THEN expr ELSE expr
+ { $$ = ATmake("If(<term>, <term>, <term>)", $2, $4, $6); }
+ ;
+
+binds
+ : binds bind { $$ = ATinsert($1, $2); }
+ | { $$ = ATempty; }
+ ;
+
+bind
+ : ID '=' expr ';'
+ { $$ = ATmake("Bind(<term>, <term>)", $1, $3); }
+ ;
+
+expr_list
+ : expr_select expr_list { $$ = ATinsert($2, $1); }
+ /* yes, this is right-recursive, but it doesn't matter since
+ otherwise we would need ATreverse which requires unbounded
+ stack space */
+ | { $$ = ATempty; }
+ ;
+
+formals
+ : formal ',' formals { $$ = ATinsert($3, $1); } /* idem - right recursive */
+ | formal { $$ = ATinsert(ATempty, $1); }
+ ;
+
+formal
+ : ID { $$ = ATmake("NoDefFormal(<term>)", $1); }
+ | ID '?' expr { $$ = ATmake("DefFormal(<term>, <term>)", $1, $3); }
+ ;
+
+%%