diff options
-rw-r--r-- | flake.nix | 2 | ||||
-rw-r--r-- | meson.build | 10 | ||||
-rwxr-xr-x | meson/cleanup-install.bash | 50 | ||||
-rw-r--r-- | misc/pegtl.nix | 23 | ||||
-rw-r--r-- | package.nix | 6 | ||||
-rw-r--r-- | src/libexpr/eval.cc | 16 | ||||
-rw-r--r-- | src/libexpr/lexer.l | 302 | ||||
-rw-r--r-- | src/libexpr/meson.build | 58 | ||||
-rw-r--r-- | src/libexpr/parser.y | 503 | ||||
-rw-r--r-- | src/libexpr/parser/change_head.hh | 66 | ||||
-rw-r--r-- | src/libexpr/parser/grammar.hh | 707 | ||||
-rw-r--r-- | src/libexpr/parser/parser.cc | 862 | ||||
-rw-r--r-- | src/libexpr/parser/state.hh (renamed from src/libexpr/parser-state.hh) | 113 | ||||
-rw-r--r-- | tests/functional/lang/parse-fail-eof-pos.err.exp | 2 | ||||
-rw-r--r-- | tests/functional/lang/parse-fail-undef-var-2.err.exp | 2 | ||||
-rw-r--r-- | tests/functional/lang/parse-fail-utf8.err.exp | 2 |
16 files changed, 1722 insertions, 1002 deletions
@@ -195,6 +195,8 @@ busybox-sandbox-shell = final.busybox-sandbox-shell or final.default-busybox-sandbox-shell; }; + pegtl = final.callPackage ./misc/pegtl.nix { }; + # Export the patched version of boehmgc that Lix uses into the overlay # for consumers of this flake. boehmgc-nix = final.nix.boehmgc-nix; diff --git a/meson.build b/meson.build index e6151e0a2..0cb2030e7 100644 --- a/meson.build +++ b/meson.build @@ -287,6 +287,14 @@ gtest = [ toml11 = dependency('toml11', version : '>=3.7.0', required : true, method : 'cmake') +pegtl = dependency( + 'pegtl', + version : '>=3.2.7', + required : true, + method : 'cmake', + modules : [ 'taocpp::pegtl' ], +) + nlohmann_json = dependency('nlohmann_json', required : true) # lix-doc is a Rust project provided via buildInputs and unfortunately doesn't have any way to be detected. @@ -335,8 +343,6 @@ endif # that busybox sh won't run busybox applets as builtins (which would break our sandbox). lsof = find_program('lsof', native : true) -bison = find_program('bison', native : true) -flex = find_program('flex', native : true) # This is how Nix does generated headers... # other instances of header generation use a very similar command. diff --git a/meson/cleanup-install.bash b/meson/cleanup-install.bash deleted file mode 100755 index 928edc74a..000000000 --- a/meson/cleanup-install.bash +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -# Meson will call this with an absolute path to Bash. -# The shebang is just for convenience. - -# The parser and lexer tab are generated via custom Meson targets in src/libexpr/meson.build, -# but Meson doesn't support marking only part of a target for install. The generation creates -# both headers (parser-tab.hh, lexer-tab.hh) and source files (parser-tab.cc, lexer-tab.cc), -# and we definitely want the former installed, but not the latter. 
This script is added to -# Meson's install steps to correct this, as the logic for it is just complex enough to -# warrant separate and careful handling, because both Meson's configured include directory -# may or may not be an absolute path, and DESTDIR may or may not be set at all, but can't be -# manipulated in Meson logic. - -set -euo pipefail - -echo "cleanup-install: removing Meson-placed C++ sources from dest includedir" - -if [[ "${1/--help/}" != "$1" ]]; then - echo "cleanup-install: this script should only be called from the Meson build system" - exit 1 -fi - -# Ensure the includedir was passed as the first argument -# (set -u will make this fail otherwise). -includedir="$1" -# And then ensure that first argument is a directory that exists. -if ! [[ -d "$1" ]]; then - echo "cleanup-install: this script should only be called from the Meson build system" - echo "argv[1] (${1@Q}) is not a directory" - exit 2 -fi - -# If DESTDIR environment variable is set, prepend it to the include dir. -# Unfortunately, we cannot do this on the Meson side. We do have an environment variable -# `MESON_INSTALL_DESTDIR_PREFIX`, but that will not refer to the include directory if -# includedir has been set separately, which Lix's split-output derivation does. -# We also cannot simply do an inline bash conditional like "${DESTDIR:=}" or similar, -# because we need to specifically *join* DESTDIR and includedir with a slash, and *not* -# have a slash if DESTDIR isn't set at all, since $includedir could be a relative directory. -# Finally, DESTDIR is only available to us as an environment variable in these install scripts, -# not in Meson logic. -# Therefore, our best option is to have Meson pass this script the configured includedir, -# and perform this dance with it and $DESTDIR. -if [[ -n "${DESTDIR:-}" ]]; then - includedir="$DESTDIR/$includedir" -fi - -# Intentionally not using -f. -# If these files don't exist then our assumptions have been violated and we should fail. 
-rm -v "$includedir/lix/libexpr/parser-tab.cc" "$includedir/lix/libexpr/lexer-tab.cc" diff --git a/misc/pegtl.nix b/misc/pegtl.nix new file mode 100644 index 000000000..3fd999d9d --- /dev/null +++ b/misc/pegtl.nix @@ -0,0 +1,23 @@ +{ + stdenv, + cmake, + ninja, + fetchFromGitHub, +}: + +stdenv.mkDerivation { + pname = "pegtl"; + version = "3.2.7"; + + src = fetchFromGitHub { + repo = "PEGTL"; + owner = "taocpp"; + rev = "refs/tags/3.2.7"; + hash = "sha256-IV5YNGE4EWVrmg2Sia/rcU8jCuiBynQGJM6n3DCWTQU="; + }; + + nativeBuildInputs = [ + cmake + ninja + ]; +} diff --git a/package.nix b/package.nix index 988379618..0f194796f 100644 --- a/package.nix +++ b/package.nix @@ -10,7 +10,6 @@ boehmgc-nix ? __forDefaults.boehmgc-nix, boehmgc, nlohmann_json, - bison, build-release-notes ? __forDefaults.build-release-notes, boost, brotli, @@ -20,7 +19,6 @@ doxygen, editline-lix ? __forDefaults.editline-lix, editline, - flex, git, gtest, jq, @@ -36,6 +34,7 @@ meson, ninja, openssl, + pegtl, pkg-config, python3, rapidcheck, @@ -210,8 +209,6 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ - bison - flex python3 meson ninja @@ -250,6 +247,7 @@ stdenv.mkDerivation (finalAttrs: { libsodium toml11 lix-doc + pegtl ] ++ lib.optionals hostPlatform.isLinux [ libseccomp diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index afee89420..a6a64a43c 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -18,7 +18,6 @@ #include "gc-small-vector.hh" #include "fetch-to-store.hh" #include "flake/flakeref.hh" -#include "parser-tab.hh" #include <algorithm> #include <chrono> @@ -2958,21 +2957,6 @@ std::optional<std::string> EvalState::resolveSearchPathPath(const SearchPath::Pa } -Expr * EvalState::parse( - char * text, - size_t length, - Pos::Origin origin, - const SourcePath & basePath, - std::shared_ptr<StaticEnv> & staticEnv) -{ - auto result = parseExprFromBuf(text, length, origin, basePath, symbols, positions, exprSymbols); - - result->bindVars(*this, staticEnv); - - return 
result; -} - - std::string ExternalValueBase::coerceToString(EvalState & state, const PosIdx & pos, NixStringContext & context, bool copyMore, bool copyToStore) const { state.error<TypeError>( diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l deleted file mode 100644 index 5bc815f00..000000000 --- a/src/libexpr/lexer.l +++ /dev/null @@ -1,302 +0,0 @@ -%option reentrant bison-bridge bison-locations -%option align -%option noyywrap -%option never-interactive -%option stack -%option nodefault -%option nounput noyy_top_state - - -%s DEFAULT -%x STRING -%x IND_STRING -%x INPATH -%x INPATH_SLASH -%x PATH_START - - -%{ -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wunneeded-internal-declaration" -#endif - -// yacc generates code that uses unannotated fallthrough. -#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wimplicit-fallthrough" -#endif - -#include "nixexpr.hh" -#include "parser-tab.hh" -#include "strings.hh" - -using namespace nix; - -#define THROW(...) \ - do { \ - state->error.reset(new auto(__VA_ARGS__)); \ - return YYerror; \ - } while (0) - -namespace nix { - -#define CUR_POS state->at(*yylloc) - -static void initLoc(YYLTYPE * loc) -{ - loc->first_line = loc->last_line = 0; - loc->first_column = loc->last_column = 0; -} - -static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) -{ - loc->stash(); - - loc->first_column = loc->last_column; - loc->last_column += len; -} - - -// we make use of the fact that the parser receives a private copy of the input -// string and can munge around in it. -static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length) -{ - char * result = s; - char * t = s; - char c; - // the input string is terminated with *two* NULs, so we can safely take - // *one* character after the one being checked against. 
- while ((c = *s++)) { - if (c == '\\') { - c = *s++; - if (c == 'n') *t = '\n'; - else if (c == 'r') *t = '\r'; - else if (c == 't') *t = '\t'; - else *t = c; - } - else if (c == '\r') { - /* Normalise CR and CR/LF into LF. */ - *t = '\n'; - if (*s == '\n') s++; /* cr/lf */ - } - else *t = c; - t++; - } - return {result, size_t(t - result)}; -} - - -} - -#define YY_USER_INIT initLoc(yylloc) -#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng); - -#define PUSH_STATE(state) yy_push_state(state, yyscanner) -#define POP_STATE() yy_pop_state(yyscanner) - -%} - - -ANY .|\n -ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]* -INT [0-9]+ -FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)? -PATH_CHAR [a-zA-Z0-9\.\_\-\+] -PATH {PATH_CHAR}*(\/{PATH_CHAR}+)+\/? -PATH_SEG {PATH_CHAR}*\/ -HPATH \~(\/{PATH_CHAR}+)+\/? -HPATH_START \~\/ -SPATH \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\> -URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+ - - -%% - - -if { return IF; } -then { return THEN; } -else { return ELSE; } -assert { return ASSERT; } -with { return WITH; } -let { return LET; } -in { return IN; } -rec { return REC; } -inherit { return INHERIT; } -or { return OR_KW; } -\.\.\. 
{ return ELLIPSIS; } - -\=\= { return EQ; } -\!\= { return NEQ; } -\<\= { return LEQ; } -\>\= { return GEQ; } -\&\& { return AND; } -\|\| { return OR; } -\-\> { return IMPL; } -\/\/ { return UPDATE; } -\+\+ { return CONCAT; } - -{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; } -{INT} { errno = 0; - std::optional<int64_t> numMay = string2Int<int64_t>(yytext); - if (numMay.has_value()) { - yylval->n = *numMay; - } else { - THROW(ParseError(ErrorInfo{ - .msg = HintFmt("invalid integer '%1%'", yytext), - .pos = state->positions[CUR_POS], - })); - } - return INT; - } -{FLOAT} { errno = 0; - yylval->nf = strtod(yytext, 0); - if (errno != 0) - THROW(ParseError(ErrorInfo{ - .msg = HintFmt("invalid float '%1%'", yytext), - .pos = state->positions[CUR_POS], - })); - return FLOAT; - } - -\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } - -\} { /* State INITIAL only exists at the bottom of the stack and is - used as a marker. DEFAULT replaces it everywhere else. - Popping when in INITIAL state causes an empty stack exception, - so don't */ - if (YYSTATE != INITIAL) - POP_STATE(); - return '}'; - } -\{ { PUSH_STATE(DEFAULT); return '{'; } - -\" { PUSH_STATE(STRING); return '"'; } -<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" | -<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ { - /* It is impossible to match strings ending with '$' with one - regex because trailing contexts are only valid at the end - of a rule. (A sane but undocumented limitation.) */ - yylval->str = unescapeStr(state->symbols, yytext, yyleng); - return STR; - } -<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } -<STRING>\" { POP_STATE(); return '"'; } -<STRING>\$|\\|\$\\ { - /* This can only occur when we reach EOF, otherwise the above - (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered. - This is technically invalid, but we leave the problem to the - parser who fails with exact location. */ - return EOF; - } - -\'\'(\ *\n)? 
{ PUSH_STATE(IND_STRING); return IND_STRING_OPEN; } -<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ { - yylval->str = {yytext, (size_t) yyleng, true}; - return IND_STR; - } -<IND_STRING>\'\'\$ | -<IND_STRING>\$ { - yylval->str = {"$", 1}; - return IND_STR; - } -<IND_STRING>\'\'\' { - yylval->str = {"''", 2}; - return IND_STR; - } -<IND_STRING>\'\'\\{ANY} { - yylval->str = unescapeStr(state->symbols, yytext + 2, yyleng - 2); - return IND_STR; - } -<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } -<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; } -<IND_STRING>\' { - yylval->str = {"'", 1}; - return IND_STR; - } - -{PATH_SEG}\$\{ | -{HPATH_START}\$\{ { - PUSH_STATE(PATH_START); - yyless(0); - yylloc->unstash(); -} - -<PATH_START>{PATH_SEG} { - POP_STATE(); - PUSH_STATE(INPATH_SLASH); - yylval->path = {yytext, (size_t) yyleng}; - return PATH; -} - -<PATH_START>{HPATH_START} { - POP_STATE(); - PUSH_STATE(INPATH_SLASH); - yylval->path = {yytext, (size_t) yyleng}; - return HPATH; -} - -{PATH} { - if (yytext[yyleng-1] == '/') - PUSH_STATE(INPATH_SLASH); - else - PUSH_STATE(INPATH); - yylval->path = {yytext, (size_t) yyleng}; - return PATH; -} -{HPATH} { - if (yytext[yyleng-1] == '/') - PUSH_STATE(INPATH_SLASH); - else - PUSH_STATE(INPATH); - yylval->path = {yytext, (size_t) yyleng}; - return HPATH; -} - -<INPATH,INPATH_SLASH>\$\{ { - POP_STATE(); - PUSH_STATE(INPATH); - PUSH_STATE(DEFAULT); - return DOLLAR_CURLY; -} -<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ { - POP_STATE(); - if (yytext[yyleng-1] == '/') - PUSH_STATE(INPATH_SLASH); - else - PUSH_STATE(INPATH); - yylval->str = {yytext, (size_t) yyleng}; - return STR; -} -<INPATH>{ANY} | -<INPATH><<EOF>> { - /* if we encounter a non-path character we inform the parser that the path has - ended with a PATH_END token and re-parse this character in the default - context (it may be ')', ';', or something of that sort) */ - POP_STATE(); - yyless(0); - yylloc->unstash(); - return PATH_END; -} - 
-<INPATH_SLASH>{ANY} | -<INPATH_SLASH><<EOF>> { - THROW(ParseError(ErrorInfo{ - .msg = HintFmt("path has a trailing slash"), - .pos = state->positions[CUR_POS], - })); -} - -{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; } -{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; } - -[ \t\r\n]+ /* eat up whitespace */ -\#[^\r\n]* /* single-line comments */ -\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */ - -{ANY} { - /* Don't return a negative number, as this will cause - Bison to stop parsing without an error. */ - return (unsigned char) yytext[0]; - } - -%% diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index 080fdb443..39493dadc 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -1,54 +1,3 @@ -parser_tab = custom_target( - input : 'parser.y', - output : [ - 'parser-tab.cc', - 'parser-tab.hh', - ], - command : [ - 'bison', - '-v', - '-o', - '@OUTPUT0@', - '@INPUT@', - '-d', - ], - # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add - # an install script below which removes parser-tab.cc. - install : true, - install_dir : includedir / 'lix/libexpr', -) - -lexer_tab = custom_target( - input : [ - 'lexer.l', - parser_tab, - ], - output : [ - 'lexer-tab.cc', - 'lexer-tab.hh', - ], - command : [ - 'flex', - '--outfile', - '@OUTPUT0@', - '--header-file=' + '@OUTPUT1@', - '@INPUT0@', - ], - # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add - # an install script below which removes lexer-tab.cc. - install : true, - install_dir : includedir / 'lix/libexpr', -) - -# TODO(Qyriad): When the parser and lexer are rewritten this should be removed. -# NOTE(Qyriad): We do this this way instead of an inline bash or rm command -# due to subtleties in Meson. Check the comments in cleanup-install.bash for details. 
-meson.add_install_script( - bash, - meson.project_source_root() / 'meson/cleanup-install.bash', - '@0@'.format(includedir), -) - libexpr_generated_headers = [ gen_header.process('primops/derivation.nix', preserve_path_from : meson.current_source_dir()), ] @@ -75,6 +24,7 @@ libexpr_sources = files( 'get-drvs.cc', 'json-to-value.cc', 'nixexpr.cc', + 'parser/parser.cc', 'paths.cc', 'primops.cc', 'print-ambiguous.cc', @@ -110,7 +60,9 @@ libexpr_headers = files( 'get-drvs.hh', 'json-to-value.hh', 'nixexpr.hh', - 'parser-state.hh', + 'parser/change_head.hh', + 'parser/grammar.hh', + 'parser/state.hh', 'pos-idx.hh', 'pos-table.hh', 'primops.hh', @@ -129,8 +81,6 @@ libexpr_headers = files( libexpr = library( 'lixexpr', libexpr_sources, - parser_tab, - lexer_tab, libexpr_generated_headers, dependencies : [ liblixutil, diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y deleted file mode 100644 index b825f2ed8..000000000 --- a/src/libexpr/parser.y +++ /dev/null @@ -1,503 +0,0 @@ -%glr-parser -%define api.pure -%locations -%define parse.error verbose -%defines -/* %no-lines */ -%parse-param { void * scanner } -%parse-param { nix::ParserState * state } -%lex-param { void * scanner } -%lex-param { nix::ParserState * state } -%expect 1 -%expect-rr 1 - -%code requires { - -#ifndef BISON_HEADER -#define BISON_HEADER - -#include <variant> - -#include "finally.hh" -#include "users.hh" - -#include "nixexpr.hh" -#include "eval.hh" -#include "eval-settings.hh" -#include "globals.hh" -#include "parser-state.hh" - -#define YYLTYPE ::nix::ParserLocation -#define YY_DECL int yylex \ - (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParserState * state) - -namespace nix { - -Expr * parseExprFromBuf( - char * text, - size_t length, - Pos::Origin origin, - const SourcePath & basePath, - SymbolTable & symbols, - PosTable & positions, - const Expr::AstSymbols & astSymbols); - -} - -#endif - -} - -%{ - -#include "parser-tab.hh" -#include "lexer-tab.hh" - -YY_DECL; 
- -using namespace nix; - -#define CUR_POS state->at(*yylocp) - -// otherwise destructors cause compiler errors -#pragma GCC diagnostic ignored "-Wswitch-enum" - -#define THROW(err, ...) \ - do { \ - state->error.reset(new auto(err)); \ - [](auto... d) { (delete d, ...); }(__VA_ARGS__); \ - YYABORT; \ - } while (0) - -void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char * error) -{ - if (std::string_view(error).starts_with("syntax error, unexpected end of file")) { - loc->first_column = loc->last_column; - loc->first_line = loc->last_line; - } - throw ParseError({ - .msg = HintFmt(error), - .pos = state->positions[state->at(*loc)] - }); -} - -template<typename T> -static std::unique_ptr<T> unp(T * e) -{ - return std::unique_ptr<T>(e); -} - -template<typename T = std::unique_ptr<nix::Expr>, typename... Args> -static std::vector<T> vec(Args && ... args) -{ - std::vector<T> result; - result.reserve(sizeof...(Args)); - (result.emplace_back(std::forward<Args>(args)), ...); - return result; -} - - -%} - -%union { - // !!! We're probably leaking stuff here. - nix::Expr * e; - nix::ExprList * list; - nix::ExprAttrs * attrs; - nix::Formals * formals; - nix::Formal * formal; - nix::NixInt n; - nix::NixFloat nf; - nix::StringToken id; // !!! 
-> Symbol - nix::StringToken path; - nix::StringToken uri; - nix::StringToken str; - std::vector<nix::AttrName> * attrNames; - std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs; - std::vector<std::pair<nix::PosIdx, std::unique_ptr<nix::Expr>>> * string_parts; - std::vector<std::pair<nix::PosIdx, std::variant<std::unique_ptr<nix::Expr>, nix::StringToken>>> * ind_string_parts; -} - -%destructor { delete $$; } <e> -%destructor { delete $$; } <list> -%destructor { delete $$; } <attrs> -%destructor { delete $$; } <formals> -%destructor { delete $$; } <formal> -%destructor { delete $$; } <attrNames> -%destructor { delete $$; } <inheritAttrs> -%destructor { delete $$; } <string_parts> -%destructor { delete $$; } <ind_string_parts> - -%type <e> start -%type <e> expr expr_function expr_if expr_op -%type <e> expr_select expr_simple expr_app -%type <list> expr_list -%type <attrs> binds -%type <formals> formals -%type <formal> formal -%type <attrNames> attrpath -%type <inheritAttrs> attrs -%type <string_parts> string_parts_interpolated -%type <ind_string_parts> ind_string_parts -%type <e> path_start string_parts string_attr -%type <id> attr -%token <id> ID -%token <str> STR IND_STR -%token <n> INT -%token <nf> FLOAT -%token <path> PATH HPATH SPATH PATH_END -%token <uri> URI -%token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL OR_KW -%token DOLLAR_CURLY /* == ${ */ -%token IND_STRING_OPEN IND_STRING_CLOSE -%token ELLIPSIS - -%right IMPL -%left OR -%left AND -%nonassoc EQ NEQ -%nonassoc '<' '>' LEQ GEQ -%right UPDATE -%left NOT -%left '+' '-' -%left '*' '/' -%right CONCAT -%nonassoc '?' 
-%nonassoc NEGATE - -%% - -start: expr { state->result = $1; $$ = 0; }; - -expr: expr_function; - -expr_function - : ID ':' expr_function - { $$ = new ExprLambda(CUR_POS, state->symbols.create($1), nullptr, unp($3)); } - | '{' formals '}' ':' expr_function - { if (auto e = state->validateFormals($2)) THROW(*e); - $$ = new ExprLambda(CUR_POS, unp($2), unp($5)); - } - | '{' formals '}' '@' ID ':' expr_function - { - auto arg = state->symbols.create($5); - if (auto e = state->validateFormals($2, CUR_POS, arg)) THROW(*e, $2, $7); - $$ = new ExprLambda(CUR_POS, arg, unp($2), unp($7)); - } - | ID '@' '{' formals '}' ':' expr_function - { - auto arg = state->symbols.create($1); - if (auto e = state->validateFormals($4, CUR_POS, arg)) THROW(*e, $4, $7); - $$ = new ExprLambda(CUR_POS, arg, unp($4), unp($7)); - } - | ASSERT expr ';' expr_function - { $$ = new ExprAssert(CUR_POS, unp($2), unp($4)); } - | WITH expr ';' expr_function - { $$ = new ExprWith(CUR_POS, unp($2), unp($4)); } - | LET binds IN expr_function - { if (!$2->dynamicAttrs.empty()) - THROW(ParseError({ - .msg = HintFmt("dynamic attributes not allowed in let"), - .pos = state->positions[CUR_POS] - }), $2, $4); - $$ = new ExprLet(unp($2), unp($4)); - } - | expr_if - ; - -expr_if - : IF expr THEN expr ELSE expr { $$ = new ExprIf(CUR_POS, unp($2), unp($4), unp($6)); } - | expr_op - ; - -expr_op - : '!' 
expr_op %prec NOT { $$ = new ExprOpNot(unp($2)); } - | '-' expr_op %prec NEGATE { $$ = new ExprCall(CUR_POS, std::make_unique<ExprVar>(state->s.sub), vec(std::make_unique<ExprInt>(0), unp($2))); } - | expr_op EQ expr_op { $$ = new ExprOpEq(unp($1), unp($3)); } - | expr_op NEQ expr_op { $$ = new ExprOpNEq(unp($1), unp($3)); } - | expr_op '<' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3)); } - | expr_op LEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1))); } - | expr_op '>' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1)); } - | expr_op GEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3))); } - | expr_op AND expr_op { $$ = new ExprOpAnd(state->at(@2), unp($1), unp($3)); } - | expr_op OR expr_op { $$ = new ExprOpOr(state->at(@2), unp($1), unp($3)); } - | expr_op IMPL expr_op { $$ = new ExprOpImpl(state->at(@2), unp($1), unp($3)); } - | expr_op UPDATE expr_op { $$ = new ExprOpUpdate(state->at(@2), unp($1), unp($3)); } - | expr_op '?' 
attrpath { $$ = new ExprOpHasAttr(unp($1), std::move(*$3)); delete $3; } - | expr_op '+' expr_op - { $$ = new ExprConcatStrings(state->at(@2), false, vec<std::pair<PosIdx, std::unique_ptr<Expr>>>(std::pair(state->at(@1), unp($1)), std::pair(state->at(@3), unp($3)))); } - | expr_op '-' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.sub), vec($1, $3)); } - | expr_op '*' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.mul), vec($1, $3)); } - | expr_op '/' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.div), vec($1, $3)); } - | expr_op CONCAT expr_op { $$ = new ExprOpConcatLists(state->at(@2), unp($1), unp($3)); } - | expr_app - ; - -expr_app - : expr_app expr_select { - if (auto e2 = dynamic_cast<ExprCall *>($1)) { - e2->args.emplace_back($2); - $$ = $1; - } else - $$ = new ExprCall(CUR_POS, unp($1), vec(unp($2))); - } - | expr_select - ; - -expr_select - : expr_simple '.' attrpath - { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), nullptr); delete $3; } - | expr_simple '.' attrpath OR_KW expr_select - { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), unp($5)); delete $3; } - | /* Backwards compatibility: because Nixpkgs has a rarely used - function named ‘or’, allow stuff like ‘map or [...]’. 
*/ - expr_simple OR_KW - { $$ = new ExprCall(CUR_POS, unp($1), vec(std::make_unique<ExprVar>(CUR_POS, state->s.or_))); } - | expr_simple - ; - -expr_simple - : ID { - std::string_view s = "__curPos"; - if ($1.l == s.size() && strncmp($1.p, s.data(), s.size()) == 0) - $$ = new ExprPos(CUR_POS); - else - $$ = new ExprVar(CUR_POS, state->symbols.create($1)); - } - | INT { $$ = new ExprInt($1); } - | FLOAT { $$ = new ExprFloat($1); } - | '"' string_parts '"' { $$ = $2; } - | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE { - $$ = state->stripIndentation(CUR_POS, std::move(*$2)).release(); - delete $2; - } - | path_start PATH_END - | path_start string_parts_interpolated PATH_END { - $2->emplace($2->begin(), state->at(@1), $1); - $$ = new ExprConcatStrings(CUR_POS, false, std::move(*$2)); - delete $2; - } - | SPATH { - std::string path($1.p + 1, $1.l - 2); - $$ = new ExprCall(CUR_POS, - std::make_unique<ExprVar>(state->s.findFile), - vec(std::make_unique<ExprVar>(state->s.nixPath), - std::make_unique<ExprString>(std::move(path)))); - } - | URI { - static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals); - if (noURLLiterals) - THROW(ParseError({ - .msg = HintFmt("URL literals are disabled"), - .pos = state->positions[CUR_POS] - })); - $$ = new ExprString(std::string($1)); - } - | '(' expr ')' { $$ = $2; } - /* Let expressions `let {..., body = ...}' are just desugared - into `(rec {..., body = ...}).body'. 
*/ - | LET '{' binds '}' - { $3->recursive = true; $$ = new ExprSelect(noPos, unp($3), state->s.body); } - | REC '{' binds '}' - { $3->recursive = true; $$ = $3; } - | '{' binds '}' - { $$ = $2; } - | '[' expr_list ']' { $$ = $2; } - ; - -string_parts - : STR { $$ = new ExprString(std::string($1)); } - | string_parts_interpolated - { $$ = new ExprConcatStrings(CUR_POS, true, std::move(*$1)); - delete $1; - } - | { $$ = new ExprString(""); } - ; - -string_parts_interpolated - : string_parts_interpolated STR - { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); } - | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); } - | DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>; $$->emplace_back(state->at(@1), $2); } - | STR DOLLAR_CURLY expr '}' { - $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>; - $$->emplace_back(state->at(@1), new ExprString(std::string($1))); - $$->emplace_back(state->at(@2), $3); - } - ; - -path_start - : PATH { - Path path(absPath({$1.p, $1.l}, state->basePath.path.abs())); - /* add back in the trailing '/' to the first segment */ - if ($1.p[$1.l-1] == '/' && $1.l > 1) - path += "/"; - $$ = new ExprPath(path); - } - | HPATH { - if (evalSettings.pureEval) { - THROW(Error( - "the path '%s' can not be resolved in pure mode", - std::string_view($1.p, $1.l) - )); - } - Path path(getHome() + std::string($1.p + 1, $1.l - 1)); - $$ = new ExprPath(path); - } - ; - -ind_string_parts - : ind_string_parts IND_STR { $$ = $1; $1->emplace_back(state->at(@2), $2); } - | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), unp($3)); } - | { $$ = new std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>>; } - ; - -binds - : binds attrpath '=' expr ';' - { $$ = $1; - if (auto e = state->addAttr($$, std::move(*$2), unp($4), state->at(@2))) THROW(*e, $1, $2); - delete $2; - } - | binds INHERIT 
attrs ';' - { $$ = $1; - for (auto & [i, iPos] : *$3) { - if ($$->attrs.find(i.symbol) != $$->attrs.end()) - THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1); - $$->attrs.emplace( - i.symbol, - ExprAttrs::AttrDef(std::make_unique<ExprVar>(iPos, i.symbol), iPos, ExprAttrs::AttrDef::Kind::Inherited)); - } - delete $3; - } - | binds INHERIT '(' expr ')' attrs ';' - { $$ = $1; - if (!$$->inheritFromExprs) - $$->inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>(); - $$->inheritFromExprs->push_back(unp($4)); - for (auto & [i, iPos] : *$6) { - if ($$->attrs.find(i.symbol) != $$->attrs.end()) - THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1); - auto from = std::make_unique<nix::ExprInheritFrom>(state->at(@4), $$->inheritFromExprs->size() - 1); - $$->attrs.emplace( - i.symbol, - ExprAttrs::AttrDef( - std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol), - iPos, - ExprAttrs::AttrDef::Kind::InheritedFrom)); - } - delete $6; - } - | { $$ = new ExprAttrs(state->at(@0)); } - ; - -attrs - : attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); } - | attrs string_attr - { $$ = $1; - ExprString * str = dynamic_cast<ExprString *>($2); - if (str) { - $$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2)); - delete str; - } else - THROW(ParseError({ - .msg = HintFmt("dynamic attributes not allowed in inherit"), - .pos = state->positions[state->at(@2)] - }), $1, $2); - } - | { $$ = new std::vector<std::pair<AttrName, PosIdx>>; } - ; - -attrpath - : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); } - | attrpath '.' 
string_attr - { $$ = $1; - ExprString * str = dynamic_cast<ExprString *>($3); - if (str) { - $$->push_back(AttrName(state->symbols.create(str->s))); - delete str; - } else - $$->emplace_back(unp($3)); - } - | attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); } - | string_attr - { $$ = new std::vector<AttrName>; - ExprString *str = dynamic_cast<ExprString *>($1); - if (str) { - $$->push_back(AttrName(state->symbols.create(str->s))); - delete str; - } else - $$->emplace_back(unp($1)); - } - ; - -attr - : ID - | OR_KW { $$ = {"or", 2}; } - ; - -string_attr - : '"' string_parts '"' { $$ = $2; } - | DOLLAR_CURLY expr '}' { $$ = $2; } - ; - -expr_list - : expr_list expr_select { $$ = $1; $1->elems.emplace_back($2); /* !!! dangerous */ } - | { $$ = new ExprList; } - ; - -formals - : formal ',' formals - { $$ = $3; $$->formals.emplace_back(std::move(*$1)); delete $1; } - | formal - { $$ = new Formals; $$->formals.emplace_back(std::move(*$1)); $$->ellipsis = false; delete $1; } - | - { $$ = new Formals; $$->ellipsis = false; } - | ELLIPSIS - { $$ = new Formals; $$->ellipsis = true; } - ; - -formal - : ID { $$ = new Formal{CUR_POS, state->symbols.create($1), nullptr}; } - | ID '?' 
expr { $$ = new Formal{CUR_POS, state->symbols.create($1), unp($3)}; } - ; - -%% - -#include "eval.hh" - - -namespace nix { - -Expr * parseExprFromBuf( - char * text, - size_t length, - Pos::Origin origin, - const SourcePath & basePath, - SymbolTable & symbols, - PosTable & positions, - const Expr::AstSymbols & astSymbols) -{ - yyscan_t scanner; - ParserState state { - .symbols = symbols, - .positions = positions, - .basePath = basePath, - .origin = positions.addOrigin(origin, length), - .s = astSymbols, - }; - - yylex_init(&scanner); - Finally _destroy([&] { yylex_destroy(scanner); }); - - yy_scan_buffer(text, length, scanner); - yyparse(scanner, &state); - if (state.error) { - delete state.result; - throw *state.error; - } - - return state.result; -} - - -} diff --git a/src/libexpr/parser/change_head.hh b/src/libexpr/parser/change_head.hh new file mode 100644 index 000000000..aab315553 --- /dev/null +++ b/src/libexpr/parser/change_head.hh @@ -0,0 +1,66 @@ +#pragma once +///@file + +#include <tao/pegtl.hpp> + +namespace nix::parser { + +// modified copy of change_state, as the manual suggest for more involved +// state manipulation. we want to change only the first state parameter, +// and we care about the *initial* position of a rule application (not the +// past-the-end position as pegtl change_state provides) +template<typename NewState> +struct change_head : tao::pegtl::maybe_nothing +{ + template< + typename Rule, + tao::pegtl::apply_mode A, + tao::pegtl::rewind_mode M, + template<typename...> class Action, + template<typename...> class Control, + typename ParseInput, + typename State, + typename... States + > + [[nodiscard]] static bool match(ParseInput & in, State && st, States &&... 
sts) + { + const auto begin = in.iterator(); + + if constexpr (std::is_constructible_v<NewState, State, States...>) { + NewState s(st, sts...); + if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) { + if constexpr (A == tao::pegtl::apply_mode::action) { + _success<Action<Rule>>(0, begin, in, s, st, sts...); + } + return true; + } + return false; + } else if constexpr (std::is_default_constructible_v<NewState>) { + NewState s; + if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) { + if constexpr (A == tao::pegtl::apply_mode::action) { + _success<Action<Rule>>(0, begin, in, s, st, sts...); + } + return true; + } + return false; + } else { + static_assert(decltype(sizeof(NewState))(), "unable to instantiate new state"); + } + } + + template<typename Target, typename ParseInput, typename... S> + static void _success(void *, auto & begin, ParseInput & in, S & ... sts) + { + const typename ParseInput::action_t at(begin, in); + Target::success(at, sts...); + } + + template<typename Target, typename... S> + static void _success(decltype(Target::success0(std::declval<S &>()...), 0), auto &, auto &, S & ... sts) + { + Target::success0(sts...); + } +}; + +} diff --git a/src/libexpr/parser/grammar.hh b/src/libexpr/parser/grammar.hh new file mode 100644 index 000000000..82df63bc5 --- /dev/null +++ b/src/libexpr/parser/grammar.hh @@ -0,0 +1,707 @@ +#pragma once +///@file + +#include "tao/pegtl.hpp" +#include <type_traits> +#include <variant> + +#include <boost/container/small_vector.hpp> + +// NOTE +// nix line endings are \n, \r\n, \r. the grammar does not use eol or +// eolf rules in favor of reproducing the old flex lexer as faithfully as +// possible, and deferring calculation of positions to downstream users. 
+ +namespace nix::parser::grammar { + +using namespace tao::pegtl; +namespace p = tao::pegtl; + +// character classes +namespace c { + +struct path : sor< + ranges<'a', 'z', 'A', 'Z', '0', '9'>, + one<'.', '_', '-', '+'> +> {}; +struct path_sep : one<'/'> {}; + +struct id_first : ranges<'a', 'z', 'A', 'Z', '_'> {}; +struct id_rest : sor< + ranges<'a', 'z', 'A', 'Z', '0', '9'>, + one<'_', '\'', '-'> +> {}; + +struct uri_scheme_first : ranges<'a', 'z', 'A', 'Z'> {}; +struct uri_scheme_rest : sor< + ranges<'a', 'z', 'A', 'Z', '0', '9'>, + one<'+', '-', '.'> +> {}; +struct uri_sep : one<':'> {}; +struct uri_rest : sor< + ranges<'a', 'z', 'A', 'Z', '0', '9'>, + one<'%', '/', '?', ':', '@', '&', '=', '+', '$', ',', '-', '_', '.', '!', '~', '*', '\''> +> {}; + +} + +// "tokens". PEGs don't really care about tokens, we merely use them as a convenient +// way of writing down keywords and a couple complicated syntax rules. +namespace t { + +struct _extend_as_path : seq< + star<c::path>, + not_at<TAO_PEGTL_STRING("/*")>, + not_at<TAO_PEGTL_STRING("//")>, + c::path_sep, + sor<c::path, TAO_PEGTL_STRING("${")> +> {}; +struct _extend_as_uri : seq< + star<c::uri_scheme_rest>, + c::uri_sep, + c::uri_rest +> {}; + +// keywords might be extended to identifiers, paths, or uris. +// NOTE this assumes that keywords are a-zA-Z only, otherwise uri schemes would never +// match correctly. +// NOTE not a simple seq<...> because this would report incorrect positions for +// keywords used inside must<> if a prefix of the keyword matches. 
+template<typename S> +struct _keyword : sor< + seq< + S, + not_at<c::id_rest>, + not_at<_extend_as_path>, + not_at<_extend_as_uri> + >, + failure +> {}; + +struct kw_if : _keyword<TAO_PEGTL_STRING("if")> {}; +struct kw_then : _keyword<TAO_PEGTL_STRING("then")> {}; +struct kw_else : _keyword<TAO_PEGTL_STRING("else")> {}; +struct kw_assert : _keyword<TAO_PEGTL_STRING("assert")> {}; +struct kw_with : _keyword<TAO_PEGTL_STRING("with")> {}; +struct kw_let : _keyword<TAO_PEGTL_STRING("let")> {}; +struct kw_in : _keyword<TAO_PEGTL_STRING("in")> {}; +struct kw_rec : _keyword<TAO_PEGTL_STRING("rec")> {}; +struct kw_inherit : _keyword<TAO_PEGTL_STRING("inherit")> {}; +struct kw_or : _keyword<TAO_PEGTL_STRING("or")> {}; + +// `-` can be a unary prefix op, a binary infix op, or the first character +// of a path or -> (ex 1->1--1) +// `/` can be a path leader or an operator (ex a?a /a) +struct op_minus : seq<one<'-'>, not_at<one<'>'>>, not_at<_extend_as_path>> {}; +struct op_div : seq<one<'/'>, not_at<c::path>> {}; + +// match a rule, making sure we are not matching it where a keyword would match. +// using minus like this is a lot faster than flipping the order and using seq. +template<typename... 
Rules> +struct _not_at_any_keyword : minus< + seq<Rules...>, + sor< + TAO_PEGTL_STRING("inherit"), + TAO_PEGTL_STRING("assert"), + TAO_PEGTL_STRING("else"), + TAO_PEGTL_STRING("then"), + TAO_PEGTL_STRING("with"), + TAO_PEGTL_STRING("let"), + TAO_PEGTL_STRING("rec"), + TAO_PEGTL_STRING("if"), + TAO_PEGTL_STRING("in"), + TAO_PEGTL_STRING("or") + > +> {}; + +// identifiers are kind of horrid: +// +// - uri_scheme_first ⊂ id_first +// - uri_scheme_first ⊂ uri_scheme_rest ⊂ path +// - id_first ⊂ id_rest ∖ { ' } ⊂ path +// - id_first ∩ (path ∖ uri_scheme_first) = { _ } +// - uri_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest, path } +// - path_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest } +// +// and we want, without reading the input more than once, a string that +// matches (id_first id_rest*) and is not followed by any number of +// characters such that the extended string matches path or uri rules. +// +// since the first character must be either _ or a uri scheme character +// we can ignore path-like bits at the beginning. uri_sep cannot appear anywhere +// in an identifier, so it's only needed in lookahead checks at the uri-like +// prefix. likewise path_sep cannot appear anywhere in the idenfier, so it's +// only needed in lookahead checks in the path-like prefix. +// +// in total that gives us a decomposition of +// +// (uri-scheme-like? (?! continues-as-uri) | _) +// (path-segment-like? (?! continues-as-path)) +// id_rest* +struct identifier : _not_at_any_keyword< + // we don't use (at<id_rest>, ...) matches here because identifiers are + // a really hot path and rewinding as needed by at<> isn't entirely free. + sor< + seq< + c::uri_scheme_first, + star<ranges<'a', 'z', 'A', 'Z', '0', '9', '-'>>, + not_at<_extend_as_uri> + >, + one<'_'> + >, + star<sor<ranges<'a', 'z', 'A', 'Z', '0', '9'>, one<'_', '-'>>>, + not_at<_extend_as_path>, + star<c::id_rest> +> {}; + +// floats may extend ints, thus these rules are very similar. 
+struct integer : seq< + sor< + seq<range<'1', '9'>, star<digit>, not_at<one<'.'>>>, + seq<one<'0'>, not_at<one<'.'>, digit>, star<digit>> + >, + not_at<_extend_as_path> +> {}; + +struct floating : seq< + sor< + seq<range<'1', '9'>, star<digit>, one<'.'>, star<digit>>, + seq<opt<one<'0'>>, one<'.'>, plus<digit>> + >, + opt<one<'E', 'e'>, opt<one<'+', '-'>>, plus<digit>>, + not_at<_extend_as_path> +> {}; + +struct uri : seq< + c::uri_scheme_first, + star<c::uri_scheme_rest>, + c::uri_sep, + plus<c::uri_rest> +> {}; + +struct sep : sor< + plus<one<' ', '\t', '\r', '\n'>>, + seq<one<'#'>, star<not_one<'\r', '\n'>>>, + seq<string<'/', '*'>, until<string<'*', '/'>>> +> {}; + +} + + + +using seps = star<t::sep>; + + +// marker for semantic rules. not handling one of these in an action that cares about +// semantics is probably an error. +struct semantic {}; + + +struct expr; + +struct _string { + template<typename... Inner> + struct literal : semantic, seq<Inner...> {}; + struct cr_lf : semantic, seq<one<'\r'>, opt<one<'\n'>>> {}; + struct interpolation : semantic, seq< + p::string<'$', '{'>, seps, + must<expr>, seps, + must<one<'}'>> + > {}; + struct escape : semantic, must<any> {}; +}; +struct string : _string, seq< + one<'"'>, + star< + sor< + _string::literal<plus<not_one<'$', '"', '\\', '\r'>>>, + _string::cr_lf, + _string::interpolation, + _string::literal<one<'$'>, opt<one<'$'>>>, + seq<one<'\\'>, _string::escape> + > + >, + must<one<'"'>> +> {}; + +struct _ind_string { + template<bool Indented, typename... 
Inner> + struct literal : semantic, seq<Inner...> {}; + struct interpolation : semantic, seq< + p::string<'$', '{'>, seps, + must<expr>, seps, + must<one<'}'>> + > {}; + struct escape : semantic, must<any> {}; +}; +struct ind_string : _ind_string, seq< + TAO_PEGTL_STRING("''"), + opt<star<one<' '>>, one<'\n'>>, + star< + sor< + _ind_string::literal< + true, + plus< + sor< + not_one<'$', '\''>, + seq<one<'$'>, not_one<'{', '\''>>, + seq<one<'\''>, not_one<'\'', '$'>> + > + > + >, + _ind_string::interpolation, + _ind_string::literal<false, one<'$'>>, + _ind_string::literal<false, one<'\''>, not_at<one<'\''>>>, + seq<one<'\''>, _ind_string::literal<false, p::string<'\'', '\''>>>, + seq< + p::string<'\'', '\''>, + sor< + _ind_string::literal<false, one<'$'>>, + seq<one<'\\'>, _ind_string::escape> + > + > + > + >, + must<TAO_PEGTL_STRING("''")> +> {}; + +struct _path { + // legacy lexer rules. extra l_ to avoid reserved c++ identifiers. + struct _l_PATH : seq<star<c::path>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {}; + struct _l_PATH_SEG : seq<star<c::path>, c::path_sep> {}; + struct _l_HPATH : seq<one<'~'>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {}; + struct _l_HPATH_START : TAO_PEGTL_STRING("~/") {}; + struct _path_str : sor<_l_PATH, _l_PATH_SEG, plus<c::path>> {}; + // modern rules + template<typename... 
Inner> + struct literal : semantic, seq<Inner...> {}; + struct interpolation : semantic, seq< + p::string<'$', '{'>, seps, + must<expr>, seps, + must<one<'}'>> + > {}; + struct anchor : semantic, sor< + _l_PATH, + seq<_l_PATH_SEG, at<TAO_PEGTL_STRING("${")>> + > {}; + struct home_anchor : semantic, sor< + _l_HPATH, + seq<_l_HPATH_START, at<TAO_PEGTL_STRING("${")>> + > {}; + struct searched_path : semantic, list<plus<c::path>, c::path_sep> {}; + struct forbid_prefix_triple_slash : sor<not_at<c::path_sep>, failure> {}; + struct forbid_prefix_double_slash_no_interp : sor< + not_at<c::path_sep, star<c::path>, not_at<TAO_PEGTL_STRING("${")>>, + failure + > {}; + // legacy parser rules + struct _str_rest : seq< + must<forbid_prefix_double_slash_no_interp>, + opt<literal<_path_str>>, + must<forbid_prefix_triple_slash>, + star< + sor< + literal<_path_str>, + interpolation + > + > + > {}; +}; +struct path : _path, sor< + seq< + sor<_path::anchor, _path::home_anchor>, + _path::_str_rest + >, + seq<one<'<'>, _path::searched_path, one<'>'>> +> {}; + +struct _formal { + struct name : semantic, t::identifier {}; + struct default_value : semantic, must<expr> {}; +}; +struct formal : semantic, _formal, seq< + _formal::name, + opt<seps, one<'?'>, seps, _formal::default_value> +> {}; + +struct _formals { + struct ellipsis : semantic, p::ellipsis {}; +}; +struct formals : semantic, _formals, seq< + one<'{'>, seps, + // formals and attrsets share a two-token head sequence ('{' <id>). + // this rule unrolls the formals list a bit to provide better error messages than + // "expected '='" at the first ',' if formals are incorrect. 
+ sor< + one<'}'>, + seq<_formals::ellipsis, seps, must<one<'}'>>>, + seq< + formal, seps, + if_then_else< + at<one<','>>, + seq< + star<one<','>, seps, formal, seps>, + opt<one<','>, seps, opt<_formals::ellipsis, seps>>, + must<one<'}'>> + >, + one<'}'> + > + > + > +> {}; + +struct _attr { + struct simple : semantic, sor<t::identifier, t::kw_or> {}; + struct string : semantic, seq<grammar::string> {}; + struct expr : semantic, seq< + TAO_PEGTL_STRING("${"), seps, + must<grammar::expr>, seps, + must<one<'}'>> + > {}; +}; +struct attr : _attr, sor< + _attr::simple, + _attr::string, + _attr::expr +> {}; + +struct attrpath : list<attr, one<'.'>, t::sep> {}; + +struct _inherit { + struct from : semantic, must<expr> {}; + struct attrs : list<attr, seps> {}; +}; +struct inherit : _inherit, seq< + t::kw_inherit, seps, + opt<one<'('>, seps, _inherit::from, seps, must<one<')'>>, seps>, + opt<_inherit::attrs, seps>, + must<one<';'>> +> {}; + +struct _binding { + struct path : semantic, attrpath {}; + struct equal : one<'='> {}; + struct value : semantic, must<expr> {}; +}; +struct binding : _binding, seq< + _binding::path, seps, + must<_binding::equal>, seps, + _binding::value, seps, + must<one<';'>> +> {}; + +struct bindings : opt<list<sor<inherit, binding>, seps>> {}; + +struct op { + enum class kind { + // NOTE non-associativity is *NOT* handled in the grammar structure. + // handling it in the grammar itself instead of in semantic actions + // slows down the parser significantly and makes the rules *much* + // harder to read. maybe this will be different at some point when + // ! does not sit between two binary precedence levels. 
+ nonAssoc, + leftAssoc, + rightAssoc, + unary, + }; + template<typename Rule, unsigned Precedence, kind Kind = kind::leftAssoc> + struct _op : Rule { + static constexpr unsigned precedence = Precedence; + static constexpr op::kind kind = Kind; + }; + + struct unary_minus : _op<t::op_minus, 3, kind::unary> {}; + + // treating this like a unary postfix operator is sketchy, but that's + // the most reasonable way to implement the operator precedence set forth + // by the language way back. it'd be much better if `.` and `?` had the same + // precedence, but alas. + struct has_attr : _op<seq<one<'?'>, seps, must<attrpath>>, 4> {}; + + struct concat : _op<TAO_PEGTL_STRING("++"), 5, kind::rightAssoc> {}; + struct mul : _op<one<'*'>, 6> {}; + struct div : _op<t::op_div, 6> {}; + struct plus : _op<one<'+'>, 7> {}; + struct minus : _op<t::op_minus, 7> {}; + struct not_ : _op<one<'!'>, 8, kind::unary> {}; + struct update : _op<TAO_PEGTL_STRING("//"), 9, kind::rightAssoc> {}; + struct less_eq : _op<TAO_PEGTL_STRING("<="), 10, kind::nonAssoc> {}; + struct greater_eq : _op<TAO_PEGTL_STRING(">="), 10, kind::nonAssoc> {}; + struct less : _op<one<'<'>, 10, kind::nonAssoc> {}; + struct greater : _op<one<'>'>, 10, kind::nonAssoc> {}; + struct equals : _op<TAO_PEGTL_STRING("=="), 11, kind::nonAssoc> {}; + struct not_equals : _op<TAO_PEGTL_STRING("!="), 11, kind::nonAssoc> {}; + struct and_ : _op<TAO_PEGTL_STRING("&&"), 12> {}; + struct or_ : _op<TAO_PEGTL_STRING("||"), 13> {}; + struct implies : _op<TAO_PEGTL_STRING("->"), 14, kind::rightAssoc> {}; +}; + +struct _expr { + template<template<typename...> class OpenMod = seq, typename... 
Init> + struct _attrset : seq< + Init..., + OpenMod<one<'{'>>, seps, + bindings, seps, + must<one<'}'>> + > {}; + + struct select; + + struct id : semantic, t::identifier {}; + struct int_ : semantic, t::integer {}; + struct float_ : semantic, t::floating {}; + struct string : semantic, seq<grammar::string> {}; + struct ind_string : semantic, seq<grammar::ind_string> {}; + struct path : semantic, seq<grammar::path> {}; + struct uri : semantic, t::uri {}; + struct ancient_let : semantic, _attrset<must, t::kw_let, seps> {}; + struct rec_set : semantic, _attrset<must, t::kw_rec, seps> {}; + struct set : semantic, _attrset<> {}; + + struct _list { + struct entry : semantic, seq<select> {}; + }; + struct list : semantic, _list, seq< + one<'['>, seps, + opt<p::list<_list::entry, seps>, seps>, + must<one<']'>> + > {}; + + struct _simple : sor< + id, + int_, + float_, + string, + ind_string, + path, + uri, + seq<one<'('>, seps, must<expr>, seps, must<one<')'>>>, + ancient_let, + rec_set, + set, + list + > {}; + + struct _select { + struct head : _simple {}; + struct attr : semantic, seq<attrpath> {}; + struct attr_or : semantic, must<select> {}; + struct as_app_or : semantic, t::kw_or {}; + }; + struct _app { + struct first_arg : semantic, seq<select> {}; + struct another_arg : semantic, seq<select> {}; + // can be used to stash a position of the application head node + struct select_or_fn : seq<select> {}; + }; + + struct select : _select, seq< + _select::head, seps, + opt< + sor< + seq< + one<'.'>, seps, _select::attr, + opt<seps, t::kw_or, seps, _select::attr_or> + >, + _select::as_app_or + > + > + > {}; + + struct app : _app, seq< + _app::select_or_fn, + opt<seps, _app::first_arg, star<seps, _app::another_arg>> + > {}; + + template<typename Op> + struct operator_ : semantic, Op {}; + + struct unary : seq< + star<sor<operator_<op::not_>, operator_<op::unary_minus>>, seps>, + app + > {}; + + struct _binary_operator : sor< + operator_<op::implies>, + 
operator_<op::update>, + operator_<op::concat>, + operator_<op::plus>, + operator_<op::minus>, + operator_<op::mul>, + operator_<op::div>, + operator_<op::less_eq>, + operator_<op::greater_eq>, + operator_<op::less>, + operator_<op::greater>, + operator_<op::equals>, + operator_<op::not_equals>, + operator_<op::or_>, + operator_<op::and_> + > {}; + + struct _binop : seq< + unary, + star< + seps, + sor< + seq<_binary_operator, seps, must<unary>>, + operator_<op::has_attr> + > + > + > {}; + + struct _lambda { + struct arg : semantic, t::identifier {}; + }; + struct lambda : semantic, _lambda, sor< + seq< + _lambda::arg, seps, + sor< + seq<one<':'>, seps, must<expr>>, + seq<one<'@'>, seps, must<formals, seps, one<':'>, seps, expr>> + > + >, + seq< + formals, seps, + sor< + seq<one<':'>, seps, must<expr>>, + seq<one<'@'>, seps, must<_lambda::arg, seps, one<':'>, seps, expr>> + > + > + > {}; + + struct assert_ : semantic, seq< + t::kw_assert, seps, + must<expr>, seps, + must<one<';'>>, seps, + must<expr> + > {}; + struct with : semantic, seq< + t::kw_with, seps, + must<expr>, seps, + must<one<';'>>, seps, + must<expr> + > {}; + struct let : seq< + t::kw_let, seps, + not_at<one<'{'>>, // exclude ancient_let so we can must<kw_in> + bindings, seps, + must<t::kw_in>, seps, + must<expr> + > {}; + struct if_ : semantic, seq< + t::kw_if, seps, + must<expr>, seps, + must<t::kw_then>, seps, + must<expr>, seps, + must<t::kw_else>, seps, + must<expr> + > {}; +}; +struct expr : semantic, _expr, sor< + _expr::lambda, + _expr::assert_, + _expr::with, + _expr::let, + _expr::if_, + _expr::_binop +> {}; + +// legacy support: \0 terminates input if passed from flex to bison as a token +struct eof : sor<p::eof, one<0>> {}; + +struct root : must<seps, expr, seps, eof> {}; + + + +template<typename Rule> +struct nothing : p::nothing<Rule> { + static_assert(!std::is_base_of_v<semantic, Rule>); +}; + + + +template<typename Self, typename OpCtx, typename AttrPathT, typename ExprT> +struct 
operator_semantics { + struct has_attr : grammar::op::has_attr { + AttrPathT path; + }; + + struct OpEntry { + OpCtx ctx; + uint8_t prec; + grammar::op::kind assoc; + std::variant< + grammar::op::not_, + grammar::op::unary_minus, + grammar::op::implies, + grammar::op::or_, + grammar::op::and_, + grammar::op::equals, + grammar::op::not_equals, + grammar::op::less_eq, + grammar::op::greater_eq, + grammar::op::update, + grammar::op::concat, + grammar::op::less, + grammar::op::greater, + grammar::op::plus, + grammar::op::minus, + grammar::op::mul, + grammar::op::div, + has_attr + > op; + }; + + // statistics here are taken from nixpkgs commit de502c4d0ba96261e5de803e4d1d1925afd3e22f. + // over 99.9% of contexts in nixpkgs need at most 4 slots, ~85% need only 1 + boost::container::small_vector<ExprT, 4> exprs; + // over 99.9% of contexts in nixpkgs need at most 2 slots, ~85% need only 1 + boost::container::small_vector<OpEntry, 2> ops; + + // derived class is expected to define members: + // + // ExprT applyOp(OpCtx & pos, auto & op, auto &... args); + // [[noreturn]] static void badOperator(OpCtx & pos, auto &... args); + + void reduce(uint8_t toPrecedence, auto &... args) { + while (!ops.empty()) { + auto & [ctx, precedence, kind, op] = ops.back(); + // NOTE this relies on associativity not being mixed within a precedence level. + if ((precedence > toPrecedence) + || (kind != grammar::op::kind::leftAssoc && precedence == toPrecedence)) + break; + std::visit([&, ctx=std::move(ctx)] (auto & op) { + exprs.push_back(static_cast<Self &>(*this).applyOp(ctx, op, args...)); + }, op); + ops.pop_back(); + } + } + + ExprT popExpr() + { + auto r = std::move(exprs.back()); + exprs.pop_back(); + return r; + } + + void pushOp(OpCtx ctx, auto o, auto &... 
args) + { + if (o.kind != grammar::op::kind::unary) + reduce(o.precedence, args...); + if (!ops.empty() && o.kind == grammar::op::kind::nonAssoc) { + auto & [_pos, _prec, _kind, _o] = ops.back(); + if (_kind == o.kind && _prec == o.precedence) + Self::badOperator(ctx, args...); + } + ops.emplace_back(ctx, o.precedence, o.kind, std::move(o)); + } + + ExprT finish(auto &... args) + { + reduce(255, args...); + return popExpr(); + } +}; + +} diff --git a/src/libexpr/parser/parser.cc b/src/libexpr/parser/parser.cc new file mode 100644 index 000000000..850f1276e --- /dev/null +++ b/src/libexpr/parser/parser.cc @@ -0,0 +1,862 @@ +#include "attr-set.hh" +#include "error.hh" +#include "eval-settings.hh" +#include "eval.hh" +#include "finally.hh" +#include "nixexpr.hh" +#include "symbol-table.hh" +#include "users.hh" + +#include "change_head.hh" +#include "grammar.hh" +#include "state.hh" + +#include <charconv> +#include <clocale> +#include <memory> + +// flip this define when doing parser development to enable some g checks. +#if 0 +#include <tao/pegtl/contrib/analyze.hpp> +#define ANALYZE_GRAMMAR \ + ([] { \ + const std::size_t issues = tao::pegtl::analyze<grammar::root>(); \ + assert(issues == 0); \ + })() +#else +#define ANALYZE_GRAMMAR ((void) 0) +#endif + +namespace p = tao::pegtl; + +namespace nix::parser { +namespace { + +template<typename> +inline constexpr const char * error_message = nullptr; + +#define error_message_for(...) 
\ + template<> inline constexpr auto error_message<__VA_ARGS__> + +error_message_for(p::one<'{'>) = "expecting '{'"; +error_message_for(p::one<'}'>) = "expecting '}'"; +error_message_for(p::one<'"'>) = "expecting '\"'"; +error_message_for(p::one<';'>) = "expecting ';'"; +error_message_for(p::one<')'>) = "expecting ')'"; +error_message_for(p::one<'='>) = "expecting '='"; +error_message_for(p::one<']'>) = "expecting ']'"; +error_message_for(p::one<':'>) = "expecting ':'"; +error_message_for(p::string<'\'', '\''>) = "expecting \"''\""; +error_message_for(p::any) = "expecting any character"; +error_message_for(grammar::eof) = "expecting end of file"; +error_message_for(grammar::seps) = "expecting separators"; +error_message_for(grammar::path::forbid_prefix_triple_slash) = "too many slashes in path"; +error_message_for(grammar::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash"; +error_message_for(grammar::expr) = "expecting expression"; +error_message_for(grammar::expr::unary) = "expecting expression"; +error_message_for(grammar::binding::equal) = "expecting '='"; +error_message_for(grammar::expr::lambda::arg) = "expecting identifier"; +error_message_for(grammar::formals) = "expecting formals"; +error_message_for(grammar::attrpath) = "expecting attribute path"; +error_message_for(grammar::expr::select) = "expecting selection expression"; +error_message_for(grammar::t::kw_then) = "expecting 'then'"; +error_message_for(grammar::t::kw_else) = "expecting 'else'"; +error_message_for(grammar::t::kw_in) = "expecting 'in'"; + +struct SyntaxErrors +{ + template<typename Rule> + static constexpr auto message = error_message<Rule>; + + template<typename Rule> + static constexpr bool raise_on_failure = false; +}; + +template<typename Rule> +struct Control : p::must_if<SyntaxErrors>::control<Rule> +{ + template<typename ParseInput, typename... States> + [[noreturn]] static void raise(const ParseInput & in, States &&... 
st) + { + if (in.empty()) { + std::string expected; + if constexpr (constexpr auto msg = error_message<Rule>) + expected = fmt(", %s", msg); + throw p::parse_error("unexpected end of file" + expected, in); + } + p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...); + } +}; + +struct ExprState + : grammar:: + operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>> +{ + std::unique_ptr<Expr> popExprOnly() { + return std::move(popExpr().second); + } + + template<typename Op, typename... Args> + std::unique_ptr<Expr> applyUnary(Args &&... args) { + return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...); + } + + template<typename Op> + std::unique_ptr<Expr> applyBinary(PosIdx pos) { + auto right = popExprOnly(), left = popExprOnly(); + return std::make_unique<Op>(pos, std::move(left), std::move(right)); + } + + std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false) + { + std::vector<std::unique_ptr<Expr>> args(2); + args[flip ? 0 : 1] = popExprOnly(); + args[flip ? 
1 : 0] = popExprOnly(); + return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args)); + } + + std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state) + { + return call(pos, state.s.lessThan, !less); + } + + std::unique_ptr<Expr> concatStrings(PosIdx pos) + { + std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2); + args[1] = popExpr(); + args[0] = popExpr(); + return std::make_unique<ExprConcatStrings>(pos, false, std::move(args)); + } + + std::unique_ptr<Expr> negate(PosIdx pos, State & state) + { + std::vector<std::unique_ptr<Expr>> args(2); + args[0] = std::make_unique<ExprInt>(0); + args[1] = popExprOnly(); + return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args)); + } + + std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) { + using Op = grammar::op; + + auto not_ = [] (auto e) { + return std::make_unique<ExprOpNot>(std::move(e)); + }; + + return { + pos, + (overloaded { + [&] (Op::implies) { return applyBinary<ExprOpImpl>(pos); }, + [&] (Op::or_) { return applyBinary<ExprOpOr>(pos); }, + [&] (Op::and_) { return applyBinary<ExprOpAnd>(pos); }, + [&] (Op::equals) { return applyBinary<ExprOpEq>(pos); }, + [&] (Op::not_equals) { return applyBinary<ExprOpNEq>(pos); }, + [&] (Op::less) { return order(pos, true, state); }, + [&] (Op::greater_eq) { return not_(order(pos, true, state)); }, + [&] (Op::greater) { return order(pos, false, state); }, + [&] (Op::less_eq) { return not_(order(pos, false, state)); }, + [&] (Op::update) { return applyBinary<ExprOpUpdate>(pos); }, + [&] (Op::not_) { return applyUnary<ExprOpNot>(); }, + [&] (Op::plus) { return concatStrings(pos); }, + [&] (Op::minus) { return call(pos, state.s.sub); }, + [&] (Op::mul) { return call(pos, state.s.mul); }, + [&] (Op::div) { return call(pos, state.s.div); }, + [&] (Op::concat) { return applyBinary<ExprOpConcatLists>(pos); }, + [&] (has_attr & a) { return 
applyUnary<ExprOpHasAttr>(std::move(a.path)); }, + [&] (Op::unary_minus) { return negate(pos, state); }, + })(op) + }; + } + + // always_inline is needed, otherwise pushOp slows down considerably + [[noreturn, gnu::always_inline]] + static void badOperator(PosIdx pos, State & state) + { + throw ParseError({ + .msg = HintFmt("syntax error, unexpected operator"), + .pos = state.positions[pos] + }); + } + + template<typename Expr, typename... Args> + Expr & pushExpr(PosIdx pos, Args && ... args) + { + auto p = std::make_unique<Expr>(std::forward<Args>(args)...); + auto & result = *p; + exprs.emplace_back(pos, std::move(p)); + return result; + } +}; + +struct SubexprState { +private: + ExprState * up; + +public: + explicit SubexprState(ExprState & up, auto &...) : up(&up) {} + operator ExprState &() { return *up; } + ExprState * operator->() { return up; } +}; + + + +template<typename Rule> +struct BuildAST : grammar::nothing<Rule> {}; + +struct LambdaState : SubexprState { + using SubexprState::SubexprState; + + Symbol arg; + std::unique_ptr<Formals> formals; +}; + +struct FormalsState : SubexprState { + using SubexprState::SubexprState; + + Formals formals{}; + Formal formal{}; +}; + +template<> struct BuildAST<grammar::formal::name> { + static void apply(const auto & in, FormalsState & s, State & ps) { + s.formal = { + .pos = ps.at(in), + .name = ps.symbols.create(in.string_view()), + }; + } +}; + +template<> struct BuildAST<grammar::formal> { + static void apply0(FormalsState & s, State &) { + s.formals.formals.emplace_back(std::move(s.formal)); + } +}; + +template<> struct BuildAST<grammar::formal::default_value> { + static void apply0(FormalsState & s, State & ps) { + s.formal.def = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::formals::ellipsis> { + static void apply0(FormalsState & s, State &) { + s.formals.ellipsis = true; + } +}; + +template<> struct BuildAST<grammar::formals> : change_head<FormalsState> { + static void 
success0(FormalsState & f, LambdaState & s, State &) { + s.formals = std::make_unique<Formals>(std::move(f.formals)); + } +}; + +struct AttrState : SubexprState { + using SubexprState::SubexprState; + + std::vector<AttrName> attrs; + + void pushAttr(auto && attr, PosIdx) { attrs.emplace_back(std::move(attr)); } +}; + +template<> struct BuildAST<grammar::attr::simple> { + static void apply(const auto & in, auto & s, State & ps) { + s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::attr::string> { + static void apply(const auto & in, auto & s, State & ps) { + auto e = s->popExprOnly(); + if (auto str = dynamic_cast<ExprString *>(e.get())) + s.pushAttr(ps.symbols.create(str->s), ps.at(in)); + else + s.pushAttr(std::move(e), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::attr::expr> : BuildAST<grammar::attr::string> {}; + +struct BindingsState : SubexprState { + using SubexprState::SubexprState; + + ExprAttrs attrs; + AttrPath path; + std::unique_ptr<Expr> value; +}; + +struct InheritState : SubexprState { + using SubexprState::SubexprState; + + std::vector<std::pair<AttrName, PosIdx>> attrs; + std::unique_ptr<Expr> from; + PosIdx fromPos; + + void pushAttr(auto && attr, PosIdx pos) { attrs.emplace_back(std::move(attr), pos); } +}; + +template<> struct BuildAST<grammar::inherit::from> { + static void apply(const auto & in, InheritState & s, State & ps) { + s.from = s->popExprOnly(); + s.fromPos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::inherit> : change_head<InheritState> { + static void success0(InheritState & s, BindingsState & b, State & ps) { + auto & attrs = b.attrs.attrs; + // TODO this should not reuse generic attrpath rules. 
+ for (auto & [i, iPos] : s.attrs) { + if (i.symbol) + continue; + if (auto str = dynamic_cast<ExprString *>(i.expr.get())) + i = AttrName(ps.symbols.create(str->s)); + else { + throw ParseError({ + .msg = HintFmt("dynamic attributes not allowed in inherit"), + .pos = ps.positions[iPos] + }); + } + } + if (auto fromE = std::move(s.from)) { + if (!b.attrs.inheritFromExprs) + b.attrs.inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>(); + b.attrs.inheritFromExprs->push_back(std::move(fromE)); + for (auto & [i, iPos] : s.attrs) { + if (attrs.find(i.symbol) != attrs.end()) + ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); + auto from = std::make_unique<ExprInheritFrom>(s.fromPos, b.attrs.inheritFromExprs->size() - 1); + attrs.emplace( + i.symbol, + ExprAttrs::AttrDef( + std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol), + iPos, + ExprAttrs::AttrDef::Kind::InheritedFrom)); + } + } else { + for (auto & [i, iPos] : s.attrs) { + if (attrs.find(i.symbol) != attrs.end()) + ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); + attrs.emplace( + i.symbol, + ExprAttrs::AttrDef( + std::make_unique<ExprVar>(iPos, i.symbol), + iPos, + ExprAttrs::AttrDef::Kind::Inherited)); + } + } + } +}; + +template<> struct BuildAST<grammar::binding::path> : change_head<AttrState> { + static void success0(AttrState & a, BindingsState & s, State & ps) { + s.path = std::move(a.attrs); + } +}; + +template<> struct BuildAST<grammar::binding::value> { + static void apply0(BindingsState & s, State & ps) { + s.value = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::binding> { + static void apply(const auto & in, BindingsState & s, State & ps) { + ps.addAttr(&s.attrs, std::move(s.path), std::move(s.value), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::expr::id> { + static void apply(const auto & in, ExprState & s, State & ps) { + if (in.string_view() == "__curPos") + s.pushExpr<ExprPos>(ps.at(in), ps.at(in)); + else + 
s.pushExpr<ExprVar>(ps.at(in), ps.at(in), ps.symbols.create(in.string_view())); + } +}; + +template<> struct BuildAST<grammar::expr::int_> { + static void apply(const auto & in, ExprState & s, State & ps) { + int64_t v; + if (std::from_chars(in.begin(), in.end(), v).ec != std::errc{}) { + throw ParseError({ + .msg = HintFmt("invalid integer '%1%'", in.string_view()), + .pos = ps.positions[ps.at(in)], + }); + } + s.pushExpr<ExprInt>(noPos, v); + } +}; + +template<> struct BuildAST<grammar::expr::float_> { + static void apply(const auto & in, ExprState & s, State & ps) { + // copy the input into a temporary string so we can call stod. + // can't use from_chars because libc++ (thus darwin) does not have it, + // and floats are not performance-sensitive anyway. if they were you'd + // be in much bigger trouble than this. + // + // we also get to do a locale-save dance because stod is locale-aware and + // something (a plugin?) may have called setlocale or uselocale. + static struct locale_hack { + locale_t posix; + locale_hack(): posix(newlocale(LC_ALL_MASK, "POSIX", 0)) + { + if (posix == 0) + throw SysError("could not get POSIX locale"); + } + } locale; + + auto tmp = in.string(); + double v = [&] { + auto oldLocale = uselocale(locale.posix); + Finally resetLocale([=] { uselocale(oldLocale); }); + try { + return std::stod(tmp); + } catch (...) { + throw ParseError({ + .msg = HintFmt("invalid float '%1%'", in.string_view()), + .pos = ps.positions[ps.at(in)], + }); + } + }(); + s.pushExpr<ExprFloat>(noPos, v); + } +}; + +struct StringState : SubexprState { + using SubexprState::SubexprState; + + std::string currentLiteral; + PosIdx currentPos; + std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts; + + void append(PosIdx pos, std::string_view s) + { + if (currentLiteral.empty()) + currentPos = pos; + currentLiteral += s; + } + + // FIXME this truncates strings on NUL for compat with the old parser. 
ideally + // we should use the decomposition the grammar gives us instead of iterating over + // the entire string again. + static void unescapeStr(std::string & str) + { + char * s = str.data(); + char * t = s; + char c; + while ((c = *s++)) { + if (c == '\\') { + c = *s++; + if (c == 'n') *t = '\n'; + else if (c == 'r') *t = '\r'; + else if (c == 't') *t = '\t'; + else *t = c; + } + else if (c == '\r') { + /* Normalise CR and CR/LF into LF. */ + *t = '\n'; + if (*s == '\n') s++; /* cr/lf */ + } + else *t = c; + t++; + } + str.resize(t - str.data()); + } + + void endLiteral() + { + if (!currentLiteral.empty()) { + unescapeStr(currentLiteral); + parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral))); + } + } + + std::unique_ptr<Expr> finish() + { + if (parts.empty()) { + unescapeStr(currentLiteral); + return std::make_unique<ExprString>(std::move(currentLiteral)); + } else { + endLiteral(); + auto pos = parts[0].first; + return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts)); + } + } +}; + +template<typename... 
Content> struct BuildAST<grammar::string::literal<Content...>> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); + } +}; + +template<> struct BuildAST<grammar::string::cr_lf> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); // FIXME compat with old parser + } +}; + +template<> struct BuildAST<grammar::string::interpolation> { + static void apply(const auto & in, StringState & s, State & ps) { + s.endLiteral(); + s.parts.emplace_back(ps.at(in), s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::string::escape> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), "\\"); // FIXME compat with old parser + s.append(ps.at(in), in.string_view()); + } +}; + +template<> struct BuildAST<grammar::string> : change_head<StringState> { + static void success0(StringState & s, ExprState & e, State &) { + e.exprs.emplace_back(noPos, s.finish()); + } +}; + +struct IndStringState : SubexprState { + using SubexprState::SubexprState; + + std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> parts; +}; + +template<bool Indented, typename... 
Content> +struct BuildAST<grammar::ind_string::literal<Indented, Content...>> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.parts.emplace_back(ps.at(in), StringToken{in.string_view(), Indented}); + } +}; + +template<> struct BuildAST<grammar::ind_string::interpolation> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.parts.emplace_back(ps.at(in), s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::ind_string::escape> { + static void apply(const auto & in, IndStringState & s, State & ps) { + switch (*in.begin()) { + case 'n': s.parts.emplace_back(ps.at(in), StringToken{"\n"}); break; + case 'r': s.parts.emplace_back(ps.at(in), StringToken{"\r"}); break; + case 't': s.parts.emplace_back(ps.at(in), StringToken{"\t"}); break; + default: s.parts.emplace_back(ps.at(in), StringToken{in.string_view()}); break; + } + } +}; + +template<> struct BuildAST<grammar::ind_string> : change_head<IndStringState> { + static void success(const auto & in, IndStringState & s, ExprState & e, State & ps) { + e.exprs.emplace_back(noPos, ps.stripIndentation(ps.at(in), std::move(s.parts))); + } +}; + +template<typename... 
Content> struct BuildAST<grammar::path::literal<Content...>> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); + s.endLiteral(); + } +}; + +template<> struct BuildAST<grammar::path::interpolation> : BuildAST<grammar::string::interpolation> {}; + +template<> struct BuildAST<grammar::path::anchor> { + static void apply(const auto & in, StringState & s, State & ps) { + Path path(absPath(in.string(), ps.basePath.path.abs())); + /* add back in the trailing '/' to the first segment */ + if (in.string_view().ends_with('/') && in.size() > 1) + path += "/"; + s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path))); // owned from creation, so nothing leaks if emplace_back throws + } +}; + +template<> struct BuildAST<grammar::path::home_anchor> { + static void apply(const auto & in, StringState & s, State & ps) { + if (evalSettings.pureEval) + throw Error("the path '%s' can not be resolved in pure mode", in.string_view()); + Path path(getHome() + in.string_view().substr(1)); + s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path))); // owned from creation, so nothing leaks if emplace_back throws + } +}; + +template<> struct BuildAST<grammar::path::searched_path> { + static void apply(const auto & in, StringState & s, State & ps) { + std::vector<std::unique_ptr<Expr>> args{2}; + args[0] = std::make_unique<ExprVar>(ps.s.nixPath); + args[1] = std::make_unique<ExprString>(in.string()); + s.parts.emplace_back( + ps.at(in), + std::make_unique<ExprCall>( + ps.at(in), + std::make_unique<ExprVar>(ps.s.findFile), + std::move(args))); + } +}; + +template<> struct BuildAST<grammar::path> : change_head<StringState> { + template<typename E> + static void check_slash(PosIdx end, StringState & s, State & ps) { + auto e = dynamic_cast<E *>(s.parts.back().second.get()); + if (!e || !e->s.ends_with('/')) + return; + if (s.parts.size() > 1 || e->s != "/") + throw ParseError({ + .msg = HintFmt("path has a trailing slash"), + .pos = ps.positions[end], + }); + } + + static void success(const auto & in, StringState & s, ExprState & e, State & ps) { + 
s.endLiteral(); + check_slash<ExprPath>(ps.atEnd(in), s, ps); + check_slash<ExprString>(ps.atEnd(in), s, ps); + if (s.parts.size() == 1) { + e.exprs.emplace_back(noPos, std::move(s.parts.back().second)); + } else { + e.pushExpr<ExprConcatStrings>(ps.at(in), ps.at(in), false, std::move(s.parts)); + } + } +}; + +// strings and paths are handled fully by the grammar-level rule for now +template<> struct BuildAST<grammar::expr::string> : p::maybe_nothing {}; +template<> struct BuildAST<grammar::expr::ind_string> : p::maybe_nothing {}; +template<> struct BuildAST<grammar::expr::path> : p::maybe_nothing {}; + +template<> struct BuildAST<grammar::expr::uri> { + static void apply(const auto & in, ExprState & s, State & ps) { + static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals); + if (noURLLiterals) + throw ParseError({ + .msg = HintFmt("URL literals are disabled"), + .pos = ps.positions[ps.at(in)] + }); + s.pushExpr<ExprString>(ps.at(in), in.string()); + } +}; + +template<> struct BuildAST<grammar::expr::ancient_let> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + b.attrs.pos = ps.at(in); + b.attrs.recursive = true; + s.pushExpr<ExprSelect>(b.attrs.pos, b.attrs.pos, std::make_unique<ExprAttrs>(std::move(b.attrs)), ps.s.body); + } +}; + +template<> struct BuildAST<grammar::expr::rec_set> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + b.attrs.pos = ps.at(in); + b.attrs.recursive = true; + s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); + } +}; + +template<> struct BuildAST<grammar::expr::set> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + b.attrs.pos = ps.at(in); + s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); + } +}; + +using ListState = std::vector<std::unique_ptr<Expr>>; + +template<> struct 
BuildAST<grammar::expr::list> : change_head<ListState> { + static void success(const auto & in, ListState & ls, ExprState & s, State & ps) { + auto e = std::make_unique<ExprList>(); + e->elems = std::move(ls); + s.exprs.emplace_back(ps.at(in), std::move(e)); + } +}; + +template<> struct BuildAST<grammar::expr::list::entry> : change_head<ExprState> { + static void success0(ExprState & e, ListState & s, State & ps) { + s.emplace_back(e.finish(ps).second); + } +}; + +struct SelectState : SubexprState { + using SubexprState::SubexprState; + + PosIdx pos; + ExprSelect * e = nullptr; +}; + +template<> struct BuildAST<grammar::expr::select::head> { + static void apply(const auto & in, SelectState & s, State & ps) { + s.pos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::expr::select::attr> : change_head<AttrState> { + static void success0(AttrState & a, SelectState & s, State &) { + s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, s->popExprOnly(), std::move(a.attrs), nullptr); + } +}; + +template<> struct BuildAST<grammar::expr::select::attr_or> { + static void apply0(SelectState & s, State &) { + s.e->def = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::expr::select::as_app_or> { + static void apply(const auto & in, SelectState & s, State & ps) { + std::vector<std::unique_ptr<Expr>> args(1); + args[0] = std::make_unique<ExprVar>(ps.at(in), ps.s.or_); + s->pushExpr<ExprCall>(s.pos, s.pos, s->popExprOnly(), std::move(args)); + } +}; + +template<> struct BuildAST<grammar::expr::select> : change_head<SelectState> { + static void success0(const auto &...) 
{} +}; + +struct AppState : SubexprState { + using SubexprState::SubexprState; + + PosIdx pos; + ExprCall * e = nullptr; +}; + +template<> struct BuildAST<grammar::expr::app::select_or_fn> { + static void apply(const auto & in, AppState & s, State & ps) { + s.pos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::expr::app::first_arg> { + static void apply(auto & in, AppState & s, State & ps) { + auto arg = s->popExprOnly(), fn = s->popExprOnly(); + if ((s.e = dynamic_cast<ExprCall *>(fn.get()))) { + // TODO remove. + // AST compat with old parser, semantics are the same. + // this can happen on occasions such as `<p> <p>` or `a or b or`, + // neither of which are super worth optimizing. + s.e->args.push_back(std::move(arg)); + s->exprs.emplace_back(noPos, std::move(fn)); + } else { + std::vector<std::unique_ptr<Expr>> args{1}; + args[0] = std::move(arg); + s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args)); + } + } +}; + +template<> struct BuildAST<grammar::expr::app::another_arg> { + static void apply0(AppState & s, State & ps) { + s.e->args.push_back(s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::expr::app> : change_head<AppState> { + static void success0(const auto &...) 
{} +}; + +template<typename Op> struct BuildAST<grammar::expr::operator_<Op>> { + static void apply(const auto & in, ExprState & s, State & ps) { + s.pushOp(ps.at(in), Op{}, ps); + } +}; +template<> struct BuildAST<grammar::expr::operator_<grammar::op::has_attr>> : change_head<AttrState> { + static void success(const auto & in, AttrState & a, ExprState & s, State & ps) { + s.pushOp(ps.at(in), ExprState::has_attr{{}, std::move(a.attrs)}, ps); + } +}; + +template<> struct BuildAST<grammar::expr::lambda::arg> { + static void apply(const auto & in, LambdaState & s, State & ps) { + s.arg = ps.symbols.create(in.string_view()); + } +}; + +template<> struct BuildAST<grammar::expr::lambda> : change_head<LambdaState> { + static void success(const auto & in, LambdaState & l, ExprState & s, State & ps) { + if (l.formals) + l.formals = ps.validateFormals(std::move(l.formals), ps.at(in), l.arg); + s.pushExpr<ExprLambda>(ps.at(in), ps.at(in), l.arg, std::move(l.formals), l->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::expr::assert_> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto body = s.popExprOnly(), cond = s.popExprOnly(); + s.pushExpr<ExprAssert>(ps.at(in), ps.at(in), std::move(cond), std::move(body)); + } +}; + +template<> struct BuildAST<grammar::expr::with> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto body = s.popExprOnly(), scope = s.popExprOnly(); + s.pushExpr<ExprWith>(ps.at(in), ps.at(in), std::move(scope), std::move(body)); + } +}; + +template<> struct BuildAST<grammar::expr::let> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + if (!b.attrs.dynamicAttrs.empty()) + throw ParseError({ + .msg = HintFmt("dynamic attributes not allowed in let"), + .pos = ps.positions[ps.at(in)] + }); + + s.pushExpr<ExprLet>(ps.at(in), std::make_unique<ExprAttrs>(std::move(b.attrs)), b->popExprOnly()); + } +}; + +template<> struct 
BuildAST<grammar::expr::if_> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto else_ = s.popExprOnly(), then = s.popExprOnly(), cond = s.popExprOnly(); + s.pushExpr<ExprIf>(ps.at(in), ps.at(in), std::move(cond), std::move(then), std::move(else_)); + } +}; + +template<> struct BuildAST<grammar::expr> : change_head<ExprState> { + static void success0(ExprState & inner, ExprState & outer, State & ps) { + outer.exprs.push_back(inner.finish(ps)); + } +}; + +} +} + +namespace nix { + +Expr * EvalState::parse( + char * text, + size_t length, + Pos::Origin origin, + const SourcePath & basePath, + std::shared_ptr<StaticEnv> & staticEnv) +{ + parser::State s = { + symbols, + positions, + basePath, + positions.addOrigin(origin, length), + exprSymbols, + }; + parser::ExprState x; + + assert(length >= 2); + assert(text[length - 1] == 0); + assert(text[length - 2] == 0); + length -= 2; + + p::string_input<p::tracking_mode::lazy> inp{std::string_view{text, length}, "input"}; + try { + p::parse<parser::grammar::root, parser::BuildAST, parser::Control>(inp, x, s); + } catch (p::parse_error & e) { + auto pos = e.positions().back(); + throw ParseError({ + .msg = HintFmt("syntax error, %s", e.message()), + .pos = positions[s.positions.add(s.origin, pos.byte)] + }); + } + + auto [_pos, result] = x.finish(s); + result->bindVars(*this, staticEnv); + return result.release(); +} + +} diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser/state.hh index cb1f12230..f5a0428d7 100644 --- a/src/libexpr/parser-state.hh +++ b/src/libexpr/parser/state.hh @@ -3,77 +3,61 @@ #include "eval.hh" -namespace nix { +namespace nix::parser { -/** - * @note Storing a C-style `char *` and `size_t` allows us to avoid - * having to define the special members that using string_view here - * would implicitly delete. 
- */ struct StringToken { - const char * p; - size_t l; + std::string_view s; bool hasIndentation; - operator std::string_view() const { return {p, l}; } + operator std::string_view() const { return s; } }; -struct ParserLocation -{ - int first_line, first_column; - int last_line, last_column; - - // backup to recover from yyless(0) - int stashed_first_column, stashed_last_column; - - void stash() { - stashed_first_column = first_column; - stashed_last_column = last_column; - } - - void unstash() { - first_column = stashed_first_column; - last_column = stashed_last_column; - } -}; - -struct ParserState +struct State { SymbolTable & symbols; PosTable & positions; - Expr * result; SourcePath basePath; PosTable::Origin origin; const Expr::AstSymbols & s; - std::unique_ptr<Error> error; - [[nodiscard]] ParseError dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos); - [[nodiscard]] ParseError dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos); - [[nodiscard]] std::optional<ParseError> addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos); - [[nodiscard]] std::optional<ParseError> validateFormals(Formals * formals, PosIdx pos = noPos, Symbol arg = {}); + void dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos); + void dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos); + void addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos); + std::unique_ptr<Formals> validateFormals(std::unique_ptr<Formals> formals, PosIdx pos = noPos, Symbol arg = {}); std::unique_ptr<Expr> stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es); - PosIdx at(const ParserLocation & loc); + + // lazy positioning means we don't get byte offsets directly, in.position() would work + // but also requires line and column (which is expensive) + PosIdx at(const auto & in) + { + return 
positions.add(origin, in.begin() - in.input().begin()); + } + + PosIdx atEnd(const auto & in) + { + return positions.add(origin, in.end() - in.input().begin()); + } }; -inline ParseError ParserState::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos) +inline void State::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos) { - return ParseError({ + throw ParseError({ .msg = HintFmt("attribute '%1%' already defined at %2%", showAttrPath(symbols, attrPath), positions[prevPos]), .pos = positions[pos] }); } -inline ParseError ParserState::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos) +inline void State::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos) { - return ParseError({ + throw ParseError({ .msg = HintFmt("attribute '%1%' already defined at %2%", symbols[attr], positions[prevPos]), .pos = positions[pos] }); } -inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos) +inline void State::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos) { AttrPath::iterator i; // All attrpaths have at least one attr @@ -88,12 +72,12 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat ExprAttrs * attrs2 = dynamic_cast<ExprAttrs *>(j->second.e.get()); if (!attrs2) { attrPath.erase(i + 1, attrPath.end()); - return dupAttr(attrPath, pos, j->second.pos); + dupAttr(attrPath, pos, j->second.pos); } attrs = attrs2; } else { attrPath.erase(i + 1, attrPath.end()); - return dupAttr(attrPath, pos, j->second.pos); + dupAttr(attrPath, pos, j->second.pos); } } else { auto next = attrs->attrs.emplace(std::piecewise_construct, @@ -135,7 +119,7 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat if (ae->inheritFromExprs) std::ranges::move(*ae->inheritFromExprs, std::back_inserter(*jAttrs->inheritFromExprs)); } else { - return dupAttr(attrPath, 
pos, j->second.pos); + dupAttr(attrPath, pos, j->second.pos); } } else { // This attr path is not defined. Let's create it. @@ -147,11 +131,9 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat } else { attrs->dynamicAttrs.emplace_back(std::move(i->expr), std::move(e), pos); } - - return {}; } -inline std::optional<ParseError> ParserState::validateFormals(Formals * formals, PosIdx pos, Symbol arg) +inline std::unique_ptr<Formals> State::validateFormals(std::unique_ptr<Formals> formals, PosIdx pos, Symbol arg) { std::sort(formals->formals.begin(), formals->formals.end(), [] (const auto & a, const auto & b) { @@ -166,21 +148,21 @@ inline std::optional<ParseError> ParserState::validateFormals(Formals * formals, duplicate = std::min(thisDup, duplicate.value_or(thisDup)); } if (duplicate) - return ParseError({ + throw ParseError({ .msg = HintFmt("duplicate formal function argument '%1%'", symbols[duplicate->first]), .pos = positions[duplicate->second] }); if (arg && formals->has(arg)) - return ParseError({ + throw ParseError({ .msg = HintFmt("duplicate formal function argument '%1%'", symbols[arg]), .pos = positions[pos] }); - return {}; + return formals; } -inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos, +inline std::unique_ptr<Expr> State::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es) { if (es.empty()) return std::make_unique<ExprString>(""); @@ -201,11 +183,11 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos, } continue; } - for (size_t j = 0; j < str->l; ++j) { + for (size_t j = 0; j < str->s.size(); ++j) { if (atStartOfLine) { - if (str->p[j] == ' ') + if (str->s[j] == ' ') curIndent++; - else if (str->p[j] == '\n') { + else if (str->s[j] == '\n') { /* Empty line, doesn't influence minimum indentation. 
*/ curIndent = 0; @@ -213,7 +195,7 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos, atStartOfLine = false; if (curIndent < minIndent) minIndent = curIndent; } - } else if (str->p[j] == '\n') { + } else if (str->s[j] == '\n') { atStartOfLine = true; curIndent = 0; } @@ -233,23 +215,23 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos, }; const auto trimString = [&] (const StringToken t) { std::string s2; - for (size_t j = 0; j < t.l; ++j) { + for (size_t j = 0; j < t.s.size(); ++j) { if (atStartOfLine) { - if (t.p[j] == ' ') { + if (t.s[j] == ' ') { if (curDropped++ >= minIndent) - s2 += t.p[j]; + s2 += t.s[j]; } - else if (t.p[j] == '\n') { + else if (t.s[j] == '\n') { curDropped = 0; - s2 += t.p[j]; + s2 += t.s[j]; } else { atStartOfLine = false; curDropped = 0; - s2 += t.p[j]; + s2 += t.s[j]; } } else { - s2 += t.p[j]; - if (t.p[j] == '\n') atStartOfLine = true; + s2 += t.s[j]; + if (t.s[j] == '\n') atStartOfLine = true; } } @@ -274,9 +256,4 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos, return std::make_unique<ExprConcatStrings>(pos, true, std::move(es2)); } -inline PosIdx ParserState::at(const ParserLocation & loc) -{ - return positions.add(origin, loc.first_column); -} - } diff --git a/tests/functional/lang/parse-fail-eof-pos.err.exp b/tests/functional/lang/parse-fail-eof-pos.err.exp index ef9ca381c..fc56897e4 100644 --- a/tests/functional/lang/parse-fail-eof-pos.err.exp +++ b/tests/functional/lang/parse-fail-eof-pos.err.exp @@ -1,4 +1,4 @@ -error: syntax error, unexpected end of file +error: syntax error, unexpected end of file, expecting expression at «stdin»:3:1: 2| # no content 3| diff --git a/tests/functional/lang/parse-fail-undef-var-2.err.exp b/tests/functional/lang/parse-fail-undef-var-2.err.exp index 393c454dd..d1728f125 100644 --- a/tests/functional/lang/parse-fail-undef-var-2.err.exp +++ b/tests/functional/lang/parse-fail-undef-var-2.err.exp @@ -1,4 +1,4 @@ 
-error: syntax error, unexpected ':', expecting '}' +error: syntax error, expecting '}' at «stdin»:3:13: 2| 3| f = {x, y : ["baz" "bar" z "bat"]}: x + y; diff --git a/tests/functional/lang/parse-fail-utf8.err.exp b/tests/functional/lang/parse-fail-utf8.err.exp index 1c83f6eb3..81df9dc8a 100644 --- a/tests/functional/lang/parse-fail-utf8.err.exp +++ b/tests/functional/lang/parse-fail-utf8.err.exp @@ -1,4 +1,4 @@ -error: syntax error, unexpected invalid token, expecting end of file +error: syntax error, expecting end of file at «stdin»:1:5: 1| 123 é 4 | ^ |