aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- flake.nix 2
-rw-r--r-- meson.build 10
-rwxr-xr-x meson/cleanup-install.bash 50
-rw-r--r-- misc/pegtl.nix 23
-rw-r--r-- package.nix 6
-rw-r--r-- src/libexpr/eval.cc 16
-rw-r--r-- src/libexpr/lexer.l 302
-rw-r--r-- src/libexpr/meson.build 58
-rw-r--r-- src/libexpr/parser.y 503
-rw-r--r-- src/libexpr/parser/change_head.hh 66
-rw-r--r-- src/libexpr/parser/grammar.hh 707
-rw-r--r-- src/libexpr/parser/parser.cc 862
-rw-r--r-- src/libexpr/parser/state.hh (renamed from src/libexpr/parser-state.hh) 113
-rw-r--r-- tests/functional/lang/parse-fail-eof-pos.err.exp 2
-rw-r--r-- tests/functional/lang/parse-fail-undef-var-2.err.exp 2
-rw-r--r-- tests/functional/lang/parse-fail-utf8.err.exp 2
16 files changed, 1722 insertions, 1002 deletions
diff --git a/flake.nix b/flake.nix
index 372983f6d..5c764d73e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -195,6 +195,8 @@
busybox-sandbox-shell = final.busybox-sandbox-shell or final.default-busybox-sandbox-shell;
};
+ pegtl = final.callPackage ./misc/pegtl.nix { };
+
# Export the patched version of boehmgc that Lix uses into the overlay
# for consumers of this flake.
boehmgc-nix = final.nix.boehmgc-nix;
diff --git a/meson.build b/meson.build
index e6151e0a2..0cb2030e7 100644
--- a/meson.build
+++ b/meson.build
@@ -287,6 +287,14 @@ gtest = [
toml11 = dependency('toml11', version : '>=3.7.0', required : true, method : 'cmake')
+pegtl = dependency(
+ 'pegtl',
+ version : '>=3.2.7',
+ required : true,
+ method : 'cmake',
+ modules : [ 'taocpp::pegtl' ],
+)
+
nlohmann_json = dependency('nlohmann_json', required : true)
# lix-doc is a Rust project provided via buildInputs and unfortunately doesn't have any way to be detected.
@@ -335,8 +343,6 @@ endif
# that busybox sh won't run busybox applets as builtins (which would break our sandbox).
lsof = find_program('lsof', native : true)
-bison = find_program('bison', native : true)
-flex = find_program('flex', native : true)
# This is how Nix does generated headers...
# other instances of header generation use a very similar command.
diff --git a/meson/cleanup-install.bash b/meson/cleanup-install.bash
deleted file mode 100755
index 928edc74a..000000000
--- a/meson/cleanup-install.bash
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env bash
-# Meson will call this with an absolute path to Bash.
-# The shebang is just for convenience.
-
-# The parser and lexer tab are generated via custom Meson targets in src/libexpr/meson.build,
-# but Meson doesn't support marking only part of a target for install. The generation creates
-# both headers (parser-tab.hh, lexer-tab.hh) and source files (parser-tab.cc, lexer-tab.cc),
-# and we definitely want the former installed, but not the latter. This script is added to
-# Meson's install steps to correct this, as the logic for it is just complex enough to
-# warrant separate and careful handling, because both Meson's configured include directory
-# may or may not be an absolute path, and DESTDIR may or may not be set at all, but can't be
-# manipulated in Meson logic.
-
-set -euo pipefail
-
-echo "cleanup-install: removing Meson-placed C++ sources from dest includedir"
-
-if [[ "${1/--help/}" != "$1" ]]; then
- echo "cleanup-install: this script should only be called from the Meson build system"
- exit 1
-fi
-
-# Ensure the includedir was passed as the first argument
-# (set -u will make this fail otherwise).
-includedir="$1"
-# And then ensure that first argument is a directory that exists.
-if ! [[ -d "$1" ]]; then
- echo "cleanup-install: this script should only be called from the Meson build system"
- echo "argv[1] (${1@Q}) is not a directory"
- exit 2
-fi
-
-# If DESTDIR environment variable is set, prepend it to the include dir.
-# Unfortunately, we cannot do this on the Meson side. We do have an environment variable
-# `MESON_INSTALL_DESTDIR_PREFIX`, but that will not refer to the include directory if
-# includedir has been set separately, which Lix's split-output derivation does.
-# We also cannot simply do an inline bash conditional like "${DESTDIR:=}" or similar,
-# because we need to specifically *join* DESTDIR and includedir with a slash, and *not*
-# have a slash if DESTDIR isn't set at all, since $includedir could be a relative directory.
-# Finally, DESTDIR is only available to us as an environment variable in these install scripts,
-# not in Meson logic.
-# Therefore, our best option is to have Meson pass this script the configured includedir,
-# and perform this dance with it and $DESTDIR.
-if [[ -n "${DESTDIR:-}" ]]; then
- includedir="$DESTDIR/$includedir"
-fi
-
-# Intentionally not using -f.
-# If these files don't exist then our assumptions have been violated and we should fail.
-rm -v "$includedir/lix/libexpr/parser-tab.cc" "$includedir/lix/libexpr/lexer-tab.cc"
diff --git a/misc/pegtl.nix b/misc/pegtl.nix
new file mode 100644
index 000000000..3fd999d9d
--- /dev/null
+++ b/misc/pegtl.nix
@@ -0,0 +1,23 @@
+{
+ stdenv,
+ cmake,
+ ninja,
+ fetchFromGitHub,
+}:
+
+stdenv.mkDerivation { # packages the taocpp/PEGTL sources fetched from GitHub, using cmake and ninja as build tools
+ pname = "pegtl";
+ version = "3.2.7"; # the top-level meson.build requires pegtl >=3.2.7
+
+ src = fetchFromGitHub {
+ repo = "PEGTL";
+ owner = "taocpp";
+ rev = "refs/tags/3.2.7"; # tag name repeats `version`; update both together
+ hash = "sha256-IV5YNGE4EWVrmg2Sia/rcU8jCuiBynQGJM6n3DCWTQU="; # content hash of the fetched source; refresh whenever rev changes
+ };
+
+ nativeBuildInputs = [
+ cmake
+ ninja
+ ];
+}
diff --git a/package.nix b/package.nix
index 988379618..0f194796f 100644
--- a/package.nix
+++ b/package.nix
@@ -10,7 +10,6 @@
boehmgc-nix ? __forDefaults.boehmgc-nix,
boehmgc,
nlohmann_json,
- bison,
build-release-notes ? __forDefaults.build-release-notes,
boost,
brotli,
@@ -20,7 +19,6 @@
doxygen,
editline-lix ? __forDefaults.editline-lix,
editline,
- flex,
git,
gtest,
jq,
@@ -36,6 +34,7 @@
meson,
ninja,
openssl,
+ pegtl,
pkg-config,
python3,
rapidcheck,
@@ -210,8 +209,6 @@ stdenv.mkDerivation (finalAttrs: {
nativeBuildInputs =
[
- bison
- flex
python3
meson
ninja
@@ -250,6 +247,7 @@ stdenv.mkDerivation (finalAttrs: {
libsodium
toml11
lix-doc
+ pegtl
]
++ lib.optionals hostPlatform.isLinux [
libseccomp
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index afee89420..a6a64a43c 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -18,7 +18,6 @@
#include "gc-small-vector.hh"
#include "fetch-to-store.hh"
#include "flake/flakeref.hh"
-#include "parser-tab.hh"
#include <algorithm>
#include <chrono>
@@ -2958,21 +2957,6 @@ std::optional<std::string> EvalState::resolveSearchPathPath(const SearchPath::Pa
}
-Expr * EvalState::parse(
- char * text,
- size_t length,
- Pos::Origin origin,
- const SourcePath & basePath,
- std::shared_ptr<StaticEnv> & staticEnv)
-{
- auto result = parseExprFromBuf(text, length, origin, basePath, symbols, positions, exprSymbols);
-
- result->bindVars(*this, staticEnv);
-
- return result;
-}
-
-
std::string ExternalValueBase::coerceToString(EvalState & state, const PosIdx & pos, NixStringContext & context, bool copyMore, bool copyToStore) const
{
state.error<TypeError>(
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
deleted file mode 100644
index 5bc815f00..000000000
--- a/src/libexpr/lexer.l
+++ /dev/null
@@ -1,302 +0,0 @@
-%option reentrant bison-bridge bison-locations
-%option align
-%option noyywrap
-%option never-interactive
-%option stack
-%option nodefault
-%option nounput noyy_top_state
-
-
-%s DEFAULT
-%x STRING
-%x IND_STRING
-%x INPATH
-%x INPATH_SLASH
-%x PATH_START
-
-
-%{
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
-#endif
-
-// yacc generates code that uses unannotated fallthrough.
-#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
-#endif
-
-#include "nixexpr.hh"
-#include "parser-tab.hh"
-#include "strings.hh"
-
-using namespace nix;
-
-#define THROW(...) \
- do { \
- state->error.reset(new auto(__VA_ARGS__)); \
- return YYerror; \
- } while (0)
-
-namespace nix {
-
-#define CUR_POS state->at(*yylloc)
-
-static void initLoc(YYLTYPE * loc)
-{
- loc->first_line = loc->last_line = 0;
- loc->first_column = loc->last_column = 0;
-}
-
-static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
-{
- loc->stash();
-
- loc->first_column = loc->last_column;
- loc->last_column += len;
-}
-
-
-// we make use of the fact that the parser receives a private copy of the input
-// string and can munge around in it.
-static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
-{
- char * result = s;
- char * t = s;
- char c;
- // the input string is terminated with *two* NULs, so we can safely take
- // *one* character after the one being checked against.
- while ((c = *s++)) {
- if (c == '\\') {
- c = *s++;
- if (c == 'n') *t = '\n';
- else if (c == 'r') *t = '\r';
- else if (c == 't') *t = '\t';
- else *t = c;
- }
- else if (c == '\r') {
- /* Normalise CR and CR/LF into LF. */
- *t = '\n';
- if (*s == '\n') s++; /* cr/lf */
- }
- else *t = c;
- t++;
- }
- return {result, size_t(t - result)};
-}
-
-
-}
-
-#define YY_USER_INIT initLoc(yylloc)
-#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
-
-#define PUSH_STATE(state) yy_push_state(state, yyscanner)
-#define POP_STATE() yy_pop_state(yyscanner)
-
-%}
-
-
-ANY .|\n
-ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
-INT [0-9]+
-FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
-PATH_CHAR [a-zA-Z0-9\.\_\-\+]
-PATH {PATH_CHAR}*(\/{PATH_CHAR}+)+\/?
-PATH_SEG {PATH_CHAR}*\/
-HPATH \~(\/{PATH_CHAR}+)+\/?
-HPATH_START \~\/
-SPATH \<{PATH_CHAR}+(\/{PATH_CHAR}+)*\>
-URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
-
-
-%%
-
-
-if { return IF; }
-then { return THEN; }
-else { return ELSE; }
-assert { return ASSERT; }
-with { return WITH; }
-let { return LET; }
-in { return IN; }
-rec { return REC; }
-inherit { return INHERIT; }
-or { return OR_KW; }
-\.\.\. { return ELLIPSIS; }
-
-\=\= { return EQ; }
-\!\= { return NEQ; }
-\<\= { return LEQ; }
-\>\= { return GEQ; }
-\&\& { return AND; }
-\|\| { return OR; }
-\-\> { return IMPL; }
-\/\/ { return UPDATE; }
-\+\+ { return CONCAT; }
-
-{ID} { yylval->id = {yytext, (size_t) yyleng}; return ID; }
-{INT} { errno = 0;
- std::optional<int64_t> numMay = string2Int<int64_t>(yytext);
- if (numMay.has_value()) {
- yylval->n = *numMay;
- } else {
- THROW(ParseError(ErrorInfo{
- .msg = HintFmt("invalid integer '%1%'", yytext),
- .pos = state->positions[CUR_POS],
- }));
- }
- return INT;
- }
-{FLOAT} { errno = 0;
- yylval->nf = strtod(yytext, 0);
- if (errno != 0)
- THROW(ParseError(ErrorInfo{
- .msg = HintFmt("invalid float '%1%'", yytext),
- .pos = state->positions[CUR_POS],
- }));
- return FLOAT;
- }
-
-\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-
-\} { /* State INITIAL only exists at the bottom of the stack and is
- used as a marker. DEFAULT replaces it everywhere else.
- Popping when in INITIAL state causes an empty stack exception,
- so don't */
- if (YYSTATE != INITIAL)
- POP_STATE();
- return '}';
- }
-\{ { PUSH_STATE(DEFAULT); return '{'; }
-
-\" { PUSH_STATE(STRING); return '"'; }
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
- /* It is impossible to match strings ending with '$' with one
- regex because trailing contexts are only valid at the end
- of a rule. (A sane but undocumented limitation.) */
- yylval->str = unescapeStr(state->symbols, yytext, yyleng);
- return STR;
- }
-<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-<STRING>\" { POP_STATE(); return '"'; }
-<STRING>\$|\\|\$\\ {
- /* This can only occur when we reach EOF, otherwise the above
- (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered.
- This is technically invalid, but we leave the problem to the
- parser who fails with exact location. */
- return EOF;
- }
-
-\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; }
-<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ {
- yylval->str = {yytext, (size_t) yyleng, true};
- return IND_STR;
- }
-<IND_STRING>\'\'\$ |
-<IND_STRING>\$ {
- yylval->str = {"$", 1};
- return IND_STR;
- }
-<IND_STRING>\'\'\' {
- yylval->str = {"''", 2};
- return IND_STR;
- }
-<IND_STRING>\'\'\\{ANY} {
- yylval->str = unescapeStr(state->symbols, yytext + 2, yyleng - 2);
- return IND_STR;
- }
-<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; }
-<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; }
-<IND_STRING>\' {
- yylval->str = {"'", 1};
- return IND_STR;
- }
-
-{PATH_SEG}\$\{ |
-{HPATH_START}\$\{ {
- PUSH_STATE(PATH_START);
- yyless(0);
- yylloc->unstash();
-}
-
-<PATH_START>{PATH_SEG} {
- POP_STATE();
- PUSH_STATE(INPATH_SLASH);
- yylval->path = {yytext, (size_t) yyleng};
- return PATH;
-}
-
-<PATH_START>{HPATH_START} {
- POP_STATE();
- PUSH_STATE(INPATH_SLASH);
- yylval->path = {yytext, (size_t) yyleng};
- return HPATH;
-}
-
-{PATH} {
- if (yytext[yyleng-1] == '/')
- PUSH_STATE(INPATH_SLASH);
- else
- PUSH_STATE(INPATH);
- yylval->path = {yytext, (size_t) yyleng};
- return PATH;
-}
-{HPATH} {
- if (yytext[yyleng-1] == '/')
- PUSH_STATE(INPATH_SLASH);
- else
- PUSH_STATE(INPATH);
- yylval->path = {yytext, (size_t) yyleng};
- return HPATH;
-}
-
-<INPATH,INPATH_SLASH>\$\{ {
- POP_STATE();
- PUSH_STATE(INPATH);
- PUSH_STATE(DEFAULT);
- return DOLLAR_CURLY;
-}
-<INPATH,INPATH_SLASH>{PATH}|{PATH_SEG}|{PATH_CHAR}+ {
- POP_STATE();
- if (yytext[yyleng-1] == '/')
- PUSH_STATE(INPATH_SLASH);
- else
- PUSH_STATE(INPATH);
- yylval->str = {yytext, (size_t) yyleng};
- return STR;
-}
-<INPATH>{ANY} |
-<INPATH><<EOF>> {
- /* if we encounter a non-path character we inform the parser that the path has
- ended with a PATH_END token and re-parse this character in the default
- context (it may be ')', ';', or something of that sort) */
- POP_STATE();
- yyless(0);
- yylloc->unstash();
- return PATH_END;
-}
-
-<INPATH_SLASH>{ANY} |
-<INPATH_SLASH><<EOF>> {
- THROW(ParseError(ErrorInfo{
- .msg = HintFmt("path has a trailing slash"),
- .pos = state->positions[CUR_POS],
- }));
-}
-
-{SPATH} { yylval->path = {yytext, (size_t) yyleng}; return SPATH; }
-{URI} { yylval->uri = {yytext, (size_t) yyleng}; return URI; }
-
-[ \t\r\n]+ /* eat up whitespace */
-\#[^\r\n]* /* single-line comments */
-\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */
-
-{ANY} {
- /* Don't return a negative number, as this will cause
- Bison to stop parsing without an error. */
- return (unsigned char) yytext[0];
- }
-
-%%
diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build
index 080fdb443..39493dadc 100644
--- a/src/libexpr/meson.build
+++ b/src/libexpr/meson.build
@@ -1,54 +1,3 @@
-parser_tab = custom_target(
- input : 'parser.y',
- output : [
- 'parser-tab.cc',
- 'parser-tab.hh',
- ],
- command : [
- 'bison',
- '-v',
- '-o',
- '@OUTPUT0@',
- '@INPUT@',
- '-d',
- ],
- # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add
- # an install script below which removes parser-tab.cc.
- install : true,
- install_dir : includedir / 'lix/libexpr',
-)
-
-lexer_tab = custom_target(
- input : [
- 'lexer.l',
- parser_tab,
- ],
- output : [
- 'lexer-tab.cc',
- 'lexer-tab.hh',
- ],
- command : [
- 'flex',
- '--outfile',
- '@OUTPUT0@',
- '--header-file=' + '@OUTPUT1@',
- '@INPUT0@',
- ],
- # NOTE(Qyriad): Meson doesn't support installing only part of a custom target, so we add
- # an install script below which removes lexer-tab.cc.
- install : true,
- install_dir : includedir / 'lix/libexpr',
-)
-
-# TODO(Qyriad): When the parser and lexer are rewritten this should be removed.
-# NOTE(Qyriad): We do this this way instead of an inline bash or rm command
-# due to subtleties in Meson. Check the comments in cleanup-install.bash for details.
-meson.add_install_script(
- bash,
- meson.project_source_root() / 'meson/cleanup-install.bash',
- '@0@'.format(includedir),
-)
-
libexpr_generated_headers = [
gen_header.process('primops/derivation.nix', preserve_path_from : meson.current_source_dir()),
]
@@ -75,6 +24,7 @@ libexpr_sources = files(
'get-drvs.cc',
'json-to-value.cc',
'nixexpr.cc',
+ 'parser/parser.cc',
'paths.cc',
'primops.cc',
'print-ambiguous.cc',
@@ -110,7 +60,9 @@ libexpr_headers = files(
'get-drvs.hh',
'json-to-value.hh',
'nixexpr.hh',
- 'parser-state.hh',
+ 'parser/change_head.hh',
+ 'parser/grammar.hh',
+ 'parser/state.hh',
'pos-idx.hh',
'pos-table.hh',
'primops.hh',
@@ -129,8 +81,6 @@ libexpr_headers = files(
libexpr = library(
'lixexpr',
libexpr_sources,
- parser_tab,
- lexer_tab,
libexpr_generated_headers,
dependencies : [
liblixutil,
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
deleted file mode 100644
index b825f2ed8..000000000
--- a/src/libexpr/parser.y
+++ /dev/null
@@ -1,503 +0,0 @@
-%glr-parser
-%define api.pure
-%locations
-%define parse.error verbose
-%defines
-/* %no-lines */
-%parse-param { void * scanner }
-%parse-param { nix::ParserState * state }
-%lex-param { void * scanner }
-%lex-param { nix::ParserState * state }
-%expect 1
-%expect-rr 1
-
-%code requires {
-
-#ifndef BISON_HEADER
-#define BISON_HEADER
-
-#include <variant>
-
-#include "finally.hh"
-#include "users.hh"
-
-#include "nixexpr.hh"
-#include "eval.hh"
-#include "eval-settings.hh"
-#include "globals.hh"
-#include "parser-state.hh"
-
-#define YYLTYPE ::nix::ParserLocation
-#define YY_DECL int yylex \
- (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParserState * state)
-
-namespace nix {
-
-Expr * parseExprFromBuf(
- char * text,
- size_t length,
- Pos::Origin origin,
- const SourcePath & basePath,
- SymbolTable & symbols,
- PosTable & positions,
- const Expr::AstSymbols & astSymbols);
-
-}
-
-#endif
-
-}
-
-%{
-
-#include "parser-tab.hh"
-#include "lexer-tab.hh"
-
-YY_DECL;
-
-using namespace nix;
-
-#define CUR_POS state->at(*yylocp)
-
-// otherwise destructors cause compiler errors
-#pragma GCC diagnostic ignored "-Wswitch-enum"
-
-#define THROW(err, ...) \
- do { \
- state->error.reset(new auto(err)); \
- [](auto... d) { (delete d, ...); }(__VA_ARGS__); \
- YYABORT; \
- } while (0)
-
-void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char * error)
-{
- if (std::string_view(error).starts_with("syntax error, unexpected end of file")) {
- loc->first_column = loc->last_column;
- loc->first_line = loc->last_line;
- }
- throw ParseError({
- .msg = HintFmt(error),
- .pos = state->positions[state->at(*loc)]
- });
-}
-
-template<typename T>
-static std::unique_ptr<T> unp(T * e)
-{
- return std::unique_ptr<T>(e);
-}
-
-template<typename T = std::unique_ptr<nix::Expr>, typename... Args>
-static std::vector<T> vec(Args && ... args)
-{
- std::vector<T> result;
- result.reserve(sizeof...(Args));
- (result.emplace_back(std::forward<Args>(args)), ...);
- return result;
-}
-
-
-%}
-
-%union {
- // !!! We're probably leaking stuff here.
- nix::Expr * e;
- nix::ExprList * list;
- nix::ExprAttrs * attrs;
- nix::Formals * formals;
- nix::Formal * formal;
- nix::NixInt n;
- nix::NixFloat nf;
- nix::StringToken id; // !!! -> Symbol
- nix::StringToken path;
- nix::StringToken uri;
- nix::StringToken str;
- std::vector<nix::AttrName> * attrNames;
- std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs;
- std::vector<std::pair<nix::PosIdx, std::unique_ptr<nix::Expr>>> * string_parts;
- std::vector<std::pair<nix::PosIdx, std::variant<std::unique_ptr<nix::Expr>, nix::StringToken>>> * ind_string_parts;
-}
-
-%destructor { delete $$; } <e>
-%destructor { delete $$; } <list>
-%destructor { delete $$; } <attrs>
-%destructor { delete $$; } <formals>
-%destructor { delete $$; } <formal>
-%destructor { delete $$; } <attrNames>
-%destructor { delete $$; } <inheritAttrs>
-%destructor { delete $$; } <string_parts>
-%destructor { delete $$; } <ind_string_parts>
-
-%type <e> start
-%type <e> expr expr_function expr_if expr_op
-%type <e> expr_select expr_simple expr_app
-%type <list> expr_list
-%type <attrs> binds
-%type <formals> formals
-%type <formal> formal
-%type <attrNames> attrpath
-%type <inheritAttrs> attrs
-%type <string_parts> string_parts_interpolated
-%type <ind_string_parts> ind_string_parts
-%type <e> path_start string_parts string_attr
-%type <id> attr
-%token <id> ID
-%token <str> STR IND_STR
-%token <n> INT
-%token <nf> FLOAT
-%token <path> PATH HPATH SPATH PATH_END
-%token <uri> URI
-%token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL OR_KW
-%token DOLLAR_CURLY /* == ${ */
-%token IND_STRING_OPEN IND_STRING_CLOSE
-%token ELLIPSIS
-
-%right IMPL
-%left OR
-%left AND
-%nonassoc EQ NEQ
-%nonassoc '<' '>' LEQ GEQ
-%right UPDATE
-%left NOT
-%left '+' '-'
-%left '*' '/'
-%right CONCAT
-%nonassoc '?'
-%nonassoc NEGATE
-
-%%
-
-start: expr { state->result = $1; $$ = 0; };
-
-expr: expr_function;
-
-expr_function
- : ID ':' expr_function
- { $$ = new ExprLambda(CUR_POS, state->symbols.create($1), nullptr, unp($3)); }
- | '{' formals '}' ':' expr_function
- { if (auto e = state->validateFormals($2)) THROW(*e);
- $$ = new ExprLambda(CUR_POS, unp($2), unp($5));
- }
- | '{' formals '}' '@' ID ':' expr_function
- {
- auto arg = state->symbols.create($5);
- if (auto e = state->validateFormals($2, CUR_POS, arg)) THROW(*e, $2, $7);
- $$ = new ExprLambda(CUR_POS, arg, unp($2), unp($7));
- }
- | ID '@' '{' formals '}' ':' expr_function
- {
- auto arg = state->symbols.create($1);
- if (auto e = state->validateFormals($4, CUR_POS, arg)) THROW(*e, $4, $7);
- $$ = new ExprLambda(CUR_POS, arg, unp($4), unp($7));
- }
- | ASSERT expr ';' expr_function
- { $$ = new ExprAssert(CUR_POS, unp($2), unp($4)); }
- | WITH expr ';' expr_function
- { $$ = new ExprWith(CUR_POS, unp($2), unp($4)); }
- | LET binds IN expr_function
- { if (!$2->dynamicAttrs.empty())
- THROW(ParseError({
- .msg = HintFmt("dynamic attributes not allowed in let"),
- .pos = state->positions[CUR_POS]
- }), $2, $4);
- $$ = new ExprLet(unp($2), unp($4));
- }
- | expr_if
- ;
-
-expr_if
- : IF expr THEN expr ELSE expr { $$ = new ExprIf(CUR_POS, unp($2), unp($4), unp($6)); }
- | expr_op
- ;
-
-expr_op
- : '!' expr_op %prec NOT { $$ = new ExprOpNot(unp($2)); }
- | '-' expr_op %prec NEGATE { $$ = new ExprCall(CUR_POS, std::make_unique<ExprVar>(state->s.sub), vec(std::make_unique<ExprInt>(0), unp($2))); }
- | expr_op EQ expr_op { $$ = new ExprOpEq(unp($1), unp($3)); }
- | expr_op NEQ expr_op { $$ = new ExprOpNEq(unp($1), unp($3)); }
- | expr_op '<' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3)); }
- | expr_op LEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1))); }
- | expr_op '>' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($3, $1)); }
- | expr_op GEQ expr_op { $$ = new ExprOpNot(std::make_unique<ExprCall>(state->at(@2), std::make_unique<ExprVar>(state->s.lessThan), vec($1, $3))); }
- | expr_op AND expr_op { $$ = new ExprOpAnd(state->at(@2), unp($1), unp($3)); }
- | expr_op OR expr_op { $$ = new ExprOpOr(state->at(@2), unp($1), unp($3)); }
- | expr_op IMPL expr_op { $$ = new ExprOpImpl(state->at(@2), unp($1), unp($3)); }
- | expr_op UPDATE expr_op { $$ = new ExprOpUpdate(state->at(@2), unp($1), unp($3)); }
- | expr_op '?' attrpath { $$ = new ExprOpHasAttr(unp($1), std::move(*$3)); delete $3; }
- | expr_op '+' expr_op
- { $$ = new ExprConcatStrings(state->at(@2), false, vec<std::pair<PosIdx, std::unique_ptr<Expr>>>(std::pair(state->at(@1), unp($1)), std::pair(state->at(@3), unp($3)))); }
- | expr_op '-' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.sub), vec($1, $3)); }
- | expr_op '*' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.mul), vec($1, $3)); }
- | expr_op '/' expr_op { $$ = new ExprCall(state->at(@2), std::make_unique<ExprVar>(state->s.div), vec($1, $3)); }
- | expr_op CONCAT expr_op { $$ = new ExprOpConcatLists(state->at(@2), unp($1), unp($3)); }
- | expr_app
- ;
-
-expr_app
- : expr_app expr_select {
- if (auto e2 = dynamic_cast<ExprCall *>($1)) {
- e2->args.emplace_back($2);
- $$ = $1;
- } else
- $$ = new ExprCall(CUR_POS, unp($1), vec(unp($2)));
- }
- | expr_select
- ;
-
-expr_select
- : expr_simple '.' attrpath
- { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), nullptr); delete $3; }
- | expr_simple '.' attrpath OR_KW expr_select
- { $$ = new ExprSelect(CUR_POS, unp($1), std::move(*$3), unp($5)); delete $3; }
- | /* Backwards compatibility: because Nixpkgs has a rarely used
- function named ‘or’, allow stuff like ‘map or [...]’. */
- expr_simple OR_KW
- { $$ = new ExprCall(CUR_POS, unp($1), vec(std::make_unique<ExprVar>(CUR_POS, state->s.or_))); }
- | expr_simple
- ;
-
-expr_simple
- : ID {
- std::string_view s = "__curPos";
- if ($1.l == s.size() && strncmp($1.p, s.data(), s.size()) == 0)
- $$ = new ExprPos(CUR_POS);
- else
- $$ = new ExprVar(CUR_POS, state->symbols.create($1));
- }
- | INT { $$ = new ExprInt($1); }
- | FLOAT { $$ = new ExprFloat($1); }
- | '"' string_parts '"' { $$ = $2; }
- | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
- $$ = state->stripIndentation(CUR_POS, std::move(*$2)).release();
- delete $2;
- }
- | path_start PATH_END
- | path_start string_parts_interpolated PATH_END {
- $2->emplace($2->begin(), state->at(@1), $1);
- $$ = new ExprConcatStrings(CUR_POS, false, std::move(*$2));
- delete $2;
- }
- | SPATH {
- std::string path($1.p + 1, $1.l - 2);
- $$ = new ExprCall(CUR_POS,
- std::make_unique<ExprVar>(state->s.findFile),
- vec(std::make_unique<ExprVar>(state->s.nixPath),
- std::make_unique<ExprString>(std::move(path))));
- }
- | URI {
- static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
- if (noURLLiterals)
- THROW(ParseError({
- .msg = HintFmt("URL literals are disabled"),
- .pos = state->positions[CUR_POS]
- }));
- $$ = new ExprString(std::string($1));
- }
- | '(' expr ')' { $$ = $2; }
- /* Let expressions `let {..., body = ...}' are just desugared
- into `(rec {..., body = ...}).body'. */
- | LET '{' binds '}'
- { $3->recursive = true; $$ = new ExprSelect(noPos, unp($3), state->s.body); }
- | REC '{' binds '}'
- { $3->recursive = true; $$ = $3; }
- | '{' binds '}'
- { $$ = $2; }
- | '[' expr_list ']' { $$ = $2; }
- ;
-
-string_parts
- : STR { $$ = new ExprString(std::string($1)); }
- | string_parts_interpolated
- { $$ = new ExprConcatStrings(CUR_POS, true, std::move(*$1));
- delete $1;
- }
- | { $$ = new ExprString(""); }
- ;
-
-string_parts_interpolated
- : string_parts_interpolated STR
- { $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); }
- | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); }
- | DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>; $$->emplace_back(state->at(@1), $2); }
- | STR DOLLAR_CURLY expr '}' {
- $$ = new std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>>;
- $$->emplace_back(state->at(@1), new ExprString(std::string($1)));
- $$->emplace_back(state->at(@2), $3);
- }
- ;
-
-path_start
- : PATH {
- Path path(absPath({$1.p, $1.l}, state->basePath.path.abs()));
- /* add back in the trailing '/' to the first segment */
- if ($1.p[$1.l-1] == '/' && $1.l > 1)
- path += "/";
- $$ = new ExprPath(path);
- }
- | HPATH {
- if (evalSettings.pureEval) {
- THROW(Error(
- "the path '%s' can not be resolved in pure mode",
- std::string_view($1.p, $1.l)
- ));
- }
- Path path(getHome() + std::string($1.p + 1, $1.l - 1));
- $$ = new ExprPath(path);
- }
- ;
-
-ind_string_parts
- : ind_string_parts IND_STR { $$ = $1; $1->emplace_back(state->at(@2), $2); }
- | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), unp($3)); }
- | { $$ = new std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>>; }
- ;
-
-binds
- : binds attrpath '=' expr ';'
- { $$ = $1;
- if (auto e = state->addAttr($$, std::move(*$2), unp($4), state->at(@2))) THROW(*e, $1, $2);
- delete $2;
- }
- | binds INHERIT attrs ';'
- { $$ = $1;
- for (auto & [i, iPos] : *$3) {
- if ($$->attrs.find(i.symbol) != $$->attrs.end())
- THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1);
- $$->attrs.emplace(
- i.symbol,
- ExprAttrs::AttrDef(std::make_unique<ExprVar>(iPos, i.symbol), iPos, ExprAttrs::AttrDef::Kind::Inherited));
- }
- delete $3;
- }
- | binds INHERIT '(' expr ')' attrs ';'
- { $$ = $1;
- if (!$$->inheritFromExprs)
- $$->inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>();
- $$->inheritFromExprs->push_back(unp($4));
- for (auto & [i, iPos] : *$6) {
- if ($$->attrs.find(i.symbol) != $$->attrs.end())
- THROW(state->dupAttr(i.symbol, iPos, $$->attrs[i.symbol].pos), $1);
- auto from = std::make_unique<nix::ExprInheritFrom>(state->at(@4), $$->inheritFromExprs->size() - 1);
- $$->attrs.emplace(
- i.symbol,
- ExprAttrs::AttrDef(
- std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol),
- iPos,
- ExprAttrs::AttrDef::Kind::InheritedFrom));
- }
- delete $6;
- }
- | { $$ = new ExprAttrs(state->at(@0)); }
- ;
-
-attrs
- : attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); }
- | attrs string_attr
- { $$ = $1;
- ExprString * str = dynamic_cast<ExprString *>($2);
- if (str) {
- $$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2));
- delete str;
- } else
- THROW(ParseError({
- .msg = HintFmt("dynamic attributes not allowed in inherit"),
- .pos = state->positions[state->at(@2)]
- }), $1, $2);
- }
- | { $$ = new std::vector<std::pair<AttrName, PosIdx>>; }
- ;
-
-attrpath
- : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); }
- | attrpath '.' string_attr
- { $$ = $1;
- ExprString * str = dynamic_cast<ExprString *>($3);
- if (str) {
- $$->push_back(AttrName(state->symbols.create(str->s)));
- delete str;
- } else
- $$->emplace_back(unp($3));
- }
- | attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); }
- | string_attr
- { $$ = new std::vector<AttrName>;
- ExprString *str = dynamic_cast<ExprString *>($1);
- if (str) {
- $$->push_back(AttrName(state->symbols.create(str->s)));
- delete str;
- } else
- $$->emplace_back(unp($1));
- }
- ;
-
-attr
- : ID
- | OR_KW { $$ = {"or", 2}; }
- ;
-
-string_attr
- : '"' string_parts '"' { $$ = $2; }
- | DOLLAR_CURLY expr '}' { $$ = $2; }
- ;
-
-expr_list
- : expr_list expr_select { $$ = $1; $1->elems.emplace_back($2); /* !!! dangerous */ }
- | { $$ = new ExprList; }
- ;
-
-formals
- : formal ',' formals
- { $$ = $3; $$->formals.emplace_back(std::move(*$1)); delete $1; }
- | formal
- { $$ = new Formals; $$->formals.emplace_back(std::move(*$1)); $$->ellipsis = false; delete $1; }
- |
- { $$ = new Formals; $$->ellipsis = false; }
- | ELLIPSIS
- { $$ = new Formals; $$->ellipsis = true; }
- ;
-
-formal
- : ID { $$ = new Formal{CUR_POS, state->symbols.create($1), nullptr}; }
- | ID '?' expr { $$ = new Formal{CUR_POS, state->symbols.create($1), unp($3)}; }
- ;
-
-%%
-
-#include "eval.hh"
-
-
-namespace nix {
-
-Expr * parseExprFromBuf(
- char * text,
- size_t length,
- Pos::Origin origin,
- const SourcePath & basePath,
- SymbolTable & symbols,
- PosTable & positions,
- const Expr::AstSymbols & astSymbols)
-{
- yyscan_t scanner;
- ParserState state {
- .symbols = symbols,
- .positions = positions,
- .basePath = basePath,
- .origin = positions.addOrigin(origin, length),
- .s = astSymbols,
- };
-
- yylex_init(&scanner);
- Finally _destroy([&] { yylex_destroy(scanner); });
-
- yy_scan_buffer(text, length, scanner);
- yyparse(scanner, &state);
- if (state.error) {
- delete state.result;
- throw *state.error;
- }
-
- return state.result;
-}
-
-
-}
diff --git a/src/libexpr/parser/change_head.hh b/src/libexpr/parser/change_head.hh
new file mode 100644
index 000000000..aab315553
--- /dev/null
+++ b/src/libexpr/parser/change_head.hh
@@ -0,0 +1,66 @@
+#pragma once
+///@file
+
+#include <tao/pegtl.hpp>
+
+namespace nix::parser {
+
+// modified copy of change_state, as the manual suggests for more involved
+// state manipulation. we want to change only the first state parameter,
+// and we care about the *initial* position of a rule application (not the
+// past-the-end position as pegtl change_state provides)
+template<typename NewState>
+struct change_head : tao::pegtl::maybe_nothing
+{
+ template<
+ typename Rule,
+ tao::pegtl::apply_mode A,
+ tao::pegtl::rewind_mode M,
+ template<typename...> class Action,
+ template<typename...> class Control,
+ typename ParseInput,
+ typename State,
+ typename... States
+ >
+ [[nodiscard]] static bool match(ParseInput & in, State && st, States &&... sts)
+ {
+ const auto begin = in.iterator();
+
+ if constexpr (std::is_constructible_v<NewState, State, States...>) {
+ NewState s(st, sts...);
+ if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) {
+ if constexpr (A == tao::pegtl::apply_mode::action) {
+ _success<Action<Rule>>(0, begin, in, s, st, sts...);
+ }
+ return true;
+ }
+ return false;
+ } else if constexpr (std::is_default_constructible_v<NewState>) {
+ NewState s;
+ if (tao::pegtl::match<Rule, A, M, Action, Control>(in, s, sts...)) {
+ if constexpr (A == tao::pegtl::apply_mode::action) {
+ _success<Action<Rule>>(0, begin, in, s, st, sts...);
+ }
+ return true;
+ }
+ return false;
+ } else {
+ static_assert(decltype(sizeof(NewState))(), "unable to instantiate new state");
+ }
+ }
+
+ template<typename Target, typename ParseInput, typename... S>
+ static void _success(void *, auto & begin, ParseInput & in, S & ... sts)
+ {
+ const typename ParseInput::action_t at(begin, in);
+ Target::success(at, sts...);
+ }
+
+ template<typename Target, typename... S>
+ static void _success(decltype(Target::success0(std::declval<S &>()...), 0), auto &, auto &, S & ... sts)
+ {
+ Target::success0(sts...);
+ }
+};
+
+}
diff --git a/src/libexpr/parser/grammar.hh b/src/libexpr/parser/grammar.hh
new file mode 100644
index 000000000..82df63bc5
--- /dev/null
+++ b/src/libexpr/parser/grammar.hh
@@ -0,0 +1,707 @@
+#pragma once
+///@file
+
+#include "tao/pegtl.hpp"
+#include <type_traits>
+#include <variant>
+
+#include <boost/container/small_vector.hpp>
+
+// NOTE
+// nix line endings are \n, \r\n, \r. the grammar does not use eol or
+// eolf rules in favor of reproducing the old flex lexer as faithfully as
+// possible, and deferring calculation of positions to downstream users.
+
+namespace nix::parser::grammar {
+
+using namespace tao::pegtl;
+namespace p = tao::pegtl;
+
+// character classes
+namespace c {
+
+struct path : sor<
+ ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+ one<'.', '_', '-', '+'>
+> {};
+struct path_sep : one<'/'> {};
+
+struct id_first : ranges<'a', 'z', 'A', 'Z', '_'> {};
+struct id_rest : sor<
+ ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+ one<'_', '\'', '-'>
+> {};
+
+struct uri_scheme_first : ranges<'a', 'z', 'A', 'Z'> {};
+struct uri_scheme_rest : sor<
+ ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+ one<'+', '-', '.'>
+> {};
+struct uri_sep : one<':'> {};
+struct uri_rest : sor<
+ ranges<'a', 'z', 'A', 'Z', '0', '9'>,
+ one<'%', '/', '?', ':', '@', '&', '=', '+', '$', ',', '-', '_', '.', '!', '~', '*', '\''>
+> {};
+
+}
+
+// "tokens". PEGs don't really care about tokens, we merely use them as a convenient
+// way of writing down keywords and a couple complicated syntax rules.
+namespace t {
+
+struct _extend_as_path : seq<
+ star<c::path>,
+ not_at<TAO_PEGTL_STRING("/*")>,
+ not_at<TAO_PEGTL_STRING("//")>,
+ c::path_sep,
+ sor<c::path, TAO_PEGTL_STRING("${")>
+> {};
+struct _extend_as_uri : seq<
+ star<c::uri_scheme_rest>,
+ c::uri_sep,
+ c::uri_rest
+> {};
+
+// keywords might be extended to identifiers, paths, or uris.
+// NOTE this assumes that keywords are a-zA-Z only, otherwise uri schemes would never
+// match correctly.
+// NOTE not a simple seq<...> because this would report incorrect positions for
+// keywords used inside must<> if a prefix of the keyword matches.
+template<typename S>
+struct _keyword : sor<
+ seq<
+ S,
+ not_at<c::id_rest>,
+ not_at<_extend_as_path>,
+ not_at<_extend_as_uri>
+ >,
+ failure
+> {};
+
+struct kw_if : _keyword<TAO_PEGTL_STRING("if")> {};
+struct kw_then : _keyword<TAO_PEGTL_STRING("then")> {};
+struct kw_else : _keyword<TAO_PEGTL_STRING("else")> {};
+struct kw_assert : _keyword<TAO_PEGTL_STRING("assert")> {};
+struct kw_with : _keyword<TAO_PEGTL_STRING("with")> {};
+struct kw_let : _keyword<TAO_PEGTL_STRING("let")> {};
+struct kw_in : _keyword<TAO_PEGTL_STRING("in")> {};
+struct kw_rec : _keyword<TAO_PEGTL_STRING("rec")> {};
+struct kw_inherit : _keyword<TAO_PEGTL_STRING("inherit")> {};
+struct kw_or : _keyword<TAO_PEGTL_STRING("or")> {};
+
+// `-` can be a unary prefix op, a binary infix op, or the first character
+// of a path or -> (ex 1->1--1)
+// `/` can be a path leader or an operator (ex a?a /a)
+struct op_minus : seq<one<'-'>, not_at<one<'>'>>, not_at<_extend_as_path>> {};
+struct op_div : seq<one<'/'>, not_at<c::path>> {};
+
+// match a rule, making sure we are not matching it where a keyword would match.
+// using minus like this is a lot faster than flipping the order and using seq.
+template<typename... Rules>
+struct _not_at_any_keyword : minus<
+ seq<Rules...>,
+ sor<
+ TAO_PEGTL_STRING("inherit"),
+ TAO_PEGTL_STRING("assert"),
+ TAO_PEGTL_STRING("else"),
+ TAO_PEGTL_STRING("then"),
+ TAO_PEGTL_STRING("with"),
+ TAO_PEGTL_STRING("let"),
+ TAO_PEGTL_STRING("rec"),
+ TAO_PEGTL_STRING("if"),
+ TAO_PEGTL_STRING("in"),
+ TAO_PEGTL_STRING("or")
+ >
+> {};
+
+// identifiers are kind of horrid:
+//
+// - uri_scheme_first ⊂ id_first
+// - uri_scheme_first ⊂ uri_scheme_rest ⊂ path
+// - id_first ⊂ id_rest ∖ { ' } ⊂ path
+// - id_first ∩ (path ∖ uri_scheme_first) = { _ }
+// - uri_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest, path }
+// - path_sep ∉ ⋃ { id_first, id_rest, uri_scheme_first, uri_scheme_rest }
+//
+// and we want, without reading the input more than once, a string that
+// matches (id_first id_rest*) and is not followed by any number of
+// characters such that the extended string matches path or uri rules.
+//
+// since the first character must be either _ or a uri scheme character
+// we can ignore path-like bits at the beginning. uri_sep cannot appear anywhere
+// in an identifier, so it's only needed in lookahead checks at the uri-like
+// prefix. likewise path_sep cannot appear anywhere in the identifier, so it's
+// only needed in lookahead checks in the path-like prefix.
+//
+// in total that gives us a decomposition of
+//
+// (uri-scheme-like? (?! continues-as-uri) | _)
+// (path-segment-like? (?! continues-as-path))
+// id_rest*
+struct identifier : _not_at_any_keyword<
+ // we don't use (at<id_rest>, ...) matches here because identifiers are
+ // a really hot path and rewinding as needed by at<> isn't entirely free.
+ sor<
+ seq<
+ c::uri_scheme_first,
+ star<ranges<'a', 'z', 'A', 'Z', '0', '9', '-'>>,
+ not_at<_extend_as_uri>
+ >,
+ one<'_'>
+ >,
+ star<sor<ranges<'a', 'z', 'A', 'Z', '0', '9'>, one<'_', '-'>>>,
+ not_at<_extend_as_path>,
+ star<c::id_rest>
+> {};
+
+// floats may extend ints, thus these rules are very similar.
+struct integer : seq<
+ sor<
+ seq<range<'1', '9'>, star<digit>, not_at<one<'.'>>>,
+ seq<one<'0'>, not_at<one<'.'>, digit>, star<digit>>
+ >,
+ not_at<_extend_as_path>
+> {};
+
+struct floating : seq<
+ sor<
+ seq<range<'1', '9'>, star<digit>, one<'.'>, star<digit>>,
+ seq<opt<one<'0'>>, one<'.'>, plus<digit>>
+ >,
+ opt<one<'E', 'e'>, opt<one<'+', '-'>>, plus<digit>>,
+ not_at<_extend_as_path>
+> {};
+
+struct uri : seq<
+ c::uri_scheme_first,
+ star<c::uri_scheme_rest>,
+ c::uri_sep,
+ plus<c::uri_rest>
+> {};
+
+struct sep : sor<
+ plus<one<' ', '\t', '\r', '\n'>>,
+ seq<one<'#'>, star<not_one<'\r', '\n'>>>,
+ seq<string<'/', '*'>, until<string<'*', '/'>>>
+> {};
+
+}
+
+
+
+using seps = star<t::sep>;
+
+
+// marker for semantic rules. not handling one of these in an action that cares about
+// semantics is probably an error.
+struct semantic {};
+
+
+struct expr;
+
+struct _string {
+ template<typename... Inner>
+ struct literal : semantic, seq<Inner...> {};
+ struct cr_lf : semantic, seq<one<'\r'>, opt<one<'\n'>>> {};
+ struct interpolation : semantic, seq<
+ p::string<'$', '{'>, seps,
+ must<expr>, seps,
+ must<one<'}'>>
+ > {};
+ struct escape : semantic, must<any> {};
+};
+struct string : _string, seq<
+ one<'"'>,
+ star<
+ sor<
+ _string::literal<plus<not_one<'$', '"', '\\', '\r'>>>,
+ _string::cr_lf,
+ _string::interpolation,
+ _string::literal<one<'$'>, opt<one<'$'>>>,
+ seq<one<'\\'>, _string::escape>
+ >
+ >,
+ must<one<'"'>>
+> {};
+
+struct _ind_string {
+ template<bool Indented, typename... Inner>
+ struct literal : semantic, seq<Inner...> {};
+ struct interpolation : semantic, seq<
+ p::string<'$', '{'>, seps,
+ must<expr>, seps,
+ must<one<'}'>>
+ > {};
+ struct escape : semantic, must<any> {};
+};
+struct ind_string : _ind_string, seq<
+ TAO_PEGTL_STRING("''"),
+ opt<star<one<' '>>, one<'\n'>>,
+ star<
+ sor<
+ _ind_string::literal<
+ true,
+ plus<
+ sor<
+ not_one<'$', '\''>,
+ seq<one<'$'>, not_one<'{', '\''>>,
+ seq<one<'\''>, not_one<'\'', '$'>>
+ >
+ >
+ >,
+ _ind_string::interpolation,
+ _ind_string::literal<false, one<'$'>>,
+ _ind_string::literal<false, one<'\''>, not_at<one<'\''>>>,
+ seq<one<'\''>, _ind_string::literal<false, p::string<'\'', '\''>>>,
+ seq<
+ p::string<'\'', '\''>,
+ sor<
+ _ind_string::literal<false, one<'$'>>,
+ seq<one<'\\'>, _ind_string::escape>
+ >
+ >
+ >
+ >,
+ must<TAO_PEGTL_STRING("''")>
+> {};
+
+struct _path {
+ // legacy lexer rules. extra l_ to avoid reserved c++ identifiers.
+ struct _l_PATH : seq<star<c::path>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {};
+ struct _l_PATH_SEG : seq<star<c::path>, c::path_sep> {};
+ struct _l_HPATH : seq<one<'~'>, plus<c::path_sep, plus<c::path>>, opt<c::path_sep>> {};
+ struct _l_HPATH_START : TAO_PEGTL_STRING("~/") {};
+ struct _path_str : sor<_l_PATH, _l_PATH_SEG, plus<c::path>> {};
+ // modern rules
+ template<typename... Inner>
+ struct literal : semantic, seq<Inner...> {};
+ struct interpolation : semantic, seq<
+ p::string<'$', '{'>, seps,
+ must<expr>, seps,
+ must<one<'}'>>
+ > {};
+ struct anchor : semantic, sor<
+ _l_PATH,
+ seq<_l_PATH_SEG, at<TAO_PEGTL_STRING("${")>>
+ > {};
+ struct home_anchor : semantic, sor<
+ _l_HPATH,
+ seq<_l_HPATH_START, at<TAO_PEGTL_STRING("${")>>
+ > {};
+ struct searched_path : semantic, list<plus<c::path>, c::path_sep> {};
+ struct forbid_prefix_triple_slash : sor<not_at<c::path_sep>, failure> {};
+ struct forbid_prefix_double_slash_no_interp : sor<
+ not_at<c::path_sep, star<c::path>, not_at<TAO_PEGTL_STRING("${")>>,
+ failure
+ > {};
+ // legacy parser rules
+ struct _str_rest : seq<
+ must<forbid_prefix_double_slash_no_interp>,
+ opt<literal<_path_str>>,
+ must<forbid_prefix_triple_slash>,
+ star<
+ sor<
+ literal<_path_str>,
+ interpolation
+ >
+ >
+ > {};
+};
+struct path : _path, sor<
+ seq<
+ sor<_path::anchor, _path::home_anchor>,
+ _path::_str_rest
+ >,
+ seq<one<'<'>, _path::searched_path, one<'>'>>
+> {};
+
+struct _formal {
+ struct name : semantic, t::identifier {};
+ struct default_value : semantic, must<expr> {};
+};
+struct formal : semantic, _formal, seq<
+ _formal::name,
+ opt<seps, one<'?'>, seps, _formal::default_value>
+> {};
+
+struct _formals {
+ struct ellipsis : semantic, p::ellipsis {};
+};
+struct formals : semantic, _formals, seq<
+ one<'{'>, seps,
+ // formals and attrsets share a two-token head sequence ('{' <id>).
+ // this rule unrolls the formals list a bit to provide better error messages than
+ // "expected '='" at the first ',' if formals are incorrect.
+ sor<
+ one<'}'>,
+ seq<_formals::ellipsis, seps, must<one<'}'>>>,
+ seq<
+ formal, seps,
+ if_then_else<
+ at<one<','>>,
+ seq<
+ star<one<','>, seps, formal, seps>,
+ opt<one<','>, seps, opt<_formals::ellipsis, seps>>,
+ must<one<'}'>>
+ >,
+ one<'}'>
+ >
+ >
+ >
+> {};
+
+struct _attr {
+ struct simple : semantic, sor<t::identifier, t::kw_or> {};
+ struct string : semantic, seq<grammar::string> {};
+ struct expr : semantic, seq<
+ TAO_PEGTL_STRING("${"), seps,
+ must<grammar::expr>, seps,
+ must<one<'}'>>
+ > {};
+};
+struct attr : _attr, sor<
+ _attr::simple,
+ _attr::string,
+ _attr::expr
+> {};
+
+struct attrpath : list<attr, one<'.'>, t::sep> {};
+
+struct _inherit {
+ struct from : semantic, must<expr> {};
+ struct attrs : list<attr, seps> {};
+};
+struct inherit : _inherit, seq<
+ t::kw_inherit, seps,
+ opt<one<'('>, seps, _inherit::from, seps, must<one<')'>>, seps>,
+ opt<_inherit::attrs, seps>,
+ must<one<';'>>
+> {};
+
+struct _binding {
+ struct path : semantic, attrpath {};
+ struct equal : one<'='> {};
+ struct value : semantic, must<expr> {};
+};
+struct binding : _binding, seq<
+ _binding::path, seps,
+ must<_binding::equal>, seps,
+ _binding::value, seps,
+ must<one<';'>>
+> {};
+
+struct bindings : opt<list<sor<inherit, binding>, seps>> {};
+
+struct op {
+ enum class kind {
+ // NOTE non-associativity is *NOT* handled in the grammar structure.
+ // handling it in the grammar itself instead of in semantic actions
+ // slows down the parser significantly and makes the rules *much*
+ // harder to read. maybe this will be different at some point when
+ // ! does not sit between two binary precedence levels.
+ nonAssoc,
+ leftAssoc,
+ rightAssoc,
+ unary,
+ };
+ template<typename Rule, unsigned Precedence, kind Kind = kind::leftAssoc>
+ struct _op : Rule {
+ static constexpr unsigned precedence = Precedence;
+ static constexpr op::kind kind = Kind;
+ };
+
+ struct unary_minus : _op<t::op_minus, 3, kind::unary> {};
+
+ // treating this like a unary postfix operator is sketchy, but that's
+ // the most reasonable way to implement the operator precedence set forth
+ // by the language way back. it'd be much better if `.` and `?` had the same
+ // precedence, but alas.
+ struct has_attr : _op<seq<one<'?'>, seps, must<attrpath>>, 4> {};
+
+ struct concat : _op<TAO_PEGTL_STRING("++"), 5, kind::rightAssoc> {};
+ struct mul : _op<one<'*'>, 6> {};
+ struct div : _op<t::op_div, 6> {};
+ struct plus : _op<one<'+'>, 7> {};
+ struct minus : _op<t::op_minus, 7> {};
+ struct not_ : _op<one<'!'>, 8, kind::unary> {};
+ struct update : _op<TAO_PEGTL_STRING("//"), 9, kind::rightAssoc> {};
+ struct less_eq : _op<TAO_PEGTL_STRING("<="), 10, kind::nonAssoc> {};
+ struct greater_eq : _op<TAO_PEGTL_STRING(">="), 10, kind::nonAssoc> {};
+ struct less : _op<one<'<'>, 10, kind::nonAssoc> {};
+ struct greater : _op<one<'>'>, 10, kind::nonAssoc> {};
+ struct equals : _op<TAO_PEGTL_STRING("=="), 11, kind::nonAssoc> {};
+ struct not_equals : _op<TAO_PEGTL_STRING("!="), 11, kind::nonAssoc> {};
+ struct and_ : _op<TAO_PEGTL_STRING("&&"), 12> {};
+ struct or_ : _op<TAO_PEGTL_STRING("||"), 13> {};
+ struct implies : _op<TAO_PEGTL_STRING("->"), 14, kind::rightAssoc> {};
+};
+
+struct _expr {
+ template<template<typename...> class OpenMod = seq, typename... Init>
+ struct _attrset : seq<
+ Init...,
+ OpenMod<one<'{'>>, seps,
+ bindings, seps,
+ must<one<'}'>>
+ > {};
+
+ struct select;
+
+ struct id : semantic, t::identifier {};
+ struct int_ : semantic, t::integer {};
+ struct float_ : semantic, t::floating {};
+ struct string : semantic, seq<grammar::string> {};
+ struct ind_string : semantic, seq<grammar::ind_string> {};
+ struct path : semantic, seq<grammar::path> {};
+ struct uri : semantic, t::uri {};
+ struct ancient_let : semantic, _attrset<must, t::kw_let, seps> {};
+ struct rec_set : semantic, _attrset<must, t::kw_rec, seps> {};
+ struct set : semantic, _attrset<> {};
+
+ struct _list {
+ struct entry : semantic, seq<select> {};
+ };
+ struct list : semantic, _list, seq<
+ one<'['>, seps,
+ opt<p::list<_list::entry, seps>, seps>,
+ must<one<']'>>
+ > {};
+
+ struct _simple : sor<
+ id,
+ int_,
+ float_,
+ string,
+ ind_string,
+ path,
+ uri,
+ seq<one<'('>, seps, must<expr>, seps, must<one<')'>>>,
+ ancient_let,
+ rec_set,
+ set,
+ list
+ > {};
+
+ struct _select {
+ struct head : _simple {};
+ struct attr : semantic, seq<attrpath> {};
+ struct attr_or : semantic, must<select> {};
+ struct as_app_or : semantic, t::kw_or {};
+ };
+ struct _app {
+ struct first_arg : semantic, seq<select> {};
+ struct another_arg : semantic, seq<select> {};
+ // can be used to stash a position of the application head node
+ struct select_or_fn : seq<select> {};
+ };
+
+ struct select : _select, seq<
+ _select::head, seps,
+ opt<
+ sor<
+ seq<
+ one<'.'>, seps, _select::attr,
+ opt<seps, t::kw_or, seps, _select::attr_or>
+ >,
+ _select::as_app_or
+ >
+ >
+ > {};
+
+ struct app : _app, seq<
+ _app::select_or_fn,
+ opt<seps, _app::first_arg, star<seps, _app::another_arg>>
+ > {};
+
+ template<typename Op>
+ struct operator_ : semantic, Op {};
+
+ struct unary : seq<
+ star<sor<operator_<op::not_>, operator_<op::unary_minus>>, seps>,
+ app
+ > {};
+
+ struct _binary_operator : sor<
+ operator_<op::implies>,
+ operator_<op::update>,
+ operator_<op::concat>,
+ operator_<op::plus>,
+ operator_<op::minus>,
+ operator_<op::mul>,
+ operator_<op::div>,
+ operator_<op::less_eq>,
+ operator_<op::greater_eq>,
+ operator_<op::less>,
+ operator_<op::greater>,
+ operator_<op::equals>,
+ operator_<op::not_equals>,
+ operator_<op::or_>,
+ operator_<op::and_>
+ > {};
+
+ struct _binop : seq<
+ unary,
+ star<
+ seps,
+ sor<
+ seq<_binary_operator, seps, must<unary>>,
+ operator_<op::has_attr>
+ >
+ >
+ > {};
+
+ struct _lambda {
+ struct arg : semantic, t::identifier {};
+ };
+ struct lambda : semantic, _lambda, sor<
+ seq<
+ _lambda::arg, seps,
+ sor<
+ seq<one<':'>, seps, must<expr>>,
+ seq<one<'@'>, seps, must<formals, seps, one<':'>, seps, expr>>
+ >
+ >,
+ seq<
+ formals, seps,
+ sor<
+ seq<one<':'>, seps, must<expr>>,
+ seq<one<'@'>, seps, must<_lambda::arg, seps, one<':'>, seps, expr>>
+ >
+ >
+ > {};
+
+ struct assert_ : semantic, seq<
+ t::kw_assert, seps,
+ must<expr>, seps,
+ must<one<';'>>, seps,
+ must<expr>
+ > {};
+ struct with : semantic, seq<
+ t::kw_with, seps,
+ must<expr>, seps,
+ must<one<';'>>, seps,
+ must<expr>
+ > {};
+ struct let : seq<
+ t::kw_let, seps,
+ not_at<one<'{'>>, // exclude ancient_let so we can must<kw_in>
+ bindings, seps,
+ must<t::kw_in>, seps,
+ must<expr>
+ > {};
+ struct if_ : semantic, seq<
+ t::kw_if, seps,
+ must<expr>, seps,
+ must<t::kw_then>, seps,
+ must<expr>, seps,
+ must<t::kw_else>, seps,
+ must<expr>
+ > {};
+};
+struct expr : semantic, _expr, sor<
+ _expr::lambda,
+ _expr::assert_,
+ _expr::with,
+ _expr::let,
+ _expr::if_,
+ _expr::_binop
+> {};
+
+// legacy support: \0 terminates input if passed from flex to bison as a token
+struct eof : sor<p::eof, one<0>> {};
+
+struct root : must<seps, expr, seps, eof> {};
+
+
+
+template<typename Rule>
+struct nothing : p::nothing<Rule> {
+ static_assert(!std::is_base_of_v<semantic, Rule>);
+};
+
+
+
+template<typename Self, typename OpCtx, typename AttrPathT, typename ExprT>
+struct operator_semantics {
+ struct has_attr : grammar::op::has_attr {
+ AttrPathT path;
+ };
+
+ struct OpEntry {
+ OpCtx ctx;
+ uint8_t prec;
+ grammar::op::kind assoc;
+ std::variant<
+ grammar::op::not_,
+ grammar::op::unary_minus,
+ grammar::op::implies,
+ grammar::op::or_,
+ grammar::op::and_,
+ grammar::op::equals,
+ grammar::op::not_equals,
+ grammar::op::less_eq,
+ grammar::op::greater_eq,
+ grammar::op::update,
+ grammar::op::concat,
+ grammar::op::less,
+ grammar::op::greater,
+ grammar::op::plus,
+ grammar::op::minus,
+ grammar::op::mul,
+ grammar::op::div,
+ has_attr
+ > op;
+ };
+
+ // statistics here are taken from nixpkgs commit de502c4d0ba96261e5de803e4d1d1925afd3e22f.
+ // over 99.9% of contexts in nixpkgs need at most 4 slots, ~85% need only 1
+ boost::container::small_vector<ExprT, 4> exprs;
+ // over 99.9% of contexts in nixpkgs need at most 2 slots, ~85% need only 1
+ boost::container::small_vector<OpEntry, 2> ops;
+
+ // derived class is expected to define members:
+ //
+ // ExprT applyOp(OpCtx & pos, auto & op, auto &... args);
+ // [[noreturn]] static void badOperator(OpCtx & pos, auto &... args);
+
+ void reduce(uint8_t toPrecedence, auto &... args) {
+ while (!ops.empty()) {
+ auto & [ctx, precedence, kind, op] = ops.back();
+ // NOTE this relies on associativity not being mixed within a precedence level.
+ if ((precedence > toPrecedence)
+ || (kind != grammar::op::kind::leftAssoc && precedence == toPrecedence))
+ break;
+ std::visit([&, ctx=std::move(ctx)] (auto & op) {
+ exprs.push_back(static_cast<Self &>(*this).applyOp(ctx, op, args...));
+ }, op);
+ ops.pop_back();
+ }
+ }
+
+ ExprT popExpr()
+ {
+ auto r = std::move(exprs.back());
+ exprs.pop_back();
+ return r;
+ }
+
+ void pushOp(OpCtx ctx, auto o, auto &... args)
+ {
+ if (o.kind != grammar::op::kind::unary)
+ reduce(o.precedence, args...);
+ if (!ops.empty() && o.kind == grammar::op::kind::nonAssoc) {
+ auto & [_pos, _prec, _kind, _o] = ops.back();
+ if (_kind == o.kind && _prec == o.precedence)
+ Self::badOperator(ctx, args...);
+ }
+ ops.emplace_back(ctx, o.precedence, o.kind, std::move(o));
+ }
+
+ ExprT finish(auto &... args)
+ {
+ reduce(255, args...);
+ return popExpr();
+ }
+};
+
+}
diff --git a/src/libexpr/parser/parser.cc b/src/libexpr/parser/parser.cc
new file mode 100644
index 000000000..850f1276e
--- /dev/null
+++ b/src/libexpr/parser/parser.cc
@@ -0,0 +1,862 @@
+#include "attr-set.hh"
+#include "error.hh"
+#include "eval-settings.hh"
+#include "eval.hh"
+#include "finally.hh"
+#include "nixexpr.hh"
+#include "symbol-table.hh"
+#include "users.hh"
+
+#include "change_head.hh"
+#include "grammar.hh"
+#include "state.hh"
+
+#include <charconv>
+#include <clocale>
+#include <memory>
+
+// flip this define when doing parser development to enable some grammar checks.
+#if 0
+#include <tao/pegtl/contrib/analyze.hpp>
+#define ANALYZE_GRAMMAR \
+ ([] { \
+ const std::size_t issues = tao::pegtl::analyze<grammar::root>(); \
+ assert(issues == 0); \
+ })()
+#else
+#define ANALYZE_GRAMMAR ((void) 0)
+#endif
+
+namespace p = tao::pegtl;
+
+namespace nix::parser {
+namespace {
+
+template<typename>
+inline constexpr const char * error_message = nullptr;
+
+#define error_message_for(...) \
+ template<> inline constexpr auto error_message<__VA_ARGS__>
+
+error_message_for(p::one<'{'>) = "expecting '{'";
+error_message_for(p::one<'}'>) = "expecting '}'";
+error_message_for(p::one<'"'>) = "expecting '\"'";
+error_message_for(p::one<';'>) = "expecting ';'";
+error_message_for(p::one<')'>) = "expecting ')'";
+error_message_for(p::one<'='>) = "expecting '='";
+error_message_for(p::one<']'>) = "expecting ']'";
+error_message_for(p::one<':'>) = "expecting ':'";
+error_message_for(p::string<'\'', '\''>) = "expecting \"''\"";
+error_message_for(p::any) = "expecting any character";
+error_message_for(grammar::eof) = "expecting end of file";
+error_message_for(grammar::seps) = "expecting separators";
+error_message_for(grammar::path::forbid_prefix_triple_slash) = "too many slashes in path";
+error_message_for(grammar::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash";
+error_message_for(grammar::expr) = "expecting expression";
+error_message_for(grammar::expr::unary) = "expecting expression";
+error_message_for(grammar::binding::equal) = "expecting '='";
+error_message_for(grammar::expr::lambda::arg) = "expecting identifier";
+error_message_for(grammar::formals) = "expecting formals";
+error_message_for(grammar::attrpath) = "expecting attribute path";
+error_message_for(grammar::expr::select) = "expecting selection expression";
+error_message_for(grammar::t::kw_then) = "expecting 'then'";
+error_message_for(grammar::t::kw_else) = "expecting 'else'";
+error_message_for(grammar::t::kw_in) = "expecting 'in'";
+
+struct SyntaxErrors
+{
+ template<typename Rule>
+ static constexpr auto message = error_message<Rule>;
+
+ template<typename Rule>
+ static constexpr bool raise_on_failure = false;
+};
+
+template<typename Rule>
+struct Control : p::must_if<SyntaxErrors>::control<Rule>
+{
+ template<typename ParseInput, typename... States>
+ [[noreturn]] static void raise(const ParseInput & in, States &&... st)
+ {
+ if (in.empty()) {
+ std::string expected;
+ if constexpr (constexpr auto msg = error_message<Rule>)
+ expected = fmt(", %s", msg);
+ throw p::parse_error("unexpected end of file" + expected, in);
+ }
+ p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...);
+ }
+};
+
+struct ExprState
+ : grammar::
+ operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>>
+{
+ std::unique_ptr<Expr> popExprOnly() {
+ return std::move(popExpr().second);
+ }
+
+ template<typename Op, typename... Args>
+ std::unique_ptr<Expr> applyUnary(Args &&... args) {
+ return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...);
+ }
+
+ template<typename Op>
+ std::unique_ptr<Expr> applyBinary(PosIdx pos) {
+ auto right = popExprOnly(), left = popExprOnly();
+ return std::make_unique<Op>(pos, std::move(left), std::move(right));
+ }
+
+ std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false)
+ {
+ std::vector<std::unique_ptr<Expr>> args(2);
+ args[flip ? 0 : 1] = popExprOnly();
+ args[flip ? 1 : 0] = popExprOnly();
+ return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args));
+ }
+
+ std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state)
+ {
+ return call(pos, state.s.lessThan, !less);
+ }
+
+ std::unique_ptr<Expr> concatStrings(PosIdx pos)
+ {
+ std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2);
+ args[1] = popExpr();
+ args[0] = popExpr();
+ return std::make_unique<ExprConcatStrings>(pos, false, std::move(args));
+ }
+
+ std::unique_ptr<Expr> negate(PosIdx pos, State & state)
+ {
+ std::vector<std::unique_ptr<Expr>> args(2);
+ args[0] = std::make_unique<ExprInt>(0);
+ args[1] = popExprOnly();
+ return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args));
+ }
+
+ std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) {
+ using Op = grammar::op;
+
+ auto not_ = [] (auto e) {
+ return std::make_unique<ExprOpNot>(std::move(e));
+ };
+
+ return {
+ pos,
+ (overloaded {
+ [&] (Op::implies) { return applyBinary<ExprOpImpl>(pos); },
+ [&] (Op::or_) { return applyBinary<ExprOpOr>(pos); },
+ [&] (Op::and_) { return applyBinary<ExprOpAnd>(pos); },
+ [&] (Op::equals) { return applyBinary<ExprOpEq>(pos); },
+ [&] (Op::not_equals) { return applyBinary<ExprOpNEq>(pos); },
+ [&] (Op::less) { return order(pos, true, state); },
+ [&] (Op::greater_eq) { return not_(order(pos, true, state)); },
+ [&] (Op::greater) { return order(pos, false, state); },
+ [&] (Op::less_eq) { return not_(order(pos, false, state)); },
+ [&] (Op::update) { return applyBinary<ExprOpUpdate>(pos); },
+ [&] (Op::not_) { return applyUnary<ExprOpNot>(); },
+ [&] (Op::plus) { return concatStrings(pos); },
+ [&] (Op::minus) { return call(pos, state.s.sub); },
+ [&] (Op::mul) { return call(pos, state.s.mul); },
+ [&] (Op::div) { return call(pos, state.s.div); },
+ [&] (Op::concat) { return applyBinary<ExprOpConcatLists>(pos); },
+ [&] (has_attr & a) { return applyUnary<ExprOpHasAttr>(std::move(a.path)); },
+ [&] (Op::unary_minus) { return negate(pos, state); },
+ })(op)
+ };
+ }
+
+ // always_inline is needed, otherwise pushOp slows down considerably
+ [[noreturn, gnu::always_inline]]
+ static void badOperator(PosIdx pos, State & state)
+ {
+ throw ParseError({
+ .msg = HintFmt("syntax error, unexpected operator"),
+ .pos = state.positions[pos]
+ });
+ }
+
+ template<typename Expr, typename... Args>
+ Expr & pushExpr(PosIdx pos, Args && ... args)
+ {
+ auto p = std::make_unique<Expr>(std::forward<Args>(args)...);
+ auto & result = *p;
+ exprs.emplace_back(pos, std::move(p));
+ return result;
+ }
+};
+
+struct SubexprState {
+private:
+ ExprState * up;
+
+public:
+ explicit SubexprState(ExprState & up, auto &...) : up(&up) {}
+ operator ExprState &() { return *up; }
+ ExprState * operator->() { return up; }
+};
+
+
+
+template<typename Rule>
+struct BuildAST : grammar::nothing<Rule> {};
+
+struct LambdaState : SubexprState {
+ using SubexprState::SubexprState;
+
+ Symbol arg;
+ std::unique_ptr<Formals> formals;
+};
+
+struct FormalsState : SubexprState {
+ using SubexprState::SubexprState;
+
+ Formals formals{};
+ Formal formal{};
+};
+
+template<> struct BuildAST<grammar::formal::name> {
+ static void apply(const auto & in, FormalsState & s, State & ps) {
+ s.formal = {
+ .pos = ps.at(in),
+ .name = ps.symbols.create(in.string_view()),
+ };
+ }
+};
+
+template<> struct BuildAST<grammar::formal> {
+ static void apply0(FormalsState & s, State &) {
+ s.formals.formals.emplace_back(std::move(s.formal));
+ }
+};
+
+template<> struct BuildAST<grammar::formal::default_value> {
+ static void apply0(FormalsState & s, State & ps) {
+ s.formal.def = s->popExprOnly();
+ }
+};
+
+template<> struct BuildAST<grammar::formals::ellipsis> {
+ static void apply0(FormalsState & s, State &) {
+ s.formals.ellipsis = true;
+ }
+};
+
+template<> struct BuildAST<grammar::formals> : change_head<FormalsState> {
+ static void success0(FormalsState & f, LambdaState & s, State &) {
+ s.formals = std::make_unique<Formals>(std::move(f.formals));
+ }
+};
+
+struct AttrState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::vector<AttrName> attrs;
+
+ void pushAttr(auto && attr, PosIdx) { attrs.emplace_back(std::move(attr)); }
+};
+
+template<> struct BuildAST<grammar::attr::simple> {
+ static void apply(const auto & in, auto & s, State & ps) {
+ s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in));
+ }
+};
+
+template<> struct BuildAST<grammar::attr::string> {
+ static void apply(const auto & in, auto & s, State & ps) {
+ auto e = s->popExprOnly();
+ if (auto str = dynamic_cast<ExprString *>(e.get()))
+ s.pushAttr(ps.symbols.create(str->s), ps.at(in));
+ else
+ s.pushAttr(std::move(e), ps.at(in));
+ }
+};
+
+template<> struct BuildAST<grammar::attr::expr> : BuildAST<grammar::attr::string> {};
+
+struct BindingsState : SubexprState {
+ using SubexprState::SubexprState;
+
+ ExprAttrs attrs;
+ AttrPath path;
+ std::unique_ptr<Expr> value;
+};
+
+struct InheritState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::vector<std::pair<AttrName, PosIdx>> attrs;
+ std::unique_ptr<Expr> from;
+ PosIdx fromPos;
+
+ void pushAttr(auto && attr, PosIdx pos) { attrs.emplace_back(std::move(attr), pos); }
+};
+
+template<> struct BuildAST<grammar::inherit::from> {
+ static void apply(const auto & in, InheritState & s, State & ps) {
+ s.from = s->popExprOnly();
+ s.fromPos = ps.at(in);
+ }
+};
+
+// Completes an inherit clause: every inherited name is resolved to a symbol
+// and added to the enclosing bindings, either as a plain variable reference
+// or as a selection from the inherit source. Names that are not static raise
+// a ParseError; names that are already defined are reported via dupAttr.
+template<>
+struct BuildAST<grammar::inherit> : change_head<InheritState>
+{
+    static void success0(InheritState & s, BindingsState & b, State & ps)
+    {
+        auto & defs = b.attrs.attrs;
+
+        // TODO this should not reuse generic attrpath rules.
+        for (auto & [name, namePos] : s.attrs) {
+            if (name.symbol)
+                continue;
+            auto * literal = dynamic_cast<ExprString *>(name.expr.get());
+            if (!literal) {
+                throw ParseError({
+                    .msg = HintFmt("dynamic attributes not allowed in inherit"),
+                    .pos = ps.positions[namePos]
+                });
+            }
+            // the new AttrName is built before the assignment replaces the old one.
+            name = AttrName(ps.symbols.create(literal->s));
+        }
+
+        // dupAttr throws, so a duplicate never reaches the emplace that follows.
+        const auto failIfDefined = [&](const auto & name, PosIdx pos) {
+            if (auto known = defs.find(name); known != defs.end())
+                ps.dupAttr(name, pos, known->second.pos);
+        };
+
+        if (!s.from) {
+            for (auto & [name, namePos] : s.attrs) {
+                failIfDefined(name.symbol, namePos);
+                defs.emplace(
+                    name.symbol,
+                    ExprAttrs::AttrDef(
+                        std::make_unique<ExprVar>(namePos, name.symbol),
+                        namePos,
+                        ExprAttrs::AttrDef::Kind::Inherited));
+            }
+            return;
+        }
+
+        if (!b.attrs.inheritFromExprs)
+            b.attrs.inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>();
+        b.attrs.inheritFromExprs->push_back(std::move(s.from));
+
+        for (auto & [name, namePos] : s.attrs) {
+            failIfDefined(name.symbol, namePos);
+            // refers to the source expression that was appended just above.
+            auto source = std::make_unique<ExprInheritFrom>(s.fromPos, b.attrs.inheritFromExprs->size() - 1);
+            defs.emplace(
+                name.symbol,
+                ExprAttrs::AttrDef(
+                    std::make_unique<ExprSelect>(namePos, std::move(source), name.symbol),
+                    namePos,
+                    ExprAttrs::AttrDef::Kind::InheritedFrom));
+        }
+    }
+};
+
+// Moves the attribute path that was just parsed into the bindings state.
+template<>
+struct BuildAST<grammar::binding::path> : change_head<AttrState>
+{
+    static void success0(AttrState & parsed, BindingsState & bindings, State &)
+    {
+        bindings.path = std::move(parsed.attrs);
+    }
+};
+
+// Takes the expression popped from the state as the value of the binding
+// currently being parsed.
+template<>
+struct BuildAST<grammar::binding::value>
+{
+    static void apply0(BindingsState & bindings, State &)
+    {
+        bindings.value = bindings->popExprOnly();
+    }
+};
+
+// Commits the pending path/value pair to the attribute set via State::addAttr.
+template<>
+struct BuildAST<grammar::binding>
+{
+    static void apply(const auto & in, BindingsState & bindings, State & state)
+    {
+        const auto pos = state.at(in);
+        state.addAttr(&bindings.attrs, std::move(bindings.path), std::move(bindings.value), pos);
+    }
+};
+
+// Identifiers become variable references, except `__curPos`, which becomes
+// an ExprPos for the position of the identifier.
+template<>
+struct BuildAST<grammar::expr::id>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        const auto name = in.string_view();
+        const auto pos = ps.at(in);
+        if (name == "__curPos") {
+            s.pushExpr<ExprPos>(pos, pos);
+            return;
+        }
+        s.pushExpr<ExprVar>(pos, pos, ps.symbols.create(name));
+    }
+};
+
+// Integer literals are parsed as int64_t; anything std::from_chars rejects
+// (including out-of-range values) is reported as a ParseError.
+template<>
+struct BuildAST<grammar::expr::int_>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        int64_t value;
+        const auto parsed = std::from_chars(in.begin(), in.end(), value);
+        if (parsed.ec != std::errc{}) {
+            throw ParseError({
+                .msg = HintFmt("invalid integer '%1%'", in.string_view()),
+                .pos = ps.positions[ps.at(in)],
+            });
+        }
+        s.pushExpr<ExprInt>(noPos, value);
+    }
+};
+
+// Float literals are converted with std::stod on a temporary copy of the
+// input.
+template<>
+struct BuildAST<grammar::expr::float_>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        // from_chars is not an option because libc++ (thus darwin) lacks the
+        // floating point overloads, and float parsing is not
+        // performance-sensitive anyway.
+        //
+        // stod is locale-aware and something (a plugin?) may have called
+        // setlocale or uselocale, so the conversion runs under a POSIX locale
+        // that is created once and installed only for the duration of the call.
+        static struct PosixLocale {
+            locale_t handle;
+            PosixLocale(): handle(newlocale(LC_ALL_MASK, "POSIX", 0))
+            {
+                if (handle == 0)
+                    throw SysError("could not get POSIX locale");
+            }
+        } posixLocale;
+
+        const auto text = in.string();
+        double value;
+        {
+            const auto previous = uselocale(posixLocale.handle);
+            // restores the caller's locale on both the normal and the throwing path.
+            Finally restore([=] { uselocale(previous); });
+            try {
+                value = std::stod(text);
+            } catch (...) {
+                throw ParseError({
+                    .msg = HintFmt("invalid float '%1%'", in.string_view()),
+                    .pos = ps.positions[ps.at(in)],
+                });
+            }
+        }
+        s.pushExpr<ExprFloat>(noPos, value);
+    }
+};
+
+// Head state for string-like literals (the path actions use it as well):
+// gathers literal text and interpolated expressions as a list of parts.
+struct StringState : SubexprState {
+    using SubexprState::SubexprState;
+
+    // literal text gathered since the last completed part, not yet unescaped.
+    std::string currentLiteral;
+    // position of the first chunk that went into currentLiteral.
+    PosIdx currentPos;
+    std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts;
+
+    // Adds a chunk of literal text. The position of the first chunk of a
+    // literal becomes the position of the whole literal.
+    void append(PosIdx pos, std::string_view s)
+    {
+        if (currentLiteral.empty())
+            currentPos = pos;
+        currentLiteral += s;
+    }
+
+    // Resolves backslash escapes and normalises CR and CR/LF to LF, in place.
+    //
+    // FIXME this truncates strings on NUL for compat with the old parser. ideally
+    // we should use the decomposition the grammar gives us instead of iterating
+    // over the entire string again.
+    static void unescapeStr(std::string & str)
+    {
+        char * s = str.data();
+        const char * const end = s + str.size();
+        char * t = s;
+        char c;
+        while ((c = *s++)) {
+            if (c == '\\') {
+                // a backslash that is the very last character has nothing to
+                // escape; stop here rather than consume the terminator and
+                // keep reading beyond the end of the buffer.
+                if (s == end) break;
+                c = *s++;
+                if (c == 'n') *t = '\n';
+                else if (c == 'r') *t = '\r';
+                else if (c == 't') *t = '\t';
+                else *t = c;
+            }
+            else if (c == '\r') {
+                /* Normalise CR and CR/LF into LF. */
+                *t = '\n';
+                if (*s == '\n') s++; /* cr/lf */
+            }
+            else *t = c;
+            t++;
+        }
+        str.resize(t - str.data());
+    }
+
+    // Turns the pending literal text, if any, into an ExprString part.
+    void endLiteral()
+    {
+        if (!currentLiteral.empty()) {
+            unescapeStr(currentLiteral);
+            parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral)));
+            // a moved-from string is only "valid but unspecified"; append()
+            // depends on it being empty, so make that explicit.
+            currentLiteral.clear();
+        }
+    }
+
+    // Builds the final expression: a plain ExprString when no part was ever
+    // recorded, otherwise an ExprConcatStrings positioned at the first part.
+    std::unique_ptr<Expr> finish()
+    {
+        if (parts.empty()) {
+            unescapeStr(currentLiteral);
+            return std::make_unique<ExprString>(std::move(currentLiteral));
+        } else {
+            endLiteral();
+            auto pos = parts[0].first;
+            return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts));
+        }
+    }
+};
+
+// Literal chunks of a string are appended to the pending literal text.
+template<typename... Content>
+struct BuildAST<grammar::string::literal<Content...>>
+{
+    static void apply(const auto & in, StringState & str, State & state)
+    {
+        const auto pos = state.at(in);
+        str.append(pos, in.string_view());
+    }
+};
+
+// The matched input is appended unchanged; StringState::unescapeStr
+// normalises CR and CR/LF later.
+template<>
+struct BuildAST<grammar::string::cr_lf>
+{
+    static void apply(const auto & in, StringState & str, State & state)
+    {
+        const auto pos = state.at(in);
+        str.append(pos, in.string_view()); // FIXME compat with old parser
+    }
+};
+
+// An interpolation closes the pending literal and adds the interpolated
+// expression as a part of its own.
+template<>
+struct BuildAST<grammar::string::interpolation>
+{
+    static void apply(const auto & in, StringState & str, State & state)
+    {
+        str.endLiteral();
+        auto interpolated = str->popExprOnly();
+        str.parts.emplace_back(state.at(in), std::move(interpolated));
+    }
+};
+
+// Escapes are stored in their escaped form (a backslash followed by the
+// matched input); StringState::unescapeStr resolves them later.
+template<>
+struct BuildAST<grammar::string::escape>
+{
+    static void apply(const auto & in, StringState & str, State & state)
+    {
+        const auto pos = state.at(in);
+        str.append(pos, "\\"); // FIXME compat with old parser
+        str.append(pos, in.string_view());
+    }
+};
+
+// A finished string is pushed onto the enclosing expression state without a
+// position.
+template<>
+struct BuildAST<grammar::string> : change_head<StringState>
+{
+    static void success0(StringState & str, ExprState & outer, State &)
+    {
+        auto built = str.finish();
+        outer.exprs.emplace_back(noPos, std::move(built));
+    }
+};
+
+// Head state for indented strings: a list of parts, each either an
+// interpolated expression or a token of literal text, with its position.
+struct IndStringState : SubexprState
+{
+    using SubexprState::SubexprState;
+
+    std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> parts;
+};
+
+// Literal chunks become StringTokens; `Indented` is stored as the token's
+// hasIndentation flag.
+template<bool Indented, typename... Content>
+struct BuildAST<grammar::ind_string::literal<Indented, Content...>>
+{
+    static void apply(const auto & in, IndStringState & str, State & state)
+    {
+        const auto pos = state.at(in);
+        str.parts.emplace_back(pos, StringToken{in.string_view(), Indented});
+    }
+};
+
+// Interpolated expressions are added to the part list as they are.
+template<>
+struct BuildAST<grammar::ind_string::interpolation>
+{
+    static void apply(const auto & in, IndStringState & str, State & state)
+    {
+        auto interpolated = str->popExprOnly();
+        str.parts.emplace_back(state.at(in), std::move(interpolated));
+    }
+};
+
+// Escapes are resolved right away: n, r and t map to the matching control
+// character, everything else stands for itself.
+template<>
+struct BuildAST<grammar::ind_string::escape>
+{
+    static void apply(const auto & in, IndStringState & str, State & state)
+    {
+        std::string_view text;
+        switch (*in.begin()) {
+        case 'n': text = "\n"; break;
+        case 'r': text = "\r"; break;
+        case 't': text = "\t"; break;
+        default: text = in.string_view(); break;
+        }
+        str.parts.emplace_back(state.at(in), StringToken{text});
+    }
+};
+
+// A finished indented string is run through State::stripIndentation and the
+// result pushed onto the enclosing expression state without a position.
+template<>
+struct BuildAST<grammar::ind_string> : change_head<IndStringState>
+{
+    static void success(const auto & in, IndStringState & str, ExprState & outer, State & state)
+    {
+        const auto pos = state.at(in);
+        outer.exprs.emplace_back(noPos, state.stripIndentation(pos, std::move(str.parts)));
+    }
+};
+
+// Each literal chunk of a path is turned into a part of its own immediately.
+template<typename... Content>
+struct BuildAST<grammar::path::literal<Content...>>
+{
+    static void apply(const auto & in, StringState & path, State & state)
+    {
+        const auto pos = state.at(in);
+        path.append(pos, in.string_view());
+        path.endLiteral();
+    }
+};
+
+// Interpolations inside paths reuse the action for string interpolations.
+template<>
+struct BuildAST<grammar::path::interpolation> : BuildAST<grammar::string::interpolation>
+{
+};
+
+// The leading segment of a path is resolved against the base path with
+// absPath and stored as an ExprPath part.
+template<> struct BuildAST<grammar::path::anchor> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        Path path(absPath(in.string(), ps.basePath.path.abs()));
+        /* add back in the trailing '/' to the first segment */
+        if (in.string_view().ends_with('/') && in.size() > 1)
+            path += "/";
+        // make_unique instead of a raw `new`: the node is owned from the moment
+        // it exists, so it cannot leak if growing `parts` throws.
+        s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path)));
+    }
+};
+
+// The leading segment of a home-relative path: its first character is
+// replaced by the result of getHome(). Throws in pure evaluation mode.
+template<> struct BuildAST<grammar::path::home_anchor> {
+    static void apply(const auto & in, StringState & s, State & ps) {
+        if (evalSettings.pureEval)
+            throw Error("the path '%s' can not be resolved in pure mode", in.string_view());
+        Path path(getHome() + in.string_view().substr(1));
+        // make_unique instead of a raw `new`: the node is owned from the moment
+        // it exists, so it cannot leak if growing `parts` throws.
+        s.parts.emplace_back(ps.at(in), std::make_unique<ExprPath>(std::move(path)));
+    }
+};
+
+// A searched path becomes a call of the `findFile` symbol with two
+// arguments: a reference to the `nixPath` symbol and the matched text as a
+// string.
+template<>
+struct BuildAST<grammar::path::searched_path>
+{
+    static void apply(const auto & in, StringState & s, State & ps)
+    {
+        std::vector<std::unique_ptr<Expr>> args;
+        args.reserve(2);
+        args.push_back(std::make_unique<ExprVar>(ps.s.nixPath));
+        args.push_back(std::make_unique<ExprString>(in.string()));
+
+        const auto pos = ps.at(in);
+        auto call = std::make_unique<ExprCall>(
+            pos,
+            std::make_unique<ExprVar>(ps.s.findFile),
+            std::move(args));
+        s.parts.emplace_back(pos, std::move(call));
+    }
+};
+
+// Finishes a path: rejects trailing slashes, then pushes either the single
+// part or a concatenation of all parts onto the enclosing expression state.
+template<>
+struct BuildAST<grammar::path> : change_head<StringState>
+{
+    // Throws if the last part is an E whose text ends in '/', unless the
+    // whole path consists of just "/".
+    template<typename E>
+    static void check_slash(PosIdx end, StringState & s, State & ps)
+    {
+        auto * last = dynamic_cast<E *>(s.parts.back().second.get());
+        if (!last)
+            return;
+        if (!last->s.ends_with('/'))
+            return;
+        const bool bareRoot = s.parts.size() <= 1 && last->s == "/";
+        if (bareRoot)
+            return;
+        throw ParseError({
+            .msg = HintFmt("path has a trailing slash"),
+            .pos = ps.positions[end],
+        });
+    }
+
+    static void success(const auto & in, StringState & s, ExprState & e, State & ps)
+    {
+        s.endLiteral();
+        const auto endPos = ps.atEnd(in);
+        check_slash<ExprPath>(endPos, s, ps);
+        check_slash<ExprString>(endPos, s, ps);
+        if (s.parts.size() != 1) {
+            const auto pos = ps.at(in);
+            e.pushExpr<ExprConcatStrings>(pos, pos, false, std::move(s.parts));
+            return;
+        }
+        e.exprs.emplace_back(noPos, std::move(s.parts.back().second));
+    }
+};
+
+// strings and paths are handled fully by the grammar-level rule for now
+template<> struct BuildAST<grammar::expr::string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::ind_string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::path> : p::maybe_nothing {};
+
+// URL literals become plain strings, unless the NoUrlLiterals experimental
+// feature is enabled, in which case they are a parse error. The feature flag
+// is looked up once and cached.
+template<>
+struct BuildAST<grammar::expr::uri>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
+        const auto pos = ps.at(in);
+        if (noURLLiterals) {
+            throw ParseError({
+                .msg = HintFmt("URL literals are disabled"),
+                .pos = ps.positions[pos]
+            });
+        }
+        s.pushExpr<ExprString>(pos, in.string());
+    }
+};
+
+// The bindings become a recursive attribute set from which the attribute
+// named by the `body` symbol is selected.
+template<>
+struct BuildAST<grammar::expr::ancient_let> : change_head<BindingsState>
+{
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        b.attrs.pos = pos;
+        b.attrs.recursive = true;
+        auto set = std::make_unique<ExprAttrs>(std::move(b.attrs));
+        s.pushExpr<ExprSelect>(pos, pos, std::move(set), ps.s.body);
+    }
+};
+
+// The collected bindings are pushed as an attribute set marked recursive.
+template<>
+struct BuildAST<grammar::expr::rec_set> : change_head<BindingsState>
+{
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        b.attrs.pos = pos;
+        b.attrs.recursive = true;
+        s.pushExpr<ExprAttrs>(pos, std::move(b.attrs));
+    }
+};
+
+// The collected bindings are pushed as an attribute set; the recursive flag
+// is left as it is.
+template<>
+struct BuildAST<grammar::expr::set> : change_head<BindingsState>
+{
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        b.attrs.pos = pos;
+        s.pushExpr<ExprAttrs>(pos, std::move(b.attrs));
+    }
+};
+
+using ListState = std::vector<std::unique_ptr<Expr>>; // head state of a list literal: the element expressions appended by the list::entry action
+
+// Wraps the collected elements in an ExprList and pushes it together with
+// the position of the list.
+template<>
+struct BuildAST<grammar::expr::list> : change_head<ListState>
+{
+    static void success(const auto & in, ListState & ls, ExprState & s, State & ps)
+    {
+        auto list = std::make_unique<ExprList>();
+        list->elems = std::move(ls);
+        const auto pos = ps.at(in);
+        s.exprs.emplace_back(pos, std::move(list));
+    }
+};
+
+// Every list entry is parsed in an expression state of its own; the finished
+// expression is appended to the list and its position dropped.
+template<>
+struct BuildAST<grammar::expr::list::entry> : change_head<ExprState>
+{
+    static void success0(ExprState & entry, ListState & list, State & ps)
+    {
+        auto finished = entry.finish(ps);
+        list.emplace_back(std::move(finished.second));
+    }
+};
+
+// Head state for attribute selection: the position recorded by select::head
+// and the ExprSelect created by select::attr, if there is one.
+struct SelectState : SubexprState
+{
+    using SubexprState::SubexprState;
+
+    PosIdx pos;
+    ExprSelect * e = nullptr;
+};
+
+// Remembers the position of the matched input for the actions that follow.
+template<>
+struct BuildAST<grammar::expr::select::head>
+{
+    static void apply(const auto & in, SelectState & select, State & state)
+    {
+        select.pos = state.at(in);
+    }
+};
+
+// Replaces the popped expression with an ExprSelect over the parsed
+// attribute path. The node starts without a default and is remembered so
+// that select::attr_or can add one.
+template<>
+struct BuildAST<grammar::expr::select::attr> : change_head<AttrState>
+{
+    static void success0(AttrState & a, SelectState & s, State &)
+    {
+        auto subject = s->popExprOnly();
+        s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, std::move(subject), std::move(a.attrs), nullptr);
+    }
+};
+
+// Sets the popped expression as the default of the ExprSelect recorded by
+// select::attr.
+template<>
+struct BuildAST<grammar::expr::select::attr_or>
+{
+    static void apply0(SelectState & select, State &)
+    {
+        select.e->def = select->popExprOnly();
+    }
+};
+
+// Rewrites the popped expression into a call that passes a single argument:
+// a variable named by the `or_` symbol.
+template<>
+struct BuildAST<grammar::expr::select::as_app_or>
+{
+    static void apply(const auto & in, SelectState & s, State & ps)
+    {
+        std::vector<std::unique_ptr<Expr>> args;
+        args.push_back(std::make_unique<ExprVar>(ps.at(in), ps.s.or_));
+        auto fn = s->popExprOnly();
+        s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args));
+    }
+};
+
+// Only switches the head state to SelectState; success0 is deliberately
+// empty because the select::* actions do all the work.
+template<>
+struct BuildAST<grammar::expr::select> : change_head<SelectState>
+{
+    static void success0(const auto &...)
+    {
+    }
+};
+
+// Head state for function application: the position recorded by
+// app::select_or_fn and the ExprCall that further arguments are appended to.
+struct AppState : SubexprState
+{
+    using SubexprState::SubexprState;
+
+    PosIdx pos;
+    ExprCall * e = nullptr;
+};
+
+// Remembers the position of the matched input for the actions that follow.
+template<>
+struct BuildAST<grammar::expr::app::select_or_fn>
+{
+    static void apply(const auto & in, AppState & app, State & state)
+    {
+        app.pos = state.at(in);
+    }
+};
+
+// Combines the two most recently popped expressions (argument first, then
+// function) into a call and records that call for app::another_arg.
+template<>
+struct BuildAST<grammar::expr::app::first_arg>
+{
+    static void apply(auto & in, AppState & s, State & ps)
+    {
+        auto arg = s->popExprOnly();
+        auto fn = s->popExprOnly();
+
+        s.e = dynamic_cast<ExprCall *>(fn.get());
+        if (s.e != nullptr) {
+            // TODO remove.
+            // AST compat with old parser, semantics are the same.
+            // this can happen on occasions such as `<p> <p>` or `a or b or`,
+            // neither of which are super worth optimizing.
+            s.e->args.push_back(std::move(arg));
+            s->exprs.emplace_back(noPos, std::move(fn));
+            return;
+        }
+
+        std::vector<std::unique_ptr<Expr>> args;
+        args.push_back(std::move(arg));
+        s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args));
+    }
+};
+
+// Appends the popped expression to the arguments of the call recorded by
+// app::first_arg.
+template<>
+struct BuildAST<grammar::expr::app::another_arg>
+{
+    static void apply0(AppState & app, State &)
+    {
+        auto arg = app->popExprOnly();
+        app.e->args.push_back(std::move(arg));
+    }
+};
+
+// Only switches the head state to AppState; success0 is deliberately empty
+// because the app::* actions do all the work.
+template<>
+struct BuildAST<grammar::expr::app> : change_head<AppState>
+{
+    static void success0(const auto &...)
+    {
+    }
+};
+
+// Operators are handed to ExprState::pushOp as a default-constructed Op tag
+// together with the operator's position.
+template<typename Op>
+struct BuildAST<grammar::expr::operator_<Op>>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        s.pushOp(pos, Op{}, ps);
+    }
+};
+// The has_attr operator carries the attribute path parsed with it, so it is
+// pushed as an ExprState::has_attr that owns that path.
+template<>
+struct BuildAST<grammar::expr::operator_<grammar::op::has_attr>> : change_head<AttrState>
+{
+    static void success(const auto & in, AttrState & a, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        s.pushOp(pos, ExprState::has_attr{{}, std::move(a.attrs)}, ps);
+    }
+};
+
+// Stores the matched text as the lambda's argument symbol.
+template<>
+struct BuildAST<grammar::expr::lambda::arg>
+{
+    static void apply(const auto & in, LambdaState & lambda, State & state)
+    {
+        lambda.arg = state.symbols.create(in.string_view());
+    }
+};
+
+// Builds the ExprLambda from the argument symbol, the formals (validated
+// first when present) and the popped body expression.
+template<>
+struct BuildAST<grammar::expr::lambda> : change_head<LambdaState>
+{
+    static void success(const auto & in, LambdaState & l, ExprState & s, State & ps)
+    {
+        const auto pos = ps.at(in);
+        if (l.formals)
+            l.formals = ps.validateFormals(std::move(l.formals), pos, l.arg);
+        auto body = l->popExprOnly();
+        s.pushExpr<ExprLambda>(pos, pos, l.arg, std::move(l.formals), std::move(body));
+    }
+};
+
+// Pops the body and then the condition, and pushes the ExprAssert built
+// from them.
+template<>
+struct BuildAST<grammar::expr::assert_>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        auto body = s.popExprOnly();
+        auto cond = s.popExprOnly();
+        const auto pos = ps.at(in);
+        s.pushExpr<ExprAssert>(pos, pos, std::move(cond), std::move(body));
+    }
+};
+
+// Pops the body and then the scope expression, and pushes the ExprWith built
+// from them.
+template<>
+struct BuildAST<grammar::expr::with>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        auto body = s.popExprOnly();
+        auto scope = s.popExprOnly();
+        const auto pos = ps.at(in);
+        s.pushExpr<ExprWith>(pos, pos, std::move(scope), std::move(body));
+    }
+};
+
+// Builds an ExprLet from the collected bindings and the popped body.
+// Bindings with dynamic attribute names are rejected.
+template<>
+struct BuildAST<grammar::expr::let> : change_head<BindingsState>
+{
+    static void success(const auto & in, BindingsState & b, ExprState & s, State & ps)
+    {
+        if (!b.attrs.dynamicAttrs.empty()) {
+            throw ParseError({
+                .msg = HintFmt("dynamic attributes not allowed in let"),
+                .pos = ps.positions[ps.at(in)]
+            });
+        }
+
+        auto bindings = std::make_unique<ExprAttrs>(std::move(b.attrs));
+        auto body = b->popExprOnly();
+        s.pushExpr<ExprLet>(ps.at(in), std::move(bindings), std::move(body));
+    }
+};
+
+// Pops the else branch, the then branch and the condition (in that order)
+// and pushes the ExprIf built from them.
+template<>
+struct BuildAST<grammar::expr::if_>
+{
+    static void apply(const auto & in, ExprState & s, State & ps)
+    {
+        auto else_ = s.popExprOnly();
+        auto then = s.popExprOnly();
+        auto cond = s.popExprOnly();
+        const auto pos = ps.at(in);
+        s.pushExpr<ExprIf>(pos, pos, std::move(cond), std::move(then), std::move(else_));
+    }
+};
+
+// A nested expression is parsed in an ExprState of its own; the finished
+// result is appended to the enclosing state.
+template<>
+struct BuildAST<grammar::expr> : change_head<ExprState>
+{
+    static void success0(ExprState & inner, ExprState & outer, State & ps)
+    {
+        auto finished = inner.finish(ps);
+        outer.exprs.push_back(std::move(finished));
+    }
+};
+
+}
+}
+
+namespace nix {
+
+// Parses `text` into an expression tree and binds its variables against
+// `staticEnv`. `length` counts the whole buffer, which must end in two NUL
+// bytes; those are cut off before parsing. Grammar failures are rethrown as
+// ParseError at the last position PEGTL reported. The caller receives the
+// released raw pointer.
+Expr * EvalState::parse(
+    char * text,
+    size_t length,
+    Pos::Origin origin,
+    const SourcePath & basePath,
+    std::shared_ptr<StaticEnv> & staticEnv)
+{
+    parser::State state = {
+        symbols,
+        positions,
+        basePath,
+        positions.addOrigin(origin, length),
+        exprSymbols,
+    };
+    parser::ExprState root;
+
+    assert(length >= 2);
+    assert(text[length - 1] == 0);
+    assert(text[length - 2] == 0);
+    length -= 2;
+
+    p::string_input<p::tracking_mode::lazy> input{std::string_view{text, length}, "input"};
+    try {
+        p::parse<parser::grammar::root, parser::BuildAST, parser::Control>(input, root, state);
+    } catch (p::parse_error & e) {
+        const auto & where = e.positions().back();
+        throw ParseError({
+            .msg = HintFmt("syntax error, %s", e.message()),
+            .pos = positions[state.positions.add(state.origin, where.byte)]
+        });
+    }
+
+    auto finished = root.finish(state);
+    auto & result = finished.second;
+    result->bindVars(*this, staticEnv);
+    return result.release();
+}
+
+}
diff --git a/src/libexpr/parser-state.hh b/src/libexpr/parser/state.hh
index cb1f12230..f5a0428d7 100644
--- a/src/libexpr/parser-state.hh
+++ b/src/libexpr/parser/state.hh
@@ -3,77 +3,61 @@
#include "eval.hh"
-namespace nix {
+namespace nix::parser {
-/**
- * @note Storing a C-style `char *` and `size_t` allows us to avoid
- * having to define the special members that using string_view here
- * would implicitly delete.
- */
struct StringToken
{
- const char * p;
- size_t l;
+ std::string_view s;
bool hasIndentation;
- operator std::string_view() const { return {p, l}; }
+ operator std::string_view() const { return s; }
};
-struct ParserLocation
-{
- int first_line, first_column;
- int last_line, last_column;
-
- // backup to recover from yyless(0)
- int stashed_first_column, stashed_last_column;
-
- void stash() {
- stashed_first_column = first_column;
- stashed_last_column = last_column;
- }
-
- void unstash() {
- first_column = stashed_first_column;
- last_column = stashed_last_column;
- }
-};
-
-struct ParserState
+struct State
{
SymbolTable & symbols;
PosTable & positions;
- Expr * result;
SourcePath basePath;
PosTable::Origin origin;
const Expr::AstSymbols & s;
- std::unique_ptr<Error> error;
- [[nodiscard]] ParseError dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos);
- [[nodiscard]] ParseError dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos);
- [[nodiscard]] std::optional<ParseError> addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos);
- [[nodiscard]] std::optional<ParseError> validateFormals(Formals * formals, PosIdx pos = noPos, Symbol arg = {});
+ // Both overloads report a duplicate attribute by throwing ParseError; they
+ // never return, which callers rely on to stop processing the duplicate.
+ [[noreturn]] void dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos);
+ [[noreturn]] void dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos);
+ void addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos);
+ std::unique_ptr<Formals> validateFormals(std::unique_ptr<Formals> formals, PosIdx pos = noPos, Symbol arg = {});
std::unique_ptr<Expr> stripIndentation(const PosIdx pos,
std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es);
- PosIdx at(const ParserLocation & loc);
+
+ // Position of the first byte of a match. With lazy position tracking the
+ // input does not hand out byte offsets directly, so the offset is taken as
+ // the distance from the start of the input; in.position() would work too
+ // but also computes line and column, which is expensive.
+ PosIdx at(const auto & in)
+ {
+     const auto offset = in.begin() - in.input().begin();
+     return positions.add(origin, offset);
+ }
+
+ // Position just past the last byte of a match, computed as the distance
+ // from the start of the input.
+ PosIdx atEnd(const auto & in)
+ {
+     const auto offset = in.end() - in.input().begin();
+     return positions.add(origin, offset);
+ }
};
-inline ParseError ParserState::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos)
+inline void State::dupAttr(const AttrPath & attrPath, const PosIdx pos, const PosIdx prevPos)
{
- return ParseError({
+ throw ParseError({
.msg = HintFmt("attribute '%1%' already defined at %2%",
showAttrPath(symbols, attrPath), positions[prevPos]),
.pos = positions[pos]
});
}
-inline ParseError ParserState::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos)
+inline void State::dupAttr(Symbol attr, const PosIdx pos, const PosIdx prevPos)
{
- return ParseError({
+ throw ParseError({
.msg = HintFmt("attribute '%1%' already defined at %2%", symbols[attr], positions[prevPos]),
.pos = positions[pos]
});
}
-inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos)
+inline void State::addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos)
{
AttrPath::iterator i;
// All attrpaths have at least one attr
@@ -88,12 +72,12 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
ExprAttrs * attrs2 = dynamic_cast<ExprAttrs *>(j->second.e.get());
if (!attrs2) {
attrPath.erase(i + 1, attrPath.end());
- return dupAttr(attrPath, pos, j->second.pos);
+ dupAttr(attrPath, pos, j->second.pos);
}
attrs = attrs2;
} else {
attrPath.erase(i + 1, attrPath.end());
- return dupAttr(attrPath, pos, j->second.pos);
+ dupAttr(attrPath, pos, j->second.pos);
}
} else {
auto next = attrs->attrs.emplace(std::piecewise_construct,
@@ -135,7 +119,7 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
if (ae->inheritFromExprs)
std::ranges::move(*ae->inheritFromExprs, std::back_inserter(*jAttrs->inheritFromExprs));
} else {
- return dupAttr(attrPath, pos, j->second.pos);
+ dupAttr(attrPath, pos, j->second.pos);
}
} else {
// This attr path is not defined. Let's create it.
@@ -147,11 +131,9 @@ inline std::optional<ParseError> ParserState::addAttr(ExprAttrs * attrs, AttrPat
} else {
attrs->dynamicAttrs.emplace_back(std::move(i->expr), std::move(e), pos);
}
-
- return {};
}
-inline std::optional<ParseError> ParserState::validateFormals(Formals * formals, PosIdx pos, Symbol arg)
+inline std::unique_ptr<Formals> State::validateFormals(std::unique_ptr<Formals> formals, PosIdx pos, Symbol arg)
{
std::sort(formals->formals.begin(), formals->formals.end(),
[] (const auto & a, const auto & b) {
@@ -166,21 +148,21 @@ inline std::optional<ParseError> ParserState::validateFormals(Formals * formals,
duplicate = std::min(thisDup, duplicate.value_or(thisDup));
}
if (duplicate)
- return ParseError({
+ throw ParseError({
.msg = HintFmt("duplicate formal function argument '%1%'", symbols[duplicate->first]),
.pos = positions[duplicate->second]
});
if (arg && formals->has(arg))
- return ParseError({
+ throw ParseError({
.msg = HintFmt("duplicate formal function argument '%1%'", symbols[arg]),
.pos = positions[pos]
});
- return {};
+ return formals;
}
-inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
+inline std::unique_ptr<Expr> State::stripIndentation(const PosIdx pos,
std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es)
{
if (es.empty()) return std::make_unique<ExprString>("");
@@ -201,11 +183,11 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
}
continue;
}
- for (size_t j = 0; j < str->l; ++j) {
+ for (size_t j = 0; j < str->s.size(); ++j) {
if (atStartOfLine) {
- if (str->p[j] == ' ')
+ if (str->s[j] == ' ')
curIndent++;
- else if (str->p[j] == '\n') {
+ else if (str->s[j] == '\n') {
/* Empty line, doesn't influence minimum
indentation. */
curIndent = 0;
@@ -213,7 +195,7 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
atStartOfLine = false;
if (curIndent < minIndent) minIndent = curIndent;
}
- } else if (str->p[j] == '\n') {
+ } else if (str->s[j] == '\n') {
atStartOfLine = true;
curIndent = 0;
}
@@ -233,23 +215,23 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
};
const auto trimString = [&] (const StringToken t) {
std::string s2;
- for (size_t j = 0; j < t.l; ++j) {
+ for (size_t j = 0; j < t.s.size(); ++j) {
if (atStartOfLine) {
- if (t.p[j] == ' ') {
+ if (t.s[j] == ' ') {
if (curDropped++ >= minIndent)
- s2 += t.p[j];
+ s2 += t.s[j];
}
- else if (t.p[j] == '\n') {
+ else if (t.s[j] == '\n') {
curDropped = 0;
- s2 += t.p[j];
+ s2 += t.s[j];
} else {
atStartOfLine = false;
curDropped = 0;
- s2 += t.p[j];
+ s2 += t.s[j];
}
} else {
- s2 += t.p[j];
- if (t.p[j] == '\n') atStartOfLine = true;
+ s2 += t.s[j];
+ if (t.s[j] == '\n') atStartOfLine = true;
}
}
@@ -274,9 +256,4 @@ inline std::unique_ptr<Expr> ParserState::stripIndentation(const PosIdx pos,
return std::make_unique<ExprConcatStrings>(pos, true, std::move(es2));
}
-inline PosIdx ParserState::at(const ParserLocation & loc)
-{
- return positions.add(origin, loc.first_column);
-}
-
}
diff --git a/tests/functional/lang/parse-fail-eof-pos.err.exp b/tests/functional/lang/parse-fail-eof-pos.err.exp
index ef9ca381c..fc56897e4 100644
--- a/tests/functional/lang/parse-fail-eof-pos.err.exp
+++ b/tests/functional/lang/parse-fail-eof-pos.err.exp
@@ -1,4 +1,4 @@
-error: syntax error, unexpected end of file
+error: syntax error, unexpected end of file, expecting expression
at «stdin»:3:1:
2| # no content
3|
diff --git a/tests/functional/lang/parse-fail-undef-var-2.err.exp b/tests/functional/lang/parse-fail-undef-var-2.err.exp
index 393c454dd..d1728f125 100644
--- a/tests/functional/lang/parse-fail-undef-var-2.err.exp
+++ b/tests/functional/lang/parse-fail-undef-var-2.err.exp
@@ -1,4 +1,4 @@
-error: syntax error, unexpected ':', expecting '}'
+error: syntax error, expecting '}'
at «stdin»:3:13:
2|
3| f = {x, y : ["baz" "bar" z "bat"]}: x + y;
diff --git a/tests/functional/lang/parse-fail-utf8.err.exp b/tests/functional/lang/parse-fail-utf8.err.exp
index 1c83f6eb3..81df9dc8a 100644
--- a/tests/functional/lang/parse-fail-utf8.err.exp
+++ b/tests/functional/lang/parse-fail-utf8.err.exp
@@ -1,4 +1,4 @@
-error: syntax error, unexpected invalid token, expecting end of file
+error: syntax error, expecting end of file
at «stdin»:1:5:
1| 123 é 4
| ^