aboutsummaryrefslogtreecommitdiff
path: root/src/libexpr/parser/parser.cc
diff options
context:
space:
mode:
authoreldritch horrors <pennae@lix.systems>2024-06-16 23:10:09 +0200
committereldritch horrors <pennae@lix.systems>2024-06-25 12:24:58 +0000
commite6cd67591b44b4902bac73febcab3c4d96724aea (patch)
tree94c8ad90b8e756c5b00b8d68b2adf13c0f2febd9 /src/libexpr/parser/parser.cc
parentc097ebe66bf474da886ffa20d2f31bdb1d2196a8 (diff)
libexpr: rewrite the parser with pegtl instead of flex/bison
this gives about 20% performance improvements on pure parsing. obviously it will be less on full eval, but depending on how much parsing is to be done (e.g. including hackage-packages.nix or not) it's more like 4%-10%. this has been tested (with thousands of core hours of fuzzing) to ensure that the ASTs produced by the new parser are exactly the same as the old one would have produced. error messages will change (sometimes by a lot) and are not yet perfect, but we would rather leave this as is for later. test results for running only the parser (excluding the variable binding code) in a tight loop with inputs and parameters as given are promising: - 40% faster on lix's package.nix at 10000 iterations - 1.3% faster on nixpkgs all-packages.nix at 1000 iterations - equivalent on all of nixpkgs concatenated at 100 iterations (excluding invalid files, each file surrounded with parens) more realistic benchmarks are somewhere in between the extremes, parsing once again getting the largest uplift. other realistic workloads improve by a few percentage points as well, notably system builds are 4% faster. Benchmarks summary (from ./bench/summarize.jq bench/bench-*.json) old/bin/nix --extra-experimental-features 'nix-command flakes' eval -f bench/nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix mean: 0.408s ± 0.025s user: 0.355s | system: 0.033s median: 0.389s range: 0.388s ... 0.442s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' eval -f bench/nixpkgs/pkgs/development/haskell-modules/hackage-packages.nix mean: 0.332s ± 0.024s user: 0.279s | system: 0.033s median: 0.314s range: 0.313s ... 0.361s relative: 0.814 --- old/bin/nix --extra-experimental-features 'nix-command flakes' eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 6.133s ± 0.022s user: 5.395s | system: 0.437s median: 6.128s range: 6.099s ... 6.183s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' eval --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 5.925s ± 0.025s user: 5.176s | system: 0.456s median: 5.934s range: 5.861s ... 5.943s relative: 0.966 --- GC_INITIAL_HEAP_SIZE=10g old/bin/nix eval --extra-experimental-features 'nix-command flakes' --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 4.503s ± 0.027s user: 3.731s | system: 0.547s median: 4.499s range: 4.478s ... 4.541s relative: 1 GC_INITIAL_HEAP_SIZE=10g new/bin/nix eval --extra-experimental-features 'nix-command flakes' --raw --impure --expr 'with import <nixpkgs/nixos> {}; system' mean: 4.285s ± 0.031s user: 3.504s | system: 0.571s median: 4.281s range: 4.221s ... 4.328s relative: 0.951 --- old/bin/nix --extra-experimental-features 'nix-command flakes' search --no-eval-cache github:nixos/nixpkgs/e1fa12d4f6c6fe19ccb59cac54b5b3f25e160870 hello mean: 16.475s ± 0.07s user: 14.088s | system: 1.572s median: 16.495s range: 16.351s ... 16.536s relative: 1 new/bin/nix --extra-experimental-features 'nix-command flakes' search --no-eval-cache github:nixos/nixpkgs/e1fa12d4f6c6fe19ccb59cac54b5b3f25e160870 hello mean: 15.973s ± 0.013s user: 13.558s | system: 1.615s median: 15.973s range: 15.946s ... 15.99s relative: 0.97 --- Change-Id: Ie66ec2d045dec964632c6541e25f8f0797319ee2
Diffstat (limited to 'src/libexpr/parser/parser.cc')
-rw-r--r--src/libexpr/parser/parser.cc862
1 files changed, 862 insertions, 0 deletions
diff --git a/src/libexpr/parser/parser.cc b/src/libexpr/parser/parser.cc
new file mode 100644
index 000000000..850f1276e
--- /dev/null
+++ b/src/libexpr/parser/parser.cc
@@ -0,0 +1,862 @@
+#include "attr-set.hh"
+#include "error.hh"
+#include "eval-settings.hh"
+#include "eval.hh"
+#include "finally.hh"
+#include "nixexpr.hh"
+#include "symbol-table.hh"
+#include "users.hh"
+
+#include "change_head.hh"
+#include "grammar.hh"
+#include "state.hh"
+
+#include <charconv>
+#include <clocale>
+#include <memory>
+
+// flip this define when doing parser development to enable some g checks.
+#if 0
+#include <tao/pegtl/contrib/analyze.hpp>
+#define ANALYZE_GRAMMAR \
+ ([] { \
+ const std::size_t issues = tao::pegtl::analyze<grammar::root>(); \
+ assert(issues == 0); \
+ })()
+#else
+#define ANALYZE_GRAMMAR ((void) 0)
+#endif
+
+namespace p = tao::pegtl;
+
+namespace nix::parser {
+namespace {
+
+template<typename>
+inline constexpr const char * error_message = nullptr;
+
+#define error_message_for(...) \
+ template<> inline constexpr auto error_message<__VA_ARGS__>
+
+error_message_for(p::one<'{'>) = "expecting '{'";
+error_message_for(p::one<'}'>) = "expecting '}'";
+error_message_for(p::one<'"'>) = "expecting '\"'";
+error_message_for(p::one<';'>) = "expecting ';'";
+error_message_for(p::one<')'>) = "expecting ')'";
+error_message_for(p::one<'='>) = "expecting '='";
+error_message_for(p::one<']'>) = "expecting ']'";
+error_message_for(p::one<':'>) = "expecting ':'";
+error_message_for(p::string<'\'', '\''>) = "expecting \"''\"";
+error_message_for(p::any) = "expecting any character";
+error_message_for(grammar::eof) = "expecting end of file";
+error_message_for(grammar::seps) = "expecting separators";
+error_message_for(grammar::path::forbid_prefix_triple_slash) = "too many slashes in path";
+error_message_for(grammar::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash";
+error_message_for(grammar::expr) = "expecting expression";
+error_message_for(grammar::expr::unary) = "expecting expression";
+error_message_for(grammar::binding::equal) = "expecting '='";
+error_message_for(grammar::expr::lambda::arg) = "expecting identifier";
+error_message_for(grammar::formals) = "expecting formals";
+error_message_for(grammar::attrpath) = "expecting attribute path";
+error_message_for(grammar::expr::select) = "expecting selection expression";
+error_message_for(grammar::t::kw_then) = "expecting 'then'";
+error_message_for(grammar::t::kw_else) = "expecting 'else'";
+error_message_for(grammar::t::kw_in) = "expecting 'in'";
+
+struct SyntaxErrors
+{
+ template<typename Rule>
+ static constexpr auto message = error_message<Rule>;
+
+ template<typename Rule>
+ static constexpr bool raise_on_failure = false;
+};
+
+template<typename Rule>
+struct Control : p::must_if<SyntaxErrors>::control<Rule>
+{
+ template<typename ParseInput, typename... States>
+ [[noreturn]] static void raise(const ParseInput & in, States &&... st)
+ {
+ if (in.empty()) {
+ std::string expected;
+ if constexpr (constexpr auto msg = error_message<Rule>)
+ expected = fmt(", %s", msg);
+ throw p::parse_error("unexpected end of file" + expected, in);
+ }
+ p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...);
+ }
+};
+
+struct ExprState
+ : grammar::
+ operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>>
+{
+ std::unique_ptr<Expr> popExprOnly() {
+ return std::move(popExpr().second);
+ }
+
+ template<typename Op, typename... Args>
+ std::unique_ptr<Expr> applyUnary(Args &&... args) {
+ return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...);
+ }
+
+ template<typename Op>
+ std::unique_ptr<Expr> applyBinary(PosIdx pos) {
+ auto right = popExprOnly(), left = popExprOnly();
+ return std::make_unique<Op>(pos, std::move(left), std::move(right));
+ }
+
+ std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false)
+ {
+ std::vector<std::unique_ptr<Expr>> args(2);
+ args[flip ? 0 : 1] = popExprOnly();
+ args[flip ? 1 : 0] = popExprOnly();
+ return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args));
+ }
+
+ std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state)
+ {
+ return call(pos, state.s.lessThan, !less);
+ }
+
+ std::unique_ptr<Expr> concatStrings(PosIdx pos)
+ {
+ std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2);
+ args[1] = popExpr();
+ args[0] = popExpr();
+ return std::make_unique<ExprConcatStrings>(pos, false, std::move(args));
+ }
+
+ std::unique_ptr<Expr> negate(PosIdx pos, State & state)
+ {
+ std::vector<std::unique_ptr<Expr>> args(2);
+ args[0] = std::make_unique<ExprInt>(0);
+ args[1] = popExprOnly();
+ return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args));
+ }
+
+ std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) {
+ using Op = grammar::op;
+
+ auto not_ = [] (auto e) {
+ return std::make_unique<ExprOpNot>(std::move(e));
+ };
+
+ return {
+ pos,
+ (overloaded {
+ [&] (Op::implies) { return applyBinary<ExprOpImpl>(pos); },
+ [&] (Op::or_) { return applyBinary<ExprOpOr>(pos); },
+ [&] (Op::and_) { return applyBinary<ExprOpAnd>(pos); },
+ [&] (Op::equals) { return applyBinary<ExprOpEq>(pos); },
+ [&] (Op::not_equals) { return applyBinary<ExprOpNEq>(pos); },
+ [&] (Op::less) { return order(pos, true, state); },
+ [&] (Op::greater_eq) { return not_(order(pos, true, state)); },
+ [&] (Op::greater) { return order(pos, false, state); },
+ [&] (Op::less_eq) { return not_(order(pos, false, state)); },
+ [&] (Op::update) { return applyBinary<ExprOpUpdate>(pos); },
+ [&] (Op::not_) { return applyUnary<ExprOpNot>(); },
+ [&] (Op::plus) { return concatStrings(pos); },
+ [&] (Op::minus) { return call(pos, state.s.sub); },
+ [&] (Op::mul) { return call(pos, state.s.mul); },
+ [&] (Op::div) { return call(pos, state.s.div); },
+ [&] (Op::concat) { return applyBinary<ExprOpConcatLists>(pos); },
+ [&] (has_attr & a) { return applyUnary<ExprOpHasAttr>(std::move(a.path)); },
+ [&] (Op::unary_minus) { return negate(pos, state); },
+ })(op)
+ };
+ }
+
+ // always_inline is needed, otherwise pushOp slows down considerably
+ [[noreturn, gnu::always_inline]]
+ static void badOperator(PosIdx pos, State & state)
+ {
+ throw ParseError({
+ .msg = HintFmt("syntax error, unexpected operator"),
+ .pos = state.positions[pos]
+ });
+ }
+
+ template<typename Expr, typename... Args>
+ Expr & pushExpr(PosIdx pos, Args && ... args)
+ {
+ auto p = std::make_unique<Expr>(std::forward<Args>(args)...);
+ auto & result = *p;
+ exprs.emplace_back(pos, std::move(p));
+ return result;
+ }
+};
+
+struct SubexprState {
+private:
+ ExprState * up;
+
+public:
+ explicit SubexprState(ExprState & up, auto &...) : up(&up) {}
+ operator ExprState &() { return *up; }
+ ExprState * operator->() { return up; }
+};
+
+
+
+template<typename Rule>
+struct BuildAST : grammar::nothing<Rule> {};
+
+struct LambdaState : SubexprState {
+ using SubexprState::SubexprState;
+
+ Symbol arg;
+ std::unique_ptr<Formals> formals;
+};
+
+struct FormalsState : SubexprState {
+ using SubexprState::SubexprState;
+
+ Formals formals{};
+ Formal formal{};
+};
+
+template<> struct BuildAST<grammar::formal::name> {
+ static void apply(const auto & in, FormalsState & s, State & ps) {
+ s.formal = {
+ .pos = ps.at(in),
+ .name = ps.symbols.create(in.string_view()),
+ };
+ }
+};
+
+template<> struct BuildAST<grammar::formal> {
+ static void apply0(FormalsState & s, State &) {
+ s.formals.formals.emplace_back(std::move(s.formal));
+ }
+};
+
+template<> struct BuildAST<grammar::formal::default_value> {
+ static void apply0(FormalsState & s, State & ps) {
+ s.formal.def = s->popExprOnly();
+ }
+};
+
+template<> struct BuildAST<grammar::formals::ellipsis> {
+ static void apply0(FormalsState & s, State &) {
+ s.formals.ellipsis = true;
+ }
+};
+
+template<> struct BuildAST<grammar::formals> : change_head<FormalsState> {
+ static void success0(FormalsState & f, LambdaState & s, State &) {
+ s.formals = std::make_unique<Formals>(std::move(f.formals));
+ }
+};
+
+struct AttrState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::vector<AttrName> attrs;
+
+ void pushAttr(auto && attr, PosIdx) { attrs.emplace_back(std::move(attr)); }
+};
+
+template<> struct BuildAST<grammar::attr::simple> {
+ static void apply(const auto & in, auto & s, State & ps) {
+ s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in));
+ }
+};
+
+template<> struct BuildAST<grammar::attr::string> {
+ static void apply(const auto & in, auto & s, State & ps) {
+ auto e = s->popExprOnly();
+ if (auto str = dynamic_cast<ExprString *>(e.get()))
+ s.pushAttr(ps.symbols.create(str->s), ps.at(in));
+ else
+ s.pushAttr(std::move(e), ps.at(in));
+ }
+};
+
+template<> struct BuildAST<grammar::attr::expr> : BuildAST<grammar::attr::string> {};
+
+struct BindingsState : SubexprState {
+ using SubexprState::SubexprState;
+
+ ExprAttrs attrs;
+ AttrPath path;
+ std::unique_ptr<Expr> value;
+};
+
+struct InheritState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::vector<std::pair<AttrName, PosIdx>> attrs;
+ std::unique_ptr<Expr> from;
+ PosIdx fromPos;
+
+ void pushAttr(auto && attr, PosIdx pos) { attrs.emplace_back(std::move(attr), pos); }
+};
+
+template<> struct BuildAST<grammar::inherit::from> {
+ static void apply(const auto & in, InheritState & s, State & ps) {
+ s.from = s->popExprOnly();
+ s.fromPos = ps.at(in);
+ }
+};
+
+template<> struct BuildAST<grammar::inherit> : change_head<InheritState> {
+ static void success0(InheritState & s, BindingsState & b, State & ps) {
+ auto & attrs = b.attrs.attrs;
+ // TODO this should not reuse generic attrpath rules.
+ for (auto & [i, iPos] : s.attrs) {
+ if (i.symbol)
+ continue;
+ if (auto str = dynamic_cast<ExprString *>(i.expr.get()))
+ i = AttrName(ps.symbols.create(str->s));
+ else {
+ throw ParseError({
+ .msg = HintFmt("dynamic attributes not allowed in inherit"),
+ .pos = ps.positions[iPos]
+ });
+ }
+ }
+ if (auto fromE = std::move(s.from)) {
+ if (!b.attrs.inheritFromExprs)
+ b.attrs.inheritFromExprs = std::make_unique<std::vector<std::unique_ptr<Expr>>>();
+ b.attrs.inheritFromExprs->push_back(std::move(fromE));
+ for (auto & [i, iPos] : s.attrs) {
+ if (attrs.find(i.symbol) != attrs.end())
+ ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos);
+ auto from = std::make_unique<ExprInheritFrom>(s.fromPos, b.attrs.inheritFromExprs->size() - 1);
+ attrs.emplace(
+ i.symbol,
+ ExprAttrs::AttrDef(
+ std::make_unique<ExprSelect>(iPos, std::move(from), i.symbol),
+ iPos,
+ ExprAttrs::AttrDef::Kind::InheritedFrom));
+ }
+ } else {
+ for (auto & [i, iPos] : s.attrs) {
+ if (attrs.find(i.symbol) != attrs.end())
+ ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos);
+ attrs.emplace(
+ i.symbol,
+ ExprAttrs::AttrDef(
+ std::make_unique<ExprVar>(iPos, i.symbol),
+ iPos,
+ ExprAttrs::AttrDef::Kind::Inherited));
+ }
+ }
+ }
+};
+
+template<> struct BuildAST<grammar::binding::path> : change_head<AttrState> {
+ static void success0(AttrState & a, BindingsState & s, State & ps) {
+ s.path = std::move(a.attrs);
+ }
+};
+
+template<> struct BuildAST<grammar::binding::value> {
+ static void apply0(BindingsState & s, State & ps) {
+ s.value = s->popExprOnly();
+ }
+};
+
+template<> struct BuildAST<grammar::binding> {
+ static void apply(const auto & in, BindingsState & s, State & ps) {
+ ps.addAttr(&s.attrs, std::move(s.path), std::move(s.value), ps.at(in));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::id> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ if (in.string_view() == "__curPos")
+ s.pushExpr<ExprPos>(ps.at(in), ps.at(in));
+ else
+ s.pushExpr<ExprVar>(ps.at(in), ps.at(in), ps.symbols.create(in.string_view()));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::int_> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ int64_t v;
+ if (std::from_chars(in.begin(), in.end(), v).ec != std::errc{}) {
+ throw ParseError({
+ .msg = HintFmt("invalid integer '%1%'", in.string_view()),
+ .pos = ps.positions[ps.at(in)],
+ });
+ }
+ s.pushExpr<ExprInt>(noPos, v);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::float_> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ // copy the input into a temporary string so we can call stod.
+ // can't use from_chars because libc++ (thus darwin) does not have it,
+ // and floats are not performance-sensitive anyway. if they were you'd
+ // be in much bigger trouble than this.
+ //
+ // we also get to do a locale-save dance because stod is locale-aware and
+ // something (a plugin?) may have called setlocale or uselocale.
+ static struct locale_hack {
+ locale_t posix;
+ locale_hack(): posix(newlocale(LC_ALL_MASK, "POSIX", 0))
+ {
+ if (posix == 0)
+ throw SysError("could not get POSIX locale");
+ }
+ } locale;
+
+ auto tmp = in.string();
+ double v = [&] {
+ auto oldLocale = uselocale(locale.posix);
+ Finally resetLocale([=] { uselocale(oldLocale); });
+ try {
+ return std::stod(tmp);
+ } catch (...) {
+ throw ParseError({
+ .msg = HintFmt("invalid float '%1%'", in.string_view()),
+ .pos = ps.positions[ps.at(in)],
+ });
+ }
+ }();
+ s.pushExpr<ExprFloat>(noPos, v);
+ }
+};
+
+struct StringState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::string currentLiteral;
+ PosIdx currentPos;
+ std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts;
+
+ void append(PosIdx pos, std::string_view s)
+ {
+ if (currentLiteral.empty())
+ currentPos = pos;
+ currentLiteral += s;
+ }
+
+ // FIXME this truncates strings on NUL for compat with the old parser. ideally
+ // we should use the decomposition the g gives us instead of iterating over
+ // the entire string again.
+ static void unescapeStr(std::string & str)
+ {
+ char * s = str.data();
+ char * t = s;
+ char c;
+ while ((c = *s++)) {
+ if (c == '\\') {
+ c = *s++;
+ if (c == 'n') *t = '\n';
+ else if (c == 'r') *t = '\r';
+ else if (c == 't') *t = '\t';
+ else *t = c;
+ }
+ else if (c == '\r') {
+ /* Normalise CR and CR/LF into LF. */
+ *t = '\n';
+ if (*s == '\n') s++; /* cr/lf */
+ }
+ else *t = c;
+ t++;
+ }
+ str.resize(t - str.data());
+ }
+
+ void endLiteral()
+ {
+ if (!currentLiteral.empty()) {
+ unescapeStr(currentLiteral);
+ parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral)));
+ }
+ }
+
+ std::unique_ptr<Expr> finish()
+ {
+ if (parts.empty()) {
+ unescapeStr(currentLiteral);
+ return std::make_unique<ExprString>(std::move(currentLiteral));
+ } else {
+ endLiteral();
+ auto pos = parts[0].first;
+ return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts));
+ }
+ }
+};
+
+template<typename... Content> struct BuildAST<grammar::string::literal<Content...>> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ s.append(ps.at(in), in.string_view());
+ }
+};
+
+template<> struct BuildAST<grammar::string::cr_lf> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ s.append(ps.at(in), in.string_view()); // FIXME compat with old parser
+ }
+};
+
+template<> struct BuildAST<grammar::string::interpolation> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ s.endLiteral();
+ s.parts.emplace_back(ps.at(in), s->popExprOnly());
+ }
+};
+
+template<> struct BuildAST<grammar::string::escape> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ s.append(ps.at(in), "\\"); // FIXME compat with old parser
+ s.append(ps.at(in), in.string_view());
+ }
+};
+
+template<> struct BuildAST<grammar::string> : change_head<StringState> {
+ static void success0(StringState & s, ExprState & e, State &) {
+ e.exprs.emplace_back(noPos, s.finish());
+ }
+};
+
+struct IndStringState : SubexprState {
+ using SubexprState::SubexprState;
+
+ std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> parts;
+};
+
+template<bool Indented, typename... Content>
+struct BuildAST<grammar::ind_string::literal<Indented, Content...>> {
+ static void apply(const auto & in, IndStringState & s, State & ps) {
+ s.parts.emplace_back(ps.at(in), StringToken{in.string_view(), Indented});
+ }
+};
+
+template<> struct BuildAST<grammar::ind_string::interpolation> {
+ static void apply(const auto & in, IndStringState & s, State & ps) {
+ s.parts.emplace_back(ps.at(in), s->popExprOnly());
+ }
+};
+
+template<> struct BuildAST<grammar::ind_string::escape> {
+ static void apply(const auto & in, IndStringState & s, State & ps) {
+ switch (*in.begin()) {
+ case 'n': s.parts.emplace_back(ps.at(in), StringToken{"\n"}); break;
+ case 'r': s.parts.emplace_back(ps.at(in), StringToken{"\r"}); break;
+ case 't': s.parts.emplace_back(ps.at(in), StringToken{"\t"}); break;
+ default: s.parts.emplace_back(ps.at(in), StringToken{in.string_view()}); break;
+ }
+ }
+};
+
+template<> struct BuildAST<grammar::ind_string> : change_head<IndStringState> {
+ static void success(const auto & in, IndStringState & s, ExprState & e, State & ps) {
+ e.exprs.emplace_back(noPos, ps.stripIndentation(ps.at(in), std::move(s.parts)));
+ }
+};
+
+template<typename... Content> struct BuildAST<grammar::path::literal<Content...>> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ s.append(ps.at(in), in.string_view());
+ s.endLiteral();
+ }
+};
+
+template<> struct BuildAST<grammar::path::interpolation> : BuildAST<grammar::string::interpolation> {};
+
+template<> struct BuildAST<grammar::path::anchor> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ Path path(absPath(in.string(), ps.basePath.path.abs()));
+ /* add back in the trailing '/' to the first segment */
+ if (in.string_view().ends_with('/') && in.size() > 1)
+ path += "/";
+ s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path)));
+ }
+};
+
+template<> struct BuildAST<grammar::path::home_anchor> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ if (evalSettings.pureEval)
+ throw Error("the path '%s' can not be resolved in pure mode", in.string_view());
+ Path path(getHome() + in.string_view().substr(1));
+ s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path)));
+ }
+};
+
+template<> struct BuildAST<grammar::path::searched_path> {
+ static void apply(const auto & in, StringState & s, State & ps) {
+ std::vector<std::unique_ptr<Expr>> args{2};
+ args[0] = std::make_unique<ExprVar>(ps.s.nixPath);
+ args[1] = std::make_unique<ExprString>(in.string());
+ s.parts.emplace_back(
+ ps.at(in),
+ std::make_unique<ExprCall>(
+ ps.at(in),
+ std::make_unique<ExprVar>(ps.s.findFile),
+ std::move(args)));
+ }
+};
+
+template<> struct BuildAST<grammar::path> : change_head<StringState> {
+ template<typename E>
+ static void check_slash(PosIdx end, StringState & s, State & ps) {
+ auto e = dynamic_cast<E *>(s.parts.back().second.get());
+ if (!e || !e->s.ends_with('/'))
+ return;
+ if (s.parts.size() > 1 || e->s != "/")
+ throw ParseError({
+ .msg = HintFmt("path has a trailing slash"),
+ .pos = ps.positions[end],
+ });
+ }
+
+ static void success(const auto & in, StringState & s, ExprState & e, State & ps) {
+ s.endLiteral();
+ check_slash<ExprPath>(ps.atEnd(in), s, ps);
+ check_slash<ExprString>(ps.atEnd(in), s, ps);
+ if (s.parts.size() == 1) {
+ e.exprs.emplace_back(noPos, std::move(s.parts.back().second));
+ } else {
+ e.pushExpr<ExprConcatStrings>(ps.at(in), ps.at(in), false, std::move(s.parts));
+ }
+ }
+};
+
+// strings and paths sare handled fully by the grammar-level rule for now
+template<> struct BuildAST<grammar::expr::string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::ind_string> : p::maybe_nothing {};
+template<> struct BuildAST<grammar::expr::path> : p::maybe_nothing {};
+
+template<> struct BuildAST<grammar::expr::uri> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
+ if (noURLLiterals)
+ throw ParseError({
+ .msg = HintFmt("URL literals are disabled"),
+ .pos = ps.positions[ps.at(in)]
+ });
+ s.pushExpr<ExprString>(ps.at(in), in.string());
+ }
+};
+
+template<> struct BuildAST<grammar::expr::ancient_let> : change_head<BindingsState> {
+ static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+ b.attrs.pos = ps.at(in);
+ b.attrs.recursive = true;
+ s.pushExpr<ExprSelect>(b.attrs.pos, b.attrs.pos, std::make_unique<ExprAttrs>(std::move(b.attrs)), ps.s.body);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::rec_set> : change_head<BindingsState> {
+ static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+ b.attrs.pos = ps.at(in);
+ b.attrs.recursive = true;
+ s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::set> : change_head<BindingsState> {
+ static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+ b.attrs.pos = ps.at(in);
+ s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs));
+ }
+};
+
+using ListState = std::vector<std::unique_ptr<Expr>>;
+
+template<> struct BuildAST<grammar::expr::list> : change_head<ListState> {
+ static void success(const auto & in, ListState & ls, ExprState & s, State & ps) {
+ auto e = std::make_unique<ExprList>();
+ e->elems = std::move(ls);
+ s.exprs.emplace_back(ps.at(in), std::move(e));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::list::entry> : change_head<ExprState> {
+ static void success0(ExprState & e, ListState & s, State & ps) {
+ s.emplace_back(e.finish(ps).second);
+ }
+};
+
+struct SelectState : SubexprState {
+ using SubexprState::SubexprState;
+
+ PosIdx pos;
+ ExprSelect * e = nullptr;
+};
+
+template<> struct BuildAST<grammar::expr::select::head> {
+ static void apply(const auto & in, SelectState & s, State & ps) {
+ s.pos = ps.at(in);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::select::attr> : change_head<AttrState> {
+ static void success0(AttrState & a, SelectState & s, State &) {
+ s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, s->popExprOnly(), std::move(a.attrs), nullptr);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::select::attr_or> {
+ static void apply0(SelectState & s, State &) {
+ s.e->def = s->popExprOnly();
+ }
+};
+
+template<> struct BuildAST<grammar::expr::select::as_app_or> {
+ static void apply(const auto & in, SelectState & s, State & ps) {
+ std::vector<std::unique_ptr<Expr>> args(1);
+ args[0] = std::make_unique<ExprVar>(ps.at(in), ps.s.or_);
+ s->pushExpr<ExprCall>(s.pos, s.pos, s->popExprOnly(), std::move(args));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::select> : change_head<SelectState> {
+ static void success0(const auto &...) {}
+};
+
+struct AppState : SubexprState {
+ using SubexprState::SubexprState;
+
+ PosIdx pos;
+ ExprCall * e = nullptr;
+};
+
+template<> struct BuildAST<grammar::expr::app::select_or_fn> {
+ static void apply(const auto & in, AppState & s, State & ps) {
+ s.pos = ps.at(in);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::app::first_arg> {
+ static void apply(auto & in, AppState & s, State & ps) {
+ auto arg = s->popExprOnly(), fn = s->popExprOnly();
+ if ((s.e = dynamic_cast<ExprCall *>(fn.get()))) {
+ // TODO remove.
+ // AST compat with old parser, semantics are the same.
+ // this can happen on occasions such as `<p> <p>` or `a or b or`,
+ // neither of which are super worth optimizing.
+ s.e->args.push_back(std::move(arg));
+ s->exprs.emplace_back(noPos, std::move(fn));
+ } else {
+ std::vector<std::unique_ptr<Expr>> args{1};
+ args[0] = std::move(arg);
+ s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args));
+ }
+ }
+};
+
+template<> struct BuildAST<grammar::expr::app::another_arg> {
+ static void apply0(AppState & s, State & ps) {
+ s.e->args.push_back(s->popExprOnly());
+ }
+};
+
+template<> struct BuildAST<grammar::expr::app> : change_head<AppState> {
+ static void success0(const auto &...) {}
+};
+
+template<typename Op> struct BuildAST<grammar::expr::operator_<Op>> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ s.pushOp(ps.at(in), Op{}, ps);
+ }
+};
+template<> struct BuildAST<grammar::expr::operator_<grammar::op::has_attr>> : change_head<AttrState> {
+ static void success(const auto & in, AttrState & a, ExprState & s, State & ps) {
+ s.pushOp(ps.at(in), ExprState::has_attr{{}, std::move(a.attrs)}, ps);
+ }
+};
+
+template<> struct BuildAST<grammar::expr::lambda::arg> {
+ static void apply(const auto & in, LambdaState & s, State & ps) {
+ s.arg = ps.symbols.create(in.string_view());
+ }
+};
+
+template<> struct BuildAST<grammar::expr::lambda> : change_head<LambdaState> {
+ static void success(const auto & in, LambdaState & l, ExprState & s, State & ps) {
+ if (l.formals)
+ l.formals = ps.validateFormals(std::move(l.formals), ps.at(in), l.arg);
+ s.pushExpr<ExprLambda>(ps.at(in), ps.at(in), l.arg, std::move(l.formals), l->popExprOnly());
+ }
+};
+
+template<> struct BuildAST<grammar::expr::assert_> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ auto body = s.popExprOnly(), cond = s.popExprOnly();
+ s.pushExpr<ExprAssert>(ps.at(in), ps.at(in), std::move(cond), std::move(body));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::with> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ auto body = s.popExprOnly(), scope = s.popExprOnly();
+ s.pushExpr<ExprWith>(ps.at(in), ps.at(in), std::move(scope), std::move(body));
+ }
+};
+
+template<> struct BuildAST<grammar::expr::let> : change_head<BindingsState> {
+ static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) {
+ if (!b.attrs.dynamicAttrs.empty())
+ throw ParseError({
+ .msg = HintFmt("dynamic attributes not allowed in let"),
+ .pos = ps.positions[ps.at(in)]
+ });
+
+ s.pushExpr<ExprLet>(ps.at(in), std::make_unique<ExprAttrs>(std::move(b.attrs)), b->popExprOnly());
+ }
+};
+
+template<> struct BuildAST<grammar::expr::if_> {
+ static void apply(const auto & in, ExprState & s, State & ps) {
+ auto else_ = s.popExprOnly(), then = s.popExprOnly(), cond = s.popExprOnly();
+ s.pushExpr<ExprIf>(ps.at(in), ps.at(in), std::move(cond), std::move(then), std::move(else_));
+ }
+};
+
+template<> struct BuildAST<grammar::expr> : change_head<ExprState> {
+ static void success0(ExprState & inner, ExprState & outer, State & ps) {
+ outer.exprs.push_back(inner.finish(ps));
+ }
+};
+
+}
+}
+
+namespace nix {
+
+Expr * EvalState::parse(
+ char * text,
+ size_t length,
+ Pos::Origin origin,
+ const SourcePath & basePath,
+ std::shared_ptr<StaticEnv> & staticEnv)
+{
+ parser::State s = {
+ symbols,
+ positions,
+ basePath,
+ positions.addOrigin(origin, length),
+ exprSymbols,
+ };
+ parser::ExprState x;
+
+ assert(length >= 2);
+ assert(text[length - 1] == 0);
+ assert(text[length - 2] == 0);
+ length -= 2;
+
+ p::string_input<p::tracking_mode::lazy> inp{std::string_view{text, length}, "input"};
+ try {
+ p::parse<parser::grammar::root, parser::BuildAST, parser::Control>(inp, x, s);
+ } catch (p::parse_error & e) {
+ auto pos = e.positions().back();
+ throw ParseError({
+ .msg = HintFmt("syntax error, %s", e.message()),
+ .pos = positions[s.positions.add(s.origin, pos.byte)]
+ });
+ }
+
+ auto [_pos, result] = x.finish(s);
+ result->bindVars(*this, staticEnv);
+ return result.release();
+}
+
+}