diff options
Diffstat (limited to 'src/libexpr')
-rw-r--r-- | src/libexpr/eval-cache.cc | 6 | ||||
-rw-r--r-- | src/libexpr/nixexpr.cc | 29 | ||||
-rw-r--r-- | src/libexpr/nixexpr.hh | 4 | ||||
-rw-r--r-- | src/libexpr/parser/grammar.hh | 122 | ||||
-rw-r--r-- | src/libexpr/parser/parser-impl1.inc.cc | 863 | ||||
-rw-r--r-- | src/libexpr/parser/parser.cc | 868 | ||||
-rw-r--r-- | src/libexpr/parser/state.hh | 168 | ||||
-rw-r--r-- | src/libexpr/primops/fetchTree.cc | 3 |
8 files changed, 1061 insertions, 1002 deletions
diff --git a/src/libexpr/eval-cache.cc b/src/libexpr/eval-cache.cc index 83bfd4fb0..e7336c7e8 100644 --- a/src/libexpr/eval-cache.cc +++ b/src/libexpr/eval-cache.cc @@ -79,7 +79,7 @@ struct AttrDb state->txn->commit(); state->txn.reset(); } catch (...) { - ignoreException(); + ignoreExceptionInDestructor(); } } @@ -90,7 +90,7 @@ struct AttrDb try { return fun(); } catch (SQLiteError &) { - ignoreException(); + ignoreExceptionExceptInterrupt(); failed = true; return 0; } @@ -329,7 +329,7 @@ static std::shared_ptr<AttrDb> makeAttrDb( try { return std::make_shared<AttrDb>(cfg, fingerprint, symbols); } catch (SQLiteError &) { - ignoreException(); + ignoreExceptionExceptInterrupt(); return nullptr; } } diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index 68da254e2..0c1a1ec0e 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -21,6 +21,14 @@ std::ostream & operator <<(std::ostream & str, const SymbolStr & symbol) return printIdentifier(str, s); } +AttrName::AttrName(Symbol s) : symbol(s) +{ +} + +AttrName::AttrName(std::unique_ptr<Expr> e) : expr(std::move(e)) +{ +} + void Expr::show(const SymbolTable & symbols, std::ostream & str) const { abort(); @@ -239,9 +247,24 @@ void ExprConcatStrings::show(const SymbolTable & symbols, std::ostream & str) co { bool first = true; str << "("; - for (auto & i : es) { - if (first) first = false; else str << " + "; - i.second->show(symbols, str); + for (auto & [_pos, part] : es) { + if (first) + first = false; + else + str << " + "; + + if (forceString && !dynamic_cast<ExprString *>(part.get())) { + /* Print as a string with an interpolation, to preserve the + * semantics of the value having to be a string. + * Interpolations are weird and someone should eventually + * move them out into their own AST node please. + */ + str << "\"${"; + part->show(symbols, str); + str << "}\""; + } else { + part->show(symbols, str); + } } str << ")"; } diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh index d16281c39..4e857b321 100644 --- a/src/libexpr/nixexpr.hh +++ b/src/libexpr/nixexpr.hh @@ -30,8 +30,8 @@ struct AttrName { Symbol symbol; std::unique_ptr<Expr> expr; - AttrName(Symbol s) : symbol(s) {}; - AttrName(std::unique_ptr<Expr> e) : expr(std::move(e)) {}; + AttrName(Symbol s); + AttrName(std::unique_ptr<Expr> e); }; typedef std::vector<AttrName> AttrPath; diff --git a/src/libexpr/parser/grammar.hh b/src/libexpr/parser/grammar.hh index 2c5a3d1be..701b40505 100644 --- a/src/libexpr/parser/grammar.hh +++ b/src/libexpr/parser/grammar.hh @@ -12,7 +12,7 @@ // eolf rules in favor of reproducing the old flex lexer as faithfully as // possible, and deferring calculation of positions to downstream users. -namespace nix::parser::grammar { +namespace nix::parser::grammar::v1 { using namespace tao::pegtl; namespace p = tao::pegtl; @@ -225,7 +225,8 @@ struct string : _string, seq< > {}; struct _ind_string { - template<bool Indented, typename... Inner> + struct line_start : semantic, star<one<' '>> {}; + template<typename... Inner> struct literal : semantic, seq<Inner...> {}; struct interpolation : semantic, seq< p::string<'$', '{'>, seps, @@ -233,34 +234,53 @@ struct _ind_string { must<one<'}'>> > {}; struct escape : semantic, must<any> {}; + /* Marker for non-empty lines */ + struct has_content : semantic, seq<> {}; }; struct ind_string : _ind_string, seq< TAO_PEGTL_STRING("''"), + // Strip first line completely if empty opt<star<one<' '>>, one<'\n'>>, - star< - sor< - _ind_string::literal< - true, + list< + seq< + // Start a line with some indentation + // (we always match even the empty string if no indentation, as this creates the line) + _ind_string::line_start, + // The actual line + opt< plus< sor< - not_one<'$', '\''>, - seq<one<'$'>, not_one<'{', '\''>>, - seq<one<'\''>, not_one<'\'', '$'>> - > - > - >, - _ind_string::interpolation, - _ind_string::literal<false, one<'$'>>, - _ind_string::literal<false, one<'\''>, not_at<one<'\''>>>, - seq<one<'\''>, _ind_string::literal<false, p::string<'\'', '\''>>>, - seq< - p::string<'\'', '\''>, - sor< - _ind_string::literal<false, one<'$'>>, - seq<one<'\\'>, _ind_string::escape> + _ind_string::literal< + plus< + sor< + not_one<'$', '\'', '\n'>, + // TODO probably factor this out like the others for performance + seq<one<'$'>, not_one<'{', '\'', '\n'>>, + seq<one<'$'>, at<one<'\n'>>>, + seq<one<'\''>, not_one<'\'', '$', '\n'>>, + seq<one<'\''>, at<one<'\n'>>> + > + > + >, + _ind_string::interpolation, + _ind_string::literal<one<'$'>>, + _ind_string::literal<one<'\''>, not_at<one<'\''>>>, + seq<one<'\''>, _ind_string::literal<p::string<'\'', '\''>>>, + seq< + p::string<'\'', '\''>, + sor< + _ind_string::literal<one<'$'>>, + seq<one<'\\'>, _ind_string::escape> + > + > + >, + _ind_string::has_content > > - > + >, + // End of line, LF. CR is just ignored and not treated as ending a line + // (for the purpose of indentation stripping) + _ind_string::literal<one<'\n'>> >, must<TAO_PEGTL_STRING("''")> > {}; @@ -352,10 +372,10 @@ struct formals : semantic, _formals, seq< struct _attr { struct simple : semantic, sor<t::identifier, t::kw_or> {}; - struct string : semantic, seq<grammar::string> {}; + struct string : semantic, seq<grammar::v1::string> {}; struct expr : semantic, seq< TAO_PEGTL_STRING("${"), seps, - must<grammar::expr>, seps, + must<grammar::v1::expr>, seps, must<one<'}'>> > {}; }; @@ -452,9 +472,9 @@ struct _expr { struct id : semantic, t::identifier {}; struct int_ : semantic, t::integer {}; struct float_ : semantic, t::floating {}; - struct string : semantic, seq<grammar::string> {}; - struct ind_string : semantic, seq<grammar::ind_string> {}; - struct path : semantic, seq<grammar::path> {}; + struct string : semantic, seq<grammar::v1::string> {}; + struct ind_string : semantic, seq<grammar::v1::ind_string> {}; + struct path : semantic, seq<grammar::v1::path> {}; struct uri : semantic, t::uri {}; struct ancient_let : semantic, _attrset<must, t::kw_let, seps> {}; struct rec_set : semantic, _attrset<must, t::kw_rec, seps> {}; @@ -628,34 +648,34 @@ struct nothing : p::nothing<Rule> { template<typename Self, typename OpCtx, typename AttrPathT, typename ExprT> struct operator_semantics { - struct has_attr : grammar::op::has_attr { + struct has_attr : grammar::v1::op::has_attr { AttrPathT path; }; struct OpEntry { OpCtx ctx; uint8_t prec; - grammar::op::kind assoc; + grammar::v1::op::kind assoc; std::variant< - grammar::op::not_, - grammar::op::unary_minus, - grammar::op::implies, - grammar::op::or_, - grammar::op::and_, - grammar::op::equals, - grammar::op::not_equals, - grammar::op::less_eq, - grammar::op::greater_eq, - grammar::op::update, - grammar::op::concat, - grammar::op::less, - grammar::op::greater, - grammar::op::plus, - grammar::op::minus, - grammar::op::mul, - grammar::op::div, - grammar::op::pipe_right, - grammar::op::pipe_left, + grammar::v1::op::not_, + grammar::v1::op::unary_minus, + grammar::v1::op::implies, + grammar::v1::op::or_, + grammar::v1::op::and_, + grammar::v1::op::equals, + grammar::v1::op::not_equals, + grammar::v1::op::less_eq, + grammar::v1::op::greater_eq, + grammar::v1::op::update, + grammar::v1::op::concat, + grammar::v1::op::less, + grammar::v1::op::greater, + grammar::v1::op::plus, + grammar::v1::op::minus, + grammar::v1::op::mul, + grammar::v1::op::div, + grammar::v1::op::pipe_right, + grammar::v1::op::pipe_left, has_attr > op; }; @@ -676,7 +696,7 @@ struct operator_semantics { auto & [ctx, precedence, kind, op] = ops.back(); // NOTE this relies on associativity not being mixed within a precedence level. if ((precedence > toPrecedence) - || (kind != grammar::op::kind::leftAssoc && precedence == toPrecedence)) + || (kind != grammar::v1::op::kind::leftAssoc && precedence == toPrecedence)) break; std::visit([&, ctx=std::move(ctx)] (auto & op) { exprs.push_back(static_cast<Self &>(*this).applyOp(ctx, op, args...)); @@ -694,9 +714,9 @@ struct operator_semantics { void pushOp(OpCtx ctx, auto o, auto &... args) { - if (o.kind != grammar::op::kind::unary) + if (o.kind != grammar::v1::op::kind::unary) reduce(o.precedence, args...); - if (!ops.empty() && o.kind == grammar::op::kind::nonAssoc) { + if (!ops.empty() && o.kind == grammar::v1::op::kind::nonAssoc) { auto & [_pos, _prec, _kind, _o] = ops.back(); if (_kind == o.kind && _prec == o.precedence) Self::badOperator(ctx, args...); diff --git a/src/libexpr/parser/parser-impl1.inc.cc b/src/libexpr/parser/parser-impl1.inc.cc new file mode 100644 index 000000000..c937d17fd --- /dev/null +++ b/src/libexpr/parser/parser-impl1.inc.cc @@ -0,0 +1,863 @@ +// flip this define when doing parser development to enable some g checks. +#if 0 +#include <tao/pegtl/contrib/analyze.hpp> +#define ANALYZE_GRAMMAR \ + ([] { \ + const std::size_t issues = tao::pegtl::analyze<grammar::v1::root>(); \ + assert(issues == 0); \ + })() +#else +#define ANALYZE_GRAMMAR ((void) 0) +#endif + +namespace p = tao::pegtl; + +namespace nix::parser::v1 { +namespace { + +template<typename> +inline constexpr const char * error_message = nullptr; + +#define error_message_for(...) \ + template<> inline constexpr auto error_message<__VA_ARGS__> + +error_message_for(p::one<'{'>) = "expecting '{'"; +error_message_for(p::one<'}'>) = "expecting '}'"; +error_message_for(p::one<'"'>) = "expecting '\"'"; +error_message_for(p::one<';'>) = "expecting ';'"; +error_message_for(p::one<')'>) = "expecting ')'"; +error_message_for(p::one<']'>) = "expecting ']'"; +error_message_for(p::one<':'>) = "expecting ':'"; +error_message_for(p::string<'\'', '\''>) = "expecting \"''\""; +error_message_for(p::any) = "expecting any character"; +error_message_for(grammar::v1::eof) = "expecting end of file"; +error_message_for(grammar::v1::seps) = "expecting separators"; +error_message_for(grammar::v1::path::forbid_prefix_triple_slash) = "too many slashes in path"; +error_message_for(grammar::v1::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash"; +error_message_for(grammar::v1::expr) = "expecting expression"; +error_message_for(grammar::v1::expr::unary) = "expecting expression"; +error_message_for(grammar::v1::binding::equal) = "expecting '='"; +error_message_for(grammar::v1::expr::lambda::arg) = "expecting identifier"; +error_message_for(grammar::v1::formals) = "expecting formals"; +error_message_for(grammar::v1::attrpath) = "expecting attribute path"; +error_message_for(grammar::v1::expr::select) = "expecting selection expression"; +error_message_for(grammar::v1::t::kw_then) = "expecting 'then'"; +error_message_for(grammar::v1::t::kw_else) = "expecting 'else'"; +error_message_for(grammar::v1::t::kw_in) = "expecting 'in'"; + +struct SyntaxErrors +{ + template<typename Rule> + static constexpr auto message = error_message<Rule>; + + template<typename Rule> + static constexpr bool raise_on_failure = false; +}; + +template<typename Rule> +struct Control : p::must_if<SyntaxErrors>::control<Rule> +{ + template<typename ParseInput, typename... States> + [[noreturn]] static void raise(const ParseInput & in, States &&... st) + { + if (in.empty()) { + std::string expected; + if constexpr (constexpr auto msg = error_message<Rule>) + expected = fmt(", %s", msg); + throw p::parse_error("unexpected end of file" + expected, in); + } + p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...); + } +}; + +struct ExprState + : grammar::v1:: + operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>> +{ + std::unique_ptr<Expr> popExprOnly() { + return std::move(popExpr().second); + } + + template<typename Op, typename... Args> + std::unique_ptr<Expr> applyUnary(Args &&... args) { + return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...); + } + + template<typename Op> + std::unique_ptr<Expr> applyBinary(PosIdx pos) { + auto right = popExprOnly(), left = popExprOnly(); + return std::make_unique<Op>(pos, std::move(left), std::move(right)); + } + + std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false) + { + std::vector<std::unique_ptr<Expr>> args(2); + args[flip ? 0 : 1] = popExprOnly(); + args[flip ? 1 : 0] = popExprOnly(); + return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args)); + } + + std::unique_ptr<Expr> pipe(PosIdx pos, State & state, bool flip = false) + { + if (!state.featureSettings.isEnabled(Xp::PipeOperator)) + throw ParseError({ + .msg = HintFmt("Pipe operator is disabled"), + .pos = state.positions[pos] + }); + + // Reverse the order compared to normal function application: arg |> fn + std::unique_ptr<Expr> fn, arg; + if (flip) { + fn = popExprOnly(); + arg = popExprOnly(); + } else { + arg = popExprOnly(); + fn = popExprOnly(); + } + std::vector<std::unique_ptr<Expr>> args{1}; + args[0] = std::move(arg); + + return std::make_unique<ExprCall>(pos, std::move(fn), std::move(args)); + } + + std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state) + { + return call(pos, state.s.lessThan, !less); + } + + std::unique_ptr<Expr> concatStrings(PosIdx pos) + { + std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2); + args[1] = popExpr(); + args[0] = popExpr(); + return std::make_unique<ExprConcatStrings>(pos, false, std::move(args)); + } + + std::unique_ptr<Expr> negate(PosIdx pos, State & state) + { + std::vector<std::unique_ptr<Expr>> args(2); + args[0] = std::make_unique<ExprInt>(0); + args[1] = popExprOnly(); + return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args)); + } + + std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) { + using Op = grammar::v1::op; + + auto not_ = [] (auto e) { + return std::make_unique<ExprOpNot>(std::move(e)); + }; + + return { + pos, + (overloaded { + [&] (Op::implies) { return applyBinary<ExprOpImpl>(pos); }, + [&] (Op::or_) { return applyBinary<ExprOpOr>(pos); }, + [&] (Op::and_) { return applyBinary<ExprOpAnd>(pos); }, + [&] (Op::equals) { return applyBinary<ExprOpEq>(pos); }, + [&] (Op::not_equals) { return applyBinary<ExprOpNEq>(pos); }, + [&] (Op::less) { return order(pos, true, state); }, + [&] (Op::greater_eq) { return not_(order(pos, true, state)); }, + [&] (Op::greater) { return order(pos, false, state); }, + [&] (Op::less_eq) { return not_(order(pos, false, state)); }, + [&] (Op::update) { return applyBinary<ExprOpUpdate>(pos); }, + [&] (Op::not_) { return applyUnary<ExprOpNot>(); }, + [&] (Op::plus) { return concatStrings(pos); }, + [&] (Op::minus) { return call(pos, state.s.sub); }, + [&] (Op::mul) { return call(pos, state.s.mul); }, + [&] (Op::div) { return call(pos, state.s.div); }, + [&] (Op::concat) { return applyBinary<ExprOpConcatLists>(pos); }, + [&] (has_attr & a) { return applyUnary<ExprOpHasAttr>(std::move(a.path)); }, + [&] (Op::unary_minus) { return negate(pos, state); }, + [&] (Op::pipe_right) { return pipe(pos, state, true); }, + [&] (Op::pipe_left) { return pipe(pos, state); }, + })(op) + }; + } + + // always_inline is needed, otherwise pushOp slows down considerably + [[noreturn, gnu::always_inline]] + static void badOperator(PosIdx pos, State & state) + { + throw ParseError({ + .msg = HintFmt("syntax error, unexpected operator"), + .pos = state.positions[pos] + }); + } + + template<typename Expr, typename... Args> + Expr & pushExpr(PosIdx pos, Args && ... args) + { + auto p = std::make_unique<Expr>(std::forward<Args>(args)...); + auto & result = *p; + exprs.emplace_back(pos, std::move(p)); + return result; + } +}; + +struct SubexprState { +private: + ExprState * up; + +public: + explicit SubexprState(ExprState & up, auto &...) : up(&up) {} + operator ExprState &() { return *up; } + ExprState * operator->() { return up; } +}; + + + +template<typename Rule> +struct BuildAST : grammar::v1::nothing<Rule> {}; + +struct LambdaState : SubexprState { + using SubexprState::SubexprState; + + Symbol arg; + std::unique_ptr<Formals> formals; +}; + +struct FormalsState : SubexprState { + using SubexprState::SubexprState; + + Formals formals{}; + Formal formal{}; +}; + +template<> struct BuildAST<grammar::v1::formal::name> { + static void apply(const auto & in, FormalsState & s, State & ps) { + s.formal = { + .pos = ps.at(in), + .name = ps.symbols.create(in.string_view()), + }; + } +}; + +template<> struct BuildAST<grammar::v1::formal> { + static void apply0(FormalsState & s, State &) { + s.formals.formals.emplace_back(std::move(s.formal)); + } +}; + +template<> struct BuildAST<grammar::v1::formal::default_value> { + static void apply0(FormalsState & s, State & ps) { + s.formal.def = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::v1::formals::ellipsis> { + static void apply0(FormalsState & s, State &) { + s.formals.ellipsis = true; + } +}; + +template<> struct BuildAST<grammar::v1::formals> : change_head<FormalsState> { + static void success0(FormalsState & f, LambdaState & s, State &) { + s.formals = std::make_unique<Formals>(std::move(f.formals)); + } +}; + +struct AttrState : SubexprState { + using SubexprState::SubexprState; + + std::vector<AttrName> attrs; + + template <typename T> + void pushAttr(T && attr, PosIdx) { attrs.emplace_back(std::forward<T>(attr)); } +}; + +template<> struct BuildAST<grammar::v1::attr::simple> { + static void apply(const auto & in, auto & s, State & ps) { + s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::v1::attr::string> { + static void apply(const auto & in, auto & s, State & ps) { + auto e = s->popExprOnly(); + if (auto str = dynamic_cast<ExprString *>(e.get())) + s.pushAttr(ps.symbols.create(str->s), ps.at(in)); + else + s.pushAttr(std::move(e), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::v1::attr::expr> : BuildAST<grammar::v1::attr::string> {}; + +struct BindingsState : SubexprState { + using SubexprState::SubexprState; + + ExprAttrs attrs; + AttrPath path; + std::unique_ptr<Expr> value; +}; + +struct InheritState : SubexprState { + using SubexprState::SubexprState; + + std::vector<std::pair<AttrName, PosIdx>> attrs; + std::unique_ptr<Expr> from; + PosIdx fromPos; + + template <typename T> + void pushAttr(T && attr, PosIdx pos) { attrs.emplace_back(std::forward<T>(attr), pos); } +}; + +template<> struct BuildAST<grammar::v1::inherit::from> { + static void apply(const auto & in, InheritState & s, State & ps) { + s.from = s->popExprOnly(); + s.fromPos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::v1::inherit> : change_head<InheritState> { + static void success0(InheritState & s, BindingsState & b, State & ps) { + auto & attrs = b.attrs.attrs; + // TODO this should not reuse generic attrpath rules. + for (auto & [i, iPos] : s.attrs) { + if (i.symbol) + continue; + if (auto str = dynamic_cast<ExprString *>(i.expr.get())) + i = AttrName(ps.symbols.create(str->s)); + else { + throw ParseError({ + .msg = HintFmt("dynamic attributes not allowed in inherit"), + .pos = ps.positions[iPos] + }); + } + } + if (s.from != nullptr) { + if (!b.attrs.inheritFromExprs) + b.attrs.inheritFromExprs = std::make_unique<std::vector<ref<Expr>>>(); + auto fromExpr = ref<Expr>(std::move(s.from)); + b.attrs.inheritFromExprs->push_back(fromExpr); + for (auto & [i, iPos] : s.attrs) { + if (attrs.find(i.symbol) != attrs.end()) + ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); + auto inheritFrom = std::make_unique<ExprInheritFrom>( + s.fromPos, + b.attrs.inheritFromExprs->size() - 1, + fromExpr + ); + attrs.emplace( + i.symbol, + ExprAttrs::AttrDef( + std::make_unique<ExprSelect>(iPos, std::move(inheritFrom), i.symbol), + iPos, + ExprAttrs::AttrDef::Kind::InheritedFrom)); + } + } else { + for (auto & [i, iPos] : s.attrs) { + if (attrs.find(i.symbol) != attrs.end()) + ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); + attrs.emplace( + i.symbol, + ExprAttrs::AttrDef( + std::make_unique<ExprVar>(iPos, i.symbol), + iPos, + ExprAttrs::AttrDef::Kind::Inherited)); + } + } + } +}; + +template<> struct BuildAST<grammar::v1::binding::path> : change_head<AttrState> { + static void success0(AttrState & a, BindingsState & s, State & ps) { + s.path = std::move(a.attrs); + } +}; + +template<> struct BuildAST<grammar::v1::binding::value> { + static void apply0(BindingsState & s, State & ps) { + s.value = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::v1::binding> { + static void apply(const auto & in, BindingsState & s, State & ps) { + ps.addAttr(&s.attrs, std::move(s.path), std::move(s.value), ps.at(in)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::id> { + static void apply(const auto & in, ExprState & s, State & ps) { + if (in.string_view() == "__curPos") + s.pushExpr<ExprPos>(ps.at(in), ps.at(in)); + else + s.pushExpr<ExprVar>(ps.at(in), ps.at(in), ps.symbols.create(in.string_view())); + } +}; + +template<> struct BuildAST<grammar::v1::expr::int_> { + static void apply(const auto & in, ExprState & s, State & ps) { + int64_t v; + if (std::from_chars(in.begin(), in.end(), v).ec != std::errc{}) { + throw ParseError({ + .msg = HintFmt("invalid integer '%1%'", in.string_view()), + .pos = ps.positions[ps.at(in)], + }); + } + s.pushExpr<ExprInt>(noPos, v); + } +}; + +template<> struct BuildAST<grammar::v1::expr::float_> { + static void apply(const auto & in, ExprState & s, State & ps) { + // copy the input into a temporary string so we can call stod. + // can't use from_chars because libc++ (thus darwin) does not have it, + // and floats are not performance-sensitive anyway. if they were you'd + // be in much bigger trouble than this. + // + // we also get to do a locale-save dance because stod is locale-aware and + // something (a plugin?) may have called setlocale or uselocale. + static struct locale_hack { + locale_t posix; + locale_hack(): posix(newlocale(LC_ALL_MASK, "POSIX", 0)) + { + if (posix == 0) + throw SysError("could not get POSIX locale"); + } + } locale; + + auto tmp = in.string(); + double v = [&] { + auto oldLocale = uselocale(locale.posix); + Finally resetLocale([=] { uselocale(oldLocale); }); + try { + return std::stod(tmp); + } catch (...) { + throw ParseError({ + .msg = HintFmt("invalid float '%1%'", in.string_view()), + .pos = ps.positions[ps.at(in)], + }); + } + }(); + s.pushExpr<ExprFloat>(noPos, v); + } +}; + +struct StringState : SubexprState { + using SubexprState::SubexprState; + + std::string currentLiteral; + PosIdx currentPos; + std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts; + + void append(PosIdx pos, std::string_view s) + { + if (currentLiteral.empty()) + currentPos = pos; + currentLiteral += s; + } + + // FIXME this truncates strings on NUL for compat with the old parser. ideally + // we should use the decomposition the g gives us instead of iterating over + // the entire string again. + static void unescapeStr(std::string & str) + { + char * s = str.data(); + char * t = s; + char c; + while ((c = *s++)) { + if (c == '\\') { + c = *s++; + if (c == 'n') *t = '\n'; + else if (c == 'r') *t = '\r'; + else if (c == 't') *t = '\t'; + else *t = c; + } + else if (c == '\r') { + /* Normalise CR and CR/LF into LF. */ + *t = '\n'; + if (*s == '\n') s++; /* cr/lf */ + } + else *t = c; + t++; + } + str.resize(t - str.data()); + } + + void endLiteral() + { + if (!currentLiteral.empty()) { + unescapeStr(currentLiteral); + parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral))); + } + } + + std::unique_ptr<Expr> finish() + { + if (parts.empty()) { + unescapeStr(currentLiteral); + return std::make_unique<ExprString>(std::move(currentLiteral)); + } else { + endLiteral(); + auto pos = parts[0].first; + return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts)); + } + } +}; + +template<typename... Content> struct BuildAST<grammar::v1::string::literal<Content...>> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); + } +}; + +template<> struct BuildAST<grammar::v1::string::cr_lf> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); // FIXME compat with old parser + } +}; + +template<> struct BuildAST<grammar::v1::string::interpolation> { + static void apply(const auto & in, StringState & s, State & ps) { + s.endLiteral(); + s.parts.emplace_back(ps.at(in), s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::v1::string::escape> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), "\\"); // FIXME compat with old parser + s.append(ps.at(in), in.string_view()); + } +}; + +template<> struct BuildAST<grammar::v1::string> : change_head<StringState> { + static void success0(StringState & s, ExprState & e, State &) { + e.exprs.emplace_back(noPos, s.finish()); + } +}; + +struct IndStringState : SubexprState { + using SubexprState::SubexprState; + + std::vector<IndStringLine> lines; +}; + +template<> struct BuildAST<grammar::v1::ind_string::line_start> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.lines.push_back(IndStringLine { in.string_view(), ps.at(in) }); + } +}; + +template<typename... Content> +struct BuildAST<grammar::v1::ind_string::literal<Content...>> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.lines.back().parts.emplace_back(ps.at(in), in.string_view()); + } +}; + +template<> struct BuildAST<grammar::v1::ind_string::interpolation> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.lines.back().parts.emplace_back(ps.at(in), s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::v1::ind_string::escape> { + static void apply(const auto & in, IndStringState & s, State & ps) { + switch (*in.begin()) { + case 'n': s.lines.back().parts.emplace_back(ps.at(in), "\n"); break; + case 'r': s.lines.back().parts.emplace_back(ps.at(in), "\r"); break; + case 't': s.lines.back().parts.emplace_back(ps.at(in), "\t"); break; + default: s.lines.back().parts.emplace_back(ps.at(in), in.string_view()); break; + } + } +}; + +template<> struct BuildAST<grammar::v1::ind_string::has_content> { + static void apply(const auto & in, IndStringState & s, State & ps) { + s.lines.back().hasContent = true; + } +}; + +template<> struct BuildAST<grammar::v1::ind_string> : change_head<IndStringState> { + static void success(const auto & in, IndStringState & s, ExprState & e, State & ps) { + e.exprs.emplace_back(noPos, ps.stripIndentation(ps.at(in), std::move(s.lines))); + } +}; + +template<typename... Content> struct BuildAST<grammar::v1::path::literal<Content...>> { + static void apply(const auto & in, StringState & s, State & ps) { + s.append(ps.at(in), in.string_view()); + s.endLiteral(); + } +}; + +template<> struct BuildAST<grammar::v1::path::interpolation> : BuildAST<grammar::v1::string::interpolation> {}; + +template<> struct BuildAST<grammar::v1::path::anchor> { + static void apply(const auto & in, StringState & s, State & ps) { + Path path(absPath(in.string(), ps.basePath.path.abs())); + /* add back in the trailing '/' to the first segment */ + if (in.string_view().ends_with('/') && in.size() > 1) + path += "/"; + s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path))); + } +}; + +template<> struct BuildAST<grammar::v1::path::home_anchor> { + static void apply(const auto & in, StringState & s, State & ps) { + if (evalSettings.pureEval) + throw Error("the path '%s' can not be resolved in pure mode", in.string_view()); + Path path(getHome() + in.string_view().substr(1)); + s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path))); + } +}; + +template<> struct BuildAST<grammar::v1::path::searched_path> { + static void apply(const auto & in, StringState & s, State & ps) { + std::vector<std::unique_ptr<Expr>> args{2}; + args[0] = std::make_unique<ExprVar>(ps.s.nixPath); + args[1] = std::make_unique<ExprString>(in.string()); + s.parts.emplace_back( + ps.at(in), + std::make_unique<ExprCall>( + ps.at(in), + std::make_unique<ExprVar>(ps.s.findFile), + std::move(args))); + } +}; + +template<> struct BuildAST<grammar::v1::path> : change_head<StringState> { + template<typename E> + static void check_slash(PosIdx end, StringState & s, State & ps) { + auto e = dynamic_cast<E *>(s.parts.back().second.get()); + if (!e || !e->s.ends_with('/')) + return; + if (s.parts.size() > 1 || e->s != "/") + throw ParseError({ + .msg = HintFmt("path has a trailing slash"), + .pos = ps.positions[end], + }); + } + + static void success(const auto & in, StringState & s, ExprState & e, State & ps) { + s.endLiteral(); + check_slash<ExprPath>(ps.atEnd(in), s, ps); + check_slash<ExprString>(ps.atEnd(in), s, ps); + if (s.parts.size() == 1) { + e.exprs.emplace_back(noPos, std::move(s.parts.back().second)); + } else { + e.pushExpr<ExprConcatStrings>(ps.at(in), ps.at(in), false, std::move(s.parts)); + } + } +}; + +// strings and paths sare handled fully by the grammar-level rule for now +template<> struct BuildAST<grammar::v1::expr::string> : p::maybe_nothing {}; +template<> struct BuildAST<grammar::v1::expr::ind_string> : p::maybe_nothing {}; +template<> struct BuildAST<grammar::v1::expr::path> : p::maybe_nothing {}; + +template<> struct BuildAST<grammar::v1::expr::uri> { + static void apply(const auto & in, ExprState & s, State & ps) { + bool URLLiterals = ps.featureSettings.isEnabled(Dep::UrlLiterals); + if (!URLLiterals) + throw ParseError({ + .msg = HintFmt("URL literals are deprecated, allow using them with %s", "--extra-deprecated-features url-literals"), + .pos = ps.positions[ps.at(in)] + }); + s.pushExpr<ExprString>(ps.at(in), in.string()); + } +}; + +template<> struct BuildAST<grammar::v1::expr::ancient_let> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + // Added 2024-09-18. Turn into an error at some point in the future. + // See the documentation on deprecated features for more details. + if (!ps.featureSettings.isEnabled(Dep::AncientLet)) + warn( + "%s found at %s. This feature is deprecated and will be removed in the future. Use %s to silence this warning.", + "let {", + ps.positions[ps.at(in)], + "--extra-deprecated-features ancient-let" + ); + + b.attrs.pos = ps.at(in); + b.attrs.recursive = true; + s.pushExpr<ExprSelect>(b.attrs.pos, b.attrs.pos, std::make_unique<ExprAttrs>(std::move(b.attrs)), ps.s.body); + } +}; + +template<> struct BuildAST<grammar::v1::expr::rec_set> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + // Before inserting new attrs, check for __override and throw an error + // (the error will initially be a warning to ease migration) + if (!featureSettings.isEnabled(Dep::RecSetOverrides) && b.attrs.attrs.contains(ps.s.overrides)) { + ps.overridesFound(ps.at(in)); + } + + b.attrs.pos = ps.at(in); + b.attrs.recursive = true; + s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::set> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + b.attrs.pos = ps.at(in); + s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); + } +}; + +using ListState = std::vector<std::unique_ptr<Expr>>; + +template<> struct BuildAST<grammar::v1::expr::list> : change_head<ListState> { + static void success(const auto & in, ListState & ls, ExprState & s, State & ps) { + auto e = std::make_unique<ExprList>(); + e->elems = std::move(ls); + s.exprs.emplace_back(ps.at(in), std::move(e)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::list::entry> : change_head<ExprState> { + static void success0(ExprState & e, ListState & s, State & ps) { + s.emplace_back(e.finish(ps).second); + } +}; + +struct SelectState : SubexprState { + using SubexprState::SubexprState; + + PosIdx pos; + ExprSelect * e = nullptr; +}; + +template<> struct BuildAST<grammar::v1::expr::select::head> { + static void apply(const auto & in, SelectState & s, State & ps) { + s.pos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::v1::expr::select::attr> : change_head<AttrState> { + static void success0(AttrState & a, SelectState & s, State &) { + s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, s->popExprOnly(), std::move(a.attrs), nullptr); + } +}; + +template<> struct BuildAST<grammar::v1::expr::select::attr_or> { + static void apply0(SelectState & s, State &) { + s.e->def = s->popExprOnly(); + } +}; + +template<> struct BuildAST<grammar::v1::expr::select::as_app_or> { + static void apply(const auto & in, SelectState & s, State & ps) { + std::vector<std::unique_ptr<Expr>> args(1); + args[0] = std::make_unique<ExprVar>(ps.at(in), ps.s.or_); + s->pushExpr<ExprCall>(s.pos, s.pos, s->popExprOnly(), std::move(args)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::select> : change_head<SelectState> { + static void success0(const auto &...) {} +}; + +struct AppState : SubexprState { + using SubexprState::SubexprState; + + PosIdx pos; + ExprCall * e = nullptr; +}; + +template<> struct BuildAST<grammar::v1::expr::app::select_or_fn> { + static void apply(const auto & in, AppState & s, State & ps) { + s.pos = ps.at(in); + } +}; + +template<> struct BuildAST<grammar::v1::expr::app::first_arg> { + static void apply(auto & in, AppState & s, State & ps) { + auto arg = s->popExprOnly(), fn = s->popExprOnly(); + if ((s.e = dynamic_cast<ExprCall *>(fn.get()))) { + // TODO remove. + // AST compat with old parser, semantics are the same. + // this can happen on occasions such as `<p> <p>` or `a or b or`, + // neither of which are super worth optimizing. + s.e->args.push_back(std::move(arg)); + s->exprs.emplace_back(noPos, std::move(fn)); + } else { + std::vector<std::unique_ptr<Expr>> args{1}; + args[0] = std::move(arg); + s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args)); + } + } +}; + +template<> struct BuildAST<grammar::v1::expr::app::another_arg> { + static void apply0(AppState & s, State & ps) { + s.e->args.push_back(s->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::v1::expr::app> : change_head<AppState> { + static void success0(const auto &...) {} +}; + +template<typename Op> struct BuildAST<grammar::v1::expr::operator_<Op>> { + static void apply(const auto & in, ExprState & s, State & ps) { + s.pushOp(ps.at(in), Op{}, ps); + } +}; +template<> struct BuildAST<grammar::v1::expr::operator_<grammar::v1::op::has_attr>> : change_head<AttrState> { + static void success(const auto & in, AttrState & a, ExprState & s, State & ps) { + s.pushOp(ps.at(in), ExprState::has_attr{{}, std::move(a.attrs)}, ps); + } +}; + +template<> struct BuildAST<grammar::v1::expr::lambda::arg> { + static void apply(const auto & in, LambdaState & s, State & ps) { + s.arg = ps.symbols.create(in.string_view()); + } +}; + +template<> struct BuildAST<grammar::v1::expr::lambda> : change_head<LambdaState> { + static void success(const auto & in, LambdaState & l, ExprState & s, State & ps) { + if (l.formals) + l.formals = ps.validateFormals(std::move(l.formals), ps.at(in), l.arg); + s.pushExpr<ExprLambda>(ps.at(in), ps.at(in), l.arg, std::move(l.formals), l->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::v1::expr::assert_> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto body = s.popExprOnly(), cond = s.popExprOnly(); + s.pushExpr<ExprAssert>(ps.at(in), ps.at(in), std::move(cond), std::move(body)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::with> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto body = s.popExprOnly(), scope = s.popExprOnly(); + s.pushExpr<ExprWith>(ps.at(in), ps.at(in), std::move(scope), std::move(body)); + } +}; + +template<> struct BuildAST<grammar::v1::expr::let> : change_head<BindingsState> { + static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { + if (!b.attrs.dynamicAttrs.empty()) + throw ParseError({ + .msg = HintFmt("dynamic attributes not allowed in let"), + .pos = ps.positions[ps.at(in)] + }); + + s.pushExpr<ExprLet>(ps.at(in), std::make_unique<ExprAttrs>(std::move(b.attrs)), b->popExprOnly()); + } +}; + +template<> struct BuildAST<grammar::v1::expr::if_> { + static void apply(const auto & in, ExprState & s, State & ps) { + auto else_ = s.popExprOnly(), then = s.popExprOnly(), cond = s.popExprOnly(); + s.pushExpr<ExprIf>(ps.at(in), ps.at(in), std::move(cond), std::move(then), std::move(else_)); + } +}; + +template<> struct BuildAST<grammar::v1::expr> : change_head<ExprState> { + static void success0(ExprState & inner, ExprState & outer, State & ps) { + outer.exprs.push_back(inner.finish(ps)); + } +}; + +} +} diff --git a/src/libexpr/parser/parser.cc b/src/libexpr/parser/parser.cc index 17463056c..896a54981 100644 --- a/src/libexpr/parser/parser.cc +++ b/src/libexpr/parser/parser.cc @@ -14,857 +14,11 @@ #include <charconv> #include <memory> -// flip this define when doing parser development to enable some g checks. -#if 0 -#include <tao/pegtl/contrib/analyze.hpp> -#define ANALYZE_GRAMMAR \ - ([] { \ - const std::size_t issues = tao::pegtl::analyze<grammar::root>(); \ - assert(issues == 0); \ - })() -#else -#define ANALYZE_GRAMMAR ((void) 0) -#endif - -namespace p = tao::pegtl; - -namespace nix::parser { -namespace { - -template<typename> -inline constexpr const char * error_message = nullptr; - -#define error_message_for(...) \ - template<> inline constexpr auto error_message<__VA_ARGS__> - -error_message_for(p::one<'{'>) = "expecting '{'"; -error_message_for(p::one<'}'>) = "expecting '}'"; -error_message_for(p::one<'"'>) = "expecting '\"'"; -error_message_for(p::one<';'>) = "expecting ';'"; -error_message_for(p::one<')'>) = "expecting ')'"; -error_message_for(p::one<']'>) = "expecting ']'"; -error_message_for(p::one<':'>) = "expecting ':'"; -error_message_for(p::string<'\'', '\''>) = "expecting \"''\""; -error_message_for(p::any) = "expecting any character"; -error_message_for(grammar::eof) = "expecting end of file"; -error_message_for(grammar::seps) = "expecting separators"; -error_message_for(grammar::path::forbid_prefix_triple_slash) = "too many slashes in path"; -error_message_for(grammar::path::forbid_prefix_double_slash_no_interp) = "path has a trailing slash"; -error_message_for(grammar::expr) = "expecting expression"; -error_message_for(grammar::expr::unary) = "expecting expression"; -error_message_for(grammar::binding::equal) = "expecting '='"; -error_message_for(grammar::expr::lambda::arg) = "expecting identifier"; -error_message_for(grammar::formals) = "expecting formals"; -error_message_for(grammar::attrpath) = "expecting attribute path"; -error_message_for(grammar::expr::select) = "expecting selection expression"; -error_message_for(grammar::t::kw_then) = "expecting 'then'"; -error_message_for(grammar::t::kw_else) = "expecting 'else'"; -error_message_for(grammar::t::kw_in) = "expecting 'in'"; - -struct SyntaxErrors -{ - template<typename Rule> - static constexpr auto message = error_message<Rule>; - - template<typename Rule> - static constexpr bool raise_on_failure = false; -}; - -template<typename Rule> -struct Control : p::must_if<SyntaxErrors>::control<Rule> -{ - template<typename ParseInput, typename... States> - [[noreturn]] static void raise(const ParseInput & in, States &&... st) - { - if (in.empty()) { - std::string expected; - if constexpr (constexpr auto msg = error_message<Rule>) - expected = fmt(", %s", msg); - throw p::parse_error("unexpected end of file" + expected, in); - } - p::must_if<SyntaxErrors>::control<Rule>::raise(in, st...); - } -}; - -struct ExprState - : grammar:: - operator_semantics<ExprState, PosIdx, AttrPath, std::pair<PosIdx, std::unique_ptr<Expr>>> -{ - std::unique_ptr<Expr> popExprOnly() { - return std::move(popExpr().second); - } - - template<typename Op, typename... Args> - std::unique_ptr<Expr> applyUnary(Args &&... args) { - return std::make_unique<Op>(popExprOnly(), std::forward<Args>(args)...); - } - - template<typename Op> - std::unique_ptr<Expr> applyBinary(PosIdx pos) { - auto right = popExprOnly(), left = popExprOnly(); - return std::make_unique<Op>(pos, std::move(left), std::move(right)); - } - - std::unique_ptr<Expr> call(PosIdx pos, Symbol fn, bool flip = false) - { - std::vector<std::unique_ptr<Expr>> args(2); - args[flip ? 0 : 1] = popExprOnly(); - args[flip ? 1 : 0] = popExprOnly(); - return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(fn), std::move(args)); - } - - std::unique_ptr<Expr> pipe(PosIdx pos, State & state, bool flip = false) - { - if (!state.featureSettings.isEnabled(Xp::PipeOperator)) - throw ParseError({ - .msg = HintFmt("Pipe operator is disabled"), - .pos = state.positions[pos] - }); - - // Reverse the order compared to normal function application: arg |> fn - std::unique_ptr<Expr> fn, arg; - if (flip) { - fn = popExprOnly(); - arg = popExprOnly(); - } else { - arg = popExprOnly(); - fn = popExprOnly(); - } - std::vector<std::unique_ptr<Expr>> args{1}; - args[0] = std::move(arg); - - return std::make_unique<ExprCall>(pos, std::move(fn), std::move(args)); - } - - std::unique_ptr<Expr> order(PosIdx pos, bool less, State & state) - { - return call(pos, state.s.lessThan, !less); - } - - std::unique_ptr<Expr> concatStrings(PosIdx pos) - { - std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> args(2); - args[1] = popExpr(); - args[0] = popExpr(); - return std::make_unique<ExprConcatStrings>(pos, false, std::move(args)); - } - - std::unique_ptr<Expr> negate(PosIdx pos, State & state) - { - std::vector<std::unique_ptr<Expr>> args(2); - args[0] = std::make_unique<ExprInt>(0); - args[1] = popExprOnly(); - return std::make_unique<ExprCall>(pos, std::make_unique<ExprVar>(state.s.sub), std::move(args)); - } - - std::pair<PosIdx, std::unique_ptr<Expr>> applyOp(PosIdx pos, auto & op, State & state) { - using Op = grammar::op; - - auto not_ = [] (auto e) { - return std::make_unique<ExprOpNot>(std::move(e)); - }; - - return { - pos, - (overloaded { - [&] (Op::implies) { return applyBinary<ExprOpImpl>(pos); }, - [&] (Op::or_) { return applyBinary<ExprOpOr>(pos); }, - [&] (Op::and_) { return applyBinary<ExprOpAnd>(pos); }, - [&] (Op::equals) { return applyBinary<ExprOpEq>(pos); }, - [&] (Op::not_equals) { return applyBinary<ExprOpNEq>(pos); }, - [&] (Op::less) { return order(pos, true, state); }, - [&] (Op::greater_eq) { return not_(order(pos, true, state)); }, - [&] (Op::greater) { return order(pos, false, state); }, - [&] (Op::less_eq) { return not_(order(pos, false, state)); }, - [&] (Op::update) { return applyBinary<ExprOpUpdate>(pos); }, - [&] (Op::not_) { return applyUnary<ExprOpNot>(); }, - [&] (Op::plus) { return concatStrings(pos); }, - [&] (Op::minus) { return call(pos, state.s.sub); }, - [&] (Op::mul) { return call(pos, state.s.mul); }, - [&] (Op::div) { return call(pos, state.s.div); }, - [&] (Op::concat) { return applyBinary<ExprOpConcatLists>(pos); }, - [&] (has_attr & a) { return applyUnary<ExprOpHasAttr>(std::move(a.path)); }, - [&] (Op::unary_minus) { return negate(pos, state); }, - [&] (Op::pipe_right) { return pipe(pos, state, true); }, - [&] (Op::pipe_left) { return pipe(pos, state); }, - })(op) - }; - } - - // always_inline is needed, otherwise pushOp slows down considerably - [[noreturn, gnu::always_inline]] - static void badOperator(PosIdx pos, State & state) - { - throw ParseError({ - .msg = HintFmt("syntax error, unexpected operator"), - .pos = state.positions[pos] - }); - } - - template<typename Expr, typename... Args> - Expr & pushExpr(PosIdx pos, Args && ... args) - { - auto p = std::make_unique<Expr>(std::forward<Args>(args)...); - auto & result = *p; - exprs.emplace_back(pos, std::move(p)); - return result; - } -}; - -struct SubexprState { -private: - ExprState * up; - -public: - explicit SubexprState(ExprState & up, auto &...) : up(&up) {} - operator ExprState &() { return *up; } - ExprState * operator->() { return up; } -}; - - - -template<typename Rule> -struct BuildAST : grammar::nothing<Rule> {}; - -struct LambdaState : SubexprState { - using SubexprState::SubexprState; - - Symbol arg; - std::unique_ptr<Formals> formals; -}; - -struct FormalsState : SubexprState { - using SubexprState::SubexprState; - - Formals formals{}; - Formal formal{}; -}; - -template<> struct BuildAST<grammar::formal::name> { - static void apply(const auto & in, FormalsState & s, State & ps) { - s.formal = { - .pos = ps.at(in), - .name = ps.symbols.create(in.string_view()), - }; - } -}; - -template<> struct BuildAST<grammar::formal> { - static void apply0(FormalsState & s, State &) { - s.formals.formals.emplace_back(std::move(s.formal)); - } -}; - -template<> struct BuildAST<grammar::formal::default_value> { - static void apply0(FormalsState & s, State & ps) { - s.formal.def = s->popExprOnly(); - } -}; - -template<> struct BuildAST<grammar::formals::ellipsis> { - static void apply0(FormalsState & s, State &) { - s.formals.ellipsis = true; - } -}; - -template<> struct BuildAST<grammar::formals> : change_head<FormalsState> { - static void success0(FormalsState & f, LambdaState & s, State &) { - s.formals = std::make_unique<Formals>(std::move(f.formals)); - } -}; - -struct AttrState : SubexprState { - using SubexprState::SubexprState; - - std::vector<AttrName> attrs; - - template <typename T> - void pushAttr(T && attr, PosIdx) { attrs.emplace_back(std::forward<T>(attr)); } -}; - -template<> struct BuildAST<grammar::attr::simple> { - static void apply(const auto & in, auto & s, State & ps) { - s.pushAttr(ps.symbols.create(in.string_view()), ps.at(in)); - } -}; - -template<> struct BuildAST<grammar::attr::string> { - static void apply(const auto & in, auto & s, State & ps) { - auto e = s->popExprOnly(); - if (auto str = dynamic_cast<ExprString *>(e.get())) - s.pushAttr(ps.symbols.create(str->s), ps.at(in)); - else - s.pushAttr(std::move(e), ps.at(in)); - } -}; - -template<> struct BuildAST<grammar::attr::expr> : BuildAST<grammar::attr::string> {}; - -struct BindingsState : SubexprState { - using SubexprState::SubexprState; - - ExprAttrs attrs; - AttrPath path; - std::unique_ptr<Expr> value; -}; - -struct InheritState : SubexprState { - using SubexprState::SubexprState; - - std::vector<std::pair<AttrName, PosIdx>> attrs; - std::unique_ptr<Expr> from; - PosIdx fromPos; - - template <typename T> - void pushAttr(T && attr, PosIdx pos) { attrs.emplace_back(std::forward<T>(attr), pos); } -}; - -template<> struct BuildAST<grammar::inherit::from> { - static void apply(const auto & in, InheritState & s, State & ps) { - s.from = s->popExprOnly(); - s.fromPos = ps.at(in); - } -}; - -template<> struct BuildAST<grammar::inherit> : change_head<InheritState> { - static void success0(InheritState & s, BindingsState & b, State & ps) { - auto & attrs = b.attrs.attrs; - // TODO this should not reuse generic attrpath rules. - for (auto & [i, iPos] : s.attrs) { - if (i.symbol) - continue; - if (auto str = dynamic_cast<ExprString *>(i.expr.get())) - i = AttrName(ps.symbols.create(str->s)); - else { - throw ParseError({ - .msg = HintFmt("dynamic attributes not allowed in inherit"), - .pos = ps.positions[iPos] - }); - } - } - if (s.from != nullptr) { - if (!b.attrs.inheritFromExprs) - b.attrs.inheritFromExprs = std::make_unique<std::vector<ref<Expr>>>(); - auto fromExpr = ref<Expr>(std::move(s.from)); - b.attrs.inheritFromExprs->push_back(fromExpr); - for (auto & [i, iPos] : s.attrs) { - if (attrs.find(i.symbol) != attrs.end()) - ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); - auto inheritFrom = std::make_unique<ExprInheritFrom>( - s.fromPos, - b.attrs.inheritFromExprs->size() - 1, - fromExpr - ); - attrs.emplace( - i.symbol, - ExprAttrs::AttrDef( - std::make_unique<ExprSelect>(iPos, std::move(inheritFrom), i.symbol), - iPos, - ExprAttrs::AttrDef::Kind::InheritedFrom)); - } - } else { - for (auto & [i, iPos] : s.attrs) { - if (attrs.find(i.symbol) != attrs.end()) - ps.dupAttr(i.symbol, iPos, attrs[i.symbol].pos); - attrs.emplace( - i.symbol, - ExprAttrs::AttrDef( - std::make_unique<ExprVar>(iPos, i.symbol), - iPos, - ExprAttrs::AttrDef::Kind::Inherited)); - } - } - } -}; - -template<> struct BuildAST<grammar::binding::path> : change_head<AttrState> { - static void success0(AttrState & a, BindingsState & s, State & ps) { - s.path = std::move(a.attrs); - } -}; - -template<> struct BuildAST<grammar::binding::value> { - static void apply0(BindingsState & s, State & ps) { - s.value = s->popExprOnly(); - } -}; - -template<> struct BuildAST<grammar::binding> { - static void apply(const auto & in, BindingsState & s, State & ps) { - ps.addAttr(&s.attrs, std::move(s.path), std::move(s.value), ps.at(in)); - } -}; - -template<> struct BuildAST<grammar::expr::id> { - static void apply(const auto & in, ExprState & s, State & ps) { - if (in.string_view() == "__curPos") - s.pushExpr<ExprPos>(ps.at(in), ps.at(in)); - else - s.pushExpr<ExprVar>(ps.at(in), ps.at(in), ps.symbols.create(in.string_view())); - } -}; - -template<> struct BuildAST<grammar::expr::int_> { - static void apply(const auto & in, ExprState & s, State & ps) { - int64_t v; - if (std::from_chars(in.begin(), in.end(), v).ec != std::errc{}) { - throw ParseError({ - .msg = HintFmt("invalid integer '%1%'", in.string_view()), - .pos = ps.positions[ps.at(in)], - }); - } - s.pushExpr<ExprInt>(noPos, v); - } -}; - -template<> struct BuildAST<grammar::expr::float_> { - static void apply(const auto & in, ExprState & s, State & ps) { - // copy the input into a temporary string so we can call stod. - // can't use from_chars because libc++ (thus darwin) does not have it, - // and floats are not performance-sensitive anyway. if they were you'd - // be in much bigger trouble than this. - // - // we also get to do a locale-save dance because stod is locale-aware and - // something (a plugin?) may have called setlocale or uselocale. - static struct locale_hack { - locale_t posix; - locale_hack(): posix(newlocale(LC_ALL_MASK, "POSIX", 0)) - { - if (posix == 0) - throw SysError("could not get POSIX locale"); - } - } locale; - - auto tmp = in.string(); - double v = [&] { - auto oldLocale = uselocale(locale.posix); - Finally resetLocale([=] { uselocale(oldLocale); }); - try { - return std::stod(tmp); - } catch (...) { - throw ParseError({ - .msg = HintFmt("invalid float '%1%'", in.string_view()), - .pos = ps.positions[ps.at(in)], - }); - } - }(); - s.pushExpr<ExprFloat>(noPos, v); - } -}; - -struct StringState : SubexprState { - using SubexprState::SubexprState; - - std::string currentLiteral; - PosIdx currentPos; - std::vector<std::pair<nix::PosIdx, std::unique_ptr<Expr>>> parts; - - void append(PosIdx pos, std::string_view s) - { - if (currentLiteral.empty()) - currentPos = pos; - currentLiteral += s; - } - - // FIXME this truncates strings on NUL for compat with the old parser. ideally - // we should use the decomposition the g gives us instead of iterating over - // the entire string again. - static void unescapeStr(std::string & str) - { - char * s = str.data(); - char * t = s; - char c; - while ((c = *s++)) { - if (c == '\\') { - c = *s++; - if (c == 'n') *t = '\n'; - else if (c == 'r') *t = '\r'; - else if (c == 't') *t = '\t'; - else *t = c; - } - else if (c == '\r') { - /* Normalise CR and CR/LF into LF. */ - *t = '\n'; - if (*s == '\n') s++; /* cr/lf */ - } - else *t = c; - t++; - } - str.resize(t - str.data()); - } - - void endLiteral() - { - if (!currentLiteral.empty()) { - unescapeStr(currentLiteral); - parts.emplace_back(currentPos, std::make_unique<ExprString>(std::move(currentLiteral))); - } - } - - std::unique_ptr<Expr> finish() - { - if (parts.empty()) { - unescapeStr(currentLiteral); - return std::make_unique<ExprString>(std::move(currentLiteral)); - } else { - endLiteral(); - auto pos = parts[0].first; - return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts)); - } - } -}; - -template<typename... Content> struct BuildAST<grammar::string::literal<Content...>> { - static void apply(const auto & in, StringState & s, State & ps) { - s.append(ps.at(in), in.string_view()); - } -}; - -template<> struct BuildAST<grammar::string::cr_lf> { - static void apply(const auto & in, StringState & s, State & ps) { - s.append(ps.at(in), in.string_view()); // FIXME compat with old parser - } -}; - -template<> struct BuildAST<grammar::string::interpolation> { - static void apply(const auto & in, StringState & s, State & ps) { - s.endLiteral(); - s.parts.emplace_back(ps.at(in), s->popExprOnly()); - } -}; - -template<> struct BuildAST<grammar::string::escape> { - static void apply(const auto & in, StringState & s, State & ps) { - s.append(ps.at(in), "\\"); // FIXME compat with old parser - s.append(ps.at(in), in.string_view()); - } -}; - -template<> struct BuildAST<grammar::string> : change_head<StringState> { - static void success0(StringState & s, ExprState & e, State &) { - e.exprs.emplace_back(noPos, s.finish()); - } -}; - -struct IndStringState : SubexprState { - using SubexprState::SubexprState; - - std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> parts; -}; - -template<bool Indented, typename... Content> -struct BuildAST<grammar::ind_string::literal<Indented, Content...>> { - static void apply(const auto & in, IndStringState & s, State & ps) { - s.parts.emplace_back(ps.at(in), StringToken{in.string_view(), Indented}); - } -}; - -template<> struct BuildAST<grammar::ind_string::interpolation> { - static void apply(const auto & in, IndStringState & s, State & ps) { - s.parts.emplace_back(ps.at(in), s->popExprOnly()); - } -}; - -template<> struct BuildAST<grammar::ind_string::escape> { - static void apply(const auto & in, IndStringState & s, State & ps) { - switch (*in.begin()) { - case 'n': s.parts.emplace_back(ps.at(in), StringToken{"\n"}); break; - case 'r': s.parts.emplace_back(ps.at(in), StringToken{"\r"}); break; - case 't': s.parts.emplace_back(ps.at(in), StringToken{"\t"}); break; - default: s.parts.emplace_back(ps.at(in), StringToken{in.string_view()}); break; - } - } -}; - -template<> struct BuildAST<grammar::ind_string> : change_head<IndStringState> { - static void success(const auto & in, IndStringState & s, ExprState & e, State & ps) { - e.exprs.emplace_back(noPos, ps.stripIndentation(ps.at(in), std::move(s.parts))); - } -}; - -template<typename... Content> struct BuildAST<grammar::path::literal<Content...>> { - static void apply(const auto & in, StringState & s, State & ps) { - s.append(ps.at(in), in.string_view()); - s.endLiteral(); - } -}; - -template<> struct BuildAST<grammar::path::interpolation> : BuildAST<grammar::string::interpolation> {}; - -template<> struct BuildAST<grammar::path::anchor> { - static void apply(const auto & in, StringState & s, State & ps) { - Path path(absPath(in.string(), ps.basePath.path.abs())); - /* add back in the trailing '/' to the first segment */ - if (in.string_view().ends_with('/') && in.size() > 1) - path += "/"; - s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path))); - } -}; - -template<> struct BuildAST<grammar::path::home_anchor> { - static void apply(const auto & in, StringState & s, State & ps) { - if (evalSettings.pureEval) - throw Error("the path '%s' can not be resolved in pure mode", in.string_view()); - Path path(getHome() + in.string_view().substr(1)); - s.parts.emplace_back(ps.at(in), new ExprPath(std::move(path))); - } -}; - -template<> struct BuildAST<grammar::path::searched_path> { - static void apply(const auto & in, StringState & s, State & ps) { - std::vector<std::unique_ptr<Expr>> args{2}; - args[0] = std::make_unique<ExprVar>(ps.s.nixPath); - args[1] = std::make_unique<ExprString>(in.string()); - s.parts.emplace_back( - ps.at(in), - std::make_unique<ExprCall>( - ps.at(in), - std::make_unique<ExprVar>(ps.s.findFile), - std::move(args))); - } -}; - -template<> struct BuildAST<grammar::path> : change_head<StringState> { - template<typename E> - static void check_slash(PosIdx end, StringState & s, State & ps) { - auto e = dynamic_cast<E *>(s.parts.back().second.get()); - if (!e || !e->s.ends_with('/')) - return; - if (s.parts.size() > 1 || e->s != "/") - throw ParseError({ - .msg = HintFmt("path has a trailing slash"), - .pos = ps.positions[end], - }); - } - - static void success(const auto & in, StringState & s, ExprState & e, State & ps) { - s.endLiteral(); - check_slash<ExprPath>(ps.atEnd(in), s, ps); - check_slash<ExprString>(ps.atEnd(in), s, ps); - if (s.parts.size() == 1) { - e.exprs.emplace_back(noPos, std::move(s.parts.back().second)); - } else { - e.pushExpr<ExprConcatStrings>(ps.at(in), ps.at(in), false, std::move(s.parts)); - } - } -}; - -// strings and paths sare handled fully by the grammar-level rule for now -template<> struct BuildAST<grammar::expr::string> : p::maybe_nothing {}; -template<> struct BuildAST<grammar::expr::ind_string> : p::maybe_nothing {}; -template<> struct BuildAST<grammar::expr::path> : p::maybe_nothing {}; - -template<> struct BuildAST<grammar::expr::uri> { - static void apply(const auto & in, ExprState & s, State & ps) { - bool URLLiterals = ps.featureSettings.isEnabled(Dep::UrlLiterals); - if (!URLLiterals) - throw ParseError({ - .msg = HintFmt("URL literals are deprecated, allow using them with --extra-deprecated-features=url-literals"), - .pos = ps.positions[ps.at(in)] - }); - s.pushExpr<ExprString>(ps.at(in), in.string()); - } -}; - -template<> struct BuildAST<grammar::expr::ancient_let> : change_head<BindingsState> { - static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { - // Added 2024-09-18. Turn into an error at some point in the future. - // See the documentation on deprecated features for more details. - if (!ps.featureSettings.isEnabled(Dep::AncientLet)) - warn( - "%s found at %s. This feature is deprecated and will be removed in the future. Use %s to silence this warning.", - "let {", - ps.positions[ps.at(in)], - "--extra-deprecated-features ancient-let" - ); - - b.attrs.pos = ps.at(in); - b.attrs.recursive = true; - s.pushExpr<ExprSelect>(b.attrs.pos, b.attrs.pos, std::make_unique<ExprAttrs>(std::move(b.attrs)), ps.s.body); - } -}; - -template<> struct BuildAST<grammar::expr::rec_set> : change_head<BindingsState> { - static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { - // Before inserting new attrs, check for __override and throw an error - // (the error will initially be a warning to ease migration) - if (!featureSettings.isEnabled(Dep::RecSetOverrides) && b.attrs.attrs.contains(ps.s.overrides)) { - ps.overridesFound(ps.at(in)); - } - - b.attrs.pos = ps.at(in); - b.attrs.recursive = true; - s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); - } -}; - -template<> struct BuildAST<grammar::expr::set> : change_head<BindingsState> { - static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { - b.attrs.pos = ps.at(in); - s.pushExpr<ExprAttrs>(b.attrs.pos, std::move(b.attrs)); - } -}; - -using ListState = std::vector<std::unique_ptr<Expr>>; - -template<> struct BuildAST<grammar::expr::list> : change_head<ListState> { - static void success(const auto & in, ListState & ls, ExprState & s, State & ps) { - auto e = std::make_unique<ExprList>(); - e->elems = std::move(ls); - s.exprs.emplace_back(ps.at(in), std::move(e)); - } -}; - -template<> struct BuildAST<grammar::expr::list::entry> : change_head<ExprState> { - static void success0(ExprState & e, ListState & s, State & ps) { - s.emplace_back(e.finish(ps).second); - } -}; - -struct SelectState : SubexprState { - using SubexprState::SubexprState; - - PosIdx pos; - ExprSelect * e = nullptr; -}; - -template<> struct BuildAST<grammar::expr::select::head> { - static void apply(const auto & in, SelectState & s, State & ps) { - s.pos = ps.at(in); - } -}; - -template<> struct BuildAST<grammar::expr::select::attr> : change_head<AttrState> { - static void success0(AttrState & a, SelectState & s, State &) { - s.e = &s->pushExpr<ExprSelect>(s.pos, s.pos, s->popExprOnly(), std::move(a.attrs), nullptr); - } -}; - -template<> struct BuildAST<grammar::expr::select::attr_or> { - static void apply0(SelectState & s, State &) { - s.e->def = s->popExprOnly(); - } -}; - -template<> struct BuildAST<grammar::expr::select::as_app_or> { - static void apply(const auto & in, SelectState & s, State & ps) { - std::vector<std::unique_ptr<Expr>> args(1); - args[0] = std::make_unique<ExprVar>(ps.at(in), ps.s.or_); - s->pushExpr<ExprCall>(s.pos, s.pos, s->popExprOnly(), std::move(args)); - } -}; - -template<> struct BuildAST<grammar::expr::select> : change_head<SelectState> { - static void success0(const auto &...) {} -}; - -struct AppState : SubexprState { - using SubexprState::SubexprState; - - PosIdx pos; - ExprCall * e = nullptr; -}; - -template<> struct BuildAST<grammar::expr::app::select_or_fn> { - static void apply(const auto & in, AppState & s, State & ps) { - s.pos = ps.at(in); - } -}; - -template<> struct BuildAST<grammar::expr::app::first_arg> { - static void apply(auto & in, AppState & s, State & ps) { - auto arg = s->popExprOnly(), fn = s->popExprOnly(); - if ((s.e = dynamic_cast<ExprCall *>(fn.get()))) { - // TODO remove. - // AST compat with old parser, semantics are the same. - // this can happen on occasions such as `<p> <p>` or `a or b or`, - // neither of which are super worth optimizing. - s.e->args.push_back(std::move(arg)); - s->exprs.emplace_back(noPos, std::move(fn)); - } else { - std::vector<std::unique_ptr<Expr>> args{1}; - args[0] = std::move(arg); - s.e = &s->pushExpr<ExprCall>(s.pos, s.pos, std::move(fn), std::move(args)); - } - } -}; - -template<> struct BuildAST<grammar::expr::app::another_arg> { - static void apply0(AppState & s, State & ps) { - s.e->args.push_back(s->popExprOnly()); - } -}; - -template<> struct BuildAST<grammar::expr::app> : change_head<AppState> { - static void success0(const auto &...) {} -}; - -template<typename Op> struct BuildAST<grammar::expr::operator_<Op>> { - static void apply(const auto & in, ExprState & s, State & ps) { - s.pushOp(ps.at(in), Op{}, ps); - } -}; -template<> struct BuildAST<grammar::expr::operator_<grammar::op::has_attr>> : change_head<AttrState> { - static void success(const auto & in, AttrState & a, ExprState & s, State & ps) { - s.pushOp(ps.at(in), ExprState::has_attr{{}, std::move(a.attrs)}, ps); - } -}; - -template<> struct BuildAST<grammar::expr::lambda::arg> { - static void apply(const auto & in, LambdaState & s, State & ps) { - s.arg = ps.symbols.create(in.string_view()); - } -}; - -template<> struct BuildAST<grammar::expr::lambda> : change_head<LambdaState> { - static void success(const auto & in, LambdaState & l, ExprState & s, State & ps) { - if (l.formals) - l.formals = ps.validateFormals(std::move(l.formals), ps.at(in), l.arg); - s.pushExpr<ExprLambda>(ps.at(in), ps.at(in), l.arg, std::move(l.formals), l->popExprOnly()); - } -}; - -template<> struct BuildAST<grammar::expr::assert_> { - static void apply(const auto & in, ExprState & s, State & ps) { - auto body = s.popExprOnly(), cond = s.popExprOnly(); - s.pushExpr<ExprAssert>(ps.at(in), ps.at(in), std::move(cond), std::move(body)); - } -}; - -template<> struct BuildAST<grammar::expr::with> { - static void apply(const auto & in, ExprState & s, State & ps) { - auto body = s.popExprOnly(), scope = s.popExprOnly(); - s.pushExpr<ExprWith>(ps.at(in), ps.at(in), std::move(scope), std::move(body)); - } -}; - -template<> struct BuildAST<grammar::expr::let> : change_head<BindingsState> { - static void success(const auto & in, BindingsState & b, ExprState & s, State & ps) { - if (!b.attrs.dynamicAttrs.empty()) - throw ParseError({ - .msg = HintFmt("dynamic attributes not allowed in let"), - .pos = ps.positions[ps.at(in)] - }); - - s.pushExpr<ExprLet>(ps.at(in), std::make_unique<ExprAttrs>(std::move(b.attrs)), b->popExprOnly()); - } -}; - -template<> struct BuildAST<grammar::expr::if_> { - static void apply(const auto & in, ExprState & s, State & ps) { - auto else_ = s.popExprOnly(), then = s.popExprOnly(), cond = s.popExprOnly(); - s.pushExpr<ExprIf>(ps.at(in), ps.at(in), std::move(cond), std::move(then), std::move(else_)); - } -}; - -template<> struct BuildAST<grammar::expr> : change_head<ExprState> { - static void success0(ExprState & inner, ExprState & outer, State & ps) { - outer.exprs.push_back(inner.finish(ps)); - } -}; - -} -} +// Linter complains that this is a "suspicious include of file with '.cc' extension". +// While that is correct and generally not great, it is one of the less bad options to pick +// in terms of diff noise. +// NOLINTNEXTLINE(bugprone-suspicious-include) +#include "parser-impl1.inc.cc" namespace nix { @@ -884,7 +38,6 @@ Expr * EvalState::parse( exprSymbols, featureSettings, }; - parser::ExprState x; assert(length >= 2); assert(text[length - 1] == 0); @@ -893,7 +46,12 @@ Expr * EvalState::parse( p::string_input<p::tracking_mode::lazy> inp{std::string_view{text, length}, "input"}; try { - p::parse<parser::grammar::root, parser::BuildAST, parser::Control>(inp, x, s); + parser::v1::ExprState x; + p::parse<parser::grammar::v1::root, parser::v1::BuildAST, parser::v1::Control>(inp, x, s); + + auto [_pos, result] = x.finish(s); + result->bindVars(*this, staticEnv); + return result.release(); } catch (p::parse_error & e) { auto pos = e.positions().back(); throw ParseError({ @@ -901,10 +59,6 @@ Expr * EvalState::parse( .pos = positions[s.positions.add(s.origin, pos.byte)] }); } - - auto [_pos, result] = x.finish(s); - result->bindVars(*this, staticEnv); - return result.release(); } } diff --git a/src/libexpr/parser/state.hh b/src/libexpr/parser/state.hh index 1d57e2f5f..b969f73e4 100644 --- a/src/libexpr/parser/state.hh +++ b/src/libexpr/parser/state.hh @@ -6,11 +6,21 @@ namespace nix::parser { -struct StringToken -{ - std::string_view s; - bool hasIndentation; - operator std::string_view() const { return s; } +struct IndStringLine { + // String containing only the leading whitespace of the line. May be empty. + std::string_view indentation; + // Position of the line start (before the indentation) + PosIdx pos; + + // Whether the line contains anything besides indentation and line break + bool hasContent = false; + + std::vector< + std::pair< + PosIdx, + std::variant<std::unique_ptr<Expr>, std::string_view> + > + > parts = {}; }; struct State @@ -27,8 +37,7 @@ struct State void overridesFound(const PosIdx pos); void addAttr(ExprAttrs * attrs, AttrPath && attrPath, std::unique_ptr<Expr> e, const PosIdx pos); std::unique_ptr<Formals> validateFormals(std::unique_ptr<Formals> formals, PosIdx pos = noPos, Symbol arg = {}); - std::unique_ptr<Expr> stripIndentation(const PosIdx pos, - std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es); + std::unique_ptr<Expr> stripIndentation(const PosIdx pos, std::vector<IndStringLine> && line); // lazy positioning means we don't get byte offsets directly, in.position() would work // but also requires line and column (which is expensive) @@ -182,98 +191,87 @@ inline std::unique_ptr<Formals> State::validateFormals(std::unique_ptr<Formals> return formals; } -inline std::unique_ptr<Expr> State::stripIndentation(const PosIdx pos, - std::vector<std::pair<PosIdx, std::variant<std::unique_ptr<Expr>, StringToken>>> && es) +inline std::unique_ptr<Expr> State::stripIndentation( + const PosIdx pos, + std::vector<IndStringLine> && lines) { - if (es.empty()) return std::make_unique<ExprString>(""); + /* If the only line is whitespace-only, directly return empty string. + * The rest of the code relies on the final string not being empty. + */ + if (lines.size() == 1 && lines.front().parts.empty()) { + return std::make_unique<ExprString>(""); + } - /* Figure out the minimum indentation. Note that by design - whitespace-only final lines are not taken into account. (So - the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */ - bool atStartOfLine = true; /* = seen only whitespace in the current line */ + /* If the last line only contains whitespace, trim it to not cause excessive whitespace. + * (Other whitespace-only lines get stripped only of the common indentation, and excess + * whitespace becomes part of the string.) + */ + if (lines.back().parts.empty()) { + lines.back().indentation = {}; + } + + /* Figure out the minimum indentation. Note that by design + whitespace-only lines are not taken into account. */ size_t minIndent = 1000000; - size_t curIndent = 0; - for (auto & [i_pos, i] : es) { - auto * str = std::get_if<StringToken>(&i); - if (!str || !str->hasIndentation) { - /* Anti-quotations and escaped characters end the current start-of-line whitespace. */ - if (atStartOfLine) { - atStartOfLine = false; - if (curIndent < minIndent) minIndent = curIndent; - } - continue; - } - for (size_t j = 0; j < str->s.size(); ++j) { - if (atStartOfLine) { - if (str->s[j] == ' ') - curIndent++; - else if (str->s[j] == '\n') { - /* Empty line, doesn't influence minimum - indentation. */ - curIndent = 0; - } else { - atStartOfLine = false; - if (curIndent < minIndent) minIndent = curIndent; - } - } else if (str->s[j] == '\n') { - atStartOfLine = true; - curIndent = 0; - } + for (auto & line : lines) { + if (line.hasContent) { + minIndent = std::min(minIndent, line.indentation.size()); } } /* Strip spaces from each line. */ - std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> es2; - atStartOfLine = true; - size_t curDropped = 0; - size_t n = es.size(); - auto i = es.begin(); - const auto trimExpr = [&] (std::unique_ptr<Expr> e) { - atStartOfLine = false; - curDropped = 0; - es2.emplace_back(i->first, std::move(e)); - }; - const auto trimString = [&] (const StringToken t) { - std::string s2; - for (size_t j = 0; j < t.s.size(); ++j) { - if (atStartOfLine) { - if (t.s[j] == ' ') { - if (curDropped++ >= minIndent) - s2 += t.s[j]; - } - else if (t.s[j] == '\n') { - curDropped = 0; - s2 += t.s[j]; - } else { - atStartOfLine = false; - curDropped = 0; - s2 += t.s[j]; - } - } else { - s2 += t.s[j]; - if (t.s[j] == '\n') atStartOfLine = true; - } - } + for (auto & line : lines) { + line.indentation.remove_prefix(std::min(minIndent, line.indentation.size())); + } + + /* Concat the parts together again */ + + std::vector<std::pair<PosIdx, std::unique_ptr<Expr>>> parts; + /* Accumulator for merging intermediates */ + PosIdx merged_pos; + std::string merged = ""; - /* Remove the last line if it is empty and consists only of - spaces. */ - if (n == 1) { - std::string::size_type p = s2.find_last_of('\n'); - if (p != std::string::npos && s2.find_first_not_of(' ', p + 1) == std::string::npos) - s2 = std::string(s2, 0, p + 1); + auto push_merged = [&] (PosIdx i_pos, std::string_view str) { + if (merged.empty()) { + merged_pos = i_pos; } + merged += str; + }; - es2.emplace_back(i->first, std::make_unique<ExprString>(std::move(s2))); + auto flush_merged = [&] () { + if (!merged.empty()) { + parts.emplace_back(merged_pos, std::make_unique<ExprString>(std::string(merged))); + merged.clear(); + } }; - for (; i != es.end(); ++i, --n) { - std::visit(overloaded { trimExpr, trimString }, std::move(i->second)); + + for (auto && [li, line] : enumerate(lines)) { + push_merged(line.pos, line.indentation); + + for (auto & val : line.parts) { + auto &[i_pos, item] = val; + + std::visit(overloaded{ + [&](std::string_view str) { + push_merged(i_pos, str); + }, + [&](std::unique_ptr<Expr> expr) { + flush_merged(); + parts.emplace_back(i_pos, std::move(expr)); + }, + }, std::move(item)); + } } - /* If this is a single string, then don't do a concatenation. */ - if (es2.size() == 1 && dynamic_cast<ExprString *>(es2[0].second.get())) { - return std::move(es2[0].second); + flush_merged(); + + /* If this is a single string, then don't do a concatenation. + * (If it's a single expression, still do the ConcatStrings to properly force it being a string.) + */ + if (parts.size() == 1 && dynamic_cast<ExprString *>(parts[0].second.get())) { + return std::move(parts[0].second); } - return std::make_unique<ExprConcatStrings>(pos, true, std::move(es2)); + return std::make_unique<ExprConcatStrings>(pos, true, std::move(parts)); } } diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index b0e14a26e..c98fe2a03 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -394,7 +394,8 @@ static RegisterPrimOp primop_fetchGit({ [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References By default, the `ref` value is prefixed with `refs/heads/`. - As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/`. + As of 2.3.0, Nix will not prefix `refs/heads/` if `ref` starts with `refs/` or + if `ref` looks like a commit hash for backwards compatibility with CppNix 2.3. - `submodules` (default: `false`) |