4 files changed, 558 insertions, 299 deletions
diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk
index cfd88be99..67fd92d77 100644
--- a/tests/unit/libutil-support/local.mk
+++ b/tests/unit/libutil-support/local.mk
@@ -8,7 +8,7 @@ libutil-test-support_INSTALL_DIR :=
 
 libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
 
-libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
+libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES) -I src/libutil
 
 # libexpr so we can steal their string printer from print.cc
 libutil-test-support_LIBS = libutil libexpr
diff --git a/tests/unit/libutil-support/tests/characterization.hh b/tests/unit/libutil-support/tests/characterization.hh
index 6ee994d71..7f570f619 100644
--- a/tests/unit/libutil-support/tests/characterization.hh
+++ b/tests/unit/libutil-support/tests/characterization.hh
@@ -74,20 +74,20 @@ public:
     {
         auto file = goldenMaster(testStem);
 
-        auto got = test();
+        auto actual = test();
 
         if (testAccept())
         {
             createDirs(dirOf(file));
-            writeFile2(file, got);
+            writeFile2(file, actual);
             GTEST_SKIP()
                 << "Updating golden master "
                 << file;
         }
         else
         {
-            decltype(got) expected = readFile2(file);
-            ASSERT_EQ(got, expected);
+            decltype(actual) expected = readFile2(file);
+            ASSERT_EQ(expected, actual);
         }
     }
 
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc
index 4edf434be..023f86cd7 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@@ -1,248 +1,444 @@
 #include "cli-literate-parser.hh"
 #include "escape-string.hh"
-#include "libexpr/print.hh"
 #include "escape-char.hh"
+#include "libexpr/print.hh"
 #include "types.hh"
 #include "util.hh"
 #include <ranges>
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/algorithm/string/trim.hpp>
 #include <iostream>
 #include <memory>
-#include <boost/algorithm/string/trim.hpp>
+#include <sstream>
+#include <variant>
+
+#include "cli-literate-parser.hh"
+#include "escape-string.hh"
+#include "fmt.hh"
+#include "libexpr/print.hh"
+#include "shlex.hh"
+#include "types.hh"
+#include "util.hh"
+
+static constexpr const bool DEBUG_PARSER = false;
 
 using namespace std::string_literals;
+using namespace boost::algorithm;
 
 namespace nix {
 
-static constexpr const bool DEBUG_PARSER = false;
+namespace cli_literate_parser {
 
-constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
+struct Parser
 {
-    return std::visit(
-        overloaded{// clang-format off
-            [](Indent const&) -> const char * { return "indent"; },
-            [](Commentary const&) -> const char * { return "indent"; },
-            [](Prompt const&) -> const char * { return "prompt"; },
-            [](Command const&) -> const char * { return "command"; },
-            [](OutputLine const&) -> const char * { return "output_line"; }},
-        // clang-format on
-        s);
-}
+    Parser(const std::string input, Config config)
+        : input(input)
+        , rest(this->input)
+        , prompt(config.prompt)
+        , indentString(std::string(config.indent, ' '))
+        , lastWasOutput(false)
+        , syntax{}
+    {
+        assert(!prompt.empty());
+    }
 
-auto CLILiterateParser::Node::print() const -> std::string
-{
-    std::ostringstream s{};
-    switch (kind) {
-    case NodeKind::COMMENTARY:
-        s << "Commentary ";
-        break;
-    case NodeKind::COMMAND:
-        s << "Command ";
-        break;
-    case NodeKind::OUTPUT:
-        s << "Output ";
-        break;
-    }
-    escapeString(s, this->text);
-    return s.str();
-}
+    const std::string input;
+    std::string_view rest;
+    const std::string prompt;
+    const std::string indentString;
+
+    /** Last line was output, so we consider a blank to be part of the output */
+    bool lastWasOutput;
+
+    /**
+     * Nodes of syntax being built.
+     */
+    std::vector<Node> syntax;
+
+    auto dbg(std::string_view state) -> void
+    {
+        std::cout << state << ": ";
+        escapeString(
+            std::cout,
+            rest,
+            {
+                .maxLength = 40,
+                .outputAnsiColors = true,
+                .escapeNonPrinting = true,
+            }
+        );
+        std::cout << std::endl;
+    }
 
-void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
-{
-    for (auto & node : nodes) {
-        *os << node.print() << "\\n";
+    template<typename T>
+    auto pushNode(T node) -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            std::cout << debugNode(node);
+        }
+        syntax.emplace_back(node);
     }
-}
 
-auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
-{
-    CLILiterateParser p{std::move(prompt), indent};
-    p.feed(input);
-    return std::move(p).intoSyntax();
-}
+    auto parseLiteral(const char c) -> bool
+    {
+        if (rest.starts_with(c)) {
+            rest.remove_prefix(1);
+            return true;
+        } else {
+            return false;
+        }
+    }
 
-auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
-{
-    return std::move(this->syntax_);
-}
+    auto parseLiteral(const std::string_view & literal) -> bool
+    {
+        if (rest.starts_with(literal)) {
+            rest.remove_prefix(literal.length());
+            return true;
+        } else {
+            return false;
+        }
+    }
 
-CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
-    : state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
-    , prompt_(prompt)
-    , indent_(indent)
-    , lastWasOutput_(false)
-    , syntax_{}
-{
-    assert(!prompt.empty());
-}
+    auto parseBool() -> bool
+    {
+        auto result = false;
+        if (parseLiteral("true")) {
+            result = true;
+        } else if (parseLiteral("false")) {
+            result = false;
+        } else {
+            throw ParseError("true or false", std::string(rest));
+        }
+        auto untilNewline = parseUntilNewline();
+        if (!untilNewline.empty()) {
+            throw ParseError("nothing after true or false", untilNewline);
+        }
+        return result;
+    }
 
-void CLILiterateParser::feed(char c)
-{
-    if constexpr (DEBUG_PARSER) {
-        std::cout << stateDebug(state_) << " " << MaybeHexEscapedChar{c} << "\n";
+    auto parseUntilNewline() -> std::string
+    {
+        auto pos = rest.find('\n');
+        if (pos == std::string_view::npos) {
+            throw ParseError("text and then newline", std::string(rest));
+        } else {
+            // `parseOutput()` sets this to true anyways.
+            lastWasOutput = false;
+            auto result = std::string(rest, 0, pos);
+            rest.remove_prefix(pos + 1);
+            return result;
+        }
+    }
+
+    auto parseIndent() -> bool
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("indent");
+        }
+        if (indentString.empty()) {
+            return true;
+        }
+
+        if (parseLiteral(indentString)) {
+            pushNode(Indent(indentString));
+            return true;
+        } else {
+            if constexpr (DEBUG_PARSER) {
+                dbg("indent failed");
+            }
+            return false;
+        }
     }
 
-    if (c == '\n') {
-        onNewline();
-        return;
+    auto parseCommand() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("command");
+        }
+        auto untilNewline = parseUntilNewline();
+        pushNode(Command(untilNewline));
     }
 
-    std::visit(
-        overloaded{
-            [&](Indent & s) {
-                if (c == ' ') {
-                    if (++s.pos >= indent_) {
-                        transition(Prompt{});
+    auto parsePrompt() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("prompt");
+        }
+        if (parseLiteral(prompt)) {
+            pushNode(Prompt(prompt));
+            if (rest.empty()) {
+                return;
+            }
+            parseCommand();
+        } else {
+            parseOutput();
+        }
+    }
+
+    auto parseOutput() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("output");
+        }
+        auto untilNewline = parseUntilNewline();
+        pushNode(Output(untilNewline));
+        lastWasOutput = true;
+    }
+
+    auto parseAtSign() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("@ symbol");
+        }
+        if (!parseLiteral('@')) {
+            parseOutputOrCommentary();
+            return;
+        }
+        if (parseLiteral("args ")) {
+            parseArgs();
+        } else if (parseLiteral("should-start ")) {
+            if constexpr (DEBUG_PARSER) {
+                dbg("@should-start");
+            }
+            auto shouldStart = parseBool();
+            pushNode(ShouldStart{shouldStart});
+        }
+    }
+
+    auto parseArgs() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("@args");
+        }
+        auto untilNewline = parseUntilNewline();
+        pushNode(Args(untilNewline));
+    }
+
+    auto parseOutputOrCommentary() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("output/commentary");
+        }
+        auto oldLastWasOutput = lastWasOutput;
+        auto untilNewline = parseUntilNewline();
+
+        auto trimmed = trim_right_copy(untilNewline);
+
+        if (oldLastWasOutput && trimmed.empty()) {
+            pushNode(Output{trimmed});
+        } else {
+            pushNode(Commentary{untilNewline});
+        }
+    }
+
+    auto parseStartOfLine() -> void
+    {
+        if constexpr (DEBUG_PARSER) {
+            dbg("start of line");
+        }
+        if (parseIndent()) {
+            parsePrompt();
+        } else {
+            parseAtSign();
+        }
+    }
+
+    auto parse() && -> ParseResult
+    {
+        // Begin the recursive descent parser at the start of a new line.
+        while (!rest.empty()) {
+            parseStartOfLine();
+        }
+        return std::move(*this).intoParseResult();
+    }
+
+    auto intoParseResult() && -> ParseResult
+    {
+        // Do another pass over the nodes to produce auxiliary results like parsed
+        // command line arguments.
+        std::vector<std::string> args;
+        std::vector<Node> newSyntax;
+        auto shouldStart = true;
+
+        for (auto it = syntax.begin(); it != syntax.end(); ++it) {
+            Node node = std::move(*it);
+            std::visit(
+                overloaded{
+                    [&](Args & e) {
+                        auto split = shell_split(std::string(e.text));
+                        args.insert(args.end(), split.begin(), split.end());
+                    },
+                    [&](ShouldStart & e) { shouldStart = e.shouldStart; },
+                    [&](auto & e) {},
+                },
+                node
+            );
+
+            newSyntax.push_back(std::move(node));
+        }
+
+        return ParseResult{
+            .syntax = std::move(newSyntax),
+            .args = std::move(args),
+            .shouldStart = shouldStart,
+        };
+    }
+};
+
+template<typename View>
+auto tidySyntax(View syntax) -> std::vector<Node>
+{
+    // Note: Setting `lastWasCommand` lets us trim blank lines at the start and
+    // end of the output stream.
+    auto lastWasCommand = true;
+    std::vector<Node> newSyntax;
+
+    for (auto it = syntax.begin(); it != syntax.end(); ++it) {
+        Node node = *it;
+        // Only compare `Command` and `Output` nodes.
+        if (std::visit([&](auto && e) { return !e.shouldCompare(); }, node)) {
+            continue;
+        }
+
+        // Remove blank lines before and after commands. This lets us keep nice
+        // whitespace in the test files.
+        auto shouldKeep = std::visit(
+            overloaded{
+                [&](Command & e) {
+                    lastWasCommand = true;
+                    auto trimmed = trim_right_copy(e.text);
+                    if (trimmed.empty()) {
+                        return false;
+                    } else {
+                        e.text = trimmed;
+                        return true;
                     }
-                } else {
-                    transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
-                }
-            },
-            [&](Prompt & s) {
-                if (s.pos >= prompt_.length()) {
-                    transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
-                    return;
-                } else if (c == prompt_[s.pos]) {
-                    // good prompt character
-                    ++s.pos;
-                } else {
-                    // didn't match the prompt, so it must have actually been output.
-                    s.lineAccumulator.push_back(c);
-                    transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
-                    return;
-                }
-                s.lineAccumulator.push_back(c);
+                },
+                [&](Output & e) {
+                    std::string trimmed = trim_right_copy(e.text);
+                    if (lastWasCommand && trimmed.empty()) {
+                        // NB: Keep `lastWasCommand` true in this branch so we
+                        // can keep pruning empty output lines.
+                        return false;
+                    } else {
+                        e.text = trimmed;
+                        lastWasCommand = false;
+                        return true;
+                    }
+                },
+                [&](auto & e) {
+                    lastWasCommand = false;
+                    return false;
+                },
             },
-            [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
-        state_);
+            node
+        );
+
+        if (shouldKeep) {
+            newSyntax.push_back(node);
+        }
+    }
+
+    return newSyntax;
 }
 
-void CLILiterateParser::onNewline()
+auto ParseResult::tidyOutputForComparison() -> std::vector<Node>
 {
-    State lastState = std::move(state_);
-    bool newLastWasOutput = false;
-
-    syntax_.push_back(std::visit(
-        overloaded{
-            [&](Indent & s) {
-                // XXX: technically this eats trailing spaces
-
-                // a newline following output is considered part of that output
-                if (lastWasOutput_) {
-                    newLastWasOutput = true;
-                    return Node::mkOutput("");
-                }
-                return Node::mkCommentary("");
-            },
-            [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
-            [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
-            [&](OutputLine & s) {
-                newLastWasOutput = true;
-                return Node::mkOutput(std::move(s.lineAccumulator));
-            },
-            [&](Prompt & s) {
-                // INDENT followed by newline is also considered a blank output line
-                return Node::mkOutput(std::move(s.lineAccumulator));
-            }},
-        lastState));
-
-    transition(Indent{});
-    lastWasOutput_ = newLastWasOutput;
+    auto reversed = tidySyntax(std::ranges::reverse_view(syntax));
+    auto unreversed = tidySyntax(std::ranges::reverse_view(reversed));
+    return unreversed;
 }
 
-void CLILiterateParser::feed(std::string_view s)
+void ParseResult::interpolatePwd(std::string_view pwd)
 {
-    for (char ch : s) {
-        feed(ch);
+    std::vector<std::string> newArgs;
+    for (auto & arg : args) {
+        newArgs.push_back(replaceStrings(arg, "${PWD}", pwd));
     }
+    args = std::move(newArgs);
 }
 
-void CLILiterateParser::transition(State new_state)
+const char * ParseError::what() const noexcept
 {
-    // When we expect INDENT and we are parsing without indents, commentary
-    // cannot exist, so we want to transition directly into PROMPT before
-    // resuming normal processing.
-    if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
-        new_state = Prompt{AccumulatingState{}, i->pos};
+    if (what_) {
+        return what_->c_str();
+    } else {
+        auto escaped = escapeString(rest, {.maxLength = 256, .escapeNonPrinting = true});
+        auto hint =
+            HintFmt("Parse error: Expected %1%, got:\n%2%", expected, Uncolored(escaped));
+        what_ = hint.str();
+        return what_->c_str();
     }
-
-    state_ = new_state;
 }
 
-auto CLILiterateParser::syntax() const -> std::vector<Node> const &
+auto parse(const std::string input, Config config) -> ParseResult
 {
-    return syntax_;
+    return Parser(input, config).parse();
 }
 
-auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
-    -> std::string
+std::ostream & operator<<(std::ostream & output, const Args & node)
 {
-    std::string indent_str(indent, ' ');
-    std::ostringstream out{};
-
-    for (auto & node : syntax) {
-        switch (node.kind) {
-        case NodeKind::COMMENTARY:
-            out << node.text << "\n";
-            break;
-        case NodeKind::COMMAND:
-            out << indent_str << prompt << node.text << "\n";
-            break;
-        case NodeKind::OUTPUT:
-            out << indent_str << node.text << "\n";
-            break;
-        }
-    }
-
-    return out.str();
+    return output << "@args " << node.text;
 }
 
-auto CLILiterateParser::tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>
+std::ostream & operator<<(std::ostream & output, const ShouldStart & node)
 {
-    std::vector<Node> newSyntax{};
-
-    // Eat trailing newlines, so assume that the very end was actually a command
-    bool lastWasCommand = true;
-    bool newLastWasCommand = true;
-
-    auto v = std::ranges::reverse_view(syntax);
+    return output << "@should-start " << (node.shouldStart ? "true" : "false");
+}
 
-    for (auto it = v.begin(); it != v.end(); ++it) {
-        Node item = std::move(*it);
+std::ostream & operator<<(std::ostream & output, const TextNode & rhs)
+{
+    return output << rhs.text;
+}
 
-        lastWasCommand = newLastWasCommand;
-        // chomp commentary
-        if (item.kind == NodeKind::COMMENTARY) {
-            continue;
-        }
+void unparseNode(std::ostream & output, const Node & node, bool withNewline)
+{
+    std::visit(
+        [&](const auto & n) { output << n << (withNewline && n.emitNewlineAfter() ? "\n" : ""); },
+        node
+    );
+}
 
-        if (item.kind == NodeKind::COMMAND) {
-            newLastWasCommand = true;
+template<typename T>
+std::string gtestFormat(T & value)
+{
+    std::ostringstream formatted;
+    unparseNode(formatted, value, true);
+    auto str = formatted.str();
+    // Needs to be the literal string `\n` and not a newline character to
+    // trigger gtest diff printing. Yes seriously.
+    boost::algorithm::replace_all(str, "\n", "\\n");
+    return str;
+}
 
-            if (item.text == "") {
-                // chomp empty commands
-                continue;
-            }
-        }
+void PrintTo(const std::vector<Node> & nodes, std::ostream * output)
+{
+    for (auto & node : nodes) {
+        *output << gtestFormat(node);
+    }
+}
 
-        if (item.kind == NodeKind::OUTPUT) {
-            // TODO: horrible
-            bool nextIsCommand = (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
-            std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
-            if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
-                // chomp empty text above or directly below commands
-                continue;
-            }
+std::string debugNode(const Node & node)
+{
+    std::ostringstream output;
+    output << std::visit([](const auto & n) { return n.kind(); }, node) << ": ";
+    std::ostringstream contents;
+    unparseNode(contents, node, false);
+    escapeString(output, contents.str(), {.escapeNonPrinting = true});
+    return output.str();
+}
 
-            // real output, stop chomping
-            newLastWasCommand = false;
+auto ParseResult::debugPrint(std::ostream & output) -> void
+{
+    ::nix::cli_literate_parser::debugPrint(output, syntax);
+}
 
-            item = Node::mkOutput(std::move(trimmedText));
-        }
-        newSyntax.push_back(std::move(item));
+void debugPrint(std::ostream & output, std::vector<Node> & nodes)
+{
+    for (auto & node : nodes) {
+        output << debugNode(node) << std::endl;
     }
-
-    std::reverse(newSyntax.begin(), newSyntax.end());
-    return newSyntax;
 }
 
-};
+} // namespace cli_literate_parser
+} // namespace nix
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh
index 4cffd2ba9..2ff9348ef 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@@ -3,132 +3,195 @@
 
 #include <compare>
 #include <memory>
+#include <optional>
 #include <sstream>
+#include <string>
 #include <variant>
 #include <vector>
-#include <string>
 
 namespace nix {
+namespace cli_literate_parser {
+
+// ------------------------- NODES -------------------------
+//
+// To update golden test files while preserving commentary output and other `@`
+// directives, we need to keep commentary output around after parsing.
+
+struct BaseNode {
+  virtual ~BaseNode() = default;
+
+  virtual auto shouldCompare() const -> bool { return false; }
+
+  virtual auto kind() const -> std::string = 0;
+  virtual auto emitNewlineAfter() const -> bool = 0;
+
+  auto operator<=>(const BaseNode &rhs) const = default;
+};
+
+/**
+ * A node containing text. The text should be identical to how the node was
+ * written in the input file. Two text nodes compare by their `text`.
+ */
+struct TextNode : BaseNode {
+  std::string text;
+  auto operator<=>(const TextNode &rhs) const = default;
+  explicit TextNode(std::string text) : text(text) {}
+};
+
+std::ostream &operator<<(std::ostream &output, const TextNode &node);
+
+#define DECLARE_TEXT_NODE(NAME, NEEDS_NEWLINE, SHOULD_COMPARE)                 \
+  struct NAME : TextNode {                                                     \
+    using TextNode::TextNode;                                                  \
+    ~NAME() override = default;                                                \
+                                                                               \
+    auto kind() const -> std::string override { return #NAME; }                \
+    auto emitNewlineAfter() const -> bool override { return NEEDS_NEWLINE; }   \
+    auto shouldCompare() const -> bool override { return SHOULD_COMPARE; }     \
+  };
+
+/* name, needsNewline, shouldCompare */
+DECLARE_TEXT_NODE(Prompt, false, false)
+DECLARE_TEXT_NODE(Command, true, true)
+DECLARE_TEXT_NODE(Output, true, true)
+DECLARE_TEXT_NODE(Commentary, true, false)
+DECLARE_TEXT_NODE(Args, true, false)
+DECLARE_TEXT_NODE(Indent, false, false)
+
+#undef DECLARE_TEXT_NODE
+
+struct ShouldStart : BaseNode {
+  bool shouldStart;
+
+  ShouldStart(bool shouldStart) : shouldStart(shouldStart) {}
+  ~ShouldStart() override = default;
+  auto emitNewlineAfter() const -> bool override { return true; }
+  auto kind() const -> std::string override { return "should-start"; }
+
+  auto operator<=>(const ShouldStart &rhs) const = default;
+};
+std::ostream &operator<<(std::ostream &output, const ShouldStart &node);
+
+/**
+ * Any syntax node, including those that are cosmetic.
+ */
+using Node = std::variant<Prompt, Command, Output, Commentary, Args,
+                          ShouldStart, Indent>;
+
+/** Unparses a node into the exact text that would have created it, followed by
+ * a newline when withNewline is set and the node's emitNewlineAfter() is true */
+void unparseNode(std::ostream &output, const Node &node,
+                 bool withNewline = true);
+
+std::string debugNode(const Node &node);
+void debugPrint(std::ostream &output, std::vector<Node> &nodes);
+
+/**
+ * Override gtest printing for lists of nodes.
+ */
+void PrintTo(std::vector<Node> const &nodes, std::ostream *output);
+
+/**
+ * The result of parsing a test file.
+ */
+struct ParseResult {
+  /**
+   * A set of nodes that can be used to reproduce the input file. This is used
+   * to implement updating the test files.
+   */
+  std::vector<Node> syntax;
+
+  /**
+   * Extra CLI arguments.
+   */
+  std::vector<std::string> args;
+
+  /**
+   * Should the program start successfully?
+   */
+  bool shouldStart = false;
+
+  /**
+   * Replace `${PWD}` with the given value in `args`.
+   */
+  void interpolatePwd(std::string_view pwd);
+
+  /**
+   * Tidy `syntax` to remove unnecessary nodes.
+   */
+  auto tidyOutputForComparison() -> std::vector<Node>;
+
+  auto debugPrint(std::ostream &output) -> void;
+};
+
+/**
+ * A parse error.
+ */
+struct ParseError : std::exception {
+  std::string expected;
+  std::string rest;
+
+  ParseError(std::string expected, std::string rest)
+      : expected(expected), rest(rest) {}
+
+  const char *what() const noexcept override;
+
+private:
+  /**
+   * Cached formatted contents of `what()`.
+   */
+  mutable std::optional<std::string> what_;
+};
+
+struct Config {
+  /**
+   * The prompt string to look for.
+   */
+  std::string prompt;
+  /**
+   * The number of spaces of indent for commands and output.
+   */
+  size_t indent = 2;
+};
+
 /*
- * A DFA parser for literate test cases for CLIs.
+ * A recursive descent parser for literate test cases for CLIs.
  *
  * FIXME: implement merging of these, so you can auto update cases that have
  * comments.
  *
- * Format:
- * COMMENTARY
- * INDENT PROMPT COMMAND
- * INDENT OUTPUT
+ * Syntax:
+ * ```
+ * ( COMMENTARY
+ * | INDENT PROMPT COMMAND
+ * | INDENT OUTPUT
+ * | @args ARGS
+ * | @should-start ( true | false )) *
+ * ```
  *
  * e.g.
+ * ```
  * commentary commentary commentary
+ * @args --foo
+ * @should-start false
  *   nix-repl> :t 1
  *   an integer
+ * ```
  *
- * Yields:
+ * Yields something like:
+ * ```
  * Commentary "commentary commentary commentary"
+ * Args "--foo"
+ * ShouldStart false
  * Command ":t 1"
  * Output "an integer"
+ * ```
  *
  * Note: one Output line is generated for each line of the sources, because
  * this is effectively necessary to be able to align them in the future to
  * auto-update tests.
  */
-class CLILiterateParser
-{
-public:
-
-    enum class NodeKind {
-        COMMENTARY,
-        COMMAND,
-        OUTPUT,
-    };
-
-    struct Node
-    {
-        NodeKind kind;
-        std::string text;
-        std::strong_ordering operator<=>(Node const &) const = default;
-
-        static Node mkCommentary(std::string text)
-        {
-            return Node{.kind = NodeKind::COMMENTARY, .text = text};
-        }
-
-        static Node mkCommand(std::string text)
-        {
-            return Node{.kind = NodeKind::COMMAND, .text = text};
-        }
-
-        static Node mkOutput(std::string text)
-        {
-            return Node{.kind = NodeKind::OUTPUT, .text = text};
-        }
-
-        auto print() const -> std::string;
-    };
+auto parse(std::string input, Config config) -> ParseResult;
 
-    CLILiterateParser(std::string prompt, size_t indent = 2);
-
-    auto syntax() const -> std::vector<Node> const &;
-
-    /** Feeds a character into the parser */
-    void feed(char c);
-
-    /** Feeds a string into the parser */
-    void feed(std::string_view s);
-
-    /** Parses an input in a non-streaming fashion */
-    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
-
-    /** Returns, losslessly, the string that would have generated a syntax tree */
-    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
-
-    /** Consumes a CLILiterateParser and gives you the syntax out of it */
-    auto intoSyntax() && -> std::vector<Node>;
-
-    /** Tidies syntax to remove trailing whitespace from outputs and remove any
-     * empty prompts */
-    static auto tidyOutputForComparison(std::vector<Node> && syntax) -> std::vector<Node>;
-
-private:
-
-    struct AccumulatingState
-    {
-        std::string lineAccumulator;
-    };
-    struct Indent
-    {
-        size_t pos = 0;
-    };
-    struct Commentary : public AccumulatingState
-    {};
-    struct Prompt : AccumulatingState
-    {
-        size_t pos = 0;
-    };
-    struct Command : public AccumulatingState
-    {};
-    struct OutputLine : public AccumulatingState
-    {};
-
-    using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
-    State state_;
-
-    constexpr static auto stateDebug(State const&) -> const char *;
-
-    const std::string prompt_;
-    const size_t indent_;
-
-    /** Last line was output, so we consider a blank to be part of the output */
-    bool lastWasOutput_;
-
-    std::vector<Node> syntax_;
-
-    void transition(State newState);
-    void onNewline();
-};
-
-// Override gtest printing for lists of nodes
-void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
-};
+}; // namespace cli_literate_parser
+}; // namespace nix