diff options
author | Jade Lovelace <lix@jade.fyi> | 2024-03-09 23:59:50 -0800 |
---|---|---|
committer | Jade Lovelace <lix@jade.fyi> | 2024-03-14 14:30:38 -0700 |
commit | 38571c50e6dc0ee910e9e7619e482fdbbfd644e1 (patch) | |
tree | 2f1635b17b71ef3aa0ed3c8dac7bc9ffd2165d07 /tests/unit | |
parent | 84727bebb42ded17c128567e34636c4cdc7eed45 (diff) |
Implement a parser for a literate testing system for the repl
This parser can be reused for other purposes. It's inspired by
https://bitheap.org/cram/
Although eelco's impostor exists (https://github.com/mobusoperandi/eelco),
it is not very nice to depend on out-of-tree testing frameworks with no
way to customize them.
Change-Id: Ifca50177e09730182baf0ebf829c3505bbb0274a
Diffstat (limited to 'tests/unit')
-rw-r--r-- | tests/unit/libutil-support/local.mk | 3 | ||||
-rw-r--r-- | tests/unit/libutil-support/tests/cli-literate-parser.cc | 174 | ||||
-rw-r--r-- | tests/unit/libutil-support/tests/cli-literate-parser.hh | 127 | ||||
-rw-r--r-- | tests/unit/libutil-support/tests/debug-char.hh | 24 |
4 files changed, 327 insertions, 1 deletions
diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk
index b4c8f2475..cfd88be99 100644
--- a/tests/unit/libutil-support/local.mk
+++ b/tests/unit/libutil-support/local.mk
@@ -10,6 +10,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
 
 libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
 
-libutil-test-support_LIBS = libutil
+# libexpr so we can steal their string printer from print.cc
+libutil-test-support_LIBS = libutil libexpr
 
 libutil-test-support_LDFLAGS := -pthread -lrapidcheck
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc
new file mode 100644
index 000000000..3b2345e8e
--- /dev/null
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@@ -0,0 +1,174 @@
+#include "cli-literate-parser.hh"
+#include "libexpr/print.hh"
+#include "debug-char.hh"
+#include "types.hh"
+#include "util.hh"
+#include <iostream>
+#include <memory>
+#include <boost/algorithm/string/trim.hpp>
+
+using namespace std::string_literals;
+
+namespace nix {
+
+static constexpr const bool DEBUG_PARSER = false; // flip to trace every (state, char) pair on stdout
+
+constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
+{
+    return std::visit(
+        overloaded{// clang-format off
+            [](Indent const&) -> const char * { return "indent"; },
+            [](Commentary const&) -> const char * { return "commentary"; },
+            [](Prompt const&) -> const char * { return "prompt"; },
+            [](Command const&) -> const char * { return "command"; },
+            [](OutputLine const&) -> const char * { return "output_line"; }},
+        // clang-format on
+        s);
+}
+
+auto CLILiterateParser::Node::print() const -> std::string
+{
+    std::ostringstream s{};
+    switch (kind) {
+    case NodeKind::COMMENTARY:
+        s << "Commentary ";
+        break;
+    case NodeKind::COMMAND:
+        s << "Command ";
+        break;
+    case NodeKind::OUTPUT:
+        s << "Output ";
+        break;
+    }
+    printLiteralString(s, this->text);
+    return s.str();
+}
+
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
+{
+    for (auto & node : nodes) {
+        *os << node.print() << "\\n"; // NOTE(review): literal backslash + 'n', not a newline; confirm intended
+    }
+}
+
+auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
+{
+    CLILiterateParser p{std::move(prompt), indent};
+    p.feed(input);
+    return std::move(p).intoSyntax();
+}
+
+auto CLILiterateParser::intoSyntax() && -> std::vector<Node>
+{
+    return std::move(this->syntax_);
+}
+
+CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
+    : state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
+    , prompt_(std::move(prompt))
+    , indent_(indent)
+    , lastWasOutput_(false)
+    , syntax_{}
+{
+    assert(!prompt_.empty()); // with an empty prompt every indented line would parse as a command
+}
+
+void CLILiterateParser::feed(char c)
+{
+    if constexpr (DEBUG_PARSER) {
+        std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
+    }
+
+    if (c == '\n') {
+        onNewline();
+        return;
+    }
+
+    std::visit(
+        overloaded{
+            [&](Indent & s) {
+                if (c == ' ') {
+                    if (++s.pos >= indent_) {
+                        transition(Prompt{});
+                    }
+                } else {
+                    transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
+                }
+            },
+            [&](Prompt & s) {
+                if (s.pos >= prompt_.length()) {
+                    transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
+                    return;
+                } else if (c == prompt_[s.pos]) {
+                    // good prompt character
+                    ++s.pos;
+                } else {
+                    // didn't match the prompt, so it must have actually been output.
+                    s.lineAccumulator.push_back(c);
+                    transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
+                    return; // `s` refers into the state that transition() just replaced
+                }
+                s.lineAccumulator.push_back(c);
+            },
+            [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
+        state_);
+}
+
+void CLILiterateParser::onNewline()
+{
+    State lastState = std::move(state_);
+    bool newLastWasOutput = false;
+
+    syntax_.push_back(std::visit(
+        overloaded{
+            [&](Indent &) {
+                // XXX: technically this eats trailing spaces
+
+                // a newline following output is considered part of that output
+                if (lastWasOutput_) {
+                    newLastWasOutput = true;
+                    return Node::mkOutput("");
+                }
+                return Node::mkCommentary("");
+            },
+            [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
+            [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
+            [&](OutputLine & s) {
+                newLastWasOutput = true;
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            },
+            [&](Prompt & s) {
+                // INDENT followed by newline (or by a prompt match cut short) is emitted as an output line
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            }},
+        lastState));
+
+    transition(Indent{});
+    lastWasOutput_ = newLastWasOutput;
+}
+
+void CLILiterateParser::feed(std::string_view s)
+{
+    for (char ch : s) {
+        feed(ch);
+    }
+}
+
+void CLILiterateParser::transition(State newState)
+{
+    // When we expect INDENT and we are parsing without indents, commentary
+    // cannot exist, so we want to transition directly into PROMPT before
+    // resuming normal processing.
+    if (Indent * i = std::get_if<Indent>(&newState); i != nullptr && indent_ == 0) {
+        newState = Prompt{AccumulatingState{}, i->pos};
+    }
+
+    state_ = std::move(newState);
+}
+
+auto CLILiterateParser::syntax() const -> std::vector<Node> const &
+{
+    return syntax_;
+}
+
+} // namespace nix
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh
new file mode 100644
index 000000000..86a5bdd32
--- /dev/null
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@@ -0,0 +1,127 @@
+#pragma once
+///@file
+
+#include <compare>
+#include <memory>
+#include <sstream>
+#include <variant>
+#include <vector>
+#include <string>
+
+namespace nix {
+/*
+ * A DFA parser for literate test cases for CLIs.
+ *
+ * FIXME: implement merging of these, so you can auto update cases that have
+ * comments.
+ *
+ * Format:
+ * COMMENTARY
+ * INDENT PROMPT COMMAND
+ * INDENT OUTPUT
+ *
+ * e.g.
+ * commentary commentary commentary
+ *   nix-repl> :t 1
+ *   an integer
+ *
+ * Yields:
+ * Commentary "commentary commentary commentary"
+ * Command ":t 1"
+ * Output "an integer"
+ *
+ * Note: one Output line is generated for each line of the sources, because
+ * this is effectively necessary to be able to align them in the future to
+ * auto-update tests.
+ */
+class CLILiterateParser
+{
+public:
+
+    enum class NodeKind {
+        COMMENTARY,
+        COMMAND,
+        OUTPUT,
+    };
+
+    struct Node
+    {
+        NodeKind kind;
+        std::string text;
+        std::strong_ordering operator<=>(Node const &) const = default;
+
+        static Node mkCommentary(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMENTARY, .text = std::move(text)};
+        }
+
+        static Node mkCommand(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMAND, .text = std::move(text)};
+        }
+
+        static Node mkOutput(std::string text)
+        {
+            return Node{.kind = NodeKind::OUTPUT, .text = std::move(text)};
+        }
+
+        auto print() const -> std::string;
+    };
+
+    CLILiterateParser(std::string prompt, size_t indent = 2);
+    /** Nodes parsed so far; one node is appended per newline fed in */
+    auto syntax() const -> std::vector<Node> const &;
+
+    /** Feeds a character into the parser */
+    void feed(char c);
+
+    /** Feeds a string into the parser */
+    void feed(std::string_view s);
+
+    /** Parses an input in a non-streaming fashion */
+    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
+
+    /** Consumes a CLILiterateParser and gives you the syntax out of it */
+    auto intoSyntax() && -> std::vector<Node>;
+
+private:
+
+    struct AccumulatingState
+    {
+        std::string lineAccumulator; // text gathered so far for the current line
+    };
+    struct Indent
+    {
+        size_t pos = 0; // indentation spaces consumed so far on this line
+    };
+    struct Commentary : public AccumulatingState
+    {};
+    struct Prompt : AccumulatingState
+    {
+        size_t pos = 0; // number of prompt characters matched so far
+    };
+    struct Command : public AccumulatingState
+    {};
+    struct OutputLine : public AccumulatingState
+    {};
+
+    using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
+    State state_;
+
+    constexpr static auto stateDebug(State const&) -> const char *;
+
+    const std::string prompt_;
+    const size_t indent_;
+
+    /** Last line was output, so we consider a blank to be part of the output */
+    bool lastWasOutput_;
+
+    std::vector<Node> syntax_;
+
+    void transition(State newState);
+    void onNewline();
+};
+
+// Override gtest printing for lists of nodes
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
+} // namespace nix
diff --git a/tests/unit/libutil-support/tests/debug-char.hh b/tests/unit/libutil-support/tests/debug-char.hh
new file mode 100644
index 000000000..765d8553f
--- /dev/null
+++ b/tests/unit/libutil-support/tests/debug-char.hh
@@ -0,0 +1,24 @@
+#pragma once
+///@file
+#include <cctype>
+#include <ostream>
+#include <boost/io/ios_state.hpp>
+
+namespace nix {
+struct DebugChar // wraps a char so operator<< shows it as itself or as a hex byte
+{
+    char c;
+};
+
+inline std::ostream & operator<<(std::ostream & s, DebugChar c)
+{
+    boost::io::ios_flags_saver _ifs(s);
+
+    if (std::isprint(static_cast<unsigned char>(c.c))) { // isprint on a negative char is undefined
+        s << static_cast<char>(c.c);
+    } else {
+        s << std::hex << "0x" << (static_cast<unsigned int>(c.c) & 0xff);
+    }
+    return s;
+}
+} // namespace nix