aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yorick van Pelt <yorick@yorickvanpelt.nl> 2020-01-09 17:38:27 +0100
committer Yorick van Pelt <yorick@yorickvanpelt.nl> 2020-01-09 17:38:27 +0100
commit f1fac0b5c3b75efab781949fdff2b67ffdda2cb3 (patch)
tree 35f8513da1fe622d194c4854c63a1921ef4f147c
parent 04bbfa692f4ac506a74ae372eca486bd9b56de77 (diff)
builtins.fromJSON: use nlohmann/json parser instead of custom parser
-rw-r--r-- src/libexpr/json-to-value.cc 315
1 files changed, 124 insertions, 191 deletions
diff --git a/src/libexpr/json-to-value.cc b/src/libexpr/json-to-value.cc
index 47cab2bb5..19d9a9b90 100644
--- a/src/libexpr/json-to-value.cc
+++ b/src/libexpr/json-to-value.cc
@@ -1,232 +1,165 @@
#include "json-to-value.hh"
-#include <cstring>
-
-namespace nix {
+#include <variant>
+#include <nlohmann/json.hpp>
+#include <nlohmann/detail/exceptions.hpp>
+using json = nlohmann::json;
-static void skipWhitespace(const char * & s)
-{
- while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') s++;
-}
+namespace nix {
+// for more information, refer to
+// https://github.com/nlohmann/json/blob/master/include/nlohmann/detail/input/json_sax.hpp
+class JSONSax : nlohmann::json_sax<json> {
+ class JSONState { // One frame of the SAX parse stack. The base class doubles as the root frame, which writes into a caller-supplied Value.
+ protected:
+ JSONState* parent; // enclosing frame; NOTE(review): not initialized by the Value* constructor below
+ Value * v; // slot the next parsed value is written to; nullptr means value() must allocate one
+ public:
+ virtual JSONState* resolve(EvalState &) // called when a container is closed; the base version always throws because the root frame has nothing to close
+ {
+ throw std::logic_error("tried to close toplevel json parser state");
+ };
+ explicit JSONState(JSONState* p) : parent(p), v(nullptr) {}; // nested frame: remembers its parent, no target slot yet
+ explicit JSONState(Value* v) : v(v) {}; // root frame: values are written straight into *v
+ JSONState(JSONState& p) = delete; // frames are not copyable
+ Value& value(EvalState & state) // returns the current target slot, allocating it from state on first use
+ {
+ if (v == nullptr)
+ v = state.allocValue();
+ return *v;
+ };
+ virtual ~JSONState() {}; // virtual so derived frames can be deleted through a JSONState*
+ virtual void add() {}; // hook invoked after a value has been written to the slot; no-op in the base frame
+ };
+
+ class JSONObjectState : public JSONState { // Frame for a JSON object: collects symbol -> Value* pairs until the object is closed.
+ using JSONState::JSONState; // reuse the base constructors
+ ValueMap attrs = ValueMap(); // members collected so far
+ virtual JSONState* resolve(EvalState & state) override // builds the attribute set in the parent's slot and returns the parent frame
+ {
+ Value& v = parent->value(state); // this local v shadows the member v: it is the parent's target slot
+ state.mkAttrs(v, attrs.size());
+ for (auto & i : attrs)
+ v.attrs->push_back(Attr(i.first, i.second)); // NOTE(review): the replaced parser called v.attrs->sort() after this loop; presumably ValueMap's iteration order makes that unnecessary - confirm
+ return parent;
+ }
+ virtual void add() override { v = nullptr; }; // value consumed: clear the slot so the next key() gets a fresh one from value()
+ public:
+ void key(string_t& name, EvalState & state) // records name and points it at the slot the following value will be written to
+ {
+ attrs[state.symbols.create(name)] = &value(state);
+ }
+ };
+
+ class JSONListState : public JSONState { // Frame for a JSON array: collects element pointers until the array is closed.
+ ValueVector values = ValueVector(); // elements collected so far, in input order
+ virtual JSONState* resolve(EvalState & state) override // builds the list in the parent's slot and returns the parent frame
+ {
+ Value& v = parent->value(state); // this local v shadows the member v: it is the parent's target slot
+ state.mkList(v, values.size());
+ for (size_t n = 0; n < values.size(); ++n) {
+ v.listElems()[n] = values[n]; // copy the collected element pointers into the new list
+ }
+ return parent;
+ }
+ virtual void add() override { // an element has just been written into the slot
+ values.push_back(v); // keep it ...
+ v = nullptr; // ... and clear the slot so value() allocates a new one for the next element
+ };
+ public:
+ JSONListState(JSONState* p, std::size_t reserve) : JSONState(p) // p is the enclosing frame; reserve is the capacity to pre-allocate for elements
+ {
+ values.reserve(reserve);
+ }
+ };
-/*
- Parse an unicode escape sequence (4 hex characters following \u) in JSON string
-*/
-static string parseUnicodeEscapeSequence(const char * & s)
-{
- int codepoint = 0;
+ EvalState & state;
+ JSONState* rs;
- const auto factors = { 12u, 8u, 4u, 0u };
- for (const auto factor : factors)
+ template<typename T, typename... Args> inline bool handle_value(T f, Args... args)
{
- if (!*s) throw JSONParseError("got end-of-string in JSON string while parsing \\u sequence");
-
- if (*s >= '0' and *s <= '9') {
- codepoint += static_cast<int>((static_cast<unsigned int>(*s) - 0x30u) << factor);
- } else if (*s >= 'A' and *s <= 'F') {
- codepoint += static_cast<int>((static_cast<unsigned int>(*s) - 0x37u) << factor);
- } else if (*s >= 'a' and *s <= 'f') {
- codepoint += static_cast<int>((static_cast<unsigned int>(*s) - 0x57u) << factor);
- } else {
- throw JSONParseError(format("illegal character '%1%' in \\u escape sequence.") % *s);
- }
- s++;
+ f(rs->value(state), args...);
+ rs->add();
+ return true;
}
- if ((codepoint > 0xd7ff && codepoint < 0xe000) || codepoint > 0x10ffff) {
- throw JSONParseError("Unicode escape sequence is not a Unicode scalar value");
- }
+public:
+ JSONSax(EvalState & state, Value & v) : state(state), rs(new JSONState(&v)) {}; // starts with a single root frame that writes the parsed result into v
+ ~JSONSax() { delete rs; }; // frees only the current frame; NOTE(review): if parsing stops inside a nested container (parse_error throws), the frames reachable through `parent` look like they are never deleted - confirm
- // taken from cpptoml.h
- std::string result;
- // See Table 3-6 of the Unicode standard
- if (codepoint <= 0x7f)
+ bool null()
{
- // 1-byte codepoints: 00000000 0xxxxxxx
- // repr: 0xxxxxxx
- result += static_cast<char>(codepoint & 0x7f);
+ return handle_value(mkNull);
}
- else if (codepoint <= 0x7ff)
+
+ bool boolean(bool val)
{
- // 2-byte codepoints: 00000yyy yyxxxxxx
- // repr: 110yyyyy 10xxxxxx
- //
- // 0x1f = 00011111
- // 0xc0 = 11000000
- //
- result += static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f));
- //
- // 0x80 = 10000000
- // 0x3f = 00111111
- //
- result += static_cast<char>(0x80 | (codepoint & 0x3f));
+ return handle_value(mkBool, val);
}
- else if (codepoint <= 0xffff)
+
+ bool number_integer(number_integer_t val)
{
- // 3-byte codepoints: zzzzyyyy yyxxxxxx
- // repr: 1110zzzz 10yyyyyy 10xxxxxx
- //
- // 0xe0 = 11100000
- // 0x0f = 00001111
- //
- result += static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f));
- result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x1f));
- result += static_cast<char>(0x80 | (codepoint & 0x3f));
+ return handle_value(mkInt, val);
}
- else
+
+ bool number_unsigned(number_unsigned_t val)
{
- // 4-byte codepoints: 000uuuuu zzzzyyyy yyxxxxxx
- // repr: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
- //
- // 0xf0 = 11110000
- // 0x07 = 00000111
- //
- result += static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07));
- result += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f));
- result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f));
- result += static_cast<char>(0x80 | (codepoint & 0x3f));
+ return handle_value(mkInt, val);
}
- return result;
-}
-
-static string parseJSONString(const char * & s)
-{
- string res;
- if (*s++ != '"') throw JSONParseError("expected JSON string");
- while (*s != '"') {
- if (!*s) throw JSONParseError("got end-of-string in JSON string");
- if (*s == '\\') {
- s++;
- if (*s == '"') res += '"';
- else if (*s == '\\') res += '\\';
- else if (*s == '/') res += '/';
- else if (*s == 'b') res += '\b';
- else if (*s == 'f') res += '\f';
- else if (*s == 'n') res += '\n';
- else if (*s == 'r') res += '\r';
- else if (*s == 't') res += '\t';
- else if (*s == 'u') {
- res += parseUnicodeEscapeSequence(++s);
- // to neuter the outside s++
- s--;
- } else throw JSONParseError("invalid escaped character in JSON string");
- s++;
- } else
- res += *s++;
+ bool number_float(number_float_t val, const string_t& s) // SAX callback for a floating-point number; the textual form s is not used
+ {
+ return handle_value(mkFloat, val);
 }
- s++;
- return res;
-}
-
-static void parseJSON(EvalState & state, const char * & s, Value & v)
-{
- skipWhitespace(s);
-
- if (!*s) throw JSONParseError("expected JSON value");
-
- if (*s == '[') {
- s++;
- ValueVector values;
- values.reserve(128);
- skipWhitespace(s);
- while (1) {
- if (values.empty() && *s == ']') break;
- Value * v2 = state.allocValue();
- parseJSON(state, s, *v2);
- values.push_back(v2);
- skipWhitespace(s);
- if (*s == ']') break;
- if (*s != ',') throw JSONParseError("expected ',' or ']' after JSON array element");
- s++;
- }
- s++;
- state.mkList(v, values.size());
- for (size_t n = 0; n < values.size(); ++n)
- v.listElems()[n] = values[n];
+ bool string(string_t& val) // SAX callback for a string value
+ {
+ return handle_value<void(Value&, const char*)>(mkString, val.c_str()); // the explicit T names the const char* form of mkString; NOTE(review): c_str() presumably cuts the value at an embedded NUL - confirm
 }
- else if (*s == '{') {
- s++;
- ValueMap attrs;
- while (1) {
- skipWhitespace(s);
- if (attrs.empty() && *s == '}') break;
- string name = parseJSONString(s);
- skipWhitespace(s);
- if (*s != ':') throw JSONParseError("expected ':' in JSON object");
- s++;
- Value * v2 = state.allocValue();
- parseJSON(state, s, *v2);
- attrs[state.symbols.create(name)] = v2;
- skipWhitespace(s);
- if (*s == '}') break;
- if (*s != ',') throw JSONParseError("expected ',' or '}' after JSON member");
- s++;
- }
- state.mkAttrs(v, attrs.size());
- for (auto & i : attrs)
- v.attrs->push_back(Attr(i.first, i.second));
- v.attrs->sort();
- s++;
+ bool start_object(std::size_t len) // SAX callback for the start of an object; len is not used
+ {
+ JSONState* old = rs;
+ rs = new JSONObjectState(old); // push an object frame whose parent is the previous current frame
+ return true;
 }
- else if (*s == '"') {
- mkString(v, parseJSONString(s));
+ bool key(string_t& name) // SAX callback for an object member name
+ {
+ dynamic_cast<JSONObjectState*>(rs)->key(name, state); // NOTE(review): the cast result is dereferenced unchecked; assumes keys are only reported while an object frame is current
+ return true;
 }
- else if (isdigit(*s) || *s == '-' || *s == '.' ) {
- // Buffer into a string first, then use built-in C++ conversions
- std::string tmp_number;
- ValueType number_type = tInt;
-
- while (isdigit(*s) || *s == '-' || *s == '.' || *s == 'e' || *s == 'E') {
- if (*s == '.' || *s == 'e' || *s == 'E')
- number_type = tFloat;
- tmp_number += *s++;
- }
-
- try {
- if (number_type == tFloat)
- mkFloat(v, stod(tmp_number));
- else
- mkInt(v, stol(tmp_number));
- } catch (std::invalid_argument & e) {
- throw JSONParseError("invalid JSON number");
- } catch (std::out_of_range & e) {
- throw JSONParseError("out-of-range JSON number");
- }
+ bool end_object() { // SAX callback for the end of an object; end_array forwards here too
+ JSONState* old = rs;
+ rs = old->resolve(state); // materialise the container into the parent's slot; the parent becomes the current frame
+ delete old; // the closed frame is no longer needed
+ rs->add(); // tell the parent that its slot has been filled
+ return true;
 }
- else if (strncmp(s, "true", 4) == 0) {
- s += 4;
- mkBool(v, true);
+ bool end_array() { // SAX callback for the end of an array
+ return end_object(); // closing an array is the same resolve-and-pop step as closing an object
 }
- else if (strncmp(s, "false", 5) == 0) {
- s += 5;
- mkBool(v, false);
+ bool start_array(size_t len) { // SAX callback for the start of an array; len is used only as a capacity hint
+ JSONState* old = rs;
+ rs = new JSONListState(old, len != std::numeric_limits<size_t>::max() ? len : 128); // max() is treated as "length not known" and replaced by 128; NOTE(review): <limits> is not among the includes added by this patch, presumably pulled in transitively - confirm
+ return true;
 }
- else if (strncmp(s, "null", 4) == 0) {
- s += 4;
- mkNull(v);
+ bool parse_error(std::size_t, const std::string&, const nlohmann::detail::exception& ex) { // SAX error callback; the position and last-token arguments are ignored
+ throw JSONParseError(ex.what()); // never returns: the library's message is rethrown as a JSONParseError
 }
-
- else throw JSONParseError("unrecognised JSON value");
-}
-
+};
void parseJSON(EvalState & state, const string & s_, Value & v)
{
- const char * s = s_.c_str();
- parseJSON(state, s, v);
- skipWhitespace(s);
- if (*s) throw JSONParseError(format("expected end-of-string while parsing JSON value: %1%") % s);
+ JSONSax parser(state, v);
+ bool res = json::sax_parse(s_, &parser);
+ if (!res)
+ throw JSONParseError("Invalid JSON Value");
}
-
}