From c3f68b5db7b9d4b963102a2abc20e20445a8bec5 Mon Sep 17 00:00:00 2001 From: Nicolas Mattia Date: Fri, 17 Dec 2021 22:03:33 +0100 Subject: Replace cpptoml with toml11 --- src/toml11/toml/combinator.hpp | 306 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 src/toml11/toml/combinator.hpp (limited to 'src/toml11/toml/combinator.hpp') diff --git a/src/toml11/toml/combinator.hpp b/src/toml11/toml/combinator.hpp new file mode 100644 index 000000000..33ecca1eb --- /dev/null +++ b/src/toml11/toml/combinator.hpp @@ -0,0 +1,306 @@ +// Copyright Toru Niina 2017. +// Distributed under the MIT License. +#ifndef TOML11_COMBINATOR_HPP +#define TOML11_COMBINATOR_HPP +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "region.hpp" +#include "result.hpp" +#include "traits.hpp" +#include "utility.hpp" + +// they scans characters and returns region if it matches to the condition. +// when they fail, it does not change the location. +// in lexer.hpp, these are used. + +namespace toml +{ +namespace detail +{ + +// to output character as an error message. +inline std::string show_char(const char c) +{ + // It suppresses an error that occurs only in Debug mode of MSVC++ on Windows. + // I'm not completely sure but they check the value of char to be in the + // range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes + // has negative value (if char has sign). So here it re-interprets c as + // unsigned char through pointer. In general, converting pointer to a + // pointer that has different type cause UB, but `(signed|unsigned)?char` + // are one of the exceptions. Converting pointer only to char and std::byte + // (c++17) are valid. + if(std::isgraph(*reinterpret_cast(std::addressof(c)))) + { + return std::string(1, c); + } + else + { + std::array buf; + buf.fill('\0'); + const auto r = std::snprintf( + buf.data(), buf.size(), "0x%02x", static_cast(c) & 0xFF); + (void) r; // Unused variable warning + assert(r == static_cast(buf.size()) - 1); + return std::string(buf.data()); + } +} + +template +struct character +{ + static constexpr char target = C; + + static result + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c != target) + { + return none(); + } + loc.advance(); // update location + + return ok(region(loc, first, loc.iter())); + } +}; +template +constexpr char character::target; + +// closed interval [Low, Up]. both Low and Up are included. +template +struct in_range +{ + // assuming ascii part of UTF-8... + static_assert(Low <= Up, "lower bound should be less than upper bound."); + + static constexpr char upper = Up; + static constexpr char lower = Low; + + static result + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c < lower || upper < c) + { + return none(); + } + + loc.advance(); + return ok(region(loc, first, loc.iter())); + } +}; +template constexpr char in_range::upper; +template constexpr char in_range::lower; + +// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char. +// for detecting invalid characters, like control sequences in toml string. +template +struct exclude +{ + static result + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + auto first = loc.iter(); + + auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + loc.reset(first); + return none(); + } + loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... + return ok(region(loc, first, loc.iter())); + } +}; + +// increment `iter`, if matches. otherwise, just return empty string. +template +struct maybe +{ + static result + invoke(location& loc) + { + const auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + return rslt; + } + return ok(region(loc)); + } +}; + +template +struct sequence; + +template +struct sequence +{ + static result + invoke(location& loc) + { + const auto first = loc.iter(); + auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + return sequence::invoke(loc, std::move(rslt.unwrap()), first); + } + + // called from the above function only, recursively. + template + static result + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + reg += rslt.unwrap(); // concat regions + return sequence::invoke(loc, std::move(reg), first); + } +}; + +template +struct sequence +{ + // would be called from sequence::invoke only. + template + static result + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + reg += rslt.unwrap(); // concat regions + return ok(reg); + } +}; + +template +struct either; + +template +struct either +{ + static result + invoke(location& loc) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_ok()) {return rslt;} + return either::invoke(loc); + } +}; +template +struct either +{ + static result + invoke(location& loc) + { + return Head::invoke(loc); + } +}; + +template +struct repeat; + +template struct exactly{}; +template struct at_least{}; +struct unlimited{}; + +template +struct repeat> +{ + static result + invoke(location& loc) + { + region retval(loc); + const auto first = loc.iter(); + for(std::size_t i=0; i +struct repeat> +{ + static result + invoke(location& loc) + { + region retval(loc); + + const auto first = loc.iter(); + for(std::size_t i=0; i +struct repeat +{ + static result + invoke(location& loc) + { + region retval(loc); + while(true) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + return ok(std::move(retval)); + } + retval += rslt.unwrap(); + } + } +}; + +} // detail +} // toml +#endif// TOML11_COMBINATOR_HPP -- cgit v1.2.3