diff options
Diffstat (limited to 'src/toml11/toml/combinator.hpp')
-rw-r--r-- | src/toml11/toml/combinator.hpp | 306 |
1 files changed, 306 insertions, 0 deletions
diff --git a/src/toml11/toml/combinator.hpp b/src/toml11/toml/combinator.hpp new file mode 100644 index 000000000..33ecca1eb --- /dev/null +++ b/src/toml11/toml/combinator.hpp @@ -0,0 +1,306 @@ +// Copyright Toru Niina 2017. +// Distributed under the MIT License. +#ifndef TOML11_COMBINATOR_HPP +#define TOML11_COMBINATOR_HPP +#include <cassert> +#include <cctype> +#include <cstdio> + +#include <array> +#include <iomanip> +#include <iterator> +#include <limits> +#include <type_traits> + +#include "region.hpp" +#include "result.hpp" +#include "traits.hpp" +#include "utility.hpp" + +// they scans characters and returns region if it matches to the condition. +// when they fail, it does not change the location. +// in lexer.hpp, these are used. + +namespace toml +{ +namespace detail +{ + +// to output character as an error message. +inline std::string show_char(const char c) +{ + // It suppresses an error that occurs only in Debug mode of MSVC++ on Windows. + // I'm not completely sure but they check the value of char to be in the + // range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes + // has negative value (if char has sign). So here it re-interprets c as + // unsigned char through pointer. In general, converting pointer to a + // pointer that has different type cause UB, but `(signed|unsigned)?char` + // are one of the exceptions. Converting pointer only to char and std::byte + // (c++17) are valid. + if(std::isgraph(*reinterpret_cast<unsigned char const*>(std::addressof(c)))) + { + return std::string(1, c); + } + else + { + std::array<char, 5> buf; + buf.fill('\0'); + const auto r = std::snprintf( + buf.data(), buf.size(), "0x%02x", static_cast<int>(c) & 0xFF); + (void) r; // Unused variable warning + assert(r == static_cast<int>(buf.size()) - 1); + return std::string(buf.data()); + } +} + +template<char C> +struct character +{ + static constexpr char target = C; + + static result<region, none_t> + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c != target) + { + return none(); + } + loc.advance(); // update location + + return ok(region(loc, first, loc.iter())); + } +}; +template<char C> +constexpr char character<C>::target; + +// closed interval [Low, Up]. both Low and Up are included. +template<char Low, char Up> +struct in_range +{ + // assuming ascii part of UTF-8... + static_assert(Low <= Up, "lower bound should be less than upper bound."); + + static constexpr char upper = Up; + static constexpr char lower = Low; + + static result<region, none_t> + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + const auto first = loc.iter(); + + const char c = *(loc.iter()); + if(c < lower || upper < c) + { + return none(); + } + + loc.advance(); + return ok(region(loc, first, loc.iter())); + } +}; +template<char L, char U> constexpr char in_range<L, U>::upper; +template<char L, char U> constexpr char in_range<L, U>::lower; + +// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char. +// for detecting invalid characters, like control sequences in toml string. +template<typename Combinator> +struct exclude +{ + static result<region, none_t> + invoke(location& loc) + { + if(loc.iter() == loc.end()) {return none();} + auto first = loc.iter(); + + auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + loc.reset(first); + return none(); + } + loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but... + return ok(region(loc, first, loc.iter())); + } +}; + +// increment `iter`, if matches. otherwise, just return empty string. +template<typename Combinator> +struct maybe +{ + static result<region, none_t> + invoke(location& loc) + { + const auto rslt = Combinator::invoke(loc); + if(rslt.is_ok()) + { + return rslt; + } + return ok(region(loc)); + } +}; + +template<typename ... Ts> +struct sequence; + +template<typename Head, typename ... Tail> +struct sequence<Head, Tail...> +{ + static result<region, none_t> + invoke(location& loc) + { + const auto first = loc.iter(); + auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first); + } + + // called from the above function only, recursively. + template<typename Iterator> + static result<region, none_t> + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + reg += rslt.unwrap(); // concat regions + return sequence<Tail...>::invoke(loc, std::move(reg), first); + } +}; + +template<typename Head> +struct sequence<Head> +{ + // would be called from sequence<T ...>::invoke only. + template<typename Iterator> + static result<region, none_t> + invoke(location& loc, region reg, Iterator first) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + reg += rslt.unwrap(); // concat regions + return ok(reg); + } +}; + +template<typename ... Ts> +struct either; + +template<typename Head, typename ... Tail> +struct either<Head, Tail...> +{ + static result<region, none_t> + invoke(location& loc) + { + const auto rslt = Head::invoke(loc); + if(rslt.is_ok()) {return rslt;} + return either<Tail...>::invoke(loc); + } +}; +template<typename Head> +struct either<Head> +{ + static result<region, none_t> + invoke(location& loc) + { + return Head::invoke(loc); + } +}; + +template<typename T, typename N> +struct repeat; + +template<std::size_t N> struct exactly{}; +template<std::size_t N> struct at_least{}; +struct unlimited{}; + +template<typename T, std::size_t N> +struct repeat<T, exactly<N>> +{ + static result<region, none_t> + invoke(location& loc) + { + region retval(loc); + const auto first = loc.iter(); + for(std::size_t i=0; i<N; ++i) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + retval += rslt.unwrap(); + } + return ok(std::move(retval)); + } +}; + +template<typename T, std::size_t N> +struct repeat<T, at_least<N>> +{ + static result<region, none_t> + invoke(location& loc) + { + region retval(loc); + + const auto first = loc.iter(); + for(std::size_t i=0; i<N; ++i) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + loc.reset(first); + return none(); + } + retval += rslt.unwrap(); + } + while(true) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + return ok(std::move(retval)); + } + retval += rslt.unwrap(); + } + } +}; + +template<typename T> +struct repeat<T, unlimited> +{ + static result<region, none_t> + invoke(location& loc) + { + region retval(loc); + while(true) + { + auto rslt = T::invoke(loc); + if(rslt.is_err()) + { + return ok(std::move(retval)); + } + retval += rslt.unwrap(); + } + } +}; + +} // detail +} // toml +#endif// TOML11_COMBINATOR_HPP |