1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
|
// Copyright Toru Niina 2017.
// Distributed under the MIT License.
#ifndef TOML11_COMBINATOR_HPP
#define TOML11_COMBINATOR_HPP
#include <cassert>
#include <cctype>
#include <cstdio>
#include <array>
#include <iomanip>
#include <iterator>
#include <limits>
#include <type_traits>
#include "region.hpp"
#include "result.hpp"
#include "traits.hpp"
#include "utility.hpp"
// they scans characters and returns region if it matches to the condition.
// when they fail, it does not change the location.
// in lexer.hpp, these are used.
namespace toml
{
namespace detail
{
// to output character as an error message.
inline std::string show_char(const char c)
{
// It suppresses an error that occurs only in Debug mode of MSVC++ on Windows.
// I'm not completely sure but they check the value of char to be in the
// range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes
// has negative value (if char has sign). So here it re-interprets c as
// unsigned char through pointer. In general, converting pointer to a
// pointer that has different type cause UB, but `(signed|unsigned)?char`
// are one of the exceptions. Converting pointer only to char and std::byte
// (c++17) are valid.
if(std::isgraph(*reinterpret_cast<unsigned char const*>(std::addressof(c))))
{
return std::string(1, c);
}
else
{
std::array<char, 5> buf;
buf.fill('\0');
const auto r = std::snprintf(
buf.data(), buf.size(), "0x%02x", static_cast<int>(c) & 0xFF);
(void) r; // Unused variable warning
assert(r == static_cast<int>(buf.size()) - 1);
return std::string(buf.data());
}
}
template<char C>
struct character
{
static constexpr char target = C;
static result<region, none_t>
invoke(location& loc)
{
if(loc.iter() == loc.end()) {return none();}
const auto first = loc.iter();
const char c = *(loc.iter());
if(c != target)
{
return none();
}
loc.advance(); // update location
return ok(region(loc, first, loc.iter()));
}
};
template<char C>
constexpr char character<C>::target;
// closed interval [Low, Up]. both Low and Up are included.
template<char Low, char Up>
struct in_range
{
// assuming ascii part of UTF-8...
static_assert(Low <= Up, "lower bound should be less than upper bound.");
static constexpr char upper = Up;
static constexpr char lower = Low;
static result<region, none_t>
invoke(location& loc)
{
if(loc.iter() == loc.end()) {return none();}
const auto first = loc.iter();
const char c = *(loc.iter());
if(c < lower || upper < c)
{
return none();
}
loc.advance();
return ok(region(loc, first, loc.iter()));
}
};
template<char L, char U> constexpr char in_range<L, U>::upper;
template<char L, char U> constexpr char in_range<L, U>::lower;
// keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char.
// for detecting invalid characters, like control sequences in toml string.
template<typename Combinator>
struct exclude
{
static result<region, none_t>
invoke(location& loc)
{
if(loc.iter() == loc.end()) {return none();}
auto first = loc.iter();
auto rslt = Combinator::invoke(loc);
if(rslt.is_ok())
{
loc.reset(first);
return none();
}
loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
return ok(region(loc, first, loc.iter()));
}
};
// increment `iter`, if matches. otherwise, just return empty string.
template<typename Combinator>
struct maybe
{
static result<region, none_t>
invoke(location& loc)
{
const auto rslt = Combinator::invoke(loc);
if(rslt.is_ok())
{
return rslt;
}
return ok(region(loc));
}
};
template<typename ... Ts>
struct sequence;
template<typename Head, typename ... Tail>
struct sequence<Head, Tail...>
{
static result<region, none_t>
invoke(location& loc)
{
const auto first = loc.iter();
auto rslt = Head::invoke(loc);
if(rslt.is_err())
{
loc.reset(first);
return none();
}
return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
}
// called from the above function only, recursively.
template<typename Iterator>
static result<region, none_t>
invoke(location& loc, region reg, Iterator first)
{
const auto rslt = Head::invoke(loc);
if(rslt.is_err())
{
loc.reset(first);
return none();
}
reg += rslt.unwrap(); // concat regions
return sequence<Tail...>::invoke(loc, std::move(reg), first);
}
};
template<typename Head>
struct sequence<Head>
{
// would be called from sequence<T ...>::invoke only.
template<typename Iterator>
static result<region, none_t>
invoke(location& loc, region reg, Iterator first)
{
const auto rslt = Head::invoke(loc);
if(rslt.is_err())
{
loc.reset(first);
return none();
}
reg += rslt.unwrap(); // concat regions
return ok(reg);
}
};
template<typename ... Ts>
struct either;
template<typename Head, typename ... Tail>
struct either<Head, Tail...>
{
static result<region, none_t>
invoke(location& loc)
{
const auto rslt = Head::invoke(loc);
if(rslt.is_ok()) {return rslt;}
return either<Tail...>::invoke(loc);
}
};
template<typename Head>
struct either<Head>
{
static result<region, none_t>
invoke(location& loc)
{
return Head::invoke(loc);
}
};
template<typename T, typename N>
struct repeat;
template<std::size_t N> struct exactly{};
template<std::size_t N> struct at_least{};
struct unlimited{};
template<typename T, std::size_t N>
struct repeat<T, exactly<N>>
{
static result<region, none_t>
invoke(location& loc)
{
region retval(loc);
const auto first = loc.iter();
for(std::size_t i=0; i<N; ++i)
{
auto rslt = T::invoke(loc);
if(rslt.is_err())
{
loc.reset(first);
return none();
}
retval += rslt.unwrap();
}
return ok(std::move(retval));
}
};
template<typename T, std::size_t N>
struct repeat<T, at_least<N>>
{
static result<region, none_t>
invoke(location& loc)
{
region retval(loc);
const auto first = loc.iter();
for(std::size_t i=0; i<N; ++i)
{
auto rslt = T::invoke(loc);
if(rslt.is_err())
{
loc.reset(first);
return none();
}
retval += rslt.unwrap();
}
while(true)
{
auto rslt = T::invoke(loc);
if(rslt.is_err())
{
return ok(std::move(retval));
}
retval += rslt.unwrap();
}
}
};
template<typename T>
struct repeat<T, unlimited>
{
static result<region, none_t>
invoke(location& loc)
{
region retval(loc);
while(true)
{
auto rslt = T::invoke(loc);
if(rslt.is_err())
{
return ok(std::move(retval));
}
retval += rslt.unwrap();
}
}
};
} // detail
} // toml
#endif// TOML11_COMBINATOR_HPP
|