blob: 967a186dd59a454cef63e3312589ff08de57a7c9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
#pragma once
///@file
#include <list>
#include <map>
#include <unordered_map>
#include "types.hh"
#include "chunked-vector.hh"
namespace nix {
/**
* This class mainly exists to give us an operator<< for ostreams. We could also
* return plain strings from SymbolTable, but then we'd have to wrap every
* instance of a symbol that is fmt()ed, which is inconvenient and error-prone.
*/
class SymbolStr
{
friend class SymbolTable;
private:
const std::string * s;
explicit SymbolStr(const std::string & symbol): s(&symbol) {}
public:
bool operator == (std::string_view s2) const
{
return *s == s2;
}
operator const std::string & () const
{
return *s;
}
operator const std::string_view () const
{
return *s;
}
friend std::ostream & operator <<(std::ostream & os, const SymbolStr & symbol);
};
/**
* Symbols have the property that they can be compared efficiently
* (using an equality test), because the symbol table stores only one
* copy of each string.
*/
class Symbol
{
friend class SymbolTable;
private:
uint32_t id;
explicit Symbol(uint32_t id): id(id) {}
public:
Symbol() : id(0) {}
explicit operator bool() const { return id > 0; }
bool operator<(const Symbol other) const { return id < other.id; }
bool operator==(const Symbol other) const { return id == other.id; }
bool operator!=(const Symbol other) const { return id != other.id; }
};
/**
* Symbol table used by the parser and evaluator to represent and look
* up identifiers and attributes efficiently.
*/
class SymbolTable
{
private:
std::unordered_map<std::string_view, std::pair<const std::string *, uint32_t>> symbols;
ChunkedVector<std::string, 8192> store{16};
public:
/**
* converts a string into a symbol.
*/
Symbol create(std::string_view s)
{
// Most symbols are looked up more than once, so we trade off insertion performance
// for lookup performance.
// TODO: could probably be done more efficiently with transparent Hash and Equals
// on the original implementation using unordered_set
// FIXME: make this thread-safe.
auto it = symbols.find(s);
if (it != symbols.end()) return Symbol(it->second.second + 1);
const auto & [rawSym, idx] = store.add(std::string(s));
symbols.emplace(rawSym, std::make_pair(&rawSym, idx));
return Symbol(idx + 1);
}
std::vector<SymbolStr> resolve(const std::vector<Symbol> & symbols) const
{
std::vector<SymbolStr> result;
result.reserve(symbols.size());
for (auto sym : symbols)
result.push_back((*this)[sym]);
return result;
}
SymbolStr operator[](Symbol s) const
{
if (s.id == 0 || s.id > store.size())
abort();
return SymbolStr(store[s.id - 1]);
}
size_t size() const
{
return store.size();
}
size_t totalSize() const;
template<typename T>
void dump(T callback) const
{
store.forEach(callback);
}
};
}
|