diff options
author | pennae <github@quasiparticle.net> | 2022-03-05 14:40:24 +0100 |
---|---|---|
committer | pennae <github@quasiparticle.net> | 2022-04-21 21:56:31 +0200 |
commit | 8775be33931ec3b1cad97035ff3d5370a97178a1 (patch) | |
tree | 0855d6b35e24153092738315176ea19aa72b9530 /src/libexpr/symbol-table.hh | |
parent | 00a32802328b58daa7af48ccac60f6154ef05639 (diff) |
store Symbols in a table as well, like positions
this slightly increases the amount of memory used for any given symbol, but this
increase is more than made up for if the symbol is referenced more than once in
the EvalState that holds it. on average every symbol should be referenced at
least twice (once to introduce a binding, once to use it), so we expect no
increase in memory on average.
symbol tables are limited to 2³² entries like position tables, and similar
arguments apply to why overflow is not likely: 2³² symbols would require as many
string instances (at 24 bytes each) and map entries (at 24 bytes or more each,
assuming that the map holds on average at most one item per bucket as the docs
say). a full symbol table would require at least 192GB of memory just for
symbols, which is well out of reach. (an ofborg eval of nixpks today creates
less than a million symbols!)
Diffstat (limited to 'src/libexpr/symbol-table.hh')
-rw-r--r-- | src/libexpr/symbol-table.hh | 77 |
1 files changed, 41 insertions, 36 deletions
diff --git a/src/libexpr/symbol-table.hh b/src/libexpr/symbol-table.hh index 297605295..d0cd841a0 100644 --- a/src/libexpr/symbol-table.hh +++ b/src/libexpr/symbol-table.hh @@ -16,85 +16,90 @@ namespace nix { class Symbol { -private: - const std::string * s; // pointer into SymbolTable - Symbol(const std::string * s) : s(s) { }; friend class SymbolTable; +private: + std::string s; public: - Symbol() : s(0) { }; - - bool operator == (const Symbol & s2) const - { - return s == s2.s; - } + Symbol(std::string_view s) : s(s) { } // FIXME: remove bool operator == (std::string_view s2) const { - return s->compare(s2) == 0; - } - - bool operator != (const Symbol & s2) const - { - return s != s2.s; - } - - bool operator < (const Symbol & s2) const - { - return s < s2.s; + return s == s2; } operator const std::string & () const { - return *s; + return s; } operator const std::string_view () const { - return *s; - } - - bool set() const - { return s; } friend std::ostream & operator << (std::ostream & str, const Symbol & sym); }; +class SymbolIdx +{ + friend class SymbolTable; + +private: + uint32_t id; + + explicit SymbolIdx(uint32_t id): id(id) {} + +public: + SymbolIdx() : id(0) {} + + explicit operator bool() const { return id > 0; } + + bool operator<(const SymbolIdx other) const { return id < other.id; } + bool operator==(const SymbolIdx other) const { return id == other.id; } + bool operator!=(const SymbolIdx other) const { return id != other.id; } +}; + class SymbolTable { private: - std::unordered_map<std::string_view, Symbol> symbols; - std::list<std::string> store; + std::unordered_map<std::string_view, std::pair<const Symbol *, uint32_t>> symbols; + ChunkedVector<Symbol, 8192> store{16}; public: - Symbol create(std::string_view s) + SymbolIdx create(std::string_view s) { // Most symbols are looked up more than once, so we trade off insertion performance // for lookup performance. // TODO: could probably be done more efficiently with transparent Hash and Equals // on the original implementation using unordered_set auto it = symbols.find(s); - if (it != symbols.end()) return it->second; + if (it != symbols.end()) return SymbolIdx(it->second.second + 1); - auto & rawSym = store.emplace_back(s); - return symbols.emplace(rawSym, Symbol(&rawSym)).first->second; + const auto & [rawSym, idx] = store.add(s); + symbols.emplace(rawSym, std::make_pair(&rawSym, idx)); + return SymbolIdx(idx + 1); + } + + const Symbol & operator[](SymbolIdx s) const + { + if (s.id == 0 || s.id > store.size()) + abort(); + return store[s.id - 1]; } size_t size() const { - return symbols.size(); + return store.size(); } size_t totalSize() const; template<typename T> - void dump(T callback) + void dump(T callback) const { - for (auto & s : store) - callback(s); + store.forEach(callback); } }; |