aboutsummaryrefslogtreecommitdiff
path: root/src/libexpr/symbol-table.hh
diff options
context:
space:
mode:
author: pennae <github@quasiparticle.net> 2022-03-05 14:40:24 +0100
committer: pennae <github@quasiparticle.net> 2022-04-21 21:56:31 +0200
commit: 8775be33931ec3b1cad97035ff3d5370a97178a1 (patch)
tree: 0855d6b35e24153092738315176ea19aa72b9530 /src/libexpr/symbol-table.hh
parent: 00a32802328b58daa7af48ccac60f6154ef05639 (diff)
store Symbols in a table as well, like positions
This slightly increases the amount of memory used for any given symbol, but the increase is more than made up for if the symbol is referenced more than once in the EvalState that holds it. On average every symbol should be referenced at least twice (once to introduce a binding, once to use it), so we expect no increase in memory on average. Symbol tables are limited to 2³² entries, like position tables, and similar arguments apply to why overflow is not likely: 2³² symbols would require as many string instances (at 24 bytes each) and map entries (at 24 bytes or more each, assuming that the map holds on average at most one item per bucket, as the docs say). A full symbol table would therefore require at least 192 GB of memory just for symbols, which is well out of reach. (An ofborg eval of nixpkgs today creates fewer than a million symbols!)
Diffstat (limited to 'src/libexpr/symbol-table.hh')
-rw-r--r--src/libexpr/symbol-table.hh77
1 files changed, 41 insertions, 36 deletions
diff --git a/src/libexpr/symbol-table.hh b/src/libexpr/symbol-table.hh
index 297605295..d0cd841a0 100644
--- a/src/libexpr/symbol-table.hh
+++ b/src/libexpr/symbol-table.hh
@@ -16,85 +16,90 @@ namespace nix {
class Symbol
{
-private:
- const std::string * s; // pointer into SymbolTable
- Symbol(const std::string * s) : s(s) { };
friend class SymbolTable;
+private:
+ std::string s;
public:
- Symbol() : s(0) { };
-
- bool operator == (const Symbol & s2) const
- {
- return s == s2.s;
- }
+ Symbol(std::string_view s) : s(s) { }
// FIXME: remove
bool operator == (std::string_view s2) const
{
- return s->compare(s2) == 0;
- }
-
- bool operator != (const Symbol & s2) const
- {
- return s != s2.s;
- }
-
- bool operator < (const Symbol & s2) const
- {
- return s < s2.s;
+ return s == s2;
}
operator const std::string & () const
{
- return *s;
+ return s;
}
operator const std::string_view () const
{
- return *s;
- }
-
- bool set() const
- {
return s;
}
friend std::ostream & operator << (std::ostream & str, const Symbol & sym);
};
+class SymbolIdx
+{
+ friend class SymbolTable;
+
+private:
+ uint32_t id;
+
+ explicit SymbolIdx(uint32_t id): id(id) {}
+
+public:
+ SymbolIdx() : id(0) {}
+
+ explicit operator bool() const { return id > 0; }
+
+ bool operator<(const SymbolIdx other) const { return id < other.id; }
+ bool operator==(const SymbolIdx other) const { return id == other.id; }
+ bool operator!=(const SymbolIdx other) const { return id != other.id; }
+};
+
class SymbolTable
{
private:
- std::unordered_map<std::string_view, Symbol> symbols;
- std::list<std::string> store;
+ std::unordered_map<std::string_view, std::pair<const Symbol *, uint32_t>> symbols;
+ ChunkedVector<Symbol, 8192> store{16};
public:
- Symbol create(std::string_view s)
+ SymbolIdx create(std::string_view s)
{
// Most symbols are looked up more than once, so we trade off insertion performance
// for lookup performance.
// TODO: could probably be done more efficiently with transparent Hash and Equals
// on the original implementation using unordered_set
auto it = symbols.find(s);
- if (it != symbols.end()) return it->second;
+ if (it != symbols.end()) return SymbolIdx(it->second.second + 1);
- auto & rawSym = store.emplace_back(s);
- return symbols.emplace(rawSym, Symbol(&rawSym)).first->second;
+ const auto & [rawSym, idx] = store.add(s);
+ symbols.emplace(rawSym, std::make_pair(&rawSym, idx));
+ return SymbolIdx(idx + 1);
+ }
+
+ const Symbol & operator[](SymbolIdx s) const
+ {
+ if (s.id == 0 || s.id > store.size())
+ abort();
+ return store[s.id - 1];
}
size_t size() const
{
- return symbols.size();
+ return store.size();
}
size_t totalSize() const;
template<typename T>
- void dump(T callback)
+ void dump(T callback) const
{
- for (auto & s : store)
- callback(s);
+ store.forEach(callback);
}
};