aboutsummaryrefslogtreecommitdiff
path: root/src/libexpr/symbol-table.hh
blob: 63fb25d73f430d6d4cf388212bea876264fc7427 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#pragma once

#include <list>
#include <map>
#include <unordered_map>

#include "types.hh"
#include "chunked-vector.hh"

namespace nix {

/* Symbol table used by the parser and evaluator to represent and look
   up identifiers and attributes efficiently.  SymbolTable::create()
   converts a string into a symbol.  Symbols have the property that
   they can be compared efficiently (using an equality test),
   because the symbol table stores only one copy of each string. */

/* Thin, non-owning handle to the text of a symbol.  Its main purpose is
   to let symbols be written to ostreams via operator<<.  SymbolTable
   could hand out plain strings instead, but then every symbol that is
   fmt()ed would need to be wrapped by hand, which is inconvenient and
   error-prone. */
class SymbolStr
{
    friend class SymbolTable;

private:
    /* The viewed string.  Not owned: it is the address of the string
       passed to the constructor, so that string has to outlive this
       object. */
    const std::string * s;

    /* Private on purpose: only the befriended SymbolTable can create
       instances.  Stores the address of `symbol`; nothing is copied. */
    explicit SymbolStr(const std::string & symbol)
        : s(&symbol)
    {
    }

public:
    /* Compare the symbol's text with `s2` by content. */
    bool operator == (std::string_view s2) const
    {
        return std::string_view(*s) == s2;
    }

    /* Implicit access to the underlying string, by reference. */
    operator const std::string & () const
    {
        const std::string & text = *s;
        return text;
    }

    /* Implicit conversion to a view over the underlying string. */
    operator const std::string_view () const
    {
        return std::string_view(*s);
    }

    /* Stream output for symbols; only declared here. */
    friend std::ostream & operator <<(std::ostream & os, const SymbolStr & symbol);
};

/* Opaque handle for a symbol.  A Symbol is just a 32-bit id, so copying
   and comparing symbols costs no more than copying and comparing
   integers.  Id 0 is reserved for the "no symbol" value produced by the
   default constructor. */
class Symbol
{
    friend class SymbolTable;

private:
    uint32_t id;

    /* Private on purpose: only the befriended SymbolTable can create a
       Symbol carrying a specific id. */
    explicit Symbol(uint32_t rawId)
        : id{rawId}
    {
    }

public:
    /* Creates the null symbol (id 0), which tests false. */
    Symbol()
        : id{0}
    {
    }

    /* True for every symbol except the null symbol. */
    explicit operator bool() const { return id != 0; }

    /* Orders by numeric id, not by the text the symbol stands for. */
    bool operator<(const Symbol other) const { return other.id > id; }

    /* Two symbols are equal exactly when their ids are equal. */
    bool operator==(const Symbol other) const { return other.id == id; }
    bool operator!=(const Symbol other) const { return !(other.id == id); }
};

class SymbolTable
{
private:
    std::unordered_map<std::string_view, std::pair<const std::string *, uint32_t>> symbols;
    ChunkedVector<std::string, 8192> store{16};

public:
    Symbol create(std::string_view s)
    {
        // Most symbols are looked up more than once, so we trade off insertion performance
        // for lookup performance.
        // TODO: could probably be done more efficiently with transparent Hash and Equals
        // on the original implementation using unordered_set
        auto it = symbols.find(s);
        if (it != symbols.end()) return Symbol(it->second.second + 1);

        const auto & [rawSym, idx] = store.add(std::string(s));
        symbols.emplace(rawSym, std::make_pair(&rawSym, idx));
        return Symbol(idx + 1);
    }

    std::vector<SymbolStr> resolve(const std::vector<Symbol> & symbols) const
    {
        std::vector<SymbolStr> result;
        result.reserve(symbols.size());
        for (auto sym : symbols)
            result.push_back((*this)[sym]);
        return result;
    }

    SymbolStr operator[](Symbol s) const
    {
        if (s.id == 0 || s.id > store.size())
            abort();
        return SymbolStr(store[s.id - 1]);
    }

    size_t size() const
    {
        return store.size();
    }

    size_t totalSize() const;

    template<typename T>
    void dump(T callback) const
    {
        store.forEach(callback);
    }
};

}