diff options
author | Théophane Hufschmitt <theophane.hufschmitt@tweag.io> | 2023-03-17 15:51:08 +0100 |
---|---|---|
committer | Théophane Hufschmitt <theophane.hufschmitt@tweag.io> | 2023-05-24 14:11:50 +0200 |
commit | 3ebe1341abe1b0ad59bd4925517af18d9200f818 (patch) | |
tree | 66e6ef77c7c60ac559c7aac06f1054427a0afcb1 /src/libstore | |
parent | 6e4570234d5ac63a9483fb7f7aabaa1d17561a3a (diff) |
Make `RewritingSink` accept a map of rewrites
Giving it the same semantics as `rewriteStrings`.
Also add some tests for it
Diffstat (limited to 'src/libstore')
-rw-r--r-- | src/libstore/build/local-derivation-goal.cc | 2 | ||||
-rw-r--r-- | src/libstore/path-references.cc | 73 | ||||
-rw-r--r-- | src/libstore/path-references.hh | 25 | ||||
-rw-r--r-- | src/libstore/references.cc | 188 | ||||
-rw-r--r-- | src/libstore/references.hh | 71 |
5 files changed, 99 insertions, 260 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 05d6685da..6f7ab8a3d 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -4,7 +4,7 @@ #include "worker.hh" #include "builtins.hh" #include "builtins/buildenv.hh" -#include "references.hh" +#include "path-references.hh" #include "finally.hh" #include "util.hh" #include "archive.hh" diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc new file mode 100644 index 000000000..33cf66ce3 --- /dev/null +++ b/src/libstore/path-references.cc @@ -0,0 +1,73 @@ +#include "path-references.hh" +#include "hash.hh" +#include "util.hh" +#include "archive.hh" + +#include <map> +#include <cstdlib> +#include <mutex> +#include <algorithm> + + +namespace nix { + + +PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap) + : RefScanSink(std::move(hashes)) + , backMap(std::move(backMap)) +{ } + +PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs) +{ + StringSet hashes; + std::map<std::string, StorePath> backMap; + + for (auto & i : refs) { + std::string hashPart(i.hashPart()); + auto inserted = backMap.emplace(hashPart, i).second; + assert(inserted); + hashes.insert(hashPart); + } + + return PathRefScanSink(std::move(hashes), std::move(backMap)); +} + +StorePathSet PathRefScanSink::getResultPaths() +{ + /* Map the hashes found back to their store paths. */ + StorePathSet found; + for (auto & i : getResult()) { + auto j = backMap.find(i); + assert(j != backMap.end()); + found.insert(j->second); + } + + return found; +} + + +std::pair<StorePathSet, HashResult> scanForReferences( + const std::string & path, + const StorePathSet & refs) +{ + HashSink hashSink { htSHA256 }; + auto found = scanForReferences(hashSink, path, refs); + auto hash = hashSink.finish(); + return std::pair<StorePathSet, HashResult>(found, hash); +} + +StorePathSet scanForReferences( + Sink & toTee, + const Path & path, + const StorePathSet & refs) +{ + PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs); + TeeSink sink { refsSink, toTee }; + + /* Look for the hashes in the NAR dump of the path. */ + dumpPath(path, sink); + + return refsSink.getResultPaths(); +} + +} diff --git a/src/libstore/path-references.hh b/src/libstore/path-references.hh new file mode 100644 index 000000000..7b44e3261 --- /dev/null +++ b/src/libstore/path-references.hh @@ -0,0 +1,25 @@ +#pragma once + +#include "references.hh" +#include "path.hh" + +namespace nix { + +std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs); + +StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs); + +class PathRefScanSink : public RefScanSink +{ + std::map<std::string, StorePath> backMap; + + PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap); + +public: + + static PathRefScanSink fromPaths(const StorePathSet & refs); + + StorePathSet getResultPaths(); +}; + +} diff --git a/src/libstore/references.cc b/src/libstore/references.cc deleted file mode 100644 index 345f4528b..000000000 --- a/src/libstore/references.cc +++ /dev/null @@ -1,188 +0,0 @@ -#include "references.hh" -#include "hash.hh" -#include "util.hh" -#include "archive.hh" - -#include <map> -#include <cstdlib> -#include <mutex> - - -namespace nix { - - -static size_t refLength = 32; /* characters */ - - -static void search( - std::string_view s, - StringSet & hashes, - StringSet & seen) -{ - static std::once_flag initialised; - static bool isBase32[256]; - std::call_once(initialised, [](){ - for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false; - for (unsigned int i = 0; i < base32Chars.size(); ++i) - isBase32[(unsigned char) base32Chars[i]] = true; - }); - - for (size_t i = 0; i + refLength <= s.size(); ) { - int j; - bool match = true; - for (j = refLength - 1; j >= 0; --j) - if (!isBase32[(unsigned char) s[i + j]]) { - i += j + 1; - match = false; - break; - } - if (!match) continue; - std::string ref(s.substr(i, refLength)); - if (hashes.erase(ref)) { - debug("found reference to '%1%' at offset '%2%'", ref, i); - seen.insert(ref); - } - ++i; - } -} - - -void RefScanSink::operator () (std::string_view data) -{ - /* It's possible that a reference spans the previous and current - fragment, so search in the concatenation of the tail of the - previous fragment and the start of the current fragment. */ - auto s = tail; - auto tailLen = std::min(data.size(), refLength); - s.append(data.data(), tailLen); - search(s, hashes, seen); - - search(data, hashes, seen); - - auto rest = refLength - tailLen; - if (rest < tail.size()) - tail = tail.substr(tail.size() - rest); - tail.append(data.data() + data.size() - tailLen, tailLen); -} - - -PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap) - : RefScanSink(std::move(hashes)) - , backMap(std::move(backMap)) -{ } - -PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs) -{ - StringSet hashes; - std::map<std::string, StorePath> backMap; - - for (auto & i : refs) { - std::string hashPart(i.hashPart()); - auto inserted = backMap.emplace(hashPart, i).second; - assert(inserted); - hashes.insert(hashPart); - } - - return PathRefScanSink(std::move(hashes), std::move(backMap)); -} - -StorePathSet PathRefScanSink::getResultPaths() -{ - /* Map the hashes found back to their store paths. */ - StorePathSet found; - for (auto & i : getResult()) { - auto j = backMap.find(i); - assert(j != backMap.end()); - found.insert(j->second); - } - - return found; -} - - -std::pair<StorePathSet, HashResult> scanForReferences( - const std::string & path, - const StorePathSet & refs) -{ - HashSink hashSink { htSHA256 }; - auto found = scanForReferences(hashSink, path, refs); - auto hash = hashSink.finish(); - return std::pair<StorePathSet, HashResult>(found, hash); -} - -StorePathSet scanForReferences( - Sink & toTee, - const Path & path, - const StorePathSet & refs) -{ - PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs); - TeeSink sink { refsSink, toTee }; - - /* Look for the hashes in the NAR dump of the path. */ - dumpPath(path, sink); - - return refsSink.getResultPaths(); -} - - -RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink) - : from(from), to(to), nextSink(nextSink) -{ - assert(from.size() == to.size()); -} - -void RewritingSink::operator () (std::string_view data) -{ - std::string s(prev); - s.append(data); - - size_t j = 0; - while ((j = s.find(from, j)) != std::string::npos) { - matches.push_back(pos + j); - s.replace(j, from.size(), to); - } - - prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1); - - auto consumed = s.size() - prev.size(); - - pos += consumed; - - if (consumed) nextSink(s.substr(0, consumed)); -} - -void RewritingSink::flush() -{ - if (prev.empty()) return; - pos += prev.size(); - nextSink(prev); - prev.clear(); -} - -HashModuloSink::HashModuloSink(HashType ht, const std::string & modulus) - : hashSink(ht) - , rewritingSink(modulus, std::string(modulus.size(), 0), hashSink) -{ -} - -void HashModuloSink::operator () (std::string_view data) -{ - rewritingSink(data); -} - -HashResult HashModuloSink::finish() -{ - rewritingSink.flush(); - - /* Hash the positions of the self-references. This ensures that a - NAR with self-references and a NAR with some of the - self-references already zeroed out do not produce a hash - collision. FIXME: proof. */ - for (auto & pos : rewritingSink.matches) - hashSink(fmt("|%d", pos)); - - auto h = hashSink.finish(); - return {h.first, rewritingSink.pos}; -} - -} diff --git a/src/libstore/references.hh b/src/libstore/references.hh deleted file mode 100644 index 52d71b333..000000000 --- a/src/libstore/references.hh +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -///@file - -#include "hash.hh" -#include "path.hh" - -namespace nix { - -std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs); - -StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs); - -class RefScanSink : public Sink -{ - StringSet hashes; - StringSet seen; - - std::string tail; - -public: - - RefScanSink(StringSet && hashes) : hashes(hashes) - { } - - StringSet & getResult() - { return seen; } - - void operator () (std::string_view data) override; -}; - -class PathRefScanSink : public RefScanSink -{ - std::map<std::string, StorePath> backMap; - - PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap); - -public: - - static PathRefScanSink fromPaths(const StorePathSet & refs); - - StorePathSet getResultPaths(); -}; - -struct RewritingSink : Sink -{ - std::string from, to, prev; - Sink & nextSink; - uint64_t pos = 0; - - std::vector<uint64_t> matches; - - RewritingSink(const std::string & from, const std::string & to, Sink & nextSink); - - void operator () (std::string_view data) override; - - void flush(); -}; - -struct HashModuloSink : AbstractHashSink -{ - HashSink hashSink; - RewritingSink rewritingSink; - - HashModuloSink(HashType ht, const std::string & modulus); - - void operator () (std::string_view data) override; - - HashResult finish() override; -}; - -} |