about | summary | refs | log | tree | commit | diff
path: root/src/libstore
diff options
context:
space:
mode:
author: Théophane Hufschmitt <theophane.hufschmitt@tweag.io> 2023-03-17 15:51:08 +0100
committer: Théophane Hufschmitt <theophane.hufschmitt@tweag.io> 2023-05-24 14:11:50 +0200
commit: 3ebe1341abe1b0ad59bd4925517af18d9200f818 (patch)
tree: 66e6ef77c7c60ac559c7aac06f1054427a0afcb1 /src/libstore
parent: 6e4570234d5ac63a9483fb7f7aabaa1d17561a3a (diff)
Make `RewritingSink` accept a map of rewrites
This gives it the same semantics as `rewriteStrings`. Also add some tests for it.
Diffstat (limited to 'src/libstore')
-rw-r--r-- src/libstore/build/local-derivation-goal.cc 2
-rw-r--r-- src/libstore/path-references.cc 73
-rw-r--r-- src/libstore/path-references.hh 25
-rw-r--r-- src/libstore/references.cc 188
-rw-r--r-- src/libstore/references.hh 71
5 files changed, 99 insertions, 260 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index 05d6685da..6f7ab8a3d 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -4,7 +4,7 @@
#include "worker.hh"
#include "builtins.hh"
#include "builtins/buildenv.hh"
-#include "references.hh"
+#include "path-references.hh"
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
diff --git a/src/libstore/path-references.cc b/src/libstore/path-references.cc
new file mode 100644
index 000000000..33cf66ce3
--- /dev/null
+++ b/src/libstore/path-references.cc
@@ -0,0 +1,73 @@
+#include "path-references.hh"
+#include "hash.hh"
+#include "util.hh"
+#include "archive.hh"
+
+#include <map>
+#include <cstdlib>
+#include <mutex>
+#include <algorithm>
+
+
+namespace nix {
+
+
+PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap)
+ : RefScanSink(std::move(hashes))
+ , backMap(std::move(backMap))
+{ }
+
+PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs)
+{
+ StringSet hashes;
+ std::map<std::string, StorePath> backMap;
+
+ for (auto & i : refs) {
+ std::string hashPart(i.hashPart());
+ auto inserted = backMap.emplace(hashPart, i).second;
+ assert(inserted);
+ hashes.insert(hashPart);
+ }
+
+ return PathRefScanSink(std::move(hashes), std::move(backMap));
+}
+
+StorePathSet PathRefScanSink::getResultPaths()
+{
+ /* Map the hashes found back to their store paths. */
+ StorePathSet found;
+ for (auto & i : getResult()) {
+ auto j = backMap.find(i);
+ assert(j != backMap.end());
+ found.insert(j->second);
+ }
+
+ return found;
+}
+
+
+std::pair<StorePathSet, HashResult> scanForReferences(
+ const std::string & path,
+ const StorePathSet & refs)
+{
+ HashSink hashSink { htSHA256 };
+ auto found = scanForReferences(hashSink, path, refs);
+ auto hash = hashSink.finish();
+ return std::pair<StorePathSet, HashResult>(found, hash);
+}
+
+StorePathSet scanForReferences(
+ Sink & toTee,
+ const Path & path,
+ const StorePathSet & refs)
+{
+ PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs);
+ TeeSink sink { refsSink, toTee };
+
+ /* Look for the hashes in the NAR dump of the path. */
+ dumpPath(path, sink);
+
+ return refsSink.getResultPaths();
+}
+
+}
diff --git a/src/libstore/path-references.hh b/src/libstore/path-references.hh
new file mode 100644
index 000000000..7b44e3261
--- /dev/null
+++ b/src/libstore/path-references.hh
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "references.hh"
+#include "path.hh"
+
+namespace nix {
+
+std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs);
+
+StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);
+
+class PathRefScanSink : public RefScanSink
+{
+ std::map<std::string, StorePath> backMap;
+
+ PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap);
+
+public:
+
+ static PathRefScanSink fromPaths(const StorePathSet & refs);
+
+ StorePathSet getResultPaths();
+};
+
+}
diff --git a/src/libstore/references.cc b/src/libstore/references.cc
deleted file mode 100644
index 345f4528b..000000000
--- a/src/libstore/references.cc
+++ /dev/null
@@ -1,188 +0,0 @@
-#include "references.hh"
-#include "hash.hh"
-#include "util.hh"
-#include "archive.hh"
-
-#include <map>
-#include <cstdlib>
-#include <mutex>
-
-
-namespace nix {
-
-
-static size_t refLength = 32; /* characters */
-
-
-static void search(
- std::string_view s,
- StringSet & hashes,
- StringSet & seen)
-{
- static std::once_flag initialised;
- static bool isBase32[256];
- std::call_once(initialised, [](){
- for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false;
- for (unsigned int i = 0; i < base32Chars.size(); ++i)
- isBase32[(unsigned char) base32Chars[i]] = true;
- });
-
- for (size_t i = 0; i + refLength <= s.size(); ) {
- int j;
- bool match = true;
- for (j = refLength - 1; j >= 0; --j)
- if (!isBase32[(unsigned char) s[i + j]]) {
- i += j + 1;
- match = false;
- break;
- }
- if (!match) continue;
- std::string ref(s.substr(i, refLength));
- if (hashes.erase(ref)) {
- debug("found reference to '%1%' at offset '%2%'", ref, i);
- seen.insert(ref);
- }
- ++i;
- }
-}
-
-
-void RefScanSink::operator () (std::string_view data)
-{
- /* It's possible that a reference spans the previous and current
- fragment, so search in the concatenation of the tail of the
- previous fragment and the start of the current fragment. */
- auto s = tail;
- auto tailLen = std::min(data.size(), refLength);
- s.append(data.data(), tailLen);
- search(s, hashes, seen);
-
- search(data, hashes, seen);
-
- auto rest = refLength - tailLen;
- if (rest < tail.size())
- tail = tail.substr(tail.size() - rest);
- tail.append(data.data() + data.size() - tailLen, tailLen);
-}
-
-
-PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap)
- : RefScanSink(std::move(hashes))
- , backMap(std::move(backMap))
-{ }
-
-PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs)
-{
- StringSet hashes;
- std::map<std::string, StorePath> backMap;
-
- for (auto & i : refs) {
- std::string hashPart(i.hashPart());
- auto inserted = backMap.emplace(hashPart, i).second;
- assert(inserted);
- hashes.insert(hashPart);
- }
-
- return PathRefScanSink(std::move(hashes), std::move(backMap));
-}
-
-StorePathSet PathRefScanSink::getResultPaths()
-{
- /* Map the hashes found back to their store paths. */
- StorePathSet found;
- for (auto & i : getResult()) {
- auto j = backMap.find(i);
- assert(j != backMap.end());
- found.insert(j->second);
- }
-
- return found;
-}
-
-
-std::pair<StorePathSet, HashResult> scanForReferences(
- const std::string & path,
- const StorePathSet & refs)
-{
- HashSink hashSink { htSHA256 };
- auto found = scanForReferences(hashSink, path, refs);
- auto hash = hashSink.finish();
- return std::pair<StorePathSet, HashResult>(found, hash);
-}
-
-StorePathSet scanForReferences(
- Sink & toTee,
- const Path & path,
- const StorePathSet & refs)
-{
- PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs);
- TeeSink sink { refsSink, toTee };
-
- /* Look for the hashes in the NAR dump of the path. */
- dumpPath(path, sink);
-
- return refsSink.getResultPaths();
-}
-
-
-RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink)
- : from(from), to(to), nextSink(nextSink)
-{
- assert(from.size() == to.size());
-}
-
-void RewritingSink::operator () (std::string_view data)
-{
- std::string s(prev);
- s.append(data);
-
- size_t j = 0;
- while ((j = s.find(from, j)) != std::string::npos) {
- matches.push_back(pos + j);
- s.replace(j, from.size(), to);
- }
-
- prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1);
-
- auto consumed = s.size() - prev.size();
-
- pos += consumed;
-
- if (consumed) nextSink(s.substr(0, consumed));
-}
-
-void RewritingSink::flush()
-{
- if (prev.empty()) return;
- pos += prev.size();
- nextSink(prev);
- prev.clear();
-}
-
-HashModuloSink::HashModuloSink(HashType ht, const std::string & modulus)
- : hashSink(ht)
- , rewritingSink(modulus, std::string(modulus.size(), 0), hashSink)
-{
-}
-
-void HashModuloSink::operator () (std::string_view data)
-{
- rewritingSink(data);
-}
-
-HashResult HashModuloSink::finish()
-{
- rewritingSink.flush();
-
- /* Hash the positions of the self-references. This ensures that a
- NAR with self-references and a NAR with some of the
- self-references already zeroed out do not produce a hash
- collision. FIXME: proof. */
- for (auto & pos : rewritingSink.matches)
- hashSink(fmt("|%d", pos));
-
- auto h = hashSink.finish();
- return {h.first, rewritingSink.pos};
-}
-
-}
diff --git a/src/libstore/references.hh b/src/libstore/references.hh
deleted file mode 100644
index 52d71b333..000000000
--- a/src/libstore/references.hh
+++ /dev/null
@@ -1,71 +0,0 @@
-#pragma once
-///@file
-
-#include "hash.hh"
-#include "path.hh"
-
-namespace nix {
-
-std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs);
-
-StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);
-
-class RefScanSink : public Sink
-{
- StringSet hashes;
- StringSet seen;
-
- std::string tail;
-
-public:
-
- RefScanSink(StringSet && hashes) : hashes(hashes)
- { }
-
- StringSet & getResult()
- { return seen; }
-
- void operator () (std::string_view data) override;
-};
-
-class PathRefScanSink : public RefScanSink
-{
- std::map<std::string, StorePath> backMap;
-
- PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap);
-
-public:
-
- static PathRefScanSink fromPaths(const StorePathSet & refs);
-
- StorePathSet getResultPaths();
-};
-
-struct RewritingSink : Sink
-{
- std::string from, to, prev;
- Sink & nextSink;
- uint64_t pos = 0;
-
- std::vector<uint64_t> matches;
-
- RewritingSink(const std::string & from, const std::string & to, Sink & nextSink);
-
- void operator () (std::string_view data) override;
-
- void flush();
-};
-
-struct HashModuloSink : AbstractHashSink
-{
- HashSink hashSink;
- RewritingSink rewritingSink;
-
- HashModuloSink(HashType ht, const std::string & modulus);
-
- void operator () (std::string_view data) override;
-
- HashResult finish() override;
-};
-
-}