aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/libstore/build/local-derivation-goal.cc10
-rw-r--r--src/libstore/make-content-addressed.cc9
-rw-r--r--src/libutil/references.cc104
-rw-r--r--src/libutil/references.hh21
-rw-r--r--src/libutil/strings.cc24
-rw-r--r--tests/unit/libutil/references.cc10
6 files changed, 102 insertions, 76 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index d523ef51f..a071883bb 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -2150,14 +2150,10 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
if (!rewrites.empty()) {
debug("rewriting hashes in '%1%'; cross fingers", actualPath);
- /* FIXME: Is this actually streaming? */
- auto source = sinkToSource([&](Sink & nextSink) {
- RewritingSink rsink(rewrites, nextSink);
- rsink << dumpPath(actualPath);
- rsink.flush();
- });
+ GeneratorSource dump{dumpPath(actualPath)};
+ RewritingSource rewritten(rewrites, dump);
Path tmpPath = actualPath + ".tmp";
- restorePath(tmpPath, *source);
+ restorePath(tmpPath, rewritten);
deletePath(actualPath);
movePath(tmpPath, actualPath);
diff --git a/src/libstore/make-content-addressed.cc b/src/libstore/make-content-addressed.cc
index b5397541c..1aa139b38 100644
--- a/src/libstore/make-content-addressed.cc
+++ b/src/libstore/make-content-addressed.cc
@@ -61,15 +61,12 @@ std::map<StorePath, StorePath> makeContentAddressed(
printInfo("rewriting '%s' to '%s'", pathS, dstStore.printStorePath(info.path));
- StringSink sink2;
- RewritingSink rsink2(oldHashPart, std::string(info.path.hashPart()), sink2);
- rsink2(sink.s);
- rsink2.flush();
+ const auto rewritten = rewriteStrings(sink.s, {{oldHashPart, std::string(info.path.hashPart())}});
- info.narHash = hashString(htSHA256, sink2.s);
+ info.narHash = hashString(htSHA256, rewritten);
info.narSize = sink.s.size();
- StringSource source(sink2.s);
+ StringSource source(rewritten);
dstStore.addToStore(info, source);
remappings.insert_or_assign(std::move(path), std::move(info.path));
diff --git a/src/libutil/references.cc b/src/libutil/references.cc
index 8792578d4..9f4ab0678 100644
--- a/src/libutil/references.cc
+++ b/src/libutil/references.cc
@@ -65,56 +65,102 @@ void RefScanSink::operator () (std::string_view data)
}
-RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink)
- : RewritingSink({{from, to}}, nextSink)
+RewritingSource::RewritingSource(const std::string & from, const std::string & to, Source & inner)
+ : RewritingSource({{from, to}}, inner)
{
}
-RewritingSink::RewritingSink(const StringMap & rewrites, Sink & nextSink)
- : rewrites(rewrites), nextSink(nextSink)
+RewritingSource::RewritingSource(StringMap rewrites, Source & inner)
+ : RewritingSource(may_change_size, std::move(rewrites), inner)
{
- std::string::size_type maxRewriteSize = 0;
- for (auto & [from, to] : rewrites) {
+ for (auto & [from, to] : this->rewrites) {
assert(from.size() == to.size());
- maxRewriteSize = std::max(maxRewriteSize, from.size());
}
- this->maxRewriteSize = maxRewriteSize;
}
-void RewritingSink::operator () (std::string_view data)
+RewritingSource::RewritingSource(may_change_size_t, StringMap rewrites, Source & inner)
+ : maxRewriteSize([&, result = size_t(0)]() mutable {
+ for (auto & [k, v] : rewrites) {
+ result = std::max(result, k.size());
+ }
+ return result;
+ }())
+ , initials([&]() -> std::string {
+ std::string initials;
+ for (const auto & [k, v] : rewrites) {
+ assert(!k.empty());
+ initials.push_back(k[0]);
+ }
+ std::ranges::sort(initials);
+ auto [firstDupe, _end] = std::ranges::unique(initials);
+ return {initials.begin(), firstDupe};
+ }())
+ , rewrites(std::move(rewrites))
+ , inner(&inner)
{
- std::string s(prev);
- s.append(data);
+}
- s = rewriteStrings(s, rewrites);
+size_t RewritingSource::read(char * data, size_t len)
+{
+ if (rewrites.empty()) {
+ return inner->read(data, len);
+ }
+
+ if (unreturned.empty()) {
+ // always make sure to have at least *two* full rewrites in the buffer,
+ // otherwise we may end up incorrectly rewriting if the replacement map
+ // contains keys that are proper infixes of other keys in the map. take
+ // for example the set { ab -> cc, babb -> bbbb } on the input babb. if
+ // we feed the input bytewise without additional windowing we will miss
+ // the full babb match once the second b has been seen and bab has been
+ // rewritten to ccb, even though babb occurs first in the input string.
+ while (inner && buffered.size() < std::max(2 * maxRewriteSize, len)) {
+ try {
+ auto read = inner->read(data, std::min(2 * maxRewriteSize, len));
+ buffered.append(data, read);
+ } catch (EndOfFile &) {
+ inner = nullptr;
+ }
+ }
- prev = s.size() < maxRewriteSize
- ? s
- : maxRewriteSize == 0
- ? ""
- : std::string(s, s.size() - maxRewriteSize + 1, maxRewriteSize - 1);
+ if (buffered.empty() && !inner) {
+ throw EndOfFile("rewritten source exhausted");
+ }
- auto consumed = s.size() - prev.size();
+ const size_t reserved = inner ? maxRewriteSize : 0;
+ size_t j = 0;
+ while ((j = buffered.find_first_of(initials, j)) < buffered.size() - reserved) {
+ size_t skip = 1;
+ for (const auto & [from, to] : rewrites) {
+ if (buffered.compare(j, from.size(), from) == 0) {
+ buffered.replace(j, from.size(), to);
+ skip = to.size();
+ break;
+ }
+ }
+ j += skip;
+ }
- if (consumed) nextSink(s.substr(0, consumed));
-}
+ rewritten = std::move(buffered);
+ buffered = rewritten.substr(rewritten.size() - reserved);
+ unreturned = rewritten;
+ unreturned.remove_suffix(reserved);
+ }
-void RewritingSink::flush()
-{
- if (prev.empty()) return;
- nextSink(prev);
- prev.clear();
+ len = std::min(len, unreturned.size());
+ memcpy(data, unreturned.data(), len);
+ unreturned.remove_prefix(len);
+ return len;
}
HashResult computeHashModulo(HashType ht, const std::string & modulus, Source & source)
{
HashSink hashSink(ht);
LengthSink lengthSink;
- RewritingSink rewritingSink(modulus, std::string(modulus.size(), 0), hashSink);
+ RewritingSource rewritingSource(modulus, std::string(modulus.size(), 0), source);
- TeeSink tee{rewritingSink, lengthSink};
- source.drainInto(tee);
- rewritingSink.flush();
+ TeeSink tee{hashSink, lengthSink};
+ rewritingSource.drainInto(tee);
/* Hash the positions of the self-references. This ensures that a
NAR with self-references and a NAR with some of the
diff --git a/src/libutil/references.hh b/src/libutil/references.hh
index 3fefd824b..f0f467190 100644
--- a/src/libutil/references.hh
+++ b/src/libutil/references.hh
@@ -23,19 +23,24 @@ public:
void operator () (std::string_view data) override;
};
-struct RewritingSink : Sink
+struct RewritingSource : Source
{
+ const std::string::size_type maxRewriteSize;
+ const std::string initials;
const StringMap rewrites;
- std::string::size_type maxRewriteSize;
- std::string prev;
- Sink & nextSink;
+ std::string rewritten, buffered;
+ std::string_view unreturned;
+ Source * inner;
- RewritingSink(const std::string & from, const std::string & to, Sink & nextSink);
- RewritingSink(const StringMap & rewrites, Sink & nextSink);
+ static constexpr struct may_change_size_t {
+ explicit may_change_size_t() = default;
+ } may_change_size{};
- void operator () (std::string_view data) override;
+ RewritingSource(const std::string & from, const std::string & to, Source & inner);
+ RewritingSource(StringMap rewrites, Source & inner);
+ RewritingSource(may_change_size_t, StringMap rewrites, Source & inner);
- void flush();
+ size_t read(char * data, size_t len) override;
};
HashResult computeHashModulo(HashType ht, const std::string & modulus, Source & source);
diff --git a/src/libutil/strings.cc b/src/libutil/strings.cc
index 947478481..df48e9203 100644
--- a/src/libutil/strings.cc
+++ b/src/libutil/strings.cc
@@ -1,4 +1,5 @@
#include "strings.hh"
+#include "references.hh"
#include <boost/lexical_cast.hpp>
#include <stdint.h>
@@ -65,30 +66,13 @@ std::string replaceStrings(
Rewriter::Rewriter(std::map<std::string, std::string> rewrites)
: rewrites(std::move(rewrites))
{
- for (const auto & [k, v] : this->rewrites) {
- assert(!k.empty());
- initials.push_back(k[0]);
- }
- std::ranges::sort(initials);
- auto [firstDupe, end] = std::ranges::unique(initials);
- initials.erase(firstDupe, end);
}
std::string Rewriter::operator()(std::string s)
{
- size_t j = 0;
- while ((j = s.find_first_of(initials, j)) != std::string::npos) {
- size_t skip = 1;
- for (auto & [from, to] : rewrites) {
- if (s.compare(j, from.size(), from) == 0) {
- s.replace(j, from.size(), to);
- skip = to.size();
- break;
- }
- }
- j += skip;
- }
- return s;
+ StringSource src{s};
+ RewritingSource inner{RewritingSource::may_change_size, rewrites, src};
+ return inner.drain();
}
template<class N>
diff --git a/tests/unit/libutil/references.cc b/tests/unit/libutil/references.cc
index be2b59051..a914e6c70 100644
--- a/tests/unit/libutil/references.cc
+++ b/tests/unit/libutil/references.cc
@@ -25,11 +25,8 @@ class RewriteTest : public ::testing::TestWithParam<RewriteParams> {
TEST_P(RewriteTest, IdentityRewriteIsIdentity) {
RewriteParams param = GetParam();
- StringSink rewritten;
- auto rewriter = RewritingSink(param.rewrites, rewritten);
- rewriter(param.originalString);
- rewriter.flush();
- ASSERT_EQ(rewritten.s, param.finalString);
+ StringSource src{param.originalString};
+ ASSERT_EQ(RewritingSource(param.rewrites, src).drain(), param.finalString);
}
INSTANTIATE_TEST_CASE_P(
@@ -38,7 +35,8 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values(
RewriteParams{ "foooo", "baroo", {{"foo", "bar"}, {"bar", "baz"}}},
RewriteParams{ "foooo", "bazoo", {{"fou", "bar"}, {"foo", "baz"}}},
- RewriteParams{ "foooo", "foooo", {}}
+ RewriteParams{ "foooo", "foooo", {}},
+ RewriteParams{ "babb", "bbbb", {{"ab", "aa"}, {"babb", "bbbb"}}}
)
);