From 0abb3ad53795aa3a4792d30e5721a337f0eddfb7 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 30 Mar 2018 00:56:13 +0200 Subject: Allow content-addressable paths to have references This adds a command 'nix make-content-addressable' that rewrites the specified store paths into content-addressable paths. The advantage of such paths is that 1) they can be imported without signatures; 2) they can enable deduplication in cases where derivation changes do not cause output changes (apart from store path hashes). For example, $ nix make-content-addressable -r nixpkgs.cowsay rewrote '/nix/store/g1g31ah55xdia1jdqabv1imf6mcw0nb1-glibc-2.25-49' to '/nix/store/48jfj7bg78a8n4f2nhg269rgw1936vj4-glibc-2.25-49' ... rewrote '/nix/store/qbi6rzpk0bxjw8lw6azn2mc7ynnn455q-cowsay-3.03+dfsg1-16' to '/nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16' We can then copy the resulting closure to another store without signatures: $ nix copy --trusted-public-keys '' ---to ~/my-nix /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 In order to support self-references in content-addressable paths, these paths are hashed "modulo" self-references, meaning that self-references are zeroed out during hashing. Somewhat annoyingly, this means that the NAR hash stored in the Nix database is no longer necessarily equal to the output of "nix hash-path"; for content-addressable paths, you need to pass the --modulo flag: $ nix path-info --json /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 | jq -r .[].narHash sha256:0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 1ggznh07khq0hz6id09pqws3a8q9pn03ya3c03nwck1kwq8rclzs $ nix hash-path --type sha256 --base32 /nix/store/iq6g2x4q62xp7y7493bibx0qn5w7xz67-cowsay-3.03+dfsg1-16 --modulo iq6g2x4q62xp7y7493bibx0qn5w7xz67 0ri611gdilz2c9rsibqhsipbfs9vwcqvs811a52i2bnkhv7w9mgw --- src/libstore/references.cc | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'src/libstore/references.cc') diff --git a/src/libstore/references.cc b/src/libstore/references.cc index 0dcc264c3..605ca9815 100644 --- a/src/libstore/references.cc +++ b/src/libstore/references.cc @@ -118,4 +118,66 @@ PathSet scanForReferences(const string & path, } +RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink) + : from(from), to(to), nextSink(nextSink) +{ + assert(from.size() == to.size()); +} + +void RewritingSink::operator () (const unsigned char * data, size_t len) +{ + std::string s(prev); + s.append((const char *) data, len); + + size_t j = 0; + while ((j = s.find(from, j)) != string::npos) { + matches.push_back(pos + j); + s.replace(j, from.size(), to); + } + + prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1); + + auto consumed = s.size() - prev.size(); + + pos += consumed; + + if (consumed) nextSink((unsigned char *) s.data(), consumed); +} + +void RewritingSink::flush() +{ + if (prev.empty()) return; + pos += prev.size(); + nextSink((unsigned char *) prev.data(), prev.size()); + prev.clear(); +} + +HashModuloSink::HashModuloSink(HashType ht, const std::string & modulus) + : hashSink(ht) + , rewritingSink(modulus, std::string(modulus.size(), 0), hashSink) +{ +} + +void HashModuloSink::operator () (const unsigned char * data, size_t len) +{ + rewritingSink(data, len); +} + +HashResult HashModuloSink::finish() +{ + rewritingSink.flush(); + + /* Hash the positions of the self-references. This ensures that a + NAR with self-references and a NAR with some of the + self-references already zeroed out do not produce a hash + collision. FIXME: proof. */ + for (auto & pos : rewritingSink.matches) { + auto s = fmt("|%d", pos); + hashSink((unsigned char *) s.data(), s.size()); + } + + auto h = hashSink.finish(); + return {h.first, rewritingSink.pos}; +} + } -- cgit v1.2.3