aboutsummaryrefslogtreecommitdiff
path: root/src/libstore
diff options
context:
space:
mode:
authorJohn Ericson <git@JohnEricson.me>2020-03-19 00:37:57 -0400
committerJohn Ericson <John.Ericson@Obsidian.Systems>2020-03-19 10:30:49 -0400
commitf1cf3ab870343a6894c08e2bb893ea69badfc397 (patch)
treea794a1913471f0d44f45e67655c217850d759a8f /src/libstore
parentef74fafc0368944e6cfc3b804b4bcdddd6bcf9c0 (diff)
hashDerivationModulo: Generalize for multiple fixed outputs per drv
See documentation in header and comments in implementation for details. This is actually done in preparation for floating ca derivations, not multi-output fixed ca derivations, but the distinction doesn't yet matter. Thanks @cole-h for finding and fixing a bunch of typos.
Diffstat (limited to 'src/libstore')
-rw-r--r--src/libstore/derivations.cc98
-rw-r--r--src/libstore/derivations.hh33
-rw-r--r--src/libstore/local-store.cc4
3 files changed, 99 insertions, 36 deletions
diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc
index 205b90e55..13f2b4770 100644
--- a/src/libstore/derivations.cc
+++ b/src/libstore/derivations.cc
@@ -338,49 +338,81 @@ bool BasicDerivation::isFixedOutput() const
DrvHashes drvHashes;
+/* pathDerivationModulo and hashDerivationModulo are mutually recursive
+ */
-/* Returns the hash of a derivation modulo fixed-output
- subderivations. A fixed-output derivation is a derivation with one
- output (`out') for which an expected hash and hash algorithm are
- specified (using the `outputHash' and `outputHashAlgo'
- attributes). We don't want changes to such derivations to
- propagate upwards through the dependency graph, changing output
- paths everywhere.
-
- For instance, if we change the url in a call to the `fetchurl'
- function, we do not want to rebuild everything depending on it
- (after all, (the hash of) the file being downloaded is unchanged).
- So the *output paths* should not change. On the other hand, the
- *derivation paths* should change to reflect the new dependency
- graph.
-
- That's what this function does: it returns a hash which is just the
- hash of the derivation ATerm, except that any input derivation
- paths have been replaced by the result of a recursive call to this
- function, and that for fixed-output derivations we return a hash of
- its output path. */
-Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
+/* Return the memoized `hashDerivationModulo` result for the derivation
+ at `drvPath`, computing and caching it in `drvHashes` on a miss. The
+ returned reference points into the global `drvHashes` map. */
+static DrvHashModulo & pathDerivationModulo(Store & store, const StorePath & drvPath)
+{
+ auto h = drvHashes.find(drvPath);
+ if (h == drvHashes.end()) {
+ assert(store.isValidPath(drvPath));
+ // Cache miss: read the derivation, hash it with maskOutputs = false, and store the result
+ h = drvHashes.insert_or_assign(
+ drvPath.clone(),
+ hashDerivationModulo(
+ store,
+ readDerivation(
+ store,
+ store.toRealPath(store.printStorePath(drvPath))),
+ false)).first;
+ }
+ return h->second;
+}
+
+/* See the header for interface details. These are the implementation details.
+
+ For fixed-output derivations, each hash in the map is not the
+ corresponding output's content hash, but a hash of that hash along
+ with other constant data. The key point is that the value is a pure
+ function of the output's contents, and there are no preimage attacks
+ spoofing either an output's contents for a derivation, or a
+ derivation for an output's contents.
+
+ For regular derivations, it looks up each subderivation from its hash
+ and recurs. If the subderivation is also regular, it simply
+ substitutes the derivation path with its hash. If the subderivation
+ is fixed-output, however, it takes each output hash and pretends it
+ is a derivation hash producing a single "out" output. This is so we
+ don't leak the provenance of fixed outputs, reducing pointless cache
+ misses as the build itself won't know this.
+ */
+DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs)
{
/* Return a fixed hash for fixed-output derivations. */
if (drv.isFixedOutput()) {
- DerivationOutputs::const_iterator i = drv.outputs.begin();
- return hashString(htSHA256, "fixed:out:"
- + i->second.hashAlgo + ":"
- + i->second.hash + ":"
- + store.printStorePath(i->second.path));
+ std::map<std::string, Hash> outputHashes;
+ for (const auto & i : drv.outputs) {
+ const Hash h = hashString(htSHA256, "fixed:out:"
+ + i.second.hashAlgo + ":"
+ + i.second.hash + ":"
+ + store.printStorePath(i.second.path));
+ outputHashes.insert_or_assign(std::string(i.first), std::move(h));
+ }
+ return outputHashes;
}
/* For other derivations, replace the inputs paths with recursive
- calls to this function.*/
+ calls to this function. */
std::map<std::string, StringSet> inputs2;
for (auto & i : drv.inputDrvs) {
- auto h = drvHashes.find(i.first);
- if (h == drvHashes.end()) {
- assert(store.isValidPath(i.first));
- h = drvHashes.insert_or_assign(i.first.clone(), hashDerivationModulo(store,
- readDerivation(store, store.toRealPath(store.printStorePath(i.first))), false)).first;
+ const auto res = pathDerivationModulo(store, i.first);
+ if (const Hash *pval = std::get_if<0>(&res)) {
+ // regular non-CA derivation, replace derivation
+ inputs2.insert_or_assign(pval->to_string(Base16, false), i.second);
+ } else if (const std::map<std::string, Hash> *pval = std::get_if<1>(&res)) {
+ // CA derivation's output hashes
+ std::set justOut = { std::string("out") };
+ for (auto & output : i.second) {
+ /* Put each one in with a single "out" output.. */
+ const auto h = pval->at(output);
+ inputs2.insert_or_assign(
+ h.to_string(Base16, false),
+ justOut);
+ }
}
- inputs2.insert_or_assign(h->second.to_string(Base16, false), i.second);
}
return hashString(htSHA256, drv.unparse(store, maskOutputs, &inputs2));
diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh
index c2df66229..c021bf907 100644
--- a/src/libstore/derivations.hh
+++ b/src/libstore/derivations.hh
@@ -5,6 +5,7 @@
#include "store-api.hh"
#include <map>
+#include <variant>
namespace nix {
@@ -87,10 +88,38 @@ Derivation readDerivation(const Store & store, const Path & drvPath);
// FIXME: remove
bool isDerivation(const string & fileName);
-Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
+typedef std::variant<
+ Hash, // regular derivation: a single normalized hash
+ std::map<std::string, Hash> // fixed-output (CA) derivation: one hash per output name
+> DrvHashModulo;
+
+/* Returns hashes with the details of fixed-output subderivations
+ expunged.
+
+ A fixed-output derivation is a derivation whose outputs have a
+ specified content hash and hash algorithm. (Currently they must have
+ exactly one output (`out'), which is specified using the `outputHash'
+ and `outputHashAlgo' attributes, but the algorithm doesn't assume
+ this). We don't want changes to such derivations to propagate upwards
+ through the dependency graph, changing output paths everywhere.
+
+ For instance, if we change the url in a call to the `fetchurl'
+ function, we do not want to rebuild everything depending on it (after
+ all, (the hash of) the file being downloaded is unchanged). So the
+ *output paths* should not change. On the other hand, the *derivation
+ paths* should change to reflect the new dependency graph.
+
+ For fixed-output derivations, this returns a map from the names of
+ each output to hashes unique up to the outputs' contents.
+
+ For regular derivations, it returns a single hash of the derivation
+ ATerm, after subderivations have been likewise expunged from that
+ derivation.
+ */
+DrvHashModulo hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutputs);
/* Memoisation of hashDerivationModulo(). */
-typedef std::map<StorePath, Hash> DrvHashes;
+typedef std::map<StorePath, DrvHashModulo> DrvHashes;
extern DrvHashes drvHashes; // FIXME: global, not thread-safe
diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index cd2e86f29..8639cbf20 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -571,7 +571,9 @@ void LocalStore::checkDerivationOutputs(const StorePath & drvPath, const Derivat
}
else {
- Hash h = hashDerivationModulo(*this, drv, true);
+ // Regular, non-CA derivation should always return a single hash and not
+ // hash per output.
+ Hash h = std::get<0>(hashDerivationModulo(*this, drv, true));
for (auto & i : drv.outputs)
check(makeOutputPath(i.first, h, drvName), i.second.path, i.first);
}