From 37fca662b0acef3c104a159709a394832e297dda Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 25 Mar 2022 01:26:07 +0000 Subject: Make `KeyedBuildResult`, `BuildResult` like before, and fix bug another way In https://github.com/NixOS/nix/pull/6311#discussion_r834863823, I realized since derivation goals' wanted outputs can "grow" due to overlapping dependencies (See `DerivationGoal::addWantedOutputs`, called by `Worker::makeDerivationGoalCommon`), the previous bug fix had an unfortunate side effect of causing more pointless rebuilds. In paticular, we have this situation: 1. Goal made from `DerivedPath::Built { foo, {a} }`. 2. Goal gives on on substituting, starts building. 3. Goal made from `DerivedPath::Built { foo, {b} }`, in fact is just modified original goal. 4. Though the goal had gotten as far as building, so all outputs were going to be produced, `addWantedOutputs` no longer knows that and so the goal is flagged to be restarted. This might sound far-fetched with input-addressed drvs, where we usually basically have all our goals "planned out" before we start doing anything, but with CA derivation goals and especially RFC 92, where *drv resolution* means goals are created after some building is completed, it is more likely to happen. So the first thing to do was restore the clearing of `wantedOutputs` we used to do, and then filter the outputs in `buildPathsWithResults` to only get the ones we care about. But fix also has its own side effect in that the `DerivedPath` in the `BuildResult` in `DerivationGoal` cannot be trusted; it is merely the *first* `DerivedPath` for which this goal was originally created. To remedy this, I made `BuildResult` be like it was before, and instead made `KeyedBuildResult` be a subclass wit the path. Only `buildPathsWithResults` returns `KeyedBuildResult`s, everything else just becomes like it was before, where the "key" is unambiguous from context. I think separating the "primary key" field(s) from the other fields is good practical in general anyways. (I would like to do the same thing for `ValidPathInfo`.) Among other things, it allows constructions like `std::map` where doesn't contain duplicate keys and just precludes the possibility of those duplicate keys being out of sync. We might leverage the above someday to overload `buildPathsWithResults` to take a *set* of return a *map* per the above. ----- Unfortunately, we need to avoid C++20 strictness on designated initializers. (BTW https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2287r1.html this offers some new syntax for this use-case. Hopefully this will be adopted and we can eventually use it.) No having that yet, maybe it would be better to not make `KeyedBuildResult` a subclass to just avoid this. Co-authored-by: Robert Hensing --- src/libstore/remote-store.cc | 50 ++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'src/libstore/remote-store.cc') diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index b862902d1..734e6f27f 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -125,10 +125,26 @@ void write(const Store & store, Sink & out, const DrvOutput & drvOutput) } -BuildResult read(const Store & store, Source & from, Phantom _) +KeyedBuildResult read(const Store & store, Source & from, Phantom _) { auto path = worker_proto::read(store, from, Phantom {}); - BuildResult res { .path = path }; + auto br = worker_proto::read(store, from, Phantom {}); + return KeyedBuildResult { + std::move(br), + /* .path = */ std::move(path), + }; +} + +void write(const Store & store, Sink & to, const KeyedBuildResult & res) +{ + worker_proto::write(store, to, res.path); + worker_proto::write(store, to, static_cast(res)); +} + + +BuildResult read(const Store & store, Source & from, Phantom _) +{ + BuildResult res; res.status = (BuildResult::Status) readInt(from); from >> res.errorMsg @@ -142,7 +158,6 @@ BuildResult read(const Store & store, Source & from, Phantom _) void write(const Store & store, Sink & to, const BuildResult & res) { - worker_proto::write(store, to, res.path); to << res.status << res.errorMsg @@ -865,7 +880,7 @@ void RemoteStore::buildPaths(const std::vector & drvPaths, BuildMod readInt(conn->from); } -std::vector RemoteStore::buildPathsWithResults( +std::vector RemoteStore::buildPathsWithResults( const std::vector & paths, BuildMode buildMode, std::shared_ptr evalStore) @@ -880,7 +895,7 @@ std::vector RemoteStore::buildPathsWithResults( writeDerivedPaths(*this, conn, paths); conn->to << buildMode; conn.processStderr(); - return worker_proto::read(*this, conn->from, Phantom> {}); + return worker_proto::read(*this, conn->from, Phantom> {}); } else { // Avoid deadlock. conn_.reset(); @@ -889,21 +904,25 @@ std::vector RemoteStore::buildPathsWithResults( // fails, but meh. buildPaths(paths, buildMode, evalStore); - std::vector results; + std::vector results; for (auto & path : paths) { std::visit( overloaded { [&](const DerivedPath::Opaque & bo) { - results.push_back(BuildResult { - .status = BuildResult::Substituted, - .path = bo, + results.push_back(KeyedBuildResult { + { + .status = BuildResult::Substituted, + }, + /* .path = */ bo, }); }, [&](const DerivedPath::Built & bfd) { - BuildResult res { - .status = BuildResult::Built, - .path = bfd, + KeyedBuildResult res { + { + .status = BuildResult::Built + }, + /* .path = */ bfd, }; OutputPathMap outputs; @@ -952,12 +971,7 @@ BuildResult RemoteStore::buildDerivation(const StorePath & drvPath, const BasicD writeDerivation(conn->to, *this, drv); conn->to << buildMode; conn.processStderr(); - BuildResult res { - .path = DerivedPath::Built { - .drvPath = drvPath, - .outputs = OutputsSpec::All { }, - }, - }; + BuildResult res; res.status = (BuildResult::Status) readInt(conn->from); conn->from >> res.errorMsg; if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 29) { -- cgit v1.2.3 From 24866b71c40f0fcb5a601d90d4f87366fe626090 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 14 Apr 2023 18:18:32 -0400 Subject: Introduce `SingleDrvOutputs` In many cases we are dealing with a collection of realisations, they are all outputs of the same derivation. In that case, we don't need "derivation hashes modulos" to be part of our map key, because the output names alone will be unique. Those hashes are still part of the realisation proper, so we aren't loosing any information, we're just "normalizing our schema" by narrowing the "primary key". Besides making our data model a bit "tighter" this allows us to avoid a double `for` loop in `DerivationGoal::waiteeDone`. The inner `for` loop was previously just to select the output we cared about without knowing its hash. Now we can just select the output by name directly. Note that neither protocol is changed as part of this: we are still transferring `DrvOutputs` over the wire for `BuildResult`s. I would only consider revising this once #6223 is merged, and we can mention protocol versions inside factored-out serialization logic. Until then it is better not change anything because it would come a the cost of code reuse. --- src/libstore/remote-store.cc | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'src/libstore/remote-store.cc') diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index 734e6f27f..69e809a0f 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -152,7 +152,11 @@ BuildResult read(const Store & store, Source & from, Phantom _) >> res.isNonDeterministic >> res.startTime >> res.stopTime; - res.builtOutputs = worker_proto::read(store, from, Phantom {}); + auto builtOutputs = worker_proto::read(store, from, Phantom {}); + for (auto && [output, realisation] : builtOutputs) + res.builtOutputs.insert_or_assign( + std::move(output.outputName), + std::move(realisation)); return res; } @@ -165,7 +169,10 @@ void write(const Store & store, Sink & to, const BuildResult & res) << res.isNonDeterministic << res.startTime << res.stopTime; - worker_proto::write(store, to, res.builtOutputs); + DrvOutputs builtOutputs; + for (auto & [output, realisation] : res.builtOutputs) + builtOutputs.insert_or_assign(realisation.id, realisation); + worker_proto::write(store, to, builtOutputs); } @@ -941,10 +948,10 @@ std::vector RemoteStore::buildPathsWithResults( queryRealisation(outputId); if (!realisation) throw MissingRealisation(outputId); - res.builtOutputs.emplace(realisation->id, *realisation); + res.builtOutputs.emplace(output, *realisation); } else { res.builtOutputs.emplace( - outputId, + output, Realisation { .id = outputId, .outPath = outputPath, @@ -979,7 +986,10 @@ BuildResult RemoteStore::buildDerivation(const StorePath & drvPath, const BasicD } if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 28) { auto builtOutputs = worker_proto::read(*this, conn->from, Phantom {}); - res.builtOutputs = builtOutputs; + for (auto && [output, realisation] : builtOutputs) + res.builtOutputs.insert_or_assign( + std::move(output.outputName), + std::move(realisation)); } return res; } -- cgit v1.2.3