From 4eb8c698538e0c212214d22a3c516777d52f921b Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 12 Oct 2020 17:15:32 +0000 Subject: Rename to hand-hold git (derivation-goal.hh) --- src/libstore/build/derivation-goal.hh | 784 ++++++++++++++++++++++++++++++++++ 1 file changed, 784 insertions(+) create mode 100644 src/libstore/build/derivation-goal.hh (limited to 'src/libstore/build/derivation-goal.hh') diff --git a/src/libstore/build/derivation-goal.hh b/src/libstore/build/derivation-goal.hh new file mode 100644 index 000000000..8027c61b1 --- /dev/null +++ b/src/libstore/build/derivation-goal.hh @@ -0,0 +1,784 @@ +#pragma once + +#include "machines.hh" +#include "parsed-derivations.hh" +#include "lock.hh" +#include "local-store.hh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nix { + +using std::map; + + +/* Forward definition. */ +class Worker; +struct HookInstance; + + +/* A pointer to a goal. */ +struct Goal; +class DerivationGoal; +typedef std::shared_ptr GoalPtr; +typedef std::weak_ptr WeakGoalPtr; + +struct CompareGoalPtrs { + bool operator() (const GoalPtr & a, const GoalPtr & b) const; +}; + +/* Set of goals. */ +typedef set Goals; +typedef list WeakGoals; + +/* A map of paths to goals (and the other way around). */ +typedef std::map WeakGoalMap; + + + +struct Goal : public std::enable_shared_from_this +{ + typedef enum {ecBusy, ecSuccess, ecFailed, ecNoSubstituters, ecIncompleteClosure} ExitCode; + + /* Backlink to the worker. */ + Worker & worker; + + /* Goals that this goal is waiting for. */ + Goals waitees; + + /* Goals waiting for this one to finish. Must use weak pointers + here to prevent cycles. */ + WeakGoals waiters; + + /* Number of goals we are/were waiting for that have failed. */ + unsigned int nrFailed; + + /* Number of substitution goals we are/were waiting for that + failed because there are no substituters. */ + unsigned int nrNoSubstituters; + + /* Number of substitution goals we are/were waiting for that + failed because othey had unsubstitutable references. */ + unsigned int nrIncompleteClosure; + + /* Name of this goal for debugging purposes. */ + string name; + + /* Whether the goal is finished. */ + ExitCode exitCode; + + /* Exception containing an error message, if any. */ + std::optional ex; + + Goal(Worker & worker) : worker(worker) + { + nrFailed = nrNoSubstituters = nrIncompleteClosure = 0; + exitCode = ecBusy; + } + + virtual ~Goal() + { + trace("goal destroyed"); + } + + virtual void work() = 0; + + void addWaitee(GoalPtr waitee); + + virtual void waiteeDone(GoalPtr waitee, ExitCode result); + + virtual void handleChildOutput(int fd, const string & data) + { + abort(); + } + + virtual void handleEOF(int fd) + { + abort(); + } + + void trace(const FormatOrString & fs); + + string getName() + { + return name; + } + + /* Callback in case of a timeout. It should wake up its waiters, + get rid of any running child processes that are being monitored + by the worker (important!), etc. */ + virtual void timedOut(Error && ex) = 0; + + virtual string key() = 0; + + void amDone(ExitCode result, std::optional ex = {}); +}; + +typedef std::chrono::time_point steady_time_point; + + +/* A mapping used to remember for each child process to what goal it + belongs, and file descriptors for receiving log data and output + path creation commands. */ +struct Child +{ + WeakGoalPtr goal; + Goal * goal2; // ugly hackery + set fds; + bool respectTimeouts; + bool inBuildSlot; + steady_time_point lastOutput; /* time we last got output on stdout/stderr */ + steady_time_point timeStarted; +}; + + +/* The worker class. */ +class Worker +{ +private: + + /* Note: the worker should only have strong pointers to the + top-level goals. */ + + /* The top-level goals of the worker. */ + Goals topGoals; + + /* Goals that are ready to do some work. */ + WeakGoals awake; + + /* Goals waiting for a build slot. */ + WeakGoals wantingToBuild; + + /* Child processes currently running. */ + std::list children; + + /* Number of build slots occupied. This includes local builds and + substitutions but not remote builds via the build hook. */ + unsigned int nrLocalBuilds; + + /* Maps used to prevent multiple instantiations of a goal for the + same derivation / path. */ + WeakGoalMap derivationGoals; + WeakGoalMap substitutionGoals; + + /* Goals waiting for busy paths to be unlocked. */ + WeakGoals waitingForAnyGoal; + + /* Goals sleeping for a few seconds (polling a lock). */ + WeakGoals waitingForAWhile; + + /* Last time the goals in `waitingForAWhile' where woken up. */ + steady_time_point lastWokenUp; + + /* Cache for pathContentsGood(). */ + std::map pathContentsGoodCache; + +public: + + const Activity act; + const Activity actDerivations; + const Activity actSubstitutions; + + /* Set if at least one derivation had a BuildError (i.e. permanent + failure). */ + bool permanentFailure; + + /* Set if at least one derivation had a timeout. */ + bool timedOut; + + /* Set if at least one derivation fails with a hash mismatch. */ + bool hashMismatch; + + /* Set if at least one derivation is not deterministic in check mode. */ + bool checkMismatch; + + LocalStore & store; + + std::unique_ptr hook; + + uint64_t expectedBuilds = 0; + uint64_t doneBuilds = 0; + uint64_t failedBuilds = 0; + uint64_t runningBuilds = 0; + + uint64_t expectedSubstitutions = 0; + uint64_t doneSubstitutions = 0; + uint64_t failedSubstitutions = 0; + uint64_t runningSubstitutions = 0; + uint64_t expectedDownloadSize = 0; + uint64_t doneDownloadSize = 0; + uint64_t expectedNarSize = 0; + uint64_t doneNarSize = 0; + + /* Whether to ask the build hook if it can build a derivation. If + it answers with "decline-permanently", we don't try again. */ + bool tryBuildHook = true; + + Worker(LocalStore & store); + ~Worker(); + + /* Make a goal (with caching). */ + + /* derivation goal */ +private: + std::shared_ptr makeDerivationGoalCommon( + const StorePath & drvPath, const StringSet & wantedOutputs, + std::function()> mkDrvGoal); +public: + std::shared_ptr makeDerivationGoal( + const StorePath & drvPath, + const StringSet & wantedOutputs, BuildMode buildMode = bmNormal); + std::shared_ptr makeBasicDerivationGoal( + const StorePath & drvPath, const BasicDerivation & drv, + const StringSet & wantedOutputs, BuildMode buildMode = bmNormal); + + /* substitution goal */ + GoalPtr makeSubstitutionGoal(const StorePath & storePath, RepairFlag repair = NoRepair, std::optional ca = std::nullopt); + + /* Remove a dead goal. */ + void removeGoal(GoalPtr goal); + + /* Wake up a goal (i.e., there is something for it to do). */ + void wakeUp(GoalPtr goal); + + /* Return the number of local build and substitution processes + currently running (but not remote builds via the build + hook). */ + unsigned int getNrLocalBuilds(); + + /* Registers a running child process. `inBuildSlot' means that + the process counts towards the jobs limit. */ + void childStarted(GoalPtr goal, const set & fds, + bool inBuildSlot, bool respectTimeouts); + + /* Unregisters a running child process. `wakeSleepers' should be + false if there is no sense in waking up goals that are sleeping + because they can't run yet (e.g., there is no free build slot, + or the hook would still say `postpone'). */ + void childTerminated(Goal * goal, bool wakeSleepers = true); + + /* Put `goal' to sleep until a build slot becomes available (which + might be right away). */ + void waitForBuildSlot(GoalPtr goal); + + /* Wait for any goal to finish. Pretty indiscriminate way to + wait for some resource that some other goal is holding. */ + void waitForAnyGoal(GoalPtr goal); + + /* Wait for a few seconds and then retry this goal. Used when + waiting for a lock held by another process. This kind of + polling is inefficient, but POSIX doesn't really provide a way + to wait for multiple locks in the main select() loop. */ + void waitForAWhile(GoalPtr goal); + + /* Loop until the specified top-level goals have finished. */ + void run(const Goals & topGoals); + + /* Wait for input to become available. */ + void waitForInput(); + + unsigned int exitStatus(); + + /* Check whether the given valid path exists and has the right + contents. */ + bool pathContentsGood(const StorePath & path); + + void markContentsGood(const StorePath & path); + + void updateProgress() + { + actDerivations.progress(doneBuilds, expectedBuilds + doneBuilds, runningBuilds, failedBuilds); + actSubstitutions.progress(doneSubstitutions, expectedSubstitutions + doneSubstitutions, runningSubstitutions, failedSubstitutions); + act.setExpected(actFileTransfer, expectedDownloadSize + doneDownloadSize); + act.setExpected(actCopyPath, expectedNarSize + doneNarSize); + } +}; + +typedef enum {rpAccept, rpDecline, rpPostpone} HookReply; + +class SubstitutionGoal; + +/* Unless we are repairing, we don't both to test validity and just assume it, + so the choices are `Absent` or `Valid`. */ +enum struct PathStatus { + Corrupt, + Absent, + Valid, +}; + +struct InitialOutputStatus { + StorePath path; + PathStatus status; + /* Valid in the store, and additionally non-corrupt if we are repairing */ + bool isValid() const { + return status == PathStatus::Valid; + } + /* Merely present, allowed to be corrupt */ + bool isPresent() const { + return status == PathStatus::Corrupt + || status == PathStatus::Valid; + } +}; + +struct InitialOutput { + bool wanted; + std::optional known; +}; + +class DerivationGoal : public Goal +{ +private: + /* Whether to use an on-disk .drv file. */ + bool useDerivation; + + /* The path of the derivation. */ + StorePath drvPath; + + /* The specific outputs that we need to build. Empty means all of + them. */ + StringSet wantedOutputs; + + /* Whether additional wanted outputs have been added. */ + bool needRestart = false; + + /* Whether to retry substituting the outputs after building the + inputs. */ + bool retrySubstitution; + + /* The derivation stored at drvPath. */ + std::unique_ptr drv; + + std::unique_ptr parsedDrv; + + /* The remainder is state held during the build. */ + + /* Locks on (fixed) output paths. */ + PathLocks outputLocks; + + /* All input paths (that is, the union of FS closures of the + immediate input paths). */ + StorePathSet inputPaths; + + std::map initialOutputs; + + /* User selected for running the builder. */ + std::unique_ptr buildUser; + + /* The process ID of the builder. */ + Pid pid; + + /* The temporary directory. */ + Path tmpDir; + + /* The path of the temporary directory in the sandbox. */ + Path tmpDirInSandbox; + + /* File descriptor for the log file. */ + AutoCloseFD fdLogFile; + std::shared_ptr logFileSink, logSink; + + /* Number of bytes received from the builder's stdout/stderr. */ + unsigned long logSize; + + /* The most recent log lines. */ + std::list logTail; + + std::string currentLogLine; + size_t currentLogLinePos = 0; // to handle carriage return + + std::string currentHookLine; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* Pipe for synchronising updates to the builder namespaces. */ + Pipe userNamespaceSync; + + /* The mount namespace of the builder, used to add additional + paths to the sandbox as a result of recursive Nix calls. */ + AutoCloseFD sandboxMountNamespace; + + /* On Linux, whether we're doing the build in its own user + namespace. */ + bool usingUserNamespace = true; + + /* The build hook. */ + std::unique_ptr hook; + + /* Whether we're currently doing a chroot build. */ + bool useChroot = false; + + Path chrootRootDir; + + /* RAII object to delete the chroot directory. */ + std::shared_ptr autoDelChroot; + + /* The sort of derivation we are building. */ + DerivationType derivationType; + + /* Whether to run the build in a private network namespace. */ + bool privateNetwork = false; + + typedef void (DerivationGoal::*GoalState)(); + GoalState state; + + /* Stuff we need to pass to initChild(). */ + struct ChrootPath { + Path source; + bool optional; + ChrootPath(Path source = "", bool optional = false) + : source(source), optional(optional) + { } + }; + typedef map DirsInChroot; // maps target path to source path + DirsInChroot dirsInChroot; + + typedef map Environment; + Environment env; + +#if __APPLE__ + typedef string SandboxProfile; + SandboxProfile additionalSandboxProfile; +#endif + + /* Hash rewriting. */ + StringMap inputRewrites, outputRewrites; + typedef map RedirectedOutputs; + RedirectedOutputs redirectedOutputs; + + /* The outputs paths used during the build. + + - Input-addressed derivations or fixed content-addressed outputs are + sometimes built when some of their outputs already exist, and can not + be hidden via sandboxing. We use temporary locations instead and + rewrite after the build. Otherwise the regular predetermined paths are + put here. + + - Floating content-addressed derivations do not know their final build + output paths until the outputs are hashed, so random locations are + used, and then renamed. The randomness helps guard against hidden + self-references. + */ + OutputPathMap scratchOutputs; + + /* The final output paths of the build. + + - For input-addressed derivations, always the precomputed paths + + - For content-addressed derivations, calcuated from whatever the hash + ends up being. (Note that fixed outputs derivations that produce the + "wrong" output still install that data under its true content-address.) + */ + OutputPathMap finalOutputs; + + BuildMode buildMode; + + /* If we're repairing without a chroot, there may be outputs that + are valid but corrupt. So we redirect these outputs to + temporary paths. */ + StorePathSet redirectedBadOutputs; + + BuildResult result; + + /* The current round, if we're building multiple times. */ + size_t curRound = 1; + + size_t nrRounds; + + /* Path registration info from the previous round, if we're + building multiple times. Since this contains the hash, it + allows us to compare whether two rounds produced the same + result. */ + std::map prevInfos; + + uid_t sandboxUid() { return usingUserNamespace ? 1000 : buildUser->getUID(); } + gid_t sandboxGid() { return usingUserNamespace ? 100 : buildUser->getGID(); } + + const static Path homeDir; + + std::unique_ptr> mcExpectedBuilds, mcRunningBuilds; + + std::unique_ptr act; + + /* Activity that denotes waiting for a lock. */ + std::unique_ptr actLock; + + std::map builderActivities; + + /* The remote machine on which we're building. */ + std::string machineName; + + /* The recursive Nix daemon socket. */ + AutoCloseFD daemonSocket; + + /* The daemon main thread. */ + std::thread daemonThread; + + /* The daemon worker threads. */ + std::vector daemonWorkerThreads; + + /* Paths that were added via recursive Nix calls. */ + StorePathSet addedPaths; + + /* Recursive Nix calls are only allowed to build or realize paths + in the original input closure or added via a recursive Nix call + (so e.g. you can't do 'nix-store -r /nix/store/' where + /nix/store/ is some arbitrary path in a binary cache). */ + bool isAllowed(const StorePath & path) + { + return inputPaths.count(path) || addedPaths.count(path); + } + + friend struct RestrictedStore; + +public: + DerivationGoal(const StorePath & drvPath, + const StringSet & wantedOutputs, Worker & worker, + BuildMode buildMode = bmNormal); + DerivationGoal(const StorePath & drvPath, const BasicDerivation & drv, + const StringSet & wantedOutputs, Worker & worker, + BuildMode buildMode = bmNormal); + ~DerivationGoal(); + + /* Whether we need to perform hash rewriting if there are valid output paths. */ + bool needsHashRewrite(); + + void timedOut(Error && ex) override; + + string key() override + { + /* Ensure that derivations get built in order of their name, + i.e. a derivation named "aardvark" always comes before + "baboon". And substitution goals always happen before + derivation goals (due to "b$"). */ + return "b$" + std::string(drvPath.name()) + "$" + worker.store.printStorePath(drvPath); + } + + void work() override; + + StorePath getDrvPath() + { + return drvPath; + } + + /* Add wanted outputs to an already existing derivation goal. */ + void addWantedOutputs(const StringSet & outputs); + + BuildResult getResult() { return result; } + +private: + /* The states. */ + void getDerivation(); + void loadDerivation(); + void haveDerivation(); + void outputsSubstitutionTried(); + void gaveUpOnSubstitution(); + void closureRepaired(); + void inputsRealised(); + void tryToBuild(); + void tryLocalBuild(); + void buildDone(); + + void resolvedFinished(); + + /* Is the build hook willing to perform the build? */ + HookReply tryBuildHook(); + + /* Start building a derivation. */ + void startBuilder(); + + /* Fill in the environment for the builder. */ + void initEnv(); + + /* Setup tmp dir location. */ + void initTmpDir(); + + /* Write a JSON file containing the derivation attributes. */ + void writeStructuredAttrs(); + + void startDaemon(); + + void stopDaemon(); + + /* Add 'path' to the set of paths that may be referenced by the + outputs, and make it appear in the sandbox. */ + void addDependency(const StorePath & path); + + /* Make a file owned by the builder. */ + void chownToBuilder(const Path & path); + + /* Run the builder's process. */ + void runChild(); + + friend int childEntry(void *); + + /* Check that the derivation outputs all exist and register them + as valid. */ + void registerOutputs(); + + /* Check that an output meets the requirements specified by the + 'outputChecks' attribute (or the legacy + '{allowed,disallowed}{References,Requisites}' attributes). */ + void checkOutputs(const std::map & outputs); + + /* Open a log file and a pipe to it. */ + Path openLogFile(); + + /* Close the log file. */ + void closeLogFile(); + + /* Delete the temporary directory, if we have one. */ + void deleteTmpDir(bool force); + + /* Callback used by the worker to write to the log. */ + void handleChildOutput(int fd, const string & data) override; + void handleEOF(int fd) override; + void flushLine(); + + /* Wrappers around the corresponding Store methods that first consult the + derivation. This is currently needed because when there is no drv file + there also is no DB entry. */ + std::map> queryPartialDerivationOutputMap(); + OutputPathMap queryDerivationOutputMap(); + + /* Return the set of (in)valid paths. */ + void checkPathValidity(); + + /* Forcibly kill the child process, if any. */ + void killChild(); + + /* Create alternative path calculated from but distinct from the + input, so we can avoid overwriting outputs (or other store paths) + that already exist. */ + StorePath makeFallbackPath(const StorePath & path); + /* Make a path to another based on the output name along with the + derivation hash. */ + /* FIXME add option to randomize, so we can audit whether our + rewrites caught everything */ + StorePath makeFallbackPath(std::string_view outputName); + + void repairClosure(); + + void started(); + + void done( + BuildResult::Status status, + std::optional ex = {}); + + StorePathSet exportReferences(const StorePathSet & storePaths); +}; + +class SubstitutionGoal : public Goal +{ + friend class Worker; + +private: + /* The store path that should be realised through a substitute. */ + StorePath storePath; + + /* The path the substituter refers to the path as. This will be + * different when the stores have different names. */ + std::optional subPath; + + /* The remaining substituters. */ + std::list> subs; + + /* The current substituter. */ + std::shared_ptr sub; + + /* Whether a substituter failed. */ + bool substituterFailed = false; + + /* Path info returned by the substituter's query info operation. */ + std::shared_ptr info; + + /* Pipe for the substituter's standard output. */ + Pipe outPipe; + + /* The substituter thread. */ + std::thread thr; + + std::promise promise; + + /* Whether to try to repair a valid path. */ + RepairFlag repair; + + /* Location where we're downloading the substitute. Differs from + storePath when doing a repair. */ + Path destPath; + + std::unique_ptr> maintainExpectedSubstitutions, + maintainRunningSubstitutions, maintainExpectedNar, maintainExpectedDownload; + + typedef void (SubstitutionGoal::*GoalState)(); + GoalState state; + + /* Content address for recomputing store path */ + std::optional ca; + +public: + SubstitutionGoal(const StorePath & storePath, Worker & worker, RepairFlag repair = NoRepair, std::optional ca = std::nullopt); + ~SubstitutionGoal(); + + void timedOut(Error && ex) override { abort(); }; + + string key() override + { + /* "a$" ensures substitution goals happen before derivation + goals. */ + return "a$" + std::string(storePath.name()) + "$" + worker.store.printStorePath(storePath); + } + + void work() override; + + /* The states. */ + void init(); + void tryNext(); + void gotInfo(); + void referencesValid(); + void tryToRun(); + void finished(); + + /* Callback used by the worker to write to the log. */ + void handleChildOutput(int fd, const string & data) override; + void handleEOF(int fd) override; + + StorePath getStorePath() { return storePath; } +}; + +struct HookInstance +{ + /* Pipes for talking to the build hook. */ + Pipe toHook; + + /* Pipe for the hook's standard output/error. */ + Pipe fromHook; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* The process ID of the hook. */ + Pid pid; + + FdSink sink; + + std::map activities; + + HookInstance(); + + ~HookInstance(); +}; + + +void addToWeakGoals(WeakGoals & goals, GoalPtr p); + +} -- cgit v1.2.3