aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoreldritch horrors <pennae@lix.systems>2024-05-26 17:45:36 +0200
committereldritch horrors <pennae@lix.systems>2024-05-26 20:57:09 +0000
commited6b3165ea3de4e1dda93ce0f23ff61079e4fe0b (patch)
tree3794ddaac97871be3bb82ed5c9e847e4311c3cbb /src
parent742c62a6eb3d5411a0ab748387060c28ca16a25a (diff)
worker: process timeouts first, and exclusively
check goals for timeouts first, and their activity fds only if no timeout has occurred. checking for timeouts *after* activity sets us up for assertion failures by running multiple build completion notifiers, the first of which will kill/reap the the goal process and consuming the Pid instance. when the second notifier attempts to do the same it will core dump with an assertion failure in Pid and take down not only the single goal, but the entire daemon and all goals it was building. luckily this is rare in practice since it requires a build to both finish and time out at the same time. writing a test for this is not feasible due to how much it relies on scheduling to actually trigger the underlying bug, but on idle machines it can usually be triggered by running multiple sleeping builds with timeout set to the sleep duration and `--keep-going`: nix-build --timeout 10 --builders '' --keep-going -E ' with import <nixpkgs> {}; builtins.genList (i: runCommand "foo-${toString i}" {} "sleep 10") 100 ' Change-Id: I394d36b2e5ffb909cf8a19977d569bbdb71cb67b
Diffstat (limited to 'src')
-rw-r--r--src/libstore/build/worker.cc42
1 files changed, 22 insertions, 20 deletions
diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc
index 83167b0f3..04be0da99 100644
--- a/src/libstore/build/worker.cc
+++ b/src/libstore/build/worker.cc
@@ -429,6 +429,28 @@ void Worker::waitForInput()
GoalPtr goal = j->goal.lock();
assert(goal);
+ if (goal->exitCode == Goal::ecBusy &&
+ 0 != settings.maxSilentTime &&
+ j->respectTimeouts &&
+ after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime))
+ {
+ goal->timedOut(Error(
+ "%1% timed out after %2% seconds of silence",
+ goal->getName(), settings.maxSilentTime));
+ continue;
+ }
+
+ else if (goal->exitCode == Goal::ecBusy &&
+ 0 != settings.buildTimeout &&
+ j->respectTimeouts &&
+ after - j->timeStarted >= std::chrono::seconds(settings.buildTimeout))
+ {
+ goal->timedOut(Error(
+ "%1% timed out after %2% seconds",
+ goal->getName(), settings.buildTimeout));
+ continue;
+ }
+
std::set<int> fds2(j->fds);
std::vector<unsigned char> buffer(4096);
for (auto & k : fds2) {
@@ -455,26 +477,6 @@ void Worker::waitForInput()
}
}
}
-
- if (goal->exitCode == Goal::ecBusy &&
- 0 != settings.maxSilentTime &&
- j->respectTimeouts &&
- after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime))
- {
- goal->timedOut(Error(
- "%1% timed out after %2% seconds of silence",
- goal->getName(), settings.maxSilentTime));
- }
-
- else if (goal->exitCode == Goal::ecBusy &&
- 0 != settings.buildTimeout &&
- j->respectTimeouts &&
- after - j->timeStarted >= std::chrono::seconds(settings.buildTimeout))
- {
- goal->timedOut(Error(
- "%1% timed out after %2% seconds",
- goal->getName(), settings.buildTimeout));
- }
}
if (!waitingForAWhile.empty() && lastWokenUp + std::chrono::seconds(settings.pollInterval) <= after) {