about summary refs log tree commit diff
path: root/scripts/build-remote.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/build-remote.pl.in')
-rwxr-xr-x scripts/build-remote.pl.in 305
1 files changed, 167 insertions, 138 deletions
diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in
index c440b6a0f..f9bff9c41 100755
--- a/scripts/build-remote.pl.in
+++ b/scripts/build-remote.pl.in
@@ -4,6 +4,7 @@ use Fcntl ':flock';
use English '-no_match_vars';
use IO::Handle;
use ssh qw/sshOpts openSSHConnection/;
+no warnings('once');
# General operation:
@@ -31,57 +32,22 @@ $ENV{"DISPLAY"} = "";
$ENV{"SSH_ASKPASS"} = "";
-my $loadIncreased = 0;
-
-my ($amWilling, $localSystem, $neededSystem, $drvPath, $maxSilentTime) = @ARGV;
-$maxSilentTime = 0 unless defined $maxSilentTime;
-
sub sendReply {
my $reply = shift;
print STDERR "# $reply\n";
}
-sub decline {
- sendReply "decline";
- exit 0;
-}
-
-my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
-decline unless defined $currentLoad;
-mkdir $currentLoad, 0777 or die unless -d $currentLoad;
+sub all { $_ || return 0 for @_; 1 }
-my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
-decline if !defined $conf || ! -e $conf;
-
-my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
-
-
-# Read the list of machines.
-my @machines;
-open CONF, "< $conf" or die;
-
-while (<CONF>) {
- chomp;
- s/\#.*$//g;
- next if /^\s*$/;
- /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)(\s+([0-9\.]+))?\s*$/ or die;
- push @machines,
- { hostName => $1
- , systemTypes => [split(/,/, $2)]
- , sshKeys => $3
- , maxJobs => $4
- , speedFactor => 1.0 * ($6 || 1)
- , enabled => 1
- };
-}
-close CONF;
+# Initialisation.
+my $loadIncreased = 0;
+my ($localSystem, $maxSilentTime, $printBuildTrace) = @ARGV;
+$maxSilentTime = 0 unless defined $maxSilentTime;
-# Acquire the exclusive lock on $currentLoad/main-lock.
-my $mainLock = "$currentLoad/main-lock";
-open MAINLOCK, ">>$mainLock" or die;
-flock(MAINLOCK, LOCK_EX) or die;
+my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
+my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
sub openSlotLock {
@@ -91,134 +57,189 @@ sub openSlotLock {
open $slotLock, ">>$slotLockFn" or die;
return $slotLock;
}
-
-my $hostName;
-my $slotLock;
-while (1) {
+# Read the list of machines.
+my @machines;
+if (defined $conf && -e $conf) {
+ open CONF, "< $conf" or die;
+ while (<CONF>) {
+ chomp;
+ s/\#.*$//g;
+ next if /^\s*$/;
+ my @tokens = split /\s/, $_;
+ push @machines,
+ { hostName => $tokens[0]
+ , systemTypes => [ split(/,/, $tokens[1]) ]
+ , sshKeys => $tokens[2]
+ , maxJobs => int($tokens[3])
+ , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1)
+ , features => [ split(/,/, $tokens[5] || "") ]
+ , enabled => 1
+ };
+ }
+ close CONF;
+}
+
+
+
+# Wait for the calling process to ask us whether we can build some derivation.
+my ($drvPath, $hostName, $slotLock);
+
+REQ: while (1) {
+ $_ = <STDIN> || exit 0;
+ my ($amWilling, $neededSystem);
+ ($amWilling, $neededSystem, $drvPath, $requiredFeatures) = split;
+ my @requiredFeatures = split /,/, $requiredFeatures;
+
+ my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
+
+ if (!defined $currentLoad) {
+ sendReply "decline";
+ next;
+ }
- # Find all machine that can execute this build, i.e., that support
- # builds for the given platform and are not at their job limit.
- my $rightType = 0;
- my @available = ();
- LOOP: foreach my $cur (@machines) {
- if ($cur->{enabled} && grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
- $rightType = 1;
-
- # We have a machine of the right type. Determine the load on
- # the machine.
- my $slot = 0;
- my $load = 0;
- my $free;
- while ($slot < $cur->{maxJobs}) {
- my $slotLock = openSlotLock($cur, $slot);
- if (flock($slotLock, LOCK_EX | LOCK_NB)) {
- $free = $slot unless defined $free;
- flock($slotLock, LOCK_UN) or die;
- } else {
- $load++;
+ # Acquire the exclusive lock on $currentLoad/main-lock.
+ mkdir $currentLoad, 0777 or die unless -d $currentLoad;
+ my $mainLock = "$currentLoad/main-lock";
+ open MAINLOCK, ">>$mainLock" or die;
+ flock(MAINLOCK, LOCK_EX) or die;
+
+
+ while (1) {
+ # Find all machines that can execute this build, i.e., that
+ # support builds for the given platform and features, and are
+ # not at their job limit.
+ my $rightType = 0;
+ my @available = ();
+ LOOP: foreach my $cur (@machines) {
+ if ($cur->{enabled}
+ && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
+ && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{features}} } @requiredFeatures))
+ {
+ $rightType = 1;
+
+ # We have a machine of the right type. Determine the load on
+ # the machine.
+ my $slot = 0;
+ my $load = 0;
+ my $free;
+ while ($slot < $cur->{maxJobs}) {
+ my $slotLock = openSlotLock($cur, $slot);
+ if (flock($slotLock, LOCK_EX | LOCK_NB)) {
+ $free = $slot unless defined $free;
+ flock($slotLock, LOCK_UN) or die;
+ } else {
+ $load++;
+ }
+ close $slotLock;
+ $slot++;
}
- close $slotLock;
- $slot++;
+
+ push @available, { machine => $cur, load => $load, free => $free }
+ if $load < $cur->{maxJobs};
}
-
- push @available, { machine => $cur, load => $load, free => $free }
- if $load < $cur->{maxJobs};
}
- }
- if (defined $ENV{NIX_DEBUG_HOOK}) {
- print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
- foreach @available;
- }
+ if (defined $ENV{NIX_DEBUG_HOOK}) {
+ print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
+ foreach @available;
+ }
- # Didn't find any available machine? Then decline or postpone.
- if (scalar @available == 0) {
- # Postpone if we have a machine of the right type, except if the
- # local system can and wants to do the build.
- if ($rightType && !$canBuildLocally) {
- sendReply "postpone";
- exit 0;
- } else {
- decline;
+ # Didn't find any available machine? Then decline or postpone.
+ if (scalar @available == 0) {
+ # Postpone if we have a machine of the right type, except
+ # if the local system can and wants to do the build.
+ if ($rightType && !$canBuildLocally) {
+ sendReply "postpone";
+ } else {
+ sendReply "decline";
+ }
+ close MAINLOCK;
+ next REQ;
}
- }
-
- # Prioritise the available machines as follows:
- # - First by load divided by speed factor, rounded to the nearest
- # integer. This causes fast machines to be preferred over slow
- # machines with similar loads.
- # - Then by speed factor.
- # - Finally by load.
- sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
- @available = sort
- { lf($a) <=> lf($b)
- || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
- || $a->{load} <=> $b->{load}
- } @available;
+ # Prioritise the available machines as follows:
+ # - First by load divided by speed factor, rounded to the nearest
+ # integer. This causes fast machines to be preferred over slow
+ # machines with similar loads.
+ # - Then by speed factor.
+ # - Finally by load.
+ sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
+ @available = sort
+ { lf($a) <=> lf($b)
+ || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
+ || $a->{load} <=> $b->{load}
+ } @available;
- # Select the best available machine and lock a free slot.
- my $selected = $available[0];
- my $machine = $selected->{machine};
- $slotLock = openSlotLock($machine, $selected->{free});
- flock($slotLock, LOCK_EX | LOCK_NB) or die;
- utime undef, undef, $slotLock;
+ # Select the best available machine and lock a free slot.
+ my $selected = $available[0];
+ my $machine = $selected->{machine};
+
+ $slotLock = openSlotLock($machine, $selected->{free});
+ flock($slotLock, LOCK_EX | LOCK_NB) or die;
+ utime undef, undef, $slotLock;
- close MAINLOCK;
+ close MAINLOCK;
- # Connect to the selected machine.
- @sshOpts = ("-i", $machine->{sshKeys}, "-x");
- $hostName = $machine->{hostName};
- last if openSSHConnection $hostName;
+ # Connect to the selected machine.
+ @sshOpts = ("-i", $machine->{sshKeys}, "-x");
+ $hostName = $machine->{hostName};
+ last REQ if openSSHConnection $hostName;
- warn "unable to open SSH connection to $hostName, trying other available machines...\n";
- $machine->{enabled} = 0;
+ warn "unable to open SSH connection to $hostName, trying other available machines...\n";
+ $machine->{enabled} = 0;
+ }
}
# Tell Nix we've accepted the build.
sendReply "accept";
-my $x = <STDIN>;
-chomp $x;
-
-if ($x ne "okay") {
- exit 0;
-}
+my @inputs = split /\s/, readline(STDIN);
+my @outputs = split /\s/, readline(STDIN);
-# Do the actual build.
print STDERR "building `$drvPath' on `$hostName'\n";
+print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace;
-my $inputs = `cat inputs`; die if ($? != 0);
-$inputs =~ s/\n/ /g;
-
-my $outputs = `cat outputs`; die if ($? != 0);
-$outputs =~ s/\n/ /g;
-
-print "copying inputs...\n";
my $maybeSign = "";
$maybeSign = "--sign" if -e "/nix/etc/nix/signing-key.sec";
-system("NIX_SSHOPTS=\"@sshOpts\" @bindir@/nix-copy-closure $hostName $maybeSign $drvPath $inputs") == 0
+
+# Register the derivation as a temporary GC root. Note that $PPID is
+# the PID of the remote SSH process, which, due to the use of a
+# persistent SSH connection, should be the same across all remote
+# command invocations for this session.
+my $rootsDir = "@localstatedir@/nix/gcroots/tmp";
+system("ssh $hostName @sshOpts 'mkdir -m 1777 -p $rootsDir; ln -sfn $drvPath $rootsDir/\$PPID.drv'");
+
+sub removeRoots {
+ system("ssh $hostName @sshOpts 'rm -f $rootsDir/\$PPID.drv $rootsDir/\$PPID.out'");
+}
+
+
+# Copy the derivation and its dependencies to the build machine.
+system("NIX_SSHOPTS=\"@sshOpts\" @bindir@/nix-copy-closure $hostName $maybeSign $drvPath @inputs") == 0
or die "cannot copy inputs to $hostName: $?";
-print "building...\n";
-my $buildFlags = "--max-silent-time $maxSilentTime --fallback";
+# Perform the build.
+my $buildFlags = "--max-silent-time $maxSilentTime --fallback --add-root $rootsDir/\$PPID.out --option verbosity 0";
-# `-tt' forces allocation of a pseudo-terminal. This is required to
-# make the remote nix-store process receive a signal when the
-# connection dies. Without it, the remote process might continue to
-# run indefinitely (that is, until it next tries to write to
-# stdout/stderr).
-if (system("ssh $hostName @sshOpts -tt 'nix-store -r $drvPath $buildFlags > /dev/null'") != 0) {
+# We let the remote side kill its process group when the connection is
+# closed unexpectedly. This is necessary to ensure that no processes
+# are left running on the remote system if the local Nix process is
+# killed. (SSH itself doesn't kill child processes if the connection
+# is interrupted unless the `-tt' flag is used to force a pseudo-tty,
+# in which case every child receives SIGHUP; however, `-tt' doesn't
+# work on some platforms when connection sharing is used.)
+pipe STDIN, DUMMY; # make sure we have a readable STDIN
+if (system("ssh $hostName @sshOpts '(read; kill -INT -\$\$) <&0 & nix-store -r $drvPath $buildFlags > /dev/null' 2>&4") != 0) {
# If we couldn't run ssh or there was an ssh problem (indicated by
# exit code 255), then we return exit code 1; otherwise we assume
# that the builder failed, which we indicate to Nix using exit
@@ -226,15 +247,23 @@ if (system("ssh $hostName @sshOpts -tt 'nix-store -r $drvPath $buildFlags > /dev
# the first is a transient failure and the latter is permanent.
my $res = $? == -1 || ($? >> 8) == 255 ? 1 : 100;
print STDERR "build of `$drvPath' on `$hostName' failed with exit code $?\n";
+ removeRoots;
exit $res;
}
-print "build of `$drvPath' on `$hostName' succeeded\n";
+#print "build of `$drvPath' on `$hostName' succeeded\n";
-foreach my $output (split '\n', $outputs) {
+
+# Copy the output from the build machine.
+foreach my $output (@outputs) {
my $maybeSignRemote = "";
$maybeSignRemote = "--sign" if $UID != 0;
- system("ssh $hostName @sshOpts 'nix-store --export $maybeSignRemote $output' | @bindir@/nix-store --import > /dev/null") == 0
+ system("ssh $hostName @sshOpts 'nix-store --export $maybeSignRemote $output'" .
+ "| NIX_HELD_LOCKS=$output @bindir@/nix-store --import > /dev/null") == 0
or die "cannot copy $output from $hostName: $?";
}
+
+
+# Get rid of the temporary GC roots.
+removeRoots;