aboutsummaryrefslogtreecommitdiff
path: root/scripts/generate-patches.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/generate-patches.pl.in')
-rwxr-xr-x scripts/generate-patches.pl.in 429
1 files changed, 0 insertions, 429 deletions
diff --git a/scripts/generate-patches.pl.in b/scripts/generate-patches.pl.in
deleted file mode 100755
index 05bf65093..000000000
--- a/scripts/generate-patches.pl.in
+++ /dev/null
@@ -1,429 +0,0 @@
-#! @perl@ -w -I@libexecdir@/nix
-
-use strict;
-use File::Temp qw(tempdir);
-use readmanifest;
-
-
-# Patch generation options.  Each one is read from the environment and
-# falls back to the default below when the variable is not set.
-
-# Max size of NAR archives to generate patches for.
-my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
-$maxNarSize = 100 * 1024 * 1024 if !defined $maxNarSize;
-
-# If patch is bigger than this fraction of full archive, reject.
-my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
-$maxPatchFraction = 0.60 if !defined $maxPatchFraction;
-
-# Value handed to `ulimit -t' before each bsdiff run.
-my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
-$timeLimit = 180 if !defined $timeLimit;
-
-
-# Exactly five positional arguments are required.  Say which ones
-# instead of failing with an anonymous "Died".
-die "syntax: $0 NAR-DIR PATCHES-DIR PATCHES-URL SRC-MANIFEST DST-MANIFEST\n"
-    unless scalar @ARGV == 5;
-
-# Hash algorithm passed to nix-hash and recorded in the manifest entries.
-my $hashAlgo = "sha256";
-
-my $narDir = $ARGV[0];       # directory holding the compressed NAR archives
-my $patchesDir = $ARGV[1];   # directory that receives the generated patches
-my $patchesURL = $ARGV[2];   # URL prefix recorded for each generated patch
-my $srcManifest = $ARGV[3];  # manifest describing the source (old) paths
-my $dstManifest = $ARGV[4];  # manifest describing the destination (new) paths;
-                             # rewritten at the end of the script
-
-# Scratch directory for unpacked archives and diffs.  CLEANUP => 1 asks
-# File::Temp to remove it again.
-my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1)
-    or die "cannot create a temporary directory";
-
-# Tables filled in by readManifest, one set per manifest.
-my %srcNarFiles;
-my %srcLocalPaths;
-my %srcPatches;
-
-my %dstNarFiles;
-my %dstLocalPaths;
-my %dstPatches;
-
-readManifest $srcManifest,
-    \%srcNarFiles, \%srcLocalPaths, \%srcPatches;
-
-readManifest $dstManifest,
-    \%dstNarFiles, \%dstLocalPaths, \%dstPatches;
-
-
-# Pick the store paths of a NAR table that are candidates for patch
-# generation.
-#
-# Takes a reference to a hash keyed by store path and returns a hash
-# (as a flat list) that maps every retained path to 1.
-sub findOutputPaths {
-    my ($narTable) = @_;
-
-    # A path matching any of these patterns is left out.
-    my @excluded = (
-        # Derivations.
-        qr/\.drv$/,
-
-        # Builders (too much ambiguity -- they're all called
-        # `builder.sh').
-        qr/\.sh$/,
-        qr/\.patch$/,
-
-        # Tar files etc. are not worth including.
-        qr/\.tar$/,
-        qr/\.tar\.(gz|bz2|Z|lzma|xz)$/,
-        qr/\.zip$/,
-        qr/\.bin$/,
-        qr/\.tgz$/,
-        qr/\.rpm$/,
-        qr/cvs-export$/,
-        qr/fetchhg$/,
-    );
-
-    my %selected;
-
-    CANDIDATE:
-    foreach my $candidate (keys %{$narTable}) {
-        foreach my $pattern (@excluded) {
-            next CANDIDATE if $candidate =~ $pattern;
-        }
-        $selected{$candidate} = 1;
-    }
-
-    return %selected;
-}
-
-# Determine the patch candidates of the source manifest ...
-print "finding src output paths...\n";
-my %srcOutPaths = findOutputPaths(\%srcNarFiles);
-
-# ... and of the destination manifest.
-print "finding dst output paths...\n";
-my %dstOutPaths = findOutputPaths(\%dstNarFiles);
-
-
-# Split the last component of a path into a name and a version.
-#
-# The component must start with a run of lower-case letters/digits,
-# followed by one or more `-part' segments that each begin with a
-# letter (the name); whatever is left is the version.  A leading dash is
-# stripped from both.  For example, ".../0123abc-hello-2.10" yields
-# ("hello", "2.10").
-#
-# Returns the list (name, version), or nothing if the path does not have
-# that shape.
-sub getNameVersion {
-    my $p = shift;
-
-    # Test the match itself.  A failed match does not reset $1/$2, so
-    # reading them unconditionally could return captures left over from
-    # some earlier, unrelated match.
-    return unless $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
-    my ($name, $version) = ($1, $2);
-
-    $name =~ s/^-//;
-    $version =~ s/^-//;
-    return ($name, $version);
-}
-
-
-# Cheap measure of how `close' two version strings are: the length of
-# their common prefix, i.e. the index of the first character at which
-# they differ.  Identical strings score 999.  A larger result means the
-# versions are closer.
-sub versionDiff {
-    my ($left, $right) = @_;
-
-    return 999 if $left eq $right;
-
-    # Only positions that exist in both strings can agree.
-    my $limit = length($left) < length($right) ? length($left) : length($right);
-
-    my $common = 0;
-    while ($common < $limit
-           && substr($left, $common, 1) eq substr($right, $common, 1)) {
-        $common++;
-    }
-
-    return $common;
-}
-
-
-# Return the location under $narDir of the compressed NAR archive for a
-# store path.
-#
-#   $narFiles   reference to a NAR table (store path => list of entries)
-#   $storePath  the path to look up
-#
-# The first entry listed for $storePath is used; the result is $narDir
-# joined with the last component of that entry's `url' field.  Dies if
-# the path is unknown, has no entry, or the entry's URL has no usable
-# file name.
-sub getNarBz2 {
-    my $narFiles = shift;
-    my $storePath = shift;
-
-    my $narFileList = $$narFiles{$storePath};
-    die "missing path $storePath" unless defined $narFileList;
-
-    my $narFile = @{$narFileList}[0];
-    die "no NAR file listed for $storePath" unless defined $narFile;
-
-    my $url = $narFile->{url};
-    die "NAR file for $storePath has no URL" unless defined $url;
-
-    # Check the match itself rather than $1: a failed match leaves $1
-    # untouched, so it could still hold a capture from an earlier match.
-    $url =~ /\/([^\/]+)$/
-        or die "cannot determine NAR file name from URL $url";
-    return "$narDir/$1";
-}
-
-
-# Tell whether a patch table already holds a patch that produces
-# $storePath from $basePath.
-#
-#   $patches    reference to a patch table (store path => list of patches)
-#   $storePath  the path the patch produces
-#   $basePath   the path the patch is applied to
-#
-# Returns 1 if some patch listed for $storePath has that base path, 0
-# otherwise (including when nothing is listed for $storePath).
-sub containsPatch {
-    my ($patches, $storePath, $basePath) = @_;
-
-    my $patchList = $$patches{$storePath};
-    return 0 if !defined $patchList;
-
-    foreach my $patch (@{$patchList}) {
-        # !!! baseHash might differ
-        return 1 if $patch->{basePath} eq $basePath;
-    }
-    return 0;
-}
-
-
-# Compute the "weighted" number of uses of a path in the build graph.
-#
-#   $narFiles  reference to a NAR table (store path => list of entries)
-#   $path      the store path to score
-#
-# Starting from the deriver of $path, every path that refers to it --
-# directly or through a chain of `references' -- contributes 1/2^d,
-# where d is the length of the shortest such chain.  The result is 1
-# plus the sum of those contributions.  Returns 1 if $path is not in the
-# table or its first entry names no deriver.
-sub computeUses {
-    my ($narFiles, $path) = @_;
-
-    # Find the deriver of $path.
-    return 1 unless defined $$narFiles{$path};
-    my $deriver = @{$$narFiles{$path}}[0]->{deriver};
-    return 1 if !defined $deriver || $deriver eq "";
-
-    # Invert the references graph: for every path, the list of paths
-    # that refer to it.
-    my %referrers;
-    foreach my $from (keys %{$narFiles}) {
-        foreach my $to (split " ", @{$$narFiles{$from}}[0]->{references}) {
-            push @{$referrers{$to}}, $from;
-        }
-    }
-
-    # Breadth-first walk over the referrers graph; the first visit of a
-    # path fixes its shortest distance from the deriver.
-    my %dist = ($deriver => 0);
-    my @todo = ($deriver);
-
-    while (@todo) {
-        my $current = shift @todo;
-
-        foreach my $referrer (@{$referrers{$current}}) {
-            next if defined $dist{$referrer};
-            $dist{$referrer} = $dist{$current} + 1;
-            push @todo, $referrer;
-        }
-    }
-
-    # Every reached path other than the deriver itself counts, halved
-    # once per step of distance.
-    my $wuse = 1.0;
-    while (my ($user, $steps) = each %dist) {
-        next if $user eq $deriver;
-        $wuse += 1.0 / 2.0**$steps;
-    }
-
-    return $wuse;
-}
-
-
-# For each output path in the destination, see if we need to / can
-# create a patch.
-
-# Quote a string for the shell so that it is passed on as one literal
-# word, whatever characters it contains.
-sub shellQuote {
-    my $s = shift;
-    $s =~ s/'/'\\''/g;
-    return "'$s'";
-}
-
-print "creating patches...\n";
-
-foreach my $p (keys %dstOutPaths) {
-
-    # If exactly the same path already exists in the source, skip it.
-    next if defined $srcOutPaths{$p};
-
-    print " $p\n";
-
-    # If not, then we should find the paths in the source that are
-    # `most' likely to be present on a system that wants to install
-    # this path.
-
-    my ($name, $version) = getNameVersion($p);
-    next unless defined $name && defined $version;
-
-    my @closest = ();
-    my $minDist = -1; # actually, larger means closer
-
-    # Find all source paths with the same name.
-
-    foreach my $q (keys %srcOutPaths) {
-        my ($name2, $version2) = getNameVersion($q);
-        next unless defined $name2 && defined $version2;
-        next unless $name eq $name2;
-
-        # $q belongs to the source manifest and $p to the destination
-        # one; look each system up in the matching table so that the
-        # message below reports them the right way round.
-        my $srcSystem = @{$srcNarFiles{$q}}[0]->{system};
-        my $dstSystem = @{$dstNarFiles{$p}}[0]->{system};
-        if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) {
-            print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n";
-            next;
-        }
-
-        # If the sizes differ too much, then skip.  This
-        # disambiguates between, e.g., a real component and a
-        # wrapper component (cf. Firefox in Nixpkgs).
-        my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
-        my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
-
-        # Without two non-zero sizes the ratio cannot be computed (the
-        # division would abort the whole run), so pass over this
-        # candidate instead.
-        if (!$srcSize || !$dstSize) {
-            print " SKIPPING $q due to missing or zero size\n";
-            next;
-        }
-
-        my $ratio = $srcSize / $dstSize;
-        $ratio = 1 / $ratio if $ratio < 1;
-#        print " SIZE $srcSize $dstSize $ratio $q\n";
-
-        if ($ratio >= 3) {
-            print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n";
-            next;
-        }
-
-        # If the numbers of weighted uses differ too much, then
-        # skip.  This disambiguates between, e.g., the bootstrap
-        # GCC and the final GCC in Nixpkgs.
-#        my $srcUses = computeUses \%srcNarFiles, $q;
-#        my $dstUses = computeUses \%dstNarFiles, $p;
-#        $ratio = $srcUses / $dstUses;
-#        $ratio = 1 / $ratio if $ratio < 1;
-#        print " USE $srcUses $dstUses $ratio $q\n";
-
-#        if ($ratio >= 2) {
-#            print " SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
-#            next;
-#        }
-
-        # If there are multiple matching names, include the ones
-        # with the closest version numbers.
-        my $dist = versionDiff $version, $version2;
-        if ($dist > $minDist) {
-            $minDist = $dist;
-            @closest = ($q);
-        } elsif ($dist == $minDist) {
-            push @closest, $q;
-        }
-    }
-
-    if (scalar(@closest) == 0) {
-        print " NO BASE: $p\n";
-        next;
-    }
-
-    foreach my $closest (@closest) {
-
-        # Generate a patch between $closest and $p.
-        print " $p <- $closest\n";
-
-        # If the patch already exists, skip it.
-        if (containsPatch(\%srcPatches, $p, $closest) ||
-            containsPatch(\%dstPatches, $p, $closest))
-        {
-            print " skipping, already exists\n";
-            next;
-        }
-
-        my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest;
-        my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p;
-
-        if (! -f $srcNarBz2) {
-            warn "patch source archive $srcNarBz2 is missing\n";
-            next;
-        }
-
-        # Scratch files: A and B are the unpacked source and destination
-        # archives, DIFF is the binary patch from A to B.  The archive
-        # names come from manifest URLs, so everything that goes into a
-        # shell command line is quoted first.
-        my ($fileA, $fileB, $fileDiff) = ("$tmpDir/A", "$tmpDir/B", "$tmpDir/DIFF");
-        my ($quotedA, $quotedB, $quotedDiff) =
-            map { shellQuote($_) } ($fileA, $fileB, $fileDiff);
-        my $quotedSrcNarBz2 = shellQuote($srcNarBz2);
-        my $quotedDstNarBz2 = shellQuote($dstNarBz2);
-
-        system("@bunzip2@ < $quotedSrcNarBz2 > $quotedA") == 0
-            or die "cannot unpack $srcNarBz2";
-
-        if ((stat $fileA)[7] >= $maxNarSize) {
-            print " skipping, source is too large\n";
-            next;
-        }
-
-        system("@bunzip2@ < $quotedDstNarBz2 > $quotedB") == 0
-            or die "cannot unpack $dstNarBz2";
-
-        if ((stat $fileB)[7] >= $maxNarSize) {
-            print " skipping, destination is too large\n";
-            next;
-        }
-
-        # Run bsdiff under the configured `ulimit -t' and time it.
-        my $time1 = time();
-        my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $quotedA $quotedB $quotedDiff");
-        my $time2 = time();
-        if ($res) {
-            warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n";
-            next;
-        }
-
-        my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $quotedA`
-            or die "cannot compute hash of $fileA";
-        chomp $baseHash;
-
-        my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $quotedB`
-            or die "cannot compute hash of $fileB";
-        chomp $narHash;
-
-        my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $quotedDiff`
-            or die "cannot compute hash of $fileDiff";
-        chomp $narDiffHash;
-
-        my $narDiffSize = (stat $fileDiff)[7];
-        my $dstNarBz2Size = (stat $dstNarBz2)[7];
-
-        print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n";
-
-        # The patch is compared against the compressed destination
-        # archive.
-        if ($narDiffSize >= $dstNarBz2Size) {
-            print " rejecting; patch bigger than full archive\n";
-            next;
-        }
-
-        if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
-            print " rejecting; patch too large relative to full archive\n";
-            next;
-        }
-
-        # Patches are stored under the hash of their contents.
-        my $finalName =
-            "$narDiffHash.nar-bsdiff";
-
-        if (-e "$patchesDir/$finalName") {
-            print " not copying, already exists\n";
-        }
-
-        else {
-
-            # Copy under a temporary name first and rename afterwards.
-            system("cp $quotedDiff " . shellQuote("$patchesDir/$finalName.tmp")) == 0
-                or die "cannot copy diff";
-
-            rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName")
-                or die "cannot rename $patchesDir/$finalName.tmp";
-
-        }
-
-        # Add the patch to the manifest.
-        addPatch \%dstPatches, $p,
-            { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
-            , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
-            , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
-            }, 0;
-    }
-}
-
-
-# Carry over the patches of the source manifest that can still be
-# useful: those producing a path of the destination, and those
-# producing a path that is itself the base of a patch already kept.
-
-print "propagating patches...\n";
-
-# Keeping a patch can make further patches useful (the ones producing
-# its base path), so passes are repeated until one adds nothing new.
-# !!! we repeat this to reach the transitive closure; inefficient
-my $changed = 1;
-while ($changed) {
-    $changed = 0;
-
-    print "loop\n";
-
-    # All paths that serve as the base of a patch kept so far.
-    my %dstBasePaths;
-    foreach my $kept (values %dstPatches) {
-        $dstBasePaths{$_->{basePath}} = 1 foreach @{$kept};
-    }
-
-    foreach my $p (keys %srcPatches) {
-
-        # Patches producing $p are wanted if $p is in the destination,
-        # or if $p is the base of a patch for the destination.
-        # !!! check baseHash
-        next unless defined($dstNarFiles{$p}) || defined($dstBasePaths{$p});
-
-        foreach my $patch (@{$srcPatches{$p}}) {
-            $changed = 1 if addPatch \%dstPatches, $p, $patch;
-        }
-    }
-}
-
-
-# Rewrite the manifest of the destination (with the new patches).
-writeManifest $dstManifest,
-    \%dstNarFiles, \%dstPatches;