diff options
Diffstat (limited to 'scripts/generate-patches.pl.in')
-rwxr-xr-x | scripts/generate-patches.pl.in | 429 |
1 files changed, 0 insertions, 429 deletions
diff --git a/scripts/generate-patches.pl.in b/scripts/generate-patches.pl.in deleted file mode 100755 index 05bf65093..000000000 --- a/scripts/generate-patches.pl.in +++ /dev/null @@ -1,429 +0,0 @@ -#! @perl@ -w -I@libexecdir@/nix - -use strict; -use File::Temp qw(tempdir); -use readmanifest; - - -# Some patch generations options. - -# Max size of NAR archives to generate patches for. -my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"}; -$maxNarSize = 100 * 1024 * 1024 if !defined $maxNarSize; - -# If patch is bigger than this fraction of full archive, reject. -my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"}; -$maxPatchFraction = 0.60 if !defined $maxPatchFraction; - -my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"}; -$timeLimit = 180 if !defined $timeLimit; - - -die unless scalar @ARGV == 5; - -my $hashAlgo = "sha256"; - -my $narDir = $ARGV[0]; -my $patchesDir = $ARGV[1]; -my $patchesURL = $ARGV[2]; -my $srcManifest = $ARGV[3]; -my $dstManifest = $ARGV[4]; - -my $tmpDir = tempdir("nix-generate-patches.XXXXXX", CLEANUP => 1, TMPDIR => 1) - or die "cannot create a temporary directory"; - -#END { rmdir $tmpDir; } - -my %srcNarFiles; -my %srcLocalPaths; -my %srcPatches; - -my %dstNarFiles; -my %dstLocalPaths; -my %dstPatches; - -readManifest "$srcManifest", - \%srcNarFiles, \%srcLocalPaths, \%srcPatches; - -readManifest "$dstManifest", - \%dstNarFiles, \%dstLocalPaths, \%dstPatches; - - -sub findOutputPaths { - my $narFiles = shift; - - my %outPaths; - - foreach my $p (keys %{$narFiles}) { - - # Ignore derivations. - next if ($p =~ /\.drv$/); - - # Ignore builders (too much ambiguity -- they're all called - # `builder.sh'). - next if ($p =~ /\.sh$/); - next if ($p =~ /\.patch$/); - - # Don't bother including tar files etc. - next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/); - - $outPaths{$p} = 1; - } - - return %outPaths; -} - -print "finding src output paths...\n"; -my %srcOutPaths = findOutputPaths \%srcNarFiles; - -print "finding dst output paths...\n"; -my %dstOutPaths = findOutputPaths \%dstNarFiles; - - -sub getNameVersion { - my $p = shift; - $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/; - my $name = $1; - my $version = $2; - return undef unless defined $name && defined $version; - $name =~ s/^-//; - $version =~ s/^-//; - return ($name, $version); -} - - -# A quick hack to get a measure of the `distance' between two -# versions: it's just the position of the first character that differs -# (or 999 if they are the same). -sub versionDiff { - my $s = shift; - my $t = shift; - my $i; - return 999 if $s eq $t; - for ($i = 0; $i < length $s; $i++) { - return $i if $i >= length $t or - substr($s, $i, 1) ne substr($t, $i, 1); - } - return $i; -} - - -sub getNarBz2 { - my $narFiles = shift; - my $storePath = shift; - - my $narFileList = $$narFiles{$storePath}; - die "missing path $storePath" unless defined $narFileList; - - my $narFile = @{$narFileList}[0]; - die unless defined $narFile; - - $narFile->{url} =~ /\/([^\/]+)$/; - die unless defined $1; - return "$narDir/$1"; -} - - -sub containsPatch { - my $patches = shift; - my $storePath = shift; - my $basePath = shift; - my $patchList = $$patches{$storePath}; - return 0 if !defined $patchList; - my $found = 0; - foreach my $patch (@{$patchList}) { - # !!! baseHash might differ - return 1 if $patch->{basePath} eq $basePath; - } - return 0; -} - - -# Compute the "weighted" number of uses of a path in the build graph. -sub computeUses { - my $narFiles = shift; - my $path = shift; - - # Find the deriver of $path. - return 1 unless defined $$narFiles{$path}; - my $deriver = @{$$narFiles{$path}}[0]->{deriver}; - return 1 unless defined $deriver && $deriver ne ""; - -# print " DERIVER $deriver\n"; - - # Optimisation: build the referrers graph from the references - # graph. - my %referrers; - foreach my $q (keys %{$narFiles}) { - my @refs = split " ", @{$$narFiles{$q}}[0]->{references}; - foreach my $r (@refs) { - $referrers{$r} = [] unless defined $referrers{$r}; - push @{$referrers{$r}}, $q; - } - } - - # Determine the shortest path from $deriver to all other reachable - # paths in the `referrers' graph. - - my %dist; - $dist{$deriver} = 0; - - my @queue = ($deriver); - my $pos = 0; - - while ($pos < scalar @queue) { - my $p = $queue[$pos]; - $pos++; - - foreach my $q (@{$referrers{$p}}) { - if (!defined $dist{$q}) { - $dist{$q} = $dist{$p} + 1; -# print " $q $dist{$q}\n"; - push @queue, $q; - } - } - } - - my $wuse = 1.0; - foreach my $user (keys %dist) { - next if $user eq $deriver; -# print " $user $dist{$user}\n"; - $wuse += 1.0 / 2.0**$dist{$user}; - } - -# print " XXX $path $wuse\n"; - - return $wuse; -} - - -# For each output path in the destination, see if we need to / can -# create a patch. - -print "creating patches...\n"; - -foreach my $p (keys %dstOutPaths) { - - # If exactly the same path already exists in the source, skip it. - next if defined $srcOutPaths{$p}; - - print " $p\n"; - - # If not, then we should find the paths in the source that are - # `most' likely to be present on a system that wants to install - # this path. - - (my $name, my $version) = getNameVersion $p; - next unless defined $name && defined $version; - - my @closest = (); - my $closestVersion; - my $minDist = -1; # actually, larger means closer - - # Find all source paths with the same name. - - foreach my $q (keys %srcOutPaths) { - (my $name2, my $version2) = getNameVersion $q; - next unless defined $name2 && defined $version2; - - if ($name eq $name2) { - - my $srcSystem = @{$dstNarFiles{$p}}[0]->{system}; - my $dstSystem = @{$srcNarFiles{$q}}[0]->{system}; - if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) { - print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n"; - next; - } - - # If the sizes differ too much, then skip. This - # disambiguates between, e.g., a real component and a - # wrapper component (cf. Firefox in Nixpkgs). - my $srcSize = @{$srcNarFiles{$q}}[0]->{size}; - my $dstSize = @{$dstNarFiles{$p}}[0]->{size}; - my $ratio = $srcSize / $dstSize; - $ratio = 1 / $ratio if $ratio < 1; -# print " SIZE $srcSize $dstSize $ratio $q\n"; - - if ($ratio >= 3) { - print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n"; - next; - } - - # If the numbers of weighted uses differ too much, then - # skip. This disambiguates between, e.g., the bootstrap - # GCC and the final GCC in Nixpkgs. -# my $srcUses = computeUses \%srcNarFiles, $q; -# my $dstUses = computeUses \%dstNarFiles, $p; -# $ratio = $srcUses / $dstUses; -# $ratio = 1 / $ratio if $ratio < 1; -# print " USE $srcUses $dstUses $ratio $q\n"; - -# if ($ratio >= 2) { -# print " SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n"; -# next; -# } - - # If there are multiple matching names, include the ones - # with the closest version numbers. - my $dist = versionDiff $version, $version2; - if ($dist > $minDist) { - $minDist = $dist; - @closest = ($q); - $closestVersion = $version2; - } elsif ($dist == $minDist) { - push @closest, $q; - } - } - } - - if (scalar(@closest) == 0) { - print " NO BASE: $p\n"; - next; - } - - foreach my $closest (@closest) { - - # Generate a patch between $closest and $p. - print " $p <- $closest\n"; - - # If the patch already exists, skip it. - if (containsPatch(\%srcPatches, $p, $closest) || - containsPatch(\%dstPatches, $p, $closest)) - { - print " skipping, already exists\n"; - next; - } - -# next; - - my $srcNarBz2 = getNarBz2 \%srcNarFiles, $closest; - my $dstNarBz2 = getNarBz2 \%dstNarFiles, $p; - - if (! -f $srcNarBz2) { - warn "patch source archive $srcNarBz2 is missing\n"; - next; - } - - system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0 - or die "cannot unpack $srcNarBz2"; - - if ((stat "$tmpDir/A")[7] >= $maxNarSize) { - print " skipping, source is too large\n"; - next; - } - - system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0 - or die "cannot unpack $dstNarBz2"; - - if ((stat "$tmpDir/B")[7] >= $maxNarSize) { - print " skipping, destination is too large\n"; - next; - } - - my $time1 = time(); - my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF"); - my $time2 = time(); - if ($res) { - warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n"; - next; - } - - my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die; - chomp $baseHash; - - my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die; - chomp $narHash; - - my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die; - chomp $narDiffHash; - - my $narDiffSize = (stat "$tmpDir/DIFF")[7]; - my $dstNarBz2Size = (stat $dstNarBz2)[7]; - - print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n"; - - if ($narDiffSize >= $dstNarBz2Size) { - print " rejecting; patch bigger than full archive\n"; - next; - } - - if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) { - print " rejecting; patch too large relative to full archive\n"; - next; - } - - my $finalName = - "$narDiffHash.nar-bsdiff"; - - if (-e "$patchesDir/$finalName") { - print " not copying, already exists\n"; - } - - else { - - system("cp '$tmpDir/DIFF' '$patchesDir/$finalName.tmp'") == 0 - or die "cannot copy diff"; - - rename("$patchesDir/$finalName.tmp", "$patchesDir/$finalName") - or die "cannot rename $patchesDir/$finalName.tmp"; - - } - - # Add the patch to the manifest. - addPatch \%dstPatches, $p, - { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash" - , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash" - , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff" - }, 0; - } -} - - -# Add in any potentially useful patches in the source (namely, those -# patches that produce either paths in the destination or paths that -# can be used as the base for other useful patches). - -print "propagating patches...\n"; - -my $changed; -do { - # !!! we repeat this to reach the transitive closure; inefficient - $changed = 0; - - print "loop\n"; - - my %dstBasePaths; - foreach my $q (keys %dstPatches) { - foreach my $patch (@{$dstPatches{$q}}) { - $dstBasePaths{$patch->{basePath}} = 1; - } - } - - foreach my $p (keys %srcPatches) { - my $patchList = $srcPatches{$p}; - - my $include = 0; - - # Is path $p included in the destination? If so, include - # patches that produce it. - $include = 1 if defined $dstNarFiles{$p}; - - # Is path $p a path that serves as a base for paths in the - # destination? If so, include patches that produce it. - # !!! check baseHash - $include = 1 if defined $dstBasePaths{$p}; - - if ($include) { - foreach my $patch (@{$patchList}) { - $changed = 1 if addPatch \%dstPatches, $p, $patch; - } - } - - } - -} while $changed; - - -# Rewrite the manifest of the destination (with the new patches). -writeManifest "${dstManifest}", - \%dstNarFiles, \%dstPatches; |