diff options
Diffstat (limited to 'perl/lib')
-rw-r--r-- | perl/lib/Nix/Config.pm.in | 25 | ||||
-rw-r--r-- | perl/lib/Nix/GeneratePatches.pm | 340 | ||||
-rw-r--r-- | perl/lib/Nix/Manifest.pm | 357 |
3 files changed, 722 insertions, 0 deletions
diff --git a/perl/lib/Nix/Config.pm.in b/perl/lib/Nix/Config.pm.in new file mode 100644 index 000000000..658305fd9 --- /dev/null +++ b/perl/lib/Nix/Config.pm.in @@ -0,0 +1,25 @@ +package Nix::Config; + +$binDir = $ENV{"NIX_BIN_DIR"} || "@bindir@"; +$libexecDir = $ENV{"NIX_LIBEXEC_DIR"} || "@libexecdir@"; +$manifestDir = $ENV{"NIX_MANIFESTS_DIR"} || "@localstatedir@/nix/manifests"; +$logDir = $ENV{"NIX_LOG_DIR"} || "@localstatedir@/log/nix"; + +$bzip2 = $ENV{"NIX_BZIP2"} || "@bzip2@"; +$curl = "@curl@"; + +sub readConfig { + my %config; + my $config = "@sysconfdir@/nix/nix.conf"; + return unless -f $config; + + open CONFIG, "<$config" or die "cannot open `$config'"; + while (<CONFIG>) { + /^\s*([\w|-]+)\s*=\s*(.*)$/ or next; + $config{$1} = $2; + print "|$1| -> |$2|\n"; + } + close CONFIG; +} + +return 1; diff --git a/perl/lib/Nix/GeneratePatches.pm b/perl/lib/Nix/GeneratePatches.pm new file mode 100644 index 000000000..f9d83c49c --- /dev/null +++ b/perl/lib/Nix/GeneratePatches.pm @@ -0,0 +1,340 @@ +package Nix::GeneratePatches; + +use strict; +use File::Temp qw(tempdir); +use File::stat; +use Nix::Config; +use Nix::Manifest; + +our @ISA = qw(Exporter); +our @EXPORT = qw(generatePatches propagatePatches copyPatches); + + +# Some patch generations options. + +# Max size of NAR archives to generate patches for. +my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"}; +$maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize; + +# If patch is bigger than this fraction of full archive, reject. +my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"}; +$maxPatchFraction = 0.60 if !defined $maxPatchFraction; + +my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"}; +$timeLimit = 180 if !defined $timeLimit; + +my $hashAlgo = "sha256"; + + +sub findOutputPaths { + my $narFiles = shift; + + my %outPaths; + + foreach my $p (keys %{$narFiles}) { + + # Ignore derivations. + next if ($p =~ /\.drv$/); + + # Ignore builders (too much ambiguity -- they're all called + # `builder.sh'). + next if ($p =~ /\.sh$/); + next if ($p =~ /\.patch$/); + + # Don't bother including tar files etc. + next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/); + + $outPaths{$p} = 1; + } + + return %outPaths; +} + + +sub getNameVersion { + my $p = shift; + $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/; + my $name = $1; + my $version = $2; + return undef unless defined $name && defined $version; + $name =~ s/^-//; + $version =~ s/^-//; + return ($name, $version); +} + + +# A quick hack to get a measure of the `distance' between two +# versions: it's just the position of the first character that differs +# (or 999 if they are the same). +sub versionDiff { + my $s = shift; + my $t = shift; + my $i; + return 999 if $s eq $t; + for ($i = 0; $i < length $s; $i++) { + return $i if $i >= length $t or + substr($s, $i, 1) ne substr($t, $i, 1); + } + return $i; +} + + +sub getNarBz2 { + my $narPath = shift; + my $narFiles = shift; + my $storePath = shift; + + my $narFileList = $$narFiles{$storePath}; + die "missing path $storePath" unless defined $narFileList; + + my $narFile = @{$narFileList}[0]; + die unless defined $narFile; + + $narFile->{url} =~ /\/([^\/]+)$/; + die unless defined $1; + return "$narPath/$1"; +} + + +sub containsPatch { + my $patches = shift; + my $storePath = shift; + my $basePath = shift; + my $patchList = $$patches{$storePath}; + return 0 if !defined $patchList; + my $found = 0; + foreach my $patch (@{$patchList}) { + # !!! baseHash might differ + return 1 if $patch->{basePath} eq $basePath; + } + return 0; +} + + +sub generatePatches { + my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, $patchesPath, $patchesURL, $tmpDir) = @_; + + my %srcOutPaths = findOutputPaths $srcNarFiles; + my %dstOutPaths = findOutputPaths $dstNarFiles; + + # For each output path in the destination, see if we need to / can + # create a patch. + + print STDERR "creating patches...\n"; + + foreach my $p (keys %dstOutPaths) { + + # If exactly the same path already exists in the source, skip it. + next if defined $srcOutPaths{$p}; + + print " $p\n"; + + # If not, then we should find the paths in the source that are + # `most' likely to be present on a system that wants to + # install this path. + + (my $name, my $version) = getNameVersion $p; + next unless defined $name && defined $version; + + my @closest = (); + my $closestVersion; + my $minDist = -1; # actually, larger means closer + + # Find all source paths with the same name. + + foreach my $q (keys %srcOutPaths) { + (my $name2, my $version2) = getNameVersion $q; + next unless defined $name2 && defined $version2; + + if ($name eq $name2) { + + my $srcSystem = @{$$dstNarFiles{$p}}[0]->{system}; + my $dstSystem = @{$$srcNarFiles{$q}}[0]->{system}; + if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) { + print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n"; + next; + } + + # If the sizes differ too much, then skip. This + # disambiguates between, e.g., a real component and a + # wrapper component (cf. Firefox in Nixpkgs). + my $srcSize = @{$$srcNarFiles{$q}}[0]->{size}; + my $dstSize = @{$$dstNarFiles{$p}}[0]->{size}; + my $ratio = $srcSize / $dstSize; + $ratio = 1 / $ratio if $ratio < 1; + # print " SIZE $srcSize $dstSize $ratio $q\n"; + + if ($ratio >= 3) { + print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n"; + next; + } + + # If there are multiple matching names, include the + # ones with the closest version numbers. + my $dist = versionDiff $version, $version2; + if ($dist > $minDist) { + $minDist = $dist; + @closest = ($q); + $closestVersion = $version2; + } elsif ($dist == $minDist) { + push @closest, $q; + } + } + } + + if (scalar(@closest) == 0) { + print " NO BASE: $p\n"; + next; + } + + foreach my $closest (@closest) { + + # Generate a patch between $closest and $p. + print STDERR " $p <- $closest\n"; + + # If the patch already exists, skip it. + if (containsPatch($srcPatches, $p, $closest) || + containsPatch($dstPatches, $p, $closest)) + { + print " skipping, already exists\n"; + next; + } + + my $srcNarBz2 = getNarBz2 $narPath, $srcNarFiles, $closest; + my $dstNarBz2 = getNarBz2 $narPath, $dstNarFiles, $p; + + if (! -f $srcNarBz2) { + warn "patch source archive $srcNarBz2 is missing\n"; + next; + } + + system("$Nix::Config::bzip2 -d < $srcNarBz2 > $tmpDir/A") == 0 + or die "cannot unpack $srcNarBz2"; + + if (stat("$tmpDir/A")->size >= $maxNarSize) { + print " skipping, source is too large\n"; + next; + } + + system("$Nix::Config::bzip2 -d < $dstNarBz2 > $tmpDir/B") == 0 + or die "cannot unpack $dstNarBz2"; + + if (stat("$tmpDir/B")->size >= $maxNarSize) { + print " skipping, destination is too large\n"; + next; + } + + my $time1 = time(); + my $res = system("ulimit -t $timeLimit; $Nix::Config::libexecDir/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF"); + my $time2 = time(); + if ($res) { + warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n"; + next; + } + + my $baseHash = `$Nix::Config::binDir/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die; + chomp $baseHash; + + my $narHash = `$Nix::Config::binDir/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die; + chomp $narHash; + + my $narDiffHash = `$Nix::Config::binDir/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die; + chomp $narDiffHash; + + my $narDiffSize = stat("$tmpDir/DIFF")->size; + my $dstNarBz2Size = stat($dstNarBz2)->size; + + print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n"; + + if ($narDiffSize >= $dstNarBz2Size) { + print " rejecting; patch bigger than full archive\n"; + next; + } + + if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) { + print " rejecting; patch too large relative to full archive\n"; + next; + } + + my $finalName = "$narDiffHash.nar-bsdiff"; + + if (-e "$patchesPath/$finalName") { + print " not copying, already exists\n"; + } + + else { + system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0 + or die "cannot copy diff"; + rename("$patchesPath/$finalName.tmp", "$patchesPath/$finalName") + or die "cannot rename $patchesPath/$finalName.tmp"; + } + + # Add the patch to the manifest. + addPatch $dstPatches, $p, + { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash" + , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash" + , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff" + }; + } + } +} + + +# Propagate useful patches from $srcPatches to $dstPatches. A patch +# is useful if it produces either paths in the $dstNarFiles or paths +# that can be used as the base for other useful patches. +sub propagatePatches { + my ($srcPatches, $dstNarFiles, $dstPatches) = @_; + + print STDERR "propagating patches...\n"; + + my $changed; + do { + # !!! we repeat this to reach the transitive closure; inefficient + $changed = 0; + + print STDERR "loop\n"; + + my %dstBasePaths; + foreach my $q (keys %{$dstPatches}) { + foreach my $patch (@{$$dstPatches{$q}}) { + $dstBasePaths{$patch->{basePath}} = 1; + } + } + + foreach my $p (keys %{$srcPatches}) { + my $patchList = $$srcPatches{$p}; + + my $include = 0; + + # Is path $p included in the destination? If so, include + # patches that produce it. + $include = 1 if defined $$dstNarFiles{$p}; + + # Is path $p a path that serves as a base for paths in the + # destination? If so, include patches that produce it. + # !!! check baseHash + $include = 1 if defined $dstBasePaths{$p}; + + if ($include) { + foreach my $patch (@{$patchList}) { + $changed = 1 if addPatch $dstPatches, $p, $patch; + } + } + + } + + } while $changed; +} + + +# Add all new patches in $srcPatches to $dstPatches. +sub copyPatches { + my ($srcPatches, $dstPatches) = @_; + foreach my $p (keys %{$srcPatches}) { + addPatch $dstPatches, $p, $_ foreach @{$$srcPatches{$p}}; + } +} + + +return 1; diff --git a/perl/lib/Nix/Manifest.pm b/perl/lib/Nix/Manifest.pm new file mode 100644 index 000000000..7790cfe3b --- /dev/null +++ b/perl/lib/Nix/Manifest.pm @@ -0,0 +1,357 @@ +package Nix::Manifest; + +use strict; +use DBI; +use Cwd; +use File::stat; +use File::Path; +use Fcntl ':flock'; +use Nix::Config; + +our @ISA = qw(Exporter); +our @EXPORT = qw(readManifest writeManifest updateManifestDB addPatch); + + +sub addNAR { + my ($narFiles, $storePath, $info) = @_; + + $$narFiles{$storePath} = [] + unless defined $$narFiles{$storePath}; + + my $narFileList = $$narFiles{$storePath}; + + my $found = 0; + foreach my $narFile (@{$narFileList}) { + $found = 1 if $narFile->{url} eq $info->{url}; + } + + push @{$narFileList}, $info if !$found; +} + + +sub addPatch { + my ($patches, $storePath, $patch) = @_; + + $$patches{$storePath} = [] + unless defined $$patches{$storePath}; + + my $patchList = $$patches{$storePath}; + + my $found = 0; + foreach my $patch2 (@{$patchList}) { + $found = 1 if + $patch2->{url} eq $patch->{url} && + $patch2->{basePath} eq $patch->{basePath}; + } + + push @{$patchList}, $patch if !$found; + + return !$found; +} + + +sub readManifest_ { + my ($manifest, $addNAR, $addPatch) = @_; + + open MANIFEST, "<$manifest" + or die "cannot open `$manifest': $!"; + + my $inside = 0; + my $type; + + my $manifestVersion = 2; + + my ($storePath, $url, $hash, $size, $basePath, $baseHash, $patchType); + my ($narHash, $narSize, $references, $deriver, $copyFrom, $system); + + while (<MANIFEST>) { + chomp; + s/\#.*$//g; + next if (/^$/); + + if (!$inside) { + + if (/^\s*(\w*)\s*\{$/) { + $type = $1; + $type = "narfile" if $type eq ""; + $inside = 1; + undef $storePath; + undef $url; + undef $hash; + undef $size; + undef $narHash; + undef $narSize; + undef $basePath; + undef $baseHash; + undef $patchType; + undef $system; + $references = ""; + $deriver = ""; + } + + } else { + + if (/^\}$/) { + $inside = 0; + + if ($type eq "narfile") { + &$addNAR($storePath, + { url => $url, hash => $hash, size => $size + , narHash => $narHash, narSize => $narSize + , references => $references + , deriver => $deriver + , system => $system + }); + } + + elsif ($type eq "patch") { + &$addPatch($storePath, + { url => $url, hash => $hash, size => $size + , basePath => $basePath, baseHash => $baseHash + , narHash => $narHash, narSize => $narSize + , patchType => $patchType + }); + } + + } + + elsif (/^\s*StorePath:\s*(\/\S+)\s*$/) { $storePath = $1; } + elsif (/^\s*CopyFrom:\s*(\/\S+)\s*$/) { $copyFrom = $1; } + elsif (/^\s*Hash:\s*(\S+)\s*$/) { $hash = $1; } + elsif (/^\s*URL:\s*(\S+)\s*$/) { $url = $1; } + elsif (/^\s*Size:\s*(\d+)\s*$/) { $size = $1; } + elsif (/^\s*SuccOf:\s*(\/\S+)\s*$/) { } # obsolete + elsif (/^\s*BasePath:\s*(\/\S+)\s*$/) { $basePath = $1; } + elsif (/^\s*BaseHash:\s*(\S+)\s*$/) { $baseHash = $1; } + elsif (/^\s*Type:\s*(\S+)\s*$/) { $patchType = $1; } + elsif (/^\s*NarHash:\s*(\S+)\s*$/) { $narHash = $1; } + elsif (/^\s*NarSize:\s*(\d+)\s*$/) { $narSize = $1; } + elsif (/^\s*References:\s*(.*)\s*$/) { $references = $1; } + elsif (/^\s*Deriver:\s*(\S+)\s*$/) { $deriver = $1; } + elsif (/^\s*ManifestVersion:\s*(\d+)\s*$/) { $manifestVersion = $1; } + elsif (/^\s*System:\s*(\S+)\s*$/) { $system = $1; } + + # Compatibility; + elsif (/^\s*NarURL:\s*(\S+)\s*$/) { $url = $1; } + elsif (/^\s*MD5:\s*(\S+)\s*$/) { $hash = "md5:$1"; } + + } + } + + close MANIFEST; + + return $manifestVersion; +} + + +sub readManifest { + my ($manifest, $narFiles, $patches) = @_; + readManifest_($manifest, + sub { addNAR($narFiles, @_); }, + sub { addPatch($patches, @_); } ); +} + + +sub writeManifest { + my ($manifest, $narFiles, $patches, $noCompress) = @_; + + open MANIFEST, ">$manifest.tmp"; # !!! check exclusive + + print MANIFEST "version {\n"; + print MANIFEST " ManifestVersion: 3\n"; + print MANIFEST "}\n"; + + foreach my $storePath (sort (keys %{$narFiles})) { + my $narFileList = $$narFiles{$storePath}; + foreach my $narFile (@{$narFileList}) { + print MANIFEST "{\n"; + print MANIFEST " StorePath: $storePath\n"; + print MANIFEST " NarURL: $narFile->{url}\n"; + print MANIFEST " Hash: $narFile->{hash}\n" if defined $narFile->{hash}; + print MANIFEST " Size: $narFile->{size}\n" if defined $narFile->{size}; + print MANIFEST " NarHash: $narFile->{narHash}\n"; + print MANIFEST " NarSize: $narFile->{narSize}\n" if $narFile->{narSize}; + print MANIFEST " References: $narFile->{references}\n" + if defined $narFile->{references} && $narFile->{references} ne ""; + print MANIFEST " Deriver: $narFile->{deriver}\n" + if defined $narFile->{deriver} && $narFile->{deriver} ne ""; + print MANIFEST " System: $narFile->{system}\n" if defined $narFile->{system}; + print MANIFEST "}\n"; + } + } + + foreach my $storePath (sort (keys %{$patches})) { + my $patchList = $$patches{$storePath}; + foreach my $patch (@{$patchList}) { + print MANIFEST "patch {\n"; + print MANIFEST " StorePath: $storePath\n"; + print MANIFEST " NarURL: $patch->{url}\n"; + print MANIFEST " Hash: $patch->{hash}\n"; + print MANIFEST " Size: $patch->{size}\n"; + print MANIFEST " NarHash: $patch->{narHash}\n"; + print MANIFEST " NarSize: $patch->{narSize}\n" if $patch->{narSize}; + print MANIFEST " BasePath: $patch->{basePath}\n"; + print MANIFEST " BaseHash: $patch->{baseHash}\n"; + print MANIFEST " Type: $patch->{patchType}\n"; + print MANIFEST "}\n"; + } + } + + + close MANIFEST; + + rename("$manifest.tmp", $manifest) + or die "cannot rename $manifest.tmp: $!"; + + + # Create a bzipped manifest. + unless (defined $noCompress) { + system("$Nix::Config::bzip2 < $manifest > $manifest.bz2.tmp") == 0 + or die "cannot compress manifest"; + + rename("$manifest.bz2.tmp", "$manifest.bz2") + or die "cannot rename $manifest.bz2.tmp: $!"; + } +} + + +sub updateManifestDB { + my $manifestDir = $Nix::Config::manifestDir; + + mkpath($manifestDir); + + my $dbPath = "$manifestDir/cache.sqlite"; + + # Open/create the database. + our $dbh = DBI->connect("dbi:SQLite:dbname=$dbPath", "", "") + or die "cannot open database `$dbPath'"; + $dbh->{RaiseError} = 1; + $dbh->{PrintError} = 0; + + $dbh->do("pragma foreign_keys = on"); + $dbh->do("pragma synchronous = off"); # we can always reproduce the cache + $dbh->do("pragma journal_mode = truncate"); + + # Initialise the database schema, if necessary. + $dbh->do(<<EOF); + create table if not exists Manifests ( + id integer primary key autoincrement not null, + path text unique not null, + timestamp integer not null + ); +EOF + + $dbh->do(<<EOF); + create table if not exists NARs ( + id integer primary key autoincrement not null, + manifest integer not null, + storePath text not null, + url text not null, + hash text, + size integer, + narHash text, + narSize integer, + refs text, + deriver text, + system text, + foreign key (manifest) references Manifests(id) on delete cascade + ); +EOF + + $dbh->do("create index if not exists NARs_storePath on NARs(storePath)"); + + $dbh->do(<<EOF); + create table if not exists Patches ( + id integer primary key autoincrement not null, + manifest integer not null, + storePath text not null, + basePath text not null, + baseHash text not null, + url text not null, + hash text, + size integer, + narHash text, + narSize integer, + patchType text not null, + foreign key (manifest) references Manifests(id) on delete cascade + ); +EOF + + $dbh->do("create index if not exists Patches_storePath on Patches(storePath)"); + + # Acquire an exclusive lock to ensure that only one process + # updates the DB at the same time. This isn't really necessary, + # but it prevents work duplication and lock contention in SQLite. + my $lockFile = "$manifestDir/cache.lock"; + open MAINLOCK, ">>$lockFile" or die "unable to acquire lock ‘$lockFile’: $!\n"; + flock(MAINLOCK, LOCK_EX) or die; + + $dbh->begin_work; + + # Read each manifest in $manifestDir and add it to the database, + # unless we've already done so on a previous run. + my %seen; + + for my $manifest (glob "$manifestDir/*.nixmanifest") { + $manifest = Cwd::abs_path($manifest); + my $timestamp = lstat($manifest)->mtime; + $seen{$manifest} = 1; + + next if scalar @{$dbh->selectcol_arrayref( + "select 1 from Manifests where path = ? and timestamp = ?", + {}, $manifest, $timestamp)} == 1; + + print STDERR "caching $manifest...\n"; + + $dbh->do("delete from Manifests where path = ?", {}, $manifest); + + $dbh->do("insert into Manifests(path, timestamp) values (?, ?)", + {}, $manifest, $timestamp); + + our $id = $dbh->last_insert_id("", "", "", ""); + + sub addNARToDB { + my ($storePath, $narFile) = @_; + $dbh->do( + "insert into NARs(manifest, storePath, url, hash, size, narHash, " . + "narSize, refs, deriver, system) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + {}, $id, $storePath, $narFile->{url}, $narFile->{hash}, $narFile->{size}, + $narFile->{narHash}, $narFile->{narSize}, $narFile->{references}, + $narFile->{deriver}, $narFile->{system}); + }; + + sub addPatchToDB { + my ($storePath, $patch) = @_; + $dbh->do( + "insert into Patches(manifest, storePath, basePath, baseHash, url, hash, " . + "size, narHash, narSize, patchType) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + {}, $id, $storePath, $patch->{basePath}, $patch->{baseHash}, $patch->{url}, + $patch->{hash}, $patch->{size}, $patch->{narHash}, $patch->{narSize}, + $patch->{patchType}); + }; + + my $version = readManifest_($manifest, \&addNARToDB, \&addPatchToDB); + + if ($version < 3) { + die "you have an old-style manifest `$manifest'; please delete it"; + } + if ($version >= 10) { + die "manifest `$manifest' is too new; please delete it or upgrade Nix"; + } + } + + # Removed cached information for removed manifests from the DB. + foreach my $manifest (@{$dbh->selectcol_arrayref("select path from Manifests")}) { + next if defined $seen{$manifest}; + $dbh->do("delete from Manifests where path = ?", {}, $manifest); + } + + $dbh->commit; + + close MAINLOCK; + + return $dbh; +} + + +return 1; |