diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2012-07-11 17:53:20 -0400 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2012-07-11 17:53:20 -0400 |
commit | b74d92755d1ca6a1538f292dcb5a906f66af7b51 (patch) | |
tree | 4369859a40dc8a13b58e2d507edace3c239078b3 | |
parent | 09a6321aeb7393cdb4b5af62d2e4106d83124fdf (diff) |
download-from-binary-cache: Use HEAD requests if possible
In "nix-env -qas", we don't need the substitute info, we just need to
know if it exists. This can be done using a HTTP HEAD request, which
saves bandwidth.
Note however that curl currently has a bug that prevents it from
reusing HTTP connections if HEAD requests return a 404:
https://sourceforge.net/tracker/?func=detail&aid=3542731&group_id=976&atid=100976
Without the patch attached to the issue, using HEAD is actually quite
a bit slower than GET.
-rw-r--r-- | scripts/download-from-binary-cache.pl.in | 91 |
1 files changed, 79 insertions, 12 deletions
diff --git a/scripts/download-from-binary-cache.pl.in b/scripts/download-from-binary-cache.pl.in index 2a70e5193..93155e0b5 100644 --- a/scripts/download-from-binary-cache.pl.in +++ b/scripts/download-from-binary-cache.pl.in @@ -18,7 +18,7 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /, my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150); $maxParallelRequests = 1 if $maxParallelRequests < 1; -my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR); +my ($dbh, $insertNAR, $queryNAR, $insertNARExistence, $queryNARExistence); my %cacheIds; my $curlm = WWW::Curl::Multi->new; @@ -30,7 +30,7 @@ my $caBundle = $ENV{"CURL_CA_BUNDLE"} // $ENV{"OPENSSL_X509_CERT_FILE"}; sub addRequest { - my ($storePath, $url) = @_; + my ($storePath, $url, $head) = @_; my $curl = WWW::Curl::Easy->new; my $curlId = $curlIdCount++; @@ -41,6 +41,7 @@ sub addRequest { $curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content}); $curl->setopt(CURLOPT_FOLLOWLOCATION, 1); $curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle; + $curl->setopt(CURLOPT_NOBODY, 1) if $head; if ($activeRequests >= $maxParallelRequests) { $scheduled{$curlId} = 1; @@ -127,9 +128,10 @@ EOF EOF $dbh->do(<<EOF); - create table if not exists NegativeNARs ( + create table if not exists NARExistence ( cache integer not null, storePath text not null, + exist integer not null, timestamp integer not null, primary key (cache, storePath), foreign key (cache) references BinaryCaches(id) on delete cascade @@ -142,17 +144,28 @@ EOF $queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die; - $insertNegativeNAR = $dbh->prepare( - "insert or replace into NegativeNARs(cache, storePath, timestamp) values (?, ?, ?)") or die; + $insertNARExistence = $dbh->prepare( + "insert or replace into NARExistence(cache, storePath, exist, timestamp) values (?, ?, ?, ?)") or die; - $queryNegativeNAR = $dbh->prepare("select 1 from NegativeNARs where cache = ? and storePath = ?") or die; + $queryNARExistence = $dbh->prepare("select exist from NARExistence where cache = ? and storePath = ?") or die; } + sub negativeHit { my ($storePath, $binaryCacheUrl) = @_; - $queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath)); - return @{$queryNegativeNAR->fetchall_arrayref()} != 0; + $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath)); + my $res = $queryNARExistence->fetchrow_hashref(); + return defined $res && $res->{exist} == 0; +} + + +sub positiveHit { + my ($storePath, $binaryCacheUrl) = @_; + return 1 if defined getCachedInfoFrom($storePath, $binaryCacheUrl); + $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath)); + my $res = $queryNARExistence->fetchrow_hashref(); + return defined $res && $res->{exist} == 1; } @@ -166,7 +179,7 @@ sub processNARInfo { print STDERR "could not download ‘$request->{url}’ (" . ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n"; } else { - $insertNegativeNAR->execute($cacheId, basename($storePath), time()); + $insertNARExistence->execute($cacheId, basename($storePath), 0, time()); } return undef; } @@ -319,6 +332,61 @@ sub printInfoParallel { } +sub printSubstitutablePaths { + my @paths = @_; + + # First look for paths that have cached info. + my @left; + foreach my $storePath (@paths) { + my $found = 0; + foreach my $binaryCacheUrl (@binaryCacheUrls) { + if (positiveHit($storePath, $binaryCacheUrl)) { + print "$storePath\n"; + $found = 1; + last; + } + } + push @left, $storePath if !$found; + } + + return if scalar @left == 0; + + # For remaining paths, do HEAD requests. + foreach my $binaryCacheUrl (@binaryCacheUrls) { + my $cacheId = getCacheId($binaryCacheUrl); + + my @left2; + %requests = (); + foreach my $storePath (@left) { + if (negativeHit($storePath, $binaryCacheUrl)) { + push @left2, $storePath; + next; + } + addRequest($storePath, infoUrl($binaryCacheUrl, $storePath), 1); + } + + processRequests; + + foreach my $request (values %requests) { + if ($request->{result} != 0 || $request->{httpStatus} != 200) { + if ($request->{httpStatus} != 404) { + print STDERR "could not check ‘$request->{url}’ (" . + ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n"; + } else { + $insertNARExistence->execute($cacheId, basename($request->{storePath}), 0, time()); + } + push @left2, $request->{storePath}; + } else { + $insertNARExistence->execute($cacheId, basename($request->{storePath}), 1, time()); + print "$request->{storePath}\n"; + } + } + + @left = @left2; + } +} + + sub downloadBinary { my ($storePath) = @_; @@ -371,9 +439,8 @@ if ($ARGV[0] eq "--query") { my ($cmd, @args) = split " ", $_; if ($cmd eq "have") { - my $storePath = <STDIN>; chomp $storePath; - # FIXME: want to give correct info here, but it's too slow. - print "0\n"; + printSubstitutablePaths(@args); + print "\n"; } elsif ($cmd eq "info") { |