aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-11 17:53:20 -0400
committerEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-11 17:53:20 -0400
commitb74d92755d1ca6a1538f292dcb5a906f66af7b51 (patch)
tree4369859a40dc8a13b58e2d507edace3c239078b3
parent09a6321aeb7393cdb4b5af62d2e4106d83124fdf (diff)
download-from-binary-cache: Use HEAD requests if possible
In "nix-env -qas", we don't need the substitute info, we just need to know if it exists. This can be done using a HTTP HEAD request, which saves bandwidth. Note however that curl currently has a bug that prevents it from reusing HTTP connections if HEAD requests return a 404: https://sourceforge.net/tracker/?func=detail&aid=3542731&group_id=976&atid=100976 Without the patch attached to the issue, using HEAD is actually quite a bit slower than GET.
-rw-r--r--scripts/download-from-binary-cache.pl.in91
1 file changed, 79 insertions, 12 deletions
diff --git a/scripts/download-from-binary-cache.pl.in b/scripts/download-from-binary-cache.pl.in
index 2a70e5193..93155e0b5 100644
--- a/scripts/download-from-binary-cache.pl.in
+++ b/scripts/download-from-binary-cache.pl.in
@@ -18,7 +18,7 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /,
my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150);
$maxParallelRequests = 1 if $maxParallelRequests < 1;
-my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR);
+my ($dbh, $insertNAR, $queryNAR, $insertNARExistence, $queryNARExistence);
my %cacheIds;
my $curlm = WWW::Curl::Multi->new;
@@ -30,7 +30,7 @@ my $caBundle = $ENV{"CURL_CA_BUNDLE"} // $ENV{"OPENSSL_X509_CERT_FILE"};
sub addRequest {
- my ($storePath, $url) = @_;
+ my ($storePath, $url, $head) = @_;
my $curl = WWW::Curl::Easy->new;
my $curlId = $curlIdCount++;
@@ -41,6 +41,7 @@ sub addRequest {
$curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content});
$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
$curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
+ $curl->setopt(CURLOPT_NOBODY, 1) if $head;
if ($activeRequests >= $maxParallelRequests) {
$scheduled{$curlId} = 1;
@@ -127,9 +128,10 @@ EOF
EOF
$dbh->do(<<EOF);
- create table if not exists NegativeNARs (
+ create table if not exists NARExistence (
cache integer not null,
storePath text not null,
+ exist integer not null,
timestamp integer not null,
primary key (cache, storePath),
foreign key (cache) references BinaryCaches(id) on delete cascade
@@ -142,17 +144,28 @@ EOF
$queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die;
- $insertNegativeNAR = $dbh->prepare(
- "insert or replace into NegativeNARs(cache, storePath, timestamp) values (?, ?, ?)") or die;
+ $insertNARExistence = $dbh->prepare(
+ "insert or replace into NARExistence(cache, storePath, exist, timestamp) values (?, ?, ?, ?)") or die;
- $queryNegativeNAR = $dbh->prepare("select 1 from NegativeNARs where cache = ? and storePath = ?") or die;
+ $queryNARExistence = $dbh->prepare("select exist from NARExistence where cache = ? and storePath = ?") or die;
}
+
sub negativeHit {
my ($storePath, $binaryCacheUrl) = @_;
- $queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath));
- return @{$queryNegativeNAR->fetchall_arrayref()} != 0;
+ $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
+ my $res = $queryNARExistence->fetchrow_hashref();
+ return defined $res && $res->{exist} == 0;
+}
+
+
+sub positiveHit {
+ my ($storePath, $binaryCacheUrl) = @_;
+ return 1 if defined getCachedInfoFrom($storePath, $binaryCacheUrl);
+ $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
+ my $res = $queryNARExistence->fetchrow_hashref();
+ return defined $res && $res->{exist} == 1;
}
@@ -166,7 +179,7 @@ sub processNARInfo {
print STDERR "could not download ‘$request->{url}’ (" .
($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
} else {
- $insertNegativeNAR->execute($cacheId, basename($storePath), time());
+ $insertNARExistence->execute($cacheId, basename($storePath), 0, time());
}
return undef;
}
@@ -319,6 +332,61 @@ sub printInfoParallel {
}
+sub printSubstitutablePaths {
+ my @paths = @_;
+
+ # First look for paths that have cached info.
+ my @left;
+ foreach my $storePath (@paths) {
+ my $found = 0;
+ foreach my $binaryCacheUrl (@binaryCacheUrls) {
+ if (positiveHit($storePath, $binaryCacheUrl)) {
+ print "$storePath\n";
+ $found = 1;
+ last;
+ }
+ }
+ push @left, $storePath if !$found;
+ }
+
+ return if scalar @left == 0;
+
+ # For remaining paths, do HEAD requests.
+ foreach my $binaryCacheUrl (@binaryCacheUrls) {
+ my $cacheId = getCacheId($binaryCacheUrl);
+
+ my @left2;
+ %requests = ();
+ foreach my $storePath (@left) {
+ if (negativeHit($storePath, $binaryCacheUrl)) {
+ push @left2, $storePath;
+ next;
+ }
+ addRequest($storePath, infoUrl($binaryCacheUrl, $storePath), 1);
+ }
+
+ processRequests;
+
+ foreach my $request (values %requests) {
+ if ($request->{result} != 0 || $request->{httpStatus} != 200) {
+ if ($request->{httpStatus} != 404) {
+ print STDERR "could not check ‘$request->{url}’ (" .
+ ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
+ } else {
+ $insertNARExistence->execute($cacheId, basename($request->{storePath}), 0, time());
+ }
+ push @left2, $request->{storePath};
+ } else {
+ $insertNARExistence->execute($cacheId, basename($request->{storePath}), 1, time());
+ print "$request->{storePath}\n";
+ }
+ }
+
+ @left = @left2;
+ }
+}
+
+
sub downloadBinary {
my ($storePath) = @_;
@@ -371,9 +439,8 @@ if ($ARGV[0] eq "--query") {
my ($cmd, @args) = split " ", $_;
if ($cmd eq "have") {
- my $storePath = <STDIN>; chomp $storePath;
- # FIXME: want to give correct info here, but it's too slow.
- print "0\n";
+ printSubstitutablePaths(@args);
+ print "\n";
}
elsif ($cmd eq "info") {