From 741e9012d3fa6f0fb41a4a5662c6b8b38ecfaa1f Mon Sep 17 00:00:00 2001 From: Nikola Knezevic Date: Mon, 6 Apr 2020 15:10:58 +0200 Subject: Rename src/lib/download.* to src/lib/datatransfer.* --- src/libexpr/common-eval-args.cc | 2 +- src/libexpr/eval.cc | 2 +- src/libexpr/parser.y | 2 +- src/libstore/build.cc | 2 +- src/libstore/builtins/fetchurl.cc | 2 +- src/libstore/datatransfer.cc | 814 +++++++++++++++++++++++++++++++ src/libstore/datatransfer.hh | 113 +++++ src/libstore/download.cc | 814 ------------------------------- src/libstore/download.hh | 113 ----- src/libstore/http-binary-cache-store.cc | 2 +- src/libstore/s3-binary-cache-store.cc | 2 +- src/nix-channel/nix-channel.cc | 2 +- src/nix-prefetch-url/nix-prefetch-url.cc | 2 +- src/nix/main.cc | 2 +- src/nix/upgrade-nix.cc | 2 +- 15 files changed, 938 insertions(+), 938 deletions(-) create mode 100644 src/libstore/datatransfer.cc create mode 100644 src/libstore/datatransfer.hh delete mode 100644 src/libstore/download.cc delete mode 100644 src/libstore/download.hh diff --git a/src/libexpr/common-eval-args.cc b/src/libexpr/common-eval-args.cc index 82bfeac36..26460601d 100644 --- a/src/libexpr/common-eval-args.cc +++ b/src/libexpr/common-eval-args.cc @@ -1,6 +1,6 @@ #include "common-eval-args.hh" #include "shared.hh" -#include "download.hh" +#include "datatransfer.hh" #include "util.hh" #include "eval.hh" #include "fetchers.hh" diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index dac32b6f5..7a20c6fa6 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -5,7 +5,7 @@ #include "derivations.hh" #include "globals.hh" #include "eval-inline.hh" -#include "download.hh" +#include "datatransfer.hh" #include "json.hh" #include "function-trace.hh" diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 6f25f5cf0..96b6f0ecd 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -544,7 +544,7 @@ formal #include #include "eval.hh" -#include "download.hh" +#include "datatransfer.hh" #include "fetchers.hh" #include "store-api.hh" diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 632982ecd..db01d9510 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -7,7 +7,7 @@ #include "affinity.hh" #include "builtins.hh" #include "builtins/buildenv.hh" -#include "download.hh" +#include "datatransfer.hh" #include "finally.hh" #include "compression.hh" #include "json.hh" diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc index a939f040f..01d21fa98 100644 --- a/src/libstore/builtins/fetchurl.cc +++ b/src/libstore/builtins/fetchurl.cc @@ -1,5 +1,5 @@ #include "builtins.hh" -#include "download.hh" +#include "datatransfer.hh" #include "store-api.hh" #include "archive.hh" #include "compression.hh" diff --git a/src/libstore/datatransfer.cc b/src/libstore/datatransfer.cc new file mode 100644 index 000000000..30ad48577 --- /dev/null +++ b/src/libstore/datatransfer.cc @@ -0,0 +1,814 @@ +#include "datatransfer.hh" +#include "util.hh" +#include "globals.hh" +#include "store-api.hh" +#include "s3.hh" +#include "compression.hh" +#include "finally.hh" + +#ifdef ENABLE_S3 +#include +#endif + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace std::string_literals; + +namespace nix { + +DownloadSettings downloadSettings; + +static GlobalConfig::Register r1(&downloadSettings); + +std::string resolveUri(const std::string & uri) +{ + if (uri.compare(0, 8, "channel:") == 0) + return "https://nixos.org/channels/" + std::string(uri, 8) + "/nixexprs.tar.xz"; + else + return uri; +} + +struct CurlDownloader : public Downloader +{ + CURLM * curlm = 0; + + std::random_device rd; + std::mt19937 mt19937; + + struct DownloadItem : public std::enable_shared_from_this + { + CurlDownloader & downloader; + DataTransferRequest request; + DownloadResult result; + Activity act; + bool done = false; // whether either the success or failure function has been called + Callback callback; + CURL * req = 0; + bool active = false; // whether the handle has been added to the multi object + std::string status; + + unsigned int attempt = 0; + + /* Don't start this download until the specified time point + has been reached. */ + std::chrono::steady_clock::time_point embargo; + + struct curl_slist * requestHeaders = 0; + + std::string encoding; + + bool acceptRanges = false; + + curl_off_t writtenToSink = 0; + + DownloadItem(CurlDownloader & downloader, + const DataTransferRequest & request, + Callback && callback) + : downloader(downloader) + , request(request) + , act(*logger, lvlTalkative, actDownload, + fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), + {request.uri}, request.parentAct) + , callback(std::move(callback)) + , finalSink([this](const unsigned char * data, size_t len) { + if (this->request.dataCallback) { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + /* Only write data to the sink if this is a + successful response. */ + if (httpStatus == 0 || httpStatus == 200 || httpStatus == 201 || httpStatus == 206) { + writtenToSink += len; + this->request.dataCallback((char *) data, len); + } + } else + this->result.data->append((char *) data, len); + }) + { + if (!request.expectedETag.empty()) + requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); + if (!request.mimeType.empty()) + requestHeaders = curl_slist_append(requestHeaders, ("Content-Type: " + request.mimeType).c_str()); + } + + ~DownloadItem() + { + if (req) { + if (active) + curl_multi_remove_handle(downloader.curlm, req); + curl_easy_cleanup(req); + } + if (requestHeaders) curl_slist_free_all(requestHeaders); + try { + if (!done) + fail(DownloadError(Interrupted, format("download of '%s' was interrupted") % request.uri)); + } catch (...) { + ignoreException(); + } + } + + void failEx(std::exception_ptr ex) + { + assert(!done); + done = true; + callback.rethrow(ex); + } + + template + void fail(const T & e) + { + failEx(std::make_exception_ptr(e)); + } + + LambdaSink finalSink; + std::shared_ptr decompressionSink; + + std::exception_ptr writeException; + + size_t writeCallback(void * contents, size_t size, size_t nmemb) + { + try { + size_t realSize = size * nmemb; + result.bodySize += realSize; + + if (!decompressionSink) + decompressionSink = makeDecompressionSink(encoding, finalSink); + + (*decompressionSink)((unsigned char *) contents, realSize); + + return realSize; + } catch (...) { + writeException = std::current_exception(); + return 0; + } + } + + static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) + { + return ((DownloadItem *) userp)->writeCallback(contents, size, nmemb); + } + + size_t headerCallback(void * contents, size_t size, size_t nmemb) + { + size_t realSize = size * nmemb; + std::string line((char *) contents, realSize); + printMsg(lvlVomit, format("got header for '%s': %s") % request.uri % trim(line)); + if (line.compare(0, 5, "HTTP/") == 0) { // new response starts + result.etag = ""; + auto ss = tokenizeString>(line, " "); + status = ss.size() >= 2 ? ss[1] : ""; + result.data = std::make_shared(); + result.bodySize = 0; + acceptRanges = false; + encoding = ""; + } else { + auto i = line.find(':'); + if (i != string::npos) { + string name = toLower(trim(string(line, 0, i))); + if (name == "etag") { + result.etag = trim(string(line, i + 1)); + /* Hack to work around a GitHub bug: it sends + ETags, but ignores If-None-Match. So if we get + the expected ETag on a 200 response, then shut + down the connection because we already have the + data. */ + if (result.etag == request.expectedETag && status == "200") { + debug(format("shutting down on 200 HTTP response with expected ETag")); + return 0; + } + } else if (name == "content-encoding") + encoding = trim(string(line, i + 1)); + else if (name == "accept-ranges" && toLower(trim(std::string(line, i + 1))) == "bytes") + acceptRanges = true; + } + } + return realSize; + } + + static size_t headerCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) + { + return ((DownloadItem *) userp)->headerCallback(contents, size, nmemb); + } + + int progressCallback(double dltotal, double dlnow) + { + try { + act.progress(dlnow, dltotal); + } catch (nix::Interrupted &) { + assert(_isInterrupted); + } + return _isInterrupted; + } + + static int progressCallbackWrapper(void * userp, double dltotal, double dlnow, double ultotal, double ulnow) + { + return ((DownloadItem *) userp)->progressCallback(dltotal, dlnow); + } + + static int debugCallback(CURL * handle, curl_infotype type, char * data, size_t size, void * userptr) + { + if (type == CURLINFO_TEXT) + vomit("curl: %s", chomp(std::string(data, size))); + return 0; + } + + size_t readOffset = 0; + size_t readCallback(char *buffer, size_t size, size_t nitems) + { + if (readOffset == request.data->length()) + return 0; + auto count = std::min(size * nitems, request.data->length() - readOffset); + assert(count); + memcpy(buffer, request.data->data() + readOffset, count); + readOffset += count; + return count; + } + + static size_t readCallbackWrapper(char *buffer, size_t size, size_t nitems, void * userp) + { + return ((DownloadItem *) userp)->readCallback(buffer, size, nitems); + } + + void init() + { + if (!req) req = curl_easy_init(); + + curl_easy_reset(req); + + if (verbosity >= lvlVomit) { + curl_easy_setopt(req, CURLOPT_VERBOSE, 1); + curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, DownloadItem::debugCallback); + } + + curl_easy_setopt(req, CURLOPT_URL, request.uri.c_str()); + curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); + curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(req, CURLOPT_USERAGENT, + ("curl/" LIBCURL_VERSION " Nix/" + nixVersion + + (downloadSettings.userAgentSuffix != "" ? " " + downloadSettings.userAgentSuffix.get() : "")).c_str()); + #if LIBCURL_VERSION_NUM >= 0x072b00 + curl_easy_setopt(req, CURLOPT_PIPEWAIT, 1); + #endif + #if LIBCURL_VERSION_NUM >= 0x072f00 + if (downloadSettings.enableHttp2) + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS); + else + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); + #endif + curl_easy_setopt(req, CURLOPT_WRITEFUNCTION, DownloadItem::writeCallbackWrapper); + curl_easy_setopt(req, CURLOPT_WRITEDATA, this); + curl_easy_setopt(req, CURLOPT_HEADERFUNCTION, DownloadItem::headerCallbackWrapper); + curl_easy_setopt(req, CURLOPT_HEADERDATA, this); + + curl_easy_setopt(req, CURLOPT_PROGRESSFUNCTION, progressCallbackWrapper); + curl_easy_setopt(req, CURLOPT_PROGRESSDATA, this); + curl_easy_setopt(req, CURLOPT_NOPROGRESS, 0); + + curl_easy_setopt(req, CURLOPT_HTTPHEADER, requestHeaders); + + if (request.head) + curl_easy_setopt(req, CURLOPT_NOBODY, 1); + + if (request.data) { + curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); + curl_easy_setopt(req, CURLOPT_READFUNCTION, readCallbackWrapper); + curl_easy_setopt(req, CURLOPT_READDATA, this); + curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, (curl_off_t) request.data->length()); + } + + if (request.verifyTLS) { + debug("verify TLS: Nix CA file = '%s'", settings.caFile); + if (settings.caFile != "") + curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.c_str()); + } else { + curl_easy_setopt(req, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0); + } + + curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, downloadSettings.connectTimeout.get()); + + curl_easy_setopt(req, CURLOPT_LOW_SPEED_LIMIT, 1L); + curl_easy_setopt(req, CURLOPT_LOW_SPEED_TIME, downloadSettings.stalledDownloadTimeout.get()); + + /* If no file exist in the specified path, curl continues to work + anyway as if netrc support was disabled. */ + curl_easy_setopt(req, CURLOPT_NETRC_FILE, settings.netrcFile.get().c_str()); + curl_easy_setopt(req, CURLOPT_NETRC, CURL_NETRC_OPTIONAL); + + if (writtenToSink) + curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); + + result.data = std::make_shared(); + result.bodySize = 0; + } + + void finish(CURLcode code) + { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + char * effectiveUriCStr; + curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); + if (effectiveUriCStr) + result.effectiveUri = effectiveUriCStr; + + debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", + request.verb(), request.uri, code, httpStatus, result.bodySize); + + if (decompressionSink) { + try { + decompressionSink->finish(); + } catch (...) { + writeException = std::current_exception(); + } + } + + if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { + code = CURLE_OK; + httpStatus = 304; + } + + if (writeException) + failEx(writeException); + + else if (code == CURLE_OK && + (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 206 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */)) + { + result.cached = httpStatus == 304; + act.progress(result.bodySize, result.bodySize); + done = true; + callback(std::move(result)); + } + + else { + // We treat most errors as transient, but won't retry when hopeless + Error err = Transient; + + if (httpStatus == 404 || httpStatus == 410 || code == CURLE_FILE_COULDNT_READ_FILE) { + // The file is definitely not there + err = NotFound; + } else if (httpStatus == 401 || httpStatus == 403 || httpStatus == 407) { + // Don't retry on authentication/authorization failures + err = Forbidden; + } else if (httpStatus >= 400 && httpStatus < 500 && httpStatus != 408 && httpStatus != 429) { + // Most 4xx errors are client errors and are probably not worth retrying: + // * 408 means the server timed out waiting for us, so we try again + // * 429 means too many requests, so we retry (with a delay) + err = Misc; + } else if (httpStatus == 501 || httpStatus == 505 || httpStatus == 511) { + // Let's treat most 5xx (server) errors as transient, except for a handful: + // * 501 not implemented + // * 505 http version not supported + // * 511 we're behind a captive portal + err = Misc; + } else { + // Don't bother retrying on certain cURL errors either + switch (code) { + case CURLE_FAILED_INIT: + case CURLE_URL_MALFORMAT: + case CURLE_NOT_BUILT_IN: + case CURLE_REMOTE_ACCESS_DENIED: + case CURLE_FILE_COULDNT_READ_FILE: + case CURLE_FUNCTION_NOT_FOUND: + case CURLE_ABORTED_BY_CALLBACK: + case CURLE_BAD_FUNCTION_ARGUMENT: + case CURLE_INTERFACE_FAILED: + case CURLE_UNKNOWN_OPTION: + case CURLE_SSL_CACERT_BADFILE: + case CURLE_TOO_MANY_REDIRECTS: + case CURLE_WRITE_ERROR: + case CURLE_UNSUPPORTED_PROTOCOL: + err = Misc; + break; + default: // Shut up warnings + break; + } + } + + attempt++; + + auto exc = + code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted + ? DownloadError(Interrupted, fmt("%s of '%s' was interrupted", request.verb(), request.uri)) + : httpStatus != 0 + ? DownloadError(err, + fmt("unable to %s '%s': HTTP error %d", + request.verb(), request.uri, httpStatus) + + (code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) + ) + : DownloadError(err, + fmt("unable to %s '%s': %s (%d)", + request.verb(), request.uri, curl_easy_strerror(code), code)); + + /* If this is a transient error, then maybe retry the + download after a while. If we're writing to a + sink, we can only retry if the server supports + ranged requests. */ + if (err == Transient + && attempt < request.tries + && (!this->request.dataCallback + || writtenToSink == 0 + || (acceptRanges && encoding.empty()))) + { + int ms = request.baseRetryTimeMs * std::pow(2.0f, attempt - 1 + std::uniform_real_distribution<>(0.0, 0.5)(downloader.mt19937)); + if (writtenToSink) + warn("%s; retrying from offset %d in %d ms", exc.what(), writtenToSink, ms); + else + warn("%s; retrying in %d ms", exc.what(), ms); + embargo = std::chrono::steady_clock::now() + std::chrono::milliseconds(ms); + downloader.enqueueItem(shared_from_this()); + } + else + fail(exc); + } + } + }; + + struct State + { + struct EmbargoComparator { + bool operator() (const std::shared_ptr & i1, const std::shared_ptr & i2) { + return i1->embargo > i2->embargo; + } + }; + bool quit = false; + std::priority_queue, std::vector>, EmbargoComparator> incoming; + }; + + Sync state_; + + /* We can't use a std::condition_variable to wake up the curl + thread, because it only monitors file descriptors. So use a + pipe instead. */ + Pipe wakeupPipe; + + std::thread workerThread; + + CurlDownloader() + : mt19937(rd()) + { + static std::once_flag globalInit; + std::call_once(globalInit, curl_global_init, CURL_GLOBAL_ALL); + + curlm = curl_multi_init(); + + #if LIBCURL_VERSION_NUM >= 0x072b00 // Multiplex requires >= 7.43.0 + curl_multi_setopt(curlm, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX); + #endif + #if LIBCURL_VERSION_NUM >= 0x071e00 // Max connections requires >= 7.30.0 + curl_multi_setopt(curlm, CURLMOPT_MAX_TOTAL_CONNECTIONS, + downloadSettings.httpConnections.get()); + #endif + + wakeupPipe.create(); + fcntl(wakeupPipe.readSide.get(), F_SETFL, O_NONBLOCK); + + workerThread = std::thread([&]() { workerThreadEntry(); }); + } + + ~CurlDownloader() + { + stopWorkerThread(); + + workerThread.join(); + + if (curlm) curl_multi_cleanup(curlm); + } + + void stopWorkerThread() + { + /* Signal the worker thread to exit. */ + { + auto state(state_.lock()); + state->quit = true; + } + writeFull(wakeupPipe.writeSide.get(), " ", false); + } + + void workerThreadMain() + { + /* Cause this thread to be notified on SIGINT. */ + auto callback = createInterruptCallback([&]() { + stopWorkerThread(); + }); + + std::map> items; + + bool quit = false; + + std::chrono::steady_clock::time_point nextWakeup; + + while (!quit) { + checkInterrupt(); + + /* Let curl do its thing. */ + int running; + CURLMcode mc = curl_multi_perform(curlm, &running); + if (mc != CURLM_OK) + throw nix::Error(format("unexpected error from curl_multi_perform(): %s") % curl_multi_strerror(mc)); + + /* Set the promises of any finished requests. */ + CURLMsg * msg; + int left; + while ((msg = curl_multi_info_read(curlm, &left))) { + if (msg->msg == CURLMSG_DONE) { + auto i = items.find(msg->easy_handle); + assert(i != items.end()); + i->second->finish(msg->data.result); + curl_multi_remove_handle(curlm, i->second->req); + i->second->active = false; + items.erase(i); + } + } + + /* Wait for activity, including wakeup events. */ + int numfds = 0; + struct curl_waitfd extraFDs[1]; + extraFDs[0].fd = wakeupPipe.readSide.get(); + extraFDs[0].events = CURL_WAIT_POLLIN; + extraFDs[0].revents = 0; + long maxSleepTimeMs = items.empty() ? 10000 : 100; + auto sleepTimeMs = + nextWakeup != std::chrono::steady_clock::time_point() + ? std::max(0, (int) std::chrono::duration_cast(nextWakeup - std::chrono::steady_clock::now()).count()) + : maxSleepTimeMs; + vomit("download thread waiting for %d ms", sleepTimeMs); + mc = curl_multi_wait(curlm, extraFDs, 1, sleepTimeMs, &numfds); + if (mc != CURLM_OK) + throw nix::Error(format("unexpected error from curl_multi_wait(): %s") % curl_multi_strerror(mc)); + + nextWakeup = std::chrono::steady_clock::time_point(); + + /* Add new curl requests from the incoming requests queue, + except for requests that are embargoed (waiting for a + retry timeout to expire). */ + if (extraFDs[0].revents & CURL_WAIT_POLLIN) { + char buf[1024]; + auto res = read(extraFDs[0].fd, buf, sizeof(buf)); + if (res == -1 && errno != EINTR) + throw SysError("reading curl wakeup socket"); + } + + std::vector> incoming; + auto now = std::chrono::steady_clock::now(); + + { + auto state(state_.lock()); + while (!state->incoming.empty()) { + auto item = state->incoming.top(); + if (item->embargo <= now) { + incoming.push_back(item); + state->incoming.pop(); + } else { + if (nextWakeup == std::chrono::steady_clock::time_point() + || item->embargo < nextWakeup) + nextWakeup = item->embargo; + break; + } + } + quit = state->quit; + } + + for (auto & item : incoming) { + debug("starting %s of %s", item->request.verb(), item->request.uri); + item->init(); + curl_multi_add_handle(curlm, item->req); + item->active = true; + items[item->req] = item; + } + } + + debug("download thread shutting down"); + } + + void workerThreadEntry() + { + try { + workerThreadMain(); + } catch (nix::Interrupted & e) { + } catch (std::exception & e) { + printError("unexpected error in download thread: %s", e.what()); + } + + { + auto state(state_.lock()); + while (!state->incoming.empty()) state->incoming.pop(); + state->quit = true; + } + } + + void enqueueItem(std::shared_ptr item) + { + if (item->request.data + && !hasPrefix(item->request.uri, "http://") + && !hasPrefix(item->request.uri, "https://")) + throw nix::Error("uploading to '%s' is not supported", item->request.uri); + + { + auto state(state_.lock()); + if (state->quit) + throw nix::Error("cannot enqueue download request because the download thread is shutting down"); + state->incoming.push(item); + } + writeFull(wakeupPipe.writeSide.get(), " "); + } + +#ifdef ENABLE_S3 + std::tuple parseS3Uri(std::string uri) + { + auto [path, params] = splitUriAndParams(uri); + + auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix + if (slash == std::string::npos) + throw nix::Error("bad S3 URI '%s'", path); + + std::string bucketName(path, 5, slash - 5); + std::string key(path, slash + 1); + + return {bucketName, key, params}; + } +#endif + + void enqueueDownload(const DataTransferRequest & request, + Callback callback) override + { + /* Ugly hack to support s3:// URIs. */ + if (hasPrefix(request.uri, "s3://")) { + // FIXME: do this on a worker thread + try { +#ifdef ENABLE_S3 + auto [bucketName, key, params] = parseS3Uri(request.uri); + + std::string profile = get(params, "profile").value_or(""); + std::string region = get(params, "region").value_or(Aws::Region::US_EAST_1); + std::string scheme = get(params, "scheme").value_or(""); + std::string endpoint = get(params, "endpoint").value_or(""); + + S3Helper s3Helper(profile, region, scheme, endpoint); + + // FIXME: implement ETag + auto s3Res = s3Helper.getObject(bucketName, key); + DownloadResult res; + if (!s3Res.data) + throw DownloadError(NotFound, fmt("S3 object '%s' does not exist", request.uri)); + res.data = s3Res.data; + callback(std::move(res)); +#else + throw nix::Error("cannot download '%s' because Nix is not built with S3 support", request.uri); +#endif + } catch (...) { callback.rethrow(); } + return; + } + + enqueueItem(std::make_shared(*this, request, std::move(callback))); + } +}; + +ref getDownloader() +{ + static ref downloader = makeDownloader(); + return downloader; +} + +ref makeDownloader() +{ + return make_ref(); +} + +std::future Downloader::enqueueDownload(const DataTransferRequest & request) +{ + auto promise = std::make_shared>(); + enqueueDownload(request, + {[promise](std::future fut) { + try { + promise->set_value(fut.get()); + } catch (...) { + promise->set_exception(std::current_exception()); + } + }}); + return promise->get_future(); +} + +DownloadResult Downloader::download(const DataTransferRequest & request) +{ + return enqueueDownload(request).get(); +} + +void Downloader::download(DataTransferRequest && request, Sink & sink) +{ + /* Note: we can't call 'sink' via request.dataCallback, because + that would cause the sink to execute on the downloader + thread. If 'sink' is a coroutine, this will fail. Also, if the + sink is expensive (e.g. one that does decompression and writing + to the Nix store), it would stall the download thread too much. + Therefore we use a buffer to communicate data between the + download thread and the calling thread. */ + + struct State { + bool quit = false; + std::exception_ptr exc; + std::string data; + std::condition_variable avail, request; + }; + + auto _state = std::make_shared>(); + + /* In case of an exception, wake up the download thread. FIXME: + abort the download request. */ + Finally finally([&]() { + auto state(_state->lock()); + state->quit = true; + state->request.notify_one(); + }); + + request.dataCallback = [_state](char * buf, size_t len) { + + auto state(_state->lock()); + + if (state->quit) return; + + /* If the buffer is full, then go to sleep until the calling + thread wakes us up (i.e. when it has removed data from the + buffer). We don't wait forever to prevent stalling the + download thread. (Hopefully sleeping will throttle the + sender.) */ + if (state->data.size() > 1024 * 1024) { + debug("download buffer is full; going to sleep"); + state.wait_for(state->request, std::chrono::seconds(10)); + } + + /* Append data to the buffer and wake up the calling + thread. */ + state->data.append(buf, len); + state->avail.notify_one(); + }; + + enqueueDownload(request, + {[_state](std::future fut) { + auto state(_state->lock()); + state->quit = true; + try { + fut.get(); + } catch (...) { + state->exc = std::current_exception(); + } + state->avail.notify_one(); + state->request.notify_one(); + }}); + + while (true) { + checkInterrupt(); + + std::string chunk; + + /* Grab data if available, otherwise wait for the download + thread to wake us up. */ + { + auto state(_state->lock()); + + while (state->data.empty()) { + + if (state->quit) { + if (state->exc) std::rethrow_exception(state->exc); + return; + } + + state.wait(state->avail); + } + + chunk = std::move(state->data); + + state->request.notify_one(); + } + + /* Flush the data to the sink and wake up the download thread + if it's blocked on a full buffer. We don't hold the state + lock while doing this to prevent blocking the download + thread if sink() takes a long time. */ + sink((unsigned char *) chunk.data(), chunk.size()); + } +} + +bool isUri(const string & s) +{ + if (s.compare(0, 8, "channel:") == 0) return true; + size_t pos = s.find("://"); + if (pos == string::npos) return false; + string scheme(s, 0, pos); + return scheme == "http" || scheme == "https" || scheme == "file" || scheme == "channel" || scheme == "git" || scheme == "s3" || scheme == "ssh"; +} + + +} diff --git a/src/libstore/datatransfer.hh b/src/libstore/datatransfer.hh new file mode 100644 index 000000000..89d3fce7f --- /dev/null +++ b/src/libstore/datatransfer.hh @@ -0,0 +1,113 @@ +#pragma once + +#include "types.hh" +#include "hash.hh" +#include "config.hh" + +#include +#include + +namespace nix { + +struct DownloadSettings : Config +{ + Setting enableHttp2{this, true, "http2", + "Whether to enable HTTP/2 support."}; + + Setting userAgentSuffix{this, "", "user-agent-suffix", + "String appended to the user agent in HTTP requests."}; + + Setting httpConnections{this, 25, "http-connections", + "Number of parallel HTTP connections.", + {"binary-caches-parallel-connections"}}; + + Setting connectTimeout{this, 0, "connect-timeout", + "Timeout for connecting to servers during downloads. 0 means use curl's builtin default."}; + + Setting stalledDownloadTimeout{this, 300, "stalled-download-timeout", + "Timeout (in seconds) for receiving data from servers during download. Nix cancels idle downloads after this timeout's duration."}; + + Setting tries{this, 5, "download-attempts", + "How often Nix will attempt to download a file before giving up."}; +}; + +extern DownloadSettings downloadSettings; + +struct DataTransferRequest +{ + std::string uri; + std::string expectedETag; + bool verifyTLS = true; + bool head = false; + size_t tries = downloadSettings.tries; + unsigned int baseRetryTimeMs = 250; + ActivityId parentAct; + bool decompress = true; + std::shared_ptr data; + std::string mimeType; + std::function dataCallback; + + DataTransferRequest(const std::string & uri) + : uri(uri), parentAct(getCurActivity()) { } + + std::string verb() + { + return data ? "upload" : "download"; + } +}; + +struct DownloadResult +{ + bool cached = false; + std::string etag; + std::string effectiveUri; + std::shared_ptr data; + uint64_t bodySize = 0; +}; + +class Store; + +struct Downloader +{ + virtual ~Downloader() { } + + /* Enqueue a download request, returning a future to the result of + the download. The future may throw a DownloadError + exception. */ + virtual void enqueueDownload(const DataTransferRequest & request, + Callback callback) = 0; + + std::future enqueueDownload(const DataTransferRequest & request); + + /* Synchronously download a file. */ + DownloadResult download(const DataTransferRequest & request); + + /* Download a file, writing its data to a sink. The sink will be + invoked on the thread of the caller. */ + void download(DataTransferRequest && request, Sink & sink); + + enum Error { NotFound, Forbidden, Misc, Transient, Interrupted }; +}; + +/* Return a shared Downloader object. Using this object is preferred + because it enables connection reuse and HTTP/2 multiplexing. */ +ref getDownloader(); + +/* Return a new Downloader object. */ +ref makeDownloader(); + +class DownloadError : public Error +{ +public: + Downloader::Error error; + DownloadError(Downloader::Error error, const FormatOrString & fs) + : Error(fs), error(error) + { } +}; + +bool isUri(const string & s); + +/* Resolve deprecated 'channel:' URLs. */ +std::string resolveUri(const std::string & uri); + +} diff --git a/src/libstore/download.cc b/src/libstore/download.cc deleted file mode 100644 index 77f3c527a..000000000 --- a/src/libstore/download.cc +++ /dev/null @@ -1,814 +0,0 @@ -#include "download.hh" -#include "util.hh" -#include "globals.hh" -#include "store-api.hh" -#include "s3.hh" -#include "compression.hh" -#include "finally.hh" - -#ifdef ENABLE_S3 -#include -#endif - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -using namespace std::string_literals; - -namespace nix { - -DownloadSettings downloadSettings; - -static GlobalConfig::Register r1(&downloadSettings); - -std::string resolveUri(const std::string & uri) -{ - if (uri.compare(0, 8, "channel:") == 0) - return "https://nixos.org/channels/" + std::string(uri, 8) + "/nixexprs.tar.xz"; - else - return uri; -} - -struct CurlDownloader : public Downloader -{ - CURLM * curlm = 0; - - std::random_device rd; - std::mt19937 mt19937; - - struct DownloadItem : public std::enable_shared_from_this - { - CurlDownloader & downloader; - DataTransferRequest request; - DownloadResult result; - Activity act; - bool done = false; // whether either the success or failure function has been called - Callback callback; - CURL * req = 0; - bool active = false; // whether the handle has been added to the multi object - std::string status; - - unsigned int attempt = 0; - - /* Don't start this download until the specified time point - has been reached. */ - std::chrono::steady_clock::time_point embargo; - - struct curl_slist * requestHeaders = 0; - - std::string encoding; - - bool acceptRanges = false; - - curl_off_t writtenToSink = 0; - - DownloadItem(CurlDownloader & downloader, - const DataTransferRequest & request, - Callback && callback) - : downloader(downloader) - , request(request) - , act(*logger, lvlTalkative, actDownload, - fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), - {request.uri}, request.parentAct) - , callback(std::move(callback)) - , finalSink([this](const unsigned char * data, size_t len) { - if (this->request.dataCallback) { - long httpStatus = 0; - curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); - - /* Only write data to the sink if this is a - successful response. */ - if (httpStatus == 0 || httpStatus == 200 || httpStatus == 201 || httpStatus == 206) { - writtenToSink += len; - this->request.dataCallback((char *) data, len); - } - } else - this->result.data->append((char *) data, len); - }) - { - if (!request.expectedETag.empty()) - requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); - if (!request.mimeType.empty()) - requestHeaders = curl_slist_append(requestHeaders, ("Content-Type: " + request.mimeType).c_str()); - } - - ~DownloadItem() - { - if (req) { - if (active) - curl_multi_remove_handle(downloader.curlm, req); - curl_easy_cleanup(req); - } - if (requestHeaders) curl_slist_free_all(requestHeaders); - try { - if (!done) - fail(DownloadError(Interrupted, format("download of '%s' was interrupted") % request.uri)); - } catch (...) { - ignoreException(); - } - } - - void failEx(std::exception_ptr ex) - { - assert(!done); - done = true; - callback.rethrow(ex); - } - - template - void fail(const T & e) - { - failEx(std::make_exception_ptr(e)); - } - - LambdaSink finalSink; - std::shared_ptr decompressionSink; - - std::exception_ptr writeException; - - size_t writeCallback(void * contents, size_t size, size_t nmemb) - { - try { - size_t realSize = size * nmemb; - result.bodySize += realSize; - - if (!decompressionSink) - decompressionSink = makeDecompressionSink(encoding, finalSink); - - (*decompressionSink)((unsigned char *) contents, realSize); - - return realSize; - } catch (...) { - writeException = std::current_exception(); - return 0; - } - } - - static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) - { - return ((DownloadItem *) userp)->writeCallback(contents, size, nmemb); - } - - size_t headerCallback(void * contents, size_t size, size_t nmemb) - { - size_t realSize = size * nmemb; - std::string line((char *) contents, realSize); - printMsg(lvlVomit, format("got header for '%s': %s") % request.uri % trim(line)); - if (line.compare(0, 5, "HTTP/") == 0) { // new response starts - result.etag = ""; - auto ss = tokenizeString>(line, " "); - status = ss.size() >= 2 ? ss[1] : ""; - result.data = std::make_shared(); - result.bodySize = 0; - acceptRanges = false; - encoding = ""; - } else { - auto i = line.find(':'); - if (i != string::npos) { - string name = toLower(trim(string(line, 0, i))); - if (name == "etag") { - result.etag = trim(string(line, i + 1)); - /* Hack to work around a GitHub bug: it sends - ETags, but ignores If-None-Match. So if we get - the expected ETag on a 200 response, then shut - down the connection because we already have the - data. */ - if (result.etag == request.expectedETag && status == "200") { - debug(format("shutting down on 200 HTTP response with expected ETag")); - return 0; - } - } else if (name == "content-encoding") - encoding = trim(string(line, i + 1)); - else if (name == "accept-ranges" && toLower(trim(std::string(line, i + 1))) == "bytes") - acceptRanges = true; - } - } - return realSize; - } - - static size_t headerCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp) - { - return ((DownloadItem *) userp)->headerCallback(contents, size, nmemb); - } - - int progressCallback(double dltotal, double dlnow) - { - try { - act.progress(dlnow, dltotal); - } catch (nix::Interrupted &) { - assert(_isInterrupted); - } - return _isInterrupted; - } - - static int progressCallbackWrapper(void * userp, double dltotal, double dlnow, double ultotal, double ulnow) - { - return ((DownloadItem *) userp)->progressCallback(dltotal, dlnow); - } - - static int debugCallback(CURL * handle, curl_infotype type, char * data, size_t size, void * userptr) - { - if (type == CURLINFO_TEXT) - vomit("curl: %s", chomp(std::string(data, size))); - return 0; - } - - size_t readOffset = 0; - size_t readCallback(char *buffer, size_t size, size_t nitems) - { - if (readOffset == request.data->length()) - return 0; - auto count = std::min(size * nitems, request.data->length() - readOffset); - assert(count); - memcpy(buffer, request.data->data() + readOffset, count); - readOffset += count; - return count; - } - - static size_t readCallbackWrapper(char *buffer, size_t size, size_t nitems, void * userp) - { - return ((DownloadItem *) userp)->readCallback(buffer, size, nitems); - } - - void init() - { - if (!req) req = curl_easy_init(); - - curl_easy_reset(req); - - if (verbosity >= lvlVomit) { - curl_easy_setopt(req, CURLOPT_VERBOSE, 1); - curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, DownloadItem::debugCallback); - } - - curl_easy_setopt(req, CURLOPT_URL, request.uri.c_str()); - curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); - curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(req, CURLOPT_USERAGENT, - ("curl/" LIBCURL_VERSION " Nix/" + nixVersion + - (downloadSettings.userAgentSuffix != "" ? " " + downloadSettings.userAgentSuffix.get() : "")).c_str()); - #if LIBCURL_VERSION_NUM >= 0x072b00 - curl_easy_setopt(req, CURLOPT_PIPEWAIT, 1); - #endif - #if LIBCURL_VERSION_NUM >= 0x072f00 - if (downloadSettings.enableHttp2) - curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS); - else - curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); - #endif - curl_easy_setopt(req, CURLOPT_WRITEFUNCTION, DownloadItem::writeCallbackWrapper); - curl_easy_setopt(req, CURLOPT_WRITEDATA, this); - curl_easy_setopt(req, CURLOPT_HEADERFUNCTION, DownloadItem::headerCallbackWrapper); - curl_easy_setopt(req, CURLOPT_HEADERDATA, this); - - curl_easy_setopt(req, CURLOPT_PROGRESSFUNCTION, progressCallbackWrapper); - curl_easy_setopt(req, CURLOPT_PROGRESSDATA, this); - curl_easy_setopt(req, CURLOPT_NOPROGRESS, 0); - - curl_easy_setopt(req, CURLOPT_HTTPHEADER, requestHeaders); - - if (request.head) - curl_easy_setopt(req, CURLOPT_NOBODY, 1); - - if (request.data) { - curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); - curl_easy_setopt(req, CURLOPT_READFUNCTION, readCallbackWrapper); - curl_easy_setopt(req, CURLOPT_READDATA, this); - curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, (curl_off_t) request.data->length()); - } - - if (request.verifyTLS) { - debug("verify TLS: Nix CA file = '%s'", settings.caFile); - if (settings.caFile != "") - curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.c_str()); - } else { - curl_easy_setopt(req, CURLOPT_SSL_VERIFYPEER, 0); - curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0); - } - - curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, downloadSettings.connectTimeout.get()); - - curl_easy_setopt(req, CURLOPT_LOW_SPEED_LIMIT, 1L); - curl_easy_setopt(req, CURLOPT_LOW_SPEED_TIME, downloadSettings.stalledDownloadTimeout.get()); - - /* If no file exist in the specified path, curl continues to work - anyway as if netrc support was disabled. */ - curl_easy_setopt(req, CURLOPT_NETRC_FILE, settings.netrcFile.get().c_str()); - curl_easy_setopt(req, CURLOPT_NETRC, CURL_NETRC_OPTIONAL); - - if (writtenToSink) - curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); - - result.data = std::make_shared(); - result.bodySize = 0; - } - - void finish(CURLcode code) - { - long httpStatus = 0; - curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); - - char * effectiveUriCStr; - curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); - if (effectiveUriCStr) - result.effectiveUri = effectiveUriCStr; - - debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes", - request.verb(), request.uri, code, httpStatus, result.bodySize); - - if (decompressionSink) { - try { - decompressionSink->finish(); - } catch (...) { - writeException = std::current_exception(); - } - } - - if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { - code = CURLE_OK; - httpStatus = 304; - } - - if (writeException) - failEx(writeException); - - else if (code == CURLE_OK && - (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 206 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */)) - { - result.cached = httpStatus == 304; - act.progress(result.bodySize, result.bodySize); - done = true; - callback(std::move(result)); - } - - else { - // We treat most errors as transient, but won't retry when hopeless - Error err = Transient; - - if (httpStatus == 404 || httpStatus == 410 || code == CURLE_FILE_COULDNT_READ_FILE) { - // The file is definitely not there - err = NotFound; - } else if (httpStatus == 401 || httpStatus == 403 || httpStatus == 407) { - // Don't retry on authentication/authorization failures - err = Forbidden; - } else if (httpStatus >= 400 && httpStatus < 500 && httpStatus != 408 && httpStatus != 429) { - // Most 4xx errors are client errors and are probably not worth retrying: - // * 408 means the server timed out waiting for us, so we try again - // * 429 means too many requests, so we retry (with a delay) - err = Misc; - } else if (httpStatus == 501 || httpStatus == 505 || httpStatus == 511) { - // Let's treat most 5xx (server) errors as transient, except for a handful: - // * 501 not implemented - // * 505 http version not supported - // * 511 we're behind a captive portal - err = Misc; - } else { - // Don't bother retrying on certain cURL errors either - switch (code) { - case CURLE_FAILED_INIT: - case CURLE_URL_MALFORMAT: - case CURLE_NOT_BUILT_IN: - case CURLE_REMOTE_ACCESS_DENIED: - case CURLE_FILE_COULDNT_READ_FILE: - case CURLE_FUNCTION_NOT_FOUND: - case CURLE_ABORTED_BY_CALLBACK: - case CURLE_BAD_FUNCTION_ARGUMENT: - case CURLE_INTERFACE_FAILED: - case CURLE_UNKNOWN_OPTION: - case CURLE_SSL_CACERT_BADFILE: - case CURLE_TOO_MANY_REDIRECTS: - case CURLE_WRITE_ERROR: - case CURLE_UNSUPPORTED_PROTOCOL: - err = Misc; - break; - default: // Shut up warnings - break; - } - } - - attempt++; - - auto exc = - code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted - ? DownloadError(Interrupted, fmt("%s of '%s' was interrupted", request.verb(), request.uri)) - : httpStatus != 0 - ? DownloadError(err, - fmt("unable to %s '%s': HTTP error %d", - request.verb(), request.uri, httpStatus) - + (code == CURLE_OK ? "" : fmt(" (curl error: %s)", curl_easy_strerror(code))) - ) - : DownloadError(err, - fmt("unable to %s '%s': %s (%d)", - request.verb(), request.uri, curl_easy_strerror(code), code)); - - /* If this is a transient error, then maybe retry the - download after a while. If we're writing to a - sink, we can only retry if the server supports - ranged requests. */ - if (err == Transient - && attempt < request.tries - && (!this->request.dataCallback - || writtenToSink == 0 - || (acceptRanges && encoding.empty()))) - { - int ms = request.baseRetryTimeMs * std::pow(2.0f, attempt - 1 + std::uniform_real_distribution<>(0.0, 0.5)(downloader.mt19937)); - if (writtenToSink) - warn("%s; retrying from offset %d in %d ms", exc.what(), writtenToSink, ms); - else - warn("%s; retrying in %d ms", exc.what(), ms); - embargo = std::chrono::steady_clock::now() + std::chrono::milliseconds(ms); - downloader.enqueueItem(shared_from_this()); - } - else - fail(exc); - } - } - }; - - struct State - { - struct EmbargoComparator { - bool operator() (const std::shared_ptr & i1, const std::shared_ptr & i2) { - return i1->embargo > i2->embargo; - } - }; - bool quit = false; - std::priority_queue, std::vector>, EmbargoComparator> incoming; - }; - - Sync state_; - - /* We can't use a std::condition_variable to wake up the curl - thread, because it only monitors file descriptors. So use a - pipe instead. */ - Pipe wakeupPipe; - - std::thread workerThread; - - CurlDownloader() - : mt19937(rd()) - { - static std::once_flag globalInit; - std::call_once(globalInit, curl_global_init, CURL_GLOBAL_ALL); - - curlm = curl_multi_init(); - - #if LIBCURL_VERSION_NUM >= 0x072b00 // Multiplex requires >= 7.43.0 - curl_multi_setopt(curlm, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX); - #endif - #if LIBCURL_VERSION_NUM >= 0x071e00 // Max connections requires >= 7.30.0 - curl_multi_setopt(curlm, CURLMOPT_MAX_TOTAL_CONNECTIONS, - downloadSettings.httpConnections.get()); - #endif - - wakeupPipe.create(); - fcntl(wakeupPipe.readSide.get(), F_SETFL, O_NONBLOCK); - - workerThread = std::thread([&]() { workerThreadEntry(); }); - } - - ~CurlDownloader() - { - stopWorkerThread(); - - workerThread.join(); - - if (curlm) curl_multi_cleanup(curlm); - } - - void stopWorkerThread() - { - /* Signal the worker thread to exit. */ - { - auto state(state_.lock()); - state->quit = true; - } - writeFull(wakeupPipe.writeSide.get(), " ", false); - } - - void workerThreadMain() - { - /* Cause this thread to be notified on SIGINT. */ - auto callback = createInterruptCallback([&]() { - stopWorkerThread(); - }); - - std::map> items; - - bool quit = false; - - std::chrono::steady_clock::time_point nextWakeup; - - while (!quit) { - checkInterrupt(); - - /* Let curl do its thing. */ - int running; - CURLMcode mc = curl_multi_perform(curlm, &running); - if (mc != CURLM_OK) - throw nix::Error(format("unexpected error from curl_multi_perform(): %s") % curl_multi_strerror(mc)); - - /* Set the promises of any finished requests. */ - CURLMsg * msg; - int left; - while ((msg = curl_multi_info_read(curlm, &left))) { - if (msg->msg == CURLMSG_DONE) { - auto i = items.find(msg->easy_handle); - assert(i != items.end()); - i->second->finish(msg->data.result); - curl_multi_remove_handle(curlm, i->second->req); - i->second->active = false; - items.erase(i); - } - } - - /* Wait for activity, including wakeup events. */ - int numfds = 0; - struct curl_waitfd extraFDs[1]; - extraFDs[0].fd = wakeupPipe.readSide.get(); - extraFDs[0].events = CURL_WAIT_POLLIN; - extraFDs[0].revents = 0; - long maxSleepTimeMs = items.empty() ? 10000 : 100; - auto sleepTimeMs = - nextWakeup != std::chrono::steady_clock::time_point() - ? std::max(0, (int) std::chrono::duration_cast(nextWakeup - std::chrono::steady_clock::now()).count()) - : maxSleepTimeMs; - vomit("download thread waiting for %d ms", sleepTimeMs); - mc = curl_multi_wait(curlm, extraFDs, 1, sleepTimeMs, &numfds); - if (mc != CURLM_OK) - throw nix::Error(format("unexpected error from curl_multi_wait(): %s") % curl_multi_strerror(mc)); - - nextWakeup = std::chrono::steady_clock::time_point(); - - /* Add new curl requests from the incoming requests queue, - except for requests that are embargoed (waiting for a - retry timeout to expire). */ - if (extraFDs[0].revents & CURL_WAIT_POLLIN) { - char buf[1024]; - auto res = read(extraFDs[0].fd, buf, sizeof(buf)); - if (res == -1 && errno != EINTR) - throw SysError("reading curl wakeup socket"); - } - - std::vector> incoming; - auto now = std::chrono::steady_clock::now(); - - { - auto state(state_.lock()); - while (!state->incoming.empty()) { - auto item = state->incoming.top(); - if (item->embargo <= now) { - incoming.push_back(item); - state->incoming.pop(); - } else { - if (nextWakeup == std::chrono::steady_clock::time_point() - || item->embargo < nextWakeup) - nextWakeup = item->embargo; - break; - } - } - quit = state->quit; - } - - for (auto & item : incoming) { - debug("starting %s of %s", item->request.verb(), item->request.uri); - item->init(); - curl_multi_add_handle(curlm, item->req); - item->active = true; - items[item->req] = item; - } - } - - debug("download thread shutting down"); - } - - void workerThreadEntry() - { - try { - workerThreadMain(); - } catch (nix::Interrupted & e) { - } catch (std::exception & e) { - printError("unexpected error in download thread: %s", e.what()); - } - - { - auto state(state_.lock()); - while (!state->incoming.empty()) state->incoming.pop(); - state->quit = true; - } - } - - void enqueueItem(std::shared_ptr item) - { - if (item->request.data - && !hasPrefix(item->request.uri, "http://") - && !hasPrefix(item->request.uri, "https://")) - throw nix::Error("uploading to '%s' is not supported", item->request.uri); - - { - auto state(state_.lock()); - if (state->quit) - throw nix::Error("cannot enqueue download request because the download thread is shutting down"); - state->incoming.push(item); - } - writeFull(wakeupPipe.writeSide.get(), " "); - } - -#ifdef ENABLE_S3 - std::tuple parseS3Uri(std::string uri) - { - auto [path, params] = splitUriAndParams(uri); - - auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix - if (slash == std::string::npos) - throw nix::Error("bad S3 URI '%s'", path); - - std::string bucketName(path, 5, slash - 5); - std::string key(path, slash + 1); - - return {bucketName, key, params}; - } -#endif - - void enqueueDownload(const DataTransferRequest & request, - Callback callback) override - { - /* Ugly hack to support s3:// URIs. */ - if (hasPrefix(request.uri, "s3://")) { - // FIXME: do this on a worker thread - try { -#ifdef ENABLE_S3 - auto [bucketName, key, params] = parseS3Uri(request.uri); - - std::string profile = get(params, "profile").value_or(""); - std::string region = get(params, "region").value_or(Aws::Region::US_EAST_1); - std::string scheme = get(params, "scheme").value_or(""); - std::string endpoint = get(params, "endpoint").value_or(""); - - S3Helper s3Helper(profile, region, scheme, endpoint); - - // FIXME: implement ETag - auto s3Res = s3Helper.getObject(bucketName, key); - DownloadResult res; - if (!s3Res.data) - throw DownloadError(NotFound, fmt("S3 object '%s' does not exist", request.uri)); - res.data = s3Res.data; - callback(std::move(res)); -#else - throw nix::Error("cannot download '%s' because Nix is not built with S3 support", request.uri); -#endif - } catch (...) { callback.rethrow(); } - return; - } - - enqueueItem(std::make_shared(*this, request, std::move(callback))); - } -}; - -ref getDownloader() -{ - static ref downloader = makeDownloader(); - return downloader; -} - -ref makeDownloader() -{ - return make_ref(); -} - -std::future Downloader::enqueueDownload(const DataTransferRequest & request) -{ - auto promise = std::make_shared>(); - enqueueDownload(request, - {[promise](std::future fut) { - try { - promise->set_value(fut.get()); - } catch (...) { - promise->set_exception(std::current_exception()); - } - }}); - return promise->get_future(); -} - -DownloadResult Downloader::download(const DataTransferRequest & request) -{ - return enqueueDownload(request).get(); -} - -void Downloader::download(DataTransferRequest && request, Sink & sink) -{ - /* Note: we can't call 'sink' via request.dataCallback, because - that would cause the sink to execute on the downloader - thread. If 'sink' is a coroutine, this will fail. Also, if the - sink is expensive (e.g. one that does decompression and writing - to the Nix store), it would stall the download thread too much. - Therefore we use a buffer to communicate data between the - download thread and the calling thread. */ - - struct State { - bool quit = false; - std::exception_ptr exc; - std::string data; - std::condition_variable avail, request; - }; - - auto _state = std::make_shared>(); - - /* In case of an exception, wake up the download thread. FIXME: - abort the download request. */ - Finally finally([&]() { - auto state(_state->lock()); - state->quit = true; - state->request.notify_one(); - }); - - request.dataCallback = [_state](char * buf, size_t len) { - - auto state(_state->lock()); - - if (state->quit) return; - - /* If the buffer is full, then go to sleep until the calling - thread wakes us up (i.e. when it has removed data from the - buffer). We don't wait forever to prevent stalling the - download thread. (Hopefully sleeping will throttle the - sender.) */ - if (state->data.size() > 1024 * 1024) { - debug("download buffer is full; going to sleep"); - state.wait_for(state->request, std::chrono::seconds(10)); - } - - /* Append data to the buffer and wake up the calling - thread. */ - state->data.append(buf, len); - state->avail.notify_one(); - }; - - enqueueDownload(request, - {[_state](std::future fut) { - auto state(_state->lock()); - state->quit = true; - try { - fut.get(); - } catch (...) { - state->exc = std::current_exception(); - } - state->avail.notify_one(); - state->request.notify_one(); - }}); - - while (true) { - checkInterrupt(); - - std::string chunk; - - /* Grab data if available, otherwise wait for the download - thread to wake us up. */ - { - auto state(_state->lock()); - - while (state->data.empty()) { - - if (state->quit) { - if (state->exc) std::rethrow_exception(state->exc); - return; - } - - state.wait(state->avail); - } - - chunk = std::move(state->data); - - state->request.notify_one(); - } - - /* Flush the data to the sink and wake up the download thread - if it's blocked on a full buffer. We don't hold the state - lock while doing this to prevent blocking the download - thread if sink() takes a long time. */ - sink((unsigned char *) chunk.data(), chunk.size()); - } -} - -bool isUri(const string & s) -{ - if (s.compare(0, 8, "channel:") == 0) return true; - size_t pos = s.find("://"); - if (pos == string::npos) return false; - string scheme(s, 0, pos); - return scheme == "http" || scheme == "https" || scheme == "file" || scheme == "channel" || scheme == "git" || scheme == "s3" || scheme == "ssh"; -} - - -} diff --git a/src/libstore/download.hh b/src/libstore/download.hh deleted file mode 100644 index 89d3fce7f..000000000 --- a/src/libstore/download.hh +++ /dev/null @@ -1,113 +0,0 @@ -#pragma once - -#include "types.hh" -#include "hash.hh" -#include "config.hh" - -#include -#include - -namespace nix { - -struct DownloadSettings : Config -{ - Setting enableHttp2{this, true, "http2", - "Whether to enable HTTP/2 support."}; - - Setting userAgentSuffix{this, "", "user-agent-suffix", - "String appended to the user agent in HTTP requests."}; - - Setting httpConnections{this, 25, "http-connections", - "Number of parallel HTTP connections.", - {"binary-caches-parallel-connections"}}; - - Setting connectTimeout{this, 0, "connect-timeout", - "Timeout for connecting to servers during downloads. 0 means use curl's builtin default."}; - - Setting stalledDownloadTimeout{this, 300, "stalled-download-timeout", - "Timeout (in seconds) for receiving data from servers during download. Nix cancels idle downloads after this timeout's duration."}; - - Setting tries{this, 5, "download-attempts", - "How often Nix will attempt to download a file before giving up."}; -}; - -extern DownloadSettings downloadSettings; - -struct DataTransferRequest -{ - std::string uri; - std::string expectedETag; - bool verifyTLS = true; - bool head = false; - size_t tries = downloadSettings.tries; - unsigned int baseRetryTimeMs = 250; - ActivityId parentAct; - bool decompress = true; - std::shared_ptr data; - std::string mimeType; - std::function dataCallback; - - DataTransferRequest(const std::string & uri) - : uri(uri), parentAct(getCurActivity()) { } - - std::string verb() - { - return data ? "upload" : "download"; - } -}; - -struct DownloadResult -{ - bool cached = false; - std::string etag; - std::string effectiveUri; - std::shared_ptr data; - uint64_t bodySize = 0; -}; - -class Store; - -struct Downloader -{ - virtual ~Downloader() { } - - /* Enqueue a download request, returning a future to the result of - the download. The future may throw a DownloadError - exception. */ - virtual void enqueueDownload(const DataTransferRequest & request, - Callback callback) = 0; - - std::future enqueueDownload(const DataTransferRequest & request); - - /* Synchronously download a file. */ - DownloadResult download(const DataTransferRequest & request); - - /* Download a file, writing its data to a sink. The sink will be - invoked on the thread of the caller. */ - void download(DataTransferRequest && request, Sink & sink); - - enum Error { NotFound, Forbidden, Misc, Transient, Interrupted }; -}; - -/* Return a shared Downloader object. Using this object is preferred - because it enables connection reuse and HTTP/2 multiplexing. */ -ref getDownloader(); - -/* Return a new Downloader object. */ -ref makeDownloader(); - -class DownloadError : public Error -{ -public: - Downloader::Error error; - DownloadError(Downloader::Error error, const FormatOrString & fs) - : Error(fs), error(error) - { } -}; - -bool isUri(const string & s); - -/* Resolve deprecated 'channel:' URLs. */ -std::string resolveUri(const std::string & uri); - -} diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index e152e7524..1fccee518 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -1,5 +1,5 @@ #include "binary-cache-store.hh" -#include "download.hh" +#include "datatransfer.hh" #include "globals.hh" #include "nar-info-disk-cache.hh" diff --git a/src/libstore/s3-binary-cache-store.cc b/src/libstore/s3-binary-cache-store.cc index f2e4b63e0..ed3c690b6 100644 --- a/src/libstore/s3-binary-cache-store.cc +++ b/src/libstore/s3-binary-cache-store.cc @@ -6,7 +6,7 @@ #include "nar-info-disk-cache.hh" #include "globals.hh" #include "compression.hh" -#include "download.hh" +#include "datatransfer.hh" #include "istringstream_nocopy.hh" #include diff --git a/src/nix-channel/nix-channel.cc b/src/nix-channel/nix-channel.cc index 2a9defb4e..d0719194d 100755 --- a/src/nix-channel/nix-channel.cc +++ b/src/nix-channel/nix-channel.cc @@ -1,6 +1,6 @@ #include "shared.hh" #include "globals.hh" -#include "download.hh" +#include "datatransfer.hh" #include "store-api.hh" #include "../nix/legacy.hh" #include "fetchers.hh" diff --git a/src/nix-prefetch-url/nix-prefetch-url.cc b/src/nix-prefetch-url/nix-prefetch-url.cc index bfd65da98..58a180e72 100644 --- a/src/nix-prefetch-url/nix-prefetch-url.cc +++ b/src/nix-prefetch-url/nix-prefetch-url.cc @@ -1,6 +1,6 @@ #include "hash.hh" #include "shared.hh" -#include "download.hh" +#include "datatransfer.hh" #include "store-api.hh" #include "eval.hh" #include "eval-inline.hh" diff --git a/src/nix/main.cc b/src/nix/main.cc index 3b5f5516f..ad4102201 100644 --- a/src/nix/main.cc +++ b/src/nix/main.cc @@ -8,7 +8,7 @@ #include "shared.hh" #include "store-api.hh" #include "progress-bar.hh" -#include "download.hh" +#include "datatransfer.hh" #include "finally.hh" #include diff --git a/src/nix/upgrade-nix.cc b/src/nix/upgrade-nix.cc index aeaf7b09c..414689ec5 100644 --- a/src/nix/upgrade-nix.cc +++ b/src/nix/upgrade-nix.cc @@ -1,7 +1,7 @@ #include "command.hh" #include "common-args.hh" #include "store-api.hh" -#include "download.hh" +#include "datatransfer.hh" #include "eval.hh" #include "attr-path.hh" #include "names.hh" -- cgit v1.2.3