diff options
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- | src/libfetchers/attrs.cc | 107 | ||||
-rw-r--r-- | src/libfetchers/attrs.hh | 39 | ||||
-rw-r--r-- | src/libfetchers/cache.cc | 121 | ||||
-rw-r--r-- | src/libfetchers/cache.hh | 34 | ||||
-rw-r--r-- | src/libfetchers/fetchers.cc | 75 | ||||
-rw-r--r-- | src/libfetchers/fetchers.hh | 103 | ||||
-rw-r--r-- | src/libfetchers/git.cc | 441 | ||||
-rw-r--r-- | src/libfetchers/github.cc | 195 | ||||
-rw-r--r-- | src/libfetchers/local.mk | 11 | ||||
-rw-r--r-- | src/libfetchers/mercurial.cc | 303 | ||||
-rw-r--r-- | src/libfetchers/path.cc | 148 | ||||
-rw-r--r-- | src/libfetchers/tarball.cc | 275 | ||||
-rw-r--r-- | src/libfetchers/tree-info.cc | 14 | ||||
-rw-r--r-- | src/libfetchers/tree-info.hh | 29 |
14 files changed, 1895 insertions, 0 deletions
diff --git a/src/libfetchers/attrs.cc b/src/libfetchers/attrs.cc new file mode 100644 index 000000000..feb0a6085 --- /dev/null +++ b/src/libfetchers/attrs.cc @@ -0,0 +1,107 @@ +#include "attrs.hh" +#include "fetchers.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +Attrs jsonToAttrs(const nlohmann::json & json) +{ + Attrs attrs; + + for (auto & i : json.items()) { + if (i.value().is_number()) + attrs.emplace(i.key(), i.value().get<int64_t>()); + else if (i.value().is_string()) + attrs.emplace(i.key(), i.value().get<std::string>()); + else if (i.value().is_boolean()) + attrs.emplace(i.key(), i.value().get<bool>()); + else + throw Error("unsupported input attribute type in lock file"); + } + + return attrs; +} + +nlohmann::json attrsToJson(const Attrs & attrs) +{ + nlohmann::json json; + for (auto & attr : attrs) { + if (auto v = std::get_if<int64_t>(&attr.second)) { + json[attr.first] = *v; + } else if (auto v = std::get_if<std::string>(&attr.second)) { + json[attr.first] = *v; + } else if (auto v = std::get_if<Explicit<bool>>(&attr.second)) { + json[attr.first] = v->t; + } else abort(); + } + return json; +} + +std::optional<std::string> maybeGetStrAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if<std::string>(&i->second)) + return *v; + throw Error("input attribute '%s' is not a string %s", name, attrsToJson(attrs).dump()); +} + +std::string getStrAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetStrAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::optional<int64_t> maybeGetIntAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if<int64_t>(&i->second)) + return *v; + throw Error("input attribute '%s' is not an integer", name); +} + +int64_t getIntAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetIntAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::optional<bool> maybeGetBoolAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if<int64_t>(&i->second)) + return *v; + throw Error("input attribute '%s' is not a Boolean", name); +} + +bool getBoolAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetBoolAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::map<std::string, std::string> attrsToQuery(const Attrs & attrs) +{ + std::map<std::string, std::string> query; + for (auto & attr : attrs) { + if (auto v = std::get_if<int64_t>(&attr.second)) { + query.insert_or_assign(attr.first, fmt("%d", *v)); + } else if (auto v = std::get_if<std::string>(&attr.second)) { + query.insert_or_assign(attr.first, *v); + } else if (auto v = std::get_if<Explicit<bool>>(&attr.second)) { + query.insert_or_assign(attr.first, v->t ? "1" : "0"); + } else abort(); + } + return query; +} + +} diff --git a/src/libfetchers/attrs.hh b/src/libfetchers/attrs.hh new file mode 100644 index 000000000..d6e0ae000 --- /dev/null +++ b/src/libfetchers/attrs.hh @@ -0,0 +1,39 @@ +#pragma once + +#include "types.hh" + +#include <variant> + +#include <nlohmann/json_fwd.hpp> + +namespace nix::fetchers { + +/* Wrap bools to prevent string literals (i.e. 'char *') from being + cast to a bool in Attr. */ +template<typename T> +struct Explicit { + T t; +}; + +typedef std::variant<std::string, int64_t, Explicit<bool>> Attr; +typedef std::map<std::string, Attr> Attrs; + +Attrs jsonToAttrs(const nlohmann::json & json); + +nlohmann::json attrsToJson(const Attrs & attrs); + +std::optional<std::string> maybeGetStrAttr(const Attrs & attrs, const std::string & name); + +std::string getStrAttr(const Attrs & attrs, const std::string & name); + +std::optional<int64_t> maybeGetIntAttr(const Attrs & attrs, const std::string & name); + +int64_t getIntAttr(const Attrs & attrs, const std::string & name); + +std::optional<bool> maybeGetBoolAttr(const Attrs & attrs, const std::string & name); + +bool getBoolAttr(const Attrs & attrs, const std::string & name); + +std::map<std::string, std::string> attrsToQuery(const Attrs & attrs); + +} diff --git a/src/libfetchers/cache.cc b/src/libfetchers/cache.cc new file mode 100644 index 000000000..e1c7f3dee --- /dev/null +++ b/src/libfetchers/cache.cc @@ -0,0 +1,121 @@ +#include "cache.hh" +#include "sqlite.hh" +#include "sync.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +static const char * schema = R"sql( + +create table if not exists Cache ( + input text not null, + info text not null, + path text not null, + immutable integer not null, + timestamp integer not null, + primary key (input) +); +)sql"; + +struct CacheImpl : Cache +{ + struct State + { + SQLite db; + SQLiteStmt add, lookup; + }; + + Sync<State> _state; + + CacheImpl() + { + auto state(_state.lock()); + + auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite"; + createDirs(dirOf(dbPath)); + + state->db = SQLite(dbPath); + state->db.isCache(); + state->db.exec(schema); + + state->add.create(state->db, + "insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, ?, ?)"); + + state->lookup.create(state->db, + "select info, path, immutable, timestamp from Cache where input = ?"); + } + + void add( + ref<Store> store, + const Attrs & inAttrs, + const Attrs & infoAttrs, + const StorePath & storePath, + bool immutable) override + { + _state.lock()->add.use() + (attrsToJson(inAttrs).dump()) + (attrsToJson(infoAttrs).dump()) + (store->printStorePath(storePath)) + (immutable) + (time(0)).exec(); + } + + std::optional<std::pair<Attrs, StorePath>> lookup( + ref<Store> store, + const Attrs & inAttrs) override + { + if (auto res = lookupExpired(store, inAttrs)) { + if (!res->expired) + return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath)); + debug("ignoring expired cache entry '%s'", + attrsToJson(inAttrs).dump()); + } + return {}; + } + + std::optional<Result> lookupExpired( + ref<Store> store, + const Attrs & inAttrs) override + { + auto state(_state.lock()); + + auto inAttrsJson = attrsToJson(inAttrs).dump(); + + auto stmt(state->lookup.use()(inAttrsJson)); + if (!stmt.next()) { + debug("did not find cache entry for '%s'", inAttrsJson); + return {}; + } + + auto infoJson = stmt.getStr(0); + auto storePath = store->parseStorePath(stmt.getStr(1)); + auto immutable = stmt.getInt(2) != 0; + auto timestamp = stmt.getInt(3); + + store->addTempRoot(storePath); + if (!store->isValidPath(storePath)) { + // FIXME: we could try to substitute 'storePath'. + debug("ignoring disappeared cache entry '%s'", inAttrsJson); + return {}; + } + + debug("using cache entry '%s' -> '%s', '%s'", + inAttrsJson, infoJson, store->printStorePath(storePath)); + + return Result { + .expired = !immutable && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)), + .infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJson)), + .storePath = std::move(storePath) + }; + } +}; + +ref<Cache> getCache() +{ + static auto cache = std::make_shared<CacheImpl>(); + return ref<Cache>(cache); +} + +} diff --git a/src/libfetchers/cache.hh b/src/libfetchers/cache.hh new file mode 100644 index 000000000..d76ab1233 --- /dev/null +++ b/src/libfetchers/cache.hh @@ -0,0 +1,34 @@ +#pragma once + +#include "fetchers.hh" + +namespace nix::fetchers { + +struct Cache +{ + virtual void add( + ref<Store> store, + const Attrs & inAttrs, + const Attrs & infoAttrs, + const StorePath & storePath, + bool immutable) = 0; + + virtual std::optional<std::pair<Attrs, StorePath>> lookup( + ref<Store> store, + const Attrs & inAttrs) = 0; + + struct Result + { + bool expired = false; + Attrs infoAttrs; + StorePath storePath; + }; + + virtual std::optional<Result> lookupExpired( + ref<Store> store, + const Attrs & inAttrs) = 0; +}; + +ref<Cache> getCache(); + +} diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc new file mode 100644 index 000000000..11cac4c55 --- /dev/null +++ b/src/libfetchers/fetchers.cc @@ -0,0 +1,75 @@ +#include "fetchers.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +std::unique_ptr<std::vector<std::unique_ptr<InputScheme>>> inputSchemes = nullptr; + +void registerInputScheme(std::unique_ptr<InputScheme> && inputScheme) +{ + if (!inputSchemes) inputSchemes = std::make_unique<std::vector<std::unique_ptr<InputScheme>>>(); + inputSchemes->push_back(std::move(inputScheme)); +} + +std::unique_ptr<Input> inputFromURL(const ParsedURL & url) +{ + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromURL(url); + if (res) return res; + } + throw Error("input '%s' is unsupported", url.url); +} + +std::unique_ptr<Input> inputFromURL(const std::string & url) +{ + return inputFromURL(parseURL(url)); +} + +std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) +{ + auto attrs2(attrs); + attrs2.erase("narHash"); + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromAttrs(attrs2); + if (res) { + if (auto narHash = maybeGetStrAttr(attrs, "narHash")) + // FIXME: require SRI hash. + res->narHash = newHashAllowEmpty(*narHash, htUnknown); + return res; + } + } + throw Error("input '%s' is unsupported", attrsToJson(attrs)); +} + +Attrs Input::toAttrs() const +{ + auto attrs = toAttrsInternal(); + if (narHash) + attrs.emplace("narHash", narHash->to_string(SRI, true)); + attrs.emplace("type", type()); + return attrs; +} + +std::pair<Tree, std::shared_ptr<const Input>> Input::fetchTree(ref<Store> store) const +{ + auto [tree, input] = fetchTreeInternal(store); + + if (tree.actualPath == "") + tree.actualPath = store->toRealPath(tree.storePath); + + if (!tree.info.narHash) + tree.info.narHash = store->queryPathInfo(tree.storePath)->narHash; + + if (input->narHash) + assert(input->narHash == tree.info.narHash); + + if (narHash && narHash != input->narHash) + throw Error("NAR hash mismatch in input '%s' (%s), expected '%s', got '%s'", + to_string(), tree.actualPath, narHash->to_string(SRI, true), input->narHash->to_string(SRI, true)); + + return {std::move(tree), input}; +} + +} diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh new file mode 100644 index 000000000..59a58ae67 --- /dev/null +++ b/src/libfetchers/fetchers.hh @@ -0,0 +1,103 @@ +#pragma once + +#include "types.hh" +#include "hash.hh" +#include "path.hh" +#include "tree-info.hh" +#include "attrs.hh" +#include "url.hh" + +#include <memory> + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct Input; + +struct Tree +{ + Path actualPath; + StorePath storePath; + TreeInfo info; +}; + +struct Input : std::enable_shared_from_this<Input> +{ + std::optional<Hash> narHash; // FIXME: implement + + virtual std::string type() const = 0; + + virtual ~Input() { } + + virtual bool operator ==(const Input & other) const { return false; } + + /* Check whether this is a "direct" input, that is, not + one that goes through a registry. */ + virtual bool isDirect() const { return true; } + + /* Check whether this is an "immutable" input, that is, + one that contains a commit hash or content hash. */ + virtual bool isImmutable() const { return (bool) narHash; } + + virtual bool contains(const Input & other) const { return false; } + + virtual std::optional<std::string> getRef() const { return {}; } + + virtual std::optional<Hash> getRev() const { return {}; } + + virtual ParsedURL toURL() const = 0; + + std::string to_string() const + { + return toURL().to_string(); + } + + Attrs toAttrs() const; + + std::pair<Tree, std::shared_ptr<const Input>> fetchTree(ref<Store> store) const; + +private: + + virtual std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(ref<Store> store) const = 0; + + virtual Attrs toAttrsInternal() const = 0; +}; + +struct InputScheme +{ + virtual ~InputScheme() { } + + virtual std::unique_ptr<Input> inputFromURL(const ParsedURL & url) = 0; + + virtual std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) = 0; +}; + +std::unique_ptr<Input> inputFromURL(const ParsedURL & url); + +std::unique_ptr<Input> inputFromURL(const std::string & url); + +std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs); + +void registerInputScheme(std::unique_ptr<InputScheme> && fetcher); + +struct DownloadFileResult +{ + StorePath storePath; + std::string etag; + std::string effectiveUrl; +}; + +DownloadFileResult downloadFile( + ref<Store> store, + const std::string & url, + const std::string & name, + bool immutable); + +Tree downloadTarball( + ref<Store> store, + const std::string & url, + const std::string & name, + bool immutable); + +} diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc new file mode 100644 index 000000000..75ce5ee8b --- /dev/null +++ b/src/libfetchers/git.cc @@ -0,0 +1,441 @@ +#include "fetchers.hh" +#include "cache.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" + +#include <sys/time.h> + +using namespace std::string_literals; + +namespace nix::fetchers { + +static std::string readHead(const Path & path) +{ + return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); +} + +static bool isNotDotGitDirectory(const Path & path) +{ + static const std::regex gitDirRegex("^(?:.*/)?\\.git$"); + + return not std::regex_match(path, gitDirRegex); +} + +struct GitInput : Input +{ + ParsedURL url; + std::optional<std::string> ref; + std::optional<Hash> rev; + bool shallow = false; + bool submodules = false; + + GitInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "git"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const GitInput *>(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev || narHash; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + ParsedURL toURL() const override + { + ParsedURL url2(url); + if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + if (shallow) url2.query.insert_or_assign("shallow", "1"); + return url2; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + if (shallow) + attrs.emplace("shallow", true); + if (submodules) + attrs.emplace("submodules", true); + return attrs; + } + + std::pair<bool, std::string> getActualUrl() const + { + // Don't clone file:// URIs (but otherwise treat them the + // same as remote URIs, i.e. don't use the working tree or + // HEAD). + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + bool isLocal = url.scheme == "file" && !forceHttp; + return {isLocal, isLocal ? url.path : url.base}; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto name = "source"; + + auto input = std::make_shared<GitInput>(*this); + + assert(!rev || rev->type == htSHA1); + + std::string cacheType = "git"; + if (shallow) cacheType += "-shallow"; + if (submodules) cacheType += "-submodules"; + + auto getImmutableAttrs = [&]() + { + return Attrs({ + {"type", cacheType}, + {"name", name}, + {"rev", input->rev->gitRev()}, + }); + }; + + auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) + -> std::pair<Tree, std::shared_ptr<const Input>> + { + assert(input->rev); + assert(!rev || rev == input->rev); + return { + Tree { + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = shallow ? std::nullopt : std::optional(getIntAttr(infoAttrs, "revCount")), + .lastModified = getIntAttr(infoAttrs, "lastModified"), + }, + }, + input + }; + }; + + if (rev) { + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + } + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // If this is a local directory and no ref or revision is + // given, then allow the use of an unclean working tree. + if (!input->ref && !input->rev && isLocal) { + bool clean = false; + + /* Check whether this repo has any commits. There are + probably better ways to do this. */ + auto gitDir = actualUrl + "/.git"; + auto commonGitDir = chomp(runProgram( + "git", + true, + { "-C", actualUrl, "rev-parse", "--git-common-dir" } + )); + if (commonGitDir != ".git") + gitDir = commonGitDir; + + bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); + + try { + if (haveCommits) { + runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked files. */ + + if (!settings.allowDirty) + throw Error("Git tree '%s' is dirty", actualUrl); + + if (settings.warnDirty) + warn("Git tree '%s' is dirty", actualUrl); + + auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, gitOpts), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); + + auto tree = Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + .lastModified = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "HEAD" })) : 0, + } + }; + + return {std::move(tree), input}; + } + } + + if (!input->ref) input->ref = isLocal ? readHead(actualUrl) : "master"; + + Attrs mutableAttrs({ + {"type", cacheType}, + {"name", name}, + {"url", actualUrl}, + {"ref", *input->ref}, + }); + + Path repoDir; + + if (isLocal) { + + if (!input->rev) + input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), htSHA1); + + repoDir = actualUrl; + + } else { + + if (auto res = getCache()->lookup(store, mutableAttrs)) { + auto rev2 = Hash(getStrAttr(res->first, "rev"), htSHA1); + if (!rev || rev == rev2) { + input->rev = rev2; + return makeResult(res->first, std::move(res->second)); + } + } + + Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + repoDir = cacheDir; + + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + runProgram("git", true, { "init", "--bare", repoDir }); + } + + Path localRefFile = + input->ref->compare(0, 5, "refs/") == 0 + ? cacheDir + "/" + *input->ref + : cacheDir + "/refs/heads/" + *input->ref; + + bool doFetch; + time_t now = time(0); + + /* If a rev was specified, we need to fetch if it's not in the + repo. */ + if (input->rev) { + try { + runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() }); + doFetch = false; + } catch (ExecError & e) { + if (WIFEXITED(e.status)) { + doFetch = true; + } else { + throw; + } + } + } else { + /* If the local ref is older than ‘tarball-ttl’ seconds, do a + git fetch to update the local ref to the remote ref. */ + struct stat st; + doFetch = stat(localRefFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + } + + if (doFetch) { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); + + // FIXME: git stderr messes up our progress indicator, so + // we're using --quiet for now. Should process its stderr. + try { + auto fetchRef = input->ref->compare(0, 5, "refs/") == 0 + ? *input->ref + : "refs/heads/" + *input->ref; + runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); + } catch (Error & e) { + if (!pathExists(localRefFile)) throw; + warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); + } + + struct timeval times[2]; + times[0].tv_sec = now; + times[0].tv_usec = 0; + times[1].tv_sec = now; + times[1].tv_usec = 0; + + utimes(localRefFile.c_str(), times); + } + + if (!input->rev) + input->rev = Hash(chomp(readFile(localRefFile)), htSHA1); + } + + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; + + if (isShallow && !shallow) + throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); + + // FIXME: check whether rev is an ancestor of ref. + + printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl); + + /* Now that we know the ref, check again whether we have it in + the store. */ + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + PathFilter filter = defaultPathFilter; + + if (submodules) { + Path tmpGitDir = createTempDir(); + AutoDelete delTmpGitDir(tmpGitDir, true); + + runProgram("git", true, { "init", tmpDir, "--separate-git-dir", tmpGitDir }); + // TODO: repoDir might lack the ref (it only checks if rev + // exists, see FIXME above) so use a big hammer and fetch + // everything to ensure we get the rev. + runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", + "--update-head-ok", "--", repoDir, "refs/*:refs/*" }); + + runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->rev->gitRev() }); + runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl }); + runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); + + filter = isNotDotGitDirectory; + } else { + // FIXME: should pipe this, or find some better way to extract a + // revision. + auto source = sinkToSource([&](Sink & sink) { + RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() }); + gitOptions.standardOut = &sink; + runProgram2(gitOptions); + }); + + unpackTarfile(*source, tmpDir); + } + + auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); + + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() })); + + Attrs infoAttrs({ + {"rev", input->rev->gitRev()}, + {"lastModified", lastModified}, + }); + + if (!shallow) + infoAttrs.insert_or_assign("revCount", + std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() }))); + + if (!this->rev) + getCache()->add( + store, + mutableAttrs, + infoAttrs, + storePath, + false); + + getCache()->add( + store, + getImmutableAttrs(), + infoAttrs, + storePath, + true); + + return makeResult(infoAttrs, std::move(storePath)); + } +}; + +struct GitInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "git" && + url.scheme != "git+http" && + url.scheme != "git+https" && + url.scheme != "git+ssh" && + url.scheme != "git+file") return nullptr; + + auto url2(url); + if (hasPrefix(url2.scheme, "git+")) url2.scheme = std::string(url2.scheme, 4); + url2.query.clear(); + + Attrs attrs; + attrs.emplace("type", "git"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "git") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules") + throw Error("unsupported Git input attribute '%s'", name); + + auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (std::regex_search(*ref, badGitRefRegex)) + throw BadURL("invalid Git branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + + input->shallow = maybeGetBoolAttr(attrs, "shallow").value_or(false); + + input->submodules = maybeGetBoolAttr(attrs, "submodules").value_or(false); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); }); + +} diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc new file mode 100644 index 000000000..0bee1d6b3 --- /dev/null +++ b/src/libfetchers/github.cc @@ -0,0 +1,195 @@ +#include "filetransfer.hh" +#include "cache.hh" +#include "fetchers.hh" +#include "globals.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +std::regex ownerRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); +std::regex repoRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); + +struct GitHubInput : Input +{ + std::string owner; + std::string repo; + std::optional<std::string> ref; + std::optional<Hash> rev; + + std::string type() const override { return "github"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const GitHubInput *>(&other); + return + other2 + && owner == other2->owner + && repo == other2->repo + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev || narHash; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + ParsedURL toURL() const override + { + auto path = owner + "/" + repo; + assert(!(ref && rev)); + if (ref) path += "/" + *ref; + if (rev) path += "/" + rev->to_string(Base16, false); + return ParsedURL { + .scheme = "github", + .path = path, + }; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("owner", owner); + attrs.emplace("repo", repo); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto rev = this->rev; + auto ref = this->ref.value_or("master"); + + if (!rev) { + auto url = fmt("https://api.github.com/repos/%s/%s/commits/%s", + owner, repo, ref); + auto json = nlohmann::json::parse( + readFile( + store->toRealPath( + downloadFile(store, url, "source", false).storePath))); + rev = Hash(std::string { json["sha"] }, htSHA1); + debug("HEAD revision for '%s' is %s", url, rev->gitRev()); + } + + auto input = std::make_shared<GitHubInput>(*this); + input->ref = {}; + input->rev = *rev; + + Attrs immutableAttrs({ + {"type", "git-tarball"}, + {"rev", rev->gitRev()}, + }); + + if (auto res = getCache()->lookup(store, immutableAttrs)) { + return { + Tree{ + .actualPath = store->toRealPath(res->second), + .storePath = std::move(res->second), + .info = TreeInfo { + .lastModified = getIntAttr(res->first, "lastModified"), + }, + }, + input + }; + } + + // FIXME: use regular /archive URLs instead? api.github.com + // might have stricter rate limits. + + auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", + owner, repo, rev->to_string(Base16, false)); + + std::string accessToken = settings.githubAccessToken.get(); + if (accessToken != "") + url += "?access_token=" + accessToken; + + auto tree = downloadTarball(store, url, "source", true); + + getCache()->add( + store, + immutableAttrs, + { + {"rev", rev->gitRev()}, + {"lastModified", *tree.info.lastModified} + }, + tree.storePath, + true); + + return {std::move(tree), input}; + } +}; + +struct GitHubInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "github") return nullptr; + + auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); + auto input = std::make_unique<GitHubInput>(); + + if (path.size() == 2) { + } else if (path.size() == 3) { + if (std::regex_match(path[2], revRegex)) + input->rev = Hash(path[2], htSHA1); + else if (std::regex_match(path[2], refRegex)) + input->ref = path[2]; + else + throw BadURL("in GitHub URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + } else + throw BadURL("GitHub URL '%s' is invalid", url.url); + + for (auto &[name, value] : url.query) { + if (name == "rev") { + if (input->rev) + throw BadURL("GitHub URL '%s' contains multiple commit hashes", url.url); + input->rev = Hash(value, htSHA1); + } + else if (name == "ref") { + if (!std::regex_match(value, refRegex)) + throw BadURL("GitHub URL '%s' contains an invalid branch/tag name", url.url); + if (input->ref) + throw BadURL("GitHub URL '%s' contains multiple branch/tag names", url.url); + input->ref = value; + } + } + + if (input->ref && input->rev) + throw BadURL("GitHub URL '%s' contains both a commit hash and a branch/tag name", url.url); + + input->owner = path[0]; + input->repo = path[1]; + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "github") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev") + throw Error("unsupported GitHub input attribute '%s'", name); + + auto input = std::make_unique<GitHubInput>(); + input->owner = getStrAttr(attrs, "owner"); + input->repo = getStrAttr(attrs, "repo"); + input->ref = maybeGetStrAttr(attrs, "ref"); + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitHubInputScheme>()); }); + +} diff --git a/src/libfetchers/local.mk b/src/libfetchers/local.mk new file mode 100644 index 000000000..cfd705e22 --- /dev/null +++ b/src/libfetchers/local.mk @@ -0,0 +1,11 @@ +libraries += libfetchers + +libfetchers_NAME = libnixfetchers + +libfetchers_DIR := $(d) + +libfetchers_SOURCES := $(wildcard $(d)/*.cc) + +libfetchers_CXXFLAGS += -I src/libutil -I src/libstore + +libfetchers_LIBS = libutil libstore diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc new file mode 100644 index 000000000..2e0d4bf4d --- /dev/null +++ b/src/libfetchers/mercurial.cc @@ -0,0 +1,303 @@ +#include "fetchers.hh" +#include "cache.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" + +#include <sys/time.h> + +using namespace std::string_literals; + +namespace nix::fetchers { + +struct MercurialInput : Input +{ + ParsedURL url; + std::optional<std::string> ref; + std::optional<Hash> rev; + + MercurialInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "hg"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const MercurialInput *>(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev || narHash; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + ParsedURL toURL() const override + { + ParsedURL url2(url); + url2.scheme = "hg+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + return url; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::pair<bool, std::string> getActualUrl() const + { + bool isLocal = url.scheme == "file"; + return {isLocal, isLocal ? url.path : url.base}; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto name = "source"; + + auto input = std::make_shared<MercurialInput>(*this); + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // FIXME: return lastModified. + + // FIXME: don't clone local repositories. + + if (!input->ref && !input->rev && isLocal && pathExists(actualUrl + "/.hg")) { + + bool clean = runProgram("hg", true, { "status", "-R", actualUrl, "--modified", "--added", "--removed" }) == ""; + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked + files. */ + + if (!settings.allowDirty) + throw Error("Mercurial tree '%s' is unclean", actualUrl); + + if (settings.warnDirty) + warn("Mercurial tree '%s' is unclean", actualUrl); + + input->ref = chomp(runProgram("hg", true, { "branch", "-R", actualUrl })); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("hg", true, { "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); + + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + }, input}; + } + } + + if (!input->ref) input->ref = "default"; + + auto getImmutableAttrs = [&]() + { + return Attrs({ + {"type", "hg"}, + {"name", name}, + {"rev", input->rev->gitRev()}, + }); + }; + + auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) + -> std::pair<Tree, std::shared_ptr<const Input>> + { + assert(input->rev); + assert(!rev || rev == input->rev); + return { + Tree{ + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = getIntAttr(infoAttrs, "revCount"), + }, + }, + input + }; + }; + + if (input->rev) { + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + } + + assert(input->rev || input->ref); + auto revOrRef = input->rev ? input->rev->gitRev() : *input->ref; + + Attrs mutableAttrs({ + {"type", "hg"}, + {"name", name}, + {"url", actualUrl}, + {"ref", *input->ref}, + }); + + if (auto res = getCache()->lookup(store, mutableAttrs)) { + auto rev2 = Hash(getStrAttr(res->first, "rev"), htSHA1); + if (!rev || rev == rev2) { + input->rev = rev2; + return makeResult(res->first, std::move(res->second)); + } + } + + Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, actualUrl).to_string(Base32, false)); + + /* If this is a commit hash that we already have, we don't + have to pull again. */ + if (!(input->rev + && pathExists(cacheDir) + && runProgram( + RunOptions("hg", { "log", "-R", cacheDir, "-r", input->rev->gitRev(), "--template", "1" }) + .killStderr(true)).second == "1")) + { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", actualUrl)); + + if (pathExists(cacheDir)) { + try { + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } + catch (ExecError & e) { + string transJournal = cacheDir + "/.hg/store/journal"; + /* hg throws "abandoned transaction" error only if this file exists */ + if (pathExists(transJournal)) { + runProgram("hg", true, { "recover", "-R", cacheDir }); + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } else { + throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + } + } + } else { + createDirs(dirOf(cacheDir)); + runProgram("hg", true, { "clone", "--noupdate", "--", actualUrl, cacheDir }); + } + } + + auto tokens = tokenizeString<std::vector<std::string>>( + runProgram("hg", true, { "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); + assert(tokens.size() == 3); + + input->rev = Hash(tokens[0], htSHA1); + auto revCount = std::stoull(tokens[1]); + input->ref = tokens[2]; + + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("hg", true, { "archive", "-R", cacheDir, "-r", input->rev->gitRev(), tmpDir }); + + deletePath(tmpDir + "/.hg_archival.txt"); + + auto storePath = store->addToStore(name, tmpDir); + + Attrs infoAttrs({ + {"rev", input->rev->gitRev()}, + {"revCount", (int64_t) revCount}, + }); + + if (!this->rev) + getCache()->add( + store, + mutableAttrs, + infoAttrs, + storePath, + false); + + getCache()->add( + store, + getImmutableAttrs(), + infoAttrs, + storePath, + true); + + return makeResult(infoAttrs, std::move(storePath)); + } +}; + +struct MercurialInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "hg+http" && + url.scheme != "hg+https" && + url.scheme != "hg+ssh" && + url.scheme != "hg+file") return nullptr; + + auto url2(url); + url2.scheme = std::string(url2.scheme, 3); + url2.query.clear(); + + Attrs attrs; + attrs.emplace("type", "hg"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "hg") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev") + throw Error("unsupported Mercurial input attribute '%s'", name); + + auto input = std::make_unique<MercurialInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) + throw BadURL("invalid Mercurial branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<MercurialInputScheme>()); }); + +} diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc new file mode 100644 index 000000000..ba2cc192e --- /dev/null +++ b/src/libfetchers/path.cc @@ -0,0 +1,148 @@ +#include "fetchers.hh" +#include "store-api.hh" + +namespace nix::fetchers { + +struct PathInput : Input +{ + Path path; + + /* Allow the user to pass in "fake" tree info attributes. This is + useful for making a pinned tree work the same as the repository + from which is exported + (e.g. path:/nix/store/...-source?lastModified=1585388205&rev=b0c285...). */ + std::optional<Hash> rev; + std::optional<uint64_t> revCount; + std::optional<time_t> lastModified; + + std::string type() const override { return "path"; } + + std::optional<Hash> getRev() const override { return rev; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const PathInput *>(&other); + return + other2 + && path == other2->path + && rev == other2->rev + && revCount == other2->revCount + && lastModified == other2->lastModified; + } + + bool isImmutable() const override + { + return (bool) narHash; + } + + ParsedURL toURL() const override + { + auto query = attrsToQuery(toAttrsInternal()); + query.erase("path"); + return ParsedURL { + .scheme = "path", + .path = path, + .query = query, + }; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("path", path); + if (rev) + attrs.emplace("rev", rev->gitRev()); + if (revCount) + attrs.emplace("revCount", *revCount); + if (lastModified) + attrs.emplace("lastModified", *lastModified); + return attrs; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto input = std::make_shared<PathInput>(*this); + + // FIXME: check whether access to 'path' is allowed. + + auto storePath = store->maybeParseStorePath(path); + + if (storePath) + store->addTempRoot(*storePath); + + if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath)) + // FIXME: try to substitute storePath. + storePath = store->addToStore("source", path); + + return + { + Tree { + .actualPath = store->toRealPath(*storePath), + .storePath = std::move(*storePath), + .info = TreeInfo { + .revCount = revCount, + .lastModified = lastModified + } + }, + input + }; + } + +}; + +struct PathInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "path") return nullptr; + + auto input = std::make_unique<PathInput>(); + input->path = url.path; + + for (auto & [name, value] : url.query) + if (name == "rev") + input->rev = Hash(value, htSHA1); + else if (name == "revCount") { + uint64_t revCount; + if (!string2Int(value, revCount)) + throw Error("path URL '%s' has invalid parameter '%s'", url.to_string(), name); + input->revCount = revCount; + } + else if (name == "lastModified") { + time_t lastModified; + if (!string2Int(value, lastModified)) + throw Error("path URL '%s' has invalid parameter '%s'", url.to_string(), name); + input->lastModified = lastModified; + } + else + throw Error("path URL '%s' has unsupported parameter '%s'", url.to_string(), name); + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "path") return {}; + + auto input = std::make_unique<PathInput>(); + input->path = getStrAttr(attrs, "path"); + + for (auto & [name, value] : attrs) + if (name == "rev") + input->rev = Hash(getStrAttr(attrs, "rev"), htSHA1); + else if (name == "revCount") + input->revCount = getIntAttr(attrs, "revCount"); + else if (name == "lastModified") + input->lastModified = getIntAttr(attrs, "lastModified"); + else if (name == "type" || name == "path") + ; + else + throw Error("unsupported path input attribute '%s'", name); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<PathInputScheme>()); }); + +} diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc new file mode 100644 index 000000000..7966da314 --- /dev/null +++ b/src/libfetchers/tarball.cc @@ -0,0 +1,275 @@ +#include "fetchers.hh" +#include "cache.hh" +#include "filetransfer.hh" +#include "globals.hh" +#include "store-api.hh" +#include "archive.hh" +#include "tarfile.hh" + +namespace nix::fetchers { + +DownloadFileResult downloadFile( + ref<Store> store, + const std::string & url, + const std::string & name, + bool immutable) +{ + // FIXME: check store + + Attrs inAttrs({ + {"type", "file"}, + {"url", url}, + {"name", name}, + }); + + auto cached = getCache()->lookupExpired(store, inAttrs); + + auto useCached = [&]() -> DownloadFileResult + { + return { + .storePath = std::move(cached->storePath), + .etag = getStrAttr(cached->infoAttrs, "etag"), + .effectiveUrl = getStrAttr(cached->infoAttrs, "url") + }; + }; + + if (cached && !cached->expired) + return useCached(); + + FileTransferRequest request(url); + if (cached) + request.expectedETag = getStrAttr(cached->infoAttrs, "etag"); + FileTransferResult res; + try { + res = getFileTransfer()->download(request); + } catch (FileTransferError & e) { + if (cached) { + warn("%s; using cached version", e.msg()); + return useCached(); + } else + throw; + } + + // FIXME: write to temporary file. + + Attrs infoAttrs({ + {"etag", res.etag}, + {"url", res.effectiveUri}, + }); + + std::optional<StorePath> storePath; + + if (res.cached) { + assert(cached); + assert(request.expectedETag == res.etag); + storePath = std::move(cached->storePath); + } else { + StringSink sink; + dumpString(*res.data, sink); + auto hash = hashString(htSHA256, *res.data); + ValidPathInfo info(store->makeFixedOutputPath(FileIngestionMethod::Flat, hash, name)); + info.narHash = hashString(htSHA256, *sink.s); + info.narSize = sink.s->size(); + info.ca = makeFixedOutputCA(FileIngestionMethod::Flat, hash); + auto source = StringSource { *sink.s }; + store->addToStore(info, source, NoRepair, NoCheckSigs); + storePath = std::move(info.path); + } + + getCache()->add( + store, + inAttrs, + infoAttrs, + *storePath, + immutable); + + if (url != res.effectiveUri) + getCache()->add( + store, + { + {"type", "file"}, + {"url", res.effectiveUri}, + {"name", name}, + }, + infoAttrs, + *storePath, + immutable); + + return { + .storePath = std::move(*storePath), + .etag = res.etag, + .effectiveUrl = res.effectiveUri, + }; +} + +Tree downloadTarball( + ref<Store> store, + const std::string & url, + const std::string & name, + bool immutable) +{ + Attrs inAttrs({ + {"type", "tarball"}, + {"url", url}, + {"name", name}, + }); + + auto cached = getCache()->lookupExpired(store, inAttrs); + + if (cached && !cached->expired) + return Tree { + .actualPath = store->toRealPath(cached->storePath), + .storePath = std::move(cached->storePath), + .info = TreeInfo { + .lastModified = getIntAttr(cached->infoAttrs, "lastModified"), + }, + }; + + auto res = downloadFile(store, url, name, immutable); + + std::optional<StorePath> unpackedStorePath; + time_t lastModified; + + if (cached && res.etag != "" && getStrAttr(cached->infoAttrs, "etag") == res.etag) { + unpackedStorePath = std::move(cached->storePath); + lastModified = getIntAttr(cached->infoAttrs, "lastModified"); + } else { + Path tmpDir = createTempDir(); + AutoDelete autoDelete(tmpDir, true); + unpackTarfile(store->toRealPath(res.storePath), tmpDir); + auto members = readDirectory(tmpDir); + if (members.size() != 1) + throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url); + auto topDir = tmpDir + "/" + members.begin()->name; + lastModified = lstat(topDir).st_mtime; + unpackedStorePath = store->addToStore(name, topDir, FileIngestionMethod::Recursive, htSHA256, defaultPathFilter, NoRepair); + } + + Attrs infoAttrs({ + {"lastModified", lastModified}, + {"etag", res.etag}, + }); + + getCache()->add( + store, + inAttrs, + infoAttrs, + *unpackedStorePath, + immutable); + + return Tree { + .actualPath = store->toRealPath(*unpackedStorePath), + .storePath = std::move(*unpackedStorePath), + .info = TreeInfo { + .lastModified = lastModified, + }, + }; +} + +struct TarballInput : Input +{ + ParsedURL url; + std::optional<Hash> hash; + + TarballInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "tarball"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const TarballInput *>(&other); + return + other2 + && to_string() == other2->to_string() + && hash == other2->hash; + } + + bool isImmutable() const override + { + return hash || narHash; + } + + ParsedURL toURL() const override + { + auto url2(url); + // NAR hashes are preferred over file hashes since tar/zip files + // don't have a canonical representation. + if (narHash) + url2.query.insert_or_assign("narHash", narHash->to_string(SRI, true)); + else if (hash) + url2.query.insert_or_assign("hash", hash->to_string(SRI, true)); + return url2; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (hash) + attrs.emplace("hash", hash->to_string(SRI, true)); + return attrs; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto tree = downloadTarball(store, url.to_string(), "source", false); + + auto input = std::make_shared<TarballInput>(*this); + input->narHash = store->queryPathInfo(tree.storePath)->narHash; + + return {std::move(tree), input}; + } +}; + +struct TarballInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return nullptr; + + if (!hasSuffix(url.path, ".zip") + && !hasSuffix(url.path, ".tar") + && !hasSuffix(url.path, ".tar.gz") + && !hasSuffix(url.path, ".tar.xz") + && !hasSuffix(url.path, ".tar.bz2")) + return nullptr; + + auto input = std::make_unique<TarballInput>(url); + + auto hash = input->url.query.find("hash"); + if (hash != input->url.query.end()) { + // FIXME: require SRI hash. + input->hash = Hash(hash->second); + input->url.query.erase(hash); + } + + auto narHash = input->url.query.find("narHash"); + if (narHash != input->url.query.end()) { + // FIXME: require SRI hash. + input->narHash = Hash(narHash->second); + input->url.query.erase(narHash); + } + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "hash") + throw Error("unsupported tarball input attribute '%s'", name); + + auto input = std::make_unique<TarballInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto hash = maybeGetStrAttr(attrs, "hash")) + input->hash = newHashAllowEmpty(*hash, htUnknown); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); }); + +} diff --git a/src/libfetchers/tree-info.cc b/src/libfetchers/tree-info.cc new file mode 100644 index 000000000..b2d8cfc8d --- /dev/null +++ b/src/libfetchers/tree-info.cc @@ -0,0 +1,14 @@ +#include "tree-info.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +StorePath TreeInfo::computeStorePath(Store & store) const +{ + assert(narHash); + return store.makeFixedOutputPath(FileIngestionMethod::Recursive, narHash, "source"); +} + +} diff --git a/src/libfetchers/tree-info.hh b/src/libfetchers/tree-info.hh new file mode 100644 index 000000000..2c7347281 --- /dev/null +++ b/src/libfetchers/tree-info.hh @@ -0,0 +1,29 @@ +#pragma once + +#include "path.hh" +#include "hash.hh" + +#include <nlohmann/json_fwd.hpp> + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct TreeInfo +{ + Hash narHash; + std::optional<uint64_t> revCount; + std::optional<time_t> lastModified; + + bool operator ==(const TreeInfo & other) const + { + return + narHash == other.narHash + && revCount == other.revCount + && lastModified == other.lastModified; + } + + StorePath computeStorePath(Store & store) const; +}; + +} |