diff options
Diffstat (limited to 'src/libstore/fetchers')
-rw-r--r-- | src/libstore/fetchers/fetchers.cc | 137 | ||||
-rw-r--r-- | src/libstore/fetchers/fetchers.hh | 109 | ||||
-rw-r--r-- | src/libstore/fetchers/git.cc | 452 | ||||
-rw-r--r-- | src/libstore/fetchers/github.cc | 212 | ||||
-rw-r--r-- | src/libstore/fetchers/indirect.cc | 142 | ||||
-rw-r--r-- | src/libstore/fetchers/mercurial.cc | 331 | ||||
-rw-r--r-- | src/libstore/fetchers/parse.cc | 138 | ||||
-rw-r--r-- | src/libstore/fetchers/parse.hh | 30 | ||||
-rw-r--r-- | src/libstore/fetchers/regex.hh | 37 | ||||
-rw-r--r-- | src/libstore/fetchers/registry.cc | 183 | ||||
-rw-r--r-- | src/libstore/fetchers/registry.hh | 62 | ||||
-rw-r--r-- | src/libstore/fetchers/tarball.cc | 131 | ||||
-rw-r--r-- | src/libstore/fetchers/tree-info.hh | 26 |
13 files changed, 1990 insertions, 0 deletions
diff --git a/src/libstore/fetchers/fetchers.cc b/src/libstore/fetchers/fetchers.cc new file mode 100644 index 000000000..0cc6f1c91 --- /dev/null +++ b/src/libstore/fetchers/fetchers.cc @@ -0,0 +1,137 @@ +#include "fetchers.hh" +#include "parse.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +std::unique_ptr<std::vector<std::unique_ptr<InputScheme>>> inputSchemes = nullptr; + +void registerInputScheme(std::unique_ptr<InputScheme> && inputScheme) +{ + if (!inputSchemes) inputSchemes = std::make_unique<std::vector<std::unique_ptr<InputScheme>>>(); + inputSchemes->push_back(std::move(inputScheme)); +} + +std::unique_ptr<Input> inputFromURL(const ParsedURL & url) +{ + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromURL(url); + if (res) return res; + } + throw Error("input '%s' is unsupported", url.url); +} + +std::unique_ptr<Input> inputFromURL(const std::string & url) +{ + return inputFromURL(parseURL(url)); +} + +std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) +{ + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromAttrs(attrs); + if (res) { + if (auto narHash = maybeGetStrAttr(attrs, "narHash")) + // FIXME: require SRI hash. + res->narHash = Hash(*narHash); + return res; + } + } + throw Error("input '%s' is unsupported", attrsToJson(attrs)); +} + +Input::Attrs jsonToAttrs(const nlohmann::json & json) +{ + fetchers::Input::Attrs attrs; + + for (auto & i : json.items()) { + if (i.value().is_number()) + attrs.emplace(i.key(), i.value().get<int64_t>()); + else if (i.value().is_string()) + attrs.emplace(i.key(), i.value().get<std::string>()); + else + throw Error("unsupported input attribute type in lock file"); + } + + return attrs; +} + +nlohmann::json attrsToJson(const fetchers::Input::Attrs & attrs) +{ + nlohmann::json json; + for (auto & attr : attrs) { + if (auto v = std::get_if<int64_t>(&attr.second)) { + json[attr.first] = *v; + } else if (auto v = std::get_if<std::string>(&attr.second)) { + json[attr.first] = *v; + } else abort(); + } + return json; +} + +Input::Attrs Input::toAttrs() const +{ + auto attrs = toAttrsInternal(); + if (narHash) + attrs.emplace("narHash", narHash->to_string(SRI)); + attrs.emplace("type", type()); + return attrs; +} + +std::optional<std::string> maybeGetStrAttr(const Input::Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if<std::string>(&i->second)) + return *v; + throw Error("input attribute '%s' is not a string", name); +} + +std::string getStrAttr(const Input::Attrs & attrs, const std::string & name) +{ + auto s = maybeGetStrAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::pair<Tree, std::shared_ptr<const Input>> Input::fetchTree(ref<Store> store) const +{ + auto [tree, input] = fetchTreeInternal(store); + + if (tree.actualPath == "") + tree.actualPath = store->toRealPath(tree.storePath); + + if (!tree.info.narHash) + tree.info.narHash = store->queryPathInfo(tree.storePath)->narHash; + + if (input->narHash) + assert(input->narHash == tree.info.narHash); + + if (narHash && narHash != input->narHash) + throw Error("NAR hash mismatch in input '%s' (%s), expected '%s', got '%s'", + to_string(), tree.actualPath, narHash->to_string(SRI), input->narHash->to_string(SRI)); + + return {std::move(tree), input}; +} + +std::shared_ptr<const Input> Input::applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const +{ + if (ref) + throw Error("don't know how to apply '%s' to '%s'", *ref, to_string()); + if (rev) + throw Error("don't know how to apply '%s' to '%s'", rev->to_string(Base16, false), to_string()); + return shared_from_this(); +} + +StorePath TreeInfo::computeStorePath(Store & store) const +{ + assert(narHash); + return store.makeFixedOutputPath(true, narHash, "source"); +} + +} diff --git a/src/libstore/fetchers/fetchers.hh b/src/libstore/fetchers/fetchers.hh new file mode 100644 index 000000000..4202e8339 --- /dev/null +++ b/src/libstore/fetchers/fetchers.hh @@ -0,0 +1,109 @@ +#pragma once + +#include "types.hh" +#include "hash.hh" +#include "path.hh" +#include "tree-info.hh" + +#include <memory> +#include <variant> + +#include <nlohmann/json_fwd.hpp> + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct Input; + +struct Tree +{ + Path actualPath; + StorePath storePath; + TreeInfo info; +}; + +struct Input : std::enable_shared_from_this<Input> +{ + std::optional<Hash> narHash; // FIXME: implement + + virtual std::string type() const = 0; + + virtual ~Input() { } + + virtual bool operator ==(const Input & other) const { return false; } + + /* Check whether this is a "direct" input, that is, not + one that goes through a registry. */ + virtual bool isDirect() const { return true; } + + /* Check whether this is an "immutable" input, that is, + one that contains a commit hash or content hash. */ + virtual bool isImmutable() const { return (bool) narHash; } + + virtual bool contains(const Input & other) const { return false; } + + virtual std::optional<std::string> getRef() const { return {}; } + + virtual std::optional<Hash> getRev() const { return {}; } + + virtual std::string to_string() const = 0; + + typedef std::variant<std::string, int64_t> Attr; + typedef std::map<std::string, Attr> Attrs; + + Attrs toAttrs() const; + + std::pair<Tree, std::shared_ptr<const Input>> fetchTree(ref<Store> store) const; + + virtual std::shared_ptr<const Input> applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const; + + virtual std::optional<Path> getSourcePath() const { return {}; } + + virtual void markChangedFile( + std::string_view file, + std::optional<std::string> commitMsg) const + { assert(false); } + + virtual void clone(const Path & destDir) const + { + throw Error("do not know how to clone input '%s'", to_string()); + } + +private: + + virtual std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(ref<Store> store) const = 0; + + virtual Attrs toAttrsInternal() const = 0; +}; + +struct ParsedURL; + +struct InputScheme +{ + virtual ~InputScheme() { } + + virtual std::unique_ptr<Input> inputFromURL(const ParsedURL & url) = 0; + + virtual std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) = 0; +}; + +std::unique_ptr<Input> inputFromURL(const ParsedURL & url); + +std::unique_ptr<Input> inputFromURL(const std::string & url); + +std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs); + +void registerInputScheme(std::unique_ptr<InputScheme> && fetcher); + +Input::Attrs jsonToAttrs(const nlohmann::json & json); + +nlohmann::json attrsToJson(const Input::Attrs & attrs); + +std::optional<std::string> maybeGetStrAttr(const Input::Attrs & attrs, const std::string & name); + +std::string getStrAttr(const Input::Attrs & attrs, const std::string & name); + +} diff --git a/src/libstore/fetchers/git.cc b/src/libstore/fetchers/git.cc new file mode 100644 index 000000000..9276b0993 --- /dev/null +++ b/src/libstore/fetchers/git.cc @@ -0,0 +1,452 @@ +#include "fetchers.hh" +#include "parse.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" +#include "regex.hh" + +#include <sys/time.h> + +#include <nlohmann/json.hpp> + +using namespace std::string_literals; + +namespace nix::fetchers { + +static Path getCacheInfoPathFor(const std::string & name, const Hash & rev) +{ + Path cacheDir = getCacheDir() + "/nix/git-revs-v2"; + std::string linkName = + name == "source" + ? rev.gitRev() + : hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false); + return cacheDir + "/" + linkName + ".link"; +} + +static std::string readHead(const Path & path) +{ + return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); +} + +static void cacheGitInfo( + Store & store, + const std::string & name, + const Tree & tree, + const Hash & rev) +{ + nlohmann::json json; + json["storePath"] = store.printStorePath(tree.storePath); + json["name"] = name; + json["rev"] = rev.gitRev(); + json["revCount"] = *tree.info.revCount; + json["lastModified"] = *tree.info.lastModified; + + auto cacheInfoPath = getCacheInfoPathFor(name, rev); + createDirs(dirOf(cacheInfoPath)); + writeFile(cacheInfoPath, json.dump()); +} + +static std::optional<std::pair<Hash, Tree>> lookupGitInfo( + ref<Store> store, + const std::string & name, + const Hash & rev) +{ + try { + auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev))); + + assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == rev); + + auto storePath = store->parseStorePath((std::string) json["storePath"]); + + if (store->isValidPath(storePath)) { + return {{rev, Tree{ + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = json["revCount"], + .lastModified = json["lastModified"], + } + }}}; + } + + } catch (SysError & e) { + if (e.errNo != ENOENT) throw; + } + + return {}; +} + +struct GitInput : Input +{ + ParsedURL url; + std::optional<std::string> ref; + std::optional<Hash> rev; + + GitInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "git"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const GitInput *>(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + std::string to_string() const override + { + ParsedURL url2(url); + if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + return url2.to_string(); + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + void clone(const Path & destDir) const override + { + auto [isLocal, actualUrl] = getActualUrl(); + + Strings args = {"clone"}; + + args.push_back(actualUrl); + + if (ref) { + args.push_back("--branch"); + args.push_back(*ref); + } + + if (rev) throw Error("cloning a specific revision is not implemented"); + + args.push_back(destDir); + + runProgram("git", true, args); + } + + std::shared_ptr<const Input> applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const override + { + if (!ref && !rev) return shared_from_this(); + + auto res = std::make_shared<GitInput>(*this); + + if (ref) res->ref = ref; + if (rev) res->rev = rev; + + if (!res->ref && res->rev) + throw Error("Git input '%s' has a commit hash but no branch/tag name", res->to_string()); + + return res; + } + + std::optional<Path> getSourcePath() const override + { + if (url.scheme == "file" && !ref && !rev) + return url.path; + return {}; + } + void markChangedFile(std::string_view file, std::optional<std::string> commitMsg) const override + { + auto sourcePath = getSourcePath(); + assert(sourcePath); + + runProgram("git", true, + { "-C", *sourcePath, "add", "--force", "--intent-to-add", std::string(file) }); + + if (commitMsg) + runProgram("git", true, + { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); + } + + std::pair<bool, std::string> getActualUrl() const + { + // Don't clone file:// URIs (but otherwise treat them the + // same as remote URIs, i.e. don't use the working tree or + // HEAD). + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + bool isLocal = url.scheme == "file" && !forceHttp; + return {isLocal, isLocal ? url.path : url.base}; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto name = "source"; + + auto input = std::make_shared<GitInput>(*this); + + assert(!rev || rev->type == htSHA1); + + if (rev) { + if (auto tree = lookupGitInfo(store, name, *rev)) { + input->rev = tree->first; + return {std::move(tree->second), input}; + } + } + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // If this is a local directory and no ref or revision is + // given, then allow the use of an unclean working tree. + if (!input->ref && !input->rev && isLocal) { + bool clean = false; + + /* Check whether this repo has any commits. There are + probably better ways to do this. */ + auto gitDir = actualUrl + "/.git"; + auto commonGitDir = chomp(runProgram( + "git", + true, + { "-C", actualUrl, "rev-parse", "--git-common-dir" } + )); + if (commonGitDir != ".git") + gitDir = commonGitDir; + + bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); + + try { + if (haveCommits) { + runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked files. */ + + if (!settings.allowDirty) + throw Error("Git tree '%s' is dirty", actualUrl); + + if (settings.warnDirty) + warn("Git tree '%s' is dirty", actualUrl); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, { "-C", actualUrl, "ls-files", "-z" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter); + + auto tree = Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "rev-list", "--count", "HEAD" })) : 0, + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + .lastModified = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "HEAD" })) : 0, + } + }; + + return {std::move(tree), input}; + } + } + + if (!input->ref) input->ref = isLocal ? readHead(actualUrl) : "master"; + + Path repoDir; + + if (isLocal) { + + if (!input->rev) + input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), htSHA1); + + repoDir = actualUrl; + + } else { + + Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + repoDir = cacheDir; + + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + runProgram("git", true, { "init", "--bare", repoDir }); + } + + Path localRefFile = + input->ref->compare(0, 5, "refs/") == 0 + ? cacheDir + "/" + *input->ref + : cacheDir + "/refs/heads/" + *input->ref; + + bool doFetch; + time_t now = time(0); + + /* If a rev was specified, we need to fetch if it's not in the + repo. */ + if (input->rev) { + try { + runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() }); + doFetch = false; + } catch (ExecError & e) { + if (WIFEXITED(e.status)) { + doFetch = true; + } else { + throw; + } + } + } else { + /* If the local ref is older than ‘tarball-ttl’ seconds, do a + git fetch to update the local ref to the remote ref. */ + struct stat st; + doFetch = stat(localRefFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + } + + if (doFetch) { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); + + // FIXME: git stderr messes up our progress indicator, so + // we're using --quiet for now. Should process its stderr. + try { + runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", *input->ref, *input->ref) }); + } catch (Error & e) { + if (!pathExists(localRefFile)) throw; + warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); + } + + struct timeval times[2]; + times[0].tv_sec = now; + times[0].tv_usec = 0; + times[1].tv_sec = now; + times[1].tv_usec = 0; + + utimes(localRefFile.c_str(), times); + } + + if (!input->rev) + input->rev = Hash(chomp(readFile(localRefFile)), htSHA1); + } + + if (auto tree = lookupGitInfo(store, name, *input->rev)) { + assert(*input->rev == tree->first); + return {std::move(tree->second), input}; + } + + // FIXME: check whether rev is an ancestor of ref. + + printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl); + + // FIXME: should pipe this, or find some better way to extract a + // revision. + auto source = sinkToSource([&](Sink & sink) { + RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() }); + gitOptions.standardOut = &sink; + runProgram2(gitOptions); + }); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + unpackTarfile(*source, tmpDir); + + auto storePath = store->addToStore(name, tmpDir); + auto revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() })); + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() })); + + auto tree = Tree { + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = revCount, + .lastModified = lastModified + } + }; + + cacheGitInfo(*store, name, tree, *input->rev); + + return {std::move(tree), input}; + } +}; + +struct GitInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "git" && + url.scheme != "git+http" && + url.scheme != "git+https" && + url.scheme != "git+ssh" && + url.scheme != "git+file") return nullptr; + + auto url2(url); + if (hasPrefix(url2.scheme, "git+")) url2.scheme = std::string(url2.scheme, 4); + url2.query.clear(); + + Input::Attrs attrs; + attrs.emplace("type", "git"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "git") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev") + throw Error("unsupported Git input attribute '%s'", name); + + auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) + throw BadURL("invalid Git branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); }); + +} diff --git a/src/libstore/fetchers/github.cc b/src/libstore/fetchers/github.cc new file mode 100644 index 000000000..0a000e83f --- /dev/null +++ b/src/libstore/fetchers/github.cc @@ -0,0 +1,212 @@ +#include "fetchers.hh" +#include "download.hh" +#include "globals.hh" +#include "parse.hh" +#include "regex.hh" +#include "store-api.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +std::regex ownerRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); +std::regex repoRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); + +struct GitHubInput : Input +{ + std::string owner; + std::string repo; + std::optional<std::string> ref; + std::optional<Hash> rev; + + std::string type() const override { return "github"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const GitHubInput *>(&other); + return + other2 + && owner == other2->owner + && repo == other2->repo + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + std::string to_string() const override + { + auto s = fmt("github:%s/%s", owner, repo); + assert(!(ref && rev)); + if (ref) s += "/" + *ref; + if (rev) s += "/" + rev->to_string(Base16, false); + return s; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("owner", owner); + attrs.emplace("repo", repo); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + void clone(const Path & destDir) const override + { + std::shared_ptr<const Input> input = inputFromURL(fmt("git+ssh://git@github.com/%s/%s.git", owner, repo)); + input = input->applyOverrides(ref.value_or("master"), rev); + input->clone(destDir); + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto rev = this->rev; + + #if 0 + if (rev) { + if (auto gitInfo = lookupGitInfo(store, "source", *rev)) + return *gitInfo; + } + #endif + + if (!rev) { + auto url = fmt("https://api.github.com/repos/%s/%s/commits/%s", + owner, repo, ref ? *ref : "master"); + CachedDownloadRequest request(url); + request.ttl = rev ? 1000000000 : settings.tarballTtl; + auto result = getDownloader()->downloadCached(store, request); + auto json = nlohmann::json::parse(readFile(result.path)); + rev = Hash(json["sha"], htSHA1); + debug("HEAD revision for '%s' is %s", url, rev->gitRev()); + } + + // FIXME: use regular /archive URLs instead? api.github.com + // might have stricter rate limits. + + auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", + owner, repo, rev->to_string(Base16, false)); + + std::string accessToken = settings.githubAccessToken.get(); + if (accessToken != "") + url += "?access_token=" + accessToken; + + CachedDownloadRequest request(url); + request.unpack = true; + request.name = "source"; + request.ttl = 1000000000; + request.getLastModified = true; + auto dresult = getDownloader()->downloadCached(store, request); + + assert(dresult.lastModified); + + Tree result{ + .actualPath = dresult.path, + .storePath = store->parseStorePath(dresult.storePath), + .info = TreeInfo { + .lastModified = *dresult.lastModified, + }, + }; + + #if 0 + // FIXME: this can overwrite a cache file that contains a revCount. + cacheGitInfo("source", gitInfo); + #endif + + auto input = std::make_shared<GitHubInput>(*this); + input->ref = {}; + input->rev = *rev; + + return {std::move(result), input}; + } + + std::shared_ptr<const Input> applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const override + { + if (!ref && !rev) return shared_from_this(); + + auto res = std::make_shared<GitHubInput>(*this); + + if (ref) res->ref = ref; + if (rev) res->rev = rev; + + return res; + } +}; + +struct GitHubInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "github") return nullptr; + + auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); + auto input = std::make_unique<GitHubInput>(); + + if (path.size() == 2) { + } else if (path.size() == 3) { + if (std::regex_match(path[2], revRegex)) + input->rev = Hash(path[2], htSHA1); + else if (std::regex_match(path[2], refRegex)) + input->ref = path[2]; + else + throw BadURL("in GitHub URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + } else + throw BadURL("GitHub URL '%s' is invalid", url.url); + + for (auto &[name, value] : url.query) { + if (name == "rev") { + if (input->rev) + throw BadURL("GitHub URL '%s' contains multiple commit hashes", url.url); + input->rev = Hash(value, htSHA1); + } + else if (name == "ref") { + if (!std::regex_match(value, refRegex)) + throw BadURL("GitHub URL '%s' contains an invalid branch/tag name", url.url); + if (input->ref) + throw BadURL("GitHub URL '%s' contains multiple branch/tag names", url.url); + input->ref = value; + } + } + + if (input->ref && input->rev) + throw BadURL("GitHub URL '%s' contains both a commit hash and a branch/tag name", url.url); + + input->owner = path[0]; + input->repo = path[1]; + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "github") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev") + throw Error("unsupported GitHub input attribute '%s'", name); + + auto input = std::make_unique<GitHubInput>(); + input->owner = getStrAttr(attrs, "owner"); + input->repo = getStrAttr(attrs, "repo"); + input->ref = maybeGetStrAttr(attrs, "ref"); + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitHubInputScheme>()); }); + +} diff --git a/src/libstore/fetchers/indirect.cc b/src/libstore/fetchers/indirect.cc new file mode 100644 index 000000000..016f5fb39 --- /dev/null +++ b/src/libstore/fetchers/indirect.cc @@ -0,0 +1,142 @@ +#include "fetchers.hh" +#include "parse.hh" +#include "regex.hh" + +namespace nix::fetchers { + +std::regex flakeRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); + +struct IndirectInput : Input +{ + std::string id; + std::optional<Hash> rev; + std::optional<std::string> ref; + + std::string type() const override { return "indirect"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const IndirectInput *>(&other); + return + other2 + && id == other2->id + && rev == other2->rev + && ref == other2->ref; + } + + bool isDirect() const override + { + return false; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + bool contains(const Input & other) const override + { + auto other2 = dynamic_cast<const IndirectInput *>(&other); + return + other2 + && id == other2->id + && (!ref || ref == other2->ref) + && (!rev || rev == other2->rev); + } + + std::string to_string() const override + { + ParsedURL url; + url.scheme = "flake"; + url.path = id; + if (ref) { url.path += '/'; url.path += *ref; }; + if (rev) { url.path += '/'; url.path += rev->gitRev(); }; + return url.to_string(); + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("id", id); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::shared_ptr<const Input> applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const override + { + if (!ref && !rev) return shared_from_this(); + + auto res = std::make_shared<IndirectInput>(*this); + + if (ref) res->ref = ref; + if (rev) res->rev = rev; + + return res; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + throw Error("indirect input '%s' cannot be fetched directly", to_string()); + } +}; + +struct IndirectInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "flake") return nullptr; + + auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); + auto input = std::make_unique<IndirectInput>(); + + if (path.size() == 1) { + } else if (path.size() == 2) { + if (std::regex_match(path[1], revRegex)) + input->rev = Hash(path[1], htSHA1); + else if (std::regex_match(path[1], refRegex)) + input->ref = path[1]; + else + throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); + } else if (path.size() == 3) { + if (!std::regex_match(path[1], refRegex)) + throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); + input->ref = path[1]; + if (!std::regex_match(path[2], revRegex)) + throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); + input->rev = Hash(path[2], htSHA1); + } else + throw BadURL("GitHub URL '%s' is invalid", url.url); + + // FIXME: forbid query params? + + input->id = path[0]; + if (!std::regex_match(input->id, flakeRegex)) + throw BadURL("'%s' is not a valid flake ID", input->id); + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "indirect") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "id" && name != "ref" && name != "rev") + throw Error("unsupported indirect input attribute '%s'", name); + + auto input = std::make_unique<IndirectInput>(); + input->id = getStrAttr(attrs, "id"); + input->ref = maybeGetStrAttr(attrs, "ref"); + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<IndirectInputScheme>()); }); + +} diff --git a/src/libstore/fetchers/mercurial.cc b/src/libstore/fetchers/mercurial.cc new file mode 100644 index 000000000..6ab0add1d --- /dev/null +++ b/src/libstore/fetchers/mercurial.cc @@ -0,0 +1,331 @@ +#include "fetchers.hh" +#include "parse.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" +#include "regex.hh" + +#include <sys/time.h> + +#include <nlohmann/json.hpp> + +using namespace std::string_literals; + +namespace nix::fetchers { + +struct MercurialInput : Input +{ + ParsedURL url; + std::optional<std::string> ref; + std::optional<Hash> rev; + + MercurialInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "hg"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const MercurialInput *>(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional<std::string> getRef() const override { return ref; } + + std::optional<Hash> getRev() const override { return rev; } + + std::string to_string() const override + { + ParsedURL url2(url); + url2.scheme = "hg+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + return url2.to_string(); + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::shared_ptr<const Input> applyOverrides( + std::optional<std::string> ref, + std::optional<Hash> rev) const override + { + if (!ref && !rev) return shared_from_this(); + + auto res = std::make_shared<MercurialInput>(*this); + + if (ref) res->ref = ref; + if (rev) res->rev = rev; + + return res; + } + + std::optional<Path> getSourcePath() const + { + if (url.scheme == "file" && !ref && !rev) + return url.path; + return {}; + } + + void markChangedFile(std::string_view file, std::optional<std::string> commitMsg) const override + { + auto sourcePath = getSourcePath(); + assert(sourcePath); + + // FIXME: shut up if file is already tracked. + runProgram("hg", true, + { "add", *sourcePath + "/" + std::string(file) }); + + if (commitMsg) + runProgram("hg", true, + { "commit", *sourcePath + "/" + std::string(file), "-m", *commitMsg }); + } + + std::pair<bool, std::string> getActualUrl() const + { + bool isLocal = url.scheme == "file"; + return {isLocal, isLocal ? url.path : url.base}; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + auto name = "source"; + + auto input = std::make_shared<MercurialInput>(*this); + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // FIXME: return lastModified. + + // FIXME: don't clone local repositories. + + if (!input->ref && !input->rev && isLocal && pathExists(actualUrl + "/.hg")) { + + bool clean = runProgram("hg", true, { "status", "-R", actualUrl, "--modified", "--added", "--removed" }) == ""; + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked + files. */ + + if (!settings.allowDirty) + throw Error("Mercurial tree '%s' is unclean", actualUrl); + + if (settings.warnDirty) + warn("Mercurial tree '%s' is unclean", actualUrl); + + input->ref = chomp(runProgram("hg", true, { "branch", "-R", actualUrl })); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("hg", true, { "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter); + + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + }, input}; + } + } + + if (!input->ref) input->ref = "default"; + + Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, actualUrl).to_string(Base32, false)); + + assert(input->rev || input->ref); + auto revOrRef = input->rev ? input->rev->gitRev() : *input->ref; + + Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, hashString(htSHA512, revOrRef).to_string(Base32, false)); + + /* If we haven't pulled this repo less than ‘tarball-ttl’ seconds, + do so now. */ + time_t now = time(0); + struct stat st; + if (stat(stampFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now) + { + /* Except that if this is a commit hash that we already have, + we don't have to pull again. */ + if (!(input->rev + && pathExists(cacheDir) + && runProgram( + RunOptions("hg", { "log", "-R", cacheDir, "-r", input->rev->gitRev(), "--template", "1" }) + .killStderr(true)).second == "1")) + { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", actualUrl)); + + if (pathExists(cacheDir)) { + try { + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } + catch (ExecError & e) { + string transJournal = cacheDir + "/.hg/store/journal"; + /* hg throws "abandoned transaction" error only if this file exists */ + if (pathExists(transJournal)) { + runProgram("hg", true, { "recover", "-R", cacheDir }); + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } else { + throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + } + } + } else { + createDirs(dirOf(cacheDir)); + runProgram("hg", true, { "clone", "--noupdate", "--", actualUrl, cacheDir }); + } + } + + writeFile(stampFile, ""); + } + + auto tokens = tokenizeString<std::vector<std::string>>( + runProgram("hg", true, { "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); + assert(tokens.size() == 3); + + input->rev = Hash(tokens[0], htSHA1); + auto revCount = std::stoull(tokens[1]); + input->ref = tokens[2]; + + std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + input->rev->gitRev()).to_string(Base32, false); + Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); + + try { + auto json = nlohmann::json::parse(readFile(storeLink)); + + assert(json["name"] == name && json["rev"] == input->rev->gitRev()); + + auto storePath = store->parseStorePath((std::string) json["storePath"]); + + if (store->isValidPath(storePath)) { + printTalkative("using cached Mercurial store path '%s'", store->printStorePath(storePath)); + return { + Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = revCount, + }, + }, + input + }; + } + + } catch (SysError & e) { + if (e.errNo != ENOENT) throw; + } + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("hg", true, { "archive", "-R", cacheDir, "-r", input->rev->gitRev(), tmpDir }); + + deletePath(tmpDir + "/.hg_archival.txt"); + + auto storePath = store->addToStore(name, tmpDir); + + nlohmann::json json; + json["storePath"] = store->printStorePath(storePath); + json["uri"] = actualUrl; + json["name"] = name; + json["branch"] = *input->ref; + json["rev"] = input->rev->gitRev(); + json["revCount"] = revCount; + + writeFile(storeLink, json.dump()); + + return { + Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = revCount + } + }, + input + }; + } +}; + +struct MercurialInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "hg+http" && + url.scheme != "hg+https" && + url.scheme != "hg+ssh" && + url.scheme != "hg+file") return nullptr; + + auto url2(url); + url2.scheme = std::string(url2.scheme, 3); + url2.query.clear(); + + Input::Attrs attrs; + attrs.emplace("type", "hg"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "hg") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev") + throw Error("unsupported Mercurial input attribute '%s'", name); + + auto input = std::make_unique<MercurialInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) + throw BadURL("invalid Mercurial branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<MercurialInputScheme>()); }); + +} diff --git a/src/libstore/fetchers/parse.cc b/src/libstore/fetchers/parse.cc new file mode 100644 index 000000000..a5ad14c87 --- /dev/null +++ b/src/libstore/fetchers/parse.cc @@ -0,0 +1,138 @@ +#include "parse.hh" +#include "util.hh" +#include "regex.hh" + +namespace nix::fetchers { + +std::regex refRegex(refRegexS, std::regex::ECMAScript); +std::regex revRegex(revRegexS, std::regex::ECMAScript); +std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript); + +ParsedURL parseURL(const std::string & url) +{ + static std::regex uriRegex( + "((" + schemeRegex + "):" + + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))" + + "(?:\\?(" + queryRegex + "))?" + + "(?:#(" + queryRegex + "))?", + std::regex::ECMAScript); + + std::smatch match; + + if (std::regex_match(url, match, uriRegex)) { + auto & base = match[1]; + std::string scheme = match[2]; + auto authority = match[3].matched + ? std::optional<std::string>(match[3]) : std::nullopt; + std::string path = match[4].matched ? match[4] : match[5]; + auto & query = match[6]; + auto & fragment = match[7]; + + auto isFile = scheme.find("file") != std::string::npos; + + if (authority && *authority != "" && isFile) + throw Error("file:// URL '%s' has unexpected authority '%s'", + url, *authority); + + if (isFile && path.empty()) + path = "/"; + + return ParsedURL{ + .url = url, + .base = base, + .scheme = scheme, + .authority = authority, + .path = path, + .query = decodeQuery(query), + .fragment = percentDecode(std::string(fragment)) + }; + } + + else + throw BadURL("'%s' is not a valid URL", url); +} + +std::string percentDecode(std::string_view in) +{ + std::string decoded; + for (size_t i = 0; i < in.size(); ) { + if (in[i] == '%') { + if (i + 2 >= in.size()) + throw BadURL("invalid URI parameter '%s'", in); + try { + decoded += std::stoul(std::string(in, i + 1, 2), 0, 16); + i += 3; + } catch (...) { + throw BadURL("invalid URI parameter '%s'", in); + } + } else + decoded += in[i++]; + } + return decoded; +} + +std::map<std::string, std::string> decodeQuery(const std::string & query) +{ + std::map<std::string, std::string> result; + + for (auto s : tokenizeString<Strings>(query, "&")) { + auto e = s.find('='); + if (e != std::string::npos) + result.emplace( + s.substr(0, e), + percentDecode(std::string_view(s).substr(e + 1))); + } + + return result; +} + +std::string percentEncode(std::string_view s) +{ + std::string res; + for (auto & c : s) + if ((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || strchr("-._~!$&'()*+,;=:@", c)) + res += c; + else + res += fmt("%%%02x", (unsigned int) c); + return res; +} + +std::string encodeQuery(const std::map<std::string, std::string> & ss) +{ + std::string res; + bool first = true; + for (auto & [name, value] : ss) { + if (!first) res += '&'; + first = false; + res += percentEncode(name); + res += '='; + res += percentEncode(value); + } + return res; +} + +std::string ParsedURL::to_string() const +{ + return + scheme + + ":" + + (authority ? "//" + *authority : "") + + path + + (query.empty() ? "" : "?" + encodeQuery(query)) + + (fragment.empty() ? "" : "#" + percentEncode(fragment)); +} + +bool ParsedURL::operator ==(const ParsedURL & other) const +{ + return + scheme == other.scheme + && authority == other.authority + && path == other.path + && query == other.query + && fragment == other.fragment; +} + +} diff --git a/src/libstore/fetchers/parse.hh b/src/libstore/fetchers/parse.hh new file mode 100644 index 000000000..45d5182b0 --- /dev/null +++ b/src/libstore/fetchers/parse.hh @@ -0,0 +1,30 @@ +#pragma once + +#include "types.hh" + +namespace nix::fetchers { + +struct ParsedURL +{ + std::string url; + std::string base; // URL without query/fragment + std::string scheme; + std::optional<std::string> authority; + std::string path; + std::map<std::string, std::string> query; + std::string fragment; + + std::string to_string() const; + + bool operator ==(const ParsedURL & other) const; +}; + +MakeError(BadURL, Error); + +std::string percentDecode(std::string_view in); + +std::map<std::string, std::string> decodeQuery(const std::string & query); + +ParsedURL parseURL(const std::string & url); + +} diff --git a/src/libstore/fetchers/regex.hh b/src/libstore/fetchers/regex.hh new file mode 100644 index 000000000..e0989edfc --- /dev/null +++ b/src/libstore/fetchers/regex.hh @@ -0,0 +1,37 @@ +#pragma once + +#include <regex> + +namespace nix::fetchers { + +// URI stuff. +const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; +const static std::string schemeRegex = "(?:[a-z+]+)"; +const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; +const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; +const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; +const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; +const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; +const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; +const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; +const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; +const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; +const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; +const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; + +// A Git ref (i.e. branch or tag name). +const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check +extern std::regex refRegex; + +// A Git revision (a SHA-1 commit hash). +const static std::string revRegexS = "[0-9a-fA-F]{40}"; +extern std::regex revRegex; + +// A ref or revision, or a ref followed by a revision. +const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; + +const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; +extern std::regex flakeIdRegex; + +} diff --git a/src/libstore/fetchers/registry.cc b/src/libstore/fetchers/registry.cc new file mode 100644 index 000000000..721af0c9b --- /dev/null +++ b/src/libstore/fetchers/registry.cc @@ -0,0 +1,183 @@ +#include "registry.hh" +#include "util.hh" +#include "fetchers.hh" +#include "globals.hh" +#include "download.hh" + +#include <nlohmann/json.hpp> + +namespace nix::fetchers { + +std::shared_ptr<Registry> Registry::read( + const Path & path, RegistryType type) +{ + auto registry = std::make_shared<Registry>(type); + + if (!pathExists(path)) + return std::make_shared<Registry>(type); + + auto json = nlohmann::json::parse(readFile(path)); + + auto version = json.value("version", 0); + + // FIXME: remove soon + if (version == 1) { + auto flakes = json["flakes"]; + for (auto i = flakes.begin(); i != flakes.end(); ++i) { + auto url = i->value("url", i->value("uri", "")); + if (url.empty()) + throw Error("flake registry '%s' lacks a 'url' attribute for entry '%s'", + path, i.key()); + registry->entries.push_back( + {inputFromURL(i.key()), inputFromURL(url), {}}); + } + } + + else if (version == 2) { + for (auto & i : json["flakes"]) { + auto toAttrs = jsonToAttrs(i["to"]); + Input::Attrs extraAttrs; + auto j = toAttrs.find("dir"); + if (j != toAttrs.end()) { + extraAttrs.insert(*j); + toAttrs.erase(j); + } + registry->entries.push_back( + { inputFromAttrs(jsonToAttrs(i["from"])) + , inputFromAttrs(toAttrs) + , extraAttrs + }); + } + } + + else + throw Error("flake registry '%s' has unsupported version %d", path, version); + + + return registry; +} + +void Registry::write(const Path & path) +{ + nlohmann::json arr; + for (auto & elem : entries) { + nlohmann::json obj; + obj["from"] = attrsToJson(std::get<0>(elem)->toAttrs()); + obj["to"] = attrsToJson(std::get<1>(elem)->toAttrs()); + if (!std::get<2>(elem).empty()) + obj["to"].update(attrsToJson(std::get<2>(elem))); + arr.emplace_back(std::move(obj)); + } + + nlohmann::json json; + json["version"] = 2; + json["flakes"] = std::move(arr); + + createDirs(dirOf(path)); + writeFile(path, json.dump(2)); +} + +void Registry::add( + const std::shared_ptr<const Input> & from, + const std::shared_ptr<const Input> & to, + const Input::Attrs & extraAttrs) +{ + entries.emplace_back(from, to, extraAttrs); +} + +void Registry::remove(const std::shared_ptr<const Input> & input) +{ + // FIXME: use C++20 std::erase. + for (auto i = entries.begin(); i != entries.end(); ) + if (*std::get<0>(*i) == *input) + i = entries.erase(i); + else + ++i; +} + +Path getUserRegistryPath() +{ + return getHome() + "/.config/nix/registry.json"; +} + +std::shared_ptr<Registry> getUserRegistry() +{ + return Registry::read(getUserRegistryPath(), Registry::User); +} + +static std::shared_ptr<Registry> flagRegistry = + std::make_shared<Registry>(Registry::Flag); + +std::shared_ptr<Registry> getFlagRegistry() +{ + return flagRegistry; +} + +void overrideRegistry( + const std::shared_ptr<const Input> & from, + const std::shared_ptr<const Input> & to, + const Input::Attrs & extraAttrs) +{ + flagRegistry->add(from, to, extraAttrs); +} + +static std::shared_ptr<Registry> getGlobalRegistry(ref<Store> store) +{ + static auto reg = [&]() { + auto path = settings.flakeRegistry; + + if (!hasPrefix(path, "/")) { + CachedDownloadRequest request(path); + request.name = "flake-registry.json"; + request.gcRoot = true; + path = getDownloader()->downloadCached(store, request).path; + } + + return Registry::read(path, Registry::Global); + }(); + + return reg; +} + +Registries getRegistries(ref<Store> store) +{ + Registries registries; + registries.push_back(getFlagRegistry()); + registries.push_back(getUserRegistry()); + registries.push_back(getGlobalRegistry(store)); + return registries; +} + +std::pair<std::shared_ptr<const Input>, Input::Attrs> lookupInRegistries( + ref<Store> store, + std::shared_ptr<const Input> input) +{ + Input::Attrs extraAttrs; + int n = 0; + + restart: + + n++; + if (n > 100) throw Error("cycle detected in flake registr for '%s'", input); + + for (auto & registry : getRegistries(store)) { + // FIXME: O(n) + for (auto & entry : registry->entries) { + auto from = std::get<0>(entry); + if (from->contains(*input)) { + input = std::get<1>(entry)->applyOverrides( + !from->getRef() && input->getRef() ? input->getRef() : std::optional<std::string>(), + !from->getRev() && input->getRev() ? input->getRev() : std::optional<Hash>()); + extraAttrs = std::get<2>(entry); + goto restart; + } + } + } + + if (!input->isDirect()) + throw Error("cannot find flake '%s' in the flake registries", input->to_string()); + + return {input, extraAttrs}; +} + +} diff --git a/src/libstore/fetchers/registry.hh b/src/libstore/fetchers/registry.hh new file mode 100644 index 000000000..6063f51d6 --- /dev/null +++ b/src/libstore/fetchers/registry.hh @@ -0,0 +1,62 @@ +#pragma once + +#include "types.hh" +#include "fetchers.hh" + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct Registry +{ + enum RegistryType { + Flag = 0, + User = 1, + Global = 2, + }; + + RegistryType type; + + std::vector< + std::tuple< + std::shared_ptr<const Input>, // from + std::shared_ptr<const Input>, // to + Input::Attrs // extra attributes + > + > entries; + + Registry(RegistryType type) + : type(type) + { } + + static std::shared_ptr<Registry> read( + const Path & path, RegistryType type); + + void write(const Path & path); + + void add( + const std::shared_ptr<const Input> & from, + const std::shared_ptr<const Input> & to, + const Input::Attrs & extraAttrs); + + void remove(const std::shared_ptr<const Input> & input); +}; + +typedef std::vector<std::shared_ptr<Registry>> Registries; + +std::shared_ptr<Registry> getUserRegistry(); + +Path getUserRegistryPath(); + +Registries getRegistries(ref<Store> store); + +void overrideRegistry( + const std::shared_ptr<const Input> & from, + const std::shared_ptr<const Input> & to, + const Input::Attrs & extraAttrs); + +std::pair<std::shared_ptr<const Input>, Input::Attrs> lookupInRegistries( + ref<Store> store, + std::shared_ptr<const Input> input); + +} diff --git a/src/libstore/fetchers/tarball.cc b/src/libstore/fetchers/tarball.cc new file mode 100644 index 000000000..7c0b6690d --- /dev/null +++ b/src/libstore/fetchers/tarball.cc @@ -0,0 +1,131 @@ +#include "fetchers.hh" +#include "download.hh" +#include "globals.hh" +#include "parse.hh" +#include "store-api.hh" + +namespace nix::fetchers { + +struct TarballInput : Input +{ + ParsedURL url; + std::optional<Hash> hash; + + TarballInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "tarball"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast<const TarballInput *>(&other); + return + other2 + && to_string() == other2->to_string() + && hash == other2->hash; + } + + bool isImmutable() const override + { + return hash || narHash; + } + + std::string to_string() const override + { + auto url2(url); + // NAR hashes are preferred over file hashes since tar/zip files + // don't have a canonical representation. + if (narHash) + url2.query.insert_or_assign("narHash", narHash->to_string(SRI)); + else if (hash) + url2.query.insert_or_assign("hash", hash->to_string(SRI)); + return url2.to_string(); + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (narHash) + attrs.emplace("narHash", hash->to_string(SRI)); + else if (hash) + attrs.emplace("hash", hash->to_string(SRI)); + return attrs; + } + + std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override + { + CachedDownloadRequest request(url.to_string()); + request.unpack = true; + request.getLastModified = true; + request.name = "source"; + + auto res = getDownloader()->downloadCached(store, request); + + auto input = std::make_shared<TarballInput>(*this); + + auto storePath = store->parseStorePath(res.storePath); + + input->narHash = store->queryPathInfo(storePath)->narHash; + + return { + Tree { + .actualPath = res.path, + .storePath = std::move(storePath), + .info = TreeInfo { + .lastModified = *res.lastModified, + }, + }, + input + }; + } +}; + +struct TarballInputScheme : InputScheme +{ + std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return nullptr; + + if (!hasSuffix(url.path, ".zip") + && !hasSuffix(url.path, ".tar") + && !hasSuffix(url.path, ".tar.gz") + && !hasSuffix(url.path, ".tar.xz") + && !hasSuffix(url.path, ".tar.bz2")) + return nullptr; + + auto input = std::make_unique<TarballInput>(url); + + auto hash = url.query.find("hash"); + if (hash != url.query.end()) + // FIXME: require SRI hash. + input->hash = Hash(hash->second); + + auto narHash = url.query.find("narHash"); + if (narHash != url.query.end()) + // FIXME: require SRI hash. + input->narHash = Hash(narHash->second); + + return input; + } + + std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "hash" && name != "narHash") + throw Error("unsupported tarball input attribute '%s'", name); + + auto input = std::make_unique<TarballInput>(parseURL(getStrAttr(attrs, "url"))); + if (auto hash = maybeGetStrAttr(attrs, "hash")) + // FIXME: require SRI hash. + input->hash = Hash(*hash); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); }); + +} diff --git a/src/libstore/fetchers/tree-info.hh b/src/libstore/fetchers/tree-info.hh new file mode 100644 index 000000000..02e92759b --- /dev/null +++ b/src/libstore/fetchers/tree-info.hh @@ -0,0 +1,26 @@ +#pragma once + +#include "path.hh" + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct TreeInfo +{ + Hash narHash; + std::optional<uint64_t> revCount; + std::optional<time_t> lastModified; + + bool operator ==(const TreeInfo & other) const + { + return + narHash == other.narHash + && revCount == other.revCount + && lastModified == other.lastModified; + } + + StorePath computeStorePath(Store & store) const; +}; + +} |