aboutsummaryrefslogtreecommitdiff
path: root/src/libstore/fetchers
diff options
context:
space:
mode:
Diffstat (limited to 'src/libstore/fetchers')
-rw-r--r--src/libstore/fetchers/fetchers.cc137
-rw-r--r--src/libstore/fetchers/fetchers.hh109
-rw-r--r--src/libstore/fetchers/git.cc452
-rw-r--r--src/libstore/fetchers/github.cc212
-rw-r--r--src/libstore/fetchers/indirect.cc142
-rw-r--r--src/libstore/fetchers/mercurial.cc331
-rw-r--r--src/libstore/fetchers/parse.cc138
-rw-r--r--src/libstore/fetchers/parse.hh30
-rw-r--r--src/libstore/fetchers/regex.hh37
-rw-r--r--src/libstore/fetchers/registry.cc183
-rw-r--r--src/libstore/fetchers/registry.hh62
-rw-r--r--src/libstore/fetchers/tarball.cc131
-rw-r--r--src/libstore/fetchers/tree-info.hh26
13 files changed, 1990 insertions, 0 deletions
diff --git a/src/libstore/fetchers/fetchers.cc b/src/libstore/fetchers/fetchers.cc
new file mode 100644
index 000000000..0cc6f1c91
--- /dev/null
+++ b/src/libstore/fetchers/fetchers.cc
@@ -0,0 +1,137 @@
+#include "fetchers.hh"
+#include "parse.hh"
+#include "store-api.hh"
+
+#include <nlohmann/json.hpp>
+
+namespace nix::fetchers {
+
+/* Global list of registered input schemes. It starts out null and is
+   allocated by registerInputScheme() on the first registration;
+   inputFromURL() and inputFromAttrs() consult the schemes in the order
+   in which they were registered. */
+std::unique_ptr<std::vector<std::unique_ptr<InputScheme>>> inputSchemes = nullptr;
+
+/* Take ownership of `inputScheme` and append it to the global scheme list,
+   allocating that list the first time a scheme is registered. */
+void registerInputScheme(std::unique_ptr<InputScheme> && inputScheme)
+{
+    if (inputSchemes == nullptr) {
+        using SchemeList = std::vector<std::unique_ptr<InputScheme>>;
+        inputSchemes = std::make_unique<SchemeList>();
+    }
+    inputSchemes->emplace_back(std::move(inputScheme));
+}
+
+/* Turn a parsed URL into an Input by asking each registered scheme in
+   registration order; the first scheme that returns a non-null input wins.
+   Throws Error if no scheme accepts the URL. */
+std::unique_ptr<Input> inputFromURL(const ParsedURL & url)
+{
+    // The scheme list is only allocated by the first registerInputScheme()
+    // call, so it can still be null here. Treat that as "no scheme matched"
+    // instead of dereferencing a null pointer.
+    if (inputSchemes) {
+        for (auto & inputScheme : *inputSchemes) {
+            if (auto res = inputScheme->inputFromURL(url))
+                return res;
+        }
+    }
+    throw Error("input '%s' is unsupported", url.url);
+}
+
+/* Convenience overload: parse `url` with parseURL() and dispatch on the
+   parsed form. */
+std::unique_ptr<Input> inputFromURL(const std::string & url)
+{
+    const auto parsed = parseURL(url);
+    return inputFromURL(parsed);
+}
+
+/* Build an Input from an attribute set by asking each registered scheme in
+   registration order. If the attributes carry a "narHash" string, it is
+   parsed and stored on the resulting input. Throws Error if no scheme
+   accepts the attributes. */
+std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs)
+{
+    // The scheme list is only allocated by the first registerInputScheme()
+    // call, so guard against it still being null.
+    if (inputSchemes) {
+        for (auto & inputScheme : *inputSchemes) {
+            auto res = inputScheme->inputFromAttrs(attrs);
+            if (!res) continue;
+            if (auto narHash = maybeGetStrAttr(attrs, "narHash"))
+                // FIXME: require SRI hash.
+                res->narHash = Hash(*narHash);
+            return res;
+        }
+    }
+    throw Error("input '%s' is unsupported", attrsToJson(attrs));
+}
+
+/* Convert a JSON object into input attributes. Only integer and string
+   values are supported; any other value type raises Error. */
+Input::Attrs jsonToAttrs(const nlohmann::json & json)
+{
+    fetchers::Input::Attrs attrs;
+
+    for (auto & i : json.items()) {
+        // is_number() would also accept floating-point values, which
+        // get<int64_t>() silently truncates; accept real integers only so
+        // that a float is reported as an unsupported type instead.
+        if (i.value().is_number_integer())
+            attrs.emplace(i.key(), i.value().get<int64_t>());
+        else if (i.value().is_string())
+            attrs.emplace(i.key(), i.value().get<std::string>());
+        else
+            throw Error("unsupported input attribute type in lock file");
+    }
+
+    return attrs;
+}
+
+/* Serialise input attributes to JSON: integer attributes become JSON
+   numbers and string attributes become JSON strings. */
+nlohmann::json attrsToJson(const fetchers::Input::Attrs & attrs)
+{
+    nlohmann::json result;
+    for (auto & [name, value] : attrs) {
+        if (auto number = std::get_if<int64_t>(&value))
+            result[name] = *number;
+        else if (auto text = std::get_if<std::string>(&value))
+            result[name] = *text;
+        else
+            abort(); // no other alternative exists in Input::Attr
+    }
+    return result;
+}
+
+/* Return the scheme-specific attributes from toAttrsInternal(), plus
+   "narHash" (in SRI form, only when a NAR hash is set) and "type".
+   emplace() is used, so keys already produced by toAttrsInternal() are
+   not overwritten. */
+Input::Attrs Input::toAttrs() const
+{
+    Attrs result = toAttrsInternal();
+    if (narHash.has_value())
+        result.emplace("narHash", narHash->to_string(SRI));
+    result.emplace("type", type());
+    return result;
+}
+
+/* Look up the string attribute `name`. Returns an empty optional when the
+   attribute is absent and throws Error when it exists but is not a string. */
+std::optional<std::string> maybeGetStrAttr(const Input::Attrs & attrs, const std::string & name)
+{
+    const auto it = attrs.find(name);
+    if (it == attrs.end())
+        return std::nullopt;
+
+    const auto * str = std::get_if<std::string>(&it->second);
+    if (!str)
+        throw Error("input attribute '%s' is not a string", name);
+    return *str;
+}
+
+/* Like maybeGetStrAttr(), but a missing attribute is an error: throws
+   Error if `name` is absent (or, via maybeGetStrAttr(), not a string). */
+std::string getStrAttr(const Input::Attrs & attrs, const std::string & name)
+{
+    if (auto value = maybeGetStrAttr(attrs, name))
+        return *value;
+    throw Error("input attribute '%s' is missing", name);
+}
+
+/* Fetch this input into the store.
+
+   Delegates to fetchTreeInternal() and then normalises the result: an
+   empty `actualPath` is filled in from the store path, and a missing NAR
+   hash is looked up in the store. If this input specifies a NAR hash, the
+   fetched tree must match it, otherwise Error is thrown.
+
+   Returns the tree together with the input returned by
+   fetchTreeInternal(). */
+std::pair<Tree, std::shared_ptr<const Input>> Input::fetchTree(ref<Store> store) const
+{
+    auto [tree, input] = fetchTreeInternal(store);
+
+    if (tree.actualPath == "")
+        tree.actualPath = store->toRealPath(tree.storePath);
+
+    if (!tree.info.narHash)
+        tree.info.narHash = store->queryPathInfo(tree.storePath)->narHash;
+
+    if (input->narHash)
+        assert(input->narHash == tree.info.narHash);
+
+    // Verify against the hash of the tree that was actually fetched.
+    // input->narHash may be unset (fetchTreeInternal() is not required to
+    // fill it in), in which case comparing against it would both report a
+    // bogus mismatch and dereference an empty optional for the message.
+    if (narHash && narHash != tree.info.narHash)
+        throw Error("NAR hash mismatch in input '%s' (%s), expected '%s', got '%s'",
+            to_string(), tree.actualPath, narHash->to_string(SRI), tree.info.narHash.to_string(SRI));
+
+    return {std::move(tree), input};
+}
+
+/* Default implementation for inputs that support neither a ref nor a rev
+   override: returns this input unchanged when no override is given and
+   throws Error otherwise (a ref override is reported before a rev one). */
+std::shared_ptr<const Input> Input::applyOverrides(
+    std::optional<std::string> ref,
+    std::optional<Hash> rev) const
+{
+    if (!ref && !rev)
+        return shared_from_this();
+
+    const std::string what = ref ? *ref : rev->to_string(Base16, false);
+    throw Error("don't know how to apply '%s' to '%s'", what, to_string());
+}
+
+/* Compute the fixed-output store path, named "source", that corresponds to
+   this tree's NAR hash. The NAR hash must be set (asserted).
+   NOTE(review): the `true` argument presumably selects recursive (NAR)
+   hashing -- confirm against Store::makeFixedOutputPath. */
+StorePath TreeInfo::computeStorePath(Store & store) const
+{
+ assert(narHash);
+ return store.makeFixedOutputPath(true, narHash, "source");
+}
+
+}
diff --git a/src/libstore/fetchers/fetchers.hh b/src/libstore/fetchers/fetchers.hh
new file mode 100644
index 000000000..4202e8339
--- /dev/null
+++ b/src/libstore/fetchers/fetchers.hh
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "types.hh"
+#include "hash.hh"
+#include "path.hh"
+#include "tree-info.hh"
+
+#include <memory>
+#include <variant>
+
+#include <nlohmann/json_fwd.hpp>
+
+namespace nix { class Store; }
+
+namespace nix::fetchers {
+
+struct Input;
+
+/* The result of fetching an input: where the fetched tree lives, plus
+   metadata about it. */
+struct Tree
+{
+ /* Filesystem path of the tree. Input::fetchTree() fills this in with
+    Store::toRealPath(storePath) when a fetcher leaves it empty. */
+ Path actualPath;
+ /* Store path of the tree. */
+ StorePath storePath;
+ /* Metadata such as the NAR hash, revision count and last-modified time. */
+ TreeInfo info;
+};
+
+/* Abstract base class for everything that can be fetched. Concrete
+   subclasses implement one scheme each. Inputs are handed around as
+   std::shared_ptr, hence enable_shared_from_this (applyOverrides()
+   returns shared_from_this() when nothing changes). */
+struct Input : std::enable_shared_from_this<Input>
+{
+ /* Expected NAR hash of the fetched tree, if known. fetchTree() throws on
+    a mismatch and toAttrs() emits it as the "narHash" attribute. */
+ std::optional<Hash> narHash; // FIXME: implement
+
+ /* Name of the scheme; emitted by toAttrs() as the "type" attribute. */
+ virtual std::string type() const = 0;
+
+ virtual ~Input() { }
+
+ /* Equality between inputs. The default treats all inputs as different. */
+ virtual bool operator ==(const Input & other) const { return false; }
+
+ /* Check whether this is a "direct" input, that is, not
+ one that goes through a registry. */
+ virtual bool isDirect() const { return true; }
+
+ /* Check whether this is an "immutable" input, that is,
+ one that contains a commit hash or content hash. */
+ virtual bool isImmutable() const { return (bool) narHash; }
+
+ /* Whether `other` is covered by this input. The default is false. */
+ virtual bool contains(const Input & other) const { return false; }
+
+ /* Branch/tag name of this input, if it has one. Default: none. */
+ virtual std::optional<std::string> getRef() const { return {}; }
+
+ /* Commit hash of this input, if it has one. Default: none. */
+ virtual std::optional<Hash> getRev() const { return {}; }
+
+ /* URL-like textual form of this input; also used in error messages. */
+ virtual std::string to_string() const = 0;
+
+ /* An attribute value is either a string or a 64-bit integer. */
+ typedef std::variant<std::string, int64_t> Attr;
+ typedef std::map<std::string, Attr> Attrs;
+
+ /* Attribute form of this input: toAttrsInternal() plus "narHash" (when
+    set) and "type". */
+ Attrs toAttrs() const;
+
+ /* Fetch this input into `store`. Returns the fetched tree and the input
+    produced by fetchTreeInternal(). */
+ std::pair<Tree, std::shared_ptr<const Input>> fetchTree(ref<Store> store) const;
+
+ /* Return a copy of this input with `ref` and/or `rev` replaced. The base
+    implementation returns this input when neither is given and throws
+    Error otherwise. */
+ virtual std::shared_ptr<const Input> applyOverrides(
+ std::optional<std::string> ref,
+ std::optional<Hash> rev) const;
+
+ /* Local source directory of this input, if any. Default: none. */
+ virtual std::optional<Path> getSourcePath() const { return {}; }
+
+ /* Record that `file` changed in the source directory, optionally
+    committing it with `commitMsg`. The default implementation must not be
+    called (it asserts). */
+ virtual void markChangedFile(
+ std::string_view file,
+ std::optional<std::string> commitMsg) const
+ { assert(false); }
+
+ /* Clone this input into `destDir`. The default implementation throws
+    Error. */
+ virtual void clone(const Path & destDir) const
+ {
+ throw Error("do not know how to clone input '%s'", to_string());
+ }
+
+private:
+
+ /* Scheme-specific part of fetchTree(). */
+ virtual std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(ref<Store> store) const = 0;
+
+ /* Scheme-specific part of toAttrs(). */
+ virtual Attrs toAttrsInternal() const = 0;
+};
+
+struct ParsedURL;
+
+/* A factory for one kind of Input. Both functions return a null pointer
+   when the URL or attribute set does not belong to this scheme, which
+   lets the global inputFromURL()/inputFromAttrs() try the next scheme. */
+struct InputScheme
+{
+ virtual ~InputScheme() { }
+
+ /* Build an input from a parsed URL, or return null if it is not ours. */
+ virtual std::unique_ptr<Input> inputFromURL(const ParsedURL & url) = 0;
+
+ /* Build an input from attributes, or return null if they are not ours. */
+ virtual std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) = 0;
+};
+
+/* Build an input from a parsed URL using the first registered scheme that
+   accepts it; throws Error if none does. */
+std::unique_ptr<Input> inputFromURL(const ParsedURL & url);
+
+/* Same as above, after parsing `url`. */
+std::unique_ptr<Input> inputFromURL(const std::string & url);
+
+/* Build an input from an attribute set using the first registered scheme
+   that accepts it; throws Error if none does. */
+std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs);
+
+/* Add a scheme to the global list consulted by the functions above. */
+void registerInputScheme(std::unique_ptr<InputScheme> && fetcher);
+
+/* Convert a JSON object with integer/string values into attributes. */
+Input::Attrs jsonToAttrs(const nlohmann::json & json);
+
+/* Convert attributes into JSON. */
+nlohmann::json attrsToJson(const Input::Attrs & attrs);
+
+/* Return the string attribute `name`, or nothing if it is absent; throws
+   Error if it is present but not a string. */
+std::optional<std::string> maybeGetStrAttr(const Input::Attrs & attrs, const std::string & name);
+
+/* Return the string attribute `name`; throws Error if it is absent. */
+std::string getStrAttr(const Input::Attrs & attrs, const std::string & name);
+
+}
diff --git a/src/libstore/fetchers/git.cc b/src/libstore/fetchers/git.cc
new file mode 100644
index 000000000..9276b0993
--- /dev/null
+++ b/src/libstore/fetchers/git.cc
@@ -0,0 +1,452 @@
+#include "fetchers.hh"
+#include "parse.hh"
+#include "globals.hh"
+#include "tarfile.hh"
+#include "store-api.hh"
+#include "regex.hh"
+
+#include <sys/time.h>
+
+#include <nlohmann/json.hpp>
+
+using namespace std::string_literals;
+
+namespace nix::fetchers {
+
+/* Return the path of the cache file that records the store path for
+   revision `rev` under the store name `name`. For the name "source" the
+   file is named after the revision itself; for any other name it is named
+   after a SHA-512 hash of the name and the revision. */
+static Path getCacheInfoPathFor(const std::string & name, const Hash & rev)
+{
+    const Path cacheDir = getCacheDir() + "/nix/git-revs-v2";
+
+    std::string linkName;
+    if (name == "source")
+        linkName = rev.gitRev();
+    else
+        linkName = hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false);
+
+    return cacheDir + "/" + linkName + ".link";
+}
+
+/* Return the output of `git rev-parse --abbrev-ref HEAD` for the
+   repository at `path`, with trailing whitespace removed by chomp(). */
+static std::string readHead(const Path & path)
+{
+    const Strings args = { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" };
+    return chomp(runProgram("git", true, args));
+}
+
+/* Write a JSON cache entry describing `tree` for revision `rev`, to the
+   file given by getCacheInfoPathFor(). `tree.info.revCount` and
+   `tree.info.lastModified` must be set, since both are dereferenced. */
+static void cacheGitInfo(
+    Store & store,
+    const std::string & name,
+    const Tree & tree,
+    const Hash & rev)
+{
+    nlohmann::json entry;
+    entry["storePath"] = store.printStorePath(tree.storePath);
+    entry["name"] = name;
+    entry["rev"] = rev.gitRev();
+    entry["revCount"] = *tree.info.revCount;
+    entry["lastModified"] = *tree.info.lastModified;
+
+    const auto target = getCacheInfoPathFor(name, rev);
+    createDirs(dirOf(target));
+    writeFile(target, entry.dump());
+}
+
+/* Look up the cache entry written by cacheGitInfo() for (`name`, `rev`).
+   Returns the revision and the cached tree if the entry exists and its
+   store path is still valid in `store`; returns nothing if the cache file
+   does not exist or the store path is gone. Other read errors propagate. */
+static std::optional<std::pair<Hash, Tree>> lookupGitInfo(
+    ref<Store> store,
+    const std::string & name,
+    const Hash & rev)
+{
+    try {
+        auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev)));
+
+        // The entry must describe exactly what was asked for.
+        assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == rev);
+
+        auto storePath = store->parseStorePath((std::string) json["storePath"]);
+
+        if (!store->isValidPath(storePath))
+            return {};
+
+        return {{rev, Tree{
+            .actualPath = store->toRealPath(storePath),
+            .storePath = std::move(storePath),
+            .info = TreeInfo {
+                .revCount = json["revCount"],
+                .lastModified = json["lastModified"],
+            }
+        }}};
+
+    } catch (SysError & e) {
+        // A missing cache file simply means "not cached".
+        if (e.errNo != ENOENT) throw;
+    }
+
+    return {};
+}
+
+struct GitInput : Input
+{
+ /* Repository URL (without the "git+" scheme prefix; see to_string()). */
+ ParsedURL url;
+ /* Branch or tag name, if any. */
+ std::optional<std::string> ref;
+ /* Commit hash, if any. */
+ std::optional<Hash> rev;
+
+ /* Create an input for `url` with neither ref nor rev set. */
+ GitInput(const ParsedURL & url) : url(url)
+ { }
+
+ std::string type() const override { return "git"; }
+
+    /* Two Git inputs are equal when URL, rev and ref all agree; an input
+       of any other type is never equal. */
+    bool operator ==(const Input & other) const override
+    {
+        const auto * that = dynamic_cast<const GitInput *>(&other);
+        if (!that) return false;
+        return url == that->url && rev == that->rev && ref == that->ref;
+    }
+
+ /* A Git input is immutable once it is pinned to a commit. */
+ bool isImmutable() const override
+ {
+ return (bool) rev;
+ }
+
+ std::optional<std::string> getRef() const override { return ref; }
+
+ std::optional<Hash> getRev() const override { return rev; }
+
+    /* Render this input as a URL: the scheme gets a "git+" prefix unless
+       it already is "git", and rev/ref are added as query parameters
+       (replacing any existing ones of the same name). */
+    std::string to_string() const override
+    {
+        ParsedURL rendered(url);
+        if (rendered.scheme != "git")
+            rendered.scheme = "git+" + rendered.scheme;
+        if (rev)
+            rendered.query.insert_or_assign("rev", rev->gitRev());
+        if (ref)
+            rendered.query.insert_or_assign("ref", *ref);
+        return rendered.to_string();
+    }
+
+    /* Scheme-specific attributes: "url", plus "ref" and "rev" when set. */
+    Attrs toAttrsInternal() const override
+    {
+        Attrs result;
+        result.emplace("url", url.to_string());
+        if (ref.has_value())
+            result.emplace("ref", *ref);
+        if (rev.has_value())
+            result.emplace("rev", rev->gitRev());
+        return result;
+    }
+
+    /* Run `git clone` of this repository into `destDir`, checking out
+       `ref` via --branch when one is set. Throws Error if a specific
+       revision is requested, since that is not implemented. */
+    void clone(const Path & destDir) const override
+    {
+        auto [isLocal, actualUrl] = getActualUrl();
+
+        Strings args = {"clone", actualUrl};
+
+        if (ref) {
+            args.push_back("--branch");
+            args.push_back(*ref);
+        }
+
+        if (rev)
+            throw Error("cloning a specific revision is not implemented");
+
+        args.push_back(destDir);
+        runProgram("git", true, args);
+    }
+
+    /* Return a copy of this input with `ref` and/or `rev` replaced, or
+       this input itself when neither is given. Throws Error if the result
+       would have a commit hash but no branch/tag name. */
+    std::shared_ptr<const Input> applyOverrides(
+        std::optional<std::string> ref,
+        std::optional<Hash> rev) const override
+    {
+        const bool nothingToDo = !ref && !rev;
+        if (nothingToDo) return shared_from_this();
+
+        auto updated = std::make_shared<GitInput>(*this);
+        if (ref) updated->ref = ref;
+        if (rev) updated->rev = rev;
+
+        if (updated->rev && !updated->ref)
+            throw Error("Git input '%s' has a commit hash but no branch/tag name", updated->to_string());
+
+        return updated;
+    }
+
+    /* A file:// repository without ref or rev refers to a local checkout;
+       return its path. Any other input has no source path. */
+    std::optional<Path> getSourcePath() const override
+    {
+        const bool isLocalCheckout = url.scheme == "file" && !ref && !rev;
+        if (!isLocalCheckout)
+            return {};
+        return url.path;
+    }
+    /* Register `file` with Git in the local checkout (`git add --force
+       --intent-to-add`) and, when `commitMsg` is given, commit that file
+       with the message. Requires a source path (asserted). */
+    void markChangedFile(std::string_view file, std::optional<std::string> commitMsg) const override
+    {
+        auto sourcePath = getSourcePath();
+        assert(sourcePath);
+
+        const std::string fileName(file);
+
+        runProgram("git", true,
+            { "-C", *sourcePath, "add", "--force", "--intent-to-add", fileName });
+
+        if (!commitMsg) return;
+
+        runProgram("git", true,
+            { "-C", *sourcePath, "commit", fileName, "-m", *commitMsg });
+    }
+
+    /* Decide how to access the repository. Returns (isLocal, location):
+       for file:// URLs the location is the local path and isLocal is
+       true; otherwise it is `url.base`. Setting _NIX_FORCE_HTTP=1 makes
+       file:// URLs behave like remote ones; the variable is read once. */
+    std::pair<bool, std::string> getActualUrl() const
+    {
+        // Don't clone file:// URIs (but otherwise treat them the
+        // same as remote URIs, i.e. don't use the working tree or
+        // HEAD).
+        static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing
+
+        if (url.scheme == "file" && !forceHttp)
+            return {true, url.path};
+        return {false, url.base};
+    }
+
+    /* Fetch this Git input into the store.
+
+       In order:
+       1. If a rev is given and a cache entry for it exists, use that.
+       2. If this is a local repository with neither ref nor rev and the
+          working tree is dirty (or has no commits), copy the tracked
+          files of the working tree (subject to the allowDirty/warnDirty
+          settings).
+       3. Otherwise resolve ref/rev (fetching into a bare cache repository
+          for non-local URLs), export the revision with `git archive`,
+          add it to the store and write a cache entry.
+
+       Returns the tree and a copy of this input with ref/rev filled in as
+       far as they were resolved. */
+    std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override
+    {
+        auto name = "source";
+
+        auto input = std::make_shared<GitInput>(*this);
+
+        assert(!rev || rev->type == htSHA1);
+
+        if (rev) {
+            if (auto tree = lookupGitInfo(store, name, *rev)) {
+                input->rev = tree->first;
+                return {std::move(tree->second), input};
+            }
+        }
+
+        auto [isLocal, actualUrl_] = getActualUrl();
+        auto actualUrl = actualUrl_; // work around clang bug
+
+        // If this is a local directory and no ref or revision is
+        // given, then allow the use of an unclean working tree.
+        if (!input->ref && !input->rev && isLocal) {
+            bool clean = false;
+
+            /* Check whether this repo has any commits. There are
+               probably better ways to do this. */
+            auto gitDir = actualUrl + "/.git";
+            auto commonGitDir = chomp(runProgram(
+                "git",
+                true,
+                { "-C", actualUrl, "rev-parse", "--git-common-dir" }
+            ));
+            if (commonGitDir != ".git")
+                gitDir = commonGitDir;
+
+            bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty();
+
+            try {
+                if (haveCommits) {
+                    runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" });
+                    clean = true;
+                }
+            } catch (ExecError & e) {
+                // Exit status 1 means "there are differences"; anything
+                // else is a real failure.
+                if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
+            }
+
+            if (!clean) {
+
+                /* This is an unclean working tree. So copy all tracked files. */
+
+                if (!settings.allowDirty)
+                    throw Error("Git tree '%s' is dirty", actualUrl);
+
+                if (settings.warnDirty)
+                    warn("Git tree '%s' is dirty", actualUrl);
+
+                auto files = tokenizeString<std::set<std::string>>(
+                    runProgram("git", true, { "-C", actualUrl, "ls-files", "-z" }), "\0"s);
+
+                // Accept tracked files, and directories that contain at
+                // least one tracked file.
+                PathFilter filter = [&](const Path & p) -> bool {
+                    assert(hasPrefix(p, actualUrl));
+                    std::string file(p, actualUrl.size() + 1);
+
+                    auto st = lstat(p);
+
+                    if (S_ISDIR(st.st_mode)) {
+                        auto prefix = file + "/";
+                        auto i = files.lower_bound(prefix);
+                        return i != files.end() && hasPrefix(*i, prefix);
+                    }
+
+                    return files.count(file);
+                };
+
+                auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter);
+
+                auto tree = Tree {
+                    // Use the real filesystem path, like every other place
+                    // that fills in actualPath, rather than the printed
+                    // store path.
+                    .actualPath = store->toRealPath(storePath),
+                    .storePath = std::move(storePath),
+                    .info = TreeInfo {
+                        .revCount = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "rev-list", "--count", "HEAD" })) : 0,
+                        // FIXME: maybe we should use the timestamp of the last
+                        // modified dirty file?
+                        .lastModified = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "HEAD" })) : 0,
+                    }
+                };
+
+                return {std::move(tree), input};
+            }
+        }
+
+        if (!input->ref) input->ref = isLocal ? readHead(actualUrl) : "master";
+
+        Path repoDir;
+
+        if (isLocal) {
+
+            if (!input->rev)
+                input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), htSHA1);
+
+            repoDir = actualUrl;
+
+        } else {
+
+            // Remote repositories are mirrored into a bare repository in
+            // the cache directory, keyed by a hash of the URL.
+            Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false);
+            repoDir = cacheDir;
+
+            if (!pathExists(cacheDir)) {
+                createDirs(dirOf(cacheDir));
+                runProgram("git", true, { "init", "--bare", repoDir });
+            }
+
+            Path localRefFile =
+                input->ref->compare(0, 5, "refs/") == 0
+                ? cacheDir + "/" + *input->ref
+                : cacheDir + "/refs/heads/" + *input->ref;
+
+            bool doFetch;
+            time_t now = time(0);
+
+            /* If a rev was specified, we need to fetch if it's not in the
+               repo. */
+            if (input->rev) {
+                try {
+                    runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() });
+                    doFetch = false;
+                } catch (ExecError & e) {
+                    if (WIFEXITED(e.status)) {
+                        doFetch = true;
+                    } else {
+                        throw;
+                    }
+                }
+            } else {
+                /* If the local ref is older than ‘tarball-ttl’ seconds, do a
+                   git fetch to update the local ref to the remote ref. */
+                struct stat st;
+                doFetch = stat(localRefFile.c_str(), &st) != 0 ||
+                    (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now;
+            }
+
+            if (doFetch) {
+                Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl));
+
+                // FIXME: git stderr messes up our progress indicator, so
+                // we're using --quiet for now. Should process its stderr.
+                try {
+                    runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", *input->ref, *input->ref) });
+                } catch (Error & e) {
+                    // A failed fetch is tolerated as long as an older
+                    // copy of the ref is available locally.
+                    if (!pathExists(localRefFile)) throw;
+                    warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl);
+                }
+
+                // Stamp the ref file with the fetch time; its mtime is
+                // what the TTL check above looks at.
+                struct timeval times[2];
+                times[0].tv_sec = now;
+                times[0].tv_usec = 0;
+                times[1].tv_sec = now;
+                times[1].tv_usec = 0;
+
+                utimes(localRefFile.c_str(), times);
+            }
+
+            if (!input->rev)
+                input->rev = Hash(chomp(readFile(localRefFile)), htSHA1);
+        }
+
+        // The rev is known now, so the cache can be consulted (again).
+        if (auto tree = lookupGitInfo(store, name, *input->rev)) {
+            assert(*input->rev == tree->first);
+            return {std::move(tree->second), input};
+        }
+
+        // FIXME: check whether rev is an ancestor of ref.
+
+        printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl);
+
+        // FIXME: should pipe this, or find some better way to extract a
+        // revision.
+        auto source = sinkToSource([&](Sink & sink) {
+            RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() });
+            gitOptions.standardOut = &sink;
+            runProgram2(gitOptions);
+        });
+
+        Path tmpDir = createTempDir();
+        AutoDelete delTmpDir(tmpDir, true);
+
+        unpackTarfile(*source, tmpDir);
+
+        auto storePath = store->addToStore(name, tmpDir);
+        auto revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() }));
+        auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() }));
+
+        auto tree = Tree {
+            .actualPath = store->toRealPath(storePath),
+            .storePath = std::move(storePath),
+            .info = TreeInfo {
+                .revCount = revCount,
+                .lastModified = lastModified
+            }
+        };
+
+        cacheGitInfo(*store, name, tree, *input->rev);
+
+        return {std::move(tree), input};
+    }
+};
+
+/* Input scheme for Git repositories: handles "git" and "git+<transport>"
+   URLs as well as attribute sets whose "type" is "git". */
+struct GitInputScheme : InputScheme
+{
+    /* Translate a Git URL into attributes and delegate to
+       inputFromAttrs(). Returns nullptr for any other URL scheme. */
+    std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override
+    {
+        const bool isGitUrl =
+            url.scheme == "git"
+            || url.scheme == "git+http"
+            || url.scheme == "git+https"
+            || url.scheme == "git+ssh"
+            || url.scheme == "git+file";
+        if (!isGitUrl) return nullptr;
+
+        // Drop the "git+" prefix so that the stored URL carries the plain
+        // transport scheme.
+        auto baseUrl(url);
+        if (hasPrefix(baseUrl.scheme, "git+"))
+            baseUrl.scheme = std::string(baseUrl.scheme, 4);
+        baseUrl.query.clear();
+
+        Input::Attrs attrs;
+        attrs.emplace("type", "git");
+
+        // "rev" and "ref" become attributes; every other query parameter
+        // stays part of the URL.
+        for (auto &[key, val] : url.query) {
+            if (key == "rev" || key == "ref")
+                attrs.emplace(key, val);
+            else
+                baseUrl.query.emplace(key, val);
+        }
+
+        attrs.emplace("url", baseUrl.to_string());
+
+        return inputFromAttrs(attrs);
+    }
+
+    /* Build a GitInput from attributes. Returns null unless "type" is
+       "git"; throws Error on unknown attributes or a missing "url", and
+       BadURL on an invalid branch/tag name. */
+    std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override
+    {
+        if (maybeGetStrAttr(attrs, "type") != "git") return {};
+
+        for (auto & [name, value] : attrs) {
+            const bool known =
+                name == "type" || name == "url" || name == "ref" || name == "rev";
+            if (!known)
+                throw Error("unsupported Git input attribute '%s'", name);
+        }
+
+        auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url")));
+
+        if (auto ref = maybeGetStrAttr(attrs, "ref")) {
+            if (!std::regex_match(*ref, refRegex))
+                throw BadURL("invalid Git branch/tag name '%s'", *ref);
+            input->ref = *ref;
+        }
+
+        if (auto rev = maybeGetStrAttr(attrs, "rev"))
+            input->rev = Hash(*rev, htSHA1);
+
+        return input;
+    }
+};
+
+/* Register the Git scheme with the global scheme list at program startup. */
+static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitInputScheme>()); });
+
+}
diff --git a/src/libstore/fetchers/github.cc b/src/libstore/fetchers/github.cc
new file mode 100644
index 000000000..0a000e83f
--- /dev/null
+++ b/src/libstore/fetchers/github.cc
@@ -0,0 +1,212 @@
+#include "fetchers.hh"
+#include "download.hh"
+#include "globals.hh"
+#include "parse.hh"
+#include "regex.hh"
+#include "store-api.hh"
+
+#include <nlohmann/json.hpp>
+
+namespace nix::fetchers {
+
+/* Patterns for GitHub owner and repository names.
+   NOTE(review): neither regex is referenced anywhere else in this file, so
+   owner/repo are currently not validated against them -- confirm whether
+   another translation unit uses them. */
+std::regex ownerRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript);
+std::regex repoRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript);
+
+/* An input referring to a GitHub repository ("github:<owner>/<repo>"),
+   optionally pinned to a branch/tag (`ref`) or to a commit (`rev`). */
+struct GitHubInput : Input
+{
+    std::string owner;
+    std::string repo;
+    std::optional<std::string> ref;
+    std::optional<Hash> rev;
+
+    std::string type() const override { return "github"; }
+
+    /* Equal when owner, repo, rev and ref all agree; an input of any other
+       type is never equal. */
+    bool operator ==(const Input & other) const override
+    {
+        auto other2 = dynamic_cast<const GitHubInput *>(&other);
+        return
+            other2
+            && owner == other2->owner
+            && repo == other2->repo
+            && rev == other2->rev
+            && ref == other2->ref;
+    }
+
+    /* Immutable once pinned to a commit. */
+    bool isImmutable() const override
+    {
+        return (bool) rev;
+    }
+
+    std::optional<std::string> getRef() const override { return ref; }
+
+    std::optional<Hash> getRev() const override { return rev; }
+
+    /* Render as "github:<owner>/<repo>[/<ref-or-rev>]". An input must not
+       have both a ref and a rev at this point (asserted). */
+    std::string to_string() const override
+    {
+        auto s = fmt("github:%s/%s", owner, repo);
+        assert(!(ref && rev));
+        if (ref) s += "/" + *ref;
+        if (rev) s += "/" + rev->to_string(Base16, false);
+        return s;
+    }
+
+    /* Scheme-specific attributes: "owner", "repo", plus "ref"/"rev" when
+       set. */
+    Attrs toAttrsInternal() const override
+    {
+        Attrs attrs;
+        attrs.emplace("owner", owner);
+        attrs.emplace("repo", repo);
+        if (ref)
+            attrs.emplace("ref", *ref);
+        if (rev)
+            attrs.emplace("rev", rev->gitRev());
+        return attrs;
+    }
+
+    /* Clone the repository over SSH by delegating to the input created for
+       the corresponding git+ssh URL; the branch defaults to "master". */
+    void clone(const Path & destDir) const override
+    {
+        std::shared_ptr<const Input> input = inputFromURL(fmt("git+ssh://git@github.com/%s/%s.git", owner, repo));
+        input = input->applyOverrides(ref.value_or("master"), rev);
+        input->clone(destDir);
+    }
+
+    /* Fetch the repository contents as a tarball from the GitHub API.
+
+       When no rev is given, the commit that `ref` (default "master")
+       points to is resolved first via the commits API. The returned input
+       has `ref` cleared and `rev` set to the commit that was fetched. */
+    std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override
+    {
+        auto rev = this->rev;
+
+        #if 0
+        if (rev) {
+            if (auto gitInfo = lookupGitInfo(store, "source", *rev))
+                return *gitInfo;
+        }
+        #endif
+
+        if (!rev) {
+            auto url = fmt("https://api.github.com/repos/%s/%s/commits/%s",
+                owner, repo, ref ? *ref : "master");
+            CachedDownloadRequest request(url);
+            // `rev` is always empty in this branch, so the branch head is
+            // mutable and the regular tarball TTL applies.
+            request.ttl = settings.tarballTtl;
+            auto result = getDownloader()->downloadCached(store, request);
+            auto json = nlohmann::json::parse(readFile(result.path));
+            rev = Hash(json["sha"], htSHA1);
+            debug("HEAD revision for '%s' is %s", url, rev->gitRev());
+        }
+
+        // FIXME: use regular /archive URLs instead? api.github.com
+        // might have stricter rate limits.
+
+        auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s",
+            owner, repo, rev->to_string(Base16, false));
+
+        std::string accessToken = settings.githubAccessToken.get();
+        if (accessToken != "")
+            url += "?access_token=" + accessToken;
+
+        // A tarball for a fixed commit never changes, hence the very long
+        // TTL.
+        CachedDownloadRequest request(url);
+        request.unpack = true;
+        request.name = "source";
+        request.ttl = 1000000000;
+        request.getLastModified = true;
+        auto dresult = getDownloader()->downloadCached(store, request);
+
+        assert(dresult.lastModified);
+
+        Tree result{
+            .actualPath = dresult.path,
+            .storePath = store->parseStorePath(dresult.storePath),
+            .info = TreeInfo {
+                .lastModified = *dresult.lastModified,
+            },
+        };
+
+        #if 0
+        // FIXME: this can overwrite a cache file that contains a revCount.
+        cacheGitInfo("source", gitInfo);
+        #endif
+
+        auto input = std::make_shared<GitHubInput>(*this);
+        input->ref = {};
+        input->rev = *rev;
+
+        return {std::move(result), input};
+    }
+
+    /* Return a copy of this input with `ref` and/or `rev` replaced, or
+       this input itself when neither is given. */
+    std::shared_ptr<const Input> applyOverrides(
+        std::optional<std::string> ref,
+        std::optional<Hash> rev) const override
+    {
+        if (!ref && !rev) return shared_from_this();
+
+        auto res = std::make_shared<GitHubInput>(*this);
+
+        if (ref) res->ref = ref;
+        if (rev) res->rev = rev;
+
+        return res;
+    }
+};
+
+/* Input scheme for "github:" URLs and attribute sets whose "type" is
+   "github". */
+struct GitHubInputScheme : InputScheme
+{
+    /* Parse "github:<owner>/<repo>[/<rev-or-ref>]" with optional "rev" and
+       "ref" query parameters. Returns nullptr for other URL schemes and
+       throws BadURL for malformed GitHub URLs. */
+    std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override
+    {
+        if (url.scheme != "github") return nullptr;
+
+        auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
+        auto input = std::make_unique<GitHubInput>();
+
+        if (path.size() == 2) {
+        } else if (path.size() == 3) {
+            // The third component is a commit hash if it looks like one,
+            // otherwise a branch/tag name.
+            if (std::regex_match(path[2], revRegex))
+                input->rev = Hash(path[2], htSHA1);
+            else if (std::regex_match(path[2], refRegex))
+                input->ref = path[2];
+            else
+                throw BadURL("in GitHub URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
+        } else
+            throw BadURL("GitHub URL '%s' is invalid", url.url);
+
+        for (auto &[name, value] : url.query) {
+            if (name == "rev") {
+                if (input->rev)
+                    throw BadURL("GitHub URL '%s' contains multiple commit hashes", url.url);
+                input->rev = Hash(value, htSHA1);
+            }
+            else if (name == "ref") {
+                if (!std::regex_match(value, refRegex))
+                    throw BadURL("GitHub URL '%s' contains an invalid branch/tag name", url.url);
+                if (input->ref)
+                    throw BadURL("GitHub URL '%s' contains multiple branch/tag names", url.url);
+                input->ref = value;
+            }
+        }
+
+        if (input->ref && input->rev)
+            throw BadURL("GitHub URL '%s' contains both a commit hash and a branch/tag name", url.url);
+
+        input->owner = path[0];
+        input->repo = path[1];
+
+        return input;
+    }
+
+    /* Build a GitHubInput from attributes. Returns null unless "type" is
+       "github"; throws Error on unknown attributes or a missing
+       "owner"/"repo", and BadURL on an invalid branch/tag name. */
+    std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override
+    {
+        if (maybeGetStrAttr(attrs, "type") != "github") return {};
+
+        for (auto & [name, value] : attrs)
+            if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev")
+                throw Error("unsupported GitHub input attribute '%s'", name);
+
+        auto input = std::make_unique<GitHubInput>();
+        input->owner = getStrAttr(attrs, "owner");
+        input->repo = getStrAttr(attrs, "repo");
+        // Validate the ref exactly as the URL form does; it ends up
+        // inside a GitHub API URL when the input is fetched.
+        if (auto ref = maybeGetStrAttr(attrs, "ref")) {
+            if (!std::regex_match(*ref, refRegex))
+                throw BadURL("invalid Git branch/tag name '%s'", *ref);
+            input->ref = *ref;
+        }
+        if (auto rev = maybeGetStrAttr(attrs, "rev"))
+            input->rev = Hash(*rev, htSHA1);
+        return input;
+    }
+};
+
+/* Register the GitHub scheme with the global scheme list at program startup. */
+static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<GitHubInputScheme>()); });
+
+}
diff --git a/src/libstore/fetchers/indirect.cc b/src/libstore/fetchers/indirect.cc
new file mode 100644
index 000000000..016f5fb39
--- /dev/null
+++ b/src/libstore/fetchers/indirect.cc
@@ -0,0 +1,142 @@
+#include "fetchers.hh"
+#include "parse.hh"
+#include "regex.hh"
+
+namespace nix::fetchers {
+
+/* Pattern for a valid flake ID: a letter followed by letters, digits,
+   underscores or dashes. */
+std::regex flakeRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript);
+
+/* An input that names a flake by ID ("flake:<id>[/<ref>][/<rev>]") rather
+   than by location. It is not "direct" and cannot be fetched itself. */
+struct IndirectInput : Input
+{
+    std::string id;
+    std::optional<Hash> rev;
+    std::optional<std::string> ref;
+
+    std::string type() const override { return "indirect"; }
+
+    /* Equal when id, rev and ref all agree; an input of any other type is
+       never equal. */
+    bool operator ==(const Input & other) const override
+    {
+        const auto * that = dynamic_cast<const IndirectInput *>(&other);
+        if (!that) return false;
+        return id == that->id && rev == that->rev && ref == that->ref;
+    }
+
+    bool isDirect() const override { return false; }
+
+    std::optional<std::string> getRef() const override { return ref; }
+
+    std::optional<Hash> getRev() const override { return rev; }
+
+    /* True if `other` is an indirect input with the same id whose ref and
+       rev match ours wherever we specify them. */
+    bool contains(const Input & other) const override
+    {
+        const auto * that = dynamic_cast<const IndirectInput *>(&other);
+        if (!that || id != that->id) return false;
+        const bool refOk = !ref || ref == that->ref;
+        const bool revOk = !rev || rev == that->rev;
+        return refOk && revOk;
+    }
+
+    /* Render as a "flake" URL whose path is the id followed by the ref
+       and the rev, when set. */
+    std::string to_string() const override
+    {
+        ParsedURL url;
+        url.scheme = "flake";
+        url.path = id;
+        if (ref) url.path += "/" + *ref;
+        if (rev) url.path += "/" + rev->gitRev();
+        return url.to_string();
+    }
+
+    /* Scheme-specific attributes: "id", plus "ref"/"rev" when set. */
+    Attrs toAttrsInternal() const override
+    {
+        Attrs result;
+        result.emplace("id", id);
+        if (ref.has_value())
+            result.emplace("ref", *ref);
+        if (rev.has_value())
+            result.emplace("rev", rev->gitRev());
+        return result;
+    }
+
+    /* Return a copy of this input with `ref` and/or `rev` replaced, or
+       this input itself when neither is given. */
+    std::shared_ptr<const Input> applyOverrides(
+        std::optional<std::string> ref,
+        std::optional<Hash> rev) const override
+    {
+        const bool nothingToDo = !ref && !rev;
+        if (nothingToDo) return shared_from_this();
+
+        auto updated = std::make_shared<IndirectInput>(*this);
+        if (ref) updated->ref = ref;
+        if (rev) updated->rev = rev;
+        return updated;
+    }
+
+    /* Indirect inputs cannot be fetched directly; always throws Error. */
+    std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override
+    {
+        throw Error("indirect input '%s' cannot be fetched directly", to_string());
+    }
+};
+
+/* Input scheme for "flake:" URLs and attribute sets whose "type" is
+   "indirect". */
+struct IndirectInputScheme : InputScheme
+{
+    /* Parse "flake:<id>", "flake:<id>/<rev-or-ref>" or
+       "flake:<id>/<ref>/<rev>". Returns nullptr for other URL schemes and
+       throws BadURL for malformed flake URLs. */
+    std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override
+    {
+        if (url.scheme != "flake") return nullptr;
+
+        auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
+        auto input = std::make_unique<IndirectInput>();
+
+        if (path.size() == 1) {
+        } else if (path.size() == 2) {
+            // A single extra component is a commit hash if it looks like
+            // one, otherwise a branch/tag name.
+            if (std::regex_match(path[1], revRegex))
+                input->rev = Hash(path[1], htSHA1);
+            else if (std::regex_match(path[1], refRegex))
+                input->ref = path[1];
+            else
+                throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
+        } else if (path.size() == 3) {
+            if (!std::regex_match(path[1], refRegex))
+                throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
+            input->ref = path[1];
+            if (!std::regex_match(path[2], revRegex))
+                throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
+            input->rev = Hash(path[2], htSHA1);
+        } else
+            // This is a flake URL, not a GitHub one.
+            throw BadURL("flake URL '%s' is invalid", url.url);
+
+        // FIXME: forbid query params?
+
+        input->id = path[0];
+        if (!std::regex_match(input->id, flakeRegex))
+            throw BadURL("'%s' is not a valid flake ID", input->id);
+
+        return input;
+    }
+
+    /* Build an IndirectInput from attributes. Returns null unless "type"
+       is "indirect"; throws Error on unknown attributes or a missing "id",
+       and BadURL on an invalid flake ID or branch/tag name. */
+    std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override
+    {
+        if (maybeGetStrAttr(attrs, "type") != "indirect") return {};
+
+        for (auto & [name, value] : attrs)
+            if (name != "type" && name != "id" && name != "ref" && name != "rev")
+                throw Error("unsupported indirect input attribute '%s'", name);
+
+        auto input = std::make_unique<IndirectInput>();
+
+        // Apply the same validation as the URL form, so that to_string()
+        // always produces a URL that inputFromURL() accepts.
+        input->id = getStrAttr(attrs, "id");
+        if (!std::regex_match(input->id, flakeRegex))
+            throw BadURL("'%s' is not a valid flake ID", input->id);
+
+        if (auto ref = maybeGetStrAttr(attrs, "ref")) {
+            if (!std::regex_match(*ref, refRegex))
+                throw BadURL("invalid Git branch/tag name '%s'", *ref);
+            input->ref = *ref;
+        }
+
+        if (auto rev = maybeGetStrAttr(attrs, "rev"))
+            input->rev = Hash(*rev, htSHA1);
+        return input;
+    }
+};
+
+/* Register the indirect (flake ID) scheme with the global scheme list at
+   program startup. */
+static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<IndirectInputScheme>()); });
+
+}
diff --git a/src/libstore/fetchers/mercurial.cc b/src/libstore/fetchers/mercurial.cc
new file mode 100644
index 000000000..6ab0add1d
--- /dev/null
+++ b/src/libstore/fetchers/mercurial.cc
@@ -0,0 +1,331 @@
+#include "fetchers.hh"
+#include "parse.hh"
+#include "globals.hh"
+#include "tarfile.hh"
+#include "store-api.hh"
+#include "regex.hh"
+
+#include <sys/time.h>
+
+#include <nlohmann/json.hpp>
+
+using namespace std::string_literals;
+
+namespace nix::fetchers {
+
+struct MercurialInput : Input // Input backed by a Mercurial repository: a URL plus an optional ref and rev.
+{
+ ParsedURL url; // repository location, without the "hg+" scheme prefix (to_string() adds it back)
+ std::optional<std::string> ref; // branch/tag name, if any
+ std::optional<Hash> rev; // SHA-1 commit hash, if any
+
+ MercurialInput(const ParsedURL & url) : url(url)
+ { }
+
+ std::string type() const override { return "hg"; }
+
+ bool operator ==(const Input & other) const override // equal iff other is a MercurialInput with the same url, rev and ref
+ {
+ auto other2 = dynamic_cast<const MercurialInput *>(&other);
+ return
+ other2
+ && url == other2->url
+ && rev == other2->rev
+ && ref == other2->ref;
+ }
+
+ bool isImmutable() const override // immutable only when a revision is set
+ {
+ return (bool) rev;
+ }
+
+ std::optional<std::string> getRef() const override { return ref; }
+
+ std::optional<Hash> getRev() const override { return rev; }
+
+ std::string to_string() const override // renders an "hg+<scheme>" URL with rev/ref as query parameters
+ {
+ ParsedURL url2(url);
+ url2.scheme = "hg+" + url2.scheme;
+ if (rev) url2.query.insert_or_assign("rev", rev->gitRev());
+ if (ref) url2.query.insert_or_assign("ref", *ref);
+ return url2.to_string();
+ }
+
+ Attrs toAttrsInternal() const override // attributes "url" plus "ref"/"rev" when set
+ {
+ Attrs attrs;
+ attrs.emplace("url", url.to_string());
+ if (ref)
+ attrs.emplace("ref", *ref);
+ if (rev)
+ attrs.emplace("rev", rev->gitRev());
+ return attrs;
+ }
+
+ std::shared_ptr<const Input> applyOverrides( // returns this input itself if nothing is overridden, otherwise a modified copy
+ std::optional<std::string> ref,
+ std::optional<Hash> rev) const override
+ {
+ if (!ref && !rev) return shared_from_this();
+
+ auto res = std::make_shared<MercurialInput>(*this);
+
+ if (ref) res->ref = ref;
+ if (rev) res->rev = rev;
+
+ return res;
+ }
+
+ std::optional<Path> getSourcePath() const // local path, only for file URLs with neither ref nor rev
+ {
+ if (url.scheme == "file" && !ref && !rev)
+ return url.path;
+ return {};
+ }
+
+ void markChangedFile(std::string_view file, std::optional<std::string> commitMsg) const override // runs 'hg add' on the file, and 'hg commit' when commitMsg is given; requires getSourcePath() to be set
+ {
+ auto sourcePath = getSourcePath();
+ assert(sourcePath);
+
+ // FIXME: shut up if file is already tracked.
+ runProgram("hg", true,
+ { "add", *sourcePath + "/" + std::string(file) });
+
+ if (commitMsg)
+ runProgram("hg", true,
+ { "commit", *sourcePath + "/" + std::string(file), "-m", *commitMsg });
+ }
+
+ std::pair<bool, std::string> getActualUrl() const // {isLocal, url.path for file URLs / url.base otherwise}
+ {
+ bool isLocal = url.scheme == "file";
+ return {isLocal, isLocal ? url.path : url.base};
+ }
+
+ std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override // copies the repository contents into the store; returns the tree and a copy of this input with ref (and, for committed revisions, rev) filled in
+ {
+ auto name = "source";
+
+ auto input = std::make_shared<MercurialInput>(*this);
+
+ auto [isLocal, actualUrl_] = getActualUrl();
+ auto actualUrl = actualUrl_; // work around clang bug
+
+ // FIXME: return lastModified.
+
+ // FIXME: don't clone local repositories.
+
+ if (!input->ref && !input->rev && isLocal && pathExists(actualUrl + "/.hg")) { // local working copy with no explicit ref/rev: check whether it is dirty
+
+ bool clean = runProgram("hg", true, { "status", "-R", actualUrl, "--modified", "--added", "--removed" }) == ""; // empty output means nothing is modified, added or removed
+
+ if (!clean) {
+
+ /* This is an unclean working tree. So copy all tracked
+ files. */
+
+ if (!settings.allowDirty)
+ throw Error("Mercurial tree '%s' is unclean", actualUrl);
+
+ if (settings.warnDirty)
+ warn("Mercurial tree '%s' is unclean", actualUrl);
+
+ input->ref = chomp(runProgram("hg", true, { "branch", "-R", actualUrl }));
+
+ auto files = tokenizeString<std::set<std::string>>(
+ runProgram("hg", true, { "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s);
+
+ PathFilter filter = [&](const Path & p) -> bool { // accepts listed files, and directories that contain at least one listed file
+ assert(hasPrefix(p, actualUrl));
+ std::string file(p, actualUrl.size() + 1); // path relative to the repository root
+
+ auto st = lstat(p);
+
+ if (S_ISDIR(st.st_mode)) {
+ auto prefix = file + "/";
+ auto i = files.lower_bound(prefix); // first listed file sorting at or after "<dir>/"
+ return i != files.end() && hasPrefix(*i, prefix);
+ }
+
+ return files.count(file);
+ };
+
+ auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter);
+
+ return {Tree {
+ .actualPath = store->printStorePath(storePath),
+ .storePath = std::move(storePath),
+ }, input};
+ }
+ }
+
+ if (!input->ref) input->ref = "default"; // fall back to the ref "default" when none was given
+
+ Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, actualUrl).to_string(Base32, false)); // per-repository cache directory, keyed by a hash of the URL
+
+ assert(input->rev || input->ref);
+ auto revOrRef = input->rev ? input->rev->gitRev() : *input->ref;
+
+ Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, hashString(htSHA512, revOrRef).to_string(Base32, false)); // its mtime is compared against tarballTtl below
+
+ /* If we haven't pulled this repo less than ‘tarball-ttl’ seconds,
+ do so now. */
+ time_t now = time(0);
+ struct stat st;
+ if (stat(stampFile.c_str(), &st) != 0 ||
+ (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now)
+ {
+ /* Except that if this is a commit hash that we already have,
+ we don't have to pull again. */
+ if (!(input->rev
+ && pathExists(cacheDir)
+ && runProgram(
+ RunOptions("hg", { "log", "-R", cacheDir, "-r", input->rev->gitRev(), "--template", "1" })
+ .killStderr(true)).second == "1"))
+ {
+ Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", actualUrl));
+
+ if (pathExists(cacheDir)) { // existing cache: pull into it; otherwise clone below
+ try {
+ runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl });
+ }
+ catch (ExecError & e) {
+ string transJournal = cacheDir + "/.hg/store/journal";
+ /* hg throws "abandoned transaction" error only if this file exists */
+ if (pathExists(transJournal)) {
+ runProgram("hg", true, { "recover", "-R", cacheDir });
+ runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl });
+ } else {
+ throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status)));
+ }
+ }
+ } else {
+ createDirs(dirOf(cacheDir));
+ runProgram("hg", true, { "clone", "--noupdate", "--", actualUrl, cacheDir });
+ }
+ }
+
+ writeFile(stampFile, ""); // (re)write the stamp file; its mtime is what the TTL check above reads
+ }
+
+ auto tokens = tokenizeString<std::vector<std::string>>(
+ runProgram("hg", true, { "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); // resolve revOrRef to node hash, revision number and branch
+ assert(tokens.size() == 3);
+
+ input->rev = Hash(tokens[0], htSHA1);
+ auto revCount = std::stoull(tokens[1]);
+ input->ref = tokens[2];
+
+ std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + input->rev->gitRev()).to_string(Base32, false);
+ Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); // JSON record mapping (name, rev) to a store path
+
+ try {
+ auto json = nlohmann::json::parse(readFile(storeLink));
+
+ assert(json["name"] == name && json["rev"] == input->rev->gitRev());
+
+ auto storePath = store->parseStorePath((std::string) json["storePath"]);
+
+ if (store->isValidPath(storePath)) {
+ printTalkative("using cached Mercurial store path '%s'", store->printStorePath(storePath));
+ return {
+ Tree {
+ .actualPath = store->printStorePath(storePath),
+ .storePath = std::move(storePath),
+ .info = TreeInfo {
+ .revCount = revCount,
+ },
+ },
+ input
+ };
+ }
+
+ } catch (SysError & e) {
+ if (e.errNo != ENOENT) throw; // a missing link file just means nothing is cached yet
+ }
+
+ Path tmpDir = createTempDir();
+ AutoDelete delTmpDir(tmpDir, true);
+
+ runProgram("hg", true, { "archive", "-R", cacheDir, "-r", input->rev->gitRev(), tmpDir }); // export the revision into the temporary directory
+
+ deletePath(tmpDir + "/.hg_archival.txt"); // presumably a metadata file written by 'hg archive' -- confirm
+
+ auto storePath = store->addToStore(name, tmpDir);
+
+ nlohmann::json json;
+ json["storePath"] = store->printStorePath(storePath);
+ json["uri"] = actualUrl;
+ json["name"] = name;
+ json["branch"] = *input->ref;
+ json["rev"] = input->rev->gitRev();
+ json["revCount"] = revCount;
+
+ writeFile(storeLink, json.dump()); // record the store path so later fetches of this rev can reuse it
+
+ return {
+ Tree {
+ .actualPath = store->printStorePath(storePath),
+ .storePath = std::move(storePath),
+ .info = TreeInfo {
+ .revCount = revCount
+ }
+ },
+ input
+ };
+ }
+};
+
+/* Input scheme that recognises Mercurial inputs, given either as
+   'hg+<transport>' URLs or as attribute sets of type "hg". */
+struct MercurialInputScheme : InputScheme
+{
+    /* Accepts 'hg+http', 'hg+https', 'hg+ssh' and 'hg+file' URLs and
+       returns nullptr for anything else. The 'rev' and 'ref' query
+       parameters become input attributes; every other parameter stays
+       part of the repository URL. */
+    std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override
+    {
+        const bool supported =
+            url.scheme == "hg+http"
+            || url.scheme == "hg+https"
+            || url.scheme == "hg+ssh"
+            || url.scheme == "hg+file";
+        if (!supported) return nullptr;
+
+        // Drop the "hg+" prefix; the query is rebuilt below.
+        auto repoUrl(url);
+        repoUrl.scheme = std::string(repoUrl.scheme, 3);
+        repoUrl.query.clear();
+
+        Input::Attrs attrs;
+        attrs.emplace("type", "hg");
+
+        for (auto & [key, val] : url.query) {
+            const bool isRevOrRef = key == "rev" || key == "ref";
+            if (isRevOrRef)
+                attrs.emplace(key, val);
+            else
+                repoUrl.query.emplace(key, val);
+        }
+
+        attrs.emplace("url", repoUrl.to_string());
+
+        return inputFromAttrs(attrs);
+    }
+
+    /* Builds a MercurialInput from attributes of type "hg"; returns an
+       empty pointer for other types. Throws Error on an unknown
+       attribute and BadURL on a malformed 'ref'. */
+    std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override
+    {
+        if (maybeGetStrAttr(attrs, "type") != "hg") return {};
+
+        for (auto & [key, val] : attrs) {
+            const bool known =
+                key == "type" || key == "url" || key == "ref" || key == "rev";
+            if (!known)
+                throw Error("unsupported Mercurial input attribute '%s'", key);
+        }
+
+        auto input = std::make_unique<MercurialInput>(parseURL(getStrAttr(attrs, "url")));
+
+        auto ref = maybeGetStrAttr(attrs, "ref");
+        if (ref) {
+            if (!std::regex_match(*ref, refRegex))
+                throw BadURL("invalid Mercurial branch/tag name '%s'", *ref);
+            input->ref = *ref;
+        }
+
+        auto rev = maybeGetStrAttr(attrs, "rev");
+        if (rev)
+            input->rev = Hash(*rev, htSHA1);
+
+        return input;
+    }
+};
+
+static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<MercurialInputScheme>()); }); // file-local object whose initializer registers MercurialInputScheme (OnStartup presumably runs the lambda during static initialization -- confirm)
+
+}
diff --git a/src/libstore/fetchers/parse.cc b/src/libstore/fetchers/parse.cc
new file mode 100644
index 000000000..a5ad14c87
--- /dev/null
+++ b/src/libstore/fetchers/parse.cc
@@ -0,0 +1,138 @@
+#include "parse.hh"
+#include "util.hh"
+#include "regex.hh"
+
+namespace nix::fetchers {
+
+std::regex refRegex(refRegexS, std::regex::ECMAScript); // compiled form of the pattern string refRegexS
+std::regex revRegex(revRegexS, std::regex::ECMAScript); // compiled form of the pattern string revRegexS
+std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript); // compiled form of the pattern string flakeIdRegexS
+
+/* Split 'url' into scheme, optional authority, path, query and
+   fragment. Throws BadURL if the string does not match the URI
+   grammar, and Error if a file URL carries a non-empty authority. */
+ParsedURL parseURL(const std::string & url)
+{
+    static std::regex uriRegex(
+        "((" + schemeRegex + "):"
+        + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+        + "(?:\\?(" + queryRegex + "))?"
+        + "(?:#(" + queryRegex + "))?",
+        std::regex::ECMAScript);
+
+    std::smatch m;
+    if (!std::regex_match(url, m, uriRegex))
+        throw BadURL("'%s' is not a valid URL", url);
+
+    // Groups: 1 = URL without query/fragment, 2 = scheme, 3 = authority,
+    // 4 = path following an authority, 5 = path without authority,
+    // 6 = query, 7 = fragment.
+    std::string scheme = m[2].str();
+
+    std::optional<std::string> authority;
+    if (m[3].matched)
+        authority = m[3].str();
+
+    std::string path = m[4].matched ? m[4].str() : m[5].str();
+
+    // Any scheme containing "file" counts as a file URL here.
+    const bool isFile = scheme.find("file") != std::string::npos;
+
+    if (isFile && authority && *authority != "")
+        throw Error("file:// URL '%s' has unexpected authority '%s'",
+            url, *authority);
+
+    if (isFile && path.empty())
+        path = "/";
+
+    return ParsedURL{
+        .url = url,
+        .base = m[1].str(),
+        .scheme = scheme,
+        .authority = authority,
+        .path = path,
+        .query = decodeQuery(m[6].str()),
+        .fragment = percentDecode(m[7].str())
+    };
+}
+
+/* Replace every percent-escape ("%XX", two hexadecimal digits) in 'in'
+   by the byte it denotes; all other characters are copied unchanged.
+   Throws BadURL if a '%' is not followed by exactly two hex digits. */
+std::string percentDecode(std::string_view in)
+{
+    // Numeric value of a hexadecimal digit, or -1 if 'c' is not one.
+    // Done by hand because std::stoul also accepts signs, leading
+    // whitespace and trailing garbage, which would let malformed
+    // escapes such as "%1g" or "%+1" decode silently.
+    auto hexValue = [](char c) -> int {
+        if (c >= '0' && c <= '9') return c - '0';
+        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+        return -1;
+    };
+
+    std::string decoded;
+    decoded.reserve(in.size());
+
+    for (size_t i = 0; i < in.size(); ) {
+        if (in[i] != '%') {
+            decoded += in[i++];
+            continue;
+        }
+
+        // Both digits must lie inside the input.
+        if (i + 2 >= in.size())
+            throw BadURL("invalid URI parameter '%s'", in);
+
+        const int hi = hexValue(in[i + 1]);
+        const int lo = hexValue(in[i + 2]);
+        if (hi < 0 || lo < 0)
+            throw BadURL("invalid URI parameter '%s'", in);
+
+        decoded += (char) (hi * 16 + lo);
+        i += 3;
+    }
+
+    return decoded;
+}
+
+/* Parse a URL query string ("a=1&b=2") into a map. Both names and
+   values are percent-decoded, mirroring encodeQuery(), which encodes
+   both. Parameters without '=' are ignored, and when a name occurs
+   more than once the first occurrence wins. Throws BadURL on a
+   malformed percent-escape. */
+std::map<std::string, std::string> decodeQuery(const std::string & query)
+{
+    std::map<std::string, std::string> result;
+
+    for (const auto & s : tokenizeString<Strings>(query, "&")) {
+        auto e = s.find('=');
+        if (e == std::string::npos) continue;
+
+        std::string_view param(s);
+        result.emplace(
+            percentDecode(param.substr(0, e)),
+            percentDecode(param.substr(e + 1)));
+    }
+
+    return result;
+}
+
+/* Percent-encode 's': ASCII letters, digits and the characters
+   -._~!$&'()*+,;=:@ are copied unchanged, every other byte becomes
+   "%xx" with two lower-case hex digits. */
+std::string percentEncode(std::string_view s)
+{
+    std::string res;
+    for (auto & c : s)
+        if ((c >= 'a' && c <= 'z')
+            || (c >= 'A' && c <= 'Z')
+            || (c >= '0' && c <= '9')
+            // strchr() also matches the terminating NUL of the set,
+            // so NUL has to be excluded explicitly.
+            || (c != '\0' && strchr("-._~!$&'()*+,;=:@", c)))
+            res += c;
+        else
+            // Go through unsigned char first: where char is signed, a
+            // byte >= 0x80 would otherwise sign-extend and print as
+            // "%ffffffxx".
+            res += fmt("%%%02x", (unsigned int) (unsigned char) c);
+    return res;
+}
+
+/* Serialise a parameter map as "name=value" pairs joined by '&', with
+   names and values percent-encoded. */
+std::string encodeQuery(const std::map<std::string, std::string> & ss)
+{
+    std::string out;
+    for (auto it = ss.begin(); it != ss.end(); ++it) {
+        // Separator before every pair except the first.
+        if (it != ss.begin()) out += '&';
+        out += percentEncode(it->first);
+        out += '=';
+        out += percentEncode(it->second);
+    }
+    return out;
+}
+
+/* Reassemble the URL from its components. The query and fragment are
+   percent-encoded; scheme, authority and path are emitted as stored. */
+std::string ParsedURL::to_string() const
+{
+    std::string out = scheme;
+    out += ":";
+
+    if (authority) {
+        out += "//";
+        out += *authority;
+    }
+
+    out += path;
+
+    if (!query.empty()) {
+        out += "?";
+        out += encodeQuery(query);
+    }
+
+    if (!fragment.empty()) {
+        out += "#";
+        out += percentEncode(fragment);
+    }
+
+    return out;
+}
+
+/* Two URLs are equal when scheme, authority, path, query and fragment
+   all match; the 'url' and 'base' strings are not compared. */
+bool ParsedURL::operator ==(const ParsedURL & other) const
+{
+    if (!(scheme == other.scheme)) return false;
+    if (!(authority == other.authority)) return false;
+    if (!(path == other.path)) return false;
+    if (!(query == other.query)) return false;
+    return fragment == other.fragment;
+}
+
+}
diff --git a/src/libstore/fetchers/parse.hh b/src/libstore/fetchers/parse.hh
new file mode 100644
index 000000000..45d5182b0
--- /dev/null
+++ b/src/libstore/fetchers/parse.hh
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "types.hh"
+
+namespace nix::fetchers {
+
+struct ParsedURL // The components of a URL, as filled in by parseURL().
+{
+ std::string url; // the URL string exactly as passed to parseURL()
+ std::string base; // URL without query/fragment
+ std::string scheme; // the part before the first ':'
+ std::optional<std::string> authority; // set only for URLs of the form scheme://authority/...
+ std::string path;
+ std::map<std::string, std::string> query; // parameters as returned by decodeQuery()
+ std::string fragment; // percent-decoded text after '#'
+
+ std::string to_string() const; // rebuilds the URL from scheme, authority, path, query and fragment
+
+ bool operator ==(const ParsedURL & other) const; // compares scheme, authority, path, query and fragment; url and base are not compared
+};
+
+MakeError(BadURL, Error); // error type thrown for malformed URLs (MakeError presumably declares BadURL as a subclass of Error -- confirm)
+
+std::string percentDecode(std::string_view in); // replaces "%XX" escapes by the bytes they denote; throws BadURL on a malformed escape
+
+std::map<std::string, std::string> decodeQuery(const std::string & query); // splits "a=1&b=2" on '&' and '='; parameters without '=' are dropped
+
+ParsedURL parseURL(const std::string & url); // throws BadURL if 'url' does not match the URI grammar
+
+}
diff --git a/src/libstore/fetchers/regex.hh b/src/libstore/fetchers/regex.hh
new file mode 100644
index 000000000..e0989edfc
--- /dev/null
+++ b/src/libstore/fetchers/regex.hh
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <regex>
+
+namespace nix::fetchers {
+
+// URI stuff.
+const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; // a percent-escape: '%' followed by two hex digits
+const static std::string schemeRegex = "(?:[a-z+]+)"; // lower-case letters and '+' only
+const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; // hex digits and ':' enclosed in square brackets
+const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
+const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
+const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; // may be empty
+const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
+const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
+const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; // [user@]host[:port]
+const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
+const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; // parseURL() uses this for the fragment as well
+const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; // one non-empty path component
+const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; // path following an authority; may be empty
+const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; // path without an authority; at least one segment
+
+// A Git ref (i.e. branch or tag name).
+const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
+extern std::regex refRegex;
+
+// A Git revision (a SHA-1 commit hash).
+const static std::string revRegexS = "[0-9a-fA-F]{40}";
+extern std::regex revRegex;
+
+// A ref or revision, or a ref followed by a revision.
+const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; // capture groups: 1 = rev alone, 2 = ref, 3 = rev following the ref
+
+const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; // a letter followed by letters, digits, '_' or '-'
+extern std::regex flakeIdRegex;
+
+}
diff --git a/src/libstore/fetchers/registry.cc b/src/libstore/fetchers/registry.cc
new file mode 100644
index 000000000..721af0c9b
--- /dev/null
+++ b/src/libstore/fetchers/registry.cc
@@ -0,0 +1,183 @@
+#include "registry.hh"
+#include "util.hh"
+#include "fetchers.hh"
+#include "globals.hh"
+#include "download.hh"
+
+#include <nlohmann/json.hpp>
+
+namespace nix::fetchers {
+
+/* Load a registry of the given type from the JSON file at 'path'. A
+   missing file yields an empty registry. Format versions 1 (URL
+   strings) and 2 (attribute sets) are understood; any other version
+   is an error. */
+std::shared_ptr<Registry> Registry::read(
+    const Path & path, RegistryType type)
+{
+    auto registry = std::make_shared<Registry>(type);
+
+    if (!pathExists(path))
+        return registry;
+
+    auto json = nlohmann::json::parse(readFile(path));
+
+    auto version = json.value("version", 0);
+
+    switch (version) {
+
+    // FIXME: remove soon
+    case 1: {
+        auto flakes = json["flakes"];
+        for (auto it = flakes.begin(); it != flakes.end(); ++it) {
+            // Accept the older "uri" key as a fallback for "url".
+            auto legacyUri = it->value("uri", "");
+            auto url = it->value("url", legacyUri);
+            if (url.empty())
+                throw Error("flake registry '%s' lacks a 'url' attribute for entry '%s'",
+                    path, it.key());
+            registry->entries.push_back(
+                {inputFromURL(it.key()), inputFromURL(url), {}});
+        }
+        break;
+    }
+
+    case 2: {
+        for (auto & flake : json["flakes"]) {
+            // "dir" is not part of the target input itself; it is kept
+            // as an extra attribute of the entry.
+            auto toAttrs = jsonToAttrs(flake["to"]);
+            Input::Attrs extraAttrs;
+            auto dir = toAttrs.find("dir");
+            if (dir != toAttrs.end()) {
+                extraAttrs.insert(*dir);
+                toAttrs.erase(dir);
+            }
+            registry->entries.push_back(
+                { inputFromAttrs(jsonToAttrs(flake["from"]))
+                , inputFromAttrs(toAttrs)
+                , extraAttrs
+                });
+        }
+        break;
+    }
+
+    default:
+        throw Error("flake registry '%s' has unsupported version %d", path, version);
+    }
+
+    return registry;
+}
+
+/* Serialise the registry as version 2 JSON to 'path', creating the
+   parent directory if needed. The extra attributes of an entry are
+   merged into its "to" object. */
+void Registry::write(const Path & path)
+{
+    nlohmann::json flakes;
+    for (auto & [from, to, extra] : entries) {
+        nlohmann::json flake;
+        flake["from"] = attrsToJson(from->toAttrs());
+        flake["to"] = attrsToJson(to->toAttrs());
+        if (!extra.empty())
+            flake["to"].update(attrsToJson(extra));
+        flakes.emplace_back(std::move(flake));
+    }
+
+    nlohmann::json root;
+    root["version"] = 2;
+    root["flakes"] = std::move(flakes);
+
+    createDirs(dirOf(path));
+    writeFile(path, root.dump(2));
+}
+
+/* Append an entry mapping 'from' to 'to', with the given extra
+   attributes. Existing entries are left untouched. */
+void Registry::add(
+    const std::shared_ptr<const Input> & from,
+    const std::shared_ptr<const Input> & to,
+    const Input::Attrs & extraAttrs)
+{
+    entries.push_back({from, to, extraAttrs});
+}
+
+/* Erase every entry whose 'from' input compares equal to 'input'. */
+void Registry::remove(const std::shared_ptr<const Input> & input)
+{
+    // FIXME: use C++20 std::erase.
+    auto it = entries.begin();
+    while (it != entries.end()) {
+        const bool matches = *std::get<0>(*it) == *input;
+        if (matches)
+            it = entries.erase(it);  // erase() returns the following element
+        else
+            ++it;
+    }
+}
+
+/* Location of the per-user registry file below the home directory. */
+Path getUserRegistryPath()
+{
+    auto home = getHome();
+    return home + "/.config/nix/registry.json";
+}
+
+/* Read the per-user registry from disk; the file is re-read on every
+   call. */
+std::shared_ptr<Registry> getUserRegistry()
+{
+    const auto path = getUserRegistryPath();
+    return Registry::read(path, Registry::User);
+}
+
+static std::shared_ptr<Registry> flagRegistry = // in-memory registry that overrideRegistry() adds entries to; never read from a file here
+ std::make_shared<Registry>(Registry::Flag);
+
+std::shared_ptr<Registry> getFlagRegistry() // returns the shared flagRegistry instance
+{
+ return flagRegistry;
+}
+
+void overrideRegistry( // adds a from -> to entry to the in-memory flag registry, which getRegistries() lists first
+ const std::shared_ptr<const Input> & from,
+ const std::shared_ptr<const Input> & to,
+ const Input::Attrs & extraAttrs)
+{
+ flagRegistry->add(from, to, extraAttrs);
+}
+
+/* Return the global registry named by settings.flakeRegistry. A value
+   that does not start with '/' is passed to the downloader first and
+   the downloaded file is read instead. */
+static std::shared_ptr<Registry> getGlobalRegistry(ref<Store> store)
+{
+    // Function-local static: the lambda runs when this initialisation
+    // first completes; later calls return the same registry.
+    static auto reg = [&]() {
+        auto location = settings.flakeRegistry;
+
+        const bool isLocalPath = hasPrefix(location, "/");
+        if (!isLocalPath) {
+            CachedDownloadRequest request(location);
+            request.name = "flake-registry.json";
+            request.gcRoot = true;
+            location = getDownloader()->downloadCached(store, request).path;
+        }
+
+        return Registry::read(location, Registry::Global);
+    }();
+
+    return reg;
+}
+
+/* All registries in lookup order: flag, user, global. */
+Registries getRegistries(ref<Store> store)
+{
+    // Elements of a braced initialiser list are evaluated left to
+    // right, so the three getters run in the listed order.
+    return Registries{
+        getFlagRegistry(),
+        getUserRegistry(),
+        getGlobalRegistry(store)
+    };
+}
+
+/* Resolve 'input' through the registries returned by getRegistries().
+   The first entry whose 'from' contains the input replaces it by the
+   entry's 'to' (carrying over a ref/rev given on the input when 'from'
+   does not specify one), and the search restarts from the top.
+
+   Returns the resolved input together with the extra attributes of the
+   last entry applied. Throws Error after 100 rewrite rounds (assumed
+   to be a cycle) or if the final input is still not direct. */
+std::pair<std::shared_ptr<const Input>, Input::Attrs> lookupInRegistries(
+    ref<Store> store,
+    std::shared_ptr<const Input> input)
+{
+    Input::Attrs extraAttrs;
+
+    for (int n = 1; ; n++) {
+        // Format the input via to_string(): passing the shared_ptr
+        // itself would print a pointer value.
+        if (n > 100)
+            throw Error("cycle detected in flake registry for '%s'", input->to_string());
+
+        bool rewritten = false;
+
+        for (auto & registry : getRegistries(store)) {
+            // FIXME: O(n)
+            for (auto & entry : registry->entries) {
+                auto from = std::get<0>(entry);
+                if (!from->contains(*input)) continue;
+
+                input = std::get<1>(entry)->applyOverrides(
+                    !from->getRef() && input->getRef() ? input->getRef() : std::optional<std::string>(),
+                    !from->getRev() && input->getRev() ? input->getRev() : std::optional<Hash>());
+                extraAttrs = std::get<2>(entry);
+                rewritten = true;
+                break;
+            }
+            if (rewritten) break;
+        }
+
+        // No entry matched: 'input' is fully resolved.
+        if (!rewritten) break;
+    }
+
+    if (!input->isDirect())
+        throw Error("cannot find flake '%s' in the flake registries", input->to_string());
+
+    return {input, extraAttrs};
+}
+
+}
diff --git a/src/libstore/fetchers/registry.hh b/src/libstore/fetchers/registry.hh
new file mode 100644
index 000000000..6063f51d6
--- /dev/null
+++ b/src/libstore/fetchers/registry.hh
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "types.hh"
+#include "fetchers.hh"
+
+namespace nix { class Store; }
+
+namespace nix::fetchers {
+
+struct Registry // A list of registry entries (from -> to mappings) plus the kind of registry they belong to.
+{
+ enum RegistryType {
+ Flag = 0, // the in-memory registry filled by overrideRegistry()
+ User = 1, // read from getUserRegistryPath()
+ Global = 2, // read from the location in settings.flakeRegistry
+ };
+
+ RegistryType type;
+
+ std::vector<
+ std::tuple<
+ std::shared_ptr<const Input>, // from
+ std::shared_ptr<const Input>, // to
+ Input::Attrs // extra attributes
+ >
+ > entries;
+
+ Registry(RegistryType type)
+ : type(type)
+ { }
+
+ static std::shared_ptr<Registry> read( // loads a registry from a JSON file; a missing file yields an empty registry
+ const Path & path, RegistryType type);
+
+ void write(const Path & path); // writes the entries as version 2 JSON
+
+ void add( // appends an entry
+ const std::shared_ptr<const Input> & from,
+ const std::shared_ptr<const Input> & to,
+ const Input::Attrs & extraAttrs);
+
+ void remove(const std::shared_ptr<const Input> & input); // erases every entry whose 'from' equals input
+};
+
+typedef std::vector<std::shared_ptr<Registry>> Registries;
+
+std::shared_ptr<Registry> getUserRegistry(); // reads the registry file at getUserRegistryPath()
+
+Path getUserRegistryPath(); // <home>/.config/nix/registry.json
+
+Registries getRegistries(ref<Store> store); // flag, user and global registry, in that order
+
+void overrideRegistry( // adds an entry to the in-memory flag registry
+ const std::shared_ptr<const Input> & from,
+ const std::shared_ptr<const Input> & to,
+ const Input::Attrs & extraAttrs);
+
+std::pair<std::shared_ptr<const Input>, Input::Attrs> lookupInRegistries( // resolves input through the registries; throws if the result is not a direct input
+ ref<Store> store,
+ std::shared_ptr<const Input> input);
+
+}
diff --git a/src/libstore/fetchers/tarball.cc b/src/libstore/fetchers/tarball.cc
new file mode 100644
index 000000000..7c0b6690d
--- /dev/null
+++ b/src/libstore/fetchers/tarball.cc
@@ -0,0 +1,131 @@
+#include "fetchers.hh"
+#include "download.hh"
+#include "globals.hh"
+#include "parse.hh"
+#include "store-api.hh"
+
+namespace nix::fetchers {
+
+/* Input that refers to an archive (tarball or zip file) by URL,
+   optionally pinned by a hash of the file ('hash') and/or of the
+   unpacked tree ('narHash', a member used here but declared outside
+   this struct). */
+struct TarballInput : Input
+{
+    ParsedURL url;
+    std::optional<Hash> hash;
+
+    TarballInput(const ParsedURL & url) : url(url)
+    { }
+
+    std::string type() const override { return "tarball"; }
+
+    /* Equal iff 'other' is a TarballInput with the same textual form
+       and the same file hash. */
+    bool operator ==(const Input & other) const override
+    {
+        auto other2 = dynamic_cast<const TarballInput *>(&other);
+        return
+            other2
+            && to_string() == other2->to_string()
+            && hash == other2->hash;
+    }
+
+    /* Pinned by either kind of hash. */
+    bool isImmutable() const override
+    {
+        return hash || narHash;
+    }
+
+    /* The URL with the pinning hash, if any, as a query parameter. */
+    std::string to_string() const override
+    {
+        auto url2(url);
+        // NAR hashes are preferred over file hashes since tar/zip files
+        // don't have a canonical representation.
+        if (narHash)
+            url2.query.insert_or_assign("narHash", narHash->to_string(SRI));
+        else if (hash)
+            url2.query.insert_or_assign("hash", hash->to_string(SRI));
+        return url2.to_string();
+    }
+
+    /* Attribute form: "url" plus "narHash" or, failing that, "hash". */
+    Attrs toAttrsInternal() const override
+    {
+        Attrs attrs;
+        attrs.emplace("url", url.to_string());
+        // Emit the value of narHash itself: 'hash' may be empty or
+        // different when narHash is set, so it must not be used here.
+        if (narHash)
+            attrs.emplace("narHash", narHash->to_string(SRI));
+        else if (hash)
+            attrs.emplace("hash", hash->to_string(SRI));
+        return attrs;
+    }
+
+    /* Download and unpack the archive through the downloader, and
+       return the resulting tree together with a copy of this input
+       whose narHash is set from the store path's info. */
+    std::pair<Tree, std::shared_ptr<const Input>> fetchTreeInternal(nix::ref<Store> store) const override
+    {
+        CachedDownloadRequest request(url.to_string());
+        request.unpack = true;
+        request.getLastModified = true;
+        request.name = "source";
+
+        auto res = getDownloader()->downloadCached(store, request);
+
+        auto input = std::make_shared<TarballInput>(*this);
+
+        auto storePath = store->parseStorePath(res.storePath);
+
+        input->narHash = store->queryPathInfo(storePath)->narHash;
+
+        return {
+            Tree {
+                .actualPath = res.path,
+                .storePath = std::move(storePath),
+                .info = TreeInfo {
+                    // NOTE(review): assumes the downloader fills in
+                    // lastModified because getLastModified was set
+                    // above -- confirm.
+                    .lastModified = *res.lastModified,
+                },
+            },
+            input
+        };
+    }
+};
+
+/* Input scheme for archives referenced by 'file', 'http' or 'https'
+   URLs, or by attribute sets of type "tarball". */
+struct TarballInputScheme : InputScheme
+{
+    /* Accepts the URL only if its scheme is supported and its path
+       ends in a known archive suffix; returns nullptr otherwise. The
+       'hash' and 'narHash' query parameters, when present, pin the
+       input. */
+    std::unique_ptr<Input> inputFromURL(const ParsedURL & url) override
+    {
+        const bool schemeOk =
+            url.scheme == "file" || url.scheme == "http" || url.scheme == "https";
+        if (!schemeOk) return nullptr;
+
+        const char * const suffixes[] = { ".zip", ".tar", ".tar.gz", ".tar.xz", ".tar.bz2" };
+        bool isArchive = false;
+        for (auto suffix : suffixes)
+            if (hasSuffix(url.path, suffix)) {
+                isArchive = true;
+                break;
+            }
+        if (!isArchive) return nullptr;
+
+        auto input = std::make_unique<TarballInput>(url);
+
+        auto fileHash = url.query.find("hash");
+        if (fileHash != url.query.end())
+            // FIXME: require SRI hash.
+            input->hash = Hash(fileHash->second);
+
+        auto treeHash = url.query.find("narHash");
+        if (treeHash != url.query.end())
+            // FIXME: require SRI hash.
+            input->narHash = Hash(treeHash->second);
+
+        return input;
+    }
+
+    /* Builds a TarballInput from attributes of type "tarball"; returns
+       an empty pointer for other types and throws Error on an unknown
+       attribute. Only "url" and "hash" are read here; "narHash" is
+       accepted but not interpreted by this function. */
+    std::unique_ptr<Input> inputFromAttrs(const Input::Attrs & attrs) override
+    {
+        if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
+
+        for (auto & [key, val] : attrs) {
+            const bool known =
+                key == "type" || key == "url" || key == "hash" || key == "narHash";
+            if (!known)
+                throw Error("unsupported tarball input attribute '%s'", key);
+        }
+
+        auto input = std::make_unique<TarballInput>(parseURL(getStrAttr(attrs, "url")));
+
+        auto fileHash = maybeGetStrAttr(attrs, "hash");
+        if (fileHash)
+            // FIXME: require SRI hash.
+            input->hash = Hash(*fileHash);
+
+        return input;
+    }
+};
+
+static auto r1 = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); }); // file-local object whose initializer registers TarballInputScheme (OnStartup presumably runs the lambda during static initialization -- confirm)
+
+}
diff --git a/src/libstore/fetchers/tree-info.hh b/src/libstore/fetchers/tree-info.hh
new file mode 100644
index 000000000..02e92759b
--- /dev/null
+++ b/src/libstore/fetchers/tree-info.hh
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "path.hh"
+
+namespace nix { class Store; }
+
+namespace nix::fetchers {
+
+/* Metadata about a fetched tree: its NAR hash plus, where the fetcher
+   provides them, a revision count and a last-modified time. */
+struct TreeInfo
+{
+    Hash narHash;
+    std::optional<uint64_t> revCount;
+    std::optional<time_t> lastModified;
+
+    /* Member-wise equality. */
+    bool operator ==(const TreeInfo & other) const
+    {
+        if (!(narHash == other.narHash)) return false;
+        if (!(revCount == other.revCount)) return false;
+        return lastModified == other.lastModified;
+    }
+
+    StorePath computeStorePath(Store & store) const;
+};
+
+}