diff options
author | John Ericson <John.Ericson@Obsidian.Systems> | 2023-01-06 10:35:20 -0500 |
---|---|---|
committer | John Ericson <John.Ericson@Obsidian.Systems> | 2023-01-06 10:35:20 -0500 |
commit | e9fc1e4fdb0ab5adb6b163c3db361b86a4f5c69b (patch) | |
tree | 25522f96d7aa54f7c93ba3c5e187374d3a50dfe6 /src/libfetchers | |
parent | 55caef36ed1cee2e924c82cf49b3ceb17bdde910 (diff) | |
parent | 3172c51baff5c81362fcdafa2e28773c2949c660 (diff) |
Merge remote-tracking branch 'upstream/master' into path-info
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- | src/libfetchers/fetch-settings.hh | 9 | ||||
-rw-r--r-- | src/libfetchers/fetchers.cc | 6 | ||||
-rw-r--r-- | src/libfetchers/fetchers.hh | 13 | ||||
-rw-r--r-- | src/libfetchers/git.cc | 406 | ||||
-rw-r--r-- | src/libfetchers/github.cc | 107 | ||||
-rw-r--r-- | src/libfetchers/indirect.cc | 10 | ||||
-rw-r--r-- | src/libfetchers/mercurial.cc | 14 | ||||
-rw-r--r-- | src/libfetchers/path.cc | 8 | ||||
-rw-r--r-- | src/libfetchers/registry.cc | 3 | ||||
-rw-r--r-- | src/libfetchers/tarball.cc | 97 |
10 files changed, 448 insertions, 225 deletions
diff --git a/src/libfetchers/fetch-settings.hh b/src/libfetchers/fetch-settings.hh index 04c9feda0..f33cbdcfc 100644 --- a/src/libfetchers/fetch-settings.hh +++ b/src/libfetchers/fetch-settings.hh @@ -70,8 +70,13 @@ struct FetchSettings : public Config Setting<bool> warnDirty{this, true, "warn-dirty", "Whether to warn about dirty Git/Mercurial trees."}; - Setting<std::string> flakeRegistry{this, "https://github.com/NixOS/flake-registry/raw/master/flake-registry.json", "flake-registry", - "Path or URI of the global flake registry."}; + Setting<std::string> flakeRegistry{this, "https://channels.nixos.org/flake-registry.json", "flake-registry", + R"( + Path or URI of the global flake registry. + + When empty, disables the global flake registry. + )"}; + Setting<bool> useRegistries{this, true, "use-registries", "Whether to use flake registries to resolve flake references."}; diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 2a1529a60..735d9fc93 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -272,7 +272,7 @@ std::optional<time_t> Input::getLastModified() const return {}; } -ParsedURL InputScheme::toURL(const Input & input) +ParsedURL InputScheme::toURL(const Input & input) const { throw Error("don't know how to convert input '%s' to a URL", attrsToJSON(input.attrs)); } @@ -280,7 +280,7 @@ ParsedURL InputScheme::toURL(const Input & input) Input InputScheme::applyOverrides( const Input & input, std::optional<std::string> ref, - std::optional<Hash> rev) + std::optional<Hash> rev) const { if (ref) throw Error("don't know how to set branch/tag name of input '%s' to '%s'", input.to_string(), *ref); @@ -299,7 +299,7 @@ void InputScheme::markChangedFile(const Input & input, std::string_view file, st assert(false); } -void InputScheme::clone(const Input & input, const Path & destDir) +void InputScheme::clone(const Input & input, const Path & destDir) const { throw Error("do not know how to clone input '%s'", input.to_string()); } diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index bc9a76b0b..17da37f47 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -107,26 +107,25 @@ public: * recognized. The Input object contains the information the fetcher * needs to actually perform the "fetch()" when called. */ - struct InputScheme { virtual ~InputScheme() { } - virtual std::optional<Input> inputFromURL(const ParsedURL & url) = 0; + virtual std::optional<Input> inputFromURL(const ParsedURL & url) const = 0; - virtual std::optional<Input> inputFromAttrs(const Attrs & attrs) = 0; + virtual std::optional<Input> inputFromAttrs(const Attrs & attrs) const = 0; - virtual ParsedURL toURL(const Input & input); + virtual ParsedURL toURL(const Input & input) const; - virtual bool hasAllInfo(const Input & input) = 0; + virtual bool hasAllInfo(const Input & input) const = 0; virtual Input applyOverrides( const Input & input, std::optional<std::string> ref, - std::optional<Hash> rev); + std::optional<Hash> rev) const; - virtual void clone(const Input & input, const Path & destDir); + virtual void clone(const Input & input, const Path & destDir) const; virtual std::optional<Path> getSourcePath(const Input & input); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 34b1342a0..1f7d7c07d 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,9 +5,13 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "util.hh" +#include "git.hh" #include "fetch-settings.hh" +#include <regex> +#include <string.h> #include <sys/time.h> #include <sys/wait.h> @@ -15,25 +19,236 @@ using namespace std::string_literals; namespace nix::fetchers { +namespace { + // Explicit initial branch of our bare repo to suppress warnings from new version of git. // The value itself does not matter, since we always fetch a specific revision or branch. // It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with // old version of git, which will ignore unrecognized `-c` options. const std::string gitInitialBranch = "__nix_dummy_branch"; -static std::string readHead(const Path & path) +bool isCacheFileWithinTtl(time_t now, const struct stat & st) +{ + return st.st_mtime + settings.tarballTtl > now; +} + +bool touchCacheFile(const Path & path, time_t touch_time) +{ + struct timeval times[2]; + times[0].tv_sec = touch_time; + times[0].tv_usec = 0; + times[1].tv_sec = touch_time; + times[1].tv_usec = 0; + + return lutimes(path.c_str(), times) == 0; +} + +Path getCachePath(std::string_view key) +{ + return getCacheDir() + "/nix/gitv3/" + + hashString(htSHA256, key).to_string(Base32, false); +} + +// Returns the name of the HEAD branch. +// +// Returns the head branch name as reported by git ls-remote --symref, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// ... +std::optional<std::string> readHead(const Path & path) { - return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); + auto [status, output] = runProgram(RunOptions { + .program = "git", + // FIXME: use 'HEAD' to avoid returning all refs + .args = {"ls-remote", "--symref", path}, + }); + if (status != 0) return std::nullopt; + + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto parseResult = git::parseLsRemoteLine(line)) { + switch (parseResult->kind) { + case git::LsRemoteRefLine::Kind::Symbolic: + debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path); + break; + case git::LsRemoteRefLine::Kind::Object: + debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path); + break; + } + return parseResult->target; + } + return std::nullopt; +} + +// Persist the HEAD ref from the remote repo in the local cached repo. +bool storeCachedHead(const std::string & actualUrl, const std::string & headRef) +{ + Path cacheDir = getCachePath(actualUrl); + try { + runProgram("git", true, { "-C", cacheDir, "--git-dir", ".", "symbolic-ref", "--", "HEAD", headRef }); + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) throw; + return false; + } + /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ + return true; +} + +std::optional<std::string> readHeadCached(const std::string & actualUrl) +{ + // Create a cache path to store the branch of the HEAD ref. Append something + // in front of the URL to prevent collision with the repository itself. + Path cacheDir = getCachePath(actualUrl); + Path headRefFile = cacheDir + "/HEAD"; + + time_t now = time(0); + struct stat st; + std::optional<std::string> cachedRef; + if (stat(headRefFile.c_str(), &st) == 0) { + cachedRef = readHead(cacheDir); + if (cachedRef != std::nullopt && + *cachedRef != gitInitialBranch && + isCacheFileWithinTtl(now, st)) + { + debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); + return cachedRef; + } + } + + auto ref = readHead(actualUrl); + if (ref) return ref; + + if (cachedRef) { + // If the cached git ref is expired in fetch() below, and the 'git fetch' + // fails, it falls back to continuing with the most recent version. + // This function must behave the same way, so we return the expired + // cached ref here. + warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); + return *cachedRef; + } + + return std::nullopt; } -static bool isNotDotGitDirectory(const Path & path) +bool isNotDotGitDirectory(const Path & path) { return baseNameOf(path) != ".git"; } +struct WorkdirInfo +{ + bool clean = false; + bool hasHead = false; +}; + +// Returns whether a git workdir is clean and has commits. +WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + std::string gitDir(".git"); + + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a Git repository", workdir); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); + } + + bool clean = false; + bool hasHead = exitCode == 0; + + try { + if (hasHead) { + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + return WorkdirInfo { .clean = clean, .hasHead = hasHead }; +} + +std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + auto gitDir = ".git"; + + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", workdir); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", workdir); + + auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, gitOpts), "\0"s); + + Path actualPath(absPath(workdir)); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); + + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + input.attrs.insert_or_assign( + "lastModified", + workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + + return {std::move(storePath), input}; +} +} // end namespace + struct GitInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != "git" && url.scheme != "git+http" && @@ -48,7 +263,7 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - for (auto &[name, value] : url.query) { + for (auto & [name, value] : url.query) { if (name == "rev" || name == "ref") attrs.emplace(name, value); else if (name == "shallow" || name == "submodules") @@ -62,7 +277,7 @@ struct GitInputScheme : InputScheme return inputFromAttrs(attrs); } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != "git") return {}; @@ -85,7 +300,7 @@ struct GitInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto url = parseURL(getStrAttr(input.attrs, "url")); if (url.scheme != "git") url.scheme = "git+" + url.scheme; @@ -96,7 +311,7 @@ struct GitInputScheme : InputScheme return url; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { bool maybeDirty = !input.getRef(); bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); @@ -108,7 +323,7 @@ struct GitInputScheme : InputScheme Input applyOverrides( const Input & input, std::optional<std::string> ref, - std::optional<Hash> rev) override + std::optional<Hash> rev) const override { auto res(input); if (rev) res.attrs.insert_or_assign("rev", rev->gitRev()); @@ -118,7 +333,7 @@ struct GitInputScheme : InputScheme return res; } - void clone(const Input & input, const Path & destDir) override + void clone(const Input & input, const Path & destDir) const override { auto [isLocal, actualUrl] = getActualUrl(input); @@ -150,13 +365,14 @@ struct GitInputScheme : InputScheme { auto sourcePath = getSourcePath(input); assert(sourcePath); + auto gitDir = ".git"; runProgram("git", true, - { "-C", *sourcePath, "add", "--force", "--intent-to-add", "--", std::string(file) }); + { "-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file) }); if (commitMsg) runProgram("git", true, - { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); + { "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg }); } std::pair<bool, std::string> getActualUrl(const Input & input) const @@ -175,6 +391,7 @@ struct GitInputScheme : InputScheme std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override { Input input(_input); + auto gitDir = ".git"; std::string name = input.getName(); @@ -223,123 +440,54 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - // If this is a local directory and no ref or revision is - // given, then allow the use of an unclean working tree. + /* If this is a local directory and no ref or revision is given, + allow fetching directly from a dirty workdir. */ if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", actualUrl, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", actualUrl); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage); - } - - bool hasHead = exitCode == 0; - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", actualUrl, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. - gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", actualUrl); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", actualUrl); - - auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString<std::set<std::string>>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(actualUrl)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - return {std::move(storePath), input}; + auto workdirInfo = getWorkdirInfo(input, actualUrl); + if (!workdirInfo.clean) { + return fetchFromWorkdir(store, input, actualUrl, workdirInfo); } } - if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); - Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, {"url", actualUrl}, - {"ref", *input.getRef()}, }); Path repoDir; if (isLocal) { + if (!input.getRef()) { + auto head = readHead(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } if (!input.getRev()) input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input.getRef() })), htSHA1).gitRev()); + Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); repoDir = actualUrl; - } else { + const bool useHeadRef = !input.getRef(); + if (useHeadRef) { + auto head = readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } else { + if (!input.getRev()) { + unlockedAttrs.insert_or_assign("ref", input.getRef().value()); + } + } if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); @@ -349,8 +497,9 @@ struct GitInputScheme : InputScheme } } - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + Path cacheDir = getCachePath(actualUrl); repoDir = cacheDir; + gitDir = "."; createDirs(dirOf(cacheDir)); PathLocks cacheDirLock({cacheDir + ".lock"}); @@ -371,7 +520,7 @@ struct GitInputScheme : InputScheme repo. */ if (input.getRev()) { try { - runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); doFetch = false; } catch (ExecError & e) { if (WIFEXITED(e.status)) { @@ -388,7 +537,7 @@ struct GitInputScheme : InputScheme git fetch to update the local ref to the remote ref. */ struct stat st; doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + !isCacheFileWithinTtl(now, st); } } @@ -406,19 +555,16 @@ struct GitInputScheme : InputScheme : ref == "HEAD" ? *ref : "refs/heads/" + *ref; - runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); } catch (Error & e) { if (!pathExists(localRefFile)) throw; warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); } - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); + if (!touchCacheFile(localRefFile, now)) + warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); + if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) + warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } if (!input.getRev()) @@ -427,10 +573,10 @@ struct GitInputScheme : InputScheme // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } - bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; if (isShallow && !shallow) - throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); + throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl); // FIXME: check whether rev is an ancestor of ref. @@ -447,7 +593,7 @@ struct GitInputScheme : InputScheme auto result = runProgram(RunOptions { .program = "git", - .args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, .mergeStderrToStdout = true }); if (WEXITSTATUS(result.first) == 128 @@ -455,9 +601,9 @@ struct GitInputScheme : InputScheme { throw Error( "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " - "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " - ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD - "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", + "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " + ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD + "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", input.getRev()->gitRev(), *input.getRef(), actualUrl @@ -486,7 +632,7 @@ struct GitInputScheme : InputScheme auto source = sinkToSource([&](Sink & sink) { runProgram2({ .program = "git", - .args = { "-C", repoDir, "archive", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, .standardOut = &sink }); }); @@ -496,7 +642,7 @@ struct GitInputScheme : InputScheme auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); Attrs infoAttrs({ {"rev", input.getRev()->gitRev()}, @@ -505,7 +651,7 @@ struct GitInputScheme : InputScheme if (!shallow) infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() }))); + std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); if (!_input.getRev()) getCache()->add( diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..1ed09d30d 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include "url-parts.hh" - +#include "git.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -26,11 +26,11 @@ std::regex hostRegex(hostRegexS, std::regex::ECMAScript); struct GitArchiveInputScheme : InputScheme { - virtual std::string type() = 0; + virtual std::string type() const = 0; virtual std::optional<std::pair<std::string, std::string>> accessHeaderFromToken(const std::string & token) const = 0; - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != type()) return {}; @@ -100,7 +100,7 @@ struct GitArchiveInputScheme : InputScheme return input; } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != type()) return {}; @@ -116,7 +116,7 @@ struct GitArchiveInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto owner = getStrAttr(input.attrs, "owner"); auto repo = getStrAttr(input.attrs, "repo"); @@ -132,7 +132,7 @@ struct GitArchiveInputScheme : InputScheme }; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { return input.getRev() && maybeGetIntAttr(input.attrs, "lastModified"); } @@ -140,7 +140,7 @@ struct GitArchiveInputScheme : InputScheme Input applyOverrides( const Input & _input, std::optional<std::string> ref, - std::optional<Hash> rev) override + std::optional<Hash> rev) const override { auto input(_input); if (rev && ref) @@ -227,7 +227,7 @@ struct GitArchiveInputScheme : InputScheme struct GitHubInputScheme : GitArchiveInputScheme { - std::string type() override { return "github"; } + std::string type() const override { return "github"; } std::optional<std::pair<std::string, std::string>> accessHeaderFromToken(const std::string & token) const override { @@ -240,11 +240,29 @@ struct GitHubInputScheme : GitArchiveInputScheme return std::pair<std::string, std::string>("Authorization", fmt("token %s", token)); } + std::string getHost(const Input & input) const + { + return maybeGetStrAttr(input.attrs, "host").value_or("github.com"); + } + + std::string getOwner(const Input & input) const + { + return getStrAttr(input.attrs, "owner"); + } + + std::string getRepo(const Input & input) const + { + return getStrAttr(input.attrs, "repo"); + } + Hash getRevFromRef(nix::ref<Store> store, const Input & input) const override { - auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); - auto url = fmt("https://api.%s/repos/%s/%s/commits/%s", // FIXME: check - host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), *input.getRef()); + auto host = getHost(input); + auto url = fmt( + host == "github.com" + ? "https://api.%s/repos/%s/%s/commits/%s" + : "https://%s/api/v3/repos/%s/%s/commits/%s", + host, getOwner(input), getRepo(input), *input.getRef()); Headers headers = makeHeadersWithAuthTokens(host); @@ -259,22 +277,30 @@ struct GitHubInputScheme : GitArchiveInputScheme DownloadUrl getDownloadUrl(const Input & input) const override { - // FIXME: use regular /archive URLs instead? api.github.com - // might have stricter rate limits. - auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); - auto url = fmt("https://api.%s/repos/%s/%s/tarball/%s", // FIXME: check if this is correct for self hosted instances - host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), - input.getRev()->to_string(Base16, false)); + auto host = getHost(input); Headers headers = makeHeadersWithAuthTokens(host); + + // If we have no auth headers then we default to the public archive + // urls so we do not run into rate limits. + const auto urlFmt = + host != "github.com" + ? "https://%s/api/v3/repos/%s/%s/tarball/%s" + : headers.empty() + ? "https://%s/%s/%s/archive/%s.tar.gz" + : "https://api.%s/repos/%s/%s/tarball/%s"; + + const auto url = fmt(urlFmt, host, getOwner(input), getRepo(input), + input.getRev()->to_string(Base16, false)); + return DownloadUrl { url, headers }; } - void clone(const Input & input, const Path & destDir) override + void clone(const Input & input, const Path & destDir) const override { - auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); + auto host = getHost(input); Input::fromURL(fmt("git+https://%s/%s/%s.git", - host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"))) + host, getOwner(input), getRepo(input))) .applyOverrides(input.getRef(), input.getRev()) .clone(destDir); } @@ -282,7 +308,7 @@ struct GitHubInputScheme : GitArchiveInputScheme struct GitLabInputScheme : GitArchiveInputScheme { - std::string type() override { return "gitlab"; } + std::string type() const override { return "gitlab"; } std::optional<std::pair<std::string, std::string>> accessHeaderFromToken(const std::string & token) const override { @@ -337,7 +363,7 @@ struct GitLabInputScheme : GitArchiveInputScheme return DownloadUrl { url, headers }; } - void clone(const Input & input, const Path & destDir) override + void clone(const Input & input, const Path & destDir) const override { auto host = maybeGetStrAttr(input.attrs, "host").value_or("gitlab.com"); // FIXME: get username somewhere @@ -350,7 +376,7 @@ struct GitLabInputScheme : GitArchiveInputScheme struct SourceHutInputScheme : GitArchiveInputScheme { - std::string type() override { return "sourcehut"; } + std::string type() const override { return "sourcehut"; } std::optional<std::pair<std::string, std::string>> accessHeaderFromToken(const std::string & token) const override { @@ -375,7 +401,7 @@ struct SourceHutInputScheme : GitArchiveInputScheme Headers headers = makeHeadersWithAuthTokens(host); - std::string ref_uri; + std::string refUri; if (ref == "HEAD") { auto file = store->toRealPath( downloadFile(store, fmt("%s/HEAD", base_url), "source", false, headers).storePath); @@ -383,35 +409,32 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto remoteLine = git::parseLsRemoteLine(line); + if (!remoteLine) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else - ref_uri = fmt("refs/(heads|tags)/%s", ref); + refUri = remoteLine->target; + } else { + refUri = fmt("refs/(heads|tags)/%s", ref); + } + std::regex refRegex(refUri); auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - // Append $ to avoid partial name matches - std::regex pattern(fmt("%s$", ref_uri)); - - if (std::regex_search(line, pattern)) { - id = line.substr(0, line.find('\t')); - break; - } + std::optional<std::string> id; + while(!id && getline(is, line)) { + auto parsedLine = git::parseLsRemoteLine(line); + if (parsedLine && parsedLine->reference && std::regex_match(*parsedLine->reference, refRegex)) + id = parsedLine->target; } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; } @@ -427,7 +450,7 @@ struct SourceHutInputScheme : GitArchiveInputScheme return DownloadUrl { url, headers }; } - void clone(const Input & input, const Path & destDir) override + void clone(const Input & input, const Path & destDir) const override { auto host = maybeGetStrAttr(input.attrs, "host").value_or("git.sr.ht"); Input::fromURL(fmt("git+https://%s/%s/%s", diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index 9288fc6cf..b99504a16 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -7,7 +7,7 @@ std::regex flakeRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); struct IndirectInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != "flake") return {}; @@ -50,7 +50,7 @@ struct IndirectInputScheme : InputScheme return input; } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != "indirect") return {}; @@ -68,7 +68,7 @@ struct IndirectInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { ParsedURL url; url.scheme = "flake"; @@ -78,7 +78,7 @@ struct IndirectInputScheme : InputScheme return url; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { return false; } @@ -86,7 +86,7 @@ struct IndirectInputScheme : InputScheme Input applyOverrides( const Input & _input, std::optional<std::string> ref, - std::optional<Hash> rev) override + std::optional<Hash> rev) const override { auto input(_input); if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 1beb8b944..86e8f81f4 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -36,14 +36,14 @@ static std::string runHg(const Strings & args, const std::optional<std::string> auto res = runProgram(std::move(opts)); if (!statusOk(res.first)) - throw ExecError(res.first, fmt("hg %1%", statusToString(res.first))); + throw ExecError(res.first, "hg %1%", statusToString(res.first)); return res.second; } struct MercurialInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != "hg+http" && url.scheme != "hg+https" && @@ -69,7 +69,7 @@ struct MercurialInputScheme : InputScheme return inputFromAttrs(attrs); } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != "hg") return {}; @@ -89,7 +89,7 @@ struct MercurialInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto url = parseURL(getStrAttr(input.attrs, "url")); url.scheme = "hg+" + url.scheme; @@ -98,7 +98,7 @@ struct MercurialInputScheme : InputScheme return url; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { // FIXME: ugly, need to distinguish between dirty and clean // default trees. @@ -108,7 +108,7 @@ struct MercurialInputScheme : InputScheme Input applyOverrides( const Input & input, std::optional<std::string> ref, - std::optional<Hash> rev) override + std::optional<Hash> rev) const override { auto res(input); if (rev) res.attrs.insert_or_assign("rev", rev->gitRev()); @@ -273,7 +273,7 @@ struct MercurialInputScheme : InputScheme runHg({ "recover", "-R", cacheDir }); runHg({ "pull", "-R", cacheDir, "--", actualUrl }); } else { - throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + throw ExecError(e.status, "'hg pull' %s", statusToString(e.status)); } } } else { diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index f0ef97da5..61541e69d 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -6,7 +6,7 @@ namespace nix::fetchers { struct PathInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != "path") return {}; @@ -32,7 +32,7 @@ struct PathInputScheme : InputScheme return input; } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != "path") return {}; @@ -54,7 +54,7 @@ struct PathInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto query = attrsToQuery(input.attrs); query.erase("path"); @@ -66,7 +66,7 @@ struct PathInputScheme : InputScheme }; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { return true; } diff --git a/src/libfetchers/registry.cc b/src/libfetchers/registry.cc index acd1ff866..43c03beec 100644 --- a/src/libfetchers/registry.cc +++ b/src/libfetchers/registry.cc @@ -153,6 +153,9 @@ static std::shared_ptr<Registry> getGlobalRegistry(ref<Store> store) { static auto reg = [&]() { auto path = fetchSettings.flakeRegistry.get(); + if (path == "") { + return std::make_shared<Registry>(Registry::Global); // empty registry + } if (!hasPrefix(path, "/")) { auto storePath = downloadFile(store, path, "flake-registry.json", false).storePath; diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 4be923148..d52d19797 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -6,6 +6,7 @@ #include "archive.hh" #include "tarfile.hh" #include "types.hh" +#include "split.hh" namespace nix::fetchers { @@ -174,63 +175,108 @@ std::pair<Tree, time_t> downloadTarball( }; } -struct TarballInputScheme : InputScheme +// An input scheme corresponding to a curl-downloadable resource. +struct CurlInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + virtual const std::string inputType() const = 0; + const std::set<std::string> transportUrlSchemes = {"file", "http", "https"}; + + const bool hasTarballExtension(std::string_view path) const { - if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {}; + return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") + || hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz") + || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2") + || hasSuffix(path, ".tar.zst"); + } - if (!hasSuffix(url.path, ".zip") - && !hasSuffix(url.path, ".tar") - && !hasSuffix(url.path, ".tgz") - && !hasSuffix(url.path, ".tar.gz") - && !hasSuffix(url.path, ".tar.xz") - && !hasSuffix(url.path, ".tar.bz2") - && !hasSuffix(url.path, ".tar.zst")) - return {}; + virtual bool isValidURL(const ParsedURL & url) const = 0; + + std::optional<Input> inputFromURL(const ParsedURL & url) const override + { + if (!isValidURL(url)) + return std::nullopt; Input input; - input.attrs.insert_or_assign("type", "tarball"); - input.attrs.insert_or_assign("url", url.to_string()); + + auto urlWithoutApplicationScheme = url; + urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport; + + input.attrs.insert_or_assign("type", inputType()); + input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string()); auto narHash = url.query.find("narHash"); if (narHash != url.query.end()) input.attrs.insert_or_assign("narHash", narHash->second); return input; } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { - if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; + auto type = maybeGetStrAttr(attrs, "type"); + if (type != inputType()) return {}; + std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name") - throw Error("unsupported tarball input attribute '%s'", name); + if (!allowedNames.count(name)) + throw Error("unsupported %s input attribute '%s'", *type, name); Input input; input.attrs = attrs; + //input.locked = (bool) maybeGetStrAttr(input.attrs, "hash"); return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto url = parseURL(getStrAttr(input.attrs, "url")); - // NAR hashes are preferred over file hashes since tar/zip files - // don't have a canonical representation. + // NAR hashes are preferred over file hashes since tar/zip + // files don't have a canonical representation. if (auto narHash = input.getNarHash()) url.query.insert_or_assign("narHash", narHash->to_string(SRI, true)); - /* - else if (auto hash = maybeGetStrAttr(input.attrs, "hash")) - url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true)); - */ return url; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { return true; } +}; + +struct FileInputScheme : CurlInputScheme +{ + const std::string inputType() const override { return "file"; } + + bool isValidURL(const ParsedURL & url) const override + { + auto parsedUrlScheme = parseUrlScheme(url.scheme); + return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) + && (parsedUrlScheme.application + ? parsedUrlScheme.application.value() == inputType() + : !hasTarballExtension(url.path)); + } + + std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override + { + auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false); + return {std::move(file.storePath), input}; + } +}; + +struct TarballInputScheme : CurlInputScheme +{ + const std::string inputType() const override { return "tarball"; } + + bool isValidURL(const ParsedURL & url) const override + { + auto parsedUrlScheme = parseUrlScheme(url.scheme); + + return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) + && (parsedUrlScheme.application + ? parsedUrlScheme.application.value() == inputType() + : hasTarballExtension(url.path)); + } + std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override { auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first; @@ -239,5 +285,6 @@ struct TarballInputScheme : InputScheme }; static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); }); +static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); }); } |