diff options
author | Eelco Dolstra <edolstra@gmail.com> | 2022-11-03 17:43:40 +0100 |
---|---|---|
committer | Eelco Dolstra <edolstra@gmail.com> | 2022-11-03 17:43:40 +0100 |
commit | b95faccf03e5213b6087626ab8d46e0704aad6b5 (patch) | |
tree | f9d560f2004f951d8efbf1024292a88366d8efd7 /src/libfetchers | |
parent | 47dec825c5daeeb9d615eb4d1eead3dbaa06c7c9 (diff) | |
parent | dd1970c233a82328445b69e903574e14115ee933 (diff) |
Merge remote-tracking branch 'origin/master' into auto-uid-allocation
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- | src/libfetchers/fetch-settings.cc | 13 | ||||
-rw-r--r-- | src/libfetchers/fetch-settings.hh | 93 | ||||
-rw-r--r-- | src/libfetchers/fetchers.cc | 15 | ||||
-rw-r--r-- | src/libfetchers/git.cc | 374 | ||||
-rw-r--r-- | src/libfetchers/github.cc | 58 | ||||
-rw-r--r-- | src/libfetchers/mercurial.cc | 27 | ||||
-rw-r--r-- | src/libfetchers/path.cc | 14 | ||||
-rw-r--r-- | src/libfetchers/registry.cc | 4 | ||||
-rw-r--r-- | src/libfetchers/tarball.cc | 90 |
9 files changed, 532 insertions, 156 deletions
diff --git a/src/libfetchers/fetch-settings.cc b/src/libfetchers/fetch-settings.cc new file mode 100644 index 000000000..e7d5244dc --- /dev/null +++ b/src/libfetchers/fetch-settings.cc @@ -0,0 +1,13 @@ +#include "fetch-settings.hh" + +namespace nix { + +FetchSettings::FetchSettings() +{ +} + +FetchSettings fetchSettings; + +static GlobalConfig::Register rFetchSettings(&fetchSettings); + +} diff --git a/src/libfetchers/fetch-settings.hh b/src/libfetchers/fetch-settings.hh new file mode 100644 index 000000000..6452143a1 --- /dev/null +++ b/src/libfetchers/fetch-settings.hh @@ -0,0 +1,93 @@ +#pragma once + +#include "types.hh" +#include "config.hh" +#include "util.hh" + +#include <map> +#include <limits> + +#include <sys/types.h> + +namespace nix { + +struct FetchSettings : public Config +{ + FetchSettings(); + + Setting<StringMap> accessTokens{this, {}, "access-tokens", + R"( + Access tokens used to access protected GitHub, GitLab, or + other locations requiring token-based authentication. + + Access tokens are specified as a string made up of + space-separated `host=token` values. The specific token + used is selected by matching the `host` portion against the + "host" specification of the input. The actual use of the + `token` value is determined by the type of resource being + accessed: + + * Github: the token value is the OAUTH-TOKEN string obtained + as the Personal Access Token from the Github server (see + https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps). + + * Gitlab: the token value is either the OAuth2 token or the + Personal Access Token (these are different types tokens + for gitlab, see + https://docs.gitlab.com/12.10/ee/api/README.html#authentication). + The `token` value should be `type:tokenstring` where + `type` is either `OAuth2` or `PAT` to indicate which type + of token is being specified. + + Example `~/.config/nix/nix.conf`: + + ``` + access-tokens = github.com=23ac...b289 gitlab.mycompany.com=PAT:A123Bp_Cd..EfG gitlab.com=OAuth2:1jklw3jk + ``` + + Example `~/code/flake.nix`: + + ```nix + input.foo = { + type = "gitlab"; + host = "gitlab.mycompany.com"; + owner = "mycompany"; + repo = "pro"; + }; + ``` + + This example specifies three tokens, one each for accessing + github.com, gitlab.mycompany.com, and sourceforge.net. + + The `input.foo` uses the "gitlab" fetcher, which might + requires specifying the token type along with the token + value. + )"}; + + Setting<bool> allowDirty{this, true, "allow-dirty", + "Whether to allow dirty Git/Mercurial trees."}; + + Setting<bool> warnDirty{this, true, "warn-dirty", + "Whether to warn about dirty Git/Mercurial trees."}; + + Setting<std::string> flakeRegistry{this, "https://channels.nixos.org/flake-registry.json", "flake-registry", + "Path or URI of the global flake registry."}; + + Setting<bool> useRegistries{this, true, "use-registries", + "Whether to use flake registries to resolve flake references."}; + + Setting<bool> acceptFlakeConfig{this, false, "accept-flake-config", + "Whether to accept nix configuration from a flake without prompting."}; + + Setting<std::string> commitLockFileSummary{ + this, "", "commit-lockfile-summary", + R"( + The commit summary to use when committing changed flake lock files. If + empty, the summary is generated based on the action performed. + )"}; +}; + +// FIXME: don't use a global variable. +extern FetchSettings fetchSettings; + +} diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 976f40d3b..6957d2da4 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -238,9 +238,18 @@ std::optional<std::string> Input::getRef() const std::optional<Hash> Input::getRev() const { - if (auto s = maybeGetStrAttr(attrs, "rev")) - return Hash::parseAny(*s, htSHA1); - return {}; + std::optional<Hash> hash = {}; + + if (auto s = maybeGetStrAttr(attrs, "rev")) { + try { + hash = Hash::parseAnyPrefixed(*s); + } catch (BadHash &e) { + // Default to sha1 for backwards compatibility with existing flakes + hash = Hash::parseAny(*s, htSHA1); + } + } + + return hash; } std::optional<uint64_t> Input::getRevCount() const diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 7f65c1533..7b7a1be35 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,13 +5,20 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "util.hh" +#include "git.hh" +#include "fetch-settings.hh" + +#include <regex> +#include <string.h> #include <sys/time.h> #include <sys/wait.h> using namespace std::string_literals; namespace nix::fetchers { +namespace { // Explicit initial branch of our bare repo to suppress warnings from new version of git. // The value itself does not matter, since we always fetch a specific revision or branch. @@ -19,17 +26,227 @@ namespace nix::fetchers { // old version of git, which will ignore unrecognized `-c` options. const std::string gitInitialBranch = "__nix_dummy_branch"; -static std::string readHead(const Path & path) +bool isCacheFileWithinTtl(const time_t now, const struct stat & st) +{ + return st.st_mtime + settings.tarballTtl > now; +} + +bool touchCacheFile(const Path& path, const time_t& touch_time) +{ + struct timeval times[2]; + times[0].tv_sec = touch_time; + times[0].tv_usec = 0; + times[1].tv_sec = touch_time; + times[1].tv_usec = 0; + + return lutimes(path.c_str(), times) == 0; +} + +Path getCachePath(std::string key) +{ + return getCacheDir() + "/nix/gitv3/" + + hashString(htSHA256, key).to_string(Base32, false); +} + +// Returns the name of the HEAD branch. +// +// Returns the head branch name as reported by git ls-remote --symref, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// ... +std::optional<std::string> readHead(const Path & path) +{ + auto [exit_code, output] = runProgram(RunOptions { + .program = "git", + .args = {"ls-remote", "--symref", path}, + }); + if (exit_code != 0) { + return std::nullopt; + } + + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto parseResult = git::parseLsRemoteLine(line)) { + switch (parseResult->kind) { + case git::LsRemoteRefLine::Kind::Symbolic: + debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path); + break; + case git::LsRemoteRefLine::Kind::Object: + debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path); + break; + } + return parseResult->target; + } + return std::nullopt; +} + +// Persist the HEAD ref from the remote repo in the local cached repo. +bool storeCachedHead(const std::string& actualUrl, const std::string& headRef) +{ + Path cacheDir = getCachePath(actualUrl); + auto gitDir = "."; + try { + runProgram("git", true, { "-C", cacheDir, "--git-dir", gitDir, "symbolic-ref", "--", "HEAD", headRef }); + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) throw; + return false; + } + /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ + return true; +} + +std::optional<std::string> readHeadCached(const std::string& actualUrl) { - return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); + // Create a cache path to store the branch of the HEAD ref. Append something + // in front of the URL to prevent collision with the repository itself. + Path cacheDir = getCachePath(actualUrl); + Path headRefFile = cacheDir + "/HEAD"; + + time_t now = time(0); + struct stat st; + std::optional<std::string> cachedRef; + if (stat(headRefFile.c_str(), &st) == 0) { + cachedRef = readHead(cacheDir); + if (cachedRef != std::nullopt && + *cachedRef != gitInitialBranch && + isCacheFileWithinTtl(now, st)) { + debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); + return cachedRef; + } + } + + auto ref = readHead(actualUrl); + if (ref) { + return ref; + } + + if (cachedRef) { + // If the cached git ref is expired in fetch() below, and the 'git fetch' + // fails, it falls back to continuing with the most recent version. + // This function must behave the same way, so we return the expired + // cached ref here. + warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); + return *cachedRef; + } + + return std::nullopt; +} + +bool isNotDotGitDirectory(const Path & path) +{ + return baseNameOf(path) != ".git"; +} + +struct WorkdirInfo +{ + bool clean = false; + bool hasHead = false; +}; + +// Returns whether a git workdir is clean and has commits. +WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + std::string gitDir(".git"); + + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a Git repository", workdir); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); + } + + bool clean = false; + bool hasHead = exitCode == 0; + + try { + if (hasHead) { + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + return WorkdirInfo { .clean = clean, .hasHead = hasHead }; } -static bool isNotDotGitDirectory(const Path & path) +std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) { - static const std::regex gitDirRegex("^(?:.*/)?\\.git$"); + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + auto gitDir = ".git"; + + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", workdir); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", workdir); + + auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, gitOpts), "\0"s); + + Path actualPath(absPath(workdir)); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } - return not std::regex_match(path, gitDirRegex); + return files.count(file); + }; + + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); + + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + input.attrs.insert_or_assign( + "lastModified", + workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + + return {std::move(storePath), input}; } +} // end namespace struct GitInputScheme : InputScheme { @@ -150,13 +367,14 @@ struct GitInputScheme : InputScheme { auto sourcePath = getSourcePath(input); assert(sourcePath); + auto gitDir = ".git"; runProgram("git", true, - { "-C", *sourcePath, "add", "--force", "--intent-to-add", "--", std::string(file) }); + { "-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file) }); if (commitMsg) runProgram("git", true, - { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); + { "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg }); } std::pair<bool, std::string> getActualUrl(const Input & input) const @@ -175,6 +393,7 @@ struct GitInputScheme : InputScheme std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override { Input input(_input); + auto gitDir = ".git"; std::string name = input.getName(); @@ -187,8 +406,16 @@ struct GitInputScheme : InputScheme if (submodules) cacheType += "-submodules"; if (allRefs) cacheType += "-all-refs"; + auto checkHashType = [&](const std::optional<Hash> & hash) + { + if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256)) + throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(Base16, true)); + }; + auto getLockedAttrs = [&]() { + checkHashType(input.getRev()); + return Attrs({ {"type", cacheType}, {"name", name}, @@ -215,97 +442,54 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - // If this is a local directory and no ref or revision is - // given, then allow the use of an unclean working tree. + /* If this is a local directory and no ref or revision is given, + allow fetching directly from a dirty workdir. */ if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - /* Check whether this repo has any commits. There are - probably better ways to do this. */ - auto gitDir = actualUrl + "/.git"; - auto commonGitDir = chomp(runProgram( - "git", - true, - { "-C", actualUrl, "rev-parse", "--git-common-dir" } - )); - if (commonGitDir != ".git") - gitDir = commonGitDir; - - bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); - - try { - if (haveCommits) { - runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - - if (!settings.allowDirty) - throw Error("Git tree '%s' is dirty", actualUrl); - - if (settings.warnDirty) - warn("Git tree '%s' is dirty", actualUrl); - - auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString<std::set<std::string>>( - runProgram("git", true, gitOpts), "\0"s); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualUrl)); - std::string file(p, actualUrl.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - return {std::move(storePath), input}; + auto workdirInfo = getWorkdirInfo(input, actualUrl); + if (!workdirInfo.clean) { + return fetchFromWorkdir(store, input, actualUrl, workdirInfo); } } - if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); - Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, {"url", actualUrl}, - {"ref", *input.getRef()}, }); Path repoDir; if (isLocal) { + if (!input.getRef()) { + auto head = readHead(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } if (!input.getRev()) input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input.getRef() })), htSHA1).gitRev()); + Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); repoDir = actualUrl; - } else { + const bool useHeadRef = !input.getRef(); + if (useHeadRef) { + auto head = readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } else { + if (!input.getRev()) { + unlockedAttrs.insert_or_assign("ref", input.getRef().value()); + } + } if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); @@ -315,8 +499,9 @@ struct GitInputScheme : InputScheme } } - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + Path cacheDir = getCachePath(actualUrl); repoDir = cacheDir; + gitDir = "."; createDirs(dirOf(cacheDir)); PathLocks cacheDirLock({cacheDir + ".lock"}); @@ -337,7 +522,7 @@ struct GitInputScheme : InputScheme repo. */ if (input.getRev()) { try { - runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); doFetch = false; } catch (ExecError & e) { if (WIFEXITED(e.status)) { @@ -354,7 +539,7 @@ struct GitInputScheme : InputScheme git fetch to update the local ref to the remote ref. */ struct stat st; doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + !isCacheFileWithinTtl(now, st); } } @@ -372,19 +557,16 @@ struct GitInputScheme : InputScheme : ref == "HEAD" ? *ref : "refs/heads/" + *ref; - runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); } catch (Error & e) { if (!pathExists(localRefFile)) throw; warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); } - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); + if (!touchCacheFile(localRefFile, now)) + warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); + if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) + warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } if (!input.getRev()) @@ -393,10 +575,10 @@ struct GitInputScheme : InputScheme // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } - bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; if (isShallow && !shallow) - throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); + throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl); // FIXME: check whether rev is an ancestor of ref. @@ -413,7 +595,7 @@ struct GitInputScheme : InputScheme auto result = runProgram(RunOptions { .program = "git", - .args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, .mergeStderrToStdout = true }); if (WEXITSTATUS(result.first) == 128 @@ -452,7 +634,7 @@ struct GitInputScheme : InputScheme auto source = sinkToSource([&](Sink & sink) { runProgram2({ .program = "git", - .args = { "-C", repoDir, "archive", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, .standardOut = &sink }); }); @@ -462,7 +644,7 @@ struct GitInputScheme : InputScheme auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); Attrs infoAttrs({ {"rev", input.getRev()->gitRev()}, @@ -471,7 +653,7 @@ struct GitInputScheme : InputScheme if (!shallow) infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() }))); + std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); if (!_input.getRev()) getCache()->add( diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 70622bf79..2115ce2f5 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,10 +1,12 @@ #include "filetransfer.hh" #include "cache.hh" -#include "fetchers.hh" #include "globals.hh" #include "store-api.hh" #include "types.hh" #include "url-parts.hh" +#include "git.hh" +#include "fetchers.hh" +#include "fetch-settings.hh" #include <optional> #include <nlohmann/json.hpp> @@ -157,7 +159,7 @@ struct GitArchiveInputScheme : InputScheme std::optional<std::string> getAccessToken(const std::string & host) const { - auto tokens = settings.accessTokens.get(); + auto tokens = fetchSettings.accessTokens.get(); if (auto token = get(tokens, host)) return *token; return {}; @@ -241,7 +243,10 @@ struct GitHubInputScheme : GitArchiveInputScheme Hash getRevFromRef(nix::ref<Store> store, const Input & input) const override { auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); - auto url = fmt("https://api.%s/repos/%s/%s/commits/%s", // FIXME: check + auto url = fmt( + host == "github.com" + ? "https://api.%s/repos/%s/%s/commits/%s" + : "https://%s/api/v3/repos/%s/%s/commits/%s", host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), *input.getRef()); Headers headers = makeHeadersWithAuthTokens(host); @@ -257,14 +262,20 @@ struct GitHubInputScheme : GitArchiveInputScheme DownloadUrl getDownloadUrl(const Input & input) const override { - // FIXME: use regular /archive URLs instead? api.github.com - // might have stricter rate limits. auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); - auto url = fmt("https://api.%s/repos/%s/%s/tarball/%s", // FIXME: check if this is correct for self hosted instances - host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), + Headers headers = makeHeadersWithAuthTokens(host); + // If we have no auth headers then we default to the public archive + // urls so we do not run into rate limits. + const auto urlFmt = + host != "github.com" + ? "https://%s/api/v3/repos/%s/%s/tarball/%s" + : headers.empty() + ? "https://%s/%s/%s/archive/%s.tar.gz" + : "https://api.%s/repos/%s/%s/tarball/%s"; + + const auto url = fmt(urlFmt, host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), input.getRev()->to_string(Base16, false)); - Headers headers = makeHeadersWithAuthTokens(host); return DownloadUrl { url, headers }; } @@ -373,7 +384,7 @@ struct SourceHutInputScheme : GitArchiveInputScheme Headers headers = makeHeadersWithAuthTokens(host); - std::string ref_uri; + std::string refUri; if (ref == "HEAD") { auto file = store->toRealPath( downloadFile(store, fmt("%s/HEAD", base_url), "source", false, headers).storePath); @@ -381,33 +392,32 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto remoteLine = git::parseLsRemoteLine(line); + if (!remoteLine) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else - ref_uri = fmt("refs/heads/%s", ref); + refUri = remoteLine->target; + } else { + refUri = fmt("refs/(heads|tags)/%s", ref); + } + std::regex refRegex(refUri); auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - auto index = line.find(ref_uri); - if (index != std::string::npos) { - id = line.substr(0, index-1); - break; - } + std::optional<std::string> id; + while(!id && getline(is, line)) { + auto parsedLine = git::parseLsRemoteLine(line); + if (parsedLine && parsedLine->reference && std::regex_match(*parsedLine->reference, refRegex)) + id = parsedLine->target; } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; } diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 12cdecbc1..5c5671681 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -5,6 +5,8 @@ #include "store-api.hh" #include "url-parts.hh" +#include "fetch-settings.hh" + #include <sys/time.h> using namespace std::string_literals; @@ -34,7 +36,7 @@ static std::string runHg(const Strings & args, const std::optional<std::string> auto res = runProgram(std::move(opts)); if (!statusOk(res.first)) - throw ExecError(res.first, fmt("hg %1%", statusToString(res.first))); + throw ExecError(res.first, "hg %1%", statusToString(res.first)); return res.second; } @@ -165,10 +167,10 @@ struct MercurialInputScheme : InputScheme /* This is an unclean working tree. So copy all tracked files. */ - if (!settings.allowDirty) + if (!fetchSettings.allowDirty) throw Error("Mercurial tree '%s' is unclean", actualUrl); - if (settings.warnDirty) + if (fetchSettings.warnDirty) warn("Mercurial tree '%s' is unclean", actualUrl); input.attrs.insert_or_assign("ref", chomp(runHg({ "branch", "-R", actualUrl }))); @@ -176,9 +178,11 @@ struct MercurialInputScheme : InputScheme auto files = tokenizeString<std::set<std::string>>( runHg({ "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); + Path actualPath(absPath(actualUrl)); + PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualUrl)); - std::string file(p, actualUrl.size() + 1); + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); auto st = lstat(p); @@ -191,7 +195,7 @@ struct MercurialInputScheme : InputScheme return files.count(file); }; - auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); return {std::move(storePath), input}; } @@ -199,8 +203,17 @@ struct MercurialInputScheme : InputScheme if (!input.getRef()) input.attrs.insert_or_assign("ref", "default"); + auto checkHashType = [&](const std::optional<Hash> & hash) + { + if (hash.has_value() && hash->type != htSHA1) + throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", hash->to_string(Base16, true)); + }; + + auto getLockedAttrs = [&]() { + checkHashType(input.getRev()); + return Attrs({ {"type", "hg"}, {"name", name}, @@ -260,7 +273,7 @@ struct MercurialInputScheme : InputScheme runHg({ "recover", "-R", cacheDir }); runHg({ "pull", "-R", cacheDir, "--", actualUrl }); } else { - throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + throw ExecError(e.status, "'hg pull' %s", statusToString(e.status)); } } } else { diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index 59e228e97..f0ef97da5 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -1,5 +1,6 @@ #include "fetchers.hh" #include "store-api.hh" +#include "archive.hh" namespace nix::fetchers { @@ -80,8 +81,9 @@ struct PathInputScheme : InputScheme // nothing to do } - std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override + std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override { + Input input(_input); std::string absPath; auto path = getStrAttr(input.attrs, "path"); @@ -111,9 +113,15 @@ struct PathInputScheme : InputScheme if (storePath) store->addTempRoot(*storePath); - if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath)) + time_t mtime = 0; + if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath)) { // FIXME: try to substitute storePath. - storePath = store->addToStore("source", absPath); + auto src = sinkToSource([&](Sink & sink) { + mtime = dumpPathAndGetMtime(absPath, sink, defaultPathFilter); + }); + storePath = store->addToStoreFromDump(*src, "source"); + } + input.attrs.insert_or_assign("lastModified", uint64_t(mtime)); return {std::move(*storePath), input}; } diff --git a/src/libfetchers/registry.cc b/src/libfetchers/registry.cc index f35359d4b..acd1ff866 100644 --- a/src/libfetchers/registry.cc +++ b/src/libfetchers/registry.cc @@ -5,6 +5,8 @@ #include "store-api.hh" #include "local-fs-store.hh" +#include "fetch-settings.hh" + #include <nlohmann/json.hpp> namespace nix::fetchers { @@ -150,7 +152,7 @@ void overrideRegistry( static std::shared_ptr<Registry> getGlobalRegistry(ref<Store> store) { static auto reg = [&]() { - auto path = settings.flakeRegistry.get(); + auto path = fetchSettings.flakeRegistry.get(); if (!hasPrefix(path, "/")) { auto storePath = downloadFile(store, path, "flake-registry.json", false).storePath; diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index dde0ad761..6c551bd93 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -6,6 +6,7 @@ #include "archive.hh" #include "tarfile.hh" #include "types.hh" +#include "split.hh" namespace nix::fetchers { @@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball( }; } -struct TarballInputScheme : InputScheme +// An input scheme corresponding to a curl-downloadable resource. +struct CurlInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + virtual const std::string inputType() const = 0; + const std::set<std::string> transportUrlSchemes = {"file", "http", "https"}; + + const bool hasTarballExtension(std::string_view path) const { - if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {}; + return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") + || hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz") + || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2") + || hasSuffix(path, ".tar.zst"); + } - if (!hasSuffix(url.path, ".zip") - && !hasSuffix(url.path, ".tar") - && !hasSuffix(url.path, ".tgz") - && !hasSuffix(url.path, ".tar.gz") - && !hasSuffix(url.path, ".tar.xz") - && !hasSuffix(url.path, ".tar.bz2") - && !hasSuffix(url.path, ".tar.zst")) - return {}; + virtual bool isValidURL(const ParsedURL & url) const = 0; + + std::optional<Input> inputFromURL(const ParsedURL & url) override + { + if (!isValidURL(url)) + return std::nullopt; Input input; - input.attrs.insert_or_assign("type", "tarball"); - input.attrs.insert_or_assign("url", url.to_string()); + + auto urlWithoutApplicationScheme = url; + urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport; + + input.attrs.insert_or_assign("type", inputType()); + input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string()); auto narHash = url.query.find("narHash"); if (narHash != url.query.end()) input.attrs.insert_or_assign("narHash", narHash->second); @@ -194,14 +205,17 @@ struct TarballInputScheme : InputScheme std::optional<Input> inputFromAttrs(const Attrs & attrs) override { - if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; + auto type = maybeGetStrAttr(attrs, "type"); + if (type != inputType()) return {}; + std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name") - throw Error("unsupported tarball input attribute '%s'", name); + if (!allowedNames.count(name)) + throw Error("unsupported %s input attribute '%s'", *type, name); Input input; input.attrs = attrs; + //input.locked = (bool) maybeGetStrAttr(input.attrs, "hash"); return input; } @@ -209,14 +223,9 @@ struct TarballInputScheme : InputScheme ParsedURL toURL(const Input & input) override { auto url = parseURL(getStrAttr(input.attrs, "url")); - // NAR hashes are preferred over file hashes since tar/zip files - // don't have a canonical representation. + // NAR hashes are preferred over file hashes since tar/zip files // don't have a canonical representation. if (auto narHash = input.getNarHash()) url.query.insert_or_assign("narHash", narHash->to_string(SRI, true)); - /* - else if (auto hash = maybeGetStrAttr(input.attrs, "hash")) - url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true)); - */ return url; } @@ -225,6 +234,42 @@ struct TarballInputScheme : InputScheme return true; } +}; + +struct FileInputScheme : CurlInputScheme +{ + const std::string inputType() const override { return "file"; } + + bool isValidURL(const ParsedURL & url) const override + { + auto parsedUrlScheme = parseUrlScheme(url.scheme); + return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) + && (parsedUrlScheme.application + ? parsedUrlScheme.application.value() == inputType() + : !hasTarballExtension(url.path)); + } + + std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override + { + auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false); + return {std::move(file.storePath), input}; + } +}; + +struct TarballInputScheme : CurlInputScheme +{ + const std::string inputType() const override { return "tarball"; } + + bool isValidURL(const ParsedURL & url) const override + { + auto parsedUrlScheme = parseUrlScheme(url.scheme); + + return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) + && (parsedUrlScheme.application + ? parsedUrlScheme.application.value() == inputType() + : hasTarballExtension(url.path)); + } + std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override { auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first; @@ -233,5 +278,6 @@ struct TarballInputScheme : InputScheme }; static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); }); +static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); }); } |