aboutsummaryrefslogtreecommitdiff
path: root/src/libfetchers
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2022-11-03 17:43:40 +0100
committerEelco Dolstra <edolstra@gmail.com>2022-11-03 17:43:40 +0100
commitb95faccf03e5213b6087626ab8d46e0704aad6b5 (patch)
treef9d560f2004f951d8efbf1024292a88366d8efd7 /src/libfetchers
parent47dec825c5daeeb9d615eb4d1eead3dbaa06c7c9 (diff)
parentdd1970c233a82328445b69e903574e14115ee933 (diff)
Merge remote-tracking branch 'origin/master' into auto-uid-allocation
Diffstat (limited to 'src/libfetchers')
-rw-r--r--src/libfetchers/fetch-settings.cc13
-rw-r--r--src/libfetchers/fetch-settings.hh93
-rw-r--r--src/libfetchers/fetchers.cc15
-rw-r--r--src/libfetchers/git.cc374
-rw-r--r--src/libfetchers/github.cc58
-rw-r--r--src/libfetchers/mercurial.cc27
-rw-r--r--src/libfetchers/path.cc14
-rw-r--r--src/libfetchers/registry.cc4
-rw-r--r--src/libfetchers/tarball.cc90
9 files changed, 532 insertions, 156 deletions
diff --git a/src/libfetchers/fetch-settings.cc b/src/libfetchers/fetch-settings.cc
new file mode 100644
index 000000000..e7d5244dc
--- /dev/null
+++ b/src/libfetchers/fetch-settings.cc
@@ -0,0 +1,13 @@
+#include "fetch-settings.hh"
+
+namespace nix {
+
+FetchSettings::FetchSettings()
+{
+}
+
+FetchSettings fetchSettings;
+
+static GlobalConfig::Register rFetchSettings(&fetchSettings);
+
+}
diff --git a/src/libfetchers/fetch-settings.hh b/src/libfetchers/fetch-settings.hh
new file mode 100644
index 000000000..6452143a1
--- /dev/null
+++ b/src/libfetchers/fetch-settings.hh
@@ -0,0 +1,93 @@
+#pragma once
+
+#include "types.hh"
+#include "config.hh"
+#include "util.hh"
+
+#include <map>
+#include <limits>
+
+#include <sys/types.h>
+
+namespace nix {
+
+struct FetchSettings : public Config
+{
+ FetchSettings();
+
+ Setting<StringMap> accessTokens{this, {}, "access-tokens",
+ R"(
+ Access tokens used to access protected GitHub, GitLab, or
+ other locations requiring token-based authentication.
+
+ Access tokens are specified as a string made up of
+ space-separated `host=token` values. The specific token
+ used is selected by matching the `host` portion against the
+ "host" specification of the input. The actual use of the
+ `token` value is determined by the type of resource being
+ accessed:
+
+ * Github: the token value is the OAUTH-TOKEN string obtained
+ as the Personal Access Token from the Github server (see
+ https://docs.github.com/en/developers/apps/building-oauth-apps/authorizing-oauth-apps).
+
+ * Gitlab: the token value is either the OAuth2 token or the
+ Personal Access Token (these are different types of tokens
+ for gitlab, see
+ https://docs.gitlab.com/12.10/ee/api/README.html#authentication).
+ The `token` value should be `type:tokenstring` where
+ `type` is either `OAuth2` or `PAT` to indicate which type
+ of token is being specified.
+
+ Example `~/.config/nix/nix.conf`:
+
+ ```
+ access-tokens = github.com=23ac...b289 gitlab.mycompany.com=PAT:A123Bp_Cd..EfG gitlab.com=OAuth2:1jklw3jk
+ ```
+
+ Example `~/code/flake.nix`:
+
+ ```nix
+ input.foo = {
+ type = "gitlab";
+ host = "gitlab.mycompany.com";
+ owner = "mycompany";
+ repo = "pro";
+ };
+ ```
+
+ This example specifies three tokens, one each for accessing
+ github.com, gitlab.mycompany.com, and gitlab.com.
+
+ The `input.foo` uses the "gitlab" fetcher, which might
+ require specifying the token type along with the token
+ value.
+ )"};
+
+ Setting<bool> allowDirty{this, true, "allow-dirty",
+ "Whether to allow dirty Git/Mercurial trees."};
+
+ Setting<bool> warnDirty{this, true, "warn-dirty",
+ "Whether to warn about dirty Git/Mercurial trees."};
+
+ Setting<std::string> flakeRegistry{this, "https://channels.nixos.org/flake-registry.json", "flake-registry",
+ "Path or URI of the global flake registry."};
+
+ Setting<bool> useRegistries{this, true, "use-registries",
+ "Whether to use flake registries to resolve flake references."};
+
+ Setting<bool> acceptFlakeConfig{this, false, "accept-flake-config",
+ "Whether to accept nix configuration from a flake without prompting."};
+
+ Setting<std::string> commitLockFileSummary{
+ this, "", "commit-lockfile-summary",
+ R"(
+ The commit summary to use when committing changed flake lock files. If
+ empty, the summary is generated based on the action performed.
+ )"};
+};
+
+// FIXME: don't use a global variable.
+extern FetchSettings fetchSettings;
+
+}
diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc
index 976f40d3b..6957d2da4 100644
--- a/src/libfetchers/fetchers.cc
+++ b/src/libfetchers/fetchers.cc
@@ -238,9 +238,18 @@ std::optional<std::string> Input::getRef() const
std::optional<Hash> Input::getRev() const
{
- if (auto s = maybeGetStrAttr(attrs, "rev"))
- return Hash::parseAny(*s, htSHA1);
- return {};
+ std::optional<Hash> hash = {};
+
+ if (auto s = maybeGetStrAttr(attrs, "rev")) {
+ try {
+ hash = Hash::parseAnyPrefixed(*s);
+ } catch (BadHash &e) {
+ // Default to sha1 for backwards compatibility with existing flakes
+ hash = Hash::parseAny(*s, htSHA1);
+ }
+ }
+
+ return hash;
}
std::optional<uint64_t> Input::getRevCount() const
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc
index 7f65c1533..7b7a1be35 100644
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@@ -5,13 +5,20 @@
#include "store-api.hh"
#include "url-parts.hh"
#include "pathlocks.hh"
+#include "util.hh"
+#include "git.hh"
+#include "fetch-settings.hh"
+
+#include <regex>
+#include <string.h>
#include <sys/time.h>
#include <sys/wait.h>
using namespace std::string_literals;
namespace nix::fetchers {
+namespace {
// Explicit initial branch of our bare repo to suppress warnings from new version of git.
// The value itself does not matter, since we always fetch a specific revision or branch.
@@ -19,17 +26,227 @@ namespace nix::fetchers {
// old version of git, which will ignore unrecognized `-c` options.
const std::string gitInitialBranch = "__nix_dummy_branch";
-static std::string readHead(const Path & path)
+bool isCacheFileWithinTtl(const time_t now, const struct stat & st)
+{
+ return st.st_mtime + settings.tarballTtl > now;
+}
+
+bool touchCacheFile(const Path& path, const time_t& touch_time)
+{
+ struct timeval times[2];
+ times[0].tv_sec = touch_time;
+ times[0].tv_usec = 0;
+ times[1].tv_sec = touch_time;
+ times[1].tv_usec = 0;
+
+ return lutimes(path.c_str(), times) == 0;
+}
+
+Path getCachePath(std::string key)
+{
+ return getCacheDir() + "/nix/gitv3/" +
+ hashString(htSHA256, key).to_string(Base32, false);
+}
+
+// Returns the name of the HEAD branch.
+//
+// Returns the head branch name as reported by git ls-remote --symref, e.g., if
+// ls-remote returns the output below, "main" is returned based on the ref line.
+//
+// ref: refs/heads/main HEAD
+// ...
+std::optional<std::string> readHead(const Path & path)
+{
+ auto [exit_code, output] = runProgram(RunOptions {
+ .program = "git",
+ .args = {"ls-remote", "--symref", path},
+ });
+ if (exit_code != 0) {
+ return std::nullopt;
+ }
+
+ std::string_view line = output;
+ line = line.substr(0, line.find("\n"));
+ if (const auto parseResult = git::parseLsRemoteLine(line)) {
+ switch (parseResult->kind) {
+ case git::LsRemoteRefLine::Kind::Symbolic:
+ debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path);
+ break;
+ case git::LsRemoteRefLine::Kind::Object:
+ debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path);
+ break;
+ }
+ return parseResult->target;
+ }
+ return std::nullopt;
+}
+
+// Persist the HEAD ref from the remote repo in the local cached repo.
+bool storeCachedHead(const std::string& actualUrl, const std::string& headRef)
+{
+ Path cacheDir = getCachePath(actualUrl);
+ auto gitDir = ".";
+ try {
+ runProgram("git", true, { "-C", cacheDir, "--git-dir", gitDir, "symbolic-ref", "--", "HEAD", headRef });
+ } catch (ExecError &e) {
+ if (!WIFEXITED(e.status)) throw;
+ return false;
+ }
+ /* No need to touch HEAD, because `git symbolic-ref` updates the mtime. */
+ return true;
+}
+
+std::optional<std::string> readHeadCached(const std::string& actualUrl)
{
- return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" }));
+ // The HEAD ref is cached in the HEAD file of the cache directory derived
+ // from the URL, i.e. the same directory that storeCachedHead() updates.
+ Path cacheDir = getCachePath(actualUrl);
+ Path headRefFile = cacheDir + "/HEAD";
+
+ time_t now = time(0);
+ struct stat st;
+ std::optional<std::string> cachedRef;
+ if (stat(headRefFile.c_str(), &st) == 0) {
+ cachedRef = readHead(cacheDir);
+ if (cachedRef != std::nullopt &&
+ *cachedRef != gitInitialBranch &&
+ isCacheFileWithinTtl(now, st)) {
+ debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl);
+ return cachedRef;
+ }
+ }
+
+ auto ref = readHead(actualUrl);
+ if (ref) {
+ return ref;
+ }
+
+ if (cachedRef) {
+ // If the cached git ref is expired in fetch() below, and the 'git fetch'
+ // fails, it falls back to continuing with the most recent version.
+ // This function must behave the same way, so we return the expired
+ // cached ref here.
+ warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef);
+ return *cachedRef;
+ }
+
+ return std::nullopt;
+}
+
+bool isNotDotGitDirectory(const Path & path)
+{
+ return baseNameOf(path) != ".git";
+}
+
+struct WorkdirInfo
+{
+ bool clean = false;
+ bool hasHead = false;
+};
+
+// Returns whether a git workdir is clean and has commits.
+WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir)
+{
+ const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
+ std::string gitDir(".git");
+
+ auto env = getEnv();
+ // Set LC_ALL to C, because we rely on the error messages from git rev-parse to determine what went wrong;
+ // that way, unknown errors lead to a failure instead of continuing through the wrong code path.
+ env["LC_ALL"] = "C";
+
+ /* Check whether HEAD points to something that looks like a commit,
+ since that is the reference we want to use later on. */
+ auto result = runProgram(RunOptions {
+ .program = "git",
+ .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
+ .environment = env,
+ .mergeStderrToStdout = true
+ });
+ auto exitCode = WEXITSTATUS(result.first);
+ auto errorMessage = result.second;
+
+ if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
+ throw Error("'%s' is not a Git repository", workdir);
+ } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
+ // indicates that the repo does not have any commits
+ // we want to proceed and will consider it dirty later
+ } else if (exitCode != 0) {
+ // any other errors should lead to a failure
+ throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage);
+ }
+
+ bool clean = false;
+ bool hasHead = exitCode == 0;
+
+ try {
+ if (hasHead) {
+ // Using git diff is preferable over lower-level operations here,
+ // because it's conceptually simpler and we only need the exit code anyway.
+ auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"});
+ if (!submodules) {
+ // Changes in submodules should only make the tree dirty
+ // when those submodules will be copied as well.
+ gitDiffOpts.emplace_back("--ignore-submodules");
+ }
+ gitDiffOpts.emplace_back("--");
+ runProgram("git", true, gitDiffOpts);
+
+ clean = true;
+ }
+ } catch (ExecError & e) {
+ if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
+ }
+
+ return WorkdirInfo { .clean = clean, .hasHead = hasHead };
}
-static bool isNotDotGitDirectory(const Path & path)
+std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo)
{
- static const std::regex gitDirRegex("^(?:.*/)?\\.git$");
+ const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
+ auto gitDir = ".git";
+
+ if (!fetchSettings.allowDirty)
+ throw Error("Git tree '%s' is dirty", workdir);
+
+ if (fetchSettings.warnDirty)
+ warn("Git tree '%s' is dirty", workdir);
+
+ auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" });
+ if (submodules)
+ gitOpts.emplace_back("--recurse-submodules");
+
+ auto files = tokenizeString<std::set<std::string>>(
+ runProgram("git", true, gitOpts), "\0"s);
+
+ Path actualPath(absPath(workdir));
+
+ PathFilter filter = [&](const Path & p) -> bool {
+ assert(hasPrefix(p, actualPath));
+ std::string file(p, actualPath.size() + 1);
+
+ auto st = lstat(p);
+
+ if (S_ISDIR(st.st_mode)) {
+ auto prefix = file + "/";
+ auto i = files.lower_bound(prefix);
+ return i != files.end() && hasPrefix(*i, prefix);
+ }
- return not std::regex_match(path, gitDirRegex);
+ return files.count(file);
+ };
+
+ auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
+
+ // FIXME: maybe we should use the timestamp of the last
+ // modified dirty file?
+ input.attrs.insert_or_assign(
+ "lastModified",
+ workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
+
+ return {std::move(storePath), input};
}
+} // end namespace
struct GitInputScheme : InputScheme
{
@@ -150,13 +367,14 @@ struct GitInputScheme : InputScheme
{
auto sourcePath = getSourcePath(input);
assert(sourcePath);
+ auto gitDir = ".git";
runProgram("git", true,
- { "-C", *sourcePath, "add", "--force", "--intent-to-add", "--", std::string(file) });
+ { "-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file) });
if (commitMsg)
runProgram("git", true,
- { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg });
+ { "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg });
}
std::pair<bool, std::string> getActualUrl(const Input & input) const
@@ -175,6 +393,7 @@ struct GitInputScheme : InputScheme
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
Input input(_input);
+ auto gitDir = ".git";
std::string name = input.getName();
@@ -187,8 +406,16 @@ struct GitInputScheme : InputScheme
if (submodules) cacheType += "-submodules";
if (allRefs) cacheType += "-all-refs";
+ auto checkHashType = [&](const std::optional<Hash> & hash)
+ {
+ if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256))
+ throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(Base16, true));
+ };
+
auto getLockedAttrs = [&]()
{
+ checkHashType(input.getRev());
+
return Attrs({
{"type", cacheType},
{"name", name},
@@ -215,97 +442,54 @@ struct GitInputScheme : InputScheme
auto [isLocal, actualUrl_] = getActualUrl(input);
auto actualUrl = actualUrl_; // work around clang bug
- // If this is a local directory and no ref or revision is
- // given, then allow the use of an unclean working tree.
+ /* If this is a local directory and no ref or revision is given,
+ allow fetching directly from a dirty workdir. */
if (!input.getRef() && !input.getRev() && isLocal) {
- bool clean = false;
-
- /* Check whether this repo has any commits. There are
- probably better ways to do this. */
- auto gitDir = actualUrl + "/.git";
- auto commonGitDir = chomp(runProgram(
- "git",
- true,
- { "-C", actualUrl, "rev-parse", "--git-common-dir" }
- ));
- if (commonGitDir != ".git")
- gitDir = commonGitDir;
-
- bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty();
-
- try {
- if (haveCommits) {
- runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" });
- clean = true;
- }
- } catch (ExecError & e) {
- if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
- }
-
- if (!clean) {
-
- /* This is an unclean working tree. So copy all tracked files. */
-
- if (!settings.allowDirty)
- throw Error("Git tree '%s' is dirty", actualUrl);
-
- if (settings.warnDirty)
- warn("Git tree '%s' is dirty", actualUrl);
-
- auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" });
- if (submodules)
- gitOpts.emplace_back("--recurse-submodules");
-
- auto files = tokenizeString<std::set<std::string>>(
- runProgram("git", true, gitOpts), "\0"s);
-
- PathFilter filter = [&](const Path & p) -> bool {
- assert(hasPrefix(p, actualUrl));
- std::string file(p, actualUrl.size() + 1);
-
- auto st = lstat(p);
-
- if (S_ISDIR(st.st_mode)) {
- auto prefix = file + "/";
- auto i = files.lower_bound(prefix);
- return i != files.end() && hasPrefix(*i, prefix);
- }
-
- return files.count(file);
- };
-
- auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter);
-
- // FIXME: maybe we should use the timestamp of the last
- // modified dirty file?
- input.attrs.insert_or_assign(
- "lastModified",
- haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
-
- return {std::move(storePath), input};
+ auto workdirInfo = getWorkdirInfo(input, actualUrl);
+ if (!workdirInfo.clean) {
+ return fetchFromWorkdir(store, input, actualUrl, workdirInfo);
}
}
- if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master");
-
Attrs unlockedAttrs({
{"type", cacheType},
{"name", name},
{"url", actualUrl},
- {"ref", *input.getRef()},
});
Path repoDir;
if (isLocal) {
+ if (!input.getRef()) {
+ auto head = readHead(actualUrl);
+ if (!head) {
+ warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
+ head = "master";
+ }
+ input.attrs.insert_or_assign("ref", *head);
+ unlockedAttrs.insert_or_assign("ref", *head);
+ }
if (!input.getRev())
input.attrs.insert_or_assign("rev",
- Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input.getRef() })), htSHA1).gitRev());
+ Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev());
repoDir = actualUrl;
-
} else {
+ const bool useHeadRef = !input.getRef();
+ if (useHeadRef) {
+ auto head = readHeadCached(actualUrl);
+ if (!head) {
+ warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
+ head = "master";
+ }
+ input.attrs.insert_or_assign("ref", *head);
+ unlockedAttrs.insert_or_assign("ref", *head);
+ } else {
+ if (!input.getRev()) {
+ unlockedAttrs.insert_or_assign("ref", input.getRef().value());
+ }
+ }
if (auto res = getCache()->lookup(store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1);
@@ -315,8 +499,9 @@ struct GitInputScheme : InputScheme
}
}
- Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false);
+ Path cacheDir = getCachePath(actualUrl);
repoDir = cacheDir;
+ gitDir = ".";
createDirs(dirOf(cacheDir));
PathLocks cacheDirLock({cacheDir + ".lock"});
@@ -337,7 +522,7 @@ struct GitInputScheme : InputScheme
repo. */
if (input.getRev()) {
try {
- runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() });
+ runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
@@ -354,7 +539,7 @@ struct GitInputScheme : InputScheme
git fetch to update the local ref to the remote ref. */
struct stat st;
doFetch = stat(localRefFile.c_str(), &st) != 0 ||
- (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now;
+ !isCacheFileWithinTtl(now, st);
}
}
@@ -372,19 +557,16 @@ struct GitInputScheme : InputScheme
: ref == "HEAD"
? *ref
: "refs/heads/" + *ref;
- runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) });
+ runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) });
} catch (Error & e) {
if (!pathExists(localRefFile)) throw;
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl);
}
- struct timeval times[2];
- times[0].tv_sec = now;
- times[0].tv_usec = 0;
- times[1].tv_sec = now;
- times[1].tv_usec = 0;
-
- utimes(localRefFile.c_str(), times);
+ if (!touchCacheFile(localRefFile, now))
+ warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno));
+ if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef()))
+ warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl);
}
if (!input.getRev())
@@ -393,10 +575,10 @@ struct GitInputScheme : InputScheme
// cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder
}
- bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true";
+ bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true";
if (isShallow && !shallow)
- throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl);
+ throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl);
// FIXME: check whether rev is an ancestor of ref.
@@ -413,7 +595,7 @@ struct GitInputScheme : InputScheme
auto result = runProgram(RunOptions {
.program = "git",
- .args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() },
+ .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() },
.mergeStderrToStdout = true
});
if (WEXITSTATUS(result.first) == 128
@@ -452,7 +634,7 @@ struct GitInputScheme : InputScheme
auto source = sinkToSource([&](Sink & sink) {
runProgram2({
.program = "git",
- .args = { "-C", repoDir, "archive", input.getRev()->gitRev() },
+ .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() },
.standardOut = &sink
});
});
@@ -462,7 +644,7 @@ struct GitInputScheme : InputScheme
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);
- auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
+ auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
@@ -471,7 +653,7 @@ struct GitInputScheme : InputScheme
if (!shallow)
infoAttrs.insert_or_assign("revCount",
- std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() })));
+ std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() })));
if (!_input.getRev())
getCache()->add(
diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc
index 70622bf79..2115ce2f5 100644
--- a/src/libfetchers/github.cc
+++ b/src/libfetchers/github.cc
@@ -1,10 +1,12 @@
#include "filetransfer.hh"
#include "cache.hh"
-#include "fetchers.hh"
#include "globals.hh"
#include "store-api.hh"
#include "types.hh"
#include "url-parts.hh"
+#include "git.hh"
+#include "fetchers.hh"
+#include "fetch-settings.hh"
#include <optional>
#include <nlohmann/json.hpp>
@@ -157,7 +159,7 @@ struct GitArchiveInputScheme : InputScheme
std::optional<std::string> getAccessToken(const std::string & host) const
{
- auto tokens = settings.accessTokens.get();
+ auto tokens = fetchSettings.accessTokens.get();
if (auto token = get(tokens, host))
return *token;
return {};
@@ -241,7 +243,10 @@ struct GitHubInputScheme : GitArchiveInputScheme
Hash getRevFromRef(nix::ref<Store> store, const Input & input) const override
{
auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com");
- auto url = fmt("https://api.%s/repos/%s/%s/commits/%s", // FIXME: check
+ auto url = fmt(
+ host == "github.com"
+ ? "https://api.%s/repos/%s/%s/commits/%s"
+ : "https://%s/api/v3/repos/%s/%s/commits/%s",
host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), *input.getRef());
Headers headers = makeHeadersWithAuthTokens(host);
@@ -257,14 +262,20 @@ struct GitHubInputScheme : GitArchiveInputScheme
DownloadUrl getDownloadUrl(const Input & input) const override
{
- // FIXME: use regular /archive URLs instead? api.github.com
- // might have stricter rate limits.
auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com");
- auto url = fmt("https://api.%s/repos/%s/%s/tarball/%s", // FIXME: check if this is correct for self hosted instances
- host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"),
+ Headers headers = makeHeadersWithAuthTokens(host);
+ // If we have no auth headers then we default to the public archive
+ // urls so we do not run into rate limits.
+ const auto urlFmt =
+ host != "github.com"
+ ? "https://%s/api/v3/repos/%s/%s/tarball/%s"
+ : headers.empty()
+ ? "https://%s/%s/%s/archive/%s.tar.gz"
+ : "https://api.%s/repos/%s/%s/tarball/%s";
+
+ const auto url = fmt(urlFmt, host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"),
input.getRev()->to_string(Base16, false));
- Headers headers = makeHeadersWithAuthTokens(host);
return DownloadUrl { url, headers };
}
@@ -373,7 +384,7 @@ struct SourceHutInputScheme : GitArchiveInputScheme
Headers headers = makeHeadersWithAuthTokens(host);
- std::string ref_uri;
+ std::string refUri;
if (ref == "HEAD") {
auto file = store->toRealPath(
downloadFile(store, fmt("%s/HEAD", base_url), "source", false, headers).storePath);
@@ -381,33 +392,32 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
getline(is, line);
- auto ref_index = line.find("ref: ");
- if (ref_index == std::string::npos) {
+ auto remoteLine = git::parseLsRemoteLine(line);
+ if (!remoteLine) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
}
-
- ref_uri = line.substr(ref_index+5, line.length()-1);
- } else
- ref_uri = fmt("refs/heads/%s", ref);
+ refUri = remoteLine->target;
+ } else {
+ refUri = fmt("refs/(heads|tags)/%s", ref);
+ }
+ std::regex refRegex(refUri);
auto file = store->toRealPath(
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
std::ifstream is(file);
std::string line;
- std::string id;
- while(getline(is, line)) {
- auto index = line.find(ref_uri);
- if (index != std::string::npos) {
- id = line.substr(0, index-1);
- break;
- }
+ std::optional<std::string> id;
+ while(!id && getline(is, line)) {
+ auto parsedLine = git::parseLsRemoteLine(line);
+ if (parsedLine && parsedLine->reference && std::regex_match(*parsedLine->reference, refRegex))
+ id = parsedLine->target;
}
- if(id.empty())
+ if(!id)
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
- auto rev = Hash::parseAny(id, htSHA1);
+ auto rev = Hash::parseAny(*id, htSHA1);
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
return rev;
}
diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc
index 12cdecbc1..5c5671681 100644
--- a/src/libfetchers/mercurial.cc
+++ b/src/libfetchers/mercurial.cc
@@ -5,6 +5,8 @@
#include "store-api.hh"
#include "url-parts.hh"
+#include "fetch-settings.hh"
+
#include <sys/time.h>
using namespace std::string_literals;
@@ -34,7 +36,7 @@ static std::string runHg(const Strings & args, const std::optional<std::string>
auto res = runProgram(std::move(opts));
if (!statusOk(res.first))
- throw ExecError(res.first, fmt("hg %1%", statusToString(res.first)));
+ throw ExecError(res.first, "hg %1%", statusToString(res.first));
return res.second;
}
@@ -165,10 +167,10 @@ struct MercurialInputScheme : InputScheme
/* This is an unclean working tree. So copy all tracked
files. */
- if (!settings.allowDirty)
+ if (!fetchSettings.allowDirty)
throw Error("Mercurial tree '%s' is unclean", actualUrl);
- if (settings.warnDirty)
+ if (fetchSettings.warnDirty)
warn("Mercurial tree '%s' is unclean", actualUrl);
input.attrs.insert_or_assign("ref", chomp(runHg({ "branch", "-R", actualUrl })));
@@ -176,9 +178,11 @@ struct MercurialInputScheme : InputScheme
auto files = tokenizeString<std::set<std::string>>(
runHg({ "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s);
+ Path actualPath(absPath(actualUrl));
+
PathFilter filter = [&](const Path & p) -> bool {
- assert(hasPrefix(p, actualUrl));
- std::string file(p, actualUrl.size() + 1);
+ assert(hasPrefix(p, actualPath));
+ std::string file(p, actualPath.size() + 1);
auto st = lstat(p);
@@ -191,7 +195,7 @@ struct MercurialInputScheme : InputScheme
return files.count(file);
};
- auto storePath = store->addToStore(input.getName(), actualUrl, FileIngestionMethod::Recursive, htSHA256, filter);
+ auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
return {std::move(storePath), input};
}
@@ -199,8 +203,17 @@ struct MercurialInputScheme : InputScheme
if (!input.getRef()) input.attrs.insert_or_assign("ref", "default");
+ auto checkHashType = [&](const std::optional<Hash> & hash)
+ {
+ if (hash.has_value() && hash->type != htSHA1)
+ throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", hash->to_string(Base16, true));
+ };
+
+
auto getLockedAttrs = [&]()
{
+ checkHashType(input.getRev());
+
return Attrs({
{"type", "hg"},
{"name", name},
@@ -260,7 +273,7 @@ struct MercurialInputScheme : InputScheme
runHg({ "recover", "-R", cacheDir });
runHg({ "pull", "-R", cacheDir, "--", actualUrl });
} else {
- throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status)));
+ throw ExecError(e.status, "'hg pull' %s", statusToString(e.status));
}
}
} else {
diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc
index 59e228e97..f0ef97da5 100644
--- a/src/libfetchers/path.cc
+++ b/src/libfetchers/path.cc
@@ -1,5 +1,6 @@
#include "fetchers.hh"
#include "store-api.hh"
+#include "archive.hh"
namespace nix::fetchers {
@@ -80,8 +81,9 @@ struct PathInputScheme : InputScheme
// nothing to do
}
- std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
+ std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
+ Input input(_input);
std::string absPath;
auto path = getStrAttr(input.attrs, "path");
@@ -111,9 +113,15 @@ struct PathInputScheme : InputScheme
if (storePath)
store->addTempRoot(*storePath);
- if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath))
+ time_t mtime = 0;
+ if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath)) {
// FIXME: try to substitute storePath.
- storePath = store->addToStore("source", absPath);
+ auto src = sinkToSource([&](Sink & sink) {
+ mtime = dumpPathAndGetMtime(absPath, sink, defaultPathFilter);
+ });
+ storePath = store->addToStoreFromDump(*src, "source");
+ }
+ input.attrs.insert_or_assign("lastModified", uint64_t(mtime));
return {std::move(*storePath), input};
}
diff --git a/src/libfetchers/registry.cc b/src/libfetchers/registry.cc
index f35359d4b..acd1ff866 100644
--- a/src/libfetchers/registry.cc
+++ b/src/libfetchers/registry.cc
@@ -5,6 +5,8 @@
#include "store-api.hh"
#include "local-fs-store.hh"
+#include "fetch-settings.hh"
+
#include <nlohmann/json.hpp>
namespace nix::fetchers {
@@ -150,7 +152,7 @@ void overrideRegistry(
static std::shared_ptr<Registry> getGlobalRegistry(ref<Store> store)
{
static auto reg = [&]() {
- auto path = settings.flakeRegistry.get();
+ auto path = fetchSettings.flakeRegistry.get();
if (!hasPrefix(path, "/")) {
auto storePath = downloadFile(store, path, "flake-registry.json", false).storePath;
diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc
index dde0ad761..6c551bd93 100644
--- a/src/libfetchers/tarball.cc
+++ b/src/libfetchers/tarball.cc
@@ -6,6 +6,7 @@
#include "archive.hh"
#include "tarfile.hh"
#include "types.hh"
+#include "split.hh"
namespace nix::fetchers {
@@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball(
};
}
-struct TarballInputScheme : InputScheme
+// An input scheme corresponding to a curl-downloadable resource.
+struct CurlInputScheme : InputScheme
{
- std::optional<Input> inputFromURL(const ParsedURL & url) override
+ virtual const std::string inputType() const = 0;
+ const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};
+
+ const bool hasTarballExtension(std::string_view path) const
{
- if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {};
+ return hasSuffix(path, ".zip") || hasSuffix(path, ".tar")
+ || hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz")
+ || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
+ || hasSuffix(path, ".tar.zst");
+ }
- if (!hasSuffix(url.path, ".zip")
- && !hasSuffix(url.path, ".tar")
- && !hasSuffix(url.path, ".tgz")
- && !hasSuffix(url.path, ".tar.gz")
- && !hasSuffix(url.path, ".tar.xz")
- && !hasSuffix(url.path, ".tar.bz2")
- && !hasSuffix(url.path, ".tar.zst"))
- return {};
+ virtual bool isValidURL(const ParsedURL & url) const = 0;
+
+ std::optional<Input> inputFromURL(const ParsedURL & url) override
+ {
+ if (!isValidURL(url))
+ return std::nullopt;
Input input;
- input.attrs.insert_or_assign("type", "tarball");
- input.attrs.insert_or_assign("url", url.to_string());
+
+ auto urlWithoutApplicationScheme = url;
+ urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;
+
+ input.attrs.insert_or_assign("type", inputType());
+ input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
@@ -194,14 +205,17 @@ struct TarballInputScheme : InputScheme
std::optional<Input> inputFromAttrs(const Attrs & attrs) override
{
- if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
+ auto type = maybeGetStrAttr(attrs, "type");
+ if (type != inputType()) return {};
+ std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
for (auto & [name, value] : attrs)
- if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name")
- throw Error("unsupported tarball input attribute '%s'", name);
+ if (!allowedNames.count(name))
+ throw Error("unsupported %s input attribute '%s'", *type, name);
Input input;
input.attrs = attrs;
+
//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}
@@ -209,14 +223,9 @@ struct TarballInputScheme : InputScheme
ParsedURL toURL(const Input & input) override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
- // NAR hashes are preferred over file hashes since tar/zip files
- // don't have a canonical representation.
+ // NAR hashes are preferred over file hashes since tar/zip files don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(SRI, true));
- /*
- else if (auto hash = maybeGetStrAttr(input.attrs, "hash"))
- url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true));
- */
return url;
}
@@ -225,6 +234,42 @@ struct TarballInputScheme : InputScheme
return true;
}
+};
+
+struct FileInputScheme : CurlInputScheme
+{
+ const std::string inputType() const override { return "file"; }
+
+ bool isValidURL(const ParsedURL & url) const override
+ {
+ auto parsedUrlScheme = parseUrlScheme(url.scheme);
+ return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
+ && (parsedUrlScheme.application
+ ? parsedUrlScheme.application.value() == inputType()
+ : !hasTarballExtension(url.path));
+ }
+
+ std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
+ {
+ auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
+ return {std::move(file.storePath), input};
+ }
+};
+
+struct TarballInputScheme : CurlInputScheme
+{
+ const std::string inputType() const override { return "tarball"; }
+
+ bool isValidURL(const ParsedURL & url) const override
+ {
+ auto parsedUrlScheme = parseUrlScheme(url.scheme);
+
+ return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
+ && (parsedUrlScheme.application
+ ? parsedUrlScheme.application.value() == inputType()
+ : hasTarballExtension(url.path));
+ }
+
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
@@ -233,5 +278,6 @@ struct TarballInputScheme : InputScheme
};
static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
+static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });
}