aboutsummaryrefslogtreecommitdiff
path: root/src/libfetchers
diff options
context:
space:
mode:
Diffstat (limited to 'src/libfetchers')
-rw-r--r--src/libfetchers/git-utils.cc27
-rw-r--r--src/libfetchers/git-utils.hh23
-rw-r--r--src/libfetchers/git.cc371
-rw-r--r--src/libfetchers/github.cc28
4 files changed, 312 insertions, 137 deletions
diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc
new file mode 100644
index 000000000..b2d6b7893
--- /dev/null
+++ b/src/libfetchers/git-utils.cc
@@ -0,0 +1,27 @@
+#include "git-utils.hh"
+
+#include <regex>
+
+std::optional<std::string> parseListReferenceHeadRef(std::string_view line)
+{
+ const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$");
+ std::match_results<std::string_view::const_iterator> match;
+ if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) {
+ return match[1];
+ } else {
+ return std::nullopt;
+ }
+}
+
+std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line)
+{
+ const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$");
+ std::match_results<std::string_view::const_iterator> match;
+ if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) {
+ return std::nullopt;
+ }
+ if (rev != match[2].str()) {
+ return std::nullopt;
+ }
+ return match[1];
+}
diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh
new file mode 100644
index 000000000..946a68a9e
--- /dev/null
+++ b/src/libfetchers/git-utils.hh
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <optional>
+
+// Parses the HEAD ref as reported by `git ls-remote --symref`
+//
+// Returns the head branch name as reported by `git ls-remote --symref`, e.g., if
+// ls-remote returns the output below, "main" is returned based on the ref line.
+//
+// ref: refs/heads/main HEAD
+//
+// If the repository is in 'detached head' state (HEAD is pointing to a rev
+// instead of a branch), parseListReferenceForRev("HEAD") may be used instead.
+std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
+
+// Parses a reference line from `git ls-remote --symref`, e.g.,
+// parseListReferenceForRev("refs/heads/master", line) will return 6926...
+// given the line below.
+//
+// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
+std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc
index af40990e5..266246fe9 100644
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@@ -5,15 +5,20 @@
#include "store-api.hh"
#include "url-parts.hh"
#include "pathlocks.hh"
+#include "util.hh"
+#include "git-utils.hh"
#include "fetch-settings.hh"
+#include <regex>
+#include <string.h>
#include <sys/time.h>
#include <sys/wait.h>
using namespace std::string_literals;
namespace nix::fetchers {
+namespace {
// Explicit initial branch of our bare repo to suppress warnings from new version of git.
// The value itself does not matter, since we always fetch a specific revision or branch.
@@ -21,25 +26,228 @@ namespace nix::fetchers {
// old version of git, which will ignore unrecognized `-c` options.
const std::string gitInitialBranch = "__nix_dummy_branch";
-static std::string getGitDir()
+std::string getGitDir()
{
- auto gitDir = getEnv("GIT_DIR");
- if (!gitDir) {
- return ".git";
+ return getEnv("GIT_DIR").value_or(".git");
+}
+
+bool isCacheFileWithinTtl(const time_t now, const struct stat & st)
+{
+ return st.st_mtime + settings.tarballTtl > now;
+}
+
+bool touchCacheFile(const Path& path, const time_t& touch_time)
+{
+ struct timeval times[2];
+ times[0].tv_sec = touch_time;
+ times[0].tv_usec = 0;
+ times[1].tv_sec = touch_time;
+ times[1].tv_usec = 0;
+
+ return lutimes(path.c_str(), times) == 0;
+}
+
+Path getCachePath(std::string key)
+{
+ return getCacheDir() + "/nix/gitv3/" +
+ hashString(htSHA256, key).to_string(Base32, false);
+}
+
+// Returns the name of the HEAD branch.
+//
+// Returns the head branch name as reported by git ls-remote --symref, e.g., if
+// ls-remote returns the output below, "main" is returned based on the ref line.
+//
+// ref: refs/heads/main HEAD
+// ...
+std::optional<std::string> readHead(const Path & path)
+{
+ auto [exit_code, output] = runProgram(RunOptions {
+ .program = "git",
+ .args = {"ls-remote", "--symref", path},
+ });
+ if (exit_code != 0) {
+ return std::nullopt;
+ }
+
+ std::string_view line = output;
+ line = line.substr(0, line.find("\n"));
+ if (const auto ref = parseListReferenceHeadRef(line); ref) {
+ debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
+ return *ref;
+ }
+ if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
+ debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
+ return *rev;
}
- return *gitDir;
+ return std::nullopt;
}
-static std::string readHead(const Path & path)
+// Persist the HEAD ref from the remote repo in the local cached repo.
+bool storeCachedHead(const std::string& actualUrl, const std::string& headRef)
{
- return chomp(runProgram("git", true, { "-C", path, "--git-dir", ".git", "rev-parse", "--abbrev-ref", "HEAD" }));
+ Path cacheDir = getCachePath(actualUrl);
+ try {
+ runProgram("git", true, { "-C", cacheDir, "symbolic-ref", "--", "HEAD", headRef });
+ } catch (ExecError &e) {
+ if (!WIFEXITED(e.status)) throw;
+ return false;
+ }
+ /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */
+ return true;
}
-static bool isNotDotGitDirectory(const Path & path)
+std::optional<std::string> readHeadCached(const std::string& actualUrl)
+{
+ // Create a cache path to store the branch of the HEAD ref. Append something
+ // in front of the URL to prevent collision with the repository itself.
+ Path cacheDir = getCachePath(actualUrl);
+ Path headRefFile = cacheDir + "/HEAD";
+
+ time_t now = time(0);
+ struct stat st;
+ std::optional<std::string> cachedRef;
+ if (stat(headRefFile.c_str(), &st) == 0) {
+ cachedRef = readHead(cacheDir);
+ if (cachedRef != std::nullopt &&
+ *cachedRef != gitInitialBranch &&
+ isCacheFileWithinTtl(now, st)) {
+ debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl);
+ return cachedRef;
+ }
+ }
+
+ auto ref = readHead(actualUrl);
+ if (ref) {
+ return ref;
+ }
+
+ if (cachedRef) {
+ // If the cached git ref is expired in fetch() below, and the 'git fetch'
+ // fails, it falls back to continuing with the most recent version.
+ // This function must behave the same way, so we return the expired
+ // cached ref here.
+ warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef);
+ return *cachedRef;
+ }
+
+ return std::nullopt;
+}
+
+bool isNotDotGitDirectory(const Path & path)
{
return baseNameOf(path) != ".git";
}
+struct WorkdirInfo
+{
+ bool clean = false;
+ bool hasHead = false;
+};
+
+// Returns whether a git workdir is clean and has commits.
+WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir)
+{
+ const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
+ auto gitDir = getGitDir();
+
+ auto env = getEnv();
+ // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
+ // that way unknown errors can lead to a failure instead of continuing through the wrong code path
+ env["LC_ALL"] = "C";
+
+ /* Check whether HEAD points to something that looks like a commit,
+ since that is the refrence we want to use later on. */
+ auto result = runProgram(RunOptions {
+ .program = "git",
+ .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
+ .environment = env,
+ .mergeStderrToStdout = true
+ });
+ auto exitCode = WEXITSTATUS(result.first);
+ auto errorMessage = result.second;
+
+ if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
+ throw Error("'%s' is not a Git repository", workdir);
+ } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
+ // indicates that the repo does not have any commits
+ // we want to proceed and will consider it dirty later
+ } else if (exitCode != 0) {
+ // any other errors should lead to a failure
+ throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage);
+ }
+
+ bool clean = false;
+ bool hasHead = exitCode == 0;
+
+ try {
+ if (hasHead) {
+ // Using git diff is preferrable over lower-level operations here,
+ // because its conceptually simpler and we only need the exit code anyways.
+ auto gitDiffOpts = Strings({ "-C", workdir, "diff", "HEAD", "--quiet"});
+ if (!submodules) {
+ // Changes in submodules should only make the tree dirty
+ // when those submodules will be copied as well.
+ gitDiffOpts.emplace_back("--ignore-submodules");
+ }
+ gitDiffOpts.emplace_back("--");
+ runProgram("git", true, gitDiffOpts);
+
+ clean = true;
+ }
+ } catch (ExecError & e) {
+ if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
+ }
+
+ return WorkdirInfo { .clean = clean, .hasHead = hasHead };
+}
+
+std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo)
+{
+ const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
+
+ if (!fetchSettings.allowDirty)
+ throw Error("Git tree '%s' is dirty", workdir);
+
+ if (fetchSettings.warnDirty)
+ warn("Git tree '%s' is dirty", workdir);
+
+ auto gitOpts = Strings({ "-C", workdir, "ls-files", "-z" });
+ if (submodules)
+ gitOpts.emplace_back("--recurse-submodules");
+
+ auto files = tokenizeString<std::set<std::string>>(
+ runProgram("git", true, gitOpts), "\0"s);
+
+ Path actualPath(absPath(workdir));
+
+ PathFilter filter = [&](const Path & p) -> bool {
+ assert(hasPrefix(p, actualPath));
+ std::string file(p, actualPath.size() + 1);
+
+ auto st = lstat(p);
+
+ if (S_ISDIR(st.st_mode)) {
+ auto prefix = file + "/";
+ auto i = files.lower_bound(prefix);
+ return i != files.end() && hasPrefix(*i, prefix);
+ }
+
+ return files.count(file);
+ };
+
+ auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
+
+ // FIXME: maybe we should use the timestamp of the last
+ // modified dirty file?
+ input.attrs.insert_or_assign(
+ "lastModified",
+ workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
+
+ return {std::move(storePath), input};
+}
+} // end namespace
+
struct GitInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url) override
@@ -234,106 +442,16 @@ struct GitInputScheme : InputScheme
auto [isLocal, actualUrl_] = getActualUrl(input);
auto actualUrl = actualUrl_; // work around clang bug
- // If this is a local directory and no ref or revision is
- // given, then allow the use of an unclean working tree.
+ /* If this is a local directory and no ref or revision is given,
+ allow fetching directly from a dirty workdir. */
if (!input.getRef() && !input.getRev() && isLocal) {
- bool clean = false;
-
- auto env = getEnv();
- // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
- // that way unknown errors can lead to a failure instead of continuing through the wrong code path
- env["LC_ALL"] = "C";
-
- /* Check whether HEAD points to something that looks like a commit,
- since that is the refrence we want to use later on. */
- auto result = runProgram(RunOptions {
- .program = "git",
- .args = { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
- .environment = env,
- .mergeStderrToStdout = true
- });
- auto exitCode = WEXITSTATUS(result.first);
- auto errorMessage = result.second;
-
- if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
- throw Error("'%s' is not a Git repository", actualUrl);
- } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
- // indicates that the repo does not have any commits
- // we want to proceed and will consider it dirty later
- } else if (exitCode != 0) {
- // any other errors should lead to a failure
- throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage);
- }
-
- bool hasHead = exitCode == 0;
- try {
- if (hasHead) {
- // Using git diff is preferrable over lower-level operations here,
- // because its conceptually simpler and we only need the exit code anyways.
- auto gitDiffOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "diff", "HEAD", "--quiet"});
- if (!submodules) {
- // Changes in submodules should only make the tree dirty
- // when those submodules will be copied as well.
- gitDiffOpts.emplace_back("--ignore-submodules");
- }
- gitDiffOpts.emplace_back("--");
- runProgram("git", true, gitDiffOpts);
-
- clean = true;
- }
- } catch (ExecError & e) {
- if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
- }
-
- if (!clean) {
-
- /* This is an unclean working tree. So copy all tracked files. */
-
- if (!fetchSettings.allowDirty)
- throw Error("Git tree '%s' is dirty", actualUrl);
-
- if (fetchSettings.warnDirty)
- warn("Git tree '%s' is dirty", actualUrl);
-
- auto gitOpts = Strings({ "-C", actualUrl, "--git-dir", gitDir, "ls-files", "-z" });
- if (submodules)
- gitOpts.emplace_back("--recurse-submodules");
-
- auto files = tokenizeString<std::set<std::string>>(
- runProgram("git", true, gitOpts), "\0"s);
-
- Path actualPath(absPath(actualUrl));
-
- PathFilter filter = [&](const Path & p) -> bool {
- assert(hasPrefix(p, actualPath));
- std::string file(p, actualPath.size() + 1);
-
- auto st = lstat(p);
-
- if (S_ISDIR(st.st_mode)) {
- auto prefix = file + "/";
- auto i = files.lower_bound(prefix);
- return i != files.end() && hasPrefix(*i, prefix);
- }
-
- return files.count(file);
- };
-
- auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
-
- // FIXME: maybe we should use the timestamp of the last
- // modified dirty file?
- input.attrs.insert_or_assign(
- "lastModified",
- hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
-
- return {std::move(storePath), input};
+ auto workdirInfo = getWorkdirInfo(input, actualUrl);
+ if (!workdirInfo.clean) {
+ return fetchFromWorkdir(store, input, actualUrl, workdirInfo);
}
}
- if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master");
-
- Attrs unlockedAttrs({
+ const Attrs unlockedAttrs({
{"type", cacheType},
{"name", name},
{"url", actualUrl},
@@ -343,14 +461,30 @@ struct GitInputScheme : InputScheme
Path repoDir;
if (isLocal) {
+ if (!input.getRef()) {
+ auto head = readHead(actualUrl);
+ if (!head) {
+ warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
+ head = "master";
+ }
+ input.attrs.insert_or_assign("ref", *head);
+ }
if (!input.getRev())
input.attrs.insert_or_assign("rev",
Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev());
repoDir = actualUrl;
-
} else {
+ const bool useHeadRef = !input.getRef();
+ if (useHeadRef) {
+ auto head = readHeadCached(actualUrl);
+ if (!head) {
+ warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
+ head = "master";
+ }
+ input.attrs.insert_or_assign("ref", *head);
+ }
if (auto res = getCache()->lookup(store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1);
@@ -360,7 +494,7 @@ struct GitInputScheme : InputScheme
}
}
- Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false);
+ Path cacheDir = getCachePath(actualUrl);
repoDir = cacheDir;
gitDir = ".";
@@ -383,7 +517,7 @@ struct GitInputScheme : InputScheme
repo. */
if (input.getRev()) {
try {
- runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() });
+ runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
@@ -400,7 +534,7 @@ struct GitInputScheme : InputScheme
git fetch to update the local ref to the remote ref. */
struct stat st;
doFetch = stat(localRefFile.c_str(), &st) != 0 ||
- (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now;
+ !isCacheFileWithinTtl(now, st);
}
}
@@ -418,19 +552,16 @@ struct GitInputScheme : InputScheme
: ref == "HEAD"
? *ref
: "refs/heads/" + *ref;
- runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) });
+ runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) });
} catch (Error & e) {
if (!pathExists(localRefFile)) throw;
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl);
}
- struct timeval times[2];
- times[0].tv_sec = now;
- times[0].tv_usec = 0;
- times[1].tv_sec = now;
- times[1].tv_usec = 0;
-
- utimes(localRefFile.c_str(), times);
+ if (!touchCacheFile(localRefFile, now))
+ warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno));
+ if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef()))
+ warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl);
}
if (!input.getRev())
@@ -459,7 +590,7 @@ struct GitInputScheme : InputScheme
auto result = runProgram(RunOptions {
.program = "git",
- .args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() },
+ .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() },
.mergeStderrToStdout = true
});
if (WEXITSTATUS(result.first) == 128
@@ -498,7 +629,7 @@ struct GitInputScheme : InputScheme
auto source = sinkToSource([&](Sink & sink) {
runProgram2({
.program = "git",
- .args = { "-C", repoDir, "archive", input.getRev()->gitRev() },
+ .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() },
.standardOut = &sink
});
});
@@ -508,7 +639,7 @@ struct GitInputScheme : InputScheme
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);
- auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
+ auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
@@ -517,7 +648,7 @@ struct GitInputScheme : InputScheme
if (!shallow)
infoAttrs.insert_or_assign("revCount",
- std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() })));
+ std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() })));
if (!_input.getRev())
getCache()->add(
diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc
index 58b6e7c04..1bdf2759f 100644
--- a/src/libfetchers/github.cc
+++ b/src/libfetchers/github.cc
@@ -4,7 +4,7 @@
#include "store-api.hh"
#include "types.hh"
#include "url-parts.hh"
-
+#include "git-utils.hh"
#include "fetchers.hh"
#include "fetch-settings.hh"
@@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
getline(is, line);
- auto ref_index = line.find("ref: ");
- if (ref_index == std::string::npos) {
+ auto r = parseListReferenceHeadRef(line);
+ if (!r) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
}
-
- ref_uri = line.substr(ref_index+5, line.length()-1);
- } else
+ ref_uri = *r;
+ } else {
ref_uri = fmt("refs/(heads|tags)/%s", ref);
+ }
auto file = store->toRealPath(
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
std::ifstream is(file);
std::string line;
- std::string id;
- while(getline(is, line)) {
- // Append $ to avoid partial name matches
- std::regex pattern(fmt("%s$", ref_uri));
-
- if (std::regex_search(line, pattern)) {
- id = line.substr(0, line.find('\t'));
- break;
- }
+ std::optional<std::string> id;
+ while(!id && getline(is, line)) {
+ id = parseListReferenceForRev(ref_uri, line);
}
- if(id.empty())
+ if(!id)
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
- auto rev = Hash::parseAny(id, htSHA1);
+ auto rev = Hash::parseAny(*id, htSHA1);
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
return rev;
}