diff options
Diffstat (limited to 'src/libfetchers/git.cc')
-rw-r--r-- | src/libfetchers/git.cc | 406 |
1 files changed, 276 insertions, 130 deletions
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 34b1342a0..1f7d7c07d 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -5,9 +5,13 @@ #include "store-api.hh" #include "url-parts.hh" #include "pathlocks.hh" +#include "util.hh" +#include "git.hh" #include "fetch-settings.hh" +#include <regex> +#include <string.h> #include <sys/time.h> #include <sys/wait.h> @@ -15,25 +19,236 @@ using namespace std::string_literals; namespace nix::fetchers { +namespace { + // Explicit initial branch of our bare repo to suppress warnings from new version of git. // The value itself does not matter, since we always fetch a specific revision or branch. // It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with // old version of git, which will ignore unrecognized `-c` options. const std::string gitInitialBranch = "__nix_dummy_branch"; -static std::string readHead(const Path & path) +bool isCacheFileWithinTtl(time_t now, const struct stat & st) +{ + return st.st_mtime + settings.tarballTtl > now; +} + +bool touchCacheFile(const Path & path, time_t touch_time) +{ + struct timeval times[2]; + times[0].tv_sec = touch_time; + times[0].tv_usec = 0; + times[1].tv_sec = touch_time; + times[1].tv_usec = 0; + + return lutimes(path.c_str(), times) == 0; +} + +Path getCachePath(std::string_view key) +{ + return getCacheDir() + "/nix/gitv3/" + + hashString(htSHA256, key).to_string(Base32, false); +} + +// Returns the name of the HEAD branch. +// +// Returns the head branch name as reported by git ls-remote --symref, e.g., if +// ls-remote returns the output below, "main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// ... +std::optional<std::string> readHead(const Path & path) { - return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); + auto [status, output] = runProgram(RunOptions { + .program = "git", + // FIXME: use 'HEAD' to avoid returning all refs + .args = {"ls-remote", "--symref", path}, + }); + if (status != 0) return std::nullopt; + + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto parseResult = git::parseLsRemoteLine(line)) { + switch (parseResult->kind) { + case git::LsRemoteRefLine::Kind::Symbolic: + debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path); + break; + case git::LsRemoteRefLine::Kind::Object: + debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path); + break; + } + return parseResult->target; + } + return std::nullopt; +} + +// Persist the HEAD ref from the remote repo in the local cached repo. +bool storeCachedHead(const std::string & actualUrl, const std::string & headRef) +{ + Path cacheDir = getCachePath(actualUrl); + try { + runProgram("git", true, { "-C", cacheDir, "--git-dir", ".", "symbolic-ref", "--", "HEAD", headRef }); + } catch (ExecError &e) { + if (!WIFEXITED(e.status)) throw; + return false; + } + /* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */ + return true; +} + +std::optional<std::string> readHeadCached(const std::string & actualUrl) +{ + // Create a cache path to store the branch of the HEAD ref. Append something + // in front of the URL to prevent collision with the repository itself. + Path cacheDir = getCachePath(actualUrl); + Path headRefFile = cacheDir + "/HEAD"; + + time_t now = time(0); + struct stat st; + std::optional<std::string> cachedRef; + if (stat(headRefFile.c_str(), &st) == 0) { + cachedRef = readHead(cacheDir); + if (cachedRef != std::nullopt && + *cachedRef != gitInitialBranch && + isCacheFileWithinTtl(now, st)) + { + debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl); + return cachedRef; + } + } + + auto ref = readHead(actualUrl); + if (ref) return ref; + + if (cachedRef) { + // If the cached git ref is expired in fetch() below, and the 'git fetch' + // fails, it falls back to continuing with the most recent version. + // This function must behave the same way, so we return the expired + // cached ref here. + warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef); + return *cachedRef; + } + + return std::nullopt; } -static bool isNotDotGitDirectory(const Path & path) +bool isNotDotGitDirectory(const Path & path) { return baseNameOf(path) != ".git"; } +struct WorkdirInfo +{ + bool clean = false; + bool hasHead = false; +}; + +// Returns whether a git workdir is clean and has commits. +WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + std::string gitDir(".git"); + + auto env = getEnv(); + // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong + // that way unknown errors can lead to a failure instead of continuing through the wrong code path + env["LC_ALL"] = "C"; + + /* Check whether HEAD points to something that looks like a commit, + since that is the refrence we want to use later on. */ + auto result = runProgram(RunOptions { + .program = "git", + .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, + .environment = env, + .mergeStderrToStdout = true + }); + auto exitCode = WEXITSTATUS(result.first); + auto errorMessage = result.second; + + if (errorMessage.find("fatal: not a git repository") != std::string::npos) { + throw Error("'%s' is not a Git repository", workdir); + } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { + // indicates that the repo does not have any commits + // we want to proceed and will consider it dirty later + } else if (exitCode != 0) { + // any other errors should lead to a failure + throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); + } + + bool clean = false; + bool hasHead = exitCode == 0; + + try { + if (hasHead) { + // Using git diff is preferrable over lower-level operations here, + // because its conceptually simpler and we only need the exit code anyways. + auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); + if (!submodules) { + // Changes in submodules should only make the tree dirty + // when those submodules will be copied as well. + gitDiffOpts.emplace_back("--ignore-submodules"); + } + gitDiffOpts.emplace_back("--"); + runProgram("git", true, gitDiffOpts); + + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + return WorkdirInfo { .clean = clean, .hasHead = hasHead }; +} + +std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) +{ + const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + auto gitDir = ".git"; + + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", workdir); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", workdir); + + auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" }); + if (submodules) + gitOpts.emplace_back("--recurse-submodules"); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, gitOpts), "\0"s); + + Path actualPath(absPath(workdir)); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualPath)); + std::string file(p, actualPath.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); + + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + input.attrs.insert_or_assign( + "lastModified", + workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); + + return {std::move(storePath), input}; +} +} // end namespace + struct GitInputScheme : InputScheme { - std::optional<Input> inputFromURL(const ParsedURL & url) override + std::optional<Input> inputFromURL(const ParsedURL & url) const override { if (url.scheme != "git" && url.scheme != "git+http" && @@ -48,7 +263,7 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - for (auto &[name, value] : url.query) { + for (auto & [name, value] : url.query) { if (name == "rev" || name == "ref") attrs.emplace(name, value); else if (name == "shallow" || name == "submodules") @@ -62,7 +277,7 @@ struct GitInputScheme : InputScheme return inputFromAttrs(attrs); } - std::optional<Input> inputFromAttrs(const Attrs & attrs) override + std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { if (maybeGetStrAttr(attrs, "type") != "git") return {}; @@ -85,7 +300,7 @@ struct GitInputScheme : InputScheme return input; } - ParsedURL toURL(const Input & input) override + ParsedURL toURL(const Input & input) const override { auto url = parseURL(getStrAttr(input.attrs, "url")); if (url.scheme != "git") url.scheme = "git+" + url.scheme; @@ -96,7 +311,7 @@ struct GitInputScheme : InputScheme return url; } - bool hasAllInfo(const Input & input) override + bool hasAllInfo(const Input & input) const override { bool maybeDirty = !input.getRef(); bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); @@ -108,7 +323,7 @@ struct GitInputScheme : InputScheme Input applyOverrides( const Input & input, std::optional<std::string> ref, - std::optional<Hash> rev) override + std::optional<Hash> rev) const override { auto res(input); if (rev) res.attrs.insert_or_assign("rev", rev->gitRev()); @@ -118,7 +333,7 @@ struct GitInputScheme : InputScheme return res; } - void clone(const Input & input, const Path & destDir) override + void clone(const Input & input, const Path & destDir) const override { auto [isLocal, actualUrl] = getActualUrl(input); @@ -150,13 +365,14 @@ struct GitInputScheme : InputScheme { auto sourcePath = getSourcePath(input); assert(sourcePath); + auto gitDir = ".git"; runProgram("git", true, - { "-C", *sourcePath, "add", "--force", "--intent-to-add", "--", std::string(file) }); + { "-C", *sourcePath, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(file) }); if (commitMsg) runProgram("git", true, - { "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg }); + { "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg }); } std::pair<bool, std::string> getActualUrl(const Input & input) const @@ -175,6 +391,7 @@ struct GitInputScheme : InputScheme std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override { Input input(_input); + auto gitDir = ".git"; std::string name = input.getName(); @@ -223,123 +440,54 @@ struct GitInputScheme : InputScheme auto [isLocal, actualUrl_] = getActualUrl(input); auto actualUrl = actualUrl_; // work around clang bug - // If this is a local directory and no ref or revision is - // given, then allow the use of an unclean working tree. + /* If this is a local directory and no ref or revision is given, + allow fetching directly from a dirty workdir. */ if (!input.getRef() && !input.getRev() && isLocal) { - bool clean = false; - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", actualUrl, "--git-dir=.git", "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", actualUrl); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", actualUrl, exitCode, errorMessage); - } - - bool hasHead = exitCode == 0; - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", actualUrl, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. - gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", actualUrl); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", actualUrl); - - auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString<std::set<std::string>>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(actualUrl)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - return {std::move(storePath), input}; + auto workdirInfo = getWorkdirInfo(input, actualUrl); + if (!workdirInfo.clean) { + return fetchFromWorkdir(store, input, actualUrl, workdirInfo); } } - if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master"); - Attrs unlockedAttrs({ {"type", cacheType}, {"name", name}, {"url", actualUrl}, - {"ref", *input.getRef()}, }); Path repoDir; if (isLocal) { + if (!input.getRef()) { + auto head = readHead(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } if (!input.getRev()) input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input.getRef() })), htSHA1).gitRev()); + Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); repoDir = actualUrl; - } else { + const bool useHeadRef = !input.getRef(); + if (useHeadRef) { + auto head = readHeadCached(actualUrl); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); + head = "master"; + } + input.attrs.insert_or_assign("ref", *head); + unlockedAttrs.insert_or_assign("ref", *head); + } else { + if (!input.getRev()) { + unlockedAttrs.insert_or_assign("ref", input.getRef().value()); + } + } if (auto res = getCache()->lookup(store, unlockedAttrs)) { auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); @@ -349,8 +497,9 @@ struct GitInputScheme : InputScheme } } - Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + Path cacheDir = getCachePath(actualUrl); repoDir = cacheDir; + gitDir = "."; createDirs(dirOf(cacheDir)); PathLocks cacheDirLock({cacheDir + ".lock"}); @@ -371,7 +520,7 @@ struct GitInputScheme : InputScheme repo. */ if (input.getRev()) { try { - runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); doFetch = false; } catch (ExecError & e) { if (WIFEXITED(e.status)) { @@ -388,7 +537,7 @@ struct GitInputScheme : InputScheme git fetch to update the local ref to the remote ref. */ struct stat st; doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + !isCacheFileWithinTtl(now, st); } } @@ -406,19 +555,16 @@ struct GitInputScheme : InputScheme : ref == "HEAD" ? *ref : "refs/heads/" + *ref; - runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); + runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }); } catch (Error & e) { if (!pathExists(localRefFile)) throw; warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); } - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); + if (!touchCacheFile(localRefFile, now)) + warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); + if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) + warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); } if (!input.getRev()) @@ -427,10 +573,10 @@ struct GitInputScheme : InputScheme // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } - bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; if (isShallow && !shallow) - throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); + throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl); // FIXME: check whether rev is an ancestor of ref. @@ -447,7 +593,7 @@ struct GitInputScheme : InputScheme auto result = runProgram(RunOptions { .program = "git", - .args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, .mergeStderrToStdout = true }); if (WEXITSTATUS(result.first) == 128 @@ -455,9 +601,9 @@ struct GitInputScheme : InputScheme { throw Error( "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " - "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " - ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD - "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", + "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " + ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD + "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", input.getRev()->gitRev(), *input.getRef(), actualUrl @@ -486,7 +632,7 @@ struct GitInputScheme : InputScheme auto source = sinkToSource([&](Sink & sink) { runProgram2({ .program = "git", - .args = { "-C", repoDir, "archive", input.getRev()->gitRev() }, + .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, .standardOut = &sink }); }); @@ -496,7 +642,7 @@ struct GitInputScheme : InputScheme auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); Attrs infoAttrs({ {"rev", input.getRev()->gitRev()}, @@ -505,7 +651,7 @@ struct GitInputScheme : InputScheme if (!shallow) infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() }))); + std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); if (!_input.getRev()) getCache()->add( |