diff options
Diffstat (limited to 'src/libexpr/primops/fetchGit.cc')
-rw-r--r-- | src/libexpr/primops/fetchGit.cc | 345 |
1 files changed, 242 insertions, 103 deletions
diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 4aee1073e..80588f54f 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -1,3 +1,4 @@ +#include "fetchGit.hh" #include "primops.hh" #include "eval-inline.hh" #include "download.hh" @@ -16,40 +17,115 @@ using namespace std::string_literals; namespace nix { -struct GitInfo +extern std::regex revRegex; + +static Path getCacheInfoPathFor(const std::string & name, const Hash & rev) +{ + Path cacheDir = getCacheDir() + "/nix/git-revs"; + std::string linkName = + name == "source" + ? rev.gitRev() + : hashString(htSHA512, name + std::string("\0"s) + rev.gitRev()).to_string(Base32, false); + return cacheDir + "/" + linkName + ".link"; +} + +static void cacheGitInfo(const std::string & name, const GitInfo & gitInfo) { - Path storePath; - std::string rev; - std::string shortRev; - uint64_t revCount = 0; -}; + nlohmann::json json; + json["storePath"] = gitInfo.storePath; + json["name"] = name; + json["rev"] = gitInfo.rev.gitRev(); + if (gitInfo.revCount) + json["revCount"] = *gitInfo.revCount; + json["lastModified"] = gitInfo.lastModified; + + auto cacheInfoPath = getCacheInfoPathFor(name, gitInfo.rev); + createDirs(dirOf(cacheInfoPath)); + writeFile(cacheInfoPath, json.dump()); +} + +static std::optional<GitInfo> lookupGitInfo( + ref<Store> store, + const std::string & name, + const Hash & rev) +{ + try { + auto json = nlohmann::json::parse(readFile(getCacheInfoPathFor(name, rev))); + + assert(json["name"] == name && Hash((std::string) json["rev"], htSHA1) == rev); + + Path storePath = json["storePath"]; + + if (store->isValidPath(store->parseStorePath(storePath))) { + GitInfo gitInfo; + gitInfo.storePath = storePath; + gitInfo.rev = rev; + if (json.find("revCount") != json.end()) + gitInfo.revCount = json["revCount"]; + gitInfo.lastModified = json["lastModified"]; + return gitInfo; + } + + } catch (SysError & e) { + if (e.errNo != ENOENT) throw; + } -std::regex revRegex("^[0-9a-fA-F]{40}$"); + return {}; +} -GitInfo exportGit(ref<Store> store, const std::string & uri, - std::optional<std::string> ref, std::string rev, +GitInfo exportGit(ref<Store> store, std::string uri, + std::optional<std::string> ref, + std::optional<Hash> rev, const std::string & name) { - if (evalSettings.pureEval && rev == "") - throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); + assert(!rev || rev->type == htSHA1); + + if (rev) { + if (auto gitInfo = lookupGitInfo(store, name, *rev)) { + // If this gitInfo was produced by exportGitHub, then it won't + // have a revCount. So we have to do a full clone. + if (gitInfo->revCount) { + gitInfo->ref = ref; + return *gitInfo; + } + } + } + + if (hasPrefix(uri, "git+")) uri = std::string(uri, 4); - if (!ref && rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.git")) { + bool isLocal = hasPrefix(uri, "/") && pathExists(uri + "/.git"); - bool clean = true; + // If this is a local directory (but not a file:// URI) and no ref + // or revision is given, then allow the use of an unclean working + // tree. + if (!ref && !rev && isLocal) { + bool clean = false; + + /* Check whether this repo has any commits. There are + probably better ways to do this. */ + bool haveCommits = !readDirectory(uri + "/.git/refs/heads").empty(); try { - runProgram("git", true, { "-C", uri, "diff-index", "--quiet", "HEAD", "--" }); + if (haveCommits) { + runProgram("git", true, { "-C", uri, "diff-index", "--quiet", "HEAD", "--" }); + clean = true; + } } catch (ExecError & e) { if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - clean = false; } if (!clean) { /* This is an unclean working tree. So copy all tracked files. */ + + if (!evalSettings.allowDirty) + throw Error("Git tree '%s' is dirty", uri); + + if (evalSettings.warnDirty) + warn("Git tree '%s' is dirty", uri); + GitInfo gitInfo; - gitInfo.rev = "0000000000000000000000000000000000000000"; - gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); + gitInfo.ref = "HEAD"; auto files = tokenizeString<std::set<std::string>>( runProgram("git", true, { "-C", uri, "ls-files", "-z" }), "\0"s); @@ -70,103 +146,116 @@ GitInfo exportGit(ref<Store> store, const std::string & uri, }; gitInfo.storePath = store->printStorePath(store->addToStore("source", uri, true, htSHA256, filter)); + gitInfo.revCount = haveCommits ? std::stoull(runProgram("git", true, { "-C", uri, "rev-list", "--count", "HEAD" })) : 0; + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + gitInfo.lastModified = haveCommits ? std::stoull(runProgram("git", true, { "-C", uri, "log", "-1", "--format=%ct", "HEAD" })) : 0; return gitInfo; } - - // clean working tree, but no ref or rev specified. Use 'HEAD'. - rev = chomp(runProgram("git", true, { "-C", uri, "rev-parse", "HEAD" })); - ref = "HEAD"s; } - if (!ref) ref = "HEAD"s; + if (!ref) ref = isLocal ? "HEAD" : "master"; - if (rev != "" && !std::regex_match(rev, revRegex)) - throw Error("invalid Git revision '%s'", rev); + // Don't clone file:// URIs (but otherwise treat them the same as + // remote URIs, i.e. don't use the working tree or HEAD). + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + if (!forceHttp && hasPrefix(uri, "file://")) { + uri = std::string(uri, 7); + isLocal = true; + } - deletePath(getCacheDir() + "/nix/git"); + Path repoDir; - Path cacheDir = getCacheDir() + "/nix/gitv2/" + hashString(htSHA256, uri).to_string(Base32, false); + if (isLocal) { - if (!pathExists(cacheDir)) { - createDirs(dirOf(cacheDir)); - runProgram("git", true, { "init", "--bare", cacheDir }); - } + if (!rev) + rev = Hash(chomp(runProgram("git", true, { "-C", uri, "rev-parse", *ref })), htSHA1); - Path localRefFile; - if (ref->compare(0, 5, "refs/") == 0) - localRefFile = cacheDir + "/" + *ref; - else - localRefFile = cacheDir + "/refs/heads/" + *ref; - - bool doFetch; - time_t now = time(0); - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (rev != "") { - try { - runProgram("git", true, { "-C", cacheDir, "cat-file", "-e", rev }); - doFetch = false; - } catch (ExecError & e) { - if (WIFEXITED(e.status)) { - doFetch = true; - } else { - throw; - } - } - } else { - /* If the local ref is older than ‘tarball-ttl’ seconds, do a - git fetch to update the local ref to the remote ref. */ - struct stat st; - doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; - } - if (doFetch) - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", uri)); + repoDir = uri; - // FIXME: git stderr messes up our progress indicator, so - // we're using --quiet for now. Should process its stderr. - runProgram("git", true, { "-C", cacheDir, "fetch", "--quiet", "--force", "--", uri, fmt("%s:%s", *ref, *ref) }); + } else { - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; + Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, uri).to_string(Base32, false); + repoDir = cacheDir; - utimes(localRefFile.c_str(), times); - } + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + runProgram("git", true, { "init", "--bare", repoDir }); + } - // FIXME: check whether rev is an ancestor of ref. - GitInfo gitInfo; - gitInfo.rev = rev != "" ? rev : chomp(readFile(localRefFile)); - gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); + Path localRefFile = + ref->compare(0, 5, "refs/") == 0 + ? cacheDir + "/" + *ref + : cacheDir + "/refs/heads/" + *ref; + + bool doFetch; + time_t now = time(0); + + /* If a rev was specified, we need to fetch if it's not in the + repo. */ + if (rev) { + try { + runProgram("git", true, { "-C", repoDir, "cat-file", "-e", rev->gitRev() }); + doFetch = false; + } catch (ExecError & e) { + if (WIFEXITED(e.status)) { + doFetch = true; + } else { + throw; + } + } + } else { + /* If the local ref is older than ‘tarball-ttl’ seconds, do a + git fetch to update the local ref to the remote ref. */ + struct stat st; + doFetch = stat(localRefFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + } - printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); + if (doFetch) { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", uri)); - std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + gitInfo.rev).to_string(Base32, false); - Path storeLink = cacheDir + "/" + storeLinkName + ".link"; - PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken + // FIXME: git stderr messes up our progress indicator, so + // we're using --quiet for now. Should process its stderr. + try { + runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", uri, fmt("%s:%s", *ref, *ref) }); + } catch (Error & e) { + if (!pathExists(localRefFile)) throw; + warn("could not update local clone of Git repository '%s'; continuing with the most recent version", uri); + } - try { - auto json = nlohmann::json::parse(readFile(storeLink)); + struct timeval times[2]; + times[0].tv_sec = now; + times[0].tv_usec = 0; + times[1].tv_sec = now; + times[1].tv_usec = 0; - assert(json["name"] == name && json["rev"] == gitInfo.rev); + utimes(localRefFile.c_str(), times); + } - gitInfo.storePath = json["storePath"]; + if (!rev) + rev = Hash(chomp(readFile(localRefFile)), htSHA1); + } - if (store->isValidPath(store->parseStorePath(gitInfo.storePath))) { - gitInfo.revCount = json["revCount"]; - return gitInfo; + if (auto gitInfo = lookupGitInfo(store, name, *rev)) { + if (gitInfo->revCount) { + gitInfo->ref = ref; + return *gitInfo; } - - } catch (SysError & e) { - if (e.errNo != ENOENT) throw; } + // FIXME: check whether rev is an ancestor of ref. + GitInfo gitInfo; + gitInfo.ref = *ref; + gitInfo.rev = *rev; + + printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); + + // FIXME: should pipe this, or find some better way to extract a + // revision. auto source = sinkToSource([&](Sink & sink) { - RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); + RunOptions gitOptions("git", { "-C", repoDir, "archive", gitInfo.rev.gitRev() }); gitOptions.standardOut = &sink; runProgram2(gitOptions); }); @@ -178,16 +267,62 @@ GitInfo exportGit(ref<Store> store, const std::string & uri, gitInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); - gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", cacheDir, "rev-list", "--count", gitInfo.rev })); + gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", gitInfo.rev.gitRev() })); + gitInfo.lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", gitInfo.rev.gitRev() })); - nlohmann::json json; - json["storePath"] = gitInfo.storePath; - json["uri"] = uri; - json["name"] = name; - json["rev"] = gitInfo.rev; - json["revCount"] = gitInfo.revCount; + cacheGitInfo(name, gitInfo); + + return gitInfo; +} + +GitInfo exportGitHub( + ref<Store> store, + const std::string & owner, + const std::string & repo, + std::optional<std::string> ref, + std::optional<Hash> rev) +{ + if (rev) { + if (auto gitInfo = lookupGitInfo(store, "source", *rev)) + return *gitInfo; + } + + if (!rev) { + auto url = fmt("https://api.github.com/repos/%s/%s/commits/%s", + owner, repo, ref ? *ref : "master"); + CachedDownloadRequest request(url); + request.ttl = rev ? 1000000000 : settings.tarballTtl; + auto result = getDownloader()->downloadCached(store, request); + auto json = nlohmann::json::parse(readFile(result.path)); + rev = Hash(json["sha"], htSHA1); + } + + // FIXME: use regular /archive URLs instead? api.github.com + // might have stricter rate limits. + + auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", + owner, repo, rev->to_string(Base16, false)); + + std::string accessToken = settings.githubAccessToken.get(); + if (accessToken != "") + url += "?access_token=" + accessToken; + + CachedDownloadRequest request(url); + request.unpack = true; + request.name = "source"; + request.ttl = 1000000000; + request.getLastModified = true; + auto result = getDownloader()->downloadCached(store, request); - writeFile(storeLink, json.dump()); + assert(result.lastModified); + + GitInfo gitInfo; + gitInfo.storePath = result.storePath; + gitInfo.rev = *rev; + gitInfo.lastModified = *result.lastModified; + + // FIXME: this can overwrite a cache file that contains a revCount. + cacheGitInfo("source", gitInfo); return gitInfo; } @@ -196,7 +331,7 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va { std::string url; std::optional<std::string> ref; - std::string rev; + std::optional<Hash> rev; std::string name = "source"; PathSet context; @@ -213,7 +348,7 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va else if (n == "ref") ref = state.forceStringNoCtx(*attr.value, *attr.pos); else if (n == "rev") - rev = state.forceStringNoCtx(*attr.value, *attr.pos); + rev = Hash(state.forceStringNoCtx(*attr.value, *attr.pos), htSHA1); else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); else @@ -230,13 +365,17 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va // whitelist. Ah well. state.checkURI(url); + if (evalSettings.pureEval && !rev) + throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); + auto gitInfo = exportGit(state.store, url, ref, rev, name); state.mkAttrs(v, 8); mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath})); - mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev); - mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.shortRev); - mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount); + mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev.gitRev()); + mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.rev.gitShortRev()); + assert(gitInfo.revCount); + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *gitInfo.revCount); v.attrs->sort(); if (state.allowedPaths) |