diff options
author | Kjetil Orbekk <kj@orbekk.com> | 2022-04-29 18:30:00 -0400 |
---|---|---|
committer | Kjetil Orbekk <kj@orbekk.com> | 2022-04-29 18:46:21 -0400 |
commit | 9bf296c970bf33b7ed53d7e2d8fbe44197482518 (patch) | |
tree | c172f61f1b5e51b6d353b99ab75d0e1e206bc54f /src/libfetchers | |
parent | c21afd684cb5f59337b879684728884fd8275ce4 (diff) |
Extract git reference parsing to a shared library
These utility functions can be shared between the git and github fetchers.
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- | src/libfetchers/git-utils.cc | 25 | ||||
-rw-r--r-- | src/libfetchers/git-utils.hh | 23 | ||||
-rw-r--r-- | src/libfetchers/git.cc | 29 | ||||
-rw-r--r-- | src/libfetchers/github.cc | 28 |
4 files changed, 70 insertions, 35 deletions
diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc new file mode 100644 index 000000000..060077098 --- /dev/null +++ b/src/libfetchers/git-utils.cc @@ -0,0 +1,25 @@ +#include "git-utils.hh" + +#include <regex> + +std::optional<std::string> parseListReferenceHeadRef(std::string_view line) { + const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); + std::match_results<std::string_view::const_iterator> match; + if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { + return match[1]; + } else { + return std::nullopt; + } +} + +std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line) { + const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); + std::match_results<std::string_view::const_iterator> match; + if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { + return std::nullopt; + } + if (rev != match[2].str()) { + return std::nullopt; + } + return match[1]; +} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh new file mode 100644 index 000000000..946a68a9e --- /dev/null +++ b/src/libfetchers/git-utils.hh @@ -0,0 +1,23 @@ +#pragma once + +#include <string> +#include <string_view> +#include <optional> + +// Parses the HEAD ref as reported by `git ls-remote --symref` +// +// Returns the full name of the ref that HEAD points to, e.g., if ls-remote +// returns the output below, "refs/heads/main" is returned based on the ref line. +// +// ref: refs/heads/main HEAD +// +// Returns std::nullopt for any line that is not "ref: <name>" followed by tab(s) and "HEAD". If the repository is in 'detached head' state (HEAD is pointing to a rev +// instead of a branch), parseListReferenceForRev("HEAD", line) may be used instead. +std::optional<std::string> parseListReferenceHeadRef(std::string_view line); + +// Parses a reference line from `git ls-remote --symref`, e.g., +// parseListReferenceForRev("refs/heads/master", line) will return 6926... +// given the line below. `rev` is compared literally against the ref column (it is not treated as a pattern); std::nullopt is returned if it differs or if the line is not "<id>" followed by tab(s) and a ref name. 
+// +// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master +std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 968cd642a..9d4348cf1 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -6,6 +6,7 @@ #include "url-parts.hh" #include "pathlocks.hh" #include "util.hh" +#include "git-utils.hh" #include "fetch-settings.hh" @@ -69,27 +70,19 @@ std::optional<std::string> readHead(const Path & path) .args = {"ls-remote", "--symref", path}, }); if (exit_code != 0) { - return std::nullopt; + return std::nullopt; } - // Matches the common case when HEAD points to a branch, e.g.: - // "ref: refs/heads/main HEAD". - const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$"); - // Matches when HEAD points directly at a commit, e.g.: - // "71abcd... HEAD". - const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$"); - - for (const auto & line : tokenizeString<std::vector<std::string>>(output, "\n")) { - std::smatch match; - if (std::regex_match(line, match, head_ref_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } else if (std::regex_match(line, match, head_rev_regex)) { - debug("resolved HEAD ref '%s' for repo '%s'", match[1], path); - return match[1]; - } + std::string_view line = output; + line = line.substr(0, line.find("\n")); + if (const auto ref = parseListReferenceHeadRef(line); ref) { + debug("resolved HEAD ref '%s' for repo '%s'", *ref, path); + return *ref; + } + if (const auto rev = parseListReferenceForRev("HEAD", line); rev) { + debug("resolved HEAD rev '%s' for repo '%s'", *rev, path); + return *rev; } - return std::nullopt; } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..1bdf2759f 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -4,7 +4,7 @@ #include "store-api.hh" #include "types.hh" #include 
"url-parts.hh" - +#include "git-utils.hh" #include "fetchers.hh" #include "fetch-settings.hh" @@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme std::string line; getline(is, line); - auto ref_index = line.find("ref: "); - if (ref_index == std::string::npos) { + auto r = parseListReferenceHeadRef(line); + if (!r) { throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref); } - - ref_uri = line.substr(ref_index+5, line.length()-1); - } else + ref_uri = *r; + } else { ref_uri = fmt("refs/(heads|tags)/%s", ref); + } auto file = store->toRealPath( downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath); std::ifstream is(file); std::string line; - std::string id; - while(getline(is, line)) { - // Append $ to avoid partial name matches - std::regex pattern(fmt("%s$", ref_uri)); - - if (std::regex_search(line, pattern)) { - id = line.substr(0, line.find('\t')); - break; - } + std::optional<std::string> id; + while(!id && getline(is, line)) { + id = parseListReferenceForRev(ref_uri, line); } - if(id.empty()) + if(!id) throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref); - auto rev = Hash::parseAny(id, htSHA1); + auto rev = Hash::parseAny(*id, htSHA1); debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev()); return rev; } |