aboutsummaryrefslogtreecommitdiff
path: root/src/libfetchers
diff options
context:
space:
mode:
author Kjetil Orbekk <kj@orbekk.com> 2022-04-29 18:30:00 -0400
committer Kjetil Orbekk <kj@orbekk.com> 2022-04-29 18:46:21 -0400
commit 9bf296c970bf33b7ed53d7e2d8fbe44197482518 (patch)
tree c172f61f1b5e51b6d353b99ab75d0e1e206bc54f /src/libfetchers
parent c21afd684cb5f59337b879684728884fd8275ce4 (diff)
Extract git reference parsing to a shared library
These utility functions can be shared between the git and github fetchers.
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- src/libfetchers/git-utils.cc 25
-rw-r--r-- src/libfetchers/git-utils.hh 23
-rw-r--r-- src/libfetchers/git.cc 29
-rw-r--r-- src/libfetchers/github.cc 28
4 files changed, 70 insertions, 35 deletions
diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc
new file mode 100644
index 000000000..060077098
--- /dev/null
+++ b/src/libfetchers/git-utils.cc
@@ -0,0 +1,25 @@
+#include "git-utils.hh"
+
+#include <regex>
+
+std::optional<std::string> parseListReferenceHeadRef(std::string_view line) { // Extracts the ref name from a "ref: <name><tab>HEAD" line; yields std::nullopt for any other line.
+ const static std::regex head_ref_regex("^ref: ([^\\s]+)\\t+HEAD$"); // function-local static, so the regex is built once; group 1 = run of non-whitespace after "ref: ", which must be followed by one or more tabs and HEAD
+ std::match_results<std::string_view::const_iterator> match; // std::smatch is tied to std::string iterators, hence the explicit string_view iterator form
+ if (std::regex_match(line.cbegin(), line.cend(), match, head_ref_regex)) { // regex_match requires the entire line to match, not just a substring
+ return match[1]; // the sub_match is converted to an owning std::string, so the result does not alias `line`
+ } else {
+ return std::nullopt;
+ }
+}
+
+std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line) { // Returns the first field of a "<field><tab><name>" line when <name> equals `rev`; std::nullopt otherwise.
+ const static std::regex rev_regex("^([^\\t]+)\\t+(.*)$"); // group 1 = non-empty text before the first run of tabs, group 2 = everything after that run
+ std::match_results<std::string_view::const_iterator> match;
+ if (!std::regex_match(line.cbegin(), line.cend(), match, rev_regex)) { // the whole line must have the <text><tab(s)><text> shape
+ return std::nullopt;
+ }
+ if (rev != match[2].str()) { // exact string comparison: `rev` is NOT interpreted as a pattern, so regex-style input such as "refs/(heads|tags)/x" only equals that literal text
+ return std::nullopt;
+ }
+ return match[1]; // converted to an owning std::string on return
+}
diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh
new file mode 100644
index 000000000..946a68a9e
--- /dev/null
+++ b/src/libfetchers/git-utils.hh
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <optional>
+
+// Parses the HEAD symref line as reported by `git ls-remote --symref`.
+//
+// Returns the full name of the ref that HEAD points to, or std::nullopt if the line is not a
+// symref line. E.g., "refs/heads/main" is returned for the line below (a tab precedes HEAD).
+//
+// ref: refs/heads/main HEAD
+//
+// If the repository is in 'detached head' state (HEAD is pointing to a rev
+// instead of a branch), parseListReferenceForRev("HEAD", line) may be used instead.
+std::optional<std::string> parseListReferenceHeadRef(std::string_view line);
+
+// Parses a reference line from `git ls-remote --symref`: returns the line's first (tab-terminated)
+// field if the name after the tab equals `rev` exactly (no pattern matching), else std::nullopt.
+// E.g., parseListReferenceForRev("refs/heads/master", line) will return 6926... given the line below.
+//
+// 6926beab444c33fb57b21819b6642d032016bb1e refs/heads/master
+std::optional<std::string> parseListReferenceForRev(std::string_view rev, std::string_view line);
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc
index 968cd642a..9d4348cf1 100644
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@@ -6,6 +6,7 @@
#include "url-parts.hh"
#include "pathlocks.hh"
#include "util.hh"
+#include "git-utils.hh"
#include "fetch-settings.hh"
@@ -69,27 +70,19 @@ std::optional<std::string> readHead(const Path & path)
.args = {"ls-remote", "--symref", path},
});
if (exit_code != 0) {
- return std::nullopt;
+ return std::nullopt;
}
- // Matches the common case when HEAD points to a branch, e.g.:
- // "ref: refs/heads/main HEAD".
- const static std::regex head_ref_regex("^ref:\\s*([^\\s]+)\\s*HEAD$");
- // Matches when HEAD points directly at a commit, e.g.:
- // "71abcd... HEAD".
- const static std::regex head_rev_regex("^([^\\s]+)\\s*HEAD$");
-
- for (const auto & line : tokenizeString<std::vector<std::string>>(output, "\n")) {
- std::smatch match;
- if (std::regex_match(line, match, head_ref_regex)) {
- debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
- return match[1];
- } else if (std::regex_match(line, match, head_rev_regex)) {
- debug("resolved HEAD ref '%s' for repo '%s'", match[1], path);
- return match[1];
- }
+ std::string_view line = output;
+ line = line.substr(0, line.find("\n"));
+ if (const auto ref = parseListReferenceHeadRef(line); ref) {
+ debug("resolved HEAD ref '%s' for repo '%s'", *ref, path);
+ return *ref;
+ }
+ if (const auto rev = parseListReferenceForRev("HEAD", line); rev) {
+ debug("resolved HEAD rev '%s' for repo '%s'", *rev, path);
+ return *rev;
}
-
return std::nullopt;
}
diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc
index 58b6e7c04..1bdf2759f 100644
--- a/src/libfetchers/github.cc
+++ b/src/libfetchers/github.cc
@@ -4,7 +4,7 @@
#include "store-api.hh"
#include "types.hh"
#include "url-parts.hh"
-
+#include "git-utils.hh"
#include "fetchers.hh"
#include "fetch-settings.hh"
@@ -383,35 +383,29 @@ struct SourceHutInputScheme : GitArchiveInputScheme
std::string line;
getline(is, line);
- auto ref_index = line.find("ref: ");
- if (ref_index == std::string::npos) {
+ auto r = parseListReferenceHeadRef(line);
+ if (!r) {
throw BadURL("in '%d', couldn't resolve HEAD ref '%d'", input.to_string(), ref);
}
-
- ref_uri = line.substr(ref_index+5, line.length()-1);
- } else
+ ref_uri = *r;
+ } else {
ref_uri = fmt("refs/(heads|tags)/%s", ref);
+ }
auto file = store->toRealPath(
downloadFile(store, fmt("%s/info/refs", base_url), "source", false, headers).storePath);
std::ifstream is(file);
std::string line;
- std::string id;
- while(getline(is, line)) {
- // Append $ to avoid partial name matches
- std::regex pattern(fmt("%s$", ref_uri));
-
- if (std::regex_search(line, pattern)) {
- id = line.substr(0, line.find('\t'));
- break;
- }
+ std::optional<std::string> id;
+ while(!id && getline(is, line)) {
+ id = parseListReferenceForRev(ref_uri, line);
}
- if(id.empty())
+ if(!id)
throw BadURL("in '%d', couldn't find ref '%d'", input.to_string(), ref);
- auto rev = Hash::parseAny(id, htSHA1);
+ auto rev = Hash::parseAny(*id, htSHA1);
debug("HEAD revision for '%s' is %s", fmt("%s/%s", base_url, ref), rev.gitRev());
return rev;
}