aboutsummaryrefslogtreecommitdiff
path: root/src/libutil
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2020-03-30 16:04:18 +0200
committerEelco Dolstra <edolstra@gmail.com>2020-04-07 09:03:14 +0200
commit462421d34588c227eb736b07f7b40a38aa0972a6 (patch)
tree73778f8392f303eb001b95f2c7f0a99b24f06478 /src/libutil
parentebb20a5356af023498506324bd0f88a99175e295 (diff)
Backport libfetchers from the flakes branch
This provides a pluggable mechanism for defining new fetchers. It adds a builtin function 'fetchTree' that generalizes existing fetchers like 'fetchGit', 'fetchMercurial' and 'fetchTarball'. 'fetchTree' takes a set of attributes, e.g. fetchTree { type = "git"; url = "https://example.org/repo.git"; ref = "some-branch"; rev = "abcdef..."; } The existing fetchers are just wrappers around this. Note that the input attributes to fetchTree are the same as flake input specifications and flake lock file entries. All fetchers share a common cache stored in ~/.cache/nix/fetcher-cache-v1.sqlite. This replaces the ad hoc caching mechanisms in fetchGit and download.cc (e.g. ~/.cache/nix/{tarballs,git-revs*}). This also adds support for Git worktrees (c169ea59049f861aaba429f48b828d0820b74d1d).
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/types.hh8
-rw-r--r--src/libutil/url.cc137
-rw-r--r--src/libutil/url.hh62
3 files changed, 207 insertions, 0 deletions
diff --git a/src/libutil/types.hh b/src/libutil/types.hh
index 20b96a85c..a1ce7b372 100644
--- a/src/libutil/types.hh
+++ b/src/libutil/types.hh
@@ -157,4 +157,12 @@ typedef list<Path> Paths;
typedef set<Path> PathSet;
+/* Helper class to run code at startup. */
+template<typename T>
+struct OnStartup
+{
+ OnStartup(T && t) { t(); }
+};
+
+
}
diff --git a/src/libutil/url.cc b/src/libutil/url.cc
new file mode 100644
index 000000000..5d5328e5d
--- /dev/null
+++ b/src/libutil/url.cc
@@ -0,0 +1,137 @@
+#include "url.hh"
+#include "util.hh"
+
+namespace nix {
+
+std::regex refRegex(refRegexS, std::regex::ECMAScript);
+std::regex revRegex(revRegexS, std::regex::ECMAScript);
+std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
+
+ParsedURL parseURL(const std::string & url)
+{
+ static std::regex uriRegex(
+ "((" + schemeRegex + "):"
+ + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ + "(?:\\?(" + queryRegex + "))?"
+ + "(?:#(" + queryRegex + "))?",
+ std::regex::ECMAScript);
+
+ std::smatch match;
+
+ if (std::regex_match(url, match, uriRegex)) {
+ auto & base = match[1];
+ std::string scheme = match[2];
+ auto authority = match[3].matched
+ ? std::optional<std::string>(match[3]) : std::nullopt;
+ std::string path = match[4].matched ? match[4] : match[5];
+ auto & query = match[6];
+ auto & fragment = match[7];
+
+ auto isFile = scheme.find("file") != std::string::npos;
+
+ if (authority && *authority != "" && isFile)
+ throw Error("file:// URL '%s' has unexpected authority '%s'",
+ url, *authority);
+
+ if (isFile && path.empty())
+ path = "/";
+
+ return ParsedURL{
+ .url = url,
+ .base = base,
+ .scheme = scheme,
+ .authority = authority,
+ .path = path,
+ .query = decodeQuery(query),
+ .fragment = percentDecode(std::string(fragment))
+ };
+ }
+
+ else
+ throw BadURL("'%s' is not a valid URL", url);
+}
+
+std::string percentDecode(std::string_view in)
+{
+ std::string decoded;
+ for (size_t i = 0; i < in.size(); ) {
+ if (in[i] == '%') {
+ if (i + 2 >= in.size())
+ throw BadURL("invalid URI parameter '%s'", in);
+ try {
+ decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
+ i += 3;
+ } catch (...) {
+ throw BadURL("invalid URI parameter '%s'", in);
+ }
+ } else
+ decoded += in[i++];
+ }
+ return decoded;
+}
+
+std::map<std::string, std::string> decodeQuery(const std::string & query)
+{
+ std::map<std::string, std::string> result;
+
+ for (auto s : tokenizeString<Strings>(query, "&")) {
+ auto e = s.find('=');
+ if (e != std::string::npos)
+ result.emplace(
+ s.substr(0, e),
+ percentDecode(std::string_view(s).substr(e + 1)));
+ }
+
+ return result;
+}
+
+std::string percentEncode(std::string_view s)
+{
+ std::string res;
+ for (auto & c : s)
+ if ((c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z')
+ || (c >= '0' && c <= '9')
+ || strchr("-._~!$&'()*+,;=:@", c))
+ res += c;
+ else
+ res += fmt("%%%02x", (unsigned int) c);
+ return res;
+}
+
+std::string encodeQuery(const std::map<std::string, std::string> & ss)
+{
+ std::string res;
+ bool first = true;
+ for (auto & [name, value] : ss) {
+ if (!first) res += '&';
+ first = false;
+ res += percentEncode(name);
+ res += '=';
+ res += percentEncode(value);
+ }
+ return res;
+}
+
+std::string ParsedURL::to_string() const
+{
+ return
+ scheme
+ + ":"
+ + (authority ? "//" + *authority : "")
+ + path
+ + (query.empty() ? "" : "?" + encodeQuery(query))
+ + (fragment.empty() ? "" : "#" + percentEncode(fragment));
+}
+
+bool ParsedURL::operator ==(const ParsedURL & other) const
+{
+ return
+ scheme == other.scheme
+ && authority == other.authority
+ && path == other.path
+ && query == other.query
+ && fragment == other.fragment;
+}
+
+}
diff --git a/src/libutil/url.hh b/src/libutil/url.hh
new file mode 100644
index 000000000..1503023a2
--- /dev/null
+++ b/src/libutil/url.hh
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "types.hh"
+
+#include <regex>
+
+namespace nix {
+
+struct ParsedURL
+{
+ std::string url;
+ std::string base; // URL without query/fragment
+ std::string scheme;
+ std::optional<std::string> authority;
+ std::string path;
+ std::map<std::string, std::string> query;
+ std::string fragment;
+
+ std::string to_string() const;
+
+ bool operator ==(const ParsedURL & other) const;
+};
+
+MakeError(BadURL, Error);
+
+std::string percentDecode(std::string_view in);
+
+std::map<std::string, std::string> decodeQuery(const std::string & query);
+
+ParsedURL parseURL(const std::string & url);
+
+// URI stuff.
+const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
+const static std::string schemeRegex = "(?:[a-z+]+)";
+const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
+const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
+const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
+const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
+const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
+const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
+const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
+const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
+const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
+const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
+const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
+const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
+
+// A Git ref (i.e. branch or tag name).
+const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
+extern std::regex refRegex;
+
+// A Git revision (a SHA-1 commit hash).
+const static std::string revRegexS = "[0-9a-fA-F]{40}";
+extern std::regex revRegex;
+
+// A ref or revision, or a ref followed by a revision.
+const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))";
+
+const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*";
+extern std::regex flakeIdRegex;
+
+}