aboutsummaryrefslogtreecommitdiff
path: root/src/libutil/url.cc
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2020-03-30 16:04:18 +0200
committerEelco Dolstra <edolstra@gmail.com>2020-04-07 09:03:14 +0200
commit462421d34588c227eb736b07f7b40a38aa0972a6 (patch)
tree73778f8392f303eb001b95f2c7f0a99b24f06478 /src/libutil/url.cc
parentebb20a5356af023498506324bd0f88a99175e295 (diff)
Backport libfetchers from the flakes branch
This provides a pluggable mechanism for defining new fetchers. It adds a builtin function 'fetchTree' that generalizes existing fetchers like 'fetchGit', 'fetchMercurial' and 'fetchTarball'. 'fetchTree' takes a set of attributes, e.g. fetchTree { type = "git"; url = "https://example.org/repo.git"; ref = "some-branch"; rev = "abcdef..."; } The existing fetchers are just wrappers around this. Note that the input attributes to fetchTree are the same as flake input specifications and flake lock file entries. All fetchers share a common cache stored in ~/.cache/nix/fetcher-cache-v1.sqlite. This replaces the ad hoc caching mechanisms in fetchGit and download.cc (e.g. ~/.cache/nix/{tarballs,git-revs*}). This also adds support for Git worktrees (c169ea59049f861aaba429f48b828d0820b74d1d).
Diffstat (limited to 'src/libutil/url.cc')
-rw-r--r--src/libutil/url.cc137
1 files changed, 137 insertions, 0 deletions
diff --git a/src/libutil/url.cc b/src/libutil/url.cc
new file mode 100644
index 000000000..5d5328e5d
--- /dev/null
+++ b/src/libutil/url.cc
@@ -0,0 +1,137 @@
+#include "url.hh"
+#include "util.hh"
+
+namespace nix {
+
+std::regex refRegex(refRegexS, std::regex::ECMAScript);
+std::regex revRegex(revRegexS, std::regex::ECMAScript);
+std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
+
+ParsedURL parseURL(const std::string & url)
+{
+ static std::regex uriRegex(
+ "((" + schemeRegex + "):"
+ + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ + "(?:\\?(" + queryRegex + "))?"
+ + "(?:#(" + queryRegex + "))?",
+ std::regex::ECMAScript);
+
+ std::smatch match;
+
+ if (std::regex_match(url, match, uriRegex)) {
+ auto & base = match[1];
+ std::string scheme = match[2];
+ auto authority = match[3].matched
+ ? std::optional<std::string>(match[3]) : std::nullopt;
+ std::string path = match[4].matched ? match[4] : match[5];
+ auto & query = match[6];
+ auto & fragment = match[7];
+
+ auto isFile = scheme.find("file") != std::string::npos;
+
+ if (authority && *authority != "" && isFile)
+ throw Error("file:// URL '%s' has unexpected authority '%s'",
+ url, *authority);
+
+ if (isFile && path.empty())
+ path = "/";
+
+ return ParsedURL{
+ .url = url,
+ .base = base,
+ .scheme = scheme,
+ .authority = authority,
+ .path = path,
+ .query = decodeQuery(query),
+ .fragment = percentDecode(std::string(fragment))
+ };
+ }
+
+ else
+ throw BadURL("'%s' is not a valid URL", url);
+}
+
+std::string percentDecode(std::string_view in)
+{
+ std::string decoded;
+ for (size_t i = 0; i < in.size(); ) {
+ if (in[i] == '%') {
+ if (i + 2 >= in.size())
+ throw BadURL("invalid URI parameter '%s'", in);
+ try {
+ decoded += std::stoul(std::string(in, i + 1, 2), 0, 16);
+ i += 3;
+ } catch (...) {
+ throw BadURL("invalid URI parameter '%s'", in);
+ }
+ } else
+ decoded += in[i++];
+ }
+ return decoded;
+}
+
+std::map<std::string, std::string> decodeQuery(const std::string & query)
+{
+ std::map<std::string, std::string> result;
+
+ for (auto s : tokenizeString<Strings>(query, "&")) {
+ auto e = s.find('=');
+ if (e != std::string::npos)
+ result.emplace(
+ s.substr(0, e),
+ percentDecode(std::string_view(s).substr(e + 1)));
+ }
+
+ return result;
+}
+
+std::string percentEncode(std::string_view s)
+{
+ std::string res;
+ for (auto & c : s)
+ if ((c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z')
+ || (c >= '0' && c <= '9')
+ || strchr("-._~!$&'()*+,;=:@", c))
+ res += c;
+ else
+ res += fmt("%%%02x", (unsigned int) c);
+ return res;
+}
+
+std::string encodeQuery(const std::map<std::string, std::string> & ss)
+{
+ std::string res;
+ bool first = true;
+ for (auto & [name, value] : ss) {
+ if (!first) res += '&';
+ first = false;
+ res += percentEncode(name);
+ res += '=';
+ res += percentEncode(value);
+ }
+ return res;
+}
+
+std::string ParsedURL::to_string() const
+{
+ return
+ scheme
+ + ":"
+ + (authority ? "//" + *authority : "")
+ + path
+ + (query.empty() ? "" : "?" + encodeQuery(query))
+ + (fragment.empty() ? "" : "#" + percentEncode(fragment));
+}
+
+bool ParsedURL::operator ==(const ParsedURL & other) const
+{
+ return
+ scheme == other.scheme
+ && authority == other.authority
+ && path == other.path
+ && query == other.query
+ && fragment == other.fragment;
+}
+
+}