aboutsummaryrefslogtreecommitdiff
path: root/src/libstore
diff options
context:
space:
mode:
author Eelco Dolstra <edolstra@gmail.com> 2020-02-03 15:27:26 +0100
committer Eelco Dolstra <edolstra@gmail.com> 2020-02-03 15:27:26 +0100
commit a2628b43bbfe4368a3b5963e8b80eb6f463d94c3 (patch)
tree c72b9fa978113ea550e20ae0b00e3829f4bdf25a /src/libstore
parent d070e1c5321b43496f1113198e62b2b647433459 (diff)
Fix URL parser
Fixes #3062.
Diffstat (limited to 'src/libstore')
-rw-r--r-- src/libstore/fetchers/parse.cc 18
-rw-r--r-- src/libstore/fetchers/regex.hh 24
2 files changed, 22 insertions, 20 deletions
diff --git a/src/libstore/fetchers/parse.cc b/src/libstore/fetchers/parse.cc
index dc1b3efe6..4f7cb3c6b 100644
--- a/src/libstore/fetchers/parse.cc
+++ b/src/libstore/fetchers/parse.cc
@@ -11,24 +11,22 @@ std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);
ParsedURL parseURL(const std::string & url)
{
static std::regex uriRegex(
- "(((" + schemeRegex + "):"
- + "(//(" + authorityRegex + "))?"
- + "(" + pathRegex + "))"
+ "((" + schemeRegex + "):"
+ + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))"
+ "(?:\\?(" + queryRegex + "))?"
- + "(?:#(" + queryRegex + "))?"
- + ")",
+ + "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);
std::smatch match;
if (std::regex_match(url, match, uriRegex)) {
- auto & base = match[2];
- std::string scheme = match[3];
+ auto & base = match[1];
+ std::string scheme = match[2];
auto authority = match[4].matched
? std::optional<std::string>(match[5]) : std::nullopt;
- std::string path = match[6];
- auto & query = match[7];
- auto & fragment = match[8];
+ std::string path = match[4].matched ? match[4] : match[5];
+ auto & query = match[6];
+ auto & fragment = match[7];
auto isFile = scheme.find("file") != std::string::npos;
diff --git a/src/libstore/fetchers/regex.hh b/src/libstore/fetchers/regex.hh
index 504d7bf18..e0989edfc 100644
--- a/src/libstore/fetchers/regex.hh
+++ b/src/libstore/fetchers/regex.hh
@@ -5,16 +5,20 @@
namespace nix::fetchers {
// URI stuff.
-const static std::string pctEncoded = "%[0-9a-fA-F][0-9a-fA-F]";
-const static std::string schemeRegex = "[a-z+]+";
-const static std::string authorityRegex =
- "(?:(?:[a-z])*@)?"
- "[a-zA-Z0-9._~-]*";
-const static std::string segmentRegex = "[a-zA-Z0-9._~-]+";
-const static std::string pathRegex = "(?:/?" + segmentRegex + "(?:/" + segmentRegex + ")*|/?)";
-const static std::string pcharRegex =
- "(?:[a-zA-Z0-9-._~!$&'\"()*+,;=:@ ]|" + pctEncoded + ")";
-const static std::string queryRegex = "(?:" + pcharRegex + "|[/?])*";
+const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
+const static std::string schemeRegex = "(?:[a-z+]+)";
+const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
+const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
+const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
+const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
+const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
+const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
+const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
+const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
+const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
+const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
+const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
+const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
// A Git ref (i.e. branch or tag name).
const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check