diff options
author | Eelco Dolstra <edolstra@gmail.com> | 2020-02-03 15:27:26 +0100 |
---|---|---|
committer | Eelco Dolstra <edolstra@gmail.com> | 2020-02-03 15:27:26 +0100 |
commit | a2628b43bbfe4368a3b5963e8b80eb6f463d94c3 (patch) | |
tree | c72b9fa978113ea550e20ae0b00e3829f4bdf25a /src/libstore | |
parent | d070e1c5321b43496f1113198e62b2b647433459 (diff) |
Fix URL parser
Fixes #3062.
Diffstat (limited to 'src/libstore')
-rw-r--r-- | src/libstore/fetchers/parse.cc | 18 | ||||
-rw-r--r-- | src/libstore/fetchers/regex.hh | 24 |
2 files changed, 22 insertions, 20 deletions
diff --git a/src/libstore/fetchers/parse.cc b/src/libstore/fetchers/parse.cc index dc1b3efe6..4f7cb3c6b 100644 --- a/src/libstore/fetchers/parse.cc +++ b/src/libstore/fetchers/parse.cc @@ -11,24 +11,22 @@ std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript); ParsedURL parseURL(const std::string & url) { static std::regex uriRegex( - "(((" + schemeRegex + "):" - + "(//(" + authorityRegex + "))?" - + "(" + pathRegex + "))" + "((" + schemeRegex + "):" + + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))" + "(?:\\?(" + queryRegex + "))?" - + "(?:#(" + queryRegex + "))?" - + ")", + + "(?:#(" + queryRegex + "))?", std::regex::ECMAScript); std::smatch match; if (std::regex_match(url, match, uriRegex)) { - auto & base = match[2]; - std::string scheme = match[3]; + auto & base = match[1]; + std::string scheme = match[2]; auto authority = match[4].matched ? std::optional<std::string>(match[5]) : std::nullopt; - std::string path = match[6]; - auto & query = match[7]; - auto & fragment = match[8]; + std::string path = match[4].matched ? match[4] : match[5]; + auto & query = match[6]; + auto & fragment = match[7]; auto isFile = scheme.find("file") != std::string::npos; diff --git a/src/libstore/fetchers/regex.hh b/src/libstore/fetchers/regex.hh index 504d7bf18..e0989edfc 100644 --- a/src/libstore/fetchers/regex.hh +++ b/src/libstore/fetchers/regex.hh @@ -5,16 +5,20 @@ namespace nix::fetchers { // URI stuff. -const static std::string pctEncoded = "%[0-9a-fA-F][0-9a-fA-F]"; -const static std::string schemeRegex = "[a-z+]+"; -const static std::string authorityRegex = - "(?:(?:[a-z])*@)?" - "[a-zA-Z0-9._~-]*"; -const static std::string segmentRegex = "[a-zA-Z0-9._~-]+"; -const static std::string pathRegex = "(?:/?" + segmentRegex + "(?:/" + segmentRegex + ")*|/?)"; -const static std::string pcharRegex = - "(?:[a-zA-Z0-9-._~!$&'\"()*+,;=:@ ]|" + pctEncoded + ")"; -const static std::string queryRegex = "(?:" + pcharRegex + "|[/?])*"; +const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; +const static std::string schemeRegex = "(?:[a-z+]+)"; +const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; +const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; +const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; +const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; +const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; +const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; +const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; +const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; +const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; +const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; +const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; // A Git ref (i.e. branch or tag name). const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check |