diff options
author | Maximilian Bosch <maximilian@mbosch.me> | 2024-06-25 12:31:52 +0200 |
---|---|---|
committer | Jade Lovelace <lix@jade.fyi> | 2024-08-01 15:41:30 -0700 |
commit | 87fd1f024c7c979e5d96c41af4ef7e8bdb5792e1 (patch) | |
tree | 4b197ddf8aec8e08208ee5f40e2efab9c48e7e5a /src/libfetchers | |
parent | 6abad7cb238c5c7bf59a83bed55e7590c544fc2e (diff) |
Reapply "libfetchers: make attribute / URL query handling consistent"
The original attempt at this introduced a regression; this commit
reverts the revert and fixes the regression.
This reverts commit 3e151d4d77b5296b9da8c3ad209932d1dfa44c68.
Fix to the regression:
flakeref: fix handling of `?dir=` param for flakes in subdirs
As reported in #419[1], accessing a flake in a subdir of a Git
repository fails with the previous commit[2] applied, with the error:
    error: unsupported Git input attribute 'dir'
The problem is that the `dir` parameter is inserted into the parsed URL
if a flake is fetched from a subdir of a Git repository. However, the
fetching part doesn't need it at all. The fix is to just pass `subdir`
as the second argument to `FlakeRef` (which needs a `basedir` that can be
empty) and leave the parsedURL as-is.
Added a regression test to make sure we don't run into this again.
[1] https://git.lix.systems/lix-project/lix/issues/419
[2] e22172aaf6b6a366cecd3c025590e68fa2b91bcc,
originally 3e151d4d77b5296b9da8c3ad209932d1dfa44c68
Change-Id: I2c72d5a32e406a7ca308e271730bd0af01c5d18b
Diffstat (limited to 'src/libfetchers')
-rw-r--r-- | src/libfetchers/fetchers.hh | 31 | ||||
-rw-r--r-- | src/libfetchers/git.cc | 17 | ||||
-rw-r--r-- | src/libfetchers/github.cc | 114 | ||||
-rw-r--r-- | src/libfetchers/indirect.cc | 34 | ||||
-rw-r--r-- | src/libfetchers/mercurial.cc | 7 | ||||
-rw-r--r-- | src/libfetchers/tarball.cc | 26 |
6 files changed, 135 insertions, 94 deletions
diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index 2bb4248be..40f2b6294 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -159,6 +159,37 @@ struct InputScheme std::optional<std::string> commitMsg) const; virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0; + +protected: + void emplaceURLQueryIntoAttrs( + const ParsedURL & parsedURL, + Attrs & attrs, + const StringSet & numericParams, + const StringSet & booleanParams) const + { + for (auto &[name, value] : parsedURL.query) { + if (name == "url") { + throw BadURL( + "URL '%s' must not override url via query param!", + parsedURL.to_string() + ); + } else if (numericParams.count(name) != 0) { + if (auto n = string2Int<uint64_t>(value)) { + attrs.insert_or_assign(name, *n); + } else { + throw BadURL( + "URL '%s' has non-numeric parameter '%s'", + parsedURL.to_string(), + name + ); + } + } else if (booleanParams.count(name) != 0) { + attrs.emplace(name, Explicit<bool> { value == "1" }); + } else { + attrs.emplace(name, value); + } + } + } }; void registerInputScheme(std::shared_ptr<InputScheme> && fetcher); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 9fd8d7bbf..8e3165ff6 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "git"); - - for (auto & [name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs") - attrs.emplace(name, Explicit<bool> { value == "1" }); - else - url2.query.emplace(name, value); - } - attrs.emplace("url", url2.to_string()); + emplaceURLQueryIntoAttrs( + url, + attrs, + {"lastModified", "revCount"}, + {"shallow", "submodules", "allRefs"} + ); + return inputFromAttrs(attrs); } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 60fefd1f3..b971781ae 100644 
--- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -1,3 +1,4 @@ +#include "attrs.hh" #include "filetransfer.hh" #include "cache.hh" #include "globals.hh" @@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); - std::optional<Hash> rev; - std::optional<std::string> ref; - std::optional<std::string> host_url; + std::optional<std::string> refOrRev; auto size = path.size(); if (size == 3) { - if (std::regex_match(path[2], revRegex)) - rev = Hash::parseAny(path[2], htSHA1); - else if (std::regex_match(path[2], refRegex)) - ref = path[2]; - else - throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + refOrRev = path[2]; } else if (size > 3) { std::string rs; for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { @@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme } if (std::regex_match(rs, refRegex)) { - ref = rs; + refOrRev = rs; } else { throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); } } else if (size < 2) throw BadURL("URL '%s' is invalid", url.url); + Attrs attrs; + attrs.emplace("type", type()); + attrs.emplace("owner", path[0]); + attrs.emplace("repo", path[1]); + for (auto &[name, value] : url.query) { - if (name == "rev") { - if (rev) - throw BadURL("URL '%s' contains multiple commit hashes", url.url); - rev = Hash::parseAny(value, htSHA1); - } - else if (name == "ref") { - if (!std::regex_match(value, refRegex)) - throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); - if (ref) - throw BadURL("URL '%s' contains multiple branch/tag names", url.url); - ref = value; - } - else if (name == "host") { - if (!std::regex_match(value, hostRegex)) - throw BadURL("URL '%s' contains an invalid instance host", url.url); - host_url = value; + if (name == "rev" || name == "ref") { + if (refOrRev) { + throw BadURL("URL '%s' already contains a ref or rev", url.url); + } else { + 
refOrRev = value; + } + } else if (name == "lastModified") { + if (auto n = string2Int<uint64_t>(value)) { + attrs.emplace(name, *n); + } else { + throw Error( + "Attribute 'lastModified' in URL '%s' must be an integer", + url.to_string() + ); + } + } else { + attrs.emplace(name, value); } - // FIXME: barf on unsupported attributes } - if (ref && rev) - throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev()); + if (refOrRev) attrs.emplace("refOrRev", *refOrRev); - Input input; - input.attrs.insert_or_assign("type", type()); - input.attrs.insert_or_assign("owner", path[0]); - input.attrs.insert_or_assign("repo", path[1]); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); - if (host_url) input.attrs.insert_or_assign("host", *host_url); - - return input; + return inputFromAttrs(attrs); } std::optional<Input> inputFromAttrs(const Attrs & attrs) const override { - if (maybeGetStrAttr(attrs, "type") != type()) return {}; + // Attributes can contain refOrRev and it needs to be figured out + // which one it is (see inputFromURL for when that may happen). + // The correct one (ref or rev) will be written into finalAttrs and + // it needs to be mutable for that. 
+ Attrs finalAttrs(attrs); + auto type_ = maybeGetStrAttr(finalAttrs, "type"); + if (type_ != type()) return {}; + + auto owner = getStrAttr(finalAttrs, "owner"); + auto repo = getStrAttr(finalAttrs, "repo"); + + auto url = fmt("%s:%s/%s", *type_, owner, repo); + if (auto host = maybeGetStrAttr(finalAttrs, "host")) { + if (!std::regex_match(*host, hostRegex)) { + throw BadURL("URL '%s' contains an invalid instance host", url); + } + } - for (auto & [name, value] : attrs) - if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") - throw Error("unsupported input attribute '%s'", name); + if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) { + finalAttrs.erase("refOrRev"); + if (std::regex_match(*refOrRev, revRegex)) { + finalAttrs.emplace("rev", *refOrRev); + } else if (std::regex_match(*refOrRev, refRegex)) { + finalAttrs.emplace("ref", *refOrRev); + } else { + throw Error( + "in URL '%s', '%s' is not a commit hash or a branch/tag name", + url, + *refOrRev + ); + } + } else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("URL '%s' contains an invalid branch/tag name", url); + } + } - getStrAttr(attrs, "owner"); - getStrAttr(attrs, "repo"); + for (auto & [name, value] : finalAttrs) { + if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") { + throw Error("unsupported input attribute '%s'", name); + } + } Input input; - input.attrs = attrs; + input.attrs = finalAttrs; return input; } diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index c73505b31..8c0176e84 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme std::optional<Hash> rev; std::optional<std::string> ref; + Attrs attrs; + if (path.size() 
== 1) { } else if (path.size() == 2) { if (std::regex_match(path[1], revRegex)) @@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme else throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); } else if (path.size() == 3) { - if (!std::regex_match(path[1], refRegex)) - throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]); ref = path[1]; - if (!std::regex_match(path[2], revRegex)) - throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]); rev = Hash::parseAny(path[2], htSHA1); } else throw BadURL("GitHub URL '%s' is invalid", url.url); std::string id = path[0]; - if (!std::regex_match(id, flakeRegex)) - throw BadURL("'%s' is not a valid flake ID", id); - // FIXME: forbid query params? + attrs.emplace("type", "indirect"); + attrs.emplace("id", id); + if (rev) attrs.emplace("rev", rev->gitRev()); + if (ref) attrs.emplace("ref", *ref); - Input input; - input.direct = false; - input.attrs.insert_or_assign("type", "indirect"); - input.attrs.insert_or_assign("id", id); - if (rev) input.attrs.insert_or_assign("rev", rev->gitRev()); - if (ref) input.attrs.insert_or_assign("ref", *ref); + emplaceURLQueryIntoAttrs(url, attrs, {}, {}); - return input; + return inputFromAttrs(attrs); } std::optional<Input> inputFromAttrs(const Attrs & attrs) const override @@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme if (!std::regex_match(id, flakeRegex)) throw BadURL("'%s' is not a valid flake ID", id); + // TODO come up with a nicer error message for those two. 
+ if (auto rev = maybeGetStrAttr(attrs, "rev")) { + if (!std::regex_match(*rev, revRegex)) { + throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev); + } + } + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) { + throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref); + } + } + Input input; input.direct = false; input.attrs = attrs; diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 23cf7b51d..b4150e9df 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -56,12 +56,7 @@ struct MercurialInputScheme : InputScheme Attrs attrs; attrs.emplace("type", "hg"); - for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") - attrs.emplace(name, value); - else - url2.query.emplace(name, value); - } + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); attrs.emplace("url", url2.to_string()); diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 6ce35aeb2..b11665805 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme if (!isValidURL(_url, requireTree)) return std::nullopt; - Input input; - auto url = _url; - url.scheme = parseUrlScheme(url.scheme).transport; - - auto narHash = url.query.find("narHash"); - if (narHash != url.query.end()) - input.attrs.insert_or_assign("narHash", narHash->second); + Attrs attrs; + attrs.emplace("type", inputType()); - if (auto i = get(url.query, "rev")) - input.attrs.insert_or_assign("rev", *i); - - if (auto i = get(url.query, "revCount")) - if (auto n = string2Int<uint64_t>(*i)) - input.attrs.insert_or_assign("revCount", *n); + url.scheme = parseUrlScheme(url.scheme).transport; - url.query.erase("rev"); - url.query.erase("revCount"); + emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); - input.attrs.insert_or_assign("type", inputType()); - input.attrs.insert_or_assign("url", 
url.to_string()); - return input; + attrs.emplace("url", url.to_string()); + return inputFromAttrs(attrs); } std::optional<Input> inputFromAttrs(const Attrs & attrs) const override @@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; for (auto & [name, value] : attrs) if (!allowedNames.count(name)) - throw Error("unsupported %s input attribute '%s'", *type, name); + throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); Input input; input.attrs = attrs; |