diff options
author | Rebecca Turner <rbt@sent.as> | 2024-03-14 17:44:43 -0700 |
---|---|---|
committer | Rebecca Turner <rbt@sent.as> | 2024-03-26 16:44:04 -0700 |
commit | aee3d639b5096349413021537ae842c8c33ef6cf (patch) | |
tree | ee8557f970f5c477116bd1b77a8b9cacce59d03a /src | |
parent | da22dbc33397c9c6c5d115ce753d5cf11585291e (diff) |
Move `shell_words` into its own file (`shellwords` in `src/nix-build/nix-build.cc` becomes `shell_split` in `src/libutil/shlex.cc`)
Change-Id: I34c0ebfb6dcea49bf632d8880e04075335a132bf
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/meson.build | 2 | ||||
-rw-r--r-- | src/libutil/shlex.cc | 77 | ||||
-rw-r--r-- | src/libutil/shlex.hh | 30 | ||||
-rw-r--r-- | src/nix-build/nix-build.cc | 61 |
4 files changed, 111 insertions, 59 deletions
diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 084d7ed11..91a7b33ba 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -20,6 +20,7 @@ libutil_sources = files( 'position.cc', 'references.cc', 'serialise.cc', + 'shlex.cc', 'signals.cc', 'source-path.cc', 'suggestions.cc', @@ -69,6 +70,7 @@ libutil_headers = files( 'regex-combinators.hh', 'repair-flag.hh', 'serialise.hh', + 'shlex.hh', 'signals.hh', 'source-path.hh', 'split.hh', diff --git a/src/libutil/shlex.cc b/src/libutil/shlex.cc new file mode 100644 index 000000000..b5f340251 --- /dev/null +++ b/src/libutil/shlex.cc @@ -0,0 +1,77 @@ +#include "shlex.hh" +#include "util.hh" + +namespace nix { + +std::vector<std::string> shell_split(const std::string & input) +{ + std::vector<std::string> result; + + // Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim + // whitespace before starting. + auto inputTrimmed = trim(input); + + if (inputTrimmed.empty()) { + return result; + } + + std::regex whitespace("^\\s+"); + auto begin = inputTrimmed.cbegin(); + std::string currentToken; + enum State { sBegin, sSingleQuote, sDoubleQuote }; + State state = sBegin; + auto iterator = begin; + + for (; iterator != inputTrimmed.cend(); ++iterator) { + if (state == sBegin) { + std::smatch match; + if (regex_search(iterator, inputTrimmed.cend(), match, whitespace)) { + currentToken.append(begin, iterator); + result.push_back(currentToken); + iterator = match[0].second; + if (iterator == inputTrimmed.cend()) { + return result; + } + begin = iterator; + currentToken.clear(); + } + } + + switch (*iterator) { + case '\'': + if (state != sDoubleQuote) { + currentToken.append(begin, iterator); + begin = iterator + 1; + state = state == sBegin ? sSingleQuote : sBegin; + } + break; + + case '"': + if (state != sSingleQuote) { + currentToken.append(begin, iterator); + begin = iterator + 1; + state = state == sBegin ? 
sDoubleQuote : sBegin; + } + break; + + case '\\': + if (state != sSingleQuote) { + // perl shellwords mostly just treats the next char as part + // of the string with no special processing + currentToken.append(begin, iterator); + begin = ++iterator; + } + break; + } + } + + if (state != sBegin) { + throw ShlexError(input); + } + + currentToken.append(begin, iterator); + result.push_back(currentToken); + return result; +} + +} diff --git a/src/libutil/shlex.hh b/src/libutil/shlex.hh new file mode 100644 index 000000000..4e7a48597 --- /dev/null +++ b/src/libutil/shlex.hh @@ -0,0 +1,30 @@ +#pragma once + +#include <regex> +#include <string> +#include <vector> + +#include "error.hh" + +namespace nix { + +class ShlexError : public Error +{ +public: + const std::string input; + + ShlexError(const std::string input) + : Error("Failed to parse shell arguments (unterminated quote?): %1%", input) + , input(input) + { + } +}; + +/** + * Parse a string into shell arguments. + * + * Takes care of whitespace, quotes, and backslashes (at least a bit). 
+ */ +std::vector<std::string> shell_split(const std::string & input); + +} // namespace nix diff --git a/src/nix-build/nix-build.cc b/src/nix-build/nix-build.cc index 3928e39a9..9a3994842 100644 --- a/src/nix-build/nix-build.cc +++ b/src/nix-build/nix-build.cc @@ -23,70 +23,13 @@ #include "common-eval-args.hh" #include "attr-path.hh" #include "legacy.hh" +#include "shlex.hh" using namespace nix; using namespace std::string_literals; extern char * * environ __attribute__((weak)); -/* Recreate the effect of the perl shellwords function, breaking up a - * string into arguments like a shell word, including escapes - */ -static std::vector<std::string> shellwords(const std::string & s) -{ - std::regex whitespace("^\\s+"); - auto begin = s.cbegin(); - std::vector<std::string> res; - std::string cur; - enum state { - sBegin, - sSingleQuote, - sDoubleQuote - }; - state st = sBegin; - auto it = begin; - for (; it != s.cend(); ++it) { - if (st == sBegin) { - std::smatch match; - if (regex_search(it, s.cend(), match, whitespace)) { - cur.append(begin, it); - res.push_back(cur); - it = match[0].second; - if (it == s.cend()) return res; - begin = it; - cur.clear(); - } - } - switch (*it) { - case '\'': - if (st != sDoubleQuote) { - cur.append(begin, it); - begin = it + 1; - st = st == sBegin ? sSingleQuote : sBegin; - } - break; - case '"': - if (st != sSingleQuote) { - cur.append(begin, it); - begin = it + 1; - st = st == sBegin ? 
sDoubleQuote : sBegin; - } - break; - case '\\': - if (st != sSingleQuote) { - /* perl shellwords mostly just treats the next char as part of the string with no special processing */ - cur.append(begin, it); - begin = ++it; - } - break; - } - } - if (st != sBegin) throw Error("unterminated quote in shebang line"); - cur.append(begin, it); - res.push_back(cur); - return res; -} - static void main_nix_build(int argc, char * * argv) { auto dryRun = false; @@ -143,7 +86,7 @@ static void main_nix_build(int argc, char * * argv) line = chomp(line); std::smatch match; if (std::regex_match(line, match, std::regex("^#!\\s*nix-shell\\s+(.*)$"))) - for (const auto & word : shellwords(match[1].str())) + for (const auto & word : shell_split(match[1].str())) args.push_back(word); } } |