diff options
Diffstat (limited to 'src/libutil')
-rw-r--r-- | src/libutil/meson.build | 2 | ||||
-rw-r--r-- | src/libutil/shlex.cc | 77 | ||||
-rw-r--r-- | src/libutil/shlex.hh | 30 |
3 files changed, 109 insertions, 0 deletions
diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 66eba9d85..8e4b5211d 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -20,6 +20,7 @@ libutil_sources = files( 'position.cc', 'references.cc', 'serialise.cc', + 'shlex.cc', 'signals.cc', 'source-path.cc', 'suggestions.cc', @@ -71,6 +72,7 @@ libutil_headers = files( 'regex-combinators.hh', 'repair-flag.hh', 'serialise.hh', + 'shlex.hh', 'signals.hh', 'source-path.hh', 'split.hh', diff --git a/src/libutil/shlex.cc b/src/libutil/shlex.cc new file mode 100644 index 000000000..b5f340251 --- /dev/null +++ b/src/libutil/shlex.cc @@ -0,0 +1,77 @@ +#include "shlex.hh" +#include "util.hh" + +namespace nix { + +std::vector<std::string> shell_split(const std::string & input) +{ + std::vector<std::string> result; + + // Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim + // whitespace before starting. + auto inputTrimmed = trim(input); + + if (inputTrimmed.empty()) { + return result; + } + + std::regex whitespace("^\\s+"); + auto begin = inputTrimmed.cbegin(); + std::string currentToken; + enum State { sBegin, sSingleQuote, sDoubleQuote }; + State state = sBegin; + auto iterator = begin; + + for (; iterator != inputTrimmed.cend(); ++iterator) { + if (state == sBegin) { + std::smatch match; + if (regex_search(iterator, inputTrimmed.cend(), match, whitespace)) { + currentToken.append(begin, iterator); + result.push_back(currentToken); + iterator = match[0].second; + if (iterator == inputTrimmed.cend()) { + return result; + } + begin = iterator; + currentToken.clear(); + } + } + + switch (*iterator) { + case '\'': + if (state != sDoubleQuote) { + currentToken.append(begin, iterator); + begin = iterator + 1; + state = state == sBegin ? sSingleQuote : sBegin; + } + break; + + case '"': + if (state != sSingleQuote) { + currentToken.append(begin, iterator); + begin = iterator + 1; + state = state == sBegin ? sDoubleQuote : sBegin; + } + break; + + case '\\': + if (state != sSingleQuote) { + // perl shellwords mostly just treats the next char as part + // of the string with no special processing + currentToken.append(begin, iterator); + begin = ++iterator; + } + break; + } + } + + if (state != sBegin) { + throw ShlexError(input); + } + + currentToken.append(begin, iterator); + result.push_back(currentToken); + return result; +} + +} diff --git a/src/libutil/shlex.hh b/src/libutil/shlex.hh new file mode 100644 index 000000000..4e7a48597 --- /dev/null +++ b/src/libutil/shlex.hh @@ -0,0 +1,30 @@ +#pragma once + +#include <regex> +#include <string> +#include <vector> + +#include "error.hh" + +namespace nix { + +class ShlexError : public Error +{ +public: + const std::string input; + + ShlexError(const std::string input) + : Error("Failed to parse shell arguments (unterminated quote?): %1%", input) + , input(input) + { + } +}; + +/** + * Parse a string into shell arguments. + * + * Takes care of whitespace, quotes, and backslashes (at least a bit). + */ +std::vector<std::string> shell_split(const std::string & input); + +} // namespace nix |