aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRebecca Turner <rbt@sent.as>2024-03-14 17:44:43 -0700
committerRebecca Turner <rbt@sent.as>2024-03-26 16:44:04 -0700
commitaee3d639b5096349413021537ae842c8c33ef6cf (patch)
treeee8557f970f5c477116bd1b77a8b9cacce59d03a /src
parentda22dbc33397c9c6c5d115ce753d5cf11585291e (diff)
Move `shellwords` into its own file (as `shell_split`)
Change-Id: I34c0ebfb6dcea49bf632d8880e04075335a132bf
Diffstat (limited to 'src')
-rw-r--r--src/libutil/meson.build2
-rw-r--r--src/libutil/shlex.cc77
-rw-r--r--src/libutil/shlex.hh30
-rw-r--r--src/nix-build/nix-build.cc61
4 files changed, 111 insertions, 59 deletions
diff --git a/src/libutil/meson.build b/src/libutil/meson.build
index 084d7ed11..91a7b33ba 100644
--- a/src/libutil/meson.build
+++ b/src/libutil/meson.build
@@ -20,6 +20,7 @@ libutil_sources = files(
'position.cc',
'references.cc',
'serialise.cc',
+ 'shlex.cc',
'signals.cc',
'source-path.cc',
'suggestions.cc',
@@ -69,6 +70,7 @@ libutil_headers = files(
'regex-combinators.hh',
'repair-flag.hh',
'serialise.hh',
+ 'shlex.hh',
'signals.hh',
'source-path.hh',
'split.hh',
diff --git a/src/libutil/shlex.cc b/src/libutil/shlex.cc
new file mode 100644
index 000000000..b5f340251
--- /dev/null
+++ b/src/libutil/shlex.cc
@@ -0,0 +1,77 @@
+#include "shlex.hh"
+#include "util.hh"
+
+namespace nix {
+
+/**
+ * Split `input` into words, approximating Perl's `shellwords`.
+ *
+ * - Runs of unquoted whitespace separate words.
+ * - Single and double quotes group characters (including whitespace) into the
+ *   current word; the quote characters themselves are removed.
+ * - Outside single quotes, a backslash is removed and the character after it
+ *   is taken literally. A backslash that is the very last character has
+ *   nothing to escape and is simply dropped.
+ *
+ * @param input The string to split. It is passed through `trim()` first.
+ * @return The words, in order. Empty if the trimmed input is empty.
+ * @throws ShlexError if a single or double quote is still open at the end.
+ */
+std::vector<std::string> shell_split(const std::string & input)
+{
+    std::vector<std::string> result;
+
+    // Hack: the loop below is janky and parses ` a` as `{"", "a"}`, so we trim
+    // whitespace before starting.
+    auto inputTrimmed = trim(input);
+
+    if (inputTrimmed.empty()) {
+        return result;
+    }
+
+    std::regex whitespace("^\\s+");
+    const auto end = inputTrimmed.cend();
+    // `begin` marks the start of the part of the current word that has not
+    // been copied into `currentToken` yet.
+    auto begin = inputTrimmed.cbegin();
+    std::string currentToken;
+    enum State { sBegin, sSingleQuote, sDoubleQuote };
+    State state = sBegin;
+    auto iterator = begin;
+
+    for (; iterator != end; ++iterator) {
+        if (state == sBegin) {
+            // Unquoted whitespace ends the current word.
+            std::smatch match;
+            if (regex_search(iterator, end, match, whitespace)) {
+                currentToken.append(begin, iterator);
+                result.push_back(currentToken);
+                iterator = match[0].second;
+                if (iterator == end) {
+                    return result;
+                }
+                begin = iterator;
+                currentToken.clear();
+            }
+        }
+
+        switch (*iterator) {
+            case '\'':
+                if (state != sDoubleQuote) {
+                    currentToken.append(begin, iterator);
+                    begin = iterator + 1;
+                    state = state == sBegin ? sSingleQuote : sBegin;
+                }
+                break;
+
+            case '"':
+                if (state != sSingleQuote) {
+                    currentToken.append(begin, iterator);
+                    begin = iterator + 1;
+                    state = state == sBegin ? sDoubleQuote : sBegin;
+                }
+                break;
+
+            case '\\':
+                if (state != sSingleQuote) {
+                    // perl shellwords mostly just treats the next char as part
+                    // of the string with no special processing
+                    currentToken.append(begin, iterator);
+                    begin = ++iterator;
+                }
+                break;
+        }
+
+        // A trailing backslash moves `iterator` onto the end. Stop here so the
+        // loop increment cannot step past the end and read out of bounds.
+        if (iterator == end) {
+            break;
+        }
+    }
+
+    if (state != sBegin) {
+        throw ShlexError(input);
+    }
+
+    currentToken.append(begin, iterator);
+    result.push_back(currentToken);
+    return result;
+}
+
+}
diff --git a/src/libutil/shlex.hh b/src/libutil/shlex.hh
new file mode 100644
index 000000000..4e7a48597
--- /dev/null
+++ b/src/libutil/shlex.hh
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <regex>
+#include <string>
+#include <vector>
+
+#include "error.hh"
+
+namespace nix {
+
+/**
+ * Error raised when a string cannot be parsed into shell arguments.
+ *
+ * The offending string is kept in `input` and is also included in the
+ * error message.
+ */
+class ShlexError : public Error
+{
+public:
+    /** The string that failed to parse. */
+    const std::string input;
+
+    /**
+     * @param input The string that failed to parse. Taken by reference so it
+     * is copied only once, into the `input` member.
+     */
+    ShlexError(const std::string & input)
+        : Error("Failed to parse shell arguments (unterminated quote?): %1%", input)
+        , input(input)
+    {
+    }
+};
+
+/**
+ * Parse a string into shell arguments.
+ *
+ * Takes care of whitespace, quotes, and backslashes (at least a bit):
+ * unquoted whitespace separates arguments, single and double quotes group
+ * characters into one argument (the quotes themselves are removed), and a
+ * backslash outside single quotes makes the following character literal.
+ *
+ * The input is passed through `trim()` before parsing; an input that is empty
+ * after trimming yields an empty vector.
+ *
+ * @throws ShlexError if a quote is still open at the end of the input.
+ */
+std::vector<std::string> shell_split(const std::string & input);
+
+} // namespace nix
diff --git a/src/nix-build/nix-build.cc b/src/nix-build/nix-build.cc
index 3928e39a9..9a3994842 100644
--- a/src/nix-build/nix-build.cc
+++ b/src/nix-build/nix-build.cc
@@ -23,70 +23,13 @@
#include "common-eval-args.hh"
#include "attr-path.hh"
#include "legacy.hh"
+#include "shlex.hh"
using namespace nix;
using namespace std::string_literals;
extern char * * environ __attribute__((weak));
-/* Recreate the effect of the perl shellwords function, breaking up a
- * string into arguments like a shell word, including escapes
- */
-static std::vector<std::string> shellwords(const std::string & s)
-{
- std::regex whitespace("^\\s+");
- auto begin = s.cbegin();
- std::vector<std::string> res;
- std::string cur;
- enum state {
- sBegin,
- sSingleQuote,
- sDoubleQuote
- };
- state st = sBegin;
- auto it = begin;
- for (; it != s.cend(); ++it) {
- if (st == sBegin) {
- std::smatch match;
- if (regex_search(it, s.cend(), match, whitespace)) {
- cur.append(begin, it);
- res.push_back(cur);
- it = match[0].second;
- if (it == s.cend()) return res;
- begin = it;
- cur.clear();
- }
- }
- switch (*it) {
- case '\'':
- if (st != sDoubleQuote) {
- cur.append(begin, it);
- begin = it + 1;
- st = st == sBegin ? sSingleQuote : sBegin;
- }
- break;
- case '"':
- if (st != sSingleQuote) {
- cur.append(begin, it);
- begin = it + 1;
- st = st == sBegin ? sDoubleQuote : sBegin;
- }
- break;
- case '\\':
- if (st != sSingleQuote) {
- /* perl shellwords mostly just treats the next char as part of the string with no special processing */
- cur.append(begin, it);
- begin = ++it;
- }
- break;
- }
- }
- if (st != sBegin) throw Error("unterminated quote in shebang line");
- cur.append(begin, it);
- res.push_back(cur);
- return res;
-}
-
static void main_nix_build(int argc, char * * argv)
{
auto dryRun = false;
@@ -143,7 +86,7 @@ static void main_nix_build(int argc, char * * argv)
line = chomp(line);
std::smatch match;
if (std::regex_match(line, match, std::regex("^#!\\s*nix-shell\\s+(.*)$")))
- for (const auto & word : shellwords(match[1].str()))
+ for (const auto & word : shell_split(match[1].str()))
args.push_back(word);
}
}