blob: b923fef65b6f881770e8f71ec742deb7b67fba85 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#include "shlex.hh"
#include "strings.hh"
namespace nix {
/**
 * Split `input` into words in a shell-like manner.
 *
 * Words are separated by runs of unquoted whitespace. Single and double
 * quotes group characters (including whitespace) into the current word; the
 * quote characters themselves are dropped. A single quote inside double
 * quotes (and vice versa) is an ordinary character. Outside single quotes a
 * backslash is dropped and the character following it is taken literally.
 *
 * @param input the string to split
 * @return the words in order of appearance; empty if `input` is empty after
 *         trimming
 * @throws ShlexError if a quote is left unterminated, or if a backslash
 *         outside single quotes is the last character of the trimmed input
 *         (there is nothing left for it to escape)
 */
std::vector<std::string> shell_split(const std::string & input)
{
    std::vector<std::string> result;

    // Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim
    // whitespace before starting.
    // NOTE(review): because trimming happens first, an input ending in an
    // escaped space (`a\ `) presumably reaches the loop as `a\` and is
    // rejected as a dangling backslash -- confirm against `trim`.
    auto inputTrimmed = trim(input);
    if (inputTrimmed.empty()) {
        return result;
    }

    // Anchored so that it only matches whitespace starting exactly at the
    // position the search is started from.
    std::regex whitespace("^\\s+");

    // `begin` marks the start of the not-yet-copied part of the current
    // word; everything in [begin, iterator) is appended to `currentToken`
    // whenever a special character or a word boundary is reached.
    auto begin = inputTrimmed.cbegin();
    std::string currentToken;
    enum State { sBegin, sSingleQuote, sDoubleQuote };
    State state = sBegin;
    auto iterator = begin;

    for (; iterator != inputTrimmed.cend(); ++iterator) {
        if (state == sBegin) {
            // Unquoted whitespace terminates the current word.
            std::smatch match;
            if (regex_search(iterator, inputTrimmed.cend(), match, whitespace)) {
                currentToken.append(begin, iterator);
                result.push_back(currentToken);
                // Skip the whole whitespace run.
                iterator = match[0].second;
                if (iterator == inputTrimmed.cend()) {
                    return result;
                }
                begin = iterator;
                currentToken.clear();
            }
        }

        switch (*iterator) {
        case '\'':
            if (state != sDoubleQuote) {
                // Flush what precedes the quote and drop the quote itself.
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sSingleQuote : sBegin;
            }
            break;
        case '"':
            if (state != sSingleQuote) {
                // Flush what precedes the quote and drop the quote itself.
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sDoubleQuote : sBegin;
            }
            break;
        case '\\':
            if (state != sSingleQuote) {
                // perl shellwords mostly just treats the next char as part
                // of the string with no special processing
                currentToken.append(begin, iterator);
                // A backslash with nothing after it has no character to
                // escape. Stepping over the missing character would move
                // the iterator past the end of the string (the loop
                // increments once more), so reject the input instead.
                if (iterator + 1 == inputTrimmed.cend()) {
                    throw ShlexError(input);
                }
                // Drop the backslash; the loop's own increment then steps
                // over the escaped character so it is never interpreted.
                begin = ++iterator;
            }
            break;
        // no other relevant cases; silence exhaustiveness compiler warning
        default:
            break;
        }
    }

    // Reaching the end of the input inside a quote means it was never closed.
    if (state != sBegin) {
        throw ShlexError(input);
    }

    // Flush the final word.
    currentToken.append(begin, iterator);
    result.push_back(currentToken);
    return result;
}
}
|