src/libutil/shlex.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79

#include "shlex.hh"
#include "strings.hh"

namespace nix {

/**
 * Split `input` into words using simplified, shell-like quoting rules.
 *
 * - Outside of quotes, a run of whitespace (regex `\s+`) ends the current
 *   word.
 * - Inside single quotes every character is literal, including backslashes
 *   and double quotes.
 * - Inside double quotes every character is literal except the backslash.
 * - Outside of single quotes, a backslash is dropped and the character that
 *   follows it is taken literally (it neither toggles a quote nor separates
 *   words).
 * - Adjacent quoted and unquoted pieces are concatenated into a single word,
 *   so `a'b c'd` yields `ab cd` and `''` yields an empty word.
 *
 * @param input The string to split.
 * @return The words, in order of appearance. Empty if `trim(input)` is empty.
 * @throws ShlexError (constructed from `input`) if a quote is left
 *         unterminated, or if the processed text ends with a backslash that
 *         has no character left to escape.
 */
std::vector<std::string> shell_split(const std::string & input)
{
    std::vector<std::string> result;

    // Hack: `shell_split` is janky and parses ` a` as `{"", "a"}`, so we trim
    // whitespace before starting.
    // NOTE(review): `trim` presumably strips trailing whitespace as well, in
    // which case an escaped trailing space (`a\ `) is reduced to a dangling
    // backslash and rejected below -- confirm this is acceptable.
    auto inputTrimmed = trim(input);

    if (inputTrimmed.empty()) {
        return result;
    }

    // Anchored so that it only matches whitespace starting exactly at the
    // current position.
    const std::regex whitespace("^\\s+");
    const auto end = inputTrimmed.cend();

    // `begin` marks the start of the pending literal run that has not been
    // copied into `currentToken` yet; `iterator` is the scan position.
    auto begin = inputTrimmed.cbegin();
    std::string currentToken;
    enum State { sBegin, sSingleQuote, sDoubleQuote };
    State state = sBegin;
    auto iterator = begin;

    for (; iterator != end; ++iterator) {
        if (state == sBegin) {
            // Unquoted whitespace terminates the current word.
            std::smatch match;
            if (regex_search(iterator, end, match, whitespace)) {
                currentToken.append(begin, iterator);
                result.push_back(currentToken);
                iterator = match[0].second;
                if (iterator == end) {
                    return result;
                }
                begin = iterator;
                currentToken.clear();
            }
        }

        switch (*iterator) {
        case '\'':
            if (state != sDoubleQuote) {
                // Flush the pending run and drop the quote character itself.
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sSingleQuote : sBegin;
            }
            break;

        case '"':
            if (state != sSingleQuote) {
                // Flush the pending run and drop the quote character itself.
                currentToken.append(begin, iterator);
                begin = iterator + 1;
                state = state == sBegin ? sDoubleQuote : sBegin;
            }
            break;

        case '\\':
            if (state != sSingleQuote) {
                // perl shellwords mostly just treats the next char as part
                // of the string with no special processing
                currentToken.append(begin, iterator);
                begin = ++iterator;
                if (iterator == end) {
                    // Dangling backslash: there is nothing left to escape.
                    // Without this check the loop's own `++iterator` would
                    // step past the end of the string and the scan would
                    // read out of bounds.
                    throw ShlexError(input);
                }
            }
            break;

        // no other relevant cases; silence exhaustiveness compiler warning
        default:
            break;
        }
    }

    // Reaching the end of the input while still inside a quote is an error.
    if (state != sBegin) {
        throw ShlexError(input);
    }

    // Flush the final word.
    currentToken.append(begin, iterator);
    result.push_back(currentToken);
    return result;
}

}