diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-11-25 11:47:06 +0100 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-11-25 11:47:06 +0100 |
commit | 976df480c918f050608f7a23a4a21415c43475c3 (patch) | |
tree | 41463834cb5e30bd50c719f6ccaa8ecdb3a8a976 | |
parent | 4e340a983f928973d3915455d46a4bbadbc3269c (diff) |
Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular
expression and an arbitrary string. It returns ‘null’ if the string
does not match the regular expression. Otherwise, it returns a list
containing substring matches corresponding to parenthesis groups in
the regex. The regex must match the entire string (i.e. there is an
implied "^(<pat>)$" around the regex). For example:
  match "foo" "foobar" => null
  match "foo" "foo" => []
  match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
  match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
  match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]
The following example finds all regular files with extension .nix or
.patch underneath the current directory:
  let
    findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
      if type == "directory" then
        findFiles pat (dir + "/" + name)
      else if type == "regular" && match pat name != null then
        [(dir + "/" + name)]
      else []) (readDir dir));
  in findFiles ".*\\.(nix|patch)" (toString ./.)
-rw-r--r-- | src/libexpr/primops.cc | 30 | ||||
-rw-r--r-- | src/libutil/regex.cc | 23 | ||||
-rw-r--r-- | src/libutil/regex.hh | 9 | ||||
-rw-r--r-- | tests/lang/eval-okay-regex-match.exp | 1 | ||||
-rw-r--r-- | tests/lang/eval-okay-regex-match.nix | 26 |
5 files changed, 84 insertions, 5 deletions
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index ed50c8091..b0596dad9 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1430,7 +1430,34 @@ static void prim_hashString(EvalState & state, const Pos & pos, Value * * args, string s = state.forceString(*args[1], context, pos); mkString(v, printHash(hashString(ht, s)), context); -}; +} + + +/* Match a regular expression against a string and return either + ‘null’ or a list containing substring matches. */ +static void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v) +{ + Regex regex(state.forceStringNoCtx(*args[0], pos), true); + + PathSet context; + string s = state.forceString(*args[1], context, pos); + + Regex::Subs subs; + if (!regex.matches(s, subs)) { + mkNull(v); + return; + } + + unsigned int len = subs.empty() ? 0 : subs.rbegin()->first + 1; + state.mkList(v, len); + for (unsigned int n = 0; n < len; ++n) { + auto i = subs.find(n); + if (i == subs.end()) + mkNull(*(v.list.elems[n] = state.allocValue())); + else + mkString(*(v.list.elems[n] = state.allocValue()), i->second); + } +} /************************************************************* @@ -1584,6 +1611,7 @@ void EvalState::createBaseEnv() addPrimOp("__unsafeDiscardStringContext", 1, prim_unsafeDiscardStringContext); addPrimOp("__unsafeDiscardOutputDependency", 1, prim_unsafeDiscardOutputDependency); addPrimOp("__hashString", 2, prim_hashString); + addPrimOp("__match", 2, prim_match); // Versions addPrimOp("__parseDrvName", 1, prim_parseDrvName); diff --git a/src/libutil/regex.cc b/src/libutil/regex.cc index 36c8458ce..84274b3e1 100644 --- a/src/libutil/regex.cc +++ b/src/libutil/regex.cc @@ -1,13 +1,16 @@ #include "regex.hh" #include "types.hh" +#include <algorithm> + namespace nix { -Regex::Regex(const string & pattern) +Regex::Regex(const string & pattern, bool subs) { /* Patterns must match the entire string. 
*/ - int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), REG_NOSUB | REG_EXTENDED); - if (err) throw Error(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err)); + int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED); + if (err) throw RegexError(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err)); + nrParens = subs ? std::count(pattern.begin(), pattern.end(), '(') : 0; } Regex::~Regex() @@ -23,6 +26,20 @@ bool Regex::matches(const string & s) throw Error(format("matching string ‘%1%’: %2%") % s % showError(err)); } +bool Regex::matches(const string & s, Subs & subs) +{ + regmatch_t pmatch[nrParens + 2]; + int err = regexec(&preg, s.c_str(), nrParens + 2, pmatch, 0); + if (err == 0) { + for (unsigned int n = 2; n < nrParens + 2; ++n) + if (pmatch[n].rm_eo != -1) + subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so); + return true; + } + else if (err == REG_NOMATCH) return false; + throw Error(format("matching string ‘%1%’: %2%") % s % showError(err)); +} + string Regex::showError(int err) { char buf[256]; diff --git a/src/libutil/regex.hh b/src/libutil/regex.hh index aa012b721..53e31f4ed 100644 --- a/src/libutil/regex.hh +++ b/src/libutil/regex.hh @@ -5,16 +5,23 @@ #include <sys/types.h> #include <regex.h> +#include <map> + namespace nix { +MakeError(RegexError, Error) + class Regex { public: - Regex(const string & pattern); + Regex(const string & pattern, bool subs = false); ~Regex(); bool matches(const string & s); + typedef std::map<unsigned int, string> Subs; + bool matches(const string & s, Subs & subs); private: + unsigned nrParens; regex_t preg; string showError(int err); }; diff --git a/tests/lang/eval-okay-regex-match.exp b/tests/lang/eval-okay-regex-match.exp new file mode 100644 index 000000000..27ba77dda --- /dev/null +++ b/tests/lang/eval-okay-regex-match.exp @@ -0,0 +1 @@ +true diff --git a/tests/lang/eval-okay-regex-match.nix 
b/tests/lang/eval-okay-regex-match.nix new file mode 100644 index 000000000..ae6501532 --- /dev/null +++ b/tests/lang/eval-okay-regex-match.nix @@ -0,0 +1,26 @@ +with builtins; + +let + + matches = pat: s: match pat s != null; + + splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)"; + +in + +assert matches "foobar" "foobar"; +assert matches "fo*" "f"; +assert !matches "fo+" "f"; +assert matches "fo*" "fo"; +assert matches "fo*" "foo"; +assert matches "fo+" "foo"; +assert matches "fo{1,2}" "foo"; +assert !matches "fo{1,2}" "fooo"; +assert !matches "fo*" "foobar"; + +assert match "(.*)\\.nix" "foobar.nix" == [ "foobar" ]; + +assert splitFN "/path/to/foobar.nix" == [ "/path/to/" "/path/to" "foobar" "nix" ]; +assert splitFN "foobar.cc" == [ null null "foobar" "cc" ]; + +true |