diff options
author | Eelco Dolstra <edolstra@gmail.com> | 2022-01-18 19:43:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-18 19:43:28 +0100 |
commit | 4af88a4c91e45f1b0f51c5f49c09a4c829d83c39 (patch) | |
tree | 66bb190b48ac5344e6f3cc96a16f3b792132c70c | |
parent | 9901cb96c7c1b40394f3741608b3457729e098db (diff) | |
parent | ad60dfde2af6bcdb77e50562a2f2b28107e28588 (diff) |
Merge pull request #5906 from pennae/primops-optimization
optimize primops and utils by caching more and copying less
-rw-r--r-- | src/libexpr/attr-set.hh | 7 | ||||
-rw-r--r-- | src/libexpr/eval.cc | 5 | ||||
-rw-r--r-- | src/libexpr/eval.hh | 4 | ||||
-rw-r--r-- | src/libexpr/primops.cc | 62 | ||||
-rw-r--r-- | src/libutil/types.hh | 1 | ||||
-rw-r--r-- | src/libutil/util.cc | 43 | ||||
-rw-r--r-- | src/libutil/util.hh | 18 |
7 files changed, 90 insertions, 50 deletions
diff --git a/src/libexpr/attr-set.hh b/src/libexpr/attr-set.hh index 82c348287..3e4899efc 100644 --- a/src/libexpr/attr-set.hh +++ b/src/libexpr/attr-set.hh @@ -121,6 +121,8 @@ class BindingsBuilder Bindings * bindings; public: + // needed by std::back_inserter + using value_type = Attr; EvalState & state; @@ -135,6 +137,11 @@ public: void insert(const Attr & attr) { + push_back(attr); + } + + void push_back(const Attr & attr) + { bindings->push_back(attr); } diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 61bccd6e2..94ffab175 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -425,6 +425,11 @@ EvalState::EvalState( , sDescription(symbols.create("description")) , sSelf(symbols.create("self")) , sEpsilon(symbols.create("")) + , sStartSet(symbols.create("startSet")) + , sOperator(symbols.create("operator")) + , sKey(symbols.create("key")) + , sPath(symbols.create("path")) + , sPrefix(symbols.create("prefix")) , repair(NoRepair) , emptyBindings(0) , store(store) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 850c5bae6..82ce9d1b3 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -80,7 +80,8 @@ public: sContentAddressed, sOutputHash, sOutputHashAlgo, sOutputHashMode, sRecurseForDerivations, - sDescription, sSelf, sEpsilon; + sDescription, sSelf, sEpsilon, sStartSet, sOperator, sKey, sPath, + sPrefix; Symbol sDerivationNix; /* If set, force copying files to the Nix store even if they @@ -399,6 +400,7 @@ private: friend struct ExprSelect; friend void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v); friend void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v); + friend void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v); friend struct Value; }; diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index b819918ad..5c476ddd4 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -12,6 +12,8 @@ #include "value-to-xml.hh" #include "primops.hh" +#include <boost/container/small_vector.hpp> + #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -592,16 +594,16 @@ typedef list<Value *> ValueList; static Bindings::iterator getAttr( EvalState & state, - string funcName, - string attrName, + std::string_view funcName, + Symbol attrSym, Bindings * attrSet, const Pos & pos) { - Bindings::iterator value = attrSet->find(state.symbols.create(attrName)); + Bindings::iterator value = attrSet->find(attrSym); if (value == attrSet->end()) { hintformat errorMsg = hintfmt( "attribute '%s' missing for call to '%s'", - attrName, + attrSym, funcName ); @@ -635,7 +637,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar Bindings::iterator startSet = getAttr( state, "genericClosure", - "startSet", + state.sStartSet, args[0]->attrs, pos ); @@ -650,7 +652,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar Bindings::iterator op = getAttr( state, "genericClosure", - "operator", + state.sOperator, args[0]->attrs, pos ); @@ -672,7 +674,7 @@ static void prim_genericClosure(EvalState & state, const Pos & pos, Value * * ar state.forceAttrs(*e, pos); Bindings::iterator key = - e->attrs->find(state.symbols.create("key")); + e->attrs->find(state.sKey); if (key == e->attrs->end()) throw EvalError({ .msg = hintfmt("attribute 'key' required"), @@ -1079,10 +1081,10 @@ static void prim_derivationStrict(EvalState & state, const Pos & pos, Value * * } else { auto s = state.coerceToString(*i->pos, *i->value, context, true); drv.env.emplace(key, s); - if (i->name == state.sBuilder) drv.builder = s; - else if (i->name == state.sSystem) drv.platform = s; - else if (i->name == state.sOutputHash) outputHash = s; - else if (i->name == state.sOutputHashAlgo) outputHashAlgo = s; + if (i->name == state.sBuilder) drv.builder = std::move(s); + else if (i->name == state.sSystem) drv.platform = std::move(s); + else if (i->name == state.sOutputHash) outputHash = std::move(s); + else if (i->name == state.sOutputHashAlgo) outputHashAlgo = std::move(s); else if (i->name == state.sOutputHashMode) handleHashMode(s); else if (i->name == state.sOutputs) handleOutputs(tokenizeString<Strings>(s)); @@ -1498,14 +1500,14 @@ static void prim_findFile(EvalState & state, const Pos & pos, Value * * args, Va state.forceAttrs(*v2, pos); string prefix; - Bindings::iterator i = v2->attrs->find(state.symbols.create("prefix")); + Bindings::iterator i = v2->attrs->find(state.sPrefix); if (i != v2->attrs->end()) prefix = state.forceStringNoCtx(*i->value, pos); i = getAttr( state, "findFile", - "path", + state.sPath, v2->attrs, pos ); @@ -2163,7 +2165,10 @@ static void prim_attrValues(EvalState & state, const Pos & pos, Value * * args, v.listElems()[n++] = (Value *) &i; std::sort(v.listElems(), v.listElems() + n, - [](Value * v1, Value * v2) { return (string) ((Attr *) v1)->name < (string) ((Attr *) v2)->name; }); + [](Value * v1, Value * v2) { + std::string_view s1 = ((Attr *) v1)->name, s2 = ((Attr *) v2)->name; + return s1 < s2; + }); for (unsigned int i = 0; i < n; ++i) v.listElems()[i] = ((Attr *) v.listElems()[i])->value; @@ -2184,11 +2189,10 @@ void prim_getAttr(EvalState & state, const Pos & pos, Value * * args, Value & v) { string attr = state.forceStringNoCtx(*args[0], pos); state.forceAttrs(*args[1], pos); - // !!! Should we create a symbol here or just do a lookup? Bindings::iterator i = getAttr( state, "getAttr", - attr, + state.symbols.create(attr), args[1]->attrs, pos ); @@ -2268,21 +2272,25 @@ static void prim_removeAttrs(EvalState & state, const Pos & pos, Value * * args, state.forceAttrs(*args[0], pos); state.forceList(*args[1], pos); - /* Get the attribute names to be removed. */ - std::set<Symbol> names; + /* Get the attribute names to be removed. + We keep them as Attrs instead of Symbols so std::set_difference + can be used to remove them from attrs[0]. */ + boost::container::small_vector<Attr, 64> names; + names.reserve(args[1]->listSize()); for (auto elem : args[1]->listItems()) { state.forceStringNoCtx(*elem, pos); - names.insert(state.symbols.create(elem->string.s)); + names.emplace_back(state.symbols.create(elem->string.s), nullptr); } + std::sort(names.begin(), names.end()); /* Copy all attributes not in that set. Note that we don't need to sort v.attrs because it's a subset of an already sorted vector. */ auto attrs = state.buildBindings(args[0]->attrs->size()); - for (auto & i : *args[0]->attrs) { - if (!names.count(i.name)) - attrs.insert(i); - } + std::set_difference( + args[0]->attrs->begin(), args[0]->attrs->end(), + names.begin(), names.end(), + std::back_inserter(attrs)); v.mkAttrs(attrs.alreadySorted()); } @@ -3520,18 +3528,20 @@ static RegisterPrimOp primop_match({ /* Split a string with a regular expression, and return a list of the non-matching parts interleaved by the lists of the matching groups. */ -static void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v) +void prim_split(EvalState & state, const Pos & pos, Value * * args, Value & v) { auto re = state.forceStringNoCtx(*args[0], pos); try { - std::regex regex(re, std::regex::extended); + auto regex = state.regexCache->cache.find(re); + if (regex == state.regexCache->cache.end()) + regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first; PathSet context; const std::string str = state.forceString(*args[1], context, pos); - auto begin = std::sregex_iterator(str.begin(), str.end(), regex); + auto begin = std::sregex_iterator(str.begin(), str.end(), regex->second); auto end = std::sregex_iterator(); // Any matches results are surrounded by non-matching results. diff --git a/src/libutil/types.hh b/src/libutil/types.hh index 9c85fef62..8f72c926f 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -22,6 +22,7 @@ typedef std::map<string, string> StringMap; /* Paths are just strings. */ typedef string Path; +typedef std::string_view PathView; typedef list<Path> Paths; typedef set<Path> PathSet; diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 3e7a9e2f3..1f1f2c861 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -106,16 +106,16 @@ Path absPath(Path path, std::optional<Path> dir, bool resolveSymlinks) } -Path canonPath(const Path & path, bool resolveSymlinks) +Path canonPath(PathView path, bool resolveSymlinks) { assert(path != ""); string s; + s.reserve(256); if (path[0] != '/') throw Error("not an absolute path: '%1%'", path); - string::const_iterator i = path.begin(), end = path.end(); string temp; /* Count the number of times we follow a symlink and stop at some @@ -125,33 +125,37 @@ Path canonPath(const Path & path, bool resolveSymlinks) while (1) { /* Skip slashes. */ - while (i != end && *i == '/') i++; - if (i == end) break; + while (!path.empty() && path[0] == '/') path.remove_prefix(1); + if (path.empty()) break; /* Ignore `.'. */ - if (*i == '.' && (i + 1 == end || i[1] == '/')) - i++; + if (path == "." || path.substr(0, 2) == "./") + path.remove_prefix(1); /* If `..', delete the last component. */ - else if (*i == '.' && i + 1 < end && i[1] == '.' && - (i + 2 == end || i[2] == '/')) + else if (path == ".." || path.substr(0, 3) == "../") { if (!s.empty()) s.erase(s.rfind('/')); - i += 2; + path.remove_prefix(2); } /* Normal component; copy it. */ else { s += '/'; - while (i != end && *i != '/') s += *i++; + if (const auto slash = path.find('/'); slash == string::npos) { + s += path; + path = {}; + } else { + s += path.substr(0, slash); + path = path.substr(slash + 1); + } /* If s points to a symlink, resolve it and continue from there */ if (resolveSymlinks && isLink(s)) { if (++followCount >= maxFollow) throw Error("infinite symlink recursion in path '%1%'", path); - temp = readLink(s) + string(i, end); - i = temp.begin(); - end = temp.end(); + temp = concatStrings(readLink(s), path); + path = temp; if (!temp.empty() && temp[0] == '/') { s.clear(); /* restart for symlinks pointing to absolute path */ } else { @@ -164,7 +168,7 @@ Path canonPath(const Path & path, bool resolveSymlinks) } } - return s.empty() ? "/" : s; + return s.empty() ? "/" : std::move(s); } @@ -1231,23 +1235,22 @@ void _interrupted() ////////////////////////////////////////////////////////////////////// -template<class C> C tokenizeString(std::string_view s, const string & separators) +template<class C> C tokenizeString(std::string_view s, std::string_view separators) { C result; string::size_type pos = s.find_first_not_of(separators, 0); while (pos != string::npos) { string::size_type end = s.find_first_of(separators, pos + 1); if (end == string::npos) end = s.size(); - string token(s, pos, end - pos); - result.insert(result.end(), token); + result.insert(result.end(), string(s, pos, end - pos)); pos = s.find_first_not_of(separators, end); } return result; } -template Strings tokenizeString(std::string_view s, const string & separators); -template StringSet tokenizeString(std::string_view s, const string & separators); -template vector<string> tokenizeString(std::string_view s, const string & separators); +template Strings tokenizeString(std::string_view s, std::string_view separators); +template StringSet tokenizeString(std::string_view s, std::string_view separators); +template vector<string> tokenizeString(std::string_view s, std::string_view separators); string chomp(std::string_view s) diff --git a/src/libutil/util.hh b/src/libutil/util.hh index c900033f8..369c44f78 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -56,7 +56,7 @@ Path absPath(Path path, double or trailing slashes. Optionally resolves all symlink components such that each component of the resulting path is *not* a symbolic link. */ -Path canonPath(const Path & path, bool resolveSymlinks = false); +Path canonPath(PathView path, bool resolveSymlinks = false); /* Return the directory part of the given canonical path, i.e., everything before the final `/'. If the path is the root or an @@ -368,15 +368,19 @@ MakeError(FormatError, Error); /* String tokenizer. */ -template<class C> C tokenizeString(std::string_view s, const string & separators = " \t\n\r"); +template<class C> C tokenizeString(std::string_view s, std::string_view separators = " \t\n\r"); /* Concatenate the given strings with a separator between the elements. */ template<class C> -string concatStringsSep(const string & sep, const C & ss) +string concatStringsSep(const std::string_view sep, const C & ss) { + size_t size = 0; + // need a cast to string_view since this is also called with Symbols + for (const auto & s : ss) size += sep.size() + std::string_view(s).size(); string s; + s.reserve(size); for (auto & i : ss) { if (s.size() != 0) s += sep; s += i; @@ -384,6 +388,14 @@ string concatStringsSep(const string & sep, const C & ss) return s; } +template<class ... Parts> +auto concatStrings(Parts && ... parts) + -> std::enable_if_t<(... && std::is_convertible_v<Parts, std::string_view>), string> +{ + std::string_view views[sizeof...(parts)] = { parts... }; + return concatStringsSep({}, views); +} + /* Add quotes around a collection of strings. */ template<class C> Strings quoteStrings(const C & c) |