aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThéophane Hufschmitt <7226587+thufschmitt@users.noreply.github.com>2023-01-02 15:22:38 +0100
committerGitHub <noreply@github.com>2023-01-02 15:22:38 +0100
commit9af16c5f742300e831a2cc400e43df1e22f87f31 (patch)
treeeb7315785dca9f78700a89ac33270a9ffba908d6
parent9c05b80db0320e560fdc0b2c65752c7cdeae46f3 (diff)
parent336908cf4ccb9708398f4bdc398e90c02a400a04 (diff)
Merge pull request #5941 from hercules-ci/optimize-intersectAttrs
Optimize intersectAttrs performance
-rw-r--r--src/libexpr/primops.cc64
-rw-r--r--tests/lang/eval-okay-intersectAttrs.exp1
-rw-r--r--tests/lang/eval-okay-intersectAttrs.nix50
3 files changed, 109 insertions, 6 deletions
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index 7cad041af..07faa9dd3 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -2448,12 +2448,62 @@ static void prim_intersectAttrs(EvalState & state, const PosIdx pos, Value * * a
state.forceAttrs(*args[0], pos);
state.forceAttrs(*args[1], pos);
- auto attrs = state.buildBindings(std::min(args[0]->attrs->size(), args[1]->attrs->size()));
-
- for (auto & i : *args[0]->attrs) {
- Bindings::iterator j = args[1]->attrs->find(i.name);
- if (j != args[1]->attrs->end())
- attrs.insert(*j);
+ Bindings &left = *args[0]->attrs;
+ Bindings &right = *args[1]->attrs;
+
+ auto attrs = state.buildBindings(std::min(left.size(), right.size()));
+
+ // The current implementation has good asymptotic complexity and is reasonably
+ // simple. Further optimization may be possible, but does not seem productive,
+ // considering the state of eval performance in 2022.
+ //
+ // I have looked for reusable and/or standard solutions and these are my
+ // findings:
+ //
+ // STL
+ // ===
+ // std::set_intersection is not suitable, as it only performs a simultaneous
+ // linear scan; not taking advantage of random access. This is O(n + m), so
+ // linear in the largest set, which is not acceptable for callPackage in Nixpkgs.
+ //
+ // Simultaneous scan, with alternating simple binary search
+ // ===
+ // One alternative algorithm scans the attrsets simultaneously, jumping
+ // forward using `lower_bound` in case of inequality. This should perform
+ // well on very similar sets, having a local and predictable access pattern.
+ // On dissimilar sets, it seems to need more comparisons than the current
+ // algorithm, as few consecutive attrs match. `lower_bound` could take
+ // advantage of the decreasing remaining search space, but this causes
+ // the medians to move, which can mean that they don't stay in the cache
+ // like they would with the current naive `find`.
+ //
+ // Double binary search
+ // ===
+ // The optimal algorithm may be "Double binary search", which doesn't
+ // scan at all, but rather divides both sets simultaneously.
+ // See "Fast Intersection Algorithms for Sorted Sequences" by Baeza-Yates et al.
+ // https://cs.uwaterloo.ca/~ajsaling/papers/intersection_alg_app10.pdf
+ // The only downsides I can think of are not having a linear access pattern
+ // for similar sets, and having to maintain a more intricate algorithm.
+ //
+ // Adaptive
+ // ===
+ // Finally one could run try a simultaneous scan, count misses and fall back
+ // to double binary search when the counter hit some threshold and/or ratio.
+
+ if (left.size() < right.size()) {
+ for (auto & l : left) {
+ Bindings::iterator r = right.find(l.name);
+ if (r != right.end())
+ attrs.insert(*r);
+ }
+ }
+ else {
+ for (auto & r : right) {
+ Bindings::iterator l = left.find(r.name);
+ if (l != left.end())
+ attrs.insert(r);
+ }
}
v.mkAttrs(attrs.alreadySorted());
@@ -2465,6 +2515,8 @@ static RegisterPrimOp primop_intersectAttrs({
.doc = R"(
Return a set consisting of the attributes in the set *e2* which have the
same name as some attribute in *e1*.
+
+ Performs in O(*n* log *m*) where *n* is the size of the smaller set and *m* the larger set's size.
)",
.fun = prim_intersectAttrs,
});
diff --git a/tests/lang/eval-okay-intersectAttrs.exp b/tests/lang/eval-okay-intersectAttrs.exp
new file mode 100644
index 000000000..50445bc0e
--- /dev/null
+++ b/tests/lang/eval-okay-intersectAttrs.exp
@@ -0,0 +1 @@
+[ { } { a = 1; } { a = 1; } { a = "a"; } { m = 1; } { m = "m"; } { n = 1; } { n = "n"; } { n = 1; p = 2; } { n = "n"; p = "p"; } { n = 1; p = 2; } { n = "n"; p = "p"; } { a = "a"; b = "b"; c = "c"; d = "d"; e = "e"; f = "f"; g = "g"; h = "h"; i = "i"; j = "j"; k = "k"; l = "l"; m = "m"; n = "n"; o = "o"; p = "p"; q = "q"; r = "r"; s = "s"; t = "t"; u = "u"; v = "v"; w = "w"; x = "x"; y = "y"; z = "z"; } true ]
diff --git a/tests/lang/eval-okay-intersectAttrs.nix b/tests/lang/eval-okay-intersectAttrs.nix
new file mode 100644
index 000000000..39d49938c
--- /dev/null
+++ b/tests/lang/eval-okay-intersectAttrs.nix
@@ -0,0 +1,50 @@
+let
+ alphabet =
+ { a = "a";
+ b = "b";
+ c = "c";
+ d = "d";
+ e = "e";
+ f = "f";
+ g = "g";
+ h = "h";
+ i = "i";
+ j = "j";
+ k = "k";
+ l = "l";
+ m = "m";
+ n = "n";
+ o = "o";
+ p = "p";
+ q = "q";
+ r = "r";
+ s = "s";
+ t = "t";
+ u = "u";
+ v = "v";
+ w = "w";
+ x = "x";
+ y = "y";
+ z = "z";
+ };
+ foo = {
+ inherit (alphabet) f o b a r z q u x;
+ aa = throw "aa";
+ };
+ alphabetFail = builtins.mapAttrs throw alphabet;
+in
+[ (builtins.intersectAttrs { a = abort "l1"; } { b = abort "r1"; })
+ (builtins.intersectAttrs { a = abort "l2"; } { a = 1; })
+ (builtins.intersectAttrs alphabetFail { a = 1; })
+ (builtins.intersectAttrs { a = abort "laa"; } alphabet)
+ (builtins.intersectAttrs alphabetFail { m = 1; })
+ (builtins.intersectAttrs { m = abort "lam"; } alphabet)
+ (builtins.intersectAttrs alphabetFail { n = 1; })
+ (builtins.intersectAttrs { n = abort "lan"; } alphabet)
+ (builtins.intersectAttrs alphabetFail { n = 1; p = 2; })
+ (builtins.intersectAttrs { n = abort "lan2"; p = abort "lap"; } alphabet)
+ (builtins.intersectAttrs alphabetFail { n = 1; p = 2; })
+ (builtins.intersectAttrs { n = abort "lan2"; p = abort "lap"; } alphabet)
+ (builtins.intersectAttrs alphabetFail alphabet)
+ (builtins.intersectAttrs alphabet foo == builtins.intersectAttrs foo alphabet)
+]