From 57c3c48f6660f905fb974cff7ec58f746a1a6970 Mon Sep 17 00:00:00 2001 From: Aria Shrimpton Date: Mon, 25 Mar 2024 14:01:37 +0000 Subject: cleanup analysis code --- analysis/vis.livemd | 139 ++++++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 63 deletions(-) (limited to 'analysis') diff --git a/analysis/vis.livemd b/analysis/vis.livemd index 360e2fe..4913e76 100644 --- a/analysis/vis.livemd +++ b/analysis/vis.livemd @@ -88,42 +88,8 @@ cost_model_points = |> DF.concat_rows() ``` -```elixir -cost_models -|> DF.filter(op == "contains") -``` - ## Cost model exploratory plots -```elixir -startn = 1 -endn = 5000 -resolution = 1 - -points_for = fn impl, op -> - %{"coeffs" => [coeffs]} = - DF.filter(cost_models, impl == ^impl and op == ^op) - |> DF.to_columns() - - Enum.map(startn..endn//resolution, fn n -> - t = - (coeffs - |> Enum.take(3) - |> Enum.with_index() - |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end) - |> Enum.sum()) + Enum.at(coeffs, 3) * Math.log2(n) - - %{ - impl: String.split(impl, "::") |> List.last(), - op: op, - n: n, - t: max(t, 0) - } - end) - |> DF.new() -end -``` - ```elixir @@ -131,36 +97,83 @@ set_impls = ["BTreeSet", "HashSet", "VecSet", "SortedVecSet"] mapping_impls = ["HashMap", "BTreeMap", "VecMap", "SortedVecMap"] other_impls = ["Vec", "LinkedList", "SortedVec"] -inspect_op = "remove" -impls = ["Vec", "LinkedList"] +impls = other_impls -Tucan.layers([ - cost_models - |> DF.filter(op == ^inspect_op) - |> DF.distinct(["impl"]) - |> DF.to_rows() - |> Enum.map(fn %{"impl" => impl} -> points_for.(impl, inspect_op) end) - |> DF.concat_rows() - |> DF.filter(impl in ^impls) - |> Tucan.lineplot("n", "t", color_by: "impl", clip: true) - # Tucan.scatter( - # cost_model_points - # |> DF.filter(op == ^inspect_op and impl in ^impls) - # |> DF.group_by(["impl", "n"]), - # # |> DF.summarise(t: mean(t)) - # "n", - # "t", - # color_by: "impl", - # clip: true - # ) -]) -|> Tucan.Axes.set_y_title("Estimated cost") -|> Tucan.Axes.set_x_title("Size of container (n)") -|> Tucan.Scale.set_x_domain(startn, endn) -# |> Tucan.Scale.set_y_domain(0, 10_000) -|> Tucan.set_size(500, 250) -|> Tucan.Legend.set_title(:color, "Implementation") -|> Tucan.Legend.set_orientation(:color, "bottom") +defmodule CostModel do + @defaults %{y_domain: nil, ns: 1..60_000//100, draw_points: true} + + def points_for(cost_models, ns, impl, op) do + %{"coeffs" => [coeffs]} = + DF.filter(cost_models, impl == ^impl and op == ^op) + |> DF.to_columns() + + Enum.map(ns, fn n -> + t = + (coeffs + |> Enum.take(3) + |> Enum.with_index() + |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end) + |> Enum.sum()) + Enum.at(coeffs, 3) * Math.log2(n) + + %{ + impl: String.split(impl, "::") |> List.last(), + op: op, + n: n, + t: max(t, 0) + } + end) + |> DF.new() + end + + def plot(cost_models, cost_model_points, impls, op, opts \\ []) do + %{y_domain: y_domain, ns: ns, draw_points: draw_points} = Enum.into(opts, @defaults) + + plot = + Tucan.layers( + [ + cost_models + |> DF.filter(op == ^op) + |> DF.distinct(["impl"]) + |> DF.to_rows() + |> Enum.map(fn %{"impl" => impl} -> points_for(cost_models, ns, impl, op) end) + |> DF.concat_rows() + |> DF.filter(impl in ^impls) + |> Tucan.lineplot("n", "t", color_by: "impl", clip: true) + ] ++ + if(draw_points, + do: [ + Tucan.scatter( + cost_model_points + |> DF.filter(op == ^op and impl in ^impls) + |> DF.group_by(["impl", "n"]) + |> DF.summarise(t: mean(t)), + "n", + "t", + color_by: "impl", + clip: true + ) + ], + else: [] + ) + ) + + plot = + plot + |> Tucan.Axes.set_y_title("Estimated cost") + |> Tucan.Axes.set_x_title("Size of container (n)") + |> Tucan.set_size(500, 250) + |> Tucan.Legend.set_title(:color, "Implementation") + + case y_domain do + [lo, hi] -> Tucan.Scale.set_y_domain(plot, lo, hi) + _ -> plot + end + end +end +``` + +```elixir +CostModel.plot(cost_models, cost_model_points, other_impls, "remove") ``` ## Read benchmark data -- cgit v1.2.3