aboutsummaryrefslogtreecommitdiff
path: root/analysis
diff options
context:
space:
mode:
authorAria Shrimpton <me@aria.rip>2024-03-25 14:01:37 +0000
committerAria Shrimpton <me@aria.rip>2024-03-25 15:41:42 +0000
commit57c3c48f6660f905fb974cff7ec58f746a1a6970 (patch)
treedb50f95d044e1f18fc481402888fdff5c24d540c /analysis
parent11885b43e9b0e854459f0ef1b2aaeaad27fa24d2 (diff)
cleanup analysis code
Diffstat (limited to 'analysis')
-rw-r--r--analysis/vis.livemd139
1 files changed, 76 insertions, 63 deletions
diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 360e2fe..4913e76 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -88,42 +88,8 @@ cost_model_points =
|> DF.concat_rows()
```
-```elixir
-cost_models
-|> DF.filter(op == "contains")
-```
-
## Cost model exploratory plots
-```elixir
-startn = 1
-endn = 5000
-resolution = 1
-
-points_for = fn impl, op ->
- %{"coeffs" => [coeffs]} =
- DF.filter(cost_models, impl == ^impl and op == ^op)
- |> DF.to_columns()
-
- Enum.map(startn..endn//resolution, fn n ->
- t =
- (coeffs
- |> Enum.take(3)
- |> Enum.with_index()
- |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end)
- |> Enum.sum()) + Enum.at(coeffs, 3) * Math.log2(n)
-
- %{
- impl: String.split(impl, "::") |> List.last(),
- op: op,
- n: n,
- t: max(t, 0)
- }
- end)
- |> DF.new()
-end
-```
-
<!-- livebook:{"reevaluate_automatically":true} -->
```elixir
@@ -131,36 +97,83 @@ set_impls = ["BTreeSet", "HashSet", "VecSet", "SortedVecSet"]
mapping_impls = ["HashMap", "BTreeMap", "VecMap", "SortedVecMap"]
other_impls = ["Vec", "LinkedList", "SortedVec"]
-inspect_op = "remove"
-impls = ["Vec", "LinkedList"]
+impls = other_impls
-Tucan.layers([
- cost_models
- |> DF.filter(op == ^inspect_op)
- |> DF.distinct(["impl"])
- |> DF.to_rows()
- |> Enum.map(fn %{"impl" => impl} -> points_for.(impl, inspect_op) end)
- |> DF.concat_rows()
- |> DF.filter(impl in ^impls)
- |> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
- # Tucan.scatter(
- # cost_model_points
- # |> DF.filter(op == ^inspect_op and impl in ^impls)
- # |> DF.group_by(["impl", "n"]),
- # # |> DF.summarise(t: mean(t))
- # "n",
- # "t",
- # color_by: "impl",
- # clip: true
- # )
-])
-|> Tucan.Axes.set_y_title("Estimated cost")
-|> Tucan.Axes.set_x_title("Size of container (n)")
-|> Tucan.Scale.set_x_domain(startn, endn)
-# |> Tucan.Scale.set_y_domain(0, 10_000)
-|> Tucan.set_size(500, 250)
-|> Tucan.Legend.set_title(:color, "Implementation")
-|> Tucan.Legend.set_orientation(:color, "bottom")
+defmodule CostModel do
+ @defaults %{y_domain: nil, ns: 1..60_000//100, draw_points: true}
+
+ def points_for(cost_models, ns, impl, op) do
+ %{"coeffs" => [coeffs]} =
+ DF.filter(cost_models, impl == ^impl and op == ^op)
+ |> DF.to_columns()
+
+ Enum.map(ns, fn n ->
+ t =
+ (coeffs
+ |> Enum.take(3)
+ |> Enum.with_index()
+ |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end)
+ |> Enum.sum()) + Enum.at(coeffs, 3) * Math.log2(n)
+
+ %{
+ impl: String.split(impl, "::") |> List.last(),
+ op: op,
+ n: n,
+ t: max(t, 0)
+ }
+ end)
+ |> DF.new()
+ end
+
+ def plot(cost_models, cost_model_points, impls, op, opts \\ []) do
+ %{y_domain: y_domain, ns: ns, draw_points: draw_points} = Enum.into(opts, @defaults)
+
+ plot =
+ Tucan.layers(
+ [
+ cost_models
+ |> DF.filter(op == ^op)
+ |> DF.distinct(["impl"])
+ |> DF.to_rows()
+ |> Enum.map(fn %{"impl" => impl} -> points_for(cost_models, ns, impl, op) end)
+ |> DF.concat_rows()
+ |> DF.filter(impl in ^impls)
+ |> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
+ ] ++
+ if(draw_points,
+ do: [
+ Tucan.scatter(
+ cost_model_points
+ |> DF.filter(op == ^op and impl in ^impls)
+ |> DF.group_by(["impl", "n"])
+ |> DF.summarise(t: mean(t)),
+ "n",
+ "t",
+ color_by: "impl",
+ clip: true
+ )
+ ],
+ else: []
+ )
+ )
+
+ plot =
+ plot
+ |> Tucan.Axes.set_y_title("Estimated cost")
+ |> Tucan.Axes.set_x_title("Size of container (n)")
+ |> Tucan.set_size(500, 250)
+ |> Tucan.Legend.set_title(:color, "Implementation")
+
+ case y_domain do
+ [lo, hi] -> Tucan.Scale.set_y_domain(plot, lo, hi)
+ _ -> plot
+ end
+ end
+end
+```
+
+```elixir
+CostModel.plot(cost_models, cost_model_points, other_impls, "remove")
```
## Read benchmark data