From dd3d6a04d6e4e6e343d7ae8546ba2f7207082d29 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton <me@aria.rip>
Date: Thu, 7 Mar 2024 20:53:17 +0000
Subject: update analysis code

---
 analysis/vis.livemd | 71 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 27 deletions(-)

(limited to 'analysis')

diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 001b53d..613b419 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -94,8 +94,8 @@ cost_models
 ## Cost model exploratory plots
 
 ```elixir
-startn = 200
-endn = 2000
+startn = 100
+endn = 5000
 resolution = 50
 
 points_for = fn impl, op ->
@@ -125,8 +125,11 @@ end
 <!-- livebook:{"reevaluate_automatically":true} -->
 
 ```elixir
-inspect_op = "insert"
-impls = ["BTreeSet", "EagerSortedVec", "EagerUniqueVec", "HashSet"]
+# inspect_op = "insert"
+# impls = ["BTreeSet", "EagerSortedVec", "HashSet"]
+
+inspect_op = "pop"
+impls = ["Vec", "LinkedList"]
 
 Tucan.layers([
   cost_models
@@ -178,7 +181,7 @@ raw_benchmarks =
         bench_id: bench <> "/" <> subbench,
         proj: String.split(bench, "-") |> hd,
         using:
-          Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
+          Regex.scan(~r/\"(\w*)\", ([^)]*)/, Path.basename(dir))
           |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
         mean: raw_results["mean"]["point_estimate"] / 10 ** 9
       }
@@ -190,20 +193,34 @@ raw_benchmarks =
 
 ```elixir
 # Aggregate benchmark results by project, since we can only do assignments by project
-uniq_proj_using =
-  DF.select(raw_benchmarks, ["proj", "using"])
-  |> DF.to_rows()
-  |> Enum.uniq()
-  |> DF.new()
 
-uniq_proj_using
-|> DF.mutate(vals: DF.filter(^raw_benchmarks, proj == proj and using == using))
+# Can't group by list columns, so group by an index into the distinct `using` values instead, then map it back
 
-# |> Enum.map(fn %{"proj" => proj, "using" => using} ->
-#   DF.filter(raw_benchmarks, proj == ^proj and using == ^using)
-#   # |> DF.summarise()
-# end)
-# |> DF.concat_rows()
+# Build list of using values to index into
+usings =
+  raw_benchmarks["using"]
+  |> SE.to_list()
+  |> Enum.uniq()
+
+benchmarks =
+  raw_benchmarks
+  # Add a non-list stand-in for the `using` column: each row's index into `usings`
+  |> DF.put(
+    "using_idx",
+    raw_benchmarks["using"]
+    |> SE.to_list()
+    |> Enum.map(fn using -> Enum.find_index(usings, &(&1 == using)) end)
+  )
+  # Get the total benchmark time for each project and assignment
+  |> DF.group_by(["proj", "using_idx"])
+  |> DF.summarise(time: sum(mean))
+  # Convert using_idx back to original using values
+  |> DF.to_rows()
+  |> Enum.map(fn row = %{"using_idx" => using_idx} ->
+    Map.put(row, "using", Enum.at(usings, using_idx))
+  end)
+  |> DF.new()
+  |> DF.select(["proj", "time", "using"])
 ```
 
 ```elixir
@@ -261,9 +278,9 @@ cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec
 estimated_costs =
   benchmarks
   |> DF.to_rows_stream()
-  |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
+  |> Enum.map(fn %{"proj" => proj, "using" => using} ->
     %{
-      bench_id: bench_id,
+      proj: proj,
       using: using,
       estimated_cost: cost_of_assignment.(proj, using)
     }
@@ -275,27 +292,27 @@ estimated_costs =
 # Compare each assignments position in the estimates to its position in the results
 sorted_estimates =
   estimated_costs
-  |> DF.group_by(["bench_id"])
+  |> DF.group_by(["proj"])
   |> DF.sort_by(estimated_cost)
 
 sorted_results =
   benchmarks
-  |> DF.group_by(["bench_id"])
-  |> DF.sort_by(mean)
+  |> DF.group_by(["proj"])
+  |> DF.sort_by(time)
 
 position_comparison =
   sorted_estimates
   |> DF.to_rows_stream()
-  |> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
+  |> Enum.map(fn %{"proj" => proj, "using" => using} ->
     %{
-      bench_id: bench_id,
+      proj: proj,
       using: using,
       pos_estimate:
-        DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
+        DF.filter(sorted_estimates, proj == ^proj)["using"]
         |> SE.to_list()
         |> Enum.find_index(fn u -> u == using end),
       pos_results:
-        DF.filter(sorted_results, bench_id == ^bench_id)["using"]
+        DF.filter(sorted_results, proj == ^proj)["using"]
         |> SE.to_list()
         |> Enum.find_index(fn u -> u == using end)
     }
@@ -312,7 +329,7 @@ position_comparison
 ```elixir
 position_comparison
 |> DF.filter(pos_estimate == 0)
-|> DF.select(["bench_id", "using"])
+|> DF.select(["proj", "using"])
 ```
 
 <!-- livebook:{"reevaluate_automatically":true} -->
-- 
cgit v1.2.3