From 40029fd036a45b7ea9447f158fe3133257107c16 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton
Date: Thu, 7 Mar 2024 22:26:22 +0000
Subject: cleanup analysis notebook a bit

---
 analysis/vis.livemd | 54 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 613b419..9f5a58d 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -1,3 +1,5 @@
+
+
 # Dissertation Visualisations
 
 ```elixir
@@ -160,6 +162,7 @@ Tucan.layers([
 ## Read benchmark data
 
 ```elixir
+# Read in the results of every individual Criterion benchmark
 raw_benchmarks =
   File.ls!(criterion_dir)
   |> Enum.map(fn name ->
@@ -193,8 +196,11 @@ raw_benchmarks =
 
 ```elixir
 # Aggregate benchmark results by project, since we can only do assignments by project
-
-# Can't group by lists, so we need to do this weird shit
+# Unfortunately we can't group by lists, so we need a workaround.
+# This is basically equivalent to:
+#   benchmarks = raw_benchmarks
+#     |> DF.group_by(["proj", "using"])
+#     |> DF.summarise(time: sum(mean))
 
 # Build list of using values to index into
 usings =
@@ -223,6 +229,8 @@ benchmarks =
   |> DF.select(["proj", "time", "using"])
 ```
 
+## Read cost estimate data
+
 ```elixir
 # Cost estimates by project, ctn, and implementation
 projs = SE.distinct(benchmarks["proj"])
@@ -261,7 +269,20 @@ cost_estimates =
 ```
 
 ```elixir
-# Get cost of assignment from cost estimates
+# Double-check that we have all of the cost estimates for everything mentioned in the assignments
+estimate_impls = SE.distinct(cost_estimates["impl"])
+
+true =
+  (benchmarks
+   |> DF.explode("using")
+   |> DF.unnest("using"))["impl"]
+  |> SE.distinct()
+  |> SE.to_list()
+  |> Enum.all?(fn impl -> SE.equal(estimate_impls, impl) |> SE.any?() end)
+```
+
+```elixir
+# Gets the cost of an assignment from the cost estimates
 cost_of_assignment = fn proj, assignment ->
   assignment
   |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
@@ -274,7 +295,7 @@ cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
 ```
 
 ```elixir
-# Estimate cost for each benchmarked assignment
+# For each benchmarked assignment, estimate the cost.
 estimated_costs =
   benchmarks
   |> DF.to_rows_stream()
@@ -288,6 +309,8 @@ estimated_costs =
   |> DF.new()
 ```
 
+## Estimates vs results
+
 ```elixir
 # Compare each assignment's position in the estimates to its position in the results
 sorted_estimates =
@@ -321,29 +344,8 @@ position_comparison =
 ```
 
 ```elixir
+# Everywhere we predicted wrong.
 position_comparison
 |> DF.filter(pos_estimate != pos_results)
 |> DF.collect()
 ```
-
-```elixir
-position_comparison
-|> DF.filter(pos_estimate == 0)
-|> DF.select(["proj", "using"])
-```
-
-
-
-```elixir
-# Difference in execution time between worst and best selection
-Tucan.bar(
-  benchmarks
-  |> DF.group_by("bench_id")
-  |> DF.summarise(range: max(mean) - min(mean)),
-  "bench_id",
-  "range",
-  orient: :horizontal,
-  clip: true
-)
-|> Tucan.Scale.set_x_domain(0, 5)
-```
-- 
cgit v1.2.3
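
The aggregation described by the third hunk's new comment is only partly visible in the patch: the hunk context stops at `usings =`, before the workaround itself. A minimal standalone sketch of the same idea, using hypothetical data and plain `Enum` calls rather than the notebook's Explorer (`DF`) dataframes:

```elixir
# Hypothetical rows shaped like the notebook's benchmark data: each row has a
# project, an assignment ("using" is a list of container-to-implementation
# choices), and a mean time. Explorer can't group by a list-valued column,
# but arbitrary Elixir terms work fine as group keys.
rows = [
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}], mean: 1.2},
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}], mean: 1.4},
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::collections::LinkedList"}], mean: 3.1}
]

# The moral equivalent of DF.group_by(["proj", "using"]) |> DF.summarise(time: sum(mean)):
benchmarks =
  rows
  |> Enum.group_by(fn %{proj: proj, using: using} -> {proj, using} end)
  |> Enum.map(fn {{proj, using}, group} ->
    %{proj: proj, using: using, time: group |> Enum.map(& &1.mean) |> Enum.sum()}
  end)
```

The notebook itself stays inside Explorer instead: per the hunk's context lines, it builds a list of distinct `using` values to index into (the `usings =` cell) and groups on that index.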
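Likewise, the `sorted_estimates` and `position_comparison` cells fall mostly outside the hunk context. A sketch of the comparison they implement, again with hypothetical data and names: rank each assignment by estimated cost and by measured time, then flag any rank disagreement.

```elixir
# Hypothetical estimated costs and measured times for one project's assignments.
estimated = [{"std::vec::Vec", 10.0}, {"BTreeMap", 12.0}, {"LinkedList", 30.0}]
measured = [{"BTreeMap", 1.0}, {"std::vec::Vec", 1.1}, {"LinkedList", 2.9}]

# Rank assignments by cost: position 0 is the predicted (or observed) best.
rank = fn pairs ->
  pairs
  |> Enum.sort_by(fn {_impl, cost} -> cost end)
  |> Enum.with_index()
  |> Map.new(fn {{impl, _cost}, idx} -> {impl, idx} end)
end

est_pos = rank.(estimated)
res_pos = rank.(measured)

# Everywhere the predicted ranking disagrees with the measured one,
# mirroring the final DF.filter(pos_estimate != pos_results) cell.
mispredicted = for {impl, pos} <- est_pos, res_pos[impl] != pos, do: impl
```

Here the estimate puts `std::vec::Vec` first while the measurements put `BTreeMap` first, so both implementations land in `mispredicted`.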