Diffstat (limited to 'analysis/vis.livemd')
 analysis/vis.livemd | 105 ++++++++++++++-------
 1 file changed, 69 insertions(+), 36 deletions(-)
diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 0ef8597..dd8a593 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -13,9 +13,10 @@ Mix.install([
])
```
-## Variables
+## Setup
```elixir
+# Some common variables
require Explorer.DataFrame
require Explorer.Series
require VegaLite
@@ -28,7 +29,11 @@ cm_dir = Path.join([job_dir, "candelabra", "benchmark_results"])
criterion_dir = Path.join(job_dir, "criterion")
```
-## Read cost model data
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Cost models
+
+We read in the cost models from the JSON output.
```elixir
{:ok, cost_model_files} = File.ls(cm_dir)
@@ -37,13 +42,12 @@ cost_model_files =
cost_model_files
|> Enum.map(fn fname -> Path.join(cm_dir, fname) |> Path.absname() end)
+# Should be one for each library implementation
cost_model_files
```
-<!-- livebook:{"reevaluate_automatically":true} -->
-
```elixir
-# Parse cost model information
+# Find the coefficients, i.e. the actual cost models
cost_models =
cost_model_files
|> Enum.map(fn fname ->
@@ -65,7 +69,7 @@ cost_models =
```
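The body of that parsing cell is elided by the diff. Purely as an illustration, a minimal sketch of what it might look like, assuming each JSON file holds one implementation's fitted models keyed by operation; the field layout, the use of Jason, and deriving the implementation name from the file name are all assumptions:

```elixir
# Illustrative sketch only, not the elided cell above.
cost_models_sketch =
  cost_model_files
  |> Enum.flat_map(fn fname ->
    # Assumption: one file per implementation, named after it
    impl = Path.basename(fname, ".json")

    File.read!(fname)
    # Assumption: a JSON decoder such as Jason is available
    |> Jason.decode!()
    |> Enum.map(fn {op, %{"coeffs" => coeffs}} ->
      %{impl: impl, op: op, coeffs: coeffs}
    end)
  end)
  |> DF.new()
```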
```elixir
-# Parse cost model information
+# Get the raw data points
cost_model_points =
cost_model_files
|> Enum.map(fn fname ->
@@ -100,7 +104,7 @@ cost_model_points =
|> DF.mutate(t: cast(t, {:duration, :nanosecond}))
```
-## Cost model exploratory plots
+We can now plot our graphs. The module below provides most of the plotting code; the cells after it specify the actual graphs.
```elixir
defmodule CostModel do
@@ -119,11 +123,14 @@ defmodule CostModel do
"LinkedList"
])
+ # Make the names in the legends shorter and more readable
def friendly_impl_name(impl) do
String.split(impl, "::") |> List.last()
end
+ # Get a dataframe of points lying on the cost model, one point for each of `ns`.
def points_for(cost_models, ns, impl, op) do
+ # Get coefficients
%{"coeffs" => [coeffs]} =
DF.filter(cost_models, impl == ^impl and op == ^op)
|> DF.to_columns()
@@ -146,11 +153,13 @@ defmodule CostModel do
|> DF.new()
end
+ # Plot the specified cost model, optionally overriding the x/y domains and whether the raw points are drawn
def plot(cost_models, cost_model_points, impls, op, opts \\ []) do
%{y_domain: y_domain, ns: ns, draw_points: draw_points} = Enum.into(opts, @defaults)
plot =
Tucan.layers(
+ # The actual cost model function
[
cost_models
|> DF.filter(op == ^op)
@@ -163,6 +172,7 @@ defmodule CostModel do
|> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
] ++
if(draw_points,
+ # The raw points, if requested
do: [
cost_model_points
|> DF.filter(op == ^op and impl in ^impls)
@@ -180,6 +190,7 @@ defmodule CostModel do
)
)
+ # Adjust the x/y domains, set the title, etc.
plot =
plot
|> Tucan.Axes.set_y_title("Estimated cost")
@@ -194,16 +205,18 @@ defmodule CostModel do
end
end
+ # Plot the cost models for `op` across all implementations, grouped by the nested list `impl_splits`
def split_plot(cost_models, cost_model_points, impl_splits, op) do
- @all_impls = List.flatten(impl_splits) |> Enum.sort()
-
Enum.map(impl_splits, &plot(cost_models, cost_model_points, &1, op))
|> Tucan.vconcat()
+ # Ensure the sub-plots don't all share a single legend
|> VegaLite.resolve(:scale, color: :independent)
end
end
```
+Below are our actual graphs, which are displayed and exported to JSON files in the thesis directory.
+
<!-- livebook:{"reevaluate_automatically":true} -->
```elixir
@@ -266,7 +279,25 @@ VegaLite.Export.save!(graph, "../thesis/assets/contains.json")
graph
```
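The rest of that cell is elided by the diff. As a rough sketch of how such a graph could be produced with the `CostModel` module defined above (the `impl_splits` grouping here is an assumption, reusing implementation names that appear elsewhere in the notebook):

```elixir
# Sketch only; the grouping passed as `impl_splits` is an assumption.
graph =
  CostModel.split_plot(
    cost_models,
    cost_model_points,
    [["SortedVec", "SortedVecSet", "SortedVecMap"], ["LinkedList"]],
    "contains"
  )

VegaLite.Export.save!(graph, "../thesis/assets/contains.json")
graph
```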
-## Read benchmark data
+The block below can be used to inspect the cost models for a chosen operation and set of implementations.
+
+```elixir
+impls = ["SortedVec", "SortedVecSet", "SortedVecMap"]
+op = "insert"
+
+CostModel.plot(
+ cost_models,
+ cost_model_points,
+ impls,
+ op
+)
+```
+
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Benchmarks
+
+We read in benchmark data from criterion's JSON output.
```elixir
# Read in the results of every individual criterion benchmark
@@ -307,10 +338,7 @@ raw_benchmarks =
```
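Most of that cell is elided. Criterion writes an `estimates.json` (times in nanoseconds) under `<benchmark>/new/` for each benchmark; a minimal sketch of collecting those, assuming a JSON decoder such as Jason is available:

```elixir
# Sketch only, not the elided cell above: collect the mean estimate of
# every benchmark under `criterion_dir`.
Path.wildcard(Path.join(criterion_dir, "**/new/estimates.json"))
|> Enum.map(fn path ->
  %{"mean" => %{"point_estimate" => mean_ns}} =
    File.read!(path) |> Jason.decode!()

  %{path: path, mean_ns: mean_ns}
end)
|> DF.new()
```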
```elixir
-# `using` is a list of structs, but we aren't gonna make use of this mostly
-# and we want to be able to group by that column, so add a new column that's just a nice
-# string representation
-# also parse out the n value, which all of our benchmarks have
+# Helper function for making the `using` field look nicer
display_using = fn using ->
using
|> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
@@ -353,7 +381,7 @@ benchmarks =
|> DF.select(["proj", "time", "using"])
```
-## Read cost estimate data
+We read our cost estimates from the log output.
```elixir
# Cost estimates by project, ctn, and implementation
@@ -405,6 +433,8 @@ true =
|> Enum.all?(&SE.any?(SE.equal(estimate_impls, &1)))
```
+We then find the estimated cost of every assignment that we benchmarked.
+
```elixir
# Gets the cost of assignment from cost estimates
cost_of_assignment = fn proj, assignment ->
@@ -433,7 +463,9 @@ estimated_costs =
|> DF.new()
```
-## Estimates vs results (ignoring adaptive containers)
+Now we can compare our benchmark results to our estimated costs.
+
+We first filter out adaptive containers; we consider them separately later.
```elixir
# Don't worry about adaptive containers for now
@@ -457,14 +489,6 @@ DF.n_rows(singular_benchmarks)
```
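The filter itself is elided. A minimal sketch of one way to do it, assuming (as the later cells suggest) that adaptive assignments are the ones whose `using` string contains "until":

```elixir
# Sketch only: keep assignments that don't involve an adaptive container.
# Assumption: adaptive assignments have "until" in their `using` string.
singular_benchmarks =
  benchmarks
  |> DF.filter(not contains(using, "until"))

DF.n_rows(singular_benchmarks)
```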
```elixir
-display_using = fn using ->
- using
- |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
- |> Enum.join(", ")
-end
-```
-
-```elixir
# Tools for printing out latex
defmodule Latex do
def escape_latex(val) do
@@ -495,6 +519,8 @@ defmodule Latex do
end
```
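A quick usage note (not a cell from the notebook): a dataframe is rendered by piping it through `Latex.table/1` and printing the result, as the commented-out calls in the following cells do.

```elixir
# Example: print the first few rows of a dataframe as a LaTeX table
singular_benchmarks
|> DF.head(5)
|> Latex.table()
|> IO.puts()
```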
+We compare the fastest and slowest assignments for each project.
+
```elixir
singular_benchmarks
|> DF.group_by("proj")
@@ -507,12 +533,14 @@ singular_benchmarks
"spread" => "Maximum slowdown (ms)",
"slowdown" => "Maximum relative slowdown"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
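The aggregation itself is elided; judging by the renamed columns, it is something along these lines (a sketch, with the exact expressions assumed):

```elixir
# Sketch of the elided summarise step; the expressions are assumptions
# based on the "spread" and "slowdown" column names.
singular_benchmarks
|> DF.group_by("proj")
|> DF.summarise(
  spread: max(time) - min(time),
  slowdown: max(time) / min(time)
)
```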
+We compare the predicted and actual best implementation for each container type.
+
```elixir
-# Best and predicted best implementation for each container type
selection_comparison =
singular_benchmarks
|> DF.explode("using")
@@ -531,8 +559,6 @@ selection_comparison =
```
```elixir
-Latex.table(selection_comparison)
-
selection_comparison
|> DF.put(
"best_impl",
@@ -556,11 +582,12 @@ selection_comparison
"best_impl" => "Best implementation",
"predicted_impl" => "Predicted best"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
-## Adaptive Containers
+We now look at adaptive containers, starting with the cases where they are suggested.
```elixir
# Container types where an adaptive container was suggested
@@ -572,7 +599,7 @@ adaptive_suggestions =
|> DF.distinct(["proj", "ctn", "impl"])
adaptive_suggestions
-# hacky but oh well
+# Hacky way to shorten the implementation names for display
|> DF.mutate(impl: replace(impl, "std::collections::", ""))
|> DF.mutate(impl: replace(impl, "std::vec::", ""))
|> DF.mutate(impl: replace(impl, "primrose_library::", ""))
@@ -582,10 +609,13 @@ adaptive_suggestions
"ctn" => "Container Type",
"impl" => "Suggestion"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
+We get the benchmarks for projects where an adaptive container was suggested, and add the benchmark 'size' as a new column.
+
```elixir
adaptive_projs = DF.distinct(adaptive_suggestions, ["proj"])["proj"]
adaptive_estimated_costs = estimated_costs |> DF.filter(proj in ^adaptive_projs)
@@ -609,6 +639,8 @@ adaptive_raw_benchmarks =
)
```
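That cell is largely elided; a sketch of how the size might be pulled out as a new column (the `name` column and the `/`-separated benchmark names are assumptions):

```elixir
# Sketch only: take the trailing number from each benchmark name,
# e.g. "contains/100" -> 100. Column names here are assumptions.
sizes =
  SE.transform(adaptive_raw_benchmarks["name"], fn name ->
    String.split(name, "/") |> List.last() |> String.to_integer()
  end)

adaptive_raw_benchmarks = DF.put(adaptive_raw_benchmarks, "n", sizes)
```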
+We then summarise the results for each benchmark size, keeping assignments that either involve an adaptive container or are the best possible assignment.
+
```elixir
format_dur = fn dur ->
String.split(to_string(dur), " ") |> hd
@@ -629,7 +661,6 @@ best_usings =
|> DF.filter(using == best_using or contains(using, "until"))
# summary data point
-
best_usings =
best_usings
|> DF.put("mean", SE.transform(best_usings["mean"], format_dur))
@@ -638,6 +669,8 @@ best_usings =
|> DF.select(["proj", "using", "n", "value"])
```
+Finally, we print them out per project for clarity.
+
```elixir
for proj <- SE.distinct(best_usings["proj"]) |> SE.to_enum() do
best_usings