path: root/analysis/vis.livemd
author     Aria Shrimpton <me@aria.rip>    2024-03-30 17:34:18 +0000
committer  Aria Shrimpton <me@aria.rip>    2024-03-30 17:34:56 +0000
commit     0260147246507960bb86fa088eb986bc773dd824 (patch)
tree       c552b34556c06c4556f2431640aee5e6c9bc4563 /analysis/vis.livemd
parent     ca565e3d32c0815722302331edbc59ce8a1ca9b8 (diff)
notebook cleanup
Diffstat (limited to 'analysis/vis.livemd')
-rw-r--r--    analysis/vis.livemd    105
1 file changed, 69 insertions(+), 36 deletions(-)
diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 0ef8597..dd8a593 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -13,9 +13,10 @@ Mix.install([
])
```
-## Variables
+## Setup
```elixir
+# Some common variables
require Explorer.DataFrame
require Explorer.Series
require VegaLite
@@ -28,7 +29,11 @@ cm_dir = Path.join([job_dir, "candelabra", "benchmark_results"])
criterion_dir = Path.join(job_dir, "criterion")
```
-## Read cost model data
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Cost models
+
+We read in the cost models from the JSON output.
```elixir
{:ok, cost_model_files} = File.ls(cm_dir)
@@ -37,13 +42,12 @@ cost_model_files =
cost_model_files
|> Enum.map(fn fname -> Path.join(cm_dir, fname) |> Path.absname() end)
+# Should be one for each library implementation
cost_model_files
```
-<!-- livebook:{"reevaluate_automatically":true} -->
-
```elixir
-# Parse cost model information
+# Find the coefficients, i.e. the actual cost models
cost_models =
cost_model_files
|> Enum.map(fn fname ->
@@ -65,7 +69,7 @@ cost_models =
```
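
As a quick sanity check we can peek at a few of the fitted models. This is purely illustrative; `"insert"` is just an example operation name used elsewhere in this notebook.

```elixir
# Illustrative only: look at the fitted coefficients for one operation
cost_models
|> DF.filter(op == "insert")
|> DF.head(3)
```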
```elixir
-# Parse cost model information
+# Get the raw data points
cost_model_points =
cost_model_files
|> Enum.map(fn fname ->
@@ -100,7 +104,7 @@ cost_model_points =
|> DF.mutate(t: cast(t, {:duration, :nanosecond}))
```
-## Cost model exploratory plots
+We can now plot our graphs. The module below provides most of the plotting code, with the cells after it specifying the actual graphs.
```elixir
defmodule CostModel do
@@ -119,11 +123,14 @@ defmodule CostModel do
"LinkedList"
])
+ # Make the names in the legends shorter and more readable
def friendly_impl_name(impl) do
String.split(impl, "::") |> List.last()
end
+ # Get a dataframe of points lying on the cost model, one point for each value in `ns`.
def points_for(cost_models, ns, impl, op) do
+ # Get coefficients
%{"coeffs" => [coeffs]} =
DF.filter(cost_models, impl == ^impl and op == ^op)
|> DF.to_columns()
@@ -146,11 +153,13 @@ defmodule CostModel do
|> DF.new()
end
+ # Plot the given cost model, optionally setting the x/y domains and omitting the raw points
def plot(cost_models, cost_model_points, impls, op, opts \\ []) do
%{y_domain: y_domain, ns: ns, draw_points: draw_points} = Enum.into(opts, @defaults)
plot =
Tucan.layers(
+ # The actual cost model function
[
cost_models
|> DF.filter(op == ^op)
@@ -163,6 +172,7 @@ defmodule CostModel do
|> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
] ++
if(draw_points,
+ # The raw points, if necessary
do: [
cost_model_points
|> DF.filter(op == ^op and impl in ^impls)
@@ -180,6 +190,7 @@ defmodule CostModel do
)
)
+ # Adjust the x/y domain and set the title, etc.
plot =
plot
|> Tucan.Axes.set_y_title("Estimated cost")
@@ -194,16 +205,18 @@ defmodule CostModel do
end
end
+ # Plot the cost models for `op` across all implementations, grouped into subplots by the nested list `impl_splits`
def split_plot(cost_models, cost_model_points, impl_splits, op) do
- @all_impls = List.flatten(impl_splits) |> Enum.sort()
-
Enum.map(impl_splits, &plot(cost_models, cost_model_points, &1, op))
|> Tucan.vconcat()
+ # Ensure the subplots don't all share a single legend
|> VegaLite.resolve(:scale, color: :independent)
end
end
```
+Below are our actual graphs, which are displayed and exported to JSON files in the thesis directory.
+
<!-- livebook:{"reevaluate_automatically":true} -->
```elixir
@@ -266,7 +279,25 @@ VegaLite.Export.save!(graph, "../thesis/assets/contains.json")
graph
```
-## Read benchmark data
+The block below can be used to inspect the cost models for specific operations and implementations.
+
+```elixir
+impls = ["SortedVec", "SortedVecSet", "SortedVecMap"]
+op = "insert"
+
+CostModel.plot(
+ cost_models,
+ cost_model_points,
+ impls,
+ op
+)
+```
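
`split_plot` works the same way, drawing one panel per group of implementations. The grouping and operation below are arbitrary examples using names that appear earlier in the notebook.

```elixir
# Illustrative grouping only
CostModel.split_plot(
  cost_models,
  cost_model_points,
  [["SortedVec", "SortedVecSet", "SortedVecMap"], ["LinkedList"]],
  "contains"
)
```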
+
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Benchmarks
+
+We read in benchmark data from criterion's JSON output.
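
For reference, criterion normally writes each benchmark's summary statistics to `<benchmark>/new/estimates.json`, with times in nanoseconds. A minimal sketch of decoding one file by hand (assuming a JSON decoder such as `Jason` is available) might look like this; the real parsing happens in the cell below.

```elixir
# Sketch only: decode a single criterion estimates.json by hand
example_file =
  Path.wildcard(Path.join(criterion_dir, "**/new/estimates.json"))
  |> hd()

example_file
|> File.read!()
|> Jason.decode!()
|> get_in(["mean", "point_estimate"])
```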
```elixir
# Read in the results of every individual criterion benchmark
@@ -307,10 +338,7 @@ raw_benchmarks =
```
```elixir
-# `using` is a list of structs, but we aren't gonna make use of this mostly
-# and we want to be able to group by that column, so add a new column that's just a nice
-# string representation
-# also parse out the n value, which all of our benchmarks have
+# Helper function for making the `using` field look nicer
display_using = fn using ->
using
|> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
@@ -353,7 +381,7 @@ benchmarks =
|> DF.select(["proj", "time", "using"])
```
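
For example, `display_using` turns a list of container/implementation pairs into a single readable string. The container and implementation names below are hypothetical.

```elixir
# Hypothetical example of the `using` formatting helper
display_using.([
  %{"ctn" => "Primary", "impl" => "std::vec::Vec"},
  %{"ctn" => "Lookup", "impl" => "std::collections::BTreeMap"}
])
# => "Primary=std::vec::Vec, Lookup=std::collections::BTreeMap"
```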
-## Read cost estimate data
+We read our cost estimates from the log output.
```elixir
# Cost estimates by project, ctn, and implementation
@@ -405,6 +433,8 @@ true =
|> Enum.all?(&SE.any?(SE.equal(estimate_impls, &1)))
```
+We then find the estimated cost of every assignment that we benchmarked.
+
```elixir
# Gets the cost of assignment from cost estimates
cost_of_assignment = fn proj, assignment ->
@@ -433,7 +463,9 @@ estimated_costs =
|> DF.new()
```
-## Estimates vs results (ignoring adaptive containers)
+Now we can compare our benchmark results to our estimated costs.
+
+We first filter out adaptive containers, so we can consider them separately later.
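
A sketch of what that filter might look like, purely for illustration: it assumes adaptive assignments can be recognised by an "until" marker in their `using` string, as in the comparison further down. The actual filtering happens in the next cell.

```elixir
# Sketch only: count non-adaptive assignments, assuming adaptive ones
# contain "until" in their `using` string (as used later in this notebook)
benchmarks
|> DF.filter(not contains(using, "until"))
|> DF.n_rows()
```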
```elixir
# Don't worry about adaptive containers for now
@@ -457,14 +489,6 @@ DF.n_rows(singular_benchmarks)
```
```elixir
-display_using = fn using ->
- using
- |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
- |> Enum.join(", ")
-end
-```
-
-```elixir
# Tools for printing out latex
defmodule Latex do
def escape_latex(val) do
@@ -495,6 +519,8 @@ defmodule Latex do
end
```
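
Any dataframe can then be dumped as a LaTeX table; an illustrative example (the cells below do the same for the real tables):

```elixir
# Example: print the first few rows of a dataframe as a LaTeX table
singular_benchmarks
|> DF.head(5)
|> Latex.table()
|> IO.puts()
```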
+Next, we compare the fastest and slowest assignments for each project.
+
```elixir
singular_benchmarks
|> DF.group_by("proj")
@@ -507,12 +533,14 @@ singular_benchmarks
"spread" => "Maximum slowdown (ms)",
"slowdown" => "Maximum relative slowdown"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
+We then compare the predicted and actual best implementations for each container type.
+
```elixir
-# Best and predicted best implementation for each container type
selection_comparison =
singular_benchmarks
|> DF.explode("using")
@@ -531,8 +559,6 @@ selection_comparison =
```
```elixir
-Latex.table(selection_comparison)
-
selection_comparison
|> DF.put(
"best_impl",
@@ -556,11 +582,12 @@ selection_comparison
"best_impl" => "Best implementation",
"predicted_impl" => "Predicted best"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
-## Adaptive Containers
+We now look at adaptive containers, starting by seeing when they are suggested.
```elixir
# Container types where an adaptive container was suggested
@@ -572,7 +599,7 @@ adaptive_suggestions =
|> DF.distinct(["proj", "ctn", "impl"])
adaptive_suggestions
-# hacky but oh well
+# Hacky way to make the implementation names look nicer
|> DF.mutate(impl: replace(impl, "std::collections::", ""))
|> DF.mutate(impl: replace(impl, "std::vec::", ""))
|> DF.mutate(impl: replace(impl, "primrose_library::", ""))
@@ -582,10 +609,13 @@ adaptive_suggestions
"ctn" => "Container Type",
"impl" => "Suggestion"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```
+We get the benchmarks for projects where an adaptive container was suggested, and extract the benchmark 'size' as a new column.
+
```elixir
adaptive_projs = DF.distinct(adaptive_suggestions, ["proj"])["proj"]
adaptive_estimated_costs = estimated_costs |> DF.filter(proj in ^adaptive_projs)
@@ -609,6 +639,8 @@ adaptive_raw_benchmarks =
)
```
+We then summarise the results for each benchmark size, keeping only assignments that either involve an adaptive container or are the best possible assignment.
+
```elixir
format_dur = fn dur ->
String.split(to_string(dur), " ") |> hd
@@ -629,7 +661,6 @@ best_usings =
|> DF.filter(using == best_using or contains(using, "until"))
# summary data point
-
best_usings =
best_usings
|> DF.put("mean", SE.transform(best_usings["mean"], format_dur))
@@ -638,6 +669,8 @@ best_usings =
|> DF.select(["proj", "using", "n", "value"])
```
+Finally, we print the results per-project for clarity.
+
```elixir
for proj <- SE.distinct(best_usings["proj"]) |> SE.to_enum() do
best_usings