| author | Aria Shrimpton <me@aria.rip> | 2024-03-30 17:34:18 +0000 |
|---|---|---|
| committer | Aria Shrimpton <me@aria.rip> | 2024-03-30 17:34:56 +0000 |
| commit | 0260147246507960bb86fa088eb986bc773dd824 (patch) | |
| tree | c552b34556c06c4556f2431640aee5e6c9bc4563 /analysis/vis.livemd | |
| parent | ca565e3d32c0815722302331edbc59ce8a1ca9b8 (diff) | |
notebook cleanup
Diffstat (limited to 'analysis/vis.livemd')
-rw-r--r-- | analysis/vis.livemd | 105 |
1 file changed, 69 insertions, 36 deletions
diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 0ef8597..dd8a593 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -13,9 +13,10 @@ Mix.install([
])
```

-## Variables
+## Setup

```elixir
+# Some common variables
require Explorer.DataFrame
require Explorer.Series
require VegaLite
@@ -28,7 +29,11 @@ cm_dir = Path.join([job_dir, "candelabra", "benchmark_results"])
criterion_dir = Path.join(job_dir, "criterion")
```

-## Read cost model data
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Cost models
+
+We read in the cost models from the JSON output.

```elixir
{:ok, cost_model_files} = File.ls(cm_dir)
@@ -37,13 +42,12 @@ cost_model_files =
  cost_model_files
  |> Enum.map(fn fname -> Path.join(cm_dir, fname) |> Path.absname() end)

+# Should be one for each library implementation
cost_model_files
```

-<!-- livebook:{"reevaluate_automatically":true} -->
-
```elixir
-# Parse cost model information
+# Find the coefficients, i.e. the actual cost models
cost_models =
  cost_model_files
  |> Enum.map(fn fname ->
@@ -65,7 +69,7 @@ cost_models =
```

```elixir
-# Parse cost model information
+# Get the raw data points
cost_model_points =
  cost_model_files
  |> Enum.map(fn fname ->
@@ -100,7 +104,7 @@ cost_model_points =
  |> DF.mutate(t: cast(t, {:duration, :nanosecond}))
```

-## Cost model exploratory plots
+We can now plot our graphs. The module below provides most of the code, with the cells after it specifying the actual graphs.

```elixir
defmodule CostModel do
@@ -119,11 +123,14 @@ defmodule CostModel do
    "LinkedList"
  ])

+  # Make the names in the legends shorter and more readable
  def friendly_impl_name(impl) do
    String.split(impl, "::") |> List.last()
  end

+  # Get a dataframe of points lying on the cost model, one point for each of `ns`.
  def points_for(cost_models, ns, impl, op) do
+    # Get coefficients
    %{"coeffs" => [coeffs]} =
      DF.filter(cost_models, impl == ^impl and op == ^op)
      |> DF.to_columns()
@@ -146,11 +153,13 @@ defmodule CostModel do
    |> DF.new()
  end

+  # Plot the specified cost model, optionally specifying the x/y domains and omitting points
  def plot(cost_models, cost_model_points, impls, op, opts \\ []) do
    %{y_domain: y_domain, ns: ns, draw_points: draw_points} = Enum.into(opts, @defaults)

    plot =
      Tucan.layers(
+        # The actual cost model function
        [
          cost_models
          |> DF.filter(op == ^op)
@@ -163,6 +172,7 @@ defmodule CostModel do
          |> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
        ] ++
          if(draw_points,
+            # The raw points, if necessary
            do: [
              cost_model_points
              |> DF.filter(op == ^op and impl in ^impls)
@@ -180,6 +190,7 @@ defmodule CostModel do
          )
      )

+    # Adjust x/y domain and set title, etc.
    plot =
      plot
      |> Tucan.Axes.set_y_title("Estimated cost")
@@ -194,16 +205,18 @@ defmodule CostModel do
    end
  end

+  # Plot the cost models for `op` across all implementations, grouped by the 2D array `impl_splits`
  def split_plot(cost_models, cost_model_points, impl_splits, op) do
-    @all_impls = List.flatten(impl_splits) |> Enum.sort()
-
    Enum.map(impl_splits, &plot(cost_models, cost_model_points, &1, op))
    |> Tucan.vconcat()
+    # Ensures we don't share a legend for them all
    |> VegaLite.resolve(:scale, color: :independent)
  end
end
```

+Below are our actual graphs, which are displayed and exported to JSON files in the thesis directory.
+
<!-- livebook:{"reevaluate_automatically":true} -->

```elixir
@@ -266,7 +279,25 @@ VegaLite.Export.save!(graph, "../thesis/assets/contains.json")
graph
```

-## Read benchmark data
+The block below can be used to inspect the cost models of certain operations and implementations.
+
+```elixir
+impls = ["SortedVec", "SortedVecSet", "SortedVecMap"]
+op = "insert"
+
+CostModel.plot(
+  cost_models,
+  cost_model_points,
+  impls,
+  op
+)
+```
+
+<!-- livebook:{"branch_parent_index":0} -->
+
+## Benchmarks
+
+We read in benchmark data from criterion's JSON output.

```elixir
# Read in the results of every individual criterion benchmark
@@ -307,10 +338,7 @@ raw_benchmarks =
```

```elixir
-# `using` is a list of structs, but we aren't gonna make use of this mostly
-# and we want to be able to group by that column, so add a new column that's just a nice
-# string representation
-# also parse out the n value, which all of our benchmarks have
+# Helper function for making the `using` field look nicer
display_using = fn using ->
  using
  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
@@ -353,7 +381,7 @@ benchmarks =
  |> DF.select(["proj", "time", "using"])
```

-## Read cost estimate data
+We read our cost estimates from the log output.

```elixir
# Cost estimates by project, ctn, and implementation
@@ -405,6 +433,8 @@ true =
  |> Enum.all?(&SE.any?(SE.equal(estimate_impls, &1)))
```

+We then find the estimated cost of every assignment that we benchmarked.
+
```elixir
# Gets the cost of assignment from cost estimates
cost_of_assignment = fn proj, assignment ->
@@ -433,7 +463,9 @@ estimated_costs =
  |> DF.new()
```

-## Estimates vs results (ignoring adaptive containers)
+Now we can compare our benchmark results to our estimated costs.
+
+We first filter out adaptive containers, to consider them separately later.
```elixir
# Don't worry about adaptive containers for now
@@ -457,14 +489,6 @@ DF.n_rows(singular_benchmarks)
```

```elixir
-display_using = fn using ->
-  using
-  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end)
-  |> Enum.join(", ")
-end
-```
-
-```elixir
# Tools for printing out latex
defmodule Latex do
  def escape_latex(val) do
@@ -495,6 +519,8 @@ defmodule Latex do
end
```

+Compare the fastest and slowest assignments for each project.
+
```elixir
singular_benchmarks
|> DF.group_by("proj")
@@ -507,12 +533,14 @@ singular_benchmarks
  "spread" => "Maximum slowdown (ms)",
  "slowdown" => "Maximum relative slowdown"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```

+Compare the predicted and actual best implementation for each container type.
+
```elixir
-# Best and predicted best implementation for each container type
selection_comparison =
  singular_benchmarks
  |> DF.explode("using")
@@ -531,8 +559,6 @@ selection_comparison =
```

```elixir
-Latex.table(selection_comparison)
-
selection_comparison
|> DF.put(
  "best_impl",
@@ -556,11 +582,12 @@ selection_comparison
  "best_impl" => "Best implementation",
  "predicted_impl" => "Predicted best"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```

-## Adaptive Containers
+We now look at adaptive containers, starting by seeing when they get suggested.

```elixir
# Container types where an adaptive container was suggested
adaptive_suggestions =
@@ -572,7 +599,7 @@ adaptive_suggestions =
  |> DF.distinct(["proj", "ctn", "impl"])

adaptive_suggestions
-# hacky but oh well
+# Hacky way to make things look nicer
|> DF.mutate(impl: replace(impl, "std::collections::", ""))
|> DF.mutate(impl: replace(impl, "std::vec::", ""))
|> DF.mutate(impl: replace(impl, "primrose_library::", ""))
@@ -582,10 +609,13 @@ adaptive_suggestions
  "ctn" => "Container Type",
  "impl" => "Suggestion"
})
-|> Latex.table()
-|> IO.puts()
+
+# |> Latex.table()
+# |> IO.puts()
```

+Get the benchmarks for projects where we suggested an adaptive container, and add the benchmark 'size' as a new column.
+
```elixir
adaptive_projs = DF.distinct(adaptive_suggestions, ["proj"])["proj"]
adaptive_estimated_costs = estimated_costs |> DF.filter(proj in ^adaptive_projs)
@@ -609,6 +639,8 @@ adaptive_raw_benchmarks =
  )
```

+We then summarise the results for each benchmark size, for assignments that either involve an adaptive container or are the best possible assignment.
+
```elixir
format_dur = fn dur ->
  String.split(to_string(dur), " ") |> hd
@@ -629,7 +661,6 @@ best_usings =
  |> DF.filter(using == best_using or contains(using, "until"))

# summary data point
-
best_usings =
  best_usings
  |> DF.put("mean", SE.transform(best_usings["mean"], format_dur))
@@ -638,6 +669,8 @@ best_usings =
  |> DF.select(["proj", "using", "n", "value"])
```

+Finally, we print them out per project for clarity.
+
```elixir
for proj <- SE.distinct(best_usings["proj"]) |> SE.to_enum() do
  best_usings |
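
The cost models above are stored as lists of polynomial coefficients (`coeffs`) and evaluated pointwise to draw the fitted curves. A minimal sketch of that evaluation, assuming the coefficient list is ordered constant term first (the evaluation code itself is elided from the hunks above); all names and numbers here are illustrative:

```elixir
# Illustrative sketch: evaluate a fitted cost model at a single n.
# Assumes coeffs are ordered lowest power first, i.e. t(n) = 250 + 1.5n + 0.002n^2.
coeffs = [250.0, 1.5, 0.002]

cost = fn n ->
  coeffs
  |> Enum.with_index()
  |> Enum.map(fn {c, pow} -> c * :math.pow(n, pow) end)
  |> Enum.sum()
end

cost.(1000)
# => 3750.0 (250 + 1500 + 2000)
```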
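
Likewise, `cost_of_assignment` (whose body is also elided) appears, from its usage, to score a whole assignment as the sum of the estimates for each container/implementation pair it uses. A toy sketch of that idea with made-up data and hypothetical keys:

```elixir
# Illustrative sketch: an assignment's estimated cost is the sum of the
# per-(container type, implementation) estimates it is built from.
estimates = %{
  {"c", "SortedVec"} => 1200.0,
  {"c", "HashSet"} => 950.0,
  {"d", "VecMap"} => 400.0
}

assignment = [%{"ctn" => "c", "impl" => "HashSet"}, %{"ctn" => "d", "impl" => "VecMap"}]

assignment
|> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> Map.fetch!(estimates, {ctn, impl}) end)
|> Enum.sum()
# => 1350.0
```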
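
The final cell is truncated above, but it iterates over the distinct projects in `best_usings`. A generic, self-contained sketch of that Explorer pattern (not the commit's code): filter the dataframe once per distinct key and inspect each slice.

```elixir
# Illustrative sketch: print a dataframe slice per distinct key.
require Explorer.DataFrame, as: DF
alias Explorer.Series, as: SE

df = DF.new(proj: ["a", "a", "b"], time: [1, 2, 3])

for p <- SE.distinct(df["proj"]) |> SE.to_enum() do
  df |> DF.filter(proj == ^p) |> IO.inspect(label: p)
end
```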