From 40029fd036a45b7ea9447f158fe3133257107c16 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton
Date: Thu, 7 Mar 2024 22:26:22 +0000
Subject: cleanup analysis notebook a bit

---
 analysis/vis.livemd | 54 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index 613b419..9f5a58d 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -1,3 +1,5 @@
+
+
 # Dissertation Visualisations
 
 ```elixir
@@ -160,6 +162,7 @@ Tucan.layers([
 ## Read benchmark data
 
 ```elixir
+# Read in the results of every individual Criterion benchmark
 raw_benchmarks =
   File.ls!(criterion_dir)
   |> Enum.map(fn name ->
@@ -193,8 +196,11 @@ raw_benchmarks =
 
 ```elixir
 # Aggregate benchmark results by project, since we can only do assignments by project
-
-# Can't group by lists, so we need to do this weird shit
+# Unfortunately we can't group by lists, so we need a workaround.
+# This is basically equivalent to:
+#   benchmarks = raw_benchmarks
+#     |> DF.group_by(["proj", "using"])
+#     |> DF.summarise(time: sum(mean))
 
 # Build list of using values to index into
 usings =
@@ -223,6 +229,8 @@ benchmarks =
   |> DF.select(["proj", "time", "using"])
 ```
 
+## Read cost estimate data
+
 ```elixir
 # Cost estimates by project, ctn, and implementation
 projs = SE.distinct(benchmarks["proj"])
@@ -261,7 +269,20 @@ cost_estimates =
 ```
 
 ```elixir
-# Get cost of assignment from cost estimates
+# Double-check that we have all of the cost estimates for everything mentioned in the assignments
+estimate_impls = SE.distinct(cost_estimates["impl"])
+
+true =
+  (benchmarks
+   |> DF.explode("using")
+   |> DF.unnest("using"))["impl"]
+  |> SE.distinct()
+  |> SE.to_list()
+  |> Enum.all?(fn impl -> SE.equal(estimate_impls, impl) |> SE.any?() end)
+```
+
+```elixir
+# Gets the cost of an assignment from the cost estimates
 cost_of_assignment = fn proj, assignment ->
   assignment
   |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
@@ -274,7 +295,7 @@ cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
 ```
 
 ```elixir
-# Estimate cost for each benchmarked assignment
+# For each benchmarked assignment, estimate the cost.
 estimated_costs =
   benchmarks
   |> DF.to_rows_stream()
@@ -288,6 +309,8 @@ estimated_costs =
   |> DF.new()
 ```
 
+## Estimates vs results
+
 ```elixir
 # Compare each assignment's position in the estimates to its position in the results
 sorted_estimates =
@@ -321,29 +344,8 @@ position_comparison =
 ```
 
 ```elixir
+# Everywhere we predicted wrong.
 position_comparison
 |> DF.filter(pos_estimate != pos_results)
 |> DF.collect()
 ```
-
-```elixir
-position_comparison
-|> DF.filter(pos_estimate == 0)
-|> DF.select(["proj", "using"])
-```
-
-
-
-```elixir
-# Difference in execution time between worst and best selection
-Tucan.bar(
-  benchmarks
-  |> DF.group_by("bench_id")
-  |> DF.summarise(range: max(mean) - min(mean)),
-  "bench_id",
-  "range",
-  orient: :horizontal,
-  clip: true
-)
-|> Tucan.Scale.set_x_domain(0, 5)
-```
-- 
cgit v1.2.3
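
The aggregation described by the third hunk's new comment is only partly visible in the patch: the hunk context stops at `usings =`, before the workaround itself. A minimal standalone sketch of the same idea, using hypothetical data and plain `Enum` calls rather than the notebook's Explorer (`DF`) dataframes:

```elixir
# Hypothetical rows shaped like the notebook's benchmark data: each row has a
# project, an assignment ("using" is a list of container-to-implementation
# choices), and a mean time. Explorer can't group by a list-valued column,
# but arbitrary Elixir terms work fine as group keys.
rows = [
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}], mean: 1.2},
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}], mean: 1.4},
  %{proj: "example_stack", using: [%{"ctn" => "StackCon", "impl" => "std::collections::LinkedList"}], mean: 3.1}
]

# The moral equivalent of DF.group_by(["proj", "using"]) |> DF.summarise(time: sum(mean)):
benchmarks =
  rows
  |> Enum.group_by(fn %{proj: proj, using: using} -> {proj, using} end)
  |> Enum.map(fn {{proj, using}, group} ->
    %{proj: proj, using: using, time: group |> Enum.map(& &1.mean) |> Enum.sum()}
  end)
```

The notebook itself stays inside Explorer instead: per the hunk's context lines, it builds a list of distinct `using` values to index into (the `usings =` cell) and groups on that index.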
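Likewise, the `sorted_estimates` and `position_comparison` cells fall mostly outside the hunk context. A sketch of the comparison they implement, again with hypothetical data and names: rank each assignment by estimated cost and by measured time, then flag any rank disagreement.

```elixir
# Hypothetical estimated costs and measured times for one project's assignments.
estimated = [{"std::vec::Vec", 10.0}, {"BTreeMap", 12.0}, {"LinkedList", 30.0}]
measured = [{"BTreeMap", 1.0}, {"std::vec::Vec", 1.1}, {"LinkedList", 2.9}]

# Rank assignments by cost: position 0 is the predicted (or observed) best.
rank = fn pairs ->
  pairs
  |> Enum.sort_by(fn {_impl, cost} -> cost end)
  |> Enum.with_index()
  |> Map.new(fn {{impl, _cost}, idx} -> {impl, idx} end)
end

est_pos = rank.(estimated)
res_pos = rank.(measured)

# Everywhere the predicted ranking disagrees with the measured one,
# mirroring the final DF.filter(pos_estimate != pos_results) cell.
mispredicted = for {impl, pos} <- est_pos, res_pos[impl] != pos, do: impl
```

Here the estimate puts `std::vec::Vec` first while the measurements put `BTreeMap` first, so both implementations land in `mispredicted`.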