From 7924e466d32cf93b7e455d1360bc22fa86340100 Mon Sep 17 00:00:00 2001 From: Aria Shrimpton Date: Fri, 29 Mar 2024 21:49:15 +0000 Subject: rest of results chapter --- analysis/vis.livemd | 149 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 108 insertions(+), 41 deletions(-) (limited to 'analysis') diff --git a/analysis/vis.livemd b/analysis/vis.livemd index 0aa2711..6a0a7c5 100644 --- a/analysis/vis.livemd +++ b/analysis/vis.livemd @@ -243,6 +243,21 @@ VegaLite.Export.save!(graph, "../thesis/assets/insert_small_n.json") graph ``` +```elixir +graph = + CostModel.plot( + cost_models, + cost_model_points, + ["HashSet", "BTreeSet"], + "insert", + ns: 1..500//10, + # y_domain: [0, 4000], + draw_points: true + ) + +graph +``` + ```elixir graph = CostModel.split_plot( @@ -457,29 +472,22 @@ DF.n_rows(singular_benchmarks) ``` ```elixir -# Best and predicted best implementation for each container type -selection_comparison = - singular_benchmarks - |> DF.explode("using") - |> DF.unnest("using") - |> DF.group_by(["proj"]) - |> DF.filter(time == min(time)) - |> DF.join( - cost_estimates - |> DF.filter(not contains(impl, "until")) - |> DF.group_by(["proj", "ctn"]) - |> DF.filter(cost == min(cost)) - |> DF.rename(%{"impl" => "predicted_impl"}) - ) - |> DF.select(["proj", "ctn", "impl", "predicted_impl"]) - |> DF.rename(%{"impl" => "best_impl"}) +display_using = fn using -> + using + |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end) + |> Enum.join(", ") +end ``` ```elixir # Tools for printing out latex defmodule Latex do - def escape_latex(str) do - String.replace(str, ~r/(\\|{|}|_|\^|#|&|\$|%|~)/, "\\\\\\1") + def escape_latex(val) do + if is_number(val) do + "$" <> to_string(val) <> "$" + else + String.replace(to_string(val), ~r/(\\|{|}|_|\^|#|&|\$|%|~)/, "\\\\\\1") + end end def table(df) do @@ -493,14 +501,51 @@ defmodule Latex do (DF.to_rows(df) |> Enum.map(fn row -> cols - |> Enum.map(&escape_latex(Kernel.to_string(row[&1]))) + |> Enum.map(&escape_latex(row[&1])) |> Enum.join(" & ") end) |> Enum.join(" \\\\\n")) <> " \\\\\n\\end{tabular}" end end +``` + +```elixir +singular_benchmarks +|> DF.group_by("proj") +|> DF.summarise(max: max(time), min: min(time)) +|> DF.mutate(spread: round((max - min) * ^(10 ** -6), 2), slowdown: round(max / min - 1, 1)) +|> DF.discard(["max", "min"]) +|> DF.sort_by(proj) +|> DF.rename(%{ + "proj" => "Project", + "spread" => "Maximum slowdown (ms)", + "slowdown" => "Maximum relative slowdown" +}) +|> Latex.table() +|> IO.puts() +``` +```elixir +# Best and predicted best implementation for each container type +selection_comparison = + singular_benchmarks + |> DF.explode("using") + |> DF.unnest("using") + |> DF.group_by(["proj"]) + |> DF.filter(time == min(time)) + |> DF.join( + cost_estimates + |> DF.filter(not contains(impl, "until")) + |> DF.group_by(["proj", "ctn"]) + |> DF.filter(cost == min(cost)) + |> DF.rename(%{"impl" => "predicted_impl"}) + ) + |> DF.select(["proj", "ctn", "impl", "predicted_impl"]) + |> DF.rename(%{"impl" => "best_impl"}) +``` + +```elixir Latex.table(selection_comparison) selection_comparison @@ -517,6 +562,8 @@ selection_comparison SE.not_equal(selection_comparison["best_impl"], selection_comparison["predicted_impl"]) |> SE.transform(&if &1, do: "*", else: "") ) +|> DF.ungroup() +|> DF.sort_by(proj) |> DF.rename(%{ "mark" => " ", "proj" => "Project", @@ -531,32 +578,37 @@ selection_comparison ## Adaptive Containers ```elixir -# Projects where an adaptive container was suggested -adaptive_projs = - (estimated_costs - |> DF.to_rows() - |> Enum.filter(fn %{"using" => using} -> - using - |> Enum.map(fn %{"impl" => impl} -> String.contains?(impl, "until") end) - |> Enum.any?() - end) - |> DF.new() - |> DF.distinct(["proj"]))["proj"] +# Container types where an adaptive container was suggested +adaptive_suggestions = + estimated_costs + |> DF.explode("using") + |> DF.unnest("using") + |> DF.filter(contains(impl, "until")) + |> DF.distinct(["proj", "ctn", "impl"]) + +adaptive_suggestions +# hacky but oh well +|> DF.mutate(impl: replace(impl, "std::collections::", "")) +|> DF.mutate(impl: replace(impl, "std::vec::", "")) +|> DF.mutate(impl: replace(impl, "primrose_library::", "")) +|> DF.sort_by(asc: proj, asc: ctn) +|> DF.rename(%{ + "proj" => "Project", + "ctn" => "Container Type", + "impl" => "Suggestion" +}) +|> Latex.table() +|> IO.puts() ``` ```elixir +adaptive_projs = DF.distinct(adaptive_suggestions, ["proj"])["proj"] adaptive_estimated_costs = estimated_costs |> DF.filter(proj in ^adaptive_projs) adaptive_raw_benchmarks = raw_benchmarks |> DF.filter(proj in ^adaptive_projs) -display_using = fn using -> - using - |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} -> ctn <> "=" <> impl end) - |> Enum.join(", ") -end - adaptive_raw_benchmarks = adaptive_raw_benchmarks |> DF.put( @@ -573,6 +625,10 @@ adaptive_raw_benchmarks = ``` ```elixir +format_dur = fn dur -> + String.split(to_string(dur), " ") |> hd +end + best_usings = adaptive_raw_benchmarks # get best set of assignments for each project @@ -586,13 +642,24 @@ best_usings = # select adaptive container and the best assignment for each project |> DF.join(adaptive_raw_benchmarks) |> DF.filter(using == best_using or contains(using, "until")) - # summary data point - |> DF.mutate(value: cast(mean, :string) <> " +/- " <> cast(stderr, :string)) + +# summary data point + +best_usings = + best_usings + |> DF.put("mean", SE.transform(best_usings["mean"], format_dur)) + |> DF.put("stderr", SE.transform(best_usings["stderr"], format_dur)) + |> DF.mutate(value: mean <> " +/- " <> stderr) |> DF.select(["proj", "using", "n", "value"]) ``` ```elixir -best_usings -|> DF.filter(proj == "aoc_2022_09") -|> DF.pivot_wider("n", "value") +for proj <- SE.distinct(best_usings["proj"]) |> SE.to_enum() do + best_usings + |> DF.filter(proj == ^proj) + |> DF.select(["proj", "using", "n", "value"]) + |> DF.pivot_wider("n", "value") + |> Latex.table() + |> IO.puts() +end ``` -- cgit v1.2.3