diff options
-rw-r--r-- | analysis/vis.livemd | 71 |
1 files changed, 44 insertions, 27 deletions
diff --git a/analysis/vis.livemd b/analysis/vis.livemd index 001b53d..613b419 100644 --- a/analysis/vis.livemd +++ b/analysis/vis.livemd @@ -94,8 +94,8 @@ cost_models ## Cost model exploratory plots ```elixir -startn = 200 -endn = 2000 +startn = 100 +endn = 5000 resolution = 50 points_for = fn impl, op -> @@ -125,8 +125,11 @@ end <!-- livebook:{"reevaluate_automatically":true} --> ```elixir -inspect_op = "insert" -impls = ["BTreeSet", "EagerSortedVec", "EagerUniqueVec", "HashSet"] +# inspect_op = "insert" +# impls = ["BTreeSet", "EagerSortedVec", "HashSet"] + +inspect_op = "pop" +impls = ["Vec", "LinkedList"] Tucan.layers([ cost_models @@ -178,7 +181,7 @@ raw_benchmarks = bench_id: bench <> "/" <> subbench, proj: String.split(bench, "-") |> hd, using: - Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir)) + Regex.scan(~r/\"(\w*)\", ([^)]*)/, Path.basename(dir)) |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end), mean: raw_results["mean"]["point_estimate"] / 10 ** 9 } @@ -190,20 +193,34 @@ raw_benchmarks = ```elixir # Aggregate benchmark results by project, since we can only do assignments by project -uniq_proj_using = - DF.select(raw_benchmarks, ["proj", "using"]) - |> DF.to_rows() - |> Enum.uniq() - |> DF.new() -uniq_proj_using -|> DF.mutate(vals: DF.filter(^raw_benchmarks, proj == proj and using == using)) +# Can't group by lists, so we need to do this weird shit -# |> Enum.map(fn %{"proj" => proj, "using" => using} -> -# DF.filter(raw_benchmarks, proj == ^proj and using == ^using) -# # |> DF.summarise() -# end) -# |> DF.concat_rows() +# Build list of using values to index into +usings = + raw_benchmarks["using"] + |> SE.to_list() + |> Enum.uniq() + +benchmarks = + raw_benchmarks + # Make a column corresponding to using that isn't a list + |> DF.put( + "using_idx", + raw_benchmarks["using"] + |> SE.to_list() + |> Enum.map(fn using -> Enum.find_index(usings, &(&1 == using)) end) + ) + # Get the total benchmark time for each project and assignment + |> DF.group_by(["proj", "using_idx"]) + |> DF.summarise(time: sum(mean)) + # Convert using_idx back to original using values + |> DF.to_rows() + |> Enum.map(fn row = %{"using_idx" => using_idx} -> + Map.put(row, "using", Enum.at(usings, using_idx)) + end) + |> DF.new() + |> DF.select(["proj", "time", "using"]) ``` ```elixir @@ -261,9 +278,9 @@ cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec estimated_costs = benchmarks |> DF.to_rows_stream() - |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} -> + |> Enum.map(fn %{"proj" => proj, "using" => using} -> %{ - bench_id: bench_id, + proj: proj, using: using, estimated_cost: cost_of_assignment.(proj, using) } @@ -275,27 +292,27 @@ estimated_costs = # Compare each assignments position in the estimates to its position in the results sorted_estimates = estimated_costs - |> DF.group_by(["bench_id"]) + |> DF.group_by(["proj"]) |> DF.sort_by(estimated_cost) sorted_results = benchmarks - |> DF.group_by(["bench_id"]) - |> DF.sort_by(mean) + |> DF.group_by(["proj"]) + |> DF.sort_by(time) position_comparison = sorted_estimates |> DF.to_rows_stream() - |> Enum.map(fn %{"bench_id" => bench_id, "using" => using} -> + |> Enum.map(fn %{"proj" => proj, "using" => using} -> %{ - bench_id: bench_id, + proj: proj, using: using, pos_estimate: - DF.filter(sorted_estimates, bench_id == ^bench_id)["using"] + DF.filter(sorted_estimates, proj == ^proj)["using"] |> SE.to_list() |> Enum.find_index(fn u -> u == using end), pos_results: - DF.filter(sorted_results, bench_id == ^bench_id)["using"] + DF.filter(sorted_results, proj == ^proj)["using"] |> SE.to_list() |> Enum.find_index(fn u -> u == using end) } @@ -312,7 +329,7 @@ position_comparison ```elixir position_comparison |> DF.filter(pos_estimate == 0) -|> DF.select(["bench_id", "using"]) +|> DF.select(["proj", "using"]) ``` <!-- livebook:{"reevaluate_automatically":true} --> |