From 597fbc6b5ebcfbdac97ec9100ba34149d4fc1984 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton
Date: Thu, 29 Feb 2024 16:10:41 +0000
Subject: compare real vs estimated positions

---
Reviewer notes (text in this area is ignored when the patch is applied):

* Adds the alias `SE` for `Explorer.Series`.
* Trims each benchmark row to `bench_id`, `using` and `mean`; the `bench`,
  `subbench`, `dir`, `lower_bound` and `upper_bound` keys are removed.
* Replaces the `DF.distinct([:bench_id])` cell with four cells that:
    1. add a `proj` column (the part of `bench_id` before the first "-") and,
       for projects whose name contains "example", read the table in the
       `compare-<proj>` file under `sections_dir` into `cost_estimates`
       (columns proj, ctn, impl, cost);
    2. define `cost_of_assignment`, which sums the matching `cost` entries
       for every ctn/impl pair of an assignment;
    3. build `estimated_costs` for every benchmarked assignment whose
       project name contains "example";
    4. list, per `bench_id`, the index of each assignment when sorted by
       `estimated_cost` next to its index when sorted by `mean`.
* Narrows the x domain of the `Tucan.bar` chart from 0..10 to 0..5.

NOTE(review): this copy was rebuilt from a rendering in which line breaks
and indentation had been collapsed. Line structure was checked against the
hunk headers and the diffstat; indentation was re-derived using the standard
Elixir formatter layout, and the two blank context lines that close the third
hunk are inferred from its line counts. Verify against blob f48a12a, or apply
with `git apply --ignore-whitespace`, before relying on exact context.
NOTE(review): the From: header carries no address; it may have been lost in
the same rendering. Confirm against the repository log.

 analysis/vis.livemd | 110 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 101 insertions(+), 9 deletions(-)

diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index bbc05d8..f48a12a 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -15,6 +15,7 @@ Mix.install([
 ```elixir
 require Explorer.DataFrame
 alias Explorer.DataFrame, as: DF
+alias Explorer.Series, as: SE
 job_id = "1146"
 job_dir = Path.expand(~c"./" ++ job_id) |> Path.absname()
 sections_dir = Path.join(job_dir, "sections")
@@ -158,16 +159,11 @@ benchmarks =
           |> JSON.decode!()
 
         %{
-          bench: bench,
-          subbench: subbench,
           bench_id: bench <> "/" <> subbench,
           using:
             Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
             |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
-          dir: dir,
-          lower_bound: raw_results["mean"]["confidence_interval"]["lower_bound"] / 10 ** 9,
-          mean: raw_results["mean"]["point_estimate"] / 10 ** 9,
-          upper_bound: raw_results["mean"]["confidence_interval"]["upper_bound"] / 10 ** 9
+          mean: raw_results["mean"]["point_estimate"] / 10 ** 9
         }
       end)
     end)
@@ -176,8 +172,104 @@ benchmarks =
 ```
 
 ```elixir
-benchmarks
-|> DF.distinct([:bench_id])
+# Cost estimates by project, ctn, and implementation
+projs =
+  benchmarks["bench_id"]
+  |> SE.split("-")
+  |> SE.transform(&hd/1)
+
+benchmarks = DF.put(benchmarks, "proj", projs)
+
+# TODO: consistently name rest of benchmarks
+projs = SE.mask(projs, SE.contains(projs, "example"))
+
+cost_estimates =
+  SE.transform(projs |> SE.distinct(), fn proj_name ->
+    [_, table | _] =
+      Path.join(sections_dir, "compare-" <> proj_name)
+      |> File.read!()
+      |> String.split("& file \\\\\n\\hline\n")
+
+    table
+    |> String.split("\n\\end{tabular}")
+    |> hd
+    |> String.split("\n")
+    |> Enum.map(fn x -> String.split(x, " & ") end)
+    |> Enum.map(fn [ctn, impl, cost | _] ->
+      %{
+        proj: proj_name,
+        ctn: ctn,
+        impl:
+          impl
+          |> String.replace("\\_", "_"),
+        cost: String.to_float(cost)
+      }
+    end)
+  end)
+  |> SE.to_list()
+  |> List.flatten()
+  |> DF.new()
+```
+
+```elixir
+# Get cost of assignment from cost estimates
+cost_of_assignment = fn proj, assignment ->
+  assignment
+  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
+    DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)["cost"][0]
+  end)
+  |> Enum.sum()
+end
+
+cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
+```
+
+```elixir
+# Estimate cost for each benchmarked assignment
+estimated_costs =
+  benchmarks
+  |> DF.to_rows_stream()
+  # TODO
+  |> Enum.filter(fn %{"proj" => proj} -> String.contains?(proj, "example") end)
+  |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
+    %{
+      bench_id: bench_id,
+      using: using,
+      estimated_cost: cost_of_assignment.(proj, using)
+    }
+  end)
+  |> DF.new()
+```
+
+```elixir
+# Compare each assignments position in the estimates to its position in the results
+sorted_estimates =
+  estimated_costs
+  |> DF.group_by(["bench_id"])
+  |> DF.sort_by(estimated_cost)
+
+sorted_results =
+  benchmarks
+  |> DF.group_by(["bench_id"])
+  |> DF.sort_by(mean)
+
+sorted_estimates
+|> DF.to_rows_stream()
+|> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
+  %{
+    bench_id: bench_id,
+    using: using,
+    pos_estimate:
+      DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
+      |> SE.to_list()
+      |> Enum.find_index(fn u -> u == using end),
+    pos_results:
+      DF.filter(sorted_results, bench_id == ^bench_id)["using"]
+      |> SE.to_list()
+      |> Enum.find_index(fn u -> u == using end)
+  }
+end)
+|> DF.new()
 ```
 
 
@@ -193,5 +285,5 @@ Tucan.bar(
   orient: :horizontal,
   clip: true
 )
-|> Tucan.Scale.set_x_domain(0, 10)
+|> Tucan.Scale.set_x_domain(0, 5)
 ```
-- 
cgit v1.2.3