aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAria Shrimpton <me@aria.rip>2024-02-29 16:10:41 +0000
committerAria Shrimpton <me@aria.rip>2024-02-29 16:10:41 +0000
commit597fbc6b5ebcfbdac97ec9100ba34149d4fc1984 (patch)
treeb5f3423c528872fecfb8aaec38e6e9355aa0ac5f
parent1b476265f3f6043529d252db5353c27ebd0507b3 (diff)
compare real vs estimated positions
-rw-r--r--analysis/vis.livemd110
1 files changed, 101 insertions, 9 deletions
diff --git a/analysis/vis.livemd b/analysis/vis.livemd
index bbc05d8..f48a12a 100644
--- a/analysis/vis.livemd
+++ b/analysis/vis.livemd
@@ -15,6 +15,7 @@ Mix.install([
```elixir
require Explorer.DataFrame
alias Explorer.DataFrame, as: DF
+alias Explorer.Series, as: SE
job_id = "1146"
job_dir = Path.expand(~c"./" ++ job_id) |> Path.absname()
sections_dir = Path.join(job_dir, "sections")
@@ -158,16 +159,11 @@ benchmarks =
|> JSON.decode!()
%{
- bench: bench,
- subbench: subbench,
bench_id: bench <> "/" <> subbench,
using:
Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
|> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
- dir: dir,
- lower_bound: raw_results["mean"]["confidence_interval"]["lower_bound"] / 10 ** 9,
- mean: raw_results["mean"]["point_estimate"] / 10 ** 9,
- upper_bound: raw_results["mean"]["confidence_interval"]["upper_bound"] / 10 ** 9
+ mean: raw_results["mean"]["point_estimate"] / 10 ** 9
}
end)
end)
@@ -176,8 +172,104 @@ benchmarks =
```
```elixir
-benchmarks
-|> DF.distinct([:bench_id])
+# Cost estimates by project, ctn, and implementation
+projs =
+ benchmarks["bench_id"]
+ |> SE.split("-")
+ |> SE.transform(&hd/1)
+
+benchmarks = DF.put(benchmarks, "proj", projs)
+
+# TODO: name the rest of the benchmarks consistently; until then, keep only the "example" projects
+projs = SE.mask(projs, SE.contains(projs, "example"))
+
+cost_estimates =
+ SE.transform(projs |> SE.distinct(), fn proj_name ->
+ [_, table | _] =
+ Path.join(sections_dir, "compare-" <> proj_name)
+ |> File.read!()
+ |> String.split("& file \\\\\n\\hline\n")
+
+ table
+ |> String.split("\n\\end{tabular}")
+ |> hd
+ |> String.split("\n")
+ |> Enum.map(fn x -> String.split(x, " & ") end)
+ |> Enum.map(fn [ctn, impl, cost | _] ->
+ %{
+ proj: proj_name,
+ ctn: ctn,
+ impl:
+ impl
+ |> String.replace("\\_", "_"),
+ cost: String.to_float(cost)
+ }
+ end)
+ end)
+ |> SE.to_list()
+ |> List.flatten()
+ |> DF.new()
+```
+
+```elixir
+# Get cost of assignment from cost estimates
+cost_of_assignment = fn proj, assignment ->
+ assignment
+ |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
+ DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)["cost"][0]
+ end)
+ |> Enum.sum()
+end
+
+cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
+```
+
+```elixir
+# Estimate cost for each benchmarked assignment
+estimated_costs =
+ benchmarks
+ |> DF.to_rows_stream()
+ # TODO: drop this filter once cost estimates are loaded for non-"example" projects
+ |> Enum.filter(fn %{"proj" => proj} -> String.contains?(proj, "example") end)
+ |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
+ %{
+ bench_id: bench_id,
+ using: using,
+ estimated_cost: cost_of_assignment.(proj, using)
+ }
+ end)
+ |> DF.new()
+```
+
+```elixir
+# Compare each assignment's position in the estimates to its position in the results
+sorted_estimates =
+ estimated_costs
+ |> DF.group_by(["bench_id"])
+ |> DF.sort_by(estimated_cost)
+
+sorted_results =
+ benchmarks
+ |> DF.group_by(["bench_id"])
+ |> DF.sort_by(mean)
+
+sorted_estimates
+|> DF.to_rows_stream()
+|> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
+ %{
+ bench_id: bench_id,
+ using: using,
+ pos_estimate:
+ DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
+ |> SE.to_list()
+ |> Enum.find_index(fn u -> u == using end),
+ pos_results:
+ DF.filter(sorted_results, bench_id == ^bench_id)["using"]
+ |> SE.to_list()
+ |> Enum.find_index(fn u -> u == using end)
+ }
+end)
+|> DF.new()
```
<!-- livebook:{"reevaluate_automatically":true} -->
@@ -193,5 +285,5 @@ Tucan.bar(
orient: :horizontal,
clip: true
)
-|> Tucan.Scale.set_x_domain(0, 10)
+|> Tucan.Scale.set_x_domain(0, 5)
```