# Dissertation Visualisations

```elixir
Mix.install([
  {:tucan, "~> 0.3.0"},
  {:kino_vega_lite, "~> 0.1.8"},
  {:json, "~> 1.4"},
  {:explorer, "~> 0.8.0"},
  {:kino_explorer, "~> 0.1.11"}
])
```

## Variables

```elixir
require Explorer.DataFrame
require Explorer.Series

alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: SE

job_id = "1177"
job_dir = Path.expand("./" <> job_id)
sections_dir = Path.join(job_dir, "sections")
cm_dir = Path.join([job_dir, "candelabra", "benchmark_results"])
criterion_dir = Path.join(job_dir, "criterion")
```

## Read cost model data

```elixir
{:ok, cost_model_files} = File.ls(cm_dir)

cost_model_files =
  cost_model_files
  |> Enum.map(fn fname -> Path.join(cm_dir, fname) |> Path.absname() end)

cost_model_files
```

```elixir
# Parse the fitted cost model coefficients for each implementation
cost_models =
  cost_model_files
  |> Enum.map(fn fname ->
    impl = Path.basename(fname) |> String.replace("_", ":")
    contents = File.read!(fname)
    contents = JSON.decode!(contents)

    contents["model"]["by_op"]
    |> Enum.map(fn {op, %{"coeffs" => coeffs}} ->
      %{
        op: op,
        impl: impl,
        coeffs: coeffs
      }
    end)
    |> DF.new()
  end)
  |> DF.concat_rows()
```

```elixir
# Parse the raw timing observations that each cost model was fitted to
cost_model_points =
  cost_model_files
  |> Enum.map(fn fname ->
    impl = Path.basename(fname) |> String.replace("_", ":")
    contents = File.read!(fname)
    contents = JSON.decode!(contents)

    contents["results"]["by_op"]
    |> Enum.flat_map(fn {op, results} ->
      Enum.map(results, fn [n, cost] ->
        %{
          op: op,
          impl: String.split(impl, "::") |> List.last(),
          n: n,
          t: cost
        }
      end)
    end)
    |> DF.new()
  end)
  |> DF.concat_rows()
```

```elixir
cost_models
|> DF.filter(op == "contains")
```

## Cost model exploratory plots

```elixir
startn = 0
endn = 60_000
resolution = 100

# Evaluate the fitted polynomial for a given implementation and operation
# at evenly spaced container sizes, clamping negative predictions to zero.
points_for = fn impl, op ->
  %{"coeffs" => [coeffs]} =
    DF.filter(cost_models, impl == ^impl and op == ^op)
    |> DF.to_columns()

  Enum.map(startn..endn//resolution, fn n ->
    %{
      impl: String.split(impl, "::") |> List.last(),
      op: op,
      n: n,
      t:
        coeffs
        |> Enum.with_index()
        |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end)
        |> Enum.sum()
        |> max(0)
    }
  end)
  |> DF.new()
end
```

```elixir
inspect_op = "insert"

Tucan.layers([
  cost_models
  |> DF.filter(op == ^inspect_op)
  |> DF.distinct(["impl"])
  |> DF.to_rows()
  |> Enum.map(fn %{"impl" => impl} -> points_for.(impl, inspect_op) end)
  |> DF.concat_rows()
  |> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
  |> Tucan.Scale.set_y_domain(0, 200),
  Tucan.scatter(
    cost_model_points
    |> DF.filter(op == ^inspect_op)
    |> DF.group_by(["impl", "n"])
    |> DF.summarise(t: mean(t)),
    "n",
    "t",
    color_by: "impl",
    clip: true
  )
])
```

## Read benchmark data

```elixir
raw_benchmarks =
  File.ls!(criterion_dir)
  |> Enum.map(fn name ->
    File.ls!(Path.join(criterion_dir, name))
    |> Enum.map(fn p -> %{bench: name, subbench: p} end)
  end)
  |> List.flatten()
  |> Enum.map(fn %{bench: bench, subbench: subbench} ->
    File.ls!(Path.join([criterion_dir, bench, subbench]))
    |> Enum.filter(fn x -> String.contains?(x, "Mapping2D") end)
    |> Enum.map(fn x -> Path.join([criterion_dir, bench, subbench, x]) end)
    |> Enum.map(fn dir ->
      raw_results =
        Path.join(dir, "estimates.json")
        |> File.read!()
        |> JSON.decode!()

      %{
        bench_id: bench <> "/" <> subbench,
        proj: String.split(bench, "-") |> hd,
        using:
          Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
          |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
        # Criterion reports nanoseconds; convert to seconds
        mean: raw_results["mean"]["point_estimate"] / 10 ** 9
      }
    end)
  end)
  |> List.flatten()
  |> DF.new()
```
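Before aggregating, it is worth confirming that each benchmark actually saw several candidate assignments. The cell below is an optional sanity check sketched against the `raw_benchmarks` frame above, not part of the original pipeline; the counts depend entirely on the job directory being analysed.

```elixir
# Optional sanity check (illustrative): how many candidate container
# assignments were benchmarked for each bench_id?
raw_benchmarks
|> DF.group_by("bench_id")
|> DF.summarise(n_assignments: count(bench_id))
|> DF.sort_by(desc: n_assignments)
```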
```elixir
# Aggregate benchmark results by project, since we can only do assignments by project
uniq_proj_using =
  DF.select(raw_benchmarks, ["proj", "using"])
  |> DF.to_rows()
  |> Enum.uniq()
  |> DF.new()

# The per-project summarise was left unfinished here; the cells below consume
# the per-benchmark rows directly, so expose them under the name they expect.
benchmarks = raw_benchmarks

uniq_proj_using
```

```elixir
# Cost estimates by project, ctn, and implementation,
# parsed out of the LaTeX comparison tables in the sections directory
projs = SE.distinct(benchmarks["proj"]) |> SE.to_list()

cost_estimates =
  projs
  |> Enum.flat_map(fn proj_name ->
    [_, table | _] =
      Path.join(sections_dir, "compare-" <> proj_name)
      |> File.read!()
      |> String.split("& file \\\\\n\\hline\n")

    table
    |> String.split("\n\\end{tabular}")
    |> hd
    |> String.split("\n")
    |> Enum.map(fn x -> String.split(x, " & ") end)
    |> Enum.map(fn [ctn, impl, cost | _] ->
      %{
        proj: proj_name,
        ctn: ctn,
        impl: impl |> String.replace("\\_", "_"),
        cost:
          if String.contains?(cost, ".") do
            String.to_float(cost)
          else
            String.to_integer(cost)
          end
      }
    end)
  end)
  |> DF.new()
```

```elixir
# Get the cost of an assignment by summing the estimates for its choices
cost_of_assignment = fn proj, assignment ->
  assignment
  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
    DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)["cost"][0]
  end)
  |> Enum.sum()
end

cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
```

```elixir
# Estimate cost for each benchmarked assignment
estimated_costs =
  benchmarks
  |> DF.to_rows_stream()
  |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      estimated_cost: cost_of_assignment.(proj, using)
    }
  end)
  |> DF.new()
```

```elixir
# Compare each assignment's position in the estimates to its position in the results
sorted_estimates =
  estimated_costs
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(estimated_cost)

sorted_results =
  benchmarks
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(mean)

position_comparison =
  sorted_estimates
  |> DF.to_rows_stream()
  |> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      pos_estimate:
        DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
        |> SE.to_list()
        |> Enum.find_index(fn u -> u == using end),
      pos_results:
        DF.filter(sorted_results, bench_id == ^bench_id)["using"]
        |> SE.to_list()
        |> Enum.find_index(fn u -> u == using end)
    }
  end)
  |> DF.new()
```

```elixir
position_comparison
|> DF.filter(pos_estimate != pos_results)
|> DF.collect()
```

```elixir
position_comparison
|> DF.filter(pos_estimate == 0)
|> DF.select(["bench_id", "using"])
```

```elixir
# Difference in execution time (seconds) between the worst and best selection
Tucan.bar(
  benchmarks
  |> DF.group_by("bench_id")
  |> DF.summarise(range: max(mean) - min(mean)),
  "bench_id",
  "range",
  orient: :horizontal,
  clip: true
)
|> Tucan.Scale.set_x_domain(0, 5)
```
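The two position columns also admit a single summary number: how often the assignment with the lowest estimated cost is also the fastest one in practice. The cell below is a sketch over the `position_comparison` frame built earlier; the `top_picks` and `agree` names are illustrative, not part of the pipeline.

```elixir
# Fraction of benchmarks where the cheapest estimated assignment
# (pos_estimate == 0) is also the fastest measured one (pos_results == 0).
top_picks = DF.filter(position_comparison, pos_estimate == 0)
agree = DF.filter(top_picks, pos_results == 0)

DF.n_rows(agree) / max(DF.n_rows(top_picks), 1)
```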