# Dissertation Visualisations

```elixir
Mix.install([
  {:tucan, "~> 0.3.0"},
  {:kino_vega_lite, "~> 0.1.8"},
  {:json, "~> 1.4"},
  {:explorer, "~> 0.8.0"},
  {:kino_explorer, "~> 0.1.11"}
])
```

## Variables

```elixir
require Explorer.DataFrame
alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: SE

# All inputs live under a directory named after the job id, resolved relative
# to the current working directory. `Path.expand/1` already returns an
# absolute path, so no further normalisation is needed.
job_id = "1146"
job_dir = Path.expand(job_id)
sections_dir = Path.join(job_dir, "sections")
criterion_dir = Path.join(job_dir, "criterion")
```

## Read cost model data

```elixir
# Keep only the cost-model outputs for the `vec--Vec` implementation and turn
# them into full paths (`sections_dir` is already absolute).
cost_model_files =
  sections_dir
  |> File.ls!()
  |> Enum.filter(fn name ->
    String.contains?(name, "cost-model") and String.contains?(name, "vec--Vec")
  end)
  |> Enum.map(fn fname -> Path.join(sections_dir, fname) end)

cost_model_files
```

```elixir
defmodule CostModel do
  @moduledoc """
  The cost model of one implementation: its name (`impl`) and the list of
  per-operation models (`ops`, a list of `OpCostModel` structs).
  """
  defstruct impl: nil, ops: []
end

defmodule OpCostModel do
  @moduledoc """
  The fitted cost model of a single operation.

  `x0`..`x3` are the coefficients of a cubic polynomial in `n` (see
  `PlotCostModel.gen_ts/2`); `nmrse` is the error figure read from the last
  column of the table row.
  """
  defstruct op: nil, x0: nil, x1: nil, x2: nil, x3: nil, nmrse: nil
end

defmodule Parse do
  @moduledoc """
  Parsers for the LaTeX tables found in the `sections` directory.
  """

  @doc """
  Parses a numeric table cell into a float.

  Unlike `String.to_float/1`, this accepts numerals without a decimal point
  (`"3"`, `"1e-5"`). Surrounding whitespace is ignored. Raises `ArgumentError`
  if the cell is not entirely a number.
  """
  def float!(text) do
    case text |> String.trim() |> Float.parse() do
      {value, ""} -> value
      _ -> raise ArgumentError, "not a number: #{inspect(text)}"
    end
  end

  @doc """
  Parses one ` & `-separated table row into an `OpCostModel`.

  The row must have exactly six columns: the operation name, the four
  coefficients and the NMRSE. Only the first whitespace-delimited token of
  the last column is used, so whatever trails the number on that line is
  discarded.
  """
  def cost_model_row(row) do
    [name, x0, x1, x2, x3, nmrse] = String.split(row, " & ")
    [nmrse | _] = String.split(nmrse)

    %OpCostModel{
      op: name,
      x0: float!(x0),
      x1: float!(x1),
      x2: float!(x2),
      x3: float!(x3),
      nmrse: float!(nmrse)
    }
  end

  @doc """
  Reads the cost-model file `fname` and returns a `CostModel`.

  The table body is the text between the first occurrence of `"line\\n"` and
  the following `"\\n\\end"`. The implementation name is whatever follows
  `cost-model-` in the file name, with every `-` replaced by `:`.
  """
  def cost_model_output(fname) do
    {:ok, contents} = File.read(fname)
    [_, table | _] = String.split(contents, "line\n")
    [rows | _] = String.split(table, "\n\\end")
    # `trim: true` drops blank lines so they are not fed to cost_model_row/1.
    rows = String.split(rows, "\n", trim: true)

    # Split the base name only, so a directory that happens to contain
    # "cost-model-" cannot break the two-element match.
    [_, impl] = fname |> Path.basename() |> String.split("cost-model-")
    impl = String.replace(impl, "-", ":")

    %CostModel{
      impl: impl,
      ops: Enum.map(rows, &cost_model_row/1)
    }
  end
end
```

```elixir
cost_models = Enum.map(cost_model_files, &Parse.cost_model_output/1)
```

## Cost model exploratory plots

```elixir
defmodule PlotCostModel do
  @moduledoc """
  Helpers that sample cost models over a fixed range of `n` and plot them
  with Tucan.
  """

  # Sampling range for n: @startn..@endn in steps of @resolution.
  @startn 0
  @endn 20_000
  @resolution 100

  @doc """
  Evaluates the cubic `x0 + x1*n + x2*n^2 + x3*n^3` of the given operation
  model at every `n` in `ns`, returning a list of `%{n: n, t: t}` maps.
  """
  def gen_ts(ns, %OpCostModel{x0: x0, x1: x1, x2: x2, x3: x3}) do
    Enum.map(ns, fn n ->
      %{n: n, t: x0 + n * x1 + n * n * x2 + n * n * n * x3}
    end)
  end

  @doc """
  Samples `op` over the module's fixed range and tags every point with the
  operation name under `:name`.
  """
  def points_op(op) do
    ns = @startn..@endn//@resolution

    ns
    |> gen_ts(op)
    |> Enum.map(fn data -> Map.put(data, :name, op.op) end)
  end

  @doc """
  Returns the sampled points of every operation in `model` as one flat list,
  each point tagged with the implementation name under `:impl`.
  """
  def points_model(model) do
    Enum.flat_map(model.ops, fn op ->
      op
      |> points_op()
      |> Enum.map(fn point -> Map.put(point, :impl, model.impl) end)
    end)
  end

  @doc """
  Returns the sampled points of all `models` as one flat list.
  """
  def points(models) do
    Enum.flat_map(models, &points_model/1)
  end

  @doc """
  Line plot of `t` against `n` for a single operation, titled with its name.
  """
  def plot_op(op) do
    points = points_op(op)
    Tucan.lineplot(points, "n", "t", title: op.op)
  end

  @doc """
  Plots every operation of a cost model in a three-column grid titled with
  the implementation name.
  """
  def plot_all_ops(%CostModel{impl: impl, ops: ops}) do
    ops
    |> Enum.map(&plot_op/1)
    |> Tucan.concat(columns: 3)
    |> Tucan.set_title(impl)
  end
end
```

```elixir
cost_models
|> Enum.map(&PlotCostModel.plot_all_ops/1)
|> Tucan.vconcat()
```

## Read benchmark data

```elixir
# Walk criterion_dir/<bench>/<subbench>/<run>, keeping only runs whose
# directory name contains "Mapping2D", and read each run's estimates.json.
benchmarks =
  criterion_dir
  |> File.ls!()
  |> Enum.flat_map(fn bench ->
    criterion_dir
    |> Path.join(bench)
    |> File.ls!()
    |> Enum.map(fn subbench -> %{bench: bench, subbench: subbench} end)
  end)
  |> Enum.flat_map(fn %{bench: bench, subbench: subbench} ->
    subbench_dir = Path.join([criterion_dir, bench, subbench])

    subbench_dir
    |> File.ls!()
    |> Enum.filter(fn run -> String.contains?(run, "Mapping2D") end)
    |> Enum.map(fn run ->
      raw_results =
        subbench_dir
        |> Path.join(run)
        |> Path.join("estimates.json")
        |> File.read!()
        |> JSON.decode!()

      %{
        bench_id: bench <> "/" <> subbench,
        # The run directory name encodes the assignment as a sequence of
        # `"<ctn>", <impl>` pairs; extract each pair.
        using:
          Regex.scan(~r/\"(\w*)\", ([\w:]*)/, run)
          |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
        # Scale the point estimate down by 10^9
        # (presumably nanoseconds to seconds; confirm against the benchmark output).
        mean: raw_results["mean"]["point_estimate"] / 10 ** 9
      }
    end)
  end)
  |> DF.new()
```

```elixir
# Cost estimates by project, ctn, and implementation
# The project name is the part of the bench id before the first "-".
projs = benchmarks["bench_id"] |> SE.split("-") |> SE.transform(&hd/1)
benchmarks = DF.put(benchmarks, "proj", projs)

# TODO: consistently name rest of benchmarks
projs = SE.mask(projs, SE.contains(projs, "example"))

cost_estimates =
  SE.transform(projs |> SE.distinct(), fn proj_name ->
    # The table body is the text between the header row (ending in "& file")
    # and the closing \end{tabular}.
    [_, table | _] =
      Path.join(sections_dir, "compare-" <> proj_name)
      |> File.read!()
      |> String.split("& file \\\\\n\\hline\n")

    table
    |> String.split("\n\\end{tabular}")
    |> hd()
    |> String.split("\n")
    |> Enum.map(fn x -> String.split(x, " & ") end)
    |> Enum.map(fn [ctn, impl, cost | _] ->
      %{
        proj: proj_name,
        ctn: ctn,
        impl: impl |> String.replace("\\_", "_"),
        cost: Parse.float!(cost)
      }
    end)
  end)
  |> SE.to_list()
  |> List.flatten()
  |> DF.new()
```

```elixir
# Get cost of assignment from cost estimates
# An assignment is a list of %{"ctn" => ..., "impl" => ...} maps; its cost is
# the sum of the estimated costs of its entries within the given project.
cost_of_assignment = fn proj, assignment ->
  assignment
  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
    DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)["cost"][0]
  end)
  |> Enum.sum()
end

cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
```

```elixir
# Estimate cost for each benchmarked assignment
estimated_costs =
  benchmarks
  |> DF.to_rows_stream()
  # TODO
  |> Enum.filter(fn %{"proj" => proj} -> String.contains?(proj, "example") end)
  |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      estimated_cost: cost_of_assignment.(proj, using)
    }
  end)
  |> DF.new()
```

```elixir
# Compare each assignment's position in the estimates to its position in the results
sorted_estimates =
  estimated_costs
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(estimated_cost)

sorted_results =
  benchmarks
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(mean)

sorted_estimates
|> DF.to_rows_stream()
|> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
  %{
    bench_id: bench_id,
    using: using,
    # Index of this assignment among the same benchmark's rows, in each ordering.
    pos_estimate:
      DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
      |> SE.to_list()
      |> Enum.find_index(fn u -> u == using end),
    pos_results:
      DF.filter(sorted_results, bench_id == ^bench_id)["using"]
      |> SE.to_list()
      |> Enum.find_index(fn u -> u == using end)
  }
end)
|> DF.new()
```

```elixir
# Difference in execution time between worst and best selection
Tucan.bar(
  benchmarks
  |> DF.group_by("bench_id")
  |> DF.summarise(range: max(mean) - min(mean)),
  "bench_id",
  "range",
  orient: :horizontal,
  clip: true
)
|> Tucan.Scale.set_x_domain(0, 5)
```