# Dissertation Visualisations

```elixir
Mix.install([
  {:tucan, "~> 0.3.0"},
  {:kino_vega_lite, "~> 0.1.8"},
  {:json, "~> 1.4"},
  {:explorer, "~> 0.8.0"},
  {:kino_explorer, "~> 0.1.11"}
])
```

## Variables

```elixir
require Explorer.DataFrame
alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: SE
# Identifier of the job whose output directory is visualised by this notebook.
job_id = "1146"

# Absolute path of the job directory, resolved against the current working
# directory. `Path.expand/1` already returns an absolute path, so no extra
# `Path.absname/1` is needed, and passing the binary directly avoids building
# an improper charlist with `++`.
job_dir = Path.expand(job_id)

# Sub-directories read by the later cells: cost-model/comparison tables and
# benchmark results respectively.
sections_dir = Path.join(job_dir, "sections")
criterion_dir = Path.join(job_dir, "criterion")
```

## Read cost model data

```elixir
# List the entries of the sections directory and keep, as absolute paths, only
# those whose name contains both "cost-model" and "vec--Vec".
{:ok, section_entries} = File.ls(sections_dir)

cost_model_files =
  for entry <- section_entries,
      String.contains?(entry, "cost-model"),
      String.contains?(entry, "vec--Vec") do
    sections_dir
    |> Path.join(entry)
    |> Path.absname()
  end

# Show the selected paths as the cell output.
cost_model_files
```

```elixir
defmodule CostModel do
  @moduledoc """
  Cost model of one implementation: its name (`impl`, `nil` by default) and
  the list of per-operation models (`ops`, empty by default).
  """

  defstruct [:impl, ops: []]
end

defmodule OpCostModel do
  @moduledoc """
  Cost model of a single operation: the operation name (`op`), the four
  coefficients `x0`..`x3` and an `nmrse` value. Every field defaults to `nil`.
  """

  defstruct [:op, :x0, :x1, :x2, :x3, :nmrse]
end

defmodule Parse do
  @moduledoc """
  Parses cost-model table files into `CostModel` and `OpCostModel` structs.
  """

  @doc """
  Parses one table row of the form `name & x0 & x1 & x2 & x3 & nmrse`.

  Only the first whitespace-separated token of the last cell is used as the
  NMRSE, so anything following the number in that cell is ignored. Numeric
  cells may be written with or without a decimal point (`"3"`, `"3.0"`,
  `"1e-5"`).

  Raises `MatchError` if the row does not have exactly six cells and
  `ArgumentError` if a numeric cell is not a number.
  """
  def cost_model_row(row) do
    [name, x0, x1, x2, x3, nmrse] = String.split(row, " & ")
    # Keep only the first token of the last cell; the rest is dropped.
    [nmrse | _] = String.split(nmrse)

    %OpCostModel{
      op: name,
      x0: to_number(x0),
      x1: to_number(x1),
      x2: to_number(x2),
      x3: to_number(x3),
      nmrse: to_number(nmrse)
    }
  end

  @doc """
  Reads the cost-model table stored in `fname` and returns a `%CostModel{}`.

  The implementation name is whatever follows `cost-model-` in the base name
  of `fname`, with every `-` replaced by `:`. Blank lines inside the table are
  skipped.

  Raises `File.Error` if the file cannot be read and `MatchError` if the
  expected markers are missing from the contents or the file name.
  """
  def cost_model_output(fname) do
    contents = File.read!(fname)

    # The rows are everything after the first "line\n" (presumably the end of
    # an "\hline" header line) and before the first "\n\end".
    [_, table | _] = String.split(contents, "line\n")
    [rows | _] = String.split(table, "\n\\end")

    # Split the base name only, so a "cost-model-" further up the directory
    # path cannot break the match.
    [_, impl] = fname |> Path.basename() |> String.split("cost-model-", parts: 2)

    %CostModel{
      impl: String.replace(impl, "-", ":"),
      ops: rows |> String.split("\n", trim: true) |> Enum.map(&cost_model_row/1)
    }
  end

  # Converts a numeric table cell to a float. Unlike `String.to_float/1` this
  # also accepts integers and exponents without a decimal point.
  defp to_number(text) do
    case Float.parse(String.trim(text)) do
      {value, ""} -> value
      _ -> raise ArgumentError, "expected a number, got: #{inspect(text)}"
    end
  end
end
```

<!-- livebook:{"reevaluate_automatically":true} -->

```elixir
# Parse every selected cost-model file into a %CostModel{}.
cost_models = Enum.map(cost_model_files, &Parse.cost_model_output/1)
```

## Cost model exploratory plots

```elixir
defmodule PlotCostModel do
  @moduledoc """
  Samples the per-operation cost models and draws them as Tucan line plots.
  """

  # Sampled input sizes: from @startn to @endn in steps of @resolution.
  @startn 0
  @endn 20000
  @resolution 100

  @doc """
  Evaluates the cubic `x0 + n*x1 + n^2*x2 + n^3*x3` for every `n` in `ns`.

  Returns a list of `%{n: n, t: t}` maps.
  """
  def gen_ts(ns, %OpCostModel{x0: x0, x1: x1, x2: x2, x3: x3}) do
    for n <- ns do
      %{n: n, t: x0 + n * x1 + n * n * x2 + n * n * n * x3}
    end
  end

  @doc """
  Samples `op` over the module's range of sizes and tags every point with the
  operation name under `:name`.
  """
  def points_op(op) do
    sizes = @startn..@endn//@resolution

    sizes
    |> gen_ts(op)
    |> Enum.map(&Map.put(&1, :name, op.op))
  end

  @doc """
  Returns the sampled points of every operation in `model` as one flat list,
  each additionally tagged with the model's implementation under `:impl`.
  """
  def points_model(model) do
    Enum.flat_map(model.ops, fn op ->
      for point <- points_op(op), do: Map.put(point, :impl, model.impl)
    end)
  end

  @doc """
  Returns the sampled points of all `models` as one flat list.
  """
  def points(models) do
    Enum.flat_map(models, &points_model/1)
  end

  @doc """
  Draws a line plot of `t` against `n` for `op`, titled with the operation name.
  """
  def plot_op(op) do
    Tucan.lineplot(points_op(op), "n", "t", title: op.op)
  end

  @doc """
  Lays out one plot per operation of the cost model in three columns and
  titles the result with the implementation name.
  """
  def plot_all_ops(%CostModel{impl: impl, ops: ops}) do
    op_plots = Enum.map(ops, &plot_op/1)

    op_plots
    |> Tucan.concat(columns: 3)
    |> Tucan.set_title(impl)
  end
end
```

<!-- livebook:{"reevaluate_automatically":true} -->

```elixir
# One set of per-operation plots for each cost model, concatenated vertically.
model_plots = Enum.map(cost_models, &PlotCostModel.plot_all_ops/1)
Tucan.vconcat(model_plots)
```

## Read benchmark data

```elixir
# Collect the mean estimate of every benchmarked assignment into a data frame
# with the columns `bench_id`, `using` and `mean`.
#
# The layout read here is
# `<criterion_dir>/<bench>/<subbench>/<assignment>/estimates.json`; only
# assignment directories whose name contains "Mapping2D" are used.

# Lists the sub-directories of `parent` as full paths. Entries that are not
# directories are skipped so that stray files in the tree cannot crash the
# nested `File.ls!/1` calls.
list_dirs = fn parent ->
  parent
  |> File.ls!()
  |> Enum.map(&Path.join(parent, &1))
  |> Enum.filter(&File.dir?/1)
end

benchmark_rows =
  for bench_dir <- list_dirs.(criterion_dir),
      subbench_dir <- list_dirs.(bench_dir),
      dir <- list_dirs.(subbench_dir),
      String.contains?(Path.basename(dir), "Mapping2D") do
    raw_results =
      dir
      |> Path.join("estimates.json")
      |> File.read!()
      |> JSON.decode!()

    %{
      bench_id: Path.basename(bench_dir) <> "/" <> Path.basename(subbench_dir),
      # The directory name encodes the assignment as `"<ctn>", <impl>` pairs.
      using:
        Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
        |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
      # NOTE(review): presumably converts nanoseconds to seconds — confirm
      # the unit of `point_estimate`.
      mean: raw_results["mean"]["point_estimate"] / 10 ** 9
    }
  end

benchmarks = DF.new(benchmark_rows)
```

```elixir
# Cost estimates by project, ctn, and implementation
#
# The project name is the part of each `bench_id` before the first "-".
bench_id_parts = SE.split(benchmarks["bench_id"], "-")
projs = SE.transform(bench_id_parts, fn parts -> hd(parts) end)

benchmarks = DF.put(benchmarks, "proj", projs)

# TODO: consistently name rest of benchmarks
# Only project names containing "example" are kept from here on.
example_mask = SE.contains(projs, "example")
projs = SE.mask(projs, example_mask)

# Parse the comparison table of every selected project into one data frame
# with the columns `proj`, `ctn`, `impl` and `cost`.
#
# Each table is read from `<sections_dir>/compare-<proj>`. Its rows are the
# lines between the header (ending in "& file \\" followed by "\hline") and
# "\end{tabular}"; blank lines are skipped.

# Converts a cost cell to a float. Unlike `String.to_float/1` this also
# accepts numbers written without a decimal point, and it still fails loudly
# on anything that is not a number.
parse_cost = fn text ->
  case Float.parse(String.trim(text)) do
    {value, ""} -> value
    _ -> raise ArgumentError, "expected a numeric cost, got: #{inspect(text)}"
  end
end

cost_estimates =
  projs
  |> SE.distinct()
  |> SE.to_list()
  |> Enum.flat_map(fn proj_name ->
    [_, table | _] =
      Path.join(sections_dir, "compare-" <> proj_name)
      |> File.read!()
      |> String.split("& file \\\\\n\\hline\n")

    table
    |> String.split("\n\\end{tabular}")
    |> hd()
    |> String.split("\n", trim: true)
    |> Enum.map(fn row ->
      # Any cells after the cost are ignored.
      [ctn, impl, cost | _] = String.split(row, " & ")

      %{
        proj: proj_name,
        ctn: ctn,
        # Undo the LaTeX escaping of underscores in implementation names.
        impl: String.replace(impl, "\\_", "_"),
        cost: parse_cost.(cost)
      }
    end)
  end)
  |> DF.new()
```

```elixir
# Get cost of assignment from cost estimates: the sum, over every
# (ctn, impl) choice in `assignment`, of the first matching `cost` entry for
# project `proj`.
cost_of_assignment = fn proj, assignment ->
  Enum.reduce(assignment, 0, fn %{"ctn" => ctn, "impl" => impl}, total ->
    matching = DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)
    matching["cost"][0] + total
  end)
end

# Example lookup, shown as the cell output.
cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
```

```elixir
# Estimate cost for each benchmarked assignment
# TODO
example_project? = fn %{"proj" => proj} -> String.contains?(proj, "example") end

# Turns one benchmark row into its estimated-cost row.
to_estimate = fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
  %{
    bench_id: bench_id,
    using: using,
    estimated_cost: cost_of_assignment.(proj, using)
  }
end

estimated_costs =
  benchmarks
  |> DF.to_rows_stream()
  |> Stream.filter(example_project?)
  |> Enum.map(to_estimate)
  |> DF.new()
```

```elixir
# Compare each assignment's position in the estimates to its position in the results
sorted_estimates = DF.sort_by(DF.group_by(estimated_costs, ["bench_id"]), estimated_cost)
sorted_results = DF.sort_by(DF.group_by(benchmarks, ["bench_id"]), mean)

# Zero-based index of `using` among the rows of `df` that belong to
# `bench_id`, or nil when it does not occur there.
position_of = fn df, bench_id, using ->
  DF.filter(df, bench_id == ^bench_id)["using"]
  |> SE.to_list()
  |> Enum.find_index(fn candidate -> candidate == using end)
end

positions =
  sorted_estimates
  |> DF.to_rows_stream()
  |> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      pos_estimate: position_of.(sorted_estimates, bench_id, using),
      pos_results: position_of.(sorted_results, bench_id, using)
    }
  end)

DF.new(positions)
```

<!-- livebook:{"reevaluate_automatically":true} -->

```elixir
# Difference in execution time between worst and best selection
mean_ranges =
  benchmarks
  |> DF.group_by("bench_id")
  |> DF.summarise(range: max(mean) - min(mean))

# Horizontal bars, one per bench_id, with the x axis limited to 0..5 and
# longer bars clipped.
mean_ranges
|> Tucan.bar("bench_id", "range", orient: :horizontal, clip: true)
|> Tucan.Scale.set_x_domain(0, 5)
```