# Dissertation Visualisations

```elixir
Mix.install([
  {:tucan, "~> 0.3.0"},
  {:kino_vega_lite, "~> 0.1.8"},
  {:json, "~> 1.4"},
  {:explorer, "~> 0.8.0"},
  {:kino_explorer, "~> 0.1.11"}
])
```

## Variables

```elixir
require Explorer.DataFrame
require Explorer.Series

alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: SE

job_id = "1177"
job_dir = Path.expand("./" <> job_id)
sections_dir = Path.join(job_dir, "sections")
cm_dir = Path.join([job_dir, "candelabra", "benchmark_results"])
criterion_dir = Path.join(job_dir, "criterion")
```

## Read cost model data

```elixir
{:ok, cost_model_files} = File.ls(cm_dir)

cost_model_files =
  cost_model_files
  |> Enum.map(fn fname -> Path.join(cm_dir, fname) |> Path.absname() end)

cost_model_files
```

```elixir
# Parse the fitted cost model coefficients for each implementation
cost_models =
  cost_model_files
  |> Enum.map(fn fname ->
    impl = Path.basename(fname) |> String.replace("_", ":")
    contents = File.read!(fname)
    contents = JSON.decode!(contents)

    contents["model"]["by_op"]
    |> Enum.map(fn {op, %{"coeffs" => coeffs}} ->
      %{
        op: op,
        impl: impl,
        coeffs: coeffs
      }
    end)
    |> DF.new()
  end)
  |> DF.concat_rows()
```

```elixir
# Parse the raw timing observations that each cost model was fitted to
cost_model_points =
  cost_model_files
  |> Enum.map(fn fname ->
    impl = Path.basename(fname) |> String.replace("_", ":")
    contents = File.read!(fname)
    contents = JSON.decode!(contents)

    contents["results"]["by_op"]
    |> Enum.flat_map(fn {op, results} ->
      Enum.map(results, fn [n, cost] ->
        %{
          op: op,
          impl: String.split(impl, "::") |> List.last(),
          n: n,
          t: cost
        }
      end)
    end)
    |> DF.new()
  end)
  |> DF.concat_rows()
```

```elixir
cost_models
|> DF.filter(op == "contains")
```

## Cost model exploratory plots

```elixir
startn = 0
endn = 60_000
resolution = 100

# Evaluate the fitted polynomial for a given implementation and operation
# at evenly spaced container sizes, clamping negative predictions to zero.
points_for = fn impl, op ->
  %{"coeffs" => [coeffs]} =
    DF.filter(cost_models, impl == ^impl and op == ^op)
    |> DF.to_columns()

  Enum.map(startn..endn//resolution, fn n ->
    %{
      impl: String.split(impl, "::") |> List.last(),
      op: op,
      n: n,
      t:
        coeffs
        |> Enum.with_index()
        |> Enum.map(fn {coeff, idx} -> coeff * n ** idx end)
        |> Enum.sum()
        |> max(0)
    }
  end)
  |> DF.new()
end
```

```elixir
inspect_op = "insert"

Tucan.layers([
  cost_models
  |> DF.filter(op == ^inspect_op)
  |> DF.distinct(["impl"])
  |> DF.to_rows()
  |> Enum.map(fn %{"impl" => impl} -> points_for.(impl, inspect_op) end)
  |> DF.concat_rows()
  |> Tucan.lineplot("n", "t", color_by: "impl", clip: true)
  |> Tucan.Scale.set_y_domain(0, 200),
  Tucan.scatter(
    cost_model_points
    |> DF.filter(op == ^inspect_op)
    |> DF.group_by(["impl", "n"])
    |> DF.summarise(t: mean(t)),
    "n",
    "t",
    color_by: "impl",
    clip: true
  )
])
```

## Read benchmark data

```elixir
raw_benchmarks =
  File.ls!(criterion_dir)
  |> Enum.map(fn name ->
    File.ls!(Path.join(criterion_dir, name))
    |> Enum.map(fn p -> %{bench: name, subbench: p} end)
  end)
  |> List.flatten()
  |> Enum.map(fn %{bench: bench, subbench: subbench} ->
    File.ls!(Path.join([criterion_dir, bench, subbench]))
    |> Enum.filter(fn x -> String.contains?(x, "Mapping2D") end)
    |> Enum.map(fn x -> Path.join([criterion_dir, bench, subbench, x]) end)
    |> Enum.map(fn dir ->
      raw_results =
        Path.join(dir, "estimates.json")
        |> File.read!()
        |> JSON.decode!()

      %{
        bench_id: bench <> "/" <> subbench,
        proj: String.split(bench, "-") |> hd,
        using:
          Regex.scan(~r/\"(\w*)\", ([\w:]*)/, Path.basename(dir))
          |> Enum.map(fn [_, ctn, impl] -> %{ctn: ctn, impl: impl} end),
        # Criterion reports nanoseconds; convert to seconds
        mean: raw_results["mean"]["point_estimate"] / 10 ** 9
      }
    end)
  end)
  |> List.flatten()
  |> DF.new()
```
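Before aggregating, it is worth confirming that each benchmark actually saw several candidate assignments. The cell below is an optional sanity check sketched against the `raw_benchmarks` frame above, not part of the original pipeline; the counts depend entirely on the job directory being analysed.

```elixir
# Optional sanity check (illustrative): how many candidate container
# assignments were benchmarked for each bench_id?
raw_benchmarks
|> DF.group_by("bench_id")
|> DF.summarise(n_assignments: count(bench_id))
|> DF.sort_by(desc: n_assignments)
```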
```elixir
# Aggregate benchmark results by project, since we can only do assignments by project
uniq_proj_using =
  DF.select(raw_benchmarks, ["proj", "using"])
  |> DF.to_rows()
  |> Enum.uniq()
  |> DF.new()

# The per-project summarise was left unfinished here; the cells below consume
# the per-benchmark rows directly, so expose them under the name they expect.
benchmarks = raw_benchmarks

uniq_proj_using
```

```elixir
# Cost estimates by project, ctn, and implementation,
# parsed out of the LaTeX comparison tables in the sections directory
projs = SE.distinct(benchmarks["proj"]) |> SE.to_list()

cost_estimates =
  projs
  |> Enum.flat_map(fn proj_name ->
    [_, table | _] =
      Path.join(sections_dir, "compare-" <> proj_name)
      |> File.read!()
      |> String.split("& file \\\\\n\\hline\n")

    table
    |> String.split("\n\\end{tabular}")
    |> hd
    |> String.split("\n")
    |> Enum.map(fn x -> String.split(x, " & ") end)
    |> Enum.map(fn [ctn, impl, cost | _] ->
      %{
        proj: proj_name,
        ctn: ctn,
        impl: impl |> String.replace("\\_", "_"),
        cost:
          if String.contains?(cost, ".") do
            String.to_float(cost)
          else
            String.to_integer(cost)
          end
      }
    end)
  end)
  |> DF.new()
```

```elixir
# Get the cost of an assignment by summing the estimates for its choices
cost_of_assignment = fn proj, assignment ->
  assignment
  |> Enum.map(fn %{"ctn" => ctn, "impl" => impl} ->
    DF.filter(cost_estimates, proj == ^proj and ctn == ^ctn and impl == ^impl)["cost"][0]
  end)
  |> Enum.sum()
end

cost_of_assignment.("example_stack", [%{"ctn" => "StackCon", "impl" => "std::vec::Vec"}])
```

```elixir
# Estimate cost for each benchmarked assignment
estimated_costs =
  benchmarks
  |> DF.to_rows_stream()
  |> Enum.map(fn %{"bench_id" => bench_id, "proj" => proj, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      estimated_cost: cost_of_assignment.(proj, using)
    }
  end)
  |> DF.new()
```

```elixir
# Compare each assignment's position in the estimates to its position in the results
sorted_estimates =
  estimated_costs
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(estimated_cost)

sorted_results =
  benchmarks
  |> DF.group_by(["bench_id"])
  |> DF.sort_by(mean)

position_comparison =
  sorted_estimates
  |> DF.to_rows_stream()
  |> Enum.map(fn %{"bench_id" => bench_id, "using" => using} ->
    %{
      bench_id: bench_id,
      using: using,
      pos_estimate:
        DF.filter(sorted_estimates, bench_id == ^bench_id)["using"]
        |> SE.to_list()
        |> Enum.find_index(fn u -> u == using end),
      pos_results:
        DF.filter(sorted_results, bench_id == ^bench_id)["using"]
        |> SE.to_list()
        |> Enum.find_index(fn u -> u == using end)
    }
  end)
  |> DF.new()
```

```elixir
position_comparison
|> DF.filter(pos_estimate != pos_results)
|> DF.collect()
```

```elixir
position_comparison
|> DF.filter(pos_estimate == 0)
|> DF.select(["bench_id", "using"])
```

```elixir
# Difference in execution time (seconds) between the worst and best selection
Tucan.bar(
  benchmarks
  |> DF.group_by("bench_id")
  |> DF.summarise(range: max(mean) - min(mean)),
  "bench_id",
  "range",
  orient: :horizontal,
  clip: true
)
|> Tucan.Scale.set_x_domain(0, 5)
```
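The two position columns also admit a single summary number: how often the assignment with the lowest estimated cost is also the fastest one in practice. The cell below is a sketch over the `position_comparison` frame built earlier; the `top_picks` and `agree` names are illustrative, not part of the pipeline.

```elixir
# Fraction of benchmarks where the cheapest estimated assignment
# (pos_estimate == 0) is also the fastest measured one (pos_results == 0).
top_picks = DF.filter(position_comparison, pos_estimate == 0)
agree = DF.filter(top_picks, pos_results == 0)

DF.n_rows(agree) / max(DF.n_rows(top_picks), 1)
```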