From 9da1961a33b20cc64d920ae82f6cc49c42d0c728 Mon Sep 17 00:00:00 2001
From: Aria <me@aria.rip>
Date: Thu, 14 Dec 2023 18:42:24 +0000
Subject: refactor(candelabra): split cli, reduce duplication

introduce an invalidation function to the cache helper to get rid of
repetitive code
split cli and candelabra out to separate crates
move most top-level operations into the State struct
---
 src/Cargo.toml                              |   5 +-
 src/crates/candelabra/Cargo.toml            |  19 ++++
 src/crates/candelabra/src/cache.rs          | 126 +++++++++++++++++++++
 src/crates/candelabra/src/candidates.rs     | 116 ++++++++++++++++++++
 src/crates/candelabra/src/cost/benchmark.rs | 164 ++++++++++++++++++++++++++++
 src/crates/candelabra/src/cost/fit.rs       |  79 ++++++++++++++
 src/crates/candelabra/src/cost/mod.rs       | 117 ++++++++++++++++++++
 src/crates/candelabra/src/lib.rs            |  41 +++++++
 src/crates/candelabra/src/paths.rs          |  47 ++++++++
 src/crates/candelabra/src/profiler.rs       | 132 ++++++++++++++++++++++
 src/crates/candelabra/src/project.rs        |  35 ++++++
 src/crates/cli/Cargo.toml                   |  17 +--
 src/crates/cli/src/cache.rs                 | 111 -------------------
 src/crates/cli/src/candidates.rs            | 136 -----------------------
 src/crates/cli/src/cmd.rs                   |  39 -------
 src/crates/cli/src/cost/benchmark.rs        | 164 ----------------------------
 src/crates/cli/src/cost/fit.rs              |  79 --------------
 src/crates/cli/src/cost/mod.rs              | 121 --------------------
 src/crates/cli/src/main.rs                  | 143 +++++-------------------
 src/crates/cli/src/paths.rs                 |  47 --------
 src/crates/cli/src/profiler/mod.rs          | 121 --------------------
 src/crates/cli/src/project.rs               |  35 ------
 22 files changed, 911 insertions(+), 983 deletions(-)
 create mode 100644 src/crates/candelabra/Cargo.toml
 create mode 100644 src/crates/candelabra/src/cache.rs
 create mode 100644 src/crates/candelabra/src/candidates.rs
 create mode 100644 src/crates/candelabra/src/cost/benchmark.rs
 create mode 100644 src/crates/candelabra/src/cost/fit.rs
 create mode 100644 src/crates/candelabra/src/cost/mod.rs
 create mode 100644 src/crates/candelabra/src/lib.rs
 create mode 100644 src/crates/candelabra/src/paths.rs
 create mode 100644 src/crates/candelabra/src/profiler.rs
 create mode 100644 src/crates/candelabra/src/project.rs
 delete mode 100644 src/crates/cli/src/cache.rs
 delete mode 100644 src/crates/cli/src/candidates.rs
 delete mode 100644 src/crates/cli/src/cmd.rs
 delete mode 100644 src/crates/cli/src/cost/benchmark.rs
 delete mode 100644 src/crates/cli/src/cost/fit.rs
 delete mode 100644 src/crates/cli/src/cost/mod.rs
 delete mode 100644 src/crates/cli/src/paths.rs
 delete mode 100644 src/crates/cli/src/profiler/mod.rs
 delete mode 100644 src/crates/cli/src/project.rs

diff --git a/src/Cargo.toml b/src/Cargo.toml
index d3658ac..3b0a7ba 100644
--- a/src/Cargo.toml
+++ b/src/Cargo.toml
@@ -3,8 +3,9 @@ resolver = "2"
 members = [
     "crates/primrose",
     "crates/library",
-    "crates/cli",
-    "crates/benchmarker"
+    "crates/benchmarker",
+    "crates/candelabra",
+    "crates/cli"
 ]
 
 [workspace.dependencies]
diff --git a/src/crates/candelabra/Cargo.toml b/src/crates/candelabra/Cargo.toml
new file mode 100644
index 0000000..909e577
--- /dev/null
+++ b/src/crates/candelabra/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "candelabra"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+log = { workspace = true }
+primrose = { path = "../primrose" }
+anyhow = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+camino = "1.1.6"
+cargo_metadata = "0.18.1"
+glob = "0.3.1"
+tempfile = "3"
+nalgebra = "0.32.3"
+polars = { version = "0.35.4", features = ["describe"] }
diff --git a/src/crates/candelabra/src/cache.rs b/src/crates/candelabra/src/cache.rs
new file mode 100644
index 0000000..424b2e7
--- /dev/null
+++ b/src/crates/candelabra/src/cache.rs
@@ -0,0 +1,126 @@
+//! Common utilities for caching results
+use std::{
+    cell::RefCell,
+    collections::hash_map::DefaultHasher,
+    fs::{create_dir_all, metadata, remove_file, File},
+    hash::{Hash, Hasher},
+    marker::PhantomData,
+};
+
+use anyhow::{anyhow, Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use glob::glob;
+use log::{debug, warn};
+use serde::{Deserialize, Serialize};
+use serde_json::{from_reader, to_writer};
+
+/// A filesystem-based K/V cache, storing one JSON-serialised file per key under `base_dir`.
+/// Entries rejected by `validator` are reported as missing and deleted when looked up.
+pub struct FileCache<K: 'static + ?Sized, V> {
+    base_dir: Utf8PathBuf,
+    validator: RefCell<Box<dyn FnMut(&K, &V) -> bool>>,
+    _data: PhantomData<(&'static K, V, V)>,
+}
+
+impl<K: ?Sized + ToString, V: Serialize + for<'a> Deserialize<'a>> FileCache<K, V> {
+    /// Create a cache rooted at `base_dir` (created if missing), checking entries with `validator`.
+    pub fn new(
+        base_dir: Utf8PathBuf,
+        validator: impl FnMut(&K, &V) -> bool + 'static,
+    ) -> Result<Self> {
+        create_dir_all(base_dir.as_std_path()).context("Error creating cache directory")?;
+        Ok(Self {
+            base_dir,
+            validator: RefCell::new(Box::new(validator)),
+            _data: PhantomData,
+        })
+    }
+
+    /// Store `value`, serialised as JSON, in the file backing `key`
+    pub fn put(&self, key: &K, value: &V) -> Result<()> {
+        let path = self.path_for(key);
+        let mut file = File::create(path)?;
+        to_writer(&mut file, value)?;
+
+        Ok(())
+    }
+
+    /// Attempt to load cache entry with the given `key`
+    /// `Ok(None)` indicates no valid cache entry, while `Err(e)` means the entry could not be opened
+    /// Entries that fail to deserialise or are rejected by the validator are deleted.
+    pub fn find(&self, key: &K) -> Result<Option<V>> {
+        let path = self.path_for(key);
+        if !path.exists() {
+            return Ok(None);
+        }
+
+        let file = File::open(&path).context("Error opening cache entry")?;
+        let contents: V = match self.attempt_load(key, &file) {
+            Ok(x) => x,
+            Err(e) => {
+                debug!("Invalid cache entry: {}", e);
+                if let Err(e) = self.remove(key) {
+                    warn!("Error deleting invalid cache entry: {}", e);
+                }
+
+                return Ok(None);
+            }
+        };
+
+        Ok(Some(contents))
+    }
+
+    fn attempt_load(&self, key: &K, f: &File) -> Result<V> {
+        let c = from_reader(f)?; // a deserialisation failure is reported like a rejected entry
+        if (self.validator.borrow_mut())(key, &c) {
+            Ok(c)
+        } else {
+            Err(anyhow!("validation function said no"))
+        }
+    }
+
+    /// Remove value for the given key; errors if the backing file cannot be removed
+    pub fn remove(&self, key: &K) -> Result<()> {
+        Ok(remove_file(self.path_for(key))?)
+    }
+
+    /// Get the path for a given key: `base_dir` joined with the sanitised string form of the key
+    fn path_for(&self, key: &K) -> Utf8PathBuf {
+        // Sanitise key name: each of / \ : * ? " < > | becomes `_`, and an empty key becomes `_`
+        let key = key.to_string();
+        let mut san = String::with_capacity(key.len());
+        for chr in key.chars() {
+            if chr == '/'
+                || chr == '\\'
+                || chr == ':'
+                || chr == '*'
+                || chr == '?'
+                || chr == '"'
+                || chr == '<'
+                || chr == '>'
+                || chr == '|'
+            {
+                san += "_";
+            } else {
+                san.push(chr);
+            }
+        }
+        if san.is_empty() {
+            san += "_";
+        }
+        self.base_dir.join(san) // `san` has already been sanitised by the loop above
+    }
+}
+
+/// Hash the current state of `dir`, built from the modification time of every entry under it.
+/// Errors if the glob pattern is invalid or any entry's metadata cannot be read.
+pub fn gen_tree_hash(dir: &Utf8Path) -> Result<u64> {
+    let mut hasher = DefaultHasher::new();
+
+    // `dir` is spliced into the pattern unescaped, so report a bad pattern instead of panicking.
+    for f in glob(&format!("{}/**/*", dir)).context("Error building glob for directory")? {
+        metadata(f?)?.modified()?.hash(&mut hasher);
+    }
+
+    Ok(hasher.finish())
+}
diff --git a/src/crates/candelabra/src/candidates.rs b/src/crates/candelabra/src/candidates.rs
new file mode 100644
index 0000000..0d76862
--- /dev/null
+++ b/src/crates/candelabra/src/candidates.rs
@@ -0,0 +1,116 @@
+//! Generating and caching primrose candidate results
+
+use std::{collections::HashMap, fs::metadata, time::SystemTime};
+
+use anyhow::{Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use log::{debug, warn};
+use primrose::ContainerSelector;
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    cache::{gen_tree_hash, FileCache},
+    paths::Paths,
+    project::Project,
+    State,
+};
+
+/// Names a container type we want to select.
+pub type ConTypeName = String;
+
+/// Name of a container implementation we are considering
+pub type ImplName = String;
+
+/// A list of candidate container types
+pub type Candidates = HashMap<ConTypeName, Vec<ImplName>>;
+
+/// A list of candidates for each selection site, and each file in a given project
+pub type ProjectCandidateList = Vec<(Utf8PathBuf, Vec<(ConTypeName, Vec<ImplName>)>)>;
+
+/// Info for getting & caching candidate types
+pub struct CandidatesStore {
+    pub store: FileCache<Utf8Path, CacheEntry>, // keyed by the path of the primrose source file
+    pub lib_hash: u64, // tree hash of `paths.library_crate`, taken when the store is created
+}
+
+/// Entry in the candidates cache
+#[derive(Serialize, Deserialize, Debug)]
+pub struct CacheEntry {
+    lib_hash: u64, // library hash the candidates were computed against
+    mod_time: SystemTime, // modification time of the source file that was analysed
+    value: Candidates,
+}
+
+impl CandidatesStore {
+    /// Create a new store, using the given paths.
+    /// Candidates are cached in `paths.target_dir / candelabra / primrose_results`
+    pub fn new(paths: &Paths) -> Result<Self> {
+        let base_dir = paths.target_dir.join("candelabra").join("primrose_results");
+
+        let lib_hash =
+            gen_tree_hash(&paths.library_crate).context("Error generating library hash")?;
+        debug!("Initialised candidate cacher with hash {}", lib_hash);
+
+        Ok(Self {
+            store: FileCache::new(base_dir, move |k, v: &CacheEntry| {
+                // Any failure to read the mtime (missing file, unsupported platform) falls
+                // back to the epoch rather than panicking inside the cache lookup.
+                let mod_time = metadata(k).and_then(|m| m.modified());
+                let mod_time = mod_time.unwrap_or(SystemTime::UNIX_EPOCH);
+                v.lib_hash == lib_hash && v.mod_time == mod_time
+            })?,
+            lib_hash,
+        })
+    }
+}
+
+impl State {
+    /// Run primrose on all files in the given project.
+    /// Returns a list of all candidates for each container type in each file.
+    pub fn project_candidate_list(&self, project: &Project) -> Result<ProjectCandidateList> {
+        let mut all_candidates = Vec::new();
+        for file in project.find_primrose_files()? {
+            let result = match self.candidates.store.find(&file)? {
+                Some(x) => x.value,
+                None => self.calc_candidates(&file)?,
+            };
+
+            // `result` is owned, so its entries are moved into the list rather than cloned.
+            // NOTE: the order of the entries follows `HashMap` iteration order, so it is
+            // not guaranteed to be stable between runs.
+            let typs = result.into_iter().collect::<Vec<_>>();
+            all_candidates.push((file, typs));
+        }
+
+        Ok(all_candidates)
+    }
+
+    /// Find candidate types for every selection site in a given path
+    fn calc_candidates(&self, path: &Utf8Path) -> Result<Candidates> {
+        // Read the mtime before analysing, so an edit made mid-run invalidates this entry.
+        let mod_time = metadata(path)?.modified()?;
+        let selector = ContainerSelector::from_path(
+            path.as_std_path(),
+            self.paths.library_src.as_std_path(),
+            self.model_size,
+        )
+        .with_context(|| format!("error getting container selector for {}", path))?;
+
+        let candidates: Candidates = selector
+            .find_all_candidates()?
+            .into_iter()
+            .map(|(k, v)| (k.to_string(), v))
+            .collect();
+        if let Err(e) = self.candidates.store.put(
+            path,
+            &CacheEntry {
+                lib_hash: self.candidates.lib_hash,
+                value: candidates.clone(),
+                mod_time,
+            },
+        ) {
+            warn!("Error caching candidates for {}: {}", path, e);
+        }
+        Ok(candidates)
+    }
+}
diff --git a/src/crates/candelabra/src/cost/benchmark.rs b/src/crates/candelabra/src/cost/benchmark.rs
new file mode 100644
index 0000000..a1e0e18
--- /dev/null
+++ b/src/crates/candelabra/src/cost/benchmark.rs
@@ -0,0 +1,164 @@
+//! Benchmarking of container types
+
+use std::{
+    collections::HashMap,
+    fs::{copy, create_dir, File},
+    io::Write,
+    process::Command,
+    time::Duration,
+};
+
+use anyhow::{bail, Context, Result};
+use log::{debug, log_enabled, Level};
+use primrose::{LibSpec, LibSpecs};
+use serde::{Deserialize, Serialize};
+use tempfile::{tempdir, TempDir};
+
+use crate::paths::Paths;
+
+/// The name of the element type we use for benchmarking
+pub const ELEM_TYPE: &str = "usize";
+
+/// String representation of the array of N values we use for benchmarking
+pub const NS: &str = "[8, 256, 1024, 65536]";
+
+/// Results for a whole suite of benchmarks, as returned by `run_benchmarks`
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Results {
+    /// Observations for each collection operation, keyed by operation name
+    pub by_op: HashMap<OpName, Vec<Observation>>,
+}
+
+/// Name of an operation
+pub type OpName = String;
+
+/// The first element of the tuple is the `n` of the container before the benchmark was taken, and the second is the result of the benchmark.
+pub type Observation = (usize, BenchmarkResult);
+
+/// Results for a single benchmark
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct BenchmarkResult {
+    /// Number of times the benchmark was run
+    pub times: usize,
+
+    /// The minimum time taken
+    pub min: Duration,
+
+    /// The maximum time taken
+    pub max: Duration,
+
+    /// The average (mean) time taken; this is the value the cost estimators are fitted to
+    pub avg: Duration,
+}
+
+/// Run benchmarks for the given container type, returning the results.
+/// Panics if `name` is not in the library specs, or at the unimplemented `todo!()` on success.
+pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result<Results> {
+    let lib_spec = lib_specs
+        .get(name)
+        .expect("name passed to benchmarkspec not in libspecs");
+
+    // Generate crate & source
+    let crate_ = prepare_crate(name, paths, lib_spec)?;
+
+    // Build and run
+    debug!("Building and running benchmarks for {}", name);
+    let run_output = Command::new("cargo")
+        .args(["run", "--release", "--", "--bench"])
+        .current_dir(crate_.path())
+        .env("CARGO_TARGET_DIR", &paths.target_dir) // Share target directory
+        .output()
+        .context("Error running build command")?;
+
+    if !run_output.status.success() {
+        bail!("Error result from benchmark. Output: {:?}", run_output);
+    }
+
+    if log_enabled!(Level::Debug) {
+        if let Ok(stdout) = std::str::from_utf8(&run_output.stdout) {
+            debug!("stdout: {:?}", stdout);
+        }
+        if let Ok(stderr) = std::str::from_utf8(&run_output.stderr) {
+            debug!("stderr: {:?}", stderr);
+        }
+    }
+
+    // Deserialise benchmark results (not implemented yet: borrowed above, nothing is parsed)
+    todo!()
+}
+
+fn prepare_crate(name: &str, paths: &Paths, lib_spec: &LibSpec) -> Result<TempDir> {
+    // Temporary directory the crate is generated in; it is returned to the caller
+    let crate_tempdir = tempdir()?;
+    let crate_dir = crate_tempdir.path();
+    debug!("Preparing benchmark crate for {} in {:?}", name, crate_dir);
+
+    // Write the manifest, depending on the benchmarker and library crates by path
+    let mut manifest =
+        File::create(crate_dir.join("Cargo.toml")).context("Error creating Cargo.toml")?;
+    manifest
+        .write_all(
+            format!(
+                "
+[package]
+name = \"bench\"
+version = \"0.1.0\"
+edition = \"2021\"
+
+[dependencies]
+candelabra-benchmarker = {{ path = \"{}\" }}
+primrose-library = {{ path = \"{}\" }}
+",
+                paths.benchmarker_crate, paths.library_crate,
+            )
+            .as_bytes(),
+        )
+        .context("Error writing Cargo.toml")?;
+
+    // Ensure we use the right toolchain, by copying `rust-toolchain.toml` from `paths.base`
+    let orig_toolchain_file = paths.base.join("rust-toolchain.toml");
+    copy(orig_toolchain_file, crate_dir.join("rust-toolchain.toml"))
+        .context("Error writing rust-toolchain.toml")?;
+
+    // Generate the code for running our benchmarks
+    let mut benchmark_statements = String::new();
+
+    // One `benchmark_<trait>` call is emitted per key of `interface_provide_map`
+    let implemented_traits = lib_spec.interface_provide_map.keys();
+    for tr in implemented_traits {
+        benchmark_statements += &format!(
+            "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(c, &NS);",
+            tr.to_lowercase(),
+            name,
+            ELEM_TYPE,
+        );
+    }
+
+    // Write the benchmarking source, using our generated benchmarker code.
+    let src_dir = crate_dir.join("src");
+    create_dir(&src_dir).context("Error creating src directory")?;
+
+    let mut src_file = File::create(src_dir.join("main.rs")).context("Error creating main.rs")?;
+    src_file
+        .write_all(
+            format!(
+                "
+use candelabra_benchmarker::criterion::{{criterion_group, criterion_main, Criterion}};
+
+const NS: &[usize] = &{};
+
+fn run_benches(c: &mut Criterion) {{
+    {}
+}}
+
+criterion_group!(benches, run_benches);
+criterion_main!(benches);
+",
+                NS, benchmark_statements
+            )
+            .as_bytes(),
+        )
+        .context("Error writing to main.rs")?;
+
+    Ok(crate_tempdir)
+}
diff --git a/src/crates/candelabra/src/cost/fit.rs b/src/crates/candelabra/src/cost/fit.rs
new file mode 100644
index 0000000..ace3634
--- /dev/null
+++ b/src/crates/candelabra/src/cost/fit.rs
@@ -0,0 +1,79 @@
+//! Fitting a 3rd-order polynomial to benchmark results
+//! Based on code from al-jshen: <https://github.com/al-jshen/compute/tree/master>
+
+use super::benchmark::Observation;
+use na::{Dyn, MatrixXx4, OVector};
+use serde::{Deserialize, Serialize};
+
+/// Estimates durations using a 3rd-order polynomial; coefficients are stored lowest order first.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct Estimator([f64; 4]);
+
+/// Approximate cost of an action.
+/// This is an approximation for the number of nanoseconds it would take.
+pub type Cost = f64;
+
+impl Estimator {
+    /// Least-squares fit to `results`. Panics if they do not determine a cubic (singular X^T X).
+    pub fn fit(results: &[Observation]) -> Self {
+        let (xs, ys) = Self::to_data(results);
+
+        // Normal equations: coeffs = (X^T X)^-1 X^T y, transposing X once and borrowing it.
+        let xv = vandermonde(&xs);
+        let xvt = xv.transpose();
+        let xtxinv = (&xvt * &xv).try_inverse().expect("X^T X is singular, cannot fit");
+        let coeffs = xtxinv * (xvt * ys);
+
+        Self(coeffs.into())
+    }
+
+    /// Calculate the residual sum of squares for the given data.
+    pub fn rss(&self, results: &[Observation]) -> f64 {
+        // TODO: there's a more efficient way to do this / bulk estimations
+        let (xs, ys) = Self::to_data(results);
+
+        xs.iter()
+            .zip(ys.iter())
+            .map(|(x, y)| (y - self.estimatef(*x)).powi(2))
+            .sum()
+    }
+
+    /// Estimate the cost of a given operation at the given `n`.
+    pub fn estimate(&self, n: usize) -> Cost {
+        self.estimatef(n as f64)
+    }
+
+    /// Estimate the cost of a given operation at the given (possibly fractional) `n`.
+    pub fn estimatef(&self, n: f64) -> Cost {
+        let [a, b, c, d] = self.0;
+        a + b * n + c * n.powi(2) + d * n.powi(3)
+    }
+
+    /// Convert a list of observations to `(n values, mean durations in nanoseconds)`.
+    fn to_data(results: &[Observation]) -> (Vec<f64>, OVector<f64, Dyn>) {
+        let xs = results.iter().map(|(n, _)| *n as f64).collect::<Vec<_>>();
+        let ys = OVector::<f64, Dyn>::from_iterator(
+            results.len(),
+            results
+                .iter()
+                .map(|(_, results)| results.avg.as_nanos() as f64),
+        );
+
+        (xs, ys)
+    }
+}
+
+/// Calculate a Vandermonde matrix with 4 columns: row `i` is `[1, x, x^2, x^3]` for `xs[i]`.
+/// <https://en.wikipedia.org/wiki/Vandermonde_matrix>
+fn vandermonde(xs: &[f64]) -> MatrixXx4<f64> {
+    let mut mat = MatrixXx4::repeat(xs.len(), 1.0);
+
+    for (row, x) in xs.iter().enumerate() {
+        // First column is all 1s so skip
+        for col in 1..=3 {
+            mat[(row, col)] = x.powi(col as i32);
+        }
+    }
+
+    mat
+}
diff --git a/src/crates/candelabra/src/cost/mod.rs b/src/crates/candelabra/src/cost/mod.rs
new file mode 100644
index 0000000..18bcb79
--- /dev/null
+++ b/src/crates/candelabra/src/cost/mod.rs
@@ -0,0 +1,117 @@
+//! Generating, caching, and using cost models
+mod benchmark;
+mod fit;
+
+pub use benchmark::{BenchmarkResult, Results as BenchmarkResults};
+pub use fit::Estimator;
+
+use std::collections::HashMap;
+
+use anyhow::{anyhow, Context, Result};
+
+use benchmark::Results;
+use log::{debug, warn};
+use primrose::{LibSpec, LibSpecs};
+use serde::{Deserialize, Serialize};
+
+use crate::{
+    cache::{gen_tree_hash, FileCache},
+    cost::benchmark::run_benchmarks,
+    paths::Paths,
+    State,
+};
+
+/// Cost model for a container, capable of estimating cost of each supported operation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CostModel {
+    by_op: HashMap<String, Estimator>, // one fitted estimator per operation name
+}
+
+/// Information for getting & caching cost information for container implementations.
+pub struct ResultsStore {
+    store: FileCache<str, CacheEntry>, // keyed by the `name` passed to `State::cost_model`
+    lib_specs: LibSpecs, // read from `paths.library_src` when the store is created
+    lib_hash: u64, // tree hash of `paths.library_crate`, taken when the store is created
+}
+
+/// Entry in the cost info cache, stored under the `name` passed to `State::cost_model`
+#[derive(Serialize, Deserialize)]
+struct CacheEntry {
+    /// Hash of the primrose library at the time measurements were taken
+    lib_hash: u64,
+
+    /// The resulting cost model, fitted from `results`
+    model: CostModel,
+
+    /// The raw benchmark results
+    results: Results,
+}
+
+impl ResultsStore {
+    /// Create a new store, using the given paths.
+    /// Benchmarks are cached in `paths.target_dir / candelabra / benchmark_results`
+    pub fn new(paths: &Paths) -> Result<Self> {
+        let lib_specs =
+            LibSpec::read_all(paths.library_src.as_std_path()).map_err(|e| anyhow!("{}", e))?;
+
+        // TODO: this should be home folder or smth
+        let base_dir = paths
+            .target_dir
+            .join("candelabra")
+            .join("benchmark_results");
+
+        // TODO: the hash ignores NS and ELEM_TYPE, so changing them does not invalidate the cache
+        let lib_hash =
+            gen_tree_hash(&paths.library_crate).context("Error generating library hash")?;
+
+        debug!("Initialised benchmark cacher with hash {}", lib_hash);
+
+        Ok(Self {
+            store: FileCache::new(base_dir, move |_, v: &CacheEntry| v.lib_hash == lib_hash)?,
+            lib_specs,
+            lib_hash,
+        })
+    }
+}
+
+impl State {
+    /// Get the cached cost model for the given type, or calculate it on a cache miss.
+    /// Will panic if `name` is not in library specs.
+    pub fn cost_model(&self, name: &str) -> Result<CostModel> {
+        match self.results.store.find(name)? {
+            Some(x) => Ok(x.model),
+            None => self.calc_cost_model(name),
+        }
+    }
+
+    /// Calculate cost information for the given type; a failure to cache it is only logged.
+    /// Will panic if `name` is not in library specs.
+    fn calc_cost_model(&self, name: &str) -> Result<CostModel> {
+        let results = run_benchmarks(name, &self.paths, &self.results.lib_specs)?;
+        let model = build_cost_model(results.clone())?;
+        if let Err(e) = self.results.store.put(
+            name,
+            &CacheEntry {
+                lib_hash: self.results.lib_hash,
+                model: model.clone(),
+                results, // last use of `results`, so it is moved rather than cloned again
+            },
+        ) {
+            warn!("Error caching benchmark outputs for {}: {}", name, e);
+        }
+        Ok(model)
+    }
+}
+
+/// Fit an `Estimator` to the observations of every benchmarked operation.
+/// The `Result` wrapper is part of the signature; this body itself only ever returns `Ok`.
+fn build_cost_model(results: Results) -> Result<CostModel> {
+    // Consumes `results`, so operation names are moved into the model, not cloned.
+    let mut by_op = HashMap::with_capacity(results.by_op.len());
+    for (op_name, observations) in results.by_op {
+        debug!("Fitting op {} with {} observations", op_name, observations.len());
+        by_op.insert(op_name, Estimator::fit(&observations));
+    }
+
+    Ok(CostModel { by_op })
+}
diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs
new file mode 100644
index 0000000..2836e78
--- /dev/null
+++ b/src/crates/candelabra/src/lib.rs
@@ -0,0 +1,41 @@
+use anyhow::Result;
+
+use crate::{candidates::CandidatesStore, cost::ResultsStore};
+
+extern crate nalgebra as na;
+
+mod cache;
+pub mod candidates;
+pub mod cost;
+pub mod profiler;
+
+mod paths;
+mod project;
+pub use paths::Paths;
+pub use project::Project;
+
+/// Shared state for program execution
+pub struct State {
+    /// Paths used throughout execution
+    paths: Paths,
+
+    /// Cache for candidate types for primrose files & annotations
+    candidates: CandidatesStore,
+
+    /// Cached benchmark results and the cost models fitted from them
+    results: ResultsStore,
+
+    /// The model size used for primrose operations (passed to `ContainerSelector::from_path`)
+    model_size: usize,
+}
+
+impl State {
+    pub fn new(paths: Paths) -> Result<Self> { // creates both stores from `paths`
+        Ok(Self {
+            candidates: CandidatesStore::new(&paths)?,
+            results: ResultsStore::new(&paths)?,
+            model_size: 3, // TODO: hard-coded for now
+            paths, // moved last: the stores above only borrow it
+        })
+    }
+}
diff --git a/src/crates/candelabra/src/paths.rs b/src/crates/candelabra/src/paths.rs
new file mode 100644
index 0000000..d25c174
--- /dev/null
+++ b/src/crates/candelabra/src/paths.rs
@@ -0,0 +1,47 @@
+use camino::Utf8PathBuf;
+use std::{env, path::PathBuf};
+
+/// Paths used throughout execution (the layout noted per field is the one `from_base` produces)
+#[derive(Debug, Clone)]
+pub struct Paths {
+    pub base: Utf8PathBuf, // the candelabra source directory
+    pub library_crate: Utf8PathBuf, // <base>/crates/library
+    pub library_src: Utf8PathBuf, // <base>/crates/library/src
+    pub benchmarker_crate: Utf8PathBuf, // <base>/crates/benchmarker
+    pub target_dir: Utf8PathBuf, // <base>/target
+}
+
+impl Paths {
+    pub fn from_base(base: Utf8PathBuf) -> Self { // derives every other path from `base`
+        Paths {
+            library_crate: base.join("crates").join("library"),
+            library_src: base.join("crates").join("library").join("src"),
+            benchmarker_crate: base.join("crates").join("benchmarker"),
+            target_dir: base.join("target"),
+            base, // moved last: the joins above only borrow it
+        }
+    }
+}
+
+impl Default for Paths {
+    fn default() -> Self {
+        let path = if let Ok(var) = env::var("CANDELABRA_SRC_DIR") {
+            var.into()
+        } else {
+            // Most of the time this won't work, but it's worth a shot.
+            let mut path = PathBuf::from(file!());
+            path.pop(); // paths.rs
+            path.pop(); // src
+            path.pop(); // candelabra
+            path.pop(); // crates
+            if path.components().count() == 0 {
+                path.push(".");
+            }
+            path
+        };
+
+        Paths::from_base(path.canonicalize().expect(
+            "candelabra source directory not found. please specify it with CANDELABRA_SRC_DIR",
+        ).try_into().expect("candelabra source directory has non-utf8 components in it (???)"))
+    }
+}
diff --git a/src/crates/candelabra/src/profiler.rs b/src/crates/candelabra/src/profiler.rs
new file mode 100644
index 0000000..b240258
--- /dev/null
+++ b/src/crates/candelabra/src/profiler.rs
@@ -0,0 +1,132 @@
+//! Profiling applications for info about container usage
+
+use anyhow::{anyhow, bail, Context, Result};
+use camino::Utf8Path;
+use log::{debug, trace};
+use polars::prelude::*;
+use primrose::ContainerSelector;
+use std::io::Write;
+use std::str::FromStr;
+use std::{
+    fs::{read_dir, File},
+    io::Read,
+    process::{Command, Stdio},
+};
+use tempfile::tempdir;
+
+use crate::project::Project;
+use crate::State;
+
+/// The information we get from profiling.
+pub type ProfilerInfo = DataFrame;
+
+impl State {
+    /// Get/calculate profiler info for the given project.
+    pub fn calc_profiler_info(&self, project: &Project) -> Result<ProfilerInfo> {
+        self.project_profiling_prep(project)?;
+        // Loop instead of `reduce` so the first failure returns before running later benchmarks.
+        let mut info: Option<ProfilerInfo> = None;
+        for name in &project.benchmarks {
+            let df = self
+                .profile_benchmark(project, name)
+                .with_context(|| format!("Error profiling benchmark {}", name))?;
+            info = Some(match info {
+                Some(acc) => acc.vstack(&df)?,
+                None => df,
+            });
+        }
+        info.ok_or_else(|| anyhow!("nothing to run or types are not used"))
+    }
+
+    /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn project_profiling_prep(&self, project: &Project) -> Result<()> {
+        for (file, candidates) in self.project_candidate_list(project)? {
+            self.file_profiling_prep(&file, &candidates)
+                .with_context(|| format!("error preparing {} for profiling", file))?;
+        }
+
+        Ok(())
+    }
+
+    /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn file_profiling_prep(
+        &self,
+        file: &Utf8Path,
+        candidates: &[(String, Vec<String>)],
+    ) -> Result<()> {
+        debug!("Setting up {} for profiling", file);
+
+        let selector = ContainerSelector::from_path(
+            file.as_std_path(),
+            self.paths.library_src.as_std_path(),
+            self.model_size,
+        )
+        .context("error creating container selector")?;
+
+        let chosen = candidates
+            .iter()
+            .map(|(dest, impls)| impls.first().map(|i| (dest, i)).context("no candidate impls"))
+            .collect::<Result<Vec<_>>>()?;
+
+        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| (*d, c.as_str())));
+        // Strip only the trailing `.pr.rs`: replacing every ".pr" mangles e.g. `my.project/`.
+        let stem = file.as_str().strip_suffix(".pr.rs");
+        let new_path = stem.map(|s| format!("{}.rs", s)).context("not a .pr.rs file")?;
+        trace!("New code: {}", new_code);
+        trace!("New path: {}", new_path);
+
+        let mut f = File::create(new_path).context("error creating new source file")?;
+        f.write_all(new_code.as_bytes())
+            .context("error writing new code")?;
+
+        Ok(())
+    }
+
+    /// Run the given benchmark on the project, and parse the resulting profiling information.
+    fn profile_benchmark(&self, project: &Project, name: &str) -> Result<DataFrame> {
+        let profiler_out_dir = tempdir()?;
+        debug!("Running benchmark {} with out dir {:?}", name, profiler_out_dir);
+
+        let output = Command::new("cargo")
+            .current_dir(&project.source_dir)
+            .args(["bench", "--bench", name])
+            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
+            .stderr(Stdio::inherit())
+            .stdout(Stdio::inherit())
+            .output()?;
+
+        if !output.status.success() {
+            bail!("Error running benchmark");
+        }
+
+        let mut info = ProfilerInfo::default();
+        for file in read_dir(&profiler_out_dir)? {
+            let file = file?;
+            let mut contents = String::new();
+            File::open(file.path())?.read_to_string(&mut contents)?;
+
+            info = info.vstack(&parse_output(&contents)?)?;
+        }
+
+        Ok(info)
+    }
+}
+
+/// Parse one profiler output file: ten integer lines, in the column order listed below.
+fn parse_output(contents: &str) -> Result<DataFrame> {
+    let mut lines = contents.lines().map(i32::from_str);
+    let missing_line_err = || anyhow!("wrong number of lines in profiler output");
+
+    Ok(df!(
+        "n" => &[lines.next().ok_or_else(missing_line_err)??],
+        "contains" => &[lines.next().ok_or_else(missing_line_err)??],
+        "insert" => &[lines.next().ok_or_else(missing_line_err)??],
+        "clear" => &[lines.next().ok_or_else(missing_line_err)??],
+        "remove" => &[lines.next().ok_or_else(missing_line_err)??],
+        "first" => &[lines.next().ok_or_else(missing_line_err)??],
+        "last" => &[lines.next().ok_or_else(missing_line_err)??],
+        "nth" => &[lines.next().ok_or_else(missing_line_err)??],
+        "push" => &[lines.next().ok_or_else(missing_line_err)??],
+        "pop" => &[lines.next().ok_or_else(missing_line_err)??],
+    )?)
+}
diff --git a/src/crates/candelabra/src/project.rs b/src/crates/candelabra/src/project.rs
new file mode 100644
index 0000000..cc8b4a2
--- /dev/null
+++ b/src/crates/candelabra/src/project.rs
@@ -0,0 +1,35 @@
+use anyhow::{Context, Result};
+use cargo_metadata::{camino::Utf8PathBuf, Package, Target};
+use glob::glob;
+
+/// A single package or crate that we wish to process.
+#[derive(Debug, Clone)]
+pub struct Project {
+    pub name: String, // package name, taken from the cargo metadata `Package`
+    pub benchmarks: Vec<String>, // names of the package's bench targets
+    pub source_dir: Utf8PathBuf, // directory containing the package manifest
+}
+
+impl Project {
+    pub fn new(package: Package) -> Self {
+        Project {
+            name: package.name.clone(),
+            source_dir: package.manifest_path.parent().unwrap().to_path_buf(),
+            benchmarks: package
+                .targets
+                .into_iter()
+                .filter(Target::is_bench)
+                .map(|t| t.name)
+                .collect(),
+        }
+    }
+
+    /// Find all primrose files (`.pr.rs`) in this project. Unreadable paths are an error.
+    pub fn find_primrose_files(&self) -> Result<Vec<Utf8PathBuf>> {
+        glob(&format!("{}/**/*.pr.rs", self.source_dir))
+            .context("invalid glob pattern for project source directory")?
+            .map(|p| -> Result<Utf8PathBuf> { Ok(p?.try_into()?) })
+            .collect::<Result<Vec<_>>>()
+            .context("error finding primrose files in project")
+    }
+}
diff --git a/src/crates/cli/Cargo.toml b/src/crates/cli/Cargo.toml
index 0a3a643..c0180ee 100644
--- a/src/crates/cli/Cargo.toml
+++ b/src/crates/cli/Cargo.toml
@@ -1,22 +1,9 @@
 [package]
-name = "candelabra-cli"
+name = "cli"
 version = "0.1.0"
 edition = "2021"
-default-run = "candelabra-cli"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-log = { workspace = true }
-env_logger = { workspace = true }
-primrose = { path = "../primrose" }
-anyhow = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-camino = "1.1.6"
-cargo_metadata = "0.18.1"
-argh = "0.1.12"
-glob = "0.3.1"
-tempfile = "3"
-nalgebra = "0.32.3"
-polars = { version = "0.35.4", features = ["describe"] }
+candelabra = { path = "../candelabra" }
\ No newline at end of file
diff --git a/src/crates/cli/src/cache.rs b/src/crates/cli/src/cache.rs
deleted file mode 100644
index 4775a0f..0000000
--- a/src/crates/cli/src/cache.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-//! Common utilities for caching results
-use std::{
-    collections::hash_map::DefaultHasher,
-    fs::{create_dir_all, metadata, remove_file, File},
-    hash::{Hash, Hasher},
-    marker::PhantomData,
-};
-
-use anyhow::{Context, Result};
-use camino::{Utf8Path, Utf8PathBuf};
-use glob::glob;
-use log::{debug, warn};
-use serde::{Deserialize, Serialize};
-use serde_json::{from_reader, to_writer};
-
-/// A filesystem-based K/V cache
-/// This doesn't deal with key invalidation or anything, just the filesystem/serialisation stuff
-pub struct FileCache<K: 'static + ?Sized, V, VR = V> {
-    base_dir: Utf8PathBuf,
-    _data: PhantomData<(&'static K, V, VR)>,
-}
-
-impl<K: ?Sized + ToString, V: for<'a> Deserialize<'a>, VR: Serialize> FileCache<K, V, VR> {
-    /// Create a new file store in the given directory.
-    pub fn new(base_dir: Utf8PathBuf) -> Result<Self> {
-        create_dir_all(base_dir.as_std_path()).context("Error creating cache directory")?;
-        Ok(Self {
-            base_dir,
-            _data: PhantomData,
-        })
-    }
-
-    /// Store the given value with the given `key`
-    pub fn put(&self, key: &K, value: &VR) -> Result<()> {
-        let path = self.path_for(key);
-        let mut file = File::create(path)?;
-        to_writer(&mut file, value)?;
-
-        Ok(())
-    }
-
-    /// Attempt to load cache entry with the given `key`
-    /// `Ok(None)` indicates no valid cache entry, while `Err(e)` indicates an IO error
-    /// Invalid cache entries will be deleted.
-    pub fn find(&self, key: &K) -> Result<Option<V>> {
-        let path = self.path_for(key);
-        if !path.exists() {
-            return Ok(None);
-        }
-
-        let file = File::open(&path).context("Error opening cache entry")?;
-        let contents: V = match from_reader(file) {
-            Ok(x) => x,
-            Err(e) => {
-                debug!("Invalid cache entry: {}", e);
-                if let Err(e) = self.remove(key) {
-                    warn!("Error deleting invalid cache entry: {}", e);
-                }
-
-                return Ok(None);
-            }
-        };
-
-        Ok(Some(contents))
-    }
-
-    /// Remove value for the given key
-    pub fn remove(&self, key: &K) -> Result<()> {
-        Ok(remove_file(self.path_for(key))?)
-    }
-
-    /// Get the path for a given key
-    fn path_for(&self, key: &K) -> Utf8PathBuf {
-        // Sanitise key name
-        let key = key.to_string();
-        let mut san = String::with_capacity(key.len());
-        for chr in key.chars() {
-            if chr == '/'
-                || chr == '\\'
-                || chr == ':'
-                || chr == '*'
-                || chr == '?'
-                || chr == '"'
-                || chr == '<'
-                || chr == '>'
-                || chr == '|'
-            {
-                san += "_";
-            } else {
-                san.push(chr);
-            }
-        }
-        if san.is_empty() {
-            san += "_";
-        }
-        self.base_dir.join(san) // TODO: santisation
-    }
-}
-
-/// Generate a hash from the current state of the given directory
-/// This is built from the modification time of all files in that directory and all children.
-pub fn gen_tree_hash(dir: &Utf8Path) -> Result<u64> {
-    let mut hasher = DefaultHasher::new();
-
-    for f in glob(&format!("{}/**/*", dir)).unwrap() {
-        let modified = metadata(f?)?.modified()?;
-        modified.hash(&mut hasher);
-    }
-
-    Ok(hasher.finish())
-}
diff --git a/src/crates/cli/src/candidates.rs b/src/crates/cli/src/candidates.rs
deleted file mode 100644
index ab713d7..0000000
--- a/src/crates/cli/src/candidates.rs
+++ /dev/null
@@ -1,136 +0,0 @@
-//! Generating and caching primrose candidate results
-
-use std::{collections::HashMap, fs::metadata, time::SystemTime};
-
-use anyhow::{Context, Result};
-use camino::{Utf8Path, Utf8PathBuf};
-use log::{debug, warn};
-use primrose::ContainerSelector;
-use serde::{Deserialize, Serialize};
-
-use crate::{
-    cache::{gen_tree_hash, FileCache},
-    paths::Paths,
-    project::Project,
-    State,
-};
-
-// TODO: Make this adjustable
-/// The size of the model used by primrose
-const MODEL_SIZE: usize = 3;
-
-/// Names a container type we want to select.
-pub type ConTypeName = String;
-
-/// Name of a container implementation we are considering
-pub type ImplName = String;
-
-/// A list of candidate container types
-pub type Candidates = HashMap<ConTypeName, Vec<ImplName>>;
-
-/// Entry in the benchmark cache
-#[derive(Serialize, Deserialize, Debug)]
-struct CacheEntry {
-    lib_hash: u64,
-    mod_time: SystemTime,
-    value: Candidates,
-}
-
-/// Gets/retrieves candidate container types for primrose files.
-/// This caches results, and invalidates them when the file changes.
-pub struct CandidatesStore {
-    paths: Paths,
-    store: FileCache<Utf8Path, CacheEntry>,
-    lib_hash: u64,
-}
-
-impl CandidatesStore {
-    /// Create a new store, using the given paths.
-    /// Benchmarks are cached in `paths.target_dir / candelabra / primrose_results`
-    pub fn new(paths: &Paths) -> Result<Self> {
-        let base_dir = paths.target_dir.join("candelabra").join("primrose_results");
-
-        let lib_hash =
-            gen_tree_hash(&paths.library_crate).context("Error generating library hash")?;
-
-        debug!("Initialised candidate cacher with hash {}", lib_hash);
-
-        Ok(Self {
-            store: FileCache::new(base_dir)?,
-            paths: paths.clone(),
-            lib_hash,
-        })
-    }
-
-    /// Get benchmark results for the given type, using cached results if possible and persisting the results for later.
-    /// Will panic if `name` is not in library specs.
-    pub fn get(&self, src: &Utf8Path) -> Result<Candidates> {
-        if let Some(results) = self.find(src)? {
-            debug!("Cache hit for {} candidates", src);
-            Ok(results)
-        } else {
-            debug!("Cache miss for {} candidates", src);
-            let selector = ContainerSelector::from_path(
-                src.as_std_path(),
-                self.paths.library_src.as_std_path(),
-                MODEL_SIZE,
-            )
-            .with_context(|| format!("error getting container selector for {}", src))?;
-
-            let candidates = selector
-                .find_all_candidates()?
-                .into_iter()
-                .map(|(k, v)| (k.to_string(), v))
-                .collect();
-
-            if let Err(e) = self.put(src, &candidates) {
-                warn!("Error caching candidates for {}: {}", src, e);
-            }
-            Ok(candidates)
-        }
-    }
-
-    /// Attempt to find an up-to-date set of results with the given key
-    fn find(&self, src: &Utf8Path) -> Result<Option<Candidates>> {
-        let mod_time = metadata(src)?.modified()?;
-        Ok(self
-            .store
-            .find(src)?
-            .filter(|e| e.lib_hash == self.lib_hash && e.mod_time == mod_time)
-            .map(|e| e.value))
-    }
-
-    /// Store a new set of results with the given key
-    fn put(&self, src: &Utf8Path, results: &Candidates) -> Result<()> {
-        let mod_time = metadata(src)?.modified()?;
-        self.store.put(
-            src,
-            &CacheEntry {
-                lib_hash: self.lib_hash,
-                value: results.clone(),
-                mod_time,
-            },
-        )
-    }
-}
-
-pub type ProjectCandidateList = Vec<(Utf8PathBuf, Vec<(ConTypeName, Vec<ImplName>)>)>;
-
-impl State {
-    /// Run primrose on all files in the given project.
-    /// Returns a list of all candidates for each container type in each file.
-    pub fn get_all_candidates(&self, project: &Project) -> Result<ProjectCandidateList> {
-        let mut all_candidates = Vec::new();
-        for file in project.find_primrose_files()? {
-            let result = self.candidates.get(&file)?;
-
-            let mut typs = Vec::new();
-            for (con_type_id, candidates) in result {
-                typs.push((con_type_id.clone(), candidates));
-            }
-            all_candidates.push((file, typs));
-        }
-
-        Ok(all_candidates)
-    }
-}
diff --git a/src/crates/cli/src/cmd.rs b/src/crates/cli/src/cmd.rs
deleted file mode 100644
index 7f9857d..0000000
--- a/src/crates/cli/src/cmd.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use argh::FromArgs;
-
-#[derive(FromArgs)]
-/// Find the best performing container type using primrose
-pub struct Args {
-    /// path to Cargo.toml
-    #[argh(option)]
-    pub manifest_path: Option<String>,
-
-    /// project to run on, if in a workspace
-    #[argh(option, short = 'p')]
-    pub project: Option<String>,
-
-    #[argh(subcommand)]
-    pub cmd: Subcommand,
-}
-
-#[derive(FromArgs)]
-#[argh(subcommand)]
-pub enum Subcommand {
-    Model(ModelSubcommand),
-    Candidates(CandidatesSubcommand),
-    Profile(ProfileSubcommand),
-}
-
-#[derive(FromArgs)]
-/// Show the cost model for the given implementation
-#[argh(subcommand, name = "cost-model")]
-pub struct ModelSubcommand {}
-
-#[derive(FromArgs)]
-/// Show the candidate types selected by primrose
-#[argh(subcommand, name = "candidates")]
-pub struct CandidatesSubcommand {}
-
-#[derive(FromArgs)]
-/// Show the profiling information generated from benchmarks
-#[argh(subcommand, name = "profile")]
-pub struct ProfileSubcommand {}
diff --git a/src/crates/cli/src/cost/benchmark.rs b/src/crates/cli/src/cost/benchmark.rs
deleted file mode 100644
index a1e0e18..0000000
--- a/src/crates/cli/src/cost/benchmark.rs
+++ /dev/null
@@ -1,164 +0,0 @@
-//! Benchmarking of container types
-
-use std::{
-    collections::HashMap,
-    fs::{copy, create_dir, File},
-    io::Write,
-    process::Command,
-    time::Duration,
-};
-
-use anyhow::{bail, Context, Result};
-use log::{debug, log_enabled, Level};
-use primrose::{LibSpec, LibSpecs};
-use serde::{Deserialize, Serialize};
-use tempfile::{tempdir, TempDir};
-
-use crate::paths::Paths;
-
-/// The name of the element type we use for benchmarking
-pub const ELEM_TYPE: &str = "usize";
-
-/// String representation of the array of N values we use for benchmarking
-pub const NS: &str = "[8, 256, 1024, 65536]";
-
-/// Results for a whole suite of benchmarks
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Results {
-    /// Results for each collection operation
-    pub by_op: HashMap<OpName, Vec<Observation>>,
-}
-
-/// Name of an operation
-pub type OpName = String;
-
-/// The first key in the tuple is the `n` of the container before the benchmark was taken, and the second the results of the benchmark.
-pub type Observation = (usize, BenchmarkResult);
-
-/// Results for a single benchmark
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct BenchmarkResult {
-    /// Number of times the benchmark was run
-    pub times: usize,
-
-    /// The minimum time taken
-    pub min: Duration,
-
-    /// The maximum time taken
-    pub max: Duration,
-
-    /// The average (mean) time taken
-    pub avg: Duration,
-}
-
-/// Run benchmarks for the given container type, returning the results.
-/// Panics if the given name is not in the library specs.
-pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result<Results> {
-    let lib_spec = lib_specs
-        .get(name)
-        .expect("name passed to benchmarkspec not in libspecs");
-
-    // Generate crate & source
-    let crate_ = prepare_crate(name, paths, lib_spec)?;
-
-    // Build and run
-    debug!("Building and running benchmarks for {}", name);
-    let run_output = Command::new("cargo")
-        .args(["run", "--release", "--", "--bench"])
-        .current_dir(crate_.path())
-        .env("CARGO_TARGET_DIR", &paths.target_dir) // Share target directory
-        .output()
-        .context("Error running build command")?;
-
-    if !run_output.status.success() {
-        bail!("Error result from benchmark. Output: {:?}", run_output);
-    }
-
-    if log_enabled!(Level::Debug) {
-        if let Ok(stdout) = String::from_utf8(run_output.stdout.clone()) {
-            debug!("stdout: {:?}", stdout);
-        }
-        if let Ok(stderr) = String::from_utf8(run_output.stderr.clone()) {
-            debug!("stderr: {:?}", stderr);
-        }
-    }
-
-    // Deserialise benchmark results
-    todo!()
-}
-
-fn prepare_crate(name: &str, paths: &Paths, lib_spec: &LibSpec) -> Result<TempDir> {
-    // Directory we will create the crate in
-    let crate_tempdir = tempdir()?;
-    let crate_dir = crate_tempdir.path();
-    debug!("Preparing benchmark crate for {} in {:?}", name, crate_dir);
-
-    // Write the manifest
-    let mut manifest =
-        File::create(crate_dir.join("Cargo.toml")).context("Error creating Cargo.toml")?;
-    manifest
-        .write_all(
-            format!(
-                "
-[package]
-name = \"bench\"
-version = \"0.1.0\"
-edition = \"2021\"
-
-[dependencies]
-candelabra-benchmarker = {{ path = \"{}\" }}
-primrose-library = {{ path = \"{}\" }}
-",
-                paths.benchmarker_crate, paths.library_crate,
-            )
-            .as_bytes(),
-        )
-        .context("Error writing Cargo.toml")?;
-
-    // Ensure we use the right toolchain
-    let orig_toolchain_file = paths.base.join("rust-toolchain.toml");
-    copy(orig_toolchain_file, crate_dir.join("rust-toolchain.toml"))
-        .context("Error writing rust-toolchain.toml")?;
-
-    // Generate the code for running our benchmarks
-    let mut benchmark_statements = String::new();
-
-    // Add benchmarks for implemented traits
-    let implemented_traits = lib_spec.interface_provide_map.keys();
-    for tr in implemented_traits {
-        benchmark_statements += &format!(
-            "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(c, &NS);",
-            tr.to_lowercase(),
-            name,
-            ELEM_TYPE,
-        );
-    }
-
-    // Write the benchmarking source, using our generated benchmarker code.
-    let src_dir = crate_dir.join("src");
-    create_dir(&src_dir).context("Error creating src directory")?;
-
-    let mut src_file = File::create(src_dir.join("main.rs")).context("Error creating main.rs")?;
-    src_file
-        .write_all(
-            format!(
-                "
-use candelabra_benchmarker::criterion::{{criterion_group, criterion_main, Criterion}};
-
-const NS: &[usize] = &{};
-
-fn run_benches(c: &mut Criterion) {{
-    {}
-}}
-
-criterion_group!(benches, run_benches);
-criterion_main!(benches);
-",
-                NS, benchmark_statements
-            )
-            .as_bytes(),
-        )
-        .context("Error writing to main.rs")?;
-
-    Ok(crate_tempdir)
-}
diff --git a/src/crates/cli/src/cost/fit.rs b/src/crates/cli/src/cost/fit.rs
deleted file mode 100644
index f4372f1..0000000
--- a/src/crates/cli/src/cost/fit.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-//! Fitting a 3rd-order polynomial to benchmark results
-//! Based on code from al-jshen: https://github.com/al-jshen/compute/tree/master
-
-use super::benchmark::Observation;
-use na::{Dyn, MatrixXx4, OVector};
-use serde::{Deserialize, Serialize};
-
-/// Estimates durations using a 3rd-order polynomial.
-#[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct Estimator([f64; 4]);
-
-/// Approximate cost of an action.
-/// This is an approximation for the number of nanoseconds it would take.
-pub type Cost = f64;
-
-impl Estimator {
-    /// Fit from the given set of observations, using the least squared method.
-    pub fn fit(results: &[Observation]) -> Self {
-        let (xs, ys) = Self::to_data(results);
-
-        let xv = vandermonde(&xs);
-        let xtx = xv.transpose() * xv.clone();
-        let xtxinv = xtx.try_inverse().unwrap();
-        let xty = xv.transpose() * ys;
-        let coeffs = xtxinv * xty;
-
-        Self(coeffs.into())
-    }
-
-    /// Calculate the residual sum of squares for the given data.
-    pub fn rss(&self, results: &[Observation]) -> f64 {
-        // TODO: there's a more efficient way to do this / bulk esimations
-        let (xs, ys) = Self::to_data(results);
-
-        xs.iter()
-            .zip(ys.iter())
-            .map(|(x, y)| (y - self.estimatef(*x)).powi(2))
-            .sum()
-    }
-
-    /// Estimate the cost of a given operation at the given `n`.
-    pub fn estimate(&self, n: usize) -> Cost {
-        self.estimatef(n as f64)
-    }
-
-    /// Estimate the cost of a given operation at the given `n`.
-    pub fn estimatef(&self, n: f64) -> Cost {
-        let [a, b, c, d] = self.0;
-        a + b * n + c * n.powi(2) + d * n.powi(3)
-    }
-
-    /// Convert a list of observations to the format we use internally.
-    fn to_data(results: &[Observation]) -> (Vec<f64>, OVector<f64, Dyn>) {
-        let xs = results.iter().map(|(n, _)| *n as f64).collect::<Vec<_>>();
-        let ys = OVector::<f64, Dyn>::from_iterator(
-            results.len(),
-            results
-                .iter()
-                .map(|(_, results)| results.avg.as_nanos() as f64),
-        );
-
-        (xs, ys)
-    }
-}
-
-/// Calculate a Vandermode matrix with 4 columns.
-/// https://en.wikipedia.org/wiki/Vandermonde_matrix
-fn vandermonde(xs: &[f64]) -> MatrixXx4<f64> {
-    let mut mat = MatrixXx4::repeat(xs.len(), 1.0);
-
-    for (row, x) in xs.iter().enumerate() {
-        // First column is all 1s so skip
-        for col in 1..=3 {
-            mat[(row, col)] = x.powi(col as i32);
-        }
-    }
-
-    mat
-}
diff --git a/src/crates/cli/src/cost/mod.rs b/src/crates/cli/src/cost/mod.rs
deleted file mode 100644
index f3cad13..0000000
--- a/src/crates/cli/src/cost/mod.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-//! Generating, caching, and using cost models
-pub mod benchmark;
-pub mod fit;
-
-use std::collections::HashMap;
-
-use anyhow::{anyhow, Context, Result};
-
-use benchmark::Results;
-use log::{debug, warn};
-use primrose::{LibSpec, LibSpecs};
-use serde::{Deserialize, Serialize};
-
-use self::fit::Estimator;
-use crate::{
-    cache::{gen_tree_hash, FileCache},
-    cost::benchmark::run_benchmarks,
-    paths::Paths,
-};
-
-/// Cost model for a container, capable of estimating cost of each supported operation.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CostModel {
-    by_op: HashMap<String, Estimator>,
-}
-
-/// Entry in the benchmark cache
-#[derive(Serialize, Deserialize)]
-struct CacheEntry {
-    lib_hash: u64,
-    model: CostModel,
-    results: Results,
-}
-
-/// Gets/retrieves benchmark results for container implementations.
-/// This caches results, and invalidates them when the library or parameters change.
-pub struct ResultsStore {
-    paths: Paths,
-    store: FileCache<str, CacheEntry>,
-    lib_specs: LibSpecs,
-    lib_hash: u64,
-}
-
-impl ResultsStore {
-    /// Create a new store, using the given paths.
-    /// Benchmarks are cached in `paths.target_dir / candelabra / benchmark_results`
-    pub fn new(paths: &Paths) -> Result<Self> {
-        let lib_specs =
-            LibSpec::read_all(paths.library_src.as_std_path()).map_err(|e| anyhow!("{}", e))?;
-
-        // TODO: this should be home folder or smth
-        let base_dir = paths
-            .target_dir
-            .join("candelabra")
-            .join("benchmark_results");
-
-        // TODO: Doesn't take NS or ELEM_TYPE into account
-        let lib_hash =
-            gen_tree_hash(&paths.library_crate).context("Error generating library hash")?;
-
-        debug!("Initialised benchmark cacher with hash {}", lib_hash);
-
-        Ok(Self {
-            store: FileCache::new(base_dir)?,
-            paths: paths.clone(),
-            lib_specs,
-            lib_hash,
-        })
-    }
-
-    /// Get benchmark results for the given type, using cached results if possible and persisting the results for later.
-    /// Will panic if `name` is not in library specs.
-    pub fn get(&self, name: &str) -> Result<CostModel> {
-        if let Some(results) = self.find(name)? {
-            debug!("Cache hit for {} benchmarks", name);
-            Ok(results)
-        } else {
-            debug!("Cache miss for {} benchmarks", name);
-            let results = run_benchmarks(name, &self.paths, &self.lib_specs)?;
-            let model = build_cost_model(results.clone())?;
-            if let Err(e) = self.put(name, &model, &results) {
-                warn!("Error caching benchmark outputs for {}: {}", name, e);
-            }
-            Ok(model)
-        }
-    }
-
-    /// Attempt to find an up-to-date set of results with the given key
-    fn find(&self, name: &str) -> Result<Option<CostModel>> {
-        Ok(self
-            .store
-            .find(name)?
-            .filter(|e| e.lib_hash == self.lib_hash)
-            .map(|e| e.model))
-    }
-
-    /// Store a new set of results with the given key
-    fn put(&self, name: &str, model: &CostModel, results: &Results) -> Result<()> {
-        self.store.put(
-            name,
-            &CacheEntry {
-                lib_hash: self.lib_hash,
-                model: model.clone(),
-                results: results.clone(),
-            },
-        )
-    }
-}
-
-fn build_cost_model(results: Results) -> Result<CostModel> {
-    Ok(CostModel {
-        by_op: results
-            .by_op
-            .into_iter()
-            .map(|(op, os)| {
-                debug!("Fitting op {} with {} observations", op, os.len());
-                (op, Estimator::fit(&os))
-            })
-            .collect(),
-    })
-}
diff --git a/src/crates/cli/src/main.rs b/src/crates/cli/src/main.rs
index 8827084..5bd0f34 100644
--- a/src/crates/cli/src/main.rs
+++ b/src/crates/cli/src/main.rs
@@ -1,115 +1,32 @@
-use anyhow::{anyhow, Context, Result};
-use candidates::CandidatesStore;
-use cmd::{CandidatesSubcommand, ModelSubcommand, ProfileSubcommand};
-use cost::ResultsStore;
-use log::info;
-use polars::prelude::*;
-use project::Project;
-
-use crate::{
-    cmd::{Args, Subcommand},
-    paths::Paths,
-};
-
-extern crate nalgebra as na;
-
-mod cache;
-mod candidates;
-mod cmd;
-mod cost;
-mod paths;
-mod profiler;
-mod project;
-
-fn main() -> Result<()> {
-    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
-
-    let args: Args = argh::from_env();
-
-    // Build shared state
-    let paths = Paths::default();
-    info!("Using source dir: {:?}", &paths.base);
-    let state = State {
-        candidates: CandidatesStore::new(&paths).context("error creating candidate store")?,
-        results: ResultsStore::new(&paths).context("error creating result store")?,
-        paths,
-        model_size: 3, // TODO
-    };
-
-    let projects = get_projects(&args).context("failed to find project paths")?;
-    match args.cmd {
-        Subcommand::Model(c) => state.cmd_model(projects, c),
-        Subcommand::Candidates(c) => state.cmd_candidates(projects, c),
-        Subcommand::Profile(c) => state.cmd_profile(projects, c),
-    }
+fn main() {
+    println!("Hello, world!");
 }
 
-/// Shared state for program execution
-pub struct State {
-    /// Paths used throughout execution
-    paths: Paths,
-
-    /// Candidate types for primrose files & annotations
-    candidates: CandidatesStore,
-
-    /// Results and cost models
-    results: ResultsStore,
-
-    /// The model size used for primrose operations
-    model_size: usize,
-}
-
-impl State {
-    pub fn cmd_model(&self, projects: Vec<Project>, c: ModelSubcommand) -> Result<()> {
-        todo!()
-    }
-
-    pub fn cmd_candidates(&self, projects: Vec<Project>, c: CandidatesSubcommand) -> Result<()> {
-        todo!()
-    }
-
-    pub fn cmd_profile(&self, projects: Vec<Project>, c: ProfileSubcommand) -> Result<()> {
-        for project in projects {
-            info!("Profiling project {}", project.name);
-            let inf = self
-                .profile_all(&project)
-                .with_context(|| format!("Error profiling project {}", project.name))?;
-
-            // TODO: More useful output
-            info!("{:?}", inf);
-            info!("{} samples", inf.shape().0);
-            info!("{:?}", inf.describe(None));
-        }
-
-        Ok(())
-    }
-}
-
-fn get_projects(args: &Args) -> Result<Vec<Project>> {
-    let mut cmd = cargo_metadata::MetadataCommand::new();
-    if let Some(p) = &args.manifest_path {
-        cmd.manifest_path(p);
-    }
-
-    let metadata = cmd.exec().context("failed to get manifest metadata")?;
-
-    if let Some(p) = &args.project {
-        // Select a specific project
-        Ok(vec![metadata
-            .packages
-            .iter()
-            .find(|pkg| pkg.name == *p)
-            .map(|pkg| Project::new(pkg.clone()))
-            .ok_or_else(|| {
-                anyhow!("specified project does not exist")
-            })?])
-    } else {
-        // Default to all workspace members
-        Ok(metadata
-            .workspace_members
-            .iter()
-            .flat_map(|member| metadata.packages.iter().find(|pkg| pkg.id == *member))
-            .map(|pkg| Project::new(pkg.clone()))
-            .collect())
-    }
-}
+// fn get_projects(args: &Args) -> Result<Vec<Project>> {
+//     let mut cmd = cargo_metadata::MetadataCommand::new();
+//     if let Some(p) = &args.manifest_path {
+//         cmd.manifest_path(p);
+//     }
+
+//     let metadata = cmd.exec().context("failed to get manifest metadata")?;
+
+//     if let Some(p) = &args.project {
+//         // Select a specific project
+//         Ok(vec![metadata
+//             .packages
+//             .iter()
+//             .find(|pkg| pkg.name == *p)
+//             .map(|pkg| Project::new(pkg.clone()))
+//             .ok_or_else(|| {
+//                 anyhow!("specified project does not exist")
+//             })?])
+//     } else {
+//         // Default to all workspace members
+//         Ok(metadata
+//             .workspace_members
+//             .iter()
+//             .flat_map(|member| metadata.packages.iter().find(|pkg| pkg.id == *member))
+//             .map(|pkg| Project::new(pkg.clone()))
+//             .collect())
+//     }
+// }
diff --git a/src/crates/cli/src/paths.rs b/src/crates/cli/src/paths.rs
deleted file mode 100644
index 2b44400..0000000
--- a/src/crates/cli/src/paths.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-use std::{env, path::PathBuf};
-
-use camino::Utf8PathBuf;
-
-#[derive(Debug, Clone)]
-pub struct Paths {
-    pub base: Utf8PathBuf,
-    pub library_crate: Utf8PathBuf,
-    pub library_src: Utf8PathBuf,
-    pub benchmarker_crate: Utf8PathBuf,
-    pub target_dir: Utf8PathBuf,
-}
-
-impl Paths {
-    fn from_base(base: Utf8PathBuf) -> Self {
-        Paths {
-            library_crate: base.join("crates").join("library"),
-            library_src: base.join("crates").join("library").join("src"),
-            benchmarker_crate: base.join("crates").join("benchmarker"),
-            target_dir: base.join("target"),
-            base,
-        }
-    }
-}
-
-impl Default for Paths {
-    fn default() -> Self {
-        let path = if let Ok(var) = env::var("CANDELABRA_SRC_DIR") {
-            var.into()
-        } else {
-            // Most the time this won't work, but it's worth a shot.
-            let mut path = PathBuf::from(file!());
-            path.pop(); // main.rs
-            path.pop(); // src
-            path.pop(); // candelabra-cli
-            path.pop(); // crates
-            if path.components().count() == 0 {
-                path.push(".");
-            }
-            path
-        };
-
-        Paths::from_base(path.canonicalize().expect(
-            "candelabra source directory not found. please specify it with CANDELABRA_SRC_DIR",
-        ).try_into().expect("candelabra source directory has non-utf8 components in it (???)"))
-    }
-}
diff --git a/src/crates/cli/src/profiler/mod.rs b/src/crates/cli/src/profiler/mod.rs
deleted file mode 100644
index 24ae544..0000000
--- a/src/crates/cli/src/profiler/mod.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-use anyhow::{anyhow, bail, Context, Result};
-use camino::Utf8Path;
-use log::{debug, trace};
-use polars::prelude::*;
-use primrose::ContainerSelector;
-use std::io::Write;
-use std::str::FromStr;
-use std::{
-    fs::{read_dir, File},
-    io::Read,
-    process::{Command, Stdio},
-};
-use tempfile::tempdir;
-
-use crate::project::Project;
-use crate::State;
-
-pub type ProfilerInfo = DataFrame;
-
-impl State {
-    /// Profile all benchmarks for the given project
-    pub fn profile_all(&self, project: &Project) -> Result<ProfilerInfo> {
-        self.prepare_for_profiling(project)?;
-        project
-            .benchmarks
-            .iter()
-            .map(|name| {
-                self.profile_benchmark(project, name)
-                    .with_context(|| format!("Error profiling benchmark {}", name))
-            })
-            .reduce(|acc, df| acc?.vstack(&df?).map_err(Into::into))
-            .ok_or(anyhow!("nothing to run or types are not used"))?
-    }
-
-    fn prepare_for_profiling(&self, project: &Project) -> Result<()> {
-        for (file, candidates) in self.get_all_candidates(project)? {
-            self.prepare_file(&file, &candidates)
-                .with_context(|| format!("error preparing {} for profiling", file))?;
-        }
-
-        Ok(())
-    }
-
-    fn prepare_file(&self, file: &Utf8Path, candidates: &[(String, Vec<String>)]) -> Result<()> {
-        debug!("Setting up {} for profiling", file);
-
-        let selector = ContainerSelector::from_path(
-            file.as_std_path(),
-            self.paths.library_src.as_std_path(),
-            self.model_size,
-        )
-        .context("error creating container selector")?;
-
-        let chosen = candidates
-            .iter()
-            .map(|(dest_name, impls)| (dest_name, &impls[0]))
-            .collect::<Vec<_>>();
-
-        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| (*d, c.as_str())));
-
-        let new_path = file.to_string().replace(".pr", "");
-
-        trace!("New code: {}", new_code);
-        trace!("New path: {}", new_path);
-
-        let mut f = File::create(new_path).context("error creating new source file")?;
-        f.write_all(new_code.as_bytes())
-            .context("error writing new code")?;
-
-        Ok(())
-    }
-
-    fn profile_benchmark(&self, project: &Project, name: &str) -> Result<DataFrame> {
-        let profiler_out_dir = tempdir()?;
-        debug!(
-            "Running benchmark {} with out dir {:?}",
-            name, profiler_out_dir
-        );
-
-        let output = Command::new("cargo")
-            .current_dir(&project.source_dir)
-            .args(["bench", "--bench", name])
-            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
-            .stderr(Stdio::inherit())
-            .stdout(Stdio::inherit())
-            .output()?;
-
-        if !output.status.success() {
-            bail!("Error running benchmark");
-        }
-
-        let mut info = ProfilerInfo::default();
-        for file in read_dir(&profiler_out_dir)? {
-            let file = file?;
-            let mut contents = String::new();
-            File::open(file.path())?.read_to_string(&mut contents)?;
-
-            info = info.vstack(&parse_output(&contents)?)?;
-        }
-
-        Ok(info)
-    }
-}
-
-fn parse_output(contents: &str) -> Result<DataFrame> {
-    let mut lines = contents.lines().map(i32::from_str);
-    let missing_line_err = || anyhow!("wrong number of lines in ");
-
-    Ok(df!(
-        "n" => &[lines.next().ok_or_else(missing_line_err)??],
-        "contains" => &[lines.next().ok_or_else(missing_line_err)??],
-        "insert" => &[lines.next().ok_or_else(missing_line_err)??],
-        "clear" => &[lines.next().ok_or_else(missing_line_err)??],
-        "remove" => &[lines.next().ok_or_else(missing_line_err)??],
-        "first" => &[lines.next().ok_or_else(missing_line_err)??],
-        "last" => &[lines.next().ok_or_else(missing_line_err)??],
-        "nth" => &[lines.next().ok_or_else(missing_line_err)??],
-        "push" => &[lines.next().ok_or_else(missing_line_err)??],
-        "pop" => &[lines.next().ok_or_else(missing_line_err)??],
-    )?)
-}
diff --git a/src/crates/cli/src/project.rs b/src/crates/cli/src/project.rs
deleted file mode 100644
index cc8b4a2..0000000
--- a/src/crates/cli/src/project.rs
+++ /dev/null
@@ -1,35 +0,0 @@
-use anyhow::{Context, Result};
-use cargo_metadata::{camino::Utf8PathBuf, Package, Target};
-use glob::glob;
-
-/// A single package or crate that we wish to process.
-#[derive(Debug, Clone)]
-pub struct Project {
-    pub name: String,
-    pub benchmarks: Vec<String>,
-    pub source_dir: Utf8PathBuf,
-}
-
-impl Project {
-    pub fn new(package: Package) -> Self {
-        Project {
-            name: package.name.clone(),
-            source_dir: package.manifest_path.parent().unwrap().to_path_buf(),
-            benchmarks: package
-                .targets
-                .into_iter()
-                .filter(Target::is_bench)
-                .map(|t| t.name)
-                .collect(),
-        }
-    }
-
-    /// Find all primrose files (`.pr.rs`) in this project.
-    pub fn find_primrose_files(&self) -> Result<Vec<Utf8PathBuf>> {
-        glob(&format!("{}/**/*.pr.rs", self.source_dir))
-            .unwrap()
-            .flat_map(|p| p.map(|p| p.try_into()))
-            .collect::<Result<Vec<_>, _>>()
-            .context("error finding primrose files in project")
-    }
-}
-- 
cgit v1.2.3