From 6c8407043120f3855dd0229c0f838041c7f0eb38 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton
Date: Wed, 31 Jan 2024 17:43:05 +0000
Subject: lints & refactors

---
 src/crates/candelabra/src/profiler.rs      | 555 ----------------------------
 src/crates/candelabra/src/profiler/info.rs | 372 +++++++++++++++++++
 src/crates/candelabra/src/profiler/mod.rs  | 200 +++++++++++
 src/crates/cli/src/display.rs              |   2 +-
 4 files changed, 573 insertions(+), 556 deletions(-)
 delete mode 100644 src/crates/candelabra/src/profiler.rs
 create mode 100644 src/crates/candelabra/src/profiler/info.rs
 create mode 100644 src/crates/candelabra/src/profiler/mod.rs

diff --git a/src/crates/candelabra/src/profiler.rs b/src/crates/candelabra/src/profiler.rs
deleted file mode 100644
index 4677bbc..0000000
--- a/src/crates/candelabra/src/profiler.rs
+++ /dev/null
@@ -1,555 +0,0 @@
-//! Profiling applications for info about container usage
-
-use anyhow::{anyhow, Context, Result};
-use camino::{Utf8Path, Utf8PathBuf};
-use log::{debug, log_enabled, trace, warn, Level};
-use primrose::ContainerSelector;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::io::Write;
-use std::str::FromStr;
-use std::{
-    fs::{read_dir, File},
-    io::Read,
-    process::{Command, Stdio},
-};
-use tempfile::tempdir;
-
-use crate::cache::{gen_tree_hash, FileCache};
-use crate::candidates::ConTypeName;
-use crate::cost::benchmark::{tee_output, OpName};
-use crate::cost::{Cost, CostModel, Estimator};
-use crate::project::Project;
-use crate::{Paths, State};
-
-#[derive(Debug, Serialize, Deserialize)]
-pub(crate) struct CacheEntry {
-    proj_hash: u64,
-    proj_location: Utf8PathBuf,
-    info: HashMap<ConTypeName, ProfilerInfo>,
-}
-
-/// The information we get from profiling.
-/// Rather than keeping all results, we split them into 'similar enough' partitions,
-/// with the idea that each partition will probably have the same best implementation.
-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
-pub struct ProfilerInfo(pub Vec<ProfilerPartition>);
-
-/// A vector of container lifetimes which have similar characteristics
-#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
-pub struct ProfilerPartition {
-    pub occurences: f64,
-    pub avg_n: f64,
-    pub avg_op_counts: HashMap<OpName, f64>,
-}
-
-/// Breakdown of a cost value
-pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
-
-impl ProfilerInfo {
-    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
-        self.0
-            .iter()
-            .map(|cl| cl.estimate_cost(cost_model))
-            .sum::<f64>()
-    }
-
-    pub fn cost_breakdown<'a>(&self, cost_model: &'a CostModel) -> CostBreakdown<'a> {
-        cost_model
-            .by_op
-            .iter()
-            .map(|(op, estimator)| {
-                (
-                    op,
-                    self.0
-                        .iter()
-                        .map(|cl| cl.op_cost(op, estimator))
-                        .sum::<f64>(),
-                )
-            })
-            .collect()
-    }
-}
-
-impl ProfilerPartition {
-    pub fn avg_op_count(&self, op: &str) -> f64 {
-        *self
-            .avg_op_counts
-            .get(op)
-            .expect("invalid op passed to op_count")
-    }
-
-    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
-        cost_model
-            .by_op
-            .iter()
-            .map(|(op, estimator)| self.op_cost(op, estimator))
-            .sum::<f64>()
-    }
-
-    pub fn op_cost(&self, op: &str, estimator: &Estimator) -> f64 {
-        estimator.estimatef(self.avg_n) * self.avg_op_count(op) * self.occurences
-    }
-
-    fn add_lifetime(&mut self, (n, ops): (f64, HashMap<OpName, usize>)) {
-        self.avg_n = self.avg_n + (n - self.avg_n) / (self.occurences + 1.0);
-        for (op, count) in ops {
-            let count = count as f64;
-            self.avg_op_counts
-                .entry(op)
-                .and_modify(|avg| *avg = *avg + (count - *avg) / (self.occurences + 1.0))
-                .or_insert(count);
-        }
-        self.occurences += 1.0;
-    }
-}
-
-impl State {
-    pub(crate) fn profiler_info_cache(paths: &Paths) -> Result<FileCache<String, CacheEntry>> {
-        FileCache::new(
-            paths.target_dir.join("candelabra").join("profiler_info"),
-            |_, v: &CacheEntry| {
-                let proj_hash = gen_tree_hash(&v.proj_location).unwrap_or(0);
-                v.proj_hash == proj_hash
-            },
-        )
-    }
-
-    /// Get or calculate profiler info for the given project.
-    /// Results are cached by the modification time of the project's source tree
-    pub fn profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
-        match self.profiler_info_cache.find(&project.name)? {
-            Some(x) => Ok(x.info),
-            None => {
-                let info = self.calc_profiler_info(project)?;
-
-                let proj_hash = gen_tree_hash(&project.source_dir)
-                    .context("Error generating project directory hash")?;
-                if let Err(e) = self.profiler_info_cache.put(
-                    &project.name,
-                    &CacheEntry {
-                        proj_hash,
-                        proj_location: project.source_dir.clone(),
-                        info: info.clone(),
-                    },
-                ) {
-                    warn!("Error caching profiler info for {}: {}", &project.name, e);
-                }
-
-                Ok(info)
-            }
-        }
-    }
-
-    /// Calculate profiler info for the given project.
-    fn calc_profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
-        let candidate_list = self.project_candidate_list(project)?;
-        let con_types = candidate_list
-            .iter()
-            .flat_map(|(_, con_types)| con_types.iter())
-            .map(|(id, _)| id)
-            .collect::<Vec<_>>();
-
-        self.project_profiling_prep(project, &con_types)?;
-        let mut acc = HashMap::new();
-        for name in project.benchmarks.iter() {
-            for (con_type, new_results) in self
-                .profile_benchmark(project, name, &con_types)
-                .with_context(|| format!("Error profiling benchmark {}", name))?
-            {
-                acc.entry(con_type)
-                    .and_modify(|pi: &mut ProfilerInfo| pi.0.extend(new_results.0.iter().cloned()))
-                    .or_insert(new_results);
-            }
-        }
-
-        Ok(acc)
-    }
-
-    /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper.
-    fn project_profiling_prep(&self, project: &Project, con_types: &[&String]) -> Result<()> {
-        for (file, candidates) in self.project_candidate_list(project)? {
-            self.file_profiling_prep(&file, &candidates, con_types)
-                .with_context(|| format!("error preparing {} for profiling", file))?;
-        }
-
-        Ok(())
-    }
-
-    /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper.
-    fn file_profiling_prep(
-        &self,
-        file: &Utf8Path,
-        candidates: &[(String, Vec<String>)],
-        con_types: &[&String],
-    ) -> Result<()> {
-        debug!("Setting up {} for profiling", file);
-
-        let selector = ContainerSelector::from_path(
-            file.as_std_path(),
-            self.paths.library_src.as_std_path(),
-            self.model_size,
-        )
-        .context("error creating container selector")?;
-
-        let chosen = candidates
-            .iter()
-            .map(|(dest_name, impls)| (dest_name, &impls[0]))
-            .collect::<Vec<_>>();
-
-        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| {
-            (
-                *d,
-                con_types.iter().position(|id| id == d).unwrap(),
-                c.as_str(),
-            )
-        }));
-
-        let new_path = file.to_string().replace(".pr", "");
-
-        trace!("New code: {}", new_code);
-        trace!("New path: {}", new_path);
-
-        let mut f = File::create(new_path).context("error creating new source file")?;
-        f.write_all(new_code.as_bytes())
-            .context("error writing new code")?;
-
-        Ok(())
-    }
-
-    /// Run the given benchmark on the project, and parse the resulting profiling information.
-    fn profile_benchmark(
-        &self,
-        project: &Project,
-        name: &str,
-        con_types: &[&String],
-    ) -> Result<HashMap<String, ProfilerInfo>> {
-        let profiler_out_dir = tempdir()?;
-        debug!(
-            "Running benchmark {} with out dir {:?}",
-            name, profiler_out_dir
-        );
-
-        let child = Command::new("cargo")
-            .current_dir(&project.source_dir)
-            .args(["bench", "--bench", name])
-            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
-            .stdout(Stdio::piped())
-            .stderr(if log_enabled!(Level::Debug) {
-                Stdio::inherit()
-            } else {
-                Stdio::null()
-            })
-            .spawn()
-            .context("Error running bench command")?;
-
-        tee_output(child)?;
-
-        let mut con_type_results = HashMap::new();
-        for dir in read_dir(&profiler_out_dir)? {
-            // each directory has an index, corresponding to the container type name
-            let dir = dir?;
-            let con_type: String = con_types[dir
-                .file_name()
-                .into_string()
-                .unwrap()
-                .parse::<usize>()
-                .unwrap()]
-            .to_string();
-
-            let partitions = read_dir(dir.path())?
-                .map(|f| -> Result<String> {
-                    // read file contents
-                    let mut contents = String::new();
-                    File::open(f?.path())?.read_to_string(&mut contents)?;
-                    Ok(contents)
-                })
-                .map(|contents| parse_output(&contents?))
-                .fold(Ok(vec![]), partition_costs)?;
-
-            con_type_results.insert(con_type, ProfilerInfo(partitions));
-        }
-
-        Ok(con_type_results)
-    }
-}
-
-type CollectionLifetime = (f64, HashMap<OpName, usize>);
-
-/// Attempt to compress an iterator of collection lifetimes into as few partitions as possible
-fn partition_costs(
-    acc: Result<Vec<ProfilerPartition>>,
-    cl: Result<CollectionLifetime>,
-) -> Result<Vec<ProfilerPartition>> {
-    // error short circuiting
-    let (mut acc, (n, ops)) = (acc?, cl?);
-
-    // attempt to find a partition with a close enough n value
-    let (closest_idx, closest_delta) =
-        acc.iter()
-            .enumerate()
-            .fold((0, f64::MAX), |acc @ (_, val), (idx, partition)| {
-                let delta = (partition.avg_n - n).abs();
-                if delta < val {
-                    (idx, delta)
-                } else {
-                    acc
-                }
-            });
-
-    if closest_delta < 100.0 {
-        acc[closest_idx].add_lifetime((n, ops));
-    } else {
-        // add a new partition
-        acc.push(ProfilerPartition {
-            occurences: 1.0,
-            avg_n: n,
-            avg_op_counts: ops.into_iter().map(|(k, v)| (k, v as f64)).collect(),
-        })
-    }
-
-    Ok(acc)
-}
-
-/// Parse the output of the profiler
-fn parse_output(contents: &str) -> Result<(f64, HashMap<OpName, usize>)> {
-    let mut lines = contents.lines().map(usize::from_str);
-    let missing_line_err = || anyhow!("wrong number of lines in profiler output");
-    let n = lines.next().ok_or_else(missing_line_err)??;
-    let mut op_counts = HashMap::new();
-    op_counts.insert(
-        "contains".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "insert".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "clear".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "remove".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "first".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "last".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "nth".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "push".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "pop".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "get".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-
-    Ok((n as f64, op_counts))
-}
-
-#[cfg(test)]
-mod tests {
-    use std::collections::HashMap;
-
-    use crate::{
-        cost::{CostModel, Estimator},
-        profiler::partition_costs,
-    };
-
-    use super::{ProfilerInfo, ProfilerPartition};
-
-    const EPSILON: f64 = 1e-5;
-    fn assert_feq(left: f64, right: f64, msg: &'static str) {
-        assert!((left - right).abs() < EPSILON, "{}", msg);
-    }
-
-    fn linear_estimator() -> Estimator {
-        Estimator {
-            coeffs: [0.0, 1.0, 0.0, 0.0],
-            transform_x: (0.0, 1.0),
-            transform_y: (0.0, 1.0),
-        }
-    }
-
-    #[test]
-    fn test_cost_single_partition() {
-        let info = ProfilerInfo(vec![ProfilerPartition {
-            occurences: 1.0,
-            avg_n: 100.0,
-            avg_op_counts: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 100.0);
-                map
-            },
-        }]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 10_000.0, "per op = 100 * 100 ops");
-    }
-
-    #[test]
-    fn test_cost_multi_partitions_sums() {
-        let info = ProfilerInfo(vec![
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 100.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 100.0);
-                    map
-                },
-            },
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 10.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 10.0);
-                    map
-                },
-            },
-        ]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 10_100.0, "100ns/op * 100 ops + 10ns/op * 10 ops");
-    }
-
-    #[test]
-    fn test_cost_multi_partitions_sums_weighted() {
-        let info = ProfilerInfo(vec![
-            ProfilerPartition {
-                occurences: 2.0,
-                avg_n: 100.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 100.0);
-                    map
-                },
-            },
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 10.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 10.0);
-                    map
-                },
-            },
-        ]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 20_100.0, "100ns/op * 100 ops * 2 + 10ns/op * 10 ops");
-    }
-
-    #[test]
-    fn test_partition_costs_merges_duplicates() {
-        let cl = (100.0, {
-            let mut map = HashMap::new();
-            map.insert("insert".to_string(), 10);
-            map
-        });
-        let outp = vec![Ok(cl.clone()), Ok(cl)]
-            .into_iter()
-            .fold(Ok(vec![]), partition_costs)
-            .unwrap();
-
-        assert_eq!(outp.len(), 1, "merged duplicates");
-        assert_eq!(outp[0].occurences, 2.0, "weight updated");
-        assert_feq(outp[0].avg_n, 100.0, "average n correct");
-        assert_feq(
-            *outp[0].avg_op_counts.get("insert").unwrap(),
-            10.0,
-            "average op count correct",
-        );
-    }
-
-    #[test]
-    fn test_partition_costs_merges_close() {
-        let outp = vec![
-            Ok((100.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 50);
-                map
-            })),
-            Ok((110.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 100);
-                map
-            })),
-        ]
-        .into_iter()
-        .fold(Ok(vec![]), partition_costs)
-        .unwrap();
-
-        assert_eq!(outp.len(), 1, "merged duplicates");
-        assert_eq!(outp[0].occurences, 2.0, "weight updated");
-        assert_feq(outp[0].avg_n, 105.0, "average n correct");
-        assert_feq(
-            *outp[0].avg_op_counts.get("insert").unwrap(),
-            75.0,
-            "average op count correct",
-        );
-    }
-    #[test]
-    fn test_partition_costs_keeps_separate() {
-        let outp = vec![
-            Ok((100.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 10);
-                map
-            })),
-            Ok((999999.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 10);
-                map
-            })),
-        ]
-        .into_iter()
-        .fold(Ok(vec![]), partition_costs)
-        .unwrap();
-
-        assert_eq!(
-            outp.len(),
-            2,
-            "large difference in n values causes partition"
-        );
-    }
-}
diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs
new file mode 100644
index 0000000..dc9a03c
--- /dev/null
+++ b/src/crates/candelabra/src/profiler/info.rs
@@ -0,0 +1,372 @@
+use std::collections::HashMap;
+use std::str::FromStr;
+
+use anyhow::{anyhow, Result};
+use serde::{Deserialize, Serialize};
+
+use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator};
+
+/// The information we get from profiling.
+/// Rather than keeping all results, we split them into 'similar enough' partitions,
+/// with the idea that each partition will probably have the same best implementation.
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct ProfilerInfo(pub Vec<ProfilerPartition>);
+
+/// A vector of container lifetimes which have similar usage characteristics
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct ProfilerPartition {
+    pub occurences: f64,
+    pub avg_n: f64,
+    pub avg_op_counts: HashMap<OpName, f64>,
+}
+
+/// Lifetime of a single allocated collection.
+type CollectionLifetime = (f64, HashMap<OpName, usize>);
+
+/// Breakdown of a cost value by operation
+pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
+
+impl ProfilerInfo {
+    pub fn from(iter: impl Iterator<Item = Result<String>>) -> Result<Self> {
+        Ok(Self(
+            iter.map(|contents| parse_output(&contents?))
+                .fold(Ok(vec![]), partition_costs)?,
+        ))
+    }
+
+    /// Estimate the cost of using the implementation with the given cost model
+    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
+        self.0
+            .iter()
+            .map(|cl| cl.estimate_cost(cost_model))
+            .sum::<f64>()
+    }
+
+    /// Get a breakdown of the cost by operation
+    pub fn cost_breakdown<'a>(&self, cost_model: &'a CostModel) -> CostBreakdown<'a> {
+        cost_model
+            .by_op
+            .iter()
+            .map(|(op, estimator)| {
+                (
+                    op,
+                    self.0
+                        .iter()
+                        .map(|cl| cl.op_cost(op, estimator))
+                        .sum::<f64>(),
+                )
+            })
+            .collect()
+    }
+}
+
+impl ProfilerPartition {
+    pub fn avg_op_count(&self, op: &str) -> f64 {
+        *self
+            .avg_op_counts
+            .get(op)
+            .expect("invalid op passed to op_count")
+    }
+
+    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
+        cost_model
+            .by_op
+            .iter()
+            .map(|(op, estimator)| self.op_cost(op, estimator))
+            .sum::<f64>()
+    }
+
+    pub fn op_cost(&self, op: &str, estimator: &Estimator) -> f64 {
+        estimator.estimatef(self.avg_n) * self.avg_op_count(op) * self.occurences
+    }
+
+    fn add_lifetime(&mut self, (n, ops): (f64, HashMap<OpName, usize>)) {
+        self.avg_n = self.avg_n + (n - self.avg_n) / (self.occurences + 1.0);
+        for (op, count) in ops {
+            let count = count as f64;
+            self.avg_op_counts
+                .entry(op)
+                .and_modify(|avg| *avg = *avg + (count - *avg) / (self.occurences + 1.0))
+                .or_insert(count);
+        }
+        self.occurences += 1.0;
+    }
+}
+
+/// Attempt to compress an iterator of collection lifetimes into as few partitions as possible
+fn partition_costs(
+    acc: Result<Vec<ProfilerPartition>>,
+    cl: Result<CollectionLifetime>,
+) -> Result<Vec<ProfilerPartition>> {
+    // error short circuiting
+    let (mut acc, (n, ops)) = (acc?, cl?);
+
+    // attempt to find a partition with a close enough n value
+    let (closest_idx, closest_delta) =
+        acc.iter()
+            .enumerate()
+            .fold((0, f64::MAX), |acc @ (_, val), (idx, partition)| {
+                let delta = (partition.avg_n - n).abs();
+                if delta < val {
+                    (idx, delta)
+                } else {
+                    acc
+                }
+            });
+
+    if closest_delta < 100.0 {
+        acc[closest_idx].add_lifetime((n, ops));
+    } else {
+        // add a new partition
+        acc.push(ProfilerPartition {
+            occurences: 1.0,
+            avg_n: n,
+            avg_op_counts: ops.into_iter().map(|(k, v)| (k, v as f64)).collect(),
+        })
+    }
+
+    Ok(acc)
+}
+
+/// Parse the output of the profiler
+fn parse_output(contents: &str) -> Result<(f64, HashMap<OpName, usize>)> {
+    let mut lines = contents.lines().map(usize::from_str);
+    let missing_line_err = || anyhow!("wrong number of lines in profiler output");
+    let n = lines.next().ok_or_else(missing_line_err)??;
+    let mut op_counts = HashMap::new();
+    op_counts.insert(
+        "contains".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "insert".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "clear".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "remove".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "first".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "last".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "nth".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "push".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "pop".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "get".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+
+    Ok((n as f64, op_counts))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use crate::{
+        cost::{CostModel, Estimator},
+        profiler::info::partition_costs,
+    };
+
+    use super::{ProfilerInfo, ProfilerPartition};
+
+    const EPSILON: f64 = 1e-5;
+    fn assert_feq(left: f64, right: f64, msg: &'static str) {
+        assert!((left - right).abs() < EPSILON, "{}", msg);
+    }
+
+    fn linear_estimator() -> Estimator {
+        Estimator {
+            coeffs: [0.0, 1.0, 0.0, 0.0],
+            transform_x: (0.0, 1.0),
+            transform_y: (0.0, 1.0),
+        }
+    }
+
+    #[test]
+    fn test_cost_single_partition() {
+        let info = ProfilerInfo(vec![ProfilerPartition {
+            occurences: 1.0,
+            avg_n: 100.0,
+            avg_op_counts: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 100.0);
+                map
+            },
+        }]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 10_000.0, "per op = 100 * 100 ops");
+    }
+
+    #[test]
+    fn test_cost_multi_partitions_sums() {
+        let info = ProfilerInfo(vec![
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 100.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 100.0);
+                    map
+                },
+            },
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 10.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 10.0);
+                    map
+                },
+            },
+        ]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 10_100.0, "100ns/op * 100 ops + 10ns/op * 10 ops");
+    }
+
+    #[test]
+    fn test_cost_multi_partitions_sums_weighted() {
+        let info = ProfilerInfo(vec![
+            ProfilerPartition {
+                occurences: 2.0,
+                avg_n: 100.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 100.0);
+                    map
+                },
+            },
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 10.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 10.0);
+                    map
+                },
+            },
+        ]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 20_100.0, "100ns/op * 100 ops * 2 + 10ns/op * 10 ops");
+    }
+
+    #[test]
+    fn test_partition_costs_merges_duplicates() {
+        let cl = (100.0, {
+            let mut map = HashMap::new();
+            map.insert("insert".to_string(), 10);
+            map
+        });
+        let outp = vec![Ok(cl.clone()), Ok(cl)]
+            .into_iter()
+            .fold(Ok(vec![]), partition_costs)
+            .unwrap();
+
+        assert_eq!(outp.len(), 1, "merged duplicates");
+        assert_eq!(outp[0].occurences, 2.0, "weight updated");
+        assert_feq(outp[0].avg_n, 100.0, "average n correct");
+        assert_feq(
+            *outp[0].avg_op_counts.get("insert").unwrap(),
+            10.0,
+            "average op count correct",
+        );
+    }
+
+    #[test]
+    fn test_partition_costs_merges_close() {
+        let outp = vec![
+            Ok((100.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 50);
+                map
+            })),
+            Ok((110.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 100);
+                map
+            })),
+        ]
+        .into_iter()
+        .fold(Ok(vec![]), partition_costs)
+        .unwrap();
+
+        assert_eq!(outp.len(), 1, "merged duplicates");
+        assert_eq!(outp[0].occurences, 2.0, "weight updated");
+        assert_feq(outp[0].avg_n, 105.0, "average n correct");
+        assert_feq(
+            *outp[0].avg_op_counts.get("insert").unwrap(),
+            75.0,
+            "average op count correct",
+        );
+    }
+    #[test]
+    fn test_partition_costs_keeps_separate() {
+        let outp = vec![
+            Ok((100.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 10);
+                map
+            })),
+            Ok((999999.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 10);
+                map
+            })),
+        ]
+        .into_iter()
+        .fold(Ok(vec![]), partition_costs)
+        .unwrap();
+
+        assert_eq!(
+            outp.len(),
+            2,
+            "large difference in n values causes partition"
+        );
+    }
+}
diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs
new file mode 100644
index 0000000..568929b
--- /dev/null
+++ b/src/crates/candelabra/src/profiler/mod.rs
@@ -0,0 +1,200 @@
+//! Profiling applications for info about container usage
+
+mod info;
+
+use anyhow::{Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use log::{debug, log_enabled, trace, warn, Level};
+use primrose::ContainerSelector;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::io::Write;
+use std::{
+    fs::{read_dir, File},
+    io::Read,
+    process::{Command, Stdio},
+};
+use tempfile::tempdir;
+
+use crate::cache::{gen_tree_hash, FileCache};
+use crate::candidates::ConTypeName;
+use crate::cost::benchmark::tee_output;
+use crate::project::Project;
+use crate::{Paths, State};
+
+pub use self::info::{ProfilerInfo, ProfilerPartition};
+
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct CacheEntry {
+    proj_hash: u64,
+    proj_location: Utf8PathBuf,
+    info: HashMap<ConTypeName, ProfilerInfo>,
+}
+
+impl State {
+    pub(crate) fn profiler_info_cache(paths: &Paths) -> Result<FileCache<String, CacheEntry>> {
+        FileCache::new(
+            paths.target_dir.join("candelabra").join("profiler_info"),
+            |_, v: &CacheEntry| {
+                let proj_hash = gen_tree_hash(&v.proj_location).unwrap_or(0);
+                v.proj_hash == proj_hash
+            },
+        )
+    }
+
+    /// Get or calculate profiler info for the given project.
+    /// Results are cached by the modification time of the project's source tree
+    pub fn profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
+        match self.profiler_info_cache.find(&project.name)? {
+            Some(x) => Ok(x.info),
+            None => {
+                let info = self.calc_profiler_info(project)?;
+
+                let proj_hash = gen_tree_hash(&project.source_dir)
+                    .context("Error generating project directory hash")?;
+                if let Err(e) = self.profiler_info_cache.put(
+                    &project.name,
+                    &CacheEntry {
+                        proj_hash,
+                        proj_location: project.source_dir.clone(),
+                        info: info.clone(),
+                    },
+                ) {
+                    warn!("Error caching profiler info for {}: {}", &project.name, e);
+                }
+
+                Ok(info)
+            }
+        }
+    }
+
+    /// Calculate profiler info for the given project.
+    fn calc_profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
+        let candidate_list = self.project_candidate_list(project)?;
+        let con_types = candidate_list
+            .iter()
+            .flat_map(|(_, con_types)| con_types.iter())
+            .map(|(id, _)| id)
+            .collect::<Vec<_>>();
+
+        self.project_profiling_prep(project, &con_types)?;
+        let mut acc = HashMap::new();
+        for name in project.benchmarks.iter() {
+            for (con_type, new_results) in self
+                .profile_benchmark(project, name, &con_types)
+                .with_context(|| format!("Error profiling benchmark {}", name))?
+            {
+                acc.entry(con_type)
+                    .and_modify(|pi: &mut ProfilerInfo| pi.0.extend(new_results.0.iter().cloned()))
+                    .or_insert(new_results);
+            }
+        }
+
+        Ok(acc)
+    }
+
+    /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn project_profiling_prep(&self, project: &Project, con_types: &[&String]) -> Result<()> {
+        for (file, candidates) in self.project_candidate_list(project)? {
+            self.file_profiling_prep(&file, &candidates, con_types)
+                .with_context(|| format!("error preparing {} for profiling", file))?;
+        }
+
+        Ok(())
+    }
+
+    /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn file_profiling_prep(
+        &self,
+        file: &Utf8Path,
+        candidates: &[(String, Vec<String>)],
+        con_types: &[&String],
+    ) -> Result<()> {
+        debug!("Setting up {} for profiling", file);
+
+        let selector = ContainerSelector::from_path(
+            file.as_std_path(),
+            self.paths.library_src.as_std_path(),
+            self.model_size,
+        )
+        .context("error creating container selector")?;
+
+        let chosen = candidates
+            .iter()
+            .map(|(dest_name, impls)| (dest_name, &impls[0]))
+            .collect::<Vec<_>>();
+
+        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| {
+            (
+                *d,
+                con_types.iter().position(|id| id == d).unwrap(),
+                c.as_str(),
+            )
+        }));
+
+        let new_path = file.to_string().replace(".pr", "");
+
+        trace!("New code: {}", new_code);
+        trace!("New path: {}", new_path);
+
+        let mut f = File::create(new_path).context("error creating new source file")?;
+        f.write_all(new_code.as_bytes())
+            .context("error writing new code")?;
+
+        Ok(())
+    }
+
+    /// Run the given benchmark on the project, and parse the resulting profiling information.
+    fn profile_benchmark(
+        &self,
+        project: &Project,
+        name: &str,
+        con_types: &[&String],
+    ) -> Result<HashMap<String, ProfilerInfo>> {
+        let profiler_out_dir = tempdir()?;
+        debug!(
+            "Running benchmark {} with out dir {:?}",
+            name, profiler_out_dir
+        );
+
+        let child = Command::new("cargo")
+            .current_dir(&project.source_dir)
+            .args(["bench", "--bench", name])
+            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
+            .stdout(Stdio::piped())
+            .stderr(if log_enabled!(Level::Debug) {
+                Stdio::inherit()
+            } else {
+                Stdio::null()
+            })
+            .spawn()
+            .context("Error running bench command")?;
+
+        tee_output(child)?;
+
+        let mut con_type_results = HashMap::new();
+        for dir in read_dir(&profiler_out_dir)? {
+            // each directory has an index, corresponding to the container type name
+            let dir = dir?;
+            let con_type: String = con_types[dir
+                .file_name()
+                .into_string()
+                .unwrap()
+                .parse::<usize>()
+                .unwrap()]
+            .to_string();
+
+            con_type_results.insert(
+                con_type,
+                ProfilerInfo::from(read_dir(dir.path())?.map(|f| -> Result<String> {
+                    // read file contents
+                    let mut contents = String::new();
+                    File::open(f?.path())?.read_to_string(&mut contents)?;
+                    Ok(contents)
+                }))?,
+            );
+        }
+
+        Ok(con_type_results)
+    }
+}
diff --git a/src/crates/cli/src/display.rs b/src/crates/cli/src/display.rs
index 2ce9039..2debede 100644
--- a/src/crates/cli/src/display.rs
+++ b/src/crates/cli/src/display.rs
@@ -51,7 +51,7 @@ pub fn display_profiler_info(profile_info: ProfilerInfo) {
                 ("occurences".to_string(), p.occurences),
             ]
             .into_iter()
-            .chain(p.avg_op_counts.into_iter()),
+            .chain(p.avg_op_counts),
         )
     }))
 }
-- 
cgit v1.2.3
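
For readers tracing the data flow: parse_output above fixes the on-disk format the profiler wrapper is expected to write. Each file holds eleven integers, one per line: the lifetime's n value first, then the counts for contains, insert, clear, remove, first, last, nth, push, pop and get, in that order. A file recording one collection lifetime might therefore look like the following (values invented purely for illustration):

1000
12
1000
1
0
3
3
240
1000
87
419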
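
ProfilerPartition::add_lifetime avoids storing every observed lifetime by folding each new sample into a running mean: avg_n (and each avg_op_count) is updated as m <- m + (x - m) / (k + 1), with the occurences field standing in for k. A self-contained sketch of that update, separate from the crate's code, showing it agrees with the batch mean:

// Standalone demonstration of the incremental-mean update used by
// `add_lifetime`; not part of the candelabra crate itself.
fn main() {
    let samples = [100.0_f64, 110.0, 90.0, 105.0];

    let mut avg = 0.0;
    let mut occurences = 0.0; // same spelling as the struct field
    for x in samples {
        // m <- m + (x - m) / (k + 1)
        avg += (x - avg) / (occurences + 1.0);
        occurences += 1.0;
    }

    // Computing the mean in one pass over all samples gives the same result.
    let batch = samples.iter().sum::<f64>() / samples.len() as f64;
    assert!((avg - batch).abs() < 1e-9);
    println!("running mean = {avg}, batch mean = {batch}");
}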
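
The cost calculation itself is visible in op_cost: each partition contributes estimator(avg_n) * avg_op_count(op) * occurences per operation, and estimate_cost sums this over all operations and all partitions. Working through test_cost_multi_partitions_sums_weighted with the linear estimator (estimated cost n at size n): the first partition gives 100 * 100 * 2 = 20,000, the second gives 10 * 10 * 1 = 100, for a total of 20,100, which is exactly the value the test asserts.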