6 files changed, 146 insertions, 10 deletions
diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs
index 19445f0..c483e12 100644
--- a/src/crates/candelabra/src/lib.rs
+++ b/src/crates/candelabra/src/lib.rs
@@ -10,7 +10,7 @@ pub mod candidates;
 mod confirmation;
 pub mod cost;
 pub mod profiler;
-mod select;
+pub mod select;
 
 mod paths;
 mod project;
diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs
index 398f4e0..5eb6734 100644
--- a/src/crates/candelabra/src/profiler/info.rs
+++ b/src/crates/candelabra/src/profiler/info.rs
@@ -4,7 +4,10 @@ use std::str::FromStr;
 use anyhow::{anyhow, Result};
 use serde::{Deserialize, Serialize};
 
-use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator};
+use crate::{
+    candidates::ImplName,
+    cost::{benchmark::OpName, Cost, CostModel, Estimator},
+};
 
 /// The information we get from profiling.
 /// Rather than keeping all results, we split them into 'similar enough' partitions,
@@ -26,6 +29,14 @@ type CollectionLifetime = (f64, HashMap<OpName, usize>);
 /// Breakdown of a cost value by operation
 pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
 
+/// A container selection split by size: `before` is used until n = `threshold`, then `after`.
+#[derive(Clone, Debug)]
+pub struct ContainerSplitSpec {
+    pub before: ImplName, // implementation used below the threshold
+    pub threshold: usize, // value of n at which the implementation changes
+    pub after: ImplName, // implementation used from the threshold onwards
+}
+
 impl ProfilerInfo {
     pub fn from(iter: impl Iterator<Item = Result<String>>) -> Result<Self> {
         Ok(Self(
@@ -34,6 +45,100 @@ impl ProfilerInfo {
         ))
     }
 
+    /// Check whether the cheapest implementation changes exactly once as `avg_n` grows and,
+    /// if so, whether switching implementation at that point is estimated to pay off.
+    ///
+    /// Sorts the partitions in place by `avg_n` (truncated to an integer). On success returns
+    /// the split and its cost: the cheapest cost in every partition plus the switching cost.
+    /// Returns `None` if there are no partitions, the cheapest candidate changes zero or several
+    /// times, an `insert`/`clear` entry is missing, or switching costs more than it saves.
+    pub fn check_for_nsplit(
+        &mut self,
+        candidates: &HashMap<&String, CostModel>,
+    ) -> Option<(ContainerSplitSpec, Cost)> {
+        self.0.sort_by_key(|p| p.avg_n as usize);
+        if self.0.is_empty() {
+            return None;
+        }
+
+        // Estimated cost of each candidate in each partition, in sorted partition order.
+        let costs_by_partitions = candidates
+            .iter()
+            .map(|(name, model)| {
+                let costs = self.0.iter().map(|p| p.estimate_cost(&model));
+                (name, costs.collect::<Vec<_>>())
+            })
+            .collect::<Vec<(_, _)>>();
+
+        // Cheapest candidate and its cost per partition; on a tie the later candidate wins.
+        let top_by_partition = (0..self.0.len())
+            .map(|i| {
+                costs_by_partitions.iter().fold(
+                    ("".to_string(), f64::MAX),
+                    |acc @ (_, val), (name, c)| {
+                        if val < c[i] {
+                            acc
+                        } else {
+                            (name.to_string(), c[i])
+                        }
+                    },
+                )
+            })
+            .collect::<Vec<_>>();
+
+        // First partition whose cheapest candidate differs from its predecessor's.
+        let split_idx = top_by_partition
+            .iter()
+            .enumerate()
+            // TODO: fudge?
+            .find(|(idx, (best, _))| *idx > 0 && *best != top_by_partition[idx - 1].0)
+            .map(|(idx, _)| idx)?;
+        let before = &top_by_partition[0].0;
+        let after = &top_by_partition[split_idx].0;
+
+        // Everything below the first change already agrees with `before`, so the split is
+        // proper exactly when the cheapest candidate never changes again after `split_idx`.
+        if top_by_partition[split_idx..].iter().any(|(best, _)| best != after) {
+            return None;
+        }
+
+        // Switching cost, estimated from `after`'s "insert" and `before`'s "clear" at `copy_n`.
+        let before_model = candidates.get(before).unwrap();
+        let after_model = candidates.get(after).unwrap();
+        let copy_n = self.0[split_idx].avg_n;
+        let switching_cost = after_model.by_op.get("insert")?.estimatef(copy_n)
+            + before_model.by_op.get("clear")?.estimatef(copy_n);
+
+        // Saving from the split onwards: what `before` costs there minus what `after` costs.
+        let before_costs = &costs_by_partitions
+            .iter()
+            .find(|(name, _)| **name == before)
+            .unwrap()
+            .1;
+        let after_costs = &costs_by_partitions
+            .iter()
+            .find(|(name, _)| **name == after)
+            .unwrap()
+            .1;
+        let saving = before_costs[split_idx..].iter().sum::<f64>()
+            - after_costs[split_idx..].iter().sum::<f64>();
+        if saving < switching_cost {
+            return None;
+        }
+
+        // The split runs each partition on its cheapest candidate but also pays for the
+        // switch; include it so the estimate is comparable with single-implementation costs.
+        let cheapest_total = top_by_partition.iter().map(|(_, v)| v).sum::<f64>();
+        Some((
+            ContainerSplitSpec {
+                before: before.to_string(),
+                threshold: copy_n as usize,
+                after: after.to_string(),
+            },
+            cheapest_total + switching_cost,
+        ))
+    }
+
     /// Estimate the cost of using the implementation with the given cost model
     pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
         self.0
diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs
index 568929b..83c7954 100644
--- a/src/crates/candelabra/src/profiler/mod.rs
+++ b/src/crates/candelabra/src/profiler/mod.rs
@@ -22,7 +22,7 @@ use crate::cost::benchmark::tee_output;
 use crate::project::Project;
 use crate::{Paths, State};
 
-pub use self::info::{ProfilerInfo, ProfilerPartition};
+pub use self::info::{ContainerSplitSpec, ProfilerInfo, ProfilerPartition};
 
 #[derive(Debug, Serialize, Deserialize)]
 pub(crate) struct CacheEntry {
diff --git a/src/crates/candelabra/src/select.rs b/src/crates/candelabra/src/select.rs
index 1146040..a8653a4 100644
--- a/src/crates/candelabra/src/select.rs
+++ b/src/crates/candelabra/src/select.rs
@@ -3,14 +3,21 @@ use std::collections::HashMap;
 use crate::{
     candidates::{ConTypeName, ImplName},
     cost::Cost,
+    profiler::ContainerSplitSpec,
     Project, State,
 };
 
 use anyhow::Result;
 
+#[derive(Clone, Debug)]
+pub enum Selection {
+    Singular(ImplName), // use this one implementation
+    Split(ContainerSplitSpec), // use `before` until n = `threshold`, then `after`
+}
+
 impl State {
     /// Select a container implementation for each container type in the given project
-    pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, ImplName>> {
+    pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, Selection>> {
         Ok(self
             .rank_candidates(project)?
             .into_iter()
@@ -31,22 +38,36 @@ impl State {
     pub fn rank_candidates(
         &self,
         project: &Project,
-    ) -> Result<HashMap<ConTypeName, Vec<(ImplName, Cost)>>> {
+    ) -> Result<HashMap<ConTypeName, Vec<(Selection, Cost)>>> {
         // get all candidates
         let all_candidates = self.project_candidate_list(project)?;
 
         // get profiling information
-        let profiles = self.profiler_info(project)?;
+        let mut profiles = self.profiler_info(project)?;
 
         let mut acc = HashMap::new();
         let con_type_names = all_candidates.iter().flat_map(|(_, cs)| cs.iter());
         for (con_type_name, candidates) in con_type_names {
             let mut costs = vec![];
-            let profile_info = profiles.get(con_type_name).unwrap();
+            let profile_info = profiles.get_mut(con_type_name).unwrap();
+
+            let cost_models = candidates
+                .iter()
+                .map(|name| Ok((name, self.cost_model(name)?)))
+                .collect::<Result<HashMap<_, _>>>()?;
+
             for candidate in candidates {
                 let model = self.cost_model(candidate)?;
-                costs.push((candidate.clone(), profile_info.estimate_cost(&model)));
+                costs.push((
+                    Selection::Singular(candidate.clone()),
+                    profile_info.estimate_cost(&model),
+                ));
             }
+
+            if let Some((split, cost)) = profile_info.check_for_nsplit(&cost_models) {
+                costs.push((Selection::Split(split), cost));
+            }
+
             acc.insert(con_type_name.to_string(), costs);
         }
 
diff --git a/src/crates/cli/src/select.rs b/src/crates/cli/src/select.rs
index c415d03..bb613f5 100644
--- a/src/crates/cli/src/select.rs
+++ b/src/crates/cli/src/select.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 
 use anyhow::Result;
 use argh::FromArgs;
+use candelabra::select::Selection;
 use log::info;
 use primrose::tools::nary_cartesian_product;
 use tabled::{builder::Builder, settings::Style};
@@ -28,9 +29,18 @@ impl State {
             builder.set_header(["name", "implementation", "estimated cost"]);
             for (con_type_name, candidates) in costs.iter() {
                 for (candidate, cost) in candidates.iter() {
+                    let name = match candidate {
+                        Selection::Singular(x) => x.to_string(),
+                        Selection::Split(split) => {
+                            format!(
+                                "{} until n={}, then {}",
+                                split.before, split.threshold, split.after
+                            )
+                        }
+                    };
                     builder.push_record([
                         con_type_name.as_str(),
-                        candidate.as_str(),
+                        name.as_str(),
                         cost.to_string().as_str(),
                     ]);
                 }
diff --git a/src/tests/example_sets/benches/main.rs b/src/tests/example_sets/benches/main.rs
index 938fd7c..f72afdb 100644
--- a/src/tests/example_sets/benches/main.rs
+++ b/src/tests/example_sets/benches/main.rs
@@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 
 fn run_benches(c: &mut Criterion) {
-    for size in [10, 500, 1000, 5000].iter() {
+    for size in [10, 500, 10_000, 50_000].iter() {
         c.bench_with_input(BenchmarkId::new("insert_n", size), size, |b, &n| {
             b.iter(|| example_sets::insert_n(n));
         });