diff options
-rw-r--r-- | src/crates/candelabra/src/lib.rs | 2 | ||||
-rw-r--r-- | src/crates/candelabra/src/profiler/info.rs | 107 | ||||
-rw-r--r-- | src/crates/candelabra/src/profiler/mod.rs | 2 | ||||
-rw-r--r-- | src/crates/candelabra/src/select.rs | 31 | ||||
-rw-r--r-- | src/crates/cli/src/select.rs | 12 | ||||
-rw-r--r-- | src/tests/example_sets/benches/main.rs | 2 |
6 files changed, 146 insertions, 10 deletions
diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs
index 19445f0..c483e12 100644
--- a/src/crates/candelabra/src/lib.rs
+++ b/src/crates/candelabra/src/lib.rs
@@ -10,7 +10,7 @@ pub mod candidates;
 mod confirmation;
 pub mod cost;
 pub mod profiler;
-mod select;
+pub mod select;
 
 mod paths;
 mod project;
diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs
index 398f4e0..5eb6734 100644
--- a/src/crates/candelabra/src/profiler/info.rs
+++ b/src/crates/candelabra/src/profiler/info.rs
@@ -4,7 +4,10 @@ use std::str::FromStr;
 use anyhow::{anyhow, Result};
 use serde::{Deserialize, Serialize};
 
-use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator};
+use crate::{
+    candidates::ImplName,
+    cost::{benchmark::OpName, Cost, CostModel, Estimator},
+};
 
 /// The information we get from profiling.
 /// Rather than keeping all results, we split them into 'similar enough' partitions,
@@ -26,6 +29,14 @@ type CollectionLifetime = (f64, HashMap<OpName, usize>);
 /// Breakdown of a cost value by operation
 pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
 
+/// A single result of container selection
+#[derive(Clone, Debug)]
+pub struct ContainerSplitSpec {
+    pub before: ImplName,
+    pub threshold: usize,
+    pub after: ImplName,
+}
+
 impl ProfilerInfo {
     pub fn from(iter: impl Iterator<Item = Result<String>>) -> Result<Self> {
         Ok(Self(
@@ -34,6 +45,100 @@ impl ProfilerInfo {
         ))
     }
 
+    pub fn check_for_nsplit(
+        &mut self,
+        candidates: &HashMap<&String, CostModel>,
+    ) -> Option<(ContainerSplitSpec, Cost)> {
+        self.0.sort_by_key(|p| p.avg_n as usize);
+        if self.0.is_empty() {
+            return None;
+        }
+
+        let costs_by_partitions = candidates
+            .iter()
+            .map(|(name, model)| {
+                (
+                    name,
+                    self.0
+                        .iter()
+                        .map(|p| p.estimate_cost(&model))
+                        .collect::<Vec<_>>(),
+                )
+            })
+            .collect::<Vec<(_, _)>>();
+
+        let top_by_partition = (0..self.0.len())
+            .map(|i| {
+                costs_by_partitions.iter().fold(
+                    ("".to_string(), f64::MAX),
+                    |acc @ (_, val), (name, c)| {
+                        if val < c[i] {
+                            acc
+                        } else {
+                            (name.to_string(), c[i])
+                        }
+                    },
+                )
+            })
+            .collect::<Vec<_>>();
+
+        let split_idx = top_by_partition
+            .iter()
+            .enumerate()
+            // TODO: fudge?
+            .find(|(idx, (best, _))| *idx > 0 && *best != top_by_partition[idx - 1].0)
+            .map(|(idx, _)| idx)?;
+
+        let split_is_proper = top_by_partition.iter().enumerate().all(|(i, (best, _))| {
+            if i >= split_idx {
+                *best == top_by_partition[split_idx].0
+            } else {
+                *best == top_by_partition[0].0
+            }
+        });
+
+        if !split_is_proper {
+            return None;
+        }
+
+        // calculate cost of switching
+        let before = &top_by_partition[0].0;
+        let after = &top_by_partition[split_idx].0;
+        let before_model = candidates.get(before).unwrap();
+        let after_model = candidates.get(after).unwrap();
+
+        let copy_n = self.0[split_idx].avg_n;
+        let switching_cost = after_model.by_op.get("insert")?.estimatef(copy_n)
+            + before_model.by_op.get("clear")?.estimatef(copy_n);
+
+        // see if it's "worth it"
+        let before_costs = &costs_by_partitions
+            .iter()
+            .find(|(name, _)| **name == before)
+            .unwrap()
+            .1;
+        let after_costs = &costs_by_partitions
+            .iter()
+            .find(|(name, _)| **name == after)
+            .unwrap()
+            .1;
+        let not_switching_cost = &before_costs[split_idx..].iter().sum::<f64>()
+            - &after_costs[split_idx..].iter().sum::<f64>();
+
+        if not_switching_cost < switching_cost {
+            None
+        } else {
+            Some((
+                ContainerSplitSpec {
+                    before: before.to_string(),
+                    threshold: copy_n as usize,
+                    after: after.to_string(),
+                },
+                top_by_partition.iter().map(|(_, v)| v).sum(),
+            ))
+        }
+    }
+
     /// Estimate the cost of using the implementation with the given cost model
     pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
         self.0
diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs
index 568929b..83c7954 100644
--- a/src/crates/candelabra/src/profiler/mod.rs
+++ b/src/crates/candelabra/src/profiler/mod.rs
@@ -22,7 +22,7 @@ use crate::cost::benchmark::tee_output;
 use crate::project::Project;
 use crate::{Paths, State};
 
-pub use self::info::{ProfilerInfo, ProfilerPartition};
+pub use self::info::{ContainerSplitSpec, ProfilerInfo, ProfilerPartition};
 
 #[derive(Debug, Serialize, Deserialize)]
 pub(crate) struct CacheEntry {
diff --git a/src/crates/candelabra/src/select.rs b/src/crates/candelabra/src/select.rs
index 1146040..a8653a4 100644
--- a/src/crates/candelabra/src/select.rs
+++ b/src/crates/candelabra/src/select.rs
@@ -3,14 +3,21 @@ use std::collections::HashMap;
 use crate::{
     candidates::{ConTypeName, ImplName},
     cost::Cost,
+    profiler::ContainerSplitSpec,
     Project, State,
 };
 
 use anyhow::Result;
 
+#[derive(Clone, Debug)]
+pub enum Selection {
+    Singular(ImplName),
+    Split(ContainerSplitSpec),
+}
+
 impl State {
     /// Select a container implementation for each container type in the given project
-    pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, ImplName>> {
+    pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, Selection>> {
         Ok(self
             .rank_candidates(project)?
             .into_iter()
@@ -31,22 +38,36 @@ impl State {
     pub fn rank_candidates(
         &self,
         project: &Project,
-    ) -> Result<HashMap<ConTypeName, Vec<(ImplName, Cost)>>> {
+    ) -> Result<HashMap<ConTypeName, Vec<(Selection, Cost)>>> {
         // get all candidates
         let all_candidates = self.project_candidate_list(project)?;
 
         // get profiling information
-        let profiles = self.profiler_info(project)?;
+        let mut profiles = self.profiler_info(project)?;
 
         let mut acc = HashMap::new();
         let con_type_names = all_candidates.iter().flat_map(|(_, cs)| cs.iter());
         for (con_type_name, candidates) in con_type_names {
             let mut costs = vec![];
-            let profile_info = profiles.get(con_type_name).unwrap();
+            let profile_info = profiles.get_mut(con_type_name).unwrap();
+
+            let cost_models = candidates
+                .iter()
+                .map(|name| Ok((name, self.cost_model(name)?)))
+                .collect::<Result<HashMap<_, _>>>()?;
+
             for candidate in candidates {
                 let model = self.cost_model(candidate)?;
-                costs.push((candidate.clone(), profile_info.estimate_cost(&model)));
+                costs.push((
+                    Selection::Singular(candidate.clone()),
+                    profile_info.estimate_cost(&model),
+                ));
             }
+
+            if let Some((split, cost)) = profile_info.check_for_nsplit(&cost_models) {
+                costs.push((Selection::Split(split), cost));
+            }
+
             acc.insert(con_type_name.to_string(), costs);
         }
 
diff --git a/src/crates/cli/src/select.rs b/src/crates/cli/src/select.rs
index c415d03..bb613f5 100644
--- a/src/crates/cli/src/select.rs
+++ b/src/crates/cli/src/select.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 
 use anyhow::Result;
 use argh::FromArgs;
+use candelabra::select::Selection;
 use log::info;
 use primrose::tools::nary_cartesian_product;
 use tabled::{builder::Builder, settings::Style};
@@ -28,9 +29,18 @@ impl State {
         builder.set_header(["name", "implementation", "estimated cost"]);
         for (con_type_name, candidates) in costs.iter() {
             for (candidate, cost) in candidates.iter() {
+                let name = match candidate {
+                    Selection::Singular(x) => x.to_string(),
+                    Selection::Split(split) => {
+                        format!(
+                            "{} until n={}, then {}",
+                            split.before, split.threshold, split.after
+                        )
+                    }
+                };
                 builder.push_record([
                     con_type_name.as_str(),
-                    candidate.as_str(),
+                    name.as_str(),
                     cost.to_string().as_str(),
                 ]);
             }
diff --git a/src/tests/example_sets/benches/main.rs b/src/tests/example_sets/benches/main.rs
index 938fd7c..f72afdb 100644
--- a/src/tests/example_sets/benches/main.rs
+++ b/src/tests/example_sets/benches/main.rs
@@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 
 fn run_benches(c: &mut Criterion) {
-    for size in [10, 500, 1000, 5000].iter() {
+    for size in [10, 500, 10_000, 50_000].iter() {
         c.bench_with_input(BenchmarkId::new("insert_n", size), size, |b, &n| {
             b.iter(|| example_sets::insert_n(n));
         });