From 10d1da37f9f5ef38363f005508b7dec2bfd867fd Mon Sep 17 00:00:00 2001 From: Aria Shrimpton Date: Wed, 31 Jan 2024 22:59:43 +0000 Subject: attempt to detect when switching containers midway through would be better --- src/crates/candelabra/src/lib.rs | 2 +- src/crates/candelabra/src/profiler/info.rs | 107 ++++++++++++++++++++++++++++- src/crates/candelabra/src/profiler/mod.rs | 2 +- src/crates/candelabra/src/select.rs | 31 +++++++-- src/crates/cli/src/select.rs | 12 +++- 5 files changed, 145 insertions(+), 9 deletions(-) (limited to 'src/crates') diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs index 19445f0..c483e12 100644 --- a/src/crates/candelabra/src/lib.rs +++ b/src/crates/candelabra/src/lib.rs @@ -10,7 +10,7 @@ pub mod candidates; mod confirmation; pub mod cost; pub mod profiler; -mod select; +pub mod select; mod paths; mod project; diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs index 398f4e0..5eb6734 100644 --- a/src/crates/candelabra/src/profiler/info.rs +++ b/src/crates/candelabra/src/profiler/info.rs @@ -4,7 +4,10 @@ use std::str::FromStr; use anyhow::{anyhow, Result}; use serde::{Deserialize, Serialize}; -use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator}; +use crate::{ + candidates::ImplName, + cost::{benchmark::OpName, Cost, CostModel, Estimator}, +}; /// The information we get from profiling. /// Rather than keeping all results, we split them into 'similar enough' partitions, @@ -26,6 +29,14 @@ type CollectionLifetime = (f64, HashMap); /// Breakdown of a cost value by operation pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>; +/// A single result of container selection +#[derive(Clone, Debug)] +pub struct ContainerSplitSpec { + pub before: ImplName, + pub threshold: usize, + pub after: ImplName, +} + impl ProfilerInfo { pub fn from(iter: impl Iterator>) -> Result { Ok(Self( @@ -34,6 +45,100 @@ impl ProfilerInfo { )) } + pub fn check_for_nsplit( + &mut self, + candidates: &HashMap<&String, CostModel>, + ) -> Option<(ContainerSplitSpec, Cost)> { + self.0.sort_by_key(|p| p.avg_n as usize); + if self.0.is_empty() { + return None; + } + + let costs_by_partitions = candidates + .iter() + .map(|(name, model)| { + ( + name, + self.0 + .iter() + .map(|p| p.estimate_cost(&model)) + .collect::>(), + ) + }) + .collect::>(); + + let top_by_partition = (0..self.0.len()) + .map(|i| { + costs_by_partitions.iter().fold( + ("".to_string(), f64::MAX), + |acc @ (_, val), (name, c)| { + if val < c[i] { + acc + } else { + (name.to_string(), c[i]) + } + }, + ) + }) + .collect::>(); + + let split_idx = top_by_partition + .iter() + .enumerate() + // TODO: fudge? + .find(|(idx, (best, _))| *idx > 0 && *best != top_by_partition[idx - 1].0) + .map(|(idx, _)| idx)?; + + let split_is_proper = top_by_partition.iter().enumerate().all(|(i, (best, _))| { + if i >= split_idx { + *best == top_by_partition[split_idx].0 + } else { + *best == top_by_partition[0].0 + } + }); + + if !split_is_proper { + return None; + } + + // calculate cost of switching + let before = &top_by_partition[0].0; + let after = &top_by_partition[split_idx].0; + let before_model = candidates.get(before).unwrap(); + let after_model = candidates.get(after).unwrap(); + + let copy_n = self.0[split_idx].avg_n; + let switching_cost = after_model.by_op.get("insert")?.estimatef(copy_n) + + before_model.by_op.get("clear")?.estimatef(copy_n); + + // see if it's "worth it" + let before_costs = &costs_by_partitions + .iter() + .find(|(name, _)| **name == before) + .unwrap() + .1; + let after_costs = &costs_by_partitions + .iter() + .find(|(name, _)| **name == after) + .unwrap() + .1; + let not_switching_cost = &before_costs[split_idx..].iter().sum::() + - &after_costs[split_idx..].iter().sum::(); + + if not_switching_cost < switching_cost { + None + } else { + Some(( + ContainerSplitSpec { + before: before.to_string(), + threshold: copy_n as usize, + after: after.to_string(), + }, + top_by_partition.iter().map(|(_, v)| v).sum(), + )) + } + } + /// Estimate the cost of using the implementation with the given cost model pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 { self.0 diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs index 568929b..83c7954 100644 --- a/src/crates/candelabra/src/profiler/mod.rs +++ b/src/crates/candelabra/src/profiler/mod.rs @@ -22,7 +22,7 @@ use crate::cost::benchmark::tee_output; use crate::project::Project; use crate::{Paths, State}; -pub use self::info::{ProfilerInfo, ProfilerPartition}; +pub use self::info::{ContainerSplitSpec, ProfilerInfo, ProfilerPartition}; #[derive(Debug, Serialize, Deserialize)] pub(crate) struct CacheEntry { diff --git a/src/crates/candelabra/src/select.rs b/src/crates/candelabra/src/select.rs index 1146040..a8653a4 100644 --- a/src/crates/candelabra/src/select.rs +++ b/src/crates/candelabra/src/select.rs @@ -3,14 +3,21 @@ use std::collections::HashMap; use crate::{ candidates::{ConTypeName, ImplName}, cost::Cost, + profiler::ContainerSplitSpec, Project, State, }; use anyhow::Result; +#[derive(Clone, Debug)] +pub enum Selection { + Singular(ImplName), + Split(ContainerSplitSpec), +} + impl State { /// Select a container implementation for each container type in the given project - pub fn select(&self, project: &Project) -> Result> { + pub fn select(&self, project: &Project) -> Result> { Ok(self .rank_candidates(project)? .into_iter() @@ -31,22 +38,36 @@ impl State { pub fn rank_candidates( &self, project: &Project, - ) -> Result>> { + ) -> Result>> { // get all candidates let all_candidates = self.project_candidate_list(project)?; // get profiling information - let profiles = self.profiler_info(project)?; + let mut profiles = self.profiler_info(project)?; let mut acc = HashMap::new(); let con_type_names = all_candidates.iter().flat_map(|(_, cs)| cs.iter()); for (con_type_name, candidates) in con_type_names { let mut costs = vec![]; - let profile_info = profiles.get(con_type_name).unwrap(); + let profile_info = profiles.get_mut(con_type_name).unwrap(); + + let cost_models = candidates + .iter() + .map(|name| Ok((name, self.cost_model(name)?))) + .collect::>>()?; + for candidate in candidates { let model = self.cost_model(candidate)?; - costs.push((candidate.clone(), profile_info.estimate_cost(&model))); + costs.push(( + Selection::Singular(candidate.clone()), + profile_info.estimate_cost(&model), + )); } + + if let Some((split, cost)) = profile_info.check_for_nsplit(&cost_models) { + costs.push((Selection::Split(split), cost)); + } + acc.insert(con_type_name.to_string(), costs); } diff --git a/src/crates/cli/src/select.rs b/src/crates/cli/src/select.rs index c415d03..bb613f5 100644 --- a/src/crates/cli/src/select.rs +++ b/src/crates/cli/src/select.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use anyhow::Result; use argh::FromArgs; +use candelabra::select::Selection; use log::info; use primrose::tools::nary_cartesian_product; use tabled::{builder::Builder, settings::Style}; @@ -28,9 +29,18 @@ impl State { builder.set_header(["name", "implementation", "estimated cost"]); for (con_type_name, candidates) in costs.iter() { for (candidate, cost) in candidates.iter() { + let name = match candidate { + Selection::Singular(x) => x.to_string(), + Selection::Split(split) => { + format!( + "{} until n={}, then {}", + split.before, split.threshold, split.after + ) + } + }; builder.push_record([ con_type_name.as_str(), - candidate.as_str(), + name.as_str(), cost.to_string().as_str(), ]); } -- cgit v1.2.3