aboutsummaryrefslogtreecommitdiff
path: root/src/crates/candelabra
diff options
context:
space:
mode:
author Aria Shrimpton <me@aria.rip> 2024-01-31 22:59:43 +0000
committer Aria Shrimpton <me@aria.rip> 2024-01-31 23:09:40 +0000
commit 10d1da37f9f5ef38363f005508b7dec2bfd867fd (patch)
tree 6dc113f3547b6033ebac7150054a8a3bd9f4b98e /src/crates/candelabra
parent 77d77cc0f027b4e7b03bb924e8074b193ff5e763 (diff)
attempt to detect when switching containers midway through would be better
Diffstat (limited to 'src/crates/candelabra')
-rw-r--r-- src/crates/candelabra/src/lib.rs 2
-rw-r--r-- src/crates/candelabra/src/profiler/info.rs 107
-rw-r--r-- src/crates/candelabra/src/profiler/mod.rs 2
-rw-r--r-- src/crates/candelabra/src/select.rs 31
4 files changed, 134 insertions, 8 deletions
diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs
index 19445f0..c483e12 100644
--- a/src/crates/candelabra/src/lib.rs
+++ b/src/crates/candelabra/src/lib.rs
@@ -10,7 +10,7 @@ pub mod candidates;
mod confirmation;
pub mod cost;
pub mod profiler;
-mod select;
+pub mod select;
mod paths;
mod project;
diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs
index 398f4e0..5eb6734 100644
--- a/src/crates/candelabra/src/profiler/info.rs
+++ b/src/crates/candelabra/src/profiler/info.rs
@@ -4,7 +4,10 @@ use std::str::FromStr;
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
-use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator};
+use crate::{
+ candidates::ImplName,
+ cost::{benchmark::OpName, Cost, CostModel, Estimator},
+};
/// The information we get from profiling.
/// Rather than keeping all results, we split them into 'similar enough' partitions,
@@ -26,6 +29,14 @@ type CollectionLifetime = (f64, HashMap<OpName, usize>);
/// Breakdown of a cost value by operation
pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
+/// A selection that changes implementation part-way through the partitions, as produced by `ProfilerInfo::check_for_nsplit`.
+#[derive(Clone, Debug)]
+pub struct ContainerSplitSpec {
+ pub before: ImplName, // cheapest implementation for the partitions below the split point
+ pub threshold: usize, // `avg_n` (cast to usize) of the first partition where the cheapest implementation changes
+ pub after: ImplName, // cheapest implementation for the partitions from the split point onwards
+}
+
impl ProfilerInfo {
pub fn from(iter: impl Iterator<Item = Result<String>>) -> Result<Self> {
Ok(Self(
@@ -34,6 +45,100 @@ impl ProfilerInfo {
))
}
+ /// Look for a single point, in order of increasing `avg_n`, at which the cheapest
+ /// candidate implementation changes, and decide whether switching there pays off.
+ ///
+ /// The partitions held by `self` are sorted by `avg_n` in place (hence `&mut self`).
+ /// `candidates` maps each candidate implementation name to its cost model.
+ ///
+ /// Returns `None` when:
+ /// - there are no partitions or no candidates,
+ /// - the same candidate is cheapest for every partition,
+ /// - the cheapest candidate changes more than once,
+ /// - either model lacks the `"insert"` / `"clear"` operation needed to price the switch, or
+ /// - what would be saved by switching is less than the estimated cost of switching.
+ ///
+ /// Otherwise returns the split together with its estimated cost: the sum of the
+ /// cheapest per-partition estimates *plus* the switching cost, so that the figure
+ /// is comparable with the cost of using any single candidate throughout.
+ pub fn check_for_nsplit(
+     &mut self,
+     candidates: &HashMap<&String, CostModel>,
+ ) -> Option<(ContainerSplitSpec, Cost)> {
+     if self.0.is_empty() {
+         return None;
+     }
+
+     // Compare the floats directly: truncating to an integer first would leave
+     // partitions that differ only fractionally in arbitrary relative order.
+     self.0.sort_by(|a, b| a.avg_n.total_cmp(&b.avg_n));
+
+     // For every candidate, its estimated cost on each partition (in sorted order).
+     let costs_by_partitions = candidates
+         .iter()
+         .map(|(name, model)| {
+             (
+                 *name,
+                 self.0
+                     .iter()
+                     .map(|p| p.estimate_cost(model))
+                     .collect::<Vec<_>>(),
+             )
+         })
+         .collect::<Vec<(_, _)>>();
+
+     // The cheapest candidate for each partition. Ties are broken by name so the
+     // result does not depend on HashMap iteration order; an empty candidate set
+     // yields `None` instead of a placeholder name.
+     let top_by_partition = (0..self.0.len())
+         .map(|i| {
+             costs_by_partitions
+                 .iter()
+                 .map(|(name, costs)| (*name, costs[i]))
+                 .min_by(|(an, a), (bn, b)| a.total_cmp(b).then_with(|| an.cmp(bn)))
+         })
+         .collect::<Option<Vec<_>>>()?;
+
+     // First partition whose cheapest candidate differs from its predecessor's.
+     // TODO: fudge?
+     let split_idx = top_by_partition
+         .windows(2)
+         .position(|w| w[0].0 != w[1].0)?
+         + 1;
+
+     let before = top_by_partition[0].0;
+     let after = top_by_partition[split_idx].0;
+
+     // Everything below `split_idx` already agrees with `before` (it is the first
+     // change), so only the tail needs checking for a second change.
+     let split_is_proper = top_by_partition[split_idx..]
+         .iter()
+         .all(|(best, _)| *best == after);
+     if !split_is_proper {
+         return None;
+     }
+
+     // calculate cost of switching
+     let before_model = candidates.get(before)?;
+     let after_model = candidates.get(after)?;
+
+     let copy_n = self.0[split_idx].avg_n;
+     let switching_cost = after_model.by_op.get("insert")?.estimatef(copy_n)
+         + before_model.by_op.get("clear")?.estimatef(copy_n);
+
+     // see if it's "worth it": compare what staying on `before` for the tail would
+     // cost over `after` against the price of the switch itself.
+     let tail_cost = |impl_name: &String| -> Option<f64> {
+         costs_by_partitions
+             .iter()
+             .find(|(name, _)| *name == impl_name)
+             .map(|(_, costs)| costs[split_idx..].iter().sum::<f64>())
+     };
+     let not_switching_cost = tail_cost(before)? - tail_cost(after)?;
+
+     if not_switching_cost < switching_cost {
+         return None;
+     }
+
+     // Include the switching cost so this entry ranks fairly against single
+     // implementations (in particular against using `after` throughout).
+     let total_cost = top_by_partition.iter().map(|(_, c)| *c).sum::<f64>() + switching_cost;
+
+     Some((
+         ContainerSplitSpec {
+             before: before.to_string(),
+             threshold: copy_n as usize,
+             after: after.to_string(),
+         },
+         total_cost,
+     ))
+ }
+
/// Estimate the cost of using the implementation with the given cost model
pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
self.0
diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs
index 568929b..83c7954 100644
--- a/src/crates/candelabra/src/profiler/mod.rs
+++ b/src/crates/candelabra/src/profiler/mod.rs
@@ -22,7 +22,7 @@ use crate::cost::benchmark::tee_output;
use crate::project::Project;
use crate::{Paths, State};
-pub use self::info::{ProfilerInfo, ProfilerPartition};
+pub use self::info::{ContainerSplitSpec, ProfilerInfo, ProfilerPartition};
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct CacheEntry {
diff --git a/src/crates/candelabra/src/select.rs b/src/crates/candelabra/src/select.rs
index 1146040..a8653a4 100644
--- a/src/crates/candelabra/src/select.rs
+++ b/src/crates/candelabra/src/select.rs
@@ -3,14 +3,21 @@ use std::collections::HashMap;
use crate::{
candidates::{ConTypeName, ImplName},
cost::Cost,
+ profiler::ContainerSplitSpec,
Project, State,
};
use anyhow::Result;
+#[derive(Clone, Debug)]
+pub enum Selection { // one ranked option for a container type, as built by `State::rank_candidates`
+ Singular(ImplName), // a single candidate implementation, costed over the whole profile
+ Split(ContainerSplitSpec), // a split reported by `ProfilerInfo::check_for_nsplit`
+}
+
impl State {
/// Select a container implementation for each container type in the given project
- pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, ImplName>> {
+ pub fn select(&self, project: &Project) -> Result<HashMap<ConTypeName, Selection>> {
Ok(self
.rank_candidates(project)?
.into_iter()
@@ -31,22 +38,36 @@ impl State {
pub fn rank_candidates(
&self,
project: &Project,
- ) -> Result<HashMap<ConTypeName, Vec<(ImplName, Cost)>>> {
+ ) -> Result<HashMap<ConTypeName, Vec<(Selection, Cost)>>> {
// get all candidates
let all_candidates = self.project_candidate_list(project)?;
// get profiling information
- let profiles = self.profiler_info(project)?;
+ let mut profiles = self.profiler_info(project)?;
let mut acc = HashMap::new();
let con_type_names = all_candidates.iter().flat_map(|(_, cs)| cs.iter());
for (con_type_name, candidates) in con_type_names {
let mut costs = vec![];
- let profile_info = profiles.get(con_type_name).unwrap();
+ let profile_info = profiles.get_mut(con_type_name).unwrap();
+
+ let cost_models = candidates
+ .iter()
+ .map(|name| Ok((name, self.cost_model(name)?)))
+ .collect::<Result<HashMap<_, _>>>()?;
+
for candidate in candidates {
let model = self.cost_model(candidate)?;
- costs.push((candidate.clone(), profile_info.estimate_cost(&model)));
+ costs.push((
+ Selection::Singular(candidate.clone()),
+ profile_info.estimate_cost(&model),
+ ));
}
+
+ if let Some((split, cost)) = profile_info.check_for_nsplit(&cost_models) {
+ costs.push((Selection::Split(split), cost));
+ }
+
acc.insert(con_type_name.to_string(), costs);
}