aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAria Shrimpton <me@aria.rip>2024-01-31 17:32:10 +0000
committerAria Shrimpton <me@aria.rip>2024-01-31 17:32:10 +0000
commiteafe2080e9e825649bd84edba9647df0e811af99 (patch)
tree47307bb0b8ae282578c48741e87c204c440c7082 /src
parent4c96b7fd7a6ec2f6a0d1d0b61c005c0e1b08c2a0 (diff)
add partitioning of collection lifetimes
Diffstat (limited to 'src')
-rw-r--r--src/crates/candelabra/src/profiler.rs356
-rw-r--r--src/crates/cli/src/display.rs (renamed from src/crates/cli/src/util.rs)15
-rw-r--r--src/crates/cli/src/estimate.rs2
-rw-r--r--src/crates/cli/src/main.rs2
-rw-r--r--src/crates/cli/src/profile.rs12
-rw-r--r--src/crates/library/src/profiler.rs10
6 files changed, 327 insertions, 70 deletions
diff --git a/src/crates/candelabra/src/profiler.rs b/src/crates/candelabra/src/profiler.rs
index 8291b33..4677bbc 100644
--- a/src/crates/candelabra/src/profiler.rs
+++ b/src/crates/candelabra/src/profiler.rs
@@ -1,6 +1,6 @@
//! Profiling applications for info about container usage
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{anyhow, Context, Result};
use camino::{Utf8Path, Utf8PathBuf};
use log::{debug, log_enabled, trace, warn, Level};
use primrose::ContainerSelector;
@@ -17,8 +17,8 @@ use tempfile::tempdir;
use crate::cache::{gen_tree_hash, FileCache};
use crate::candidates::ConTypeName;
-use crate::cost::benchmark::OpName;
-use crate::cost::{Cost, CostModel};
+use crate::cost::benchmark::{tee_output, OpName};
+use crate::cost::{Cost, CostModel, Estimator};
use crate::project::Project;
use crate::{Paths, State};
@@ -30,64 +30,77 @@ pub(crate) struct CacheEntry {
}
/// The information we get from profiling.
+/// Rather than keeping all results, we split them into 'similar enough' partitions,
+/// with the idea that each partition will probably have the same best implementation.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
-pub struct ProfilerInfo(pub Vec<CollectionLifetime>);
+pub struct ProfilerInfo(pub Vec<ProfilerPartition>);
+
+/// A group of collection lifetimes with similar characteristics, summarised as weighted running averages
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct ProfilerPartition {
+ pub occurences: f64,
+ pub avg_n: f64,
+ pub avg_op_counts: HashMap<OpName, f64>,
+}
/// Breakdown of a cost value
pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
-/// Profiler info collected from the lifetime of a single collection instance
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct CollectionLifetime {
- pub n: usize,
- pub op_counts: HashMap<OpName, usize>,
-}
-
impl ProfilerInfo {
pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
- let sum: f64 = self.0.iter().map(|cl| cl.estimate_cost(cost_model)).sum();
-
- sum / self.0.len() as f64
+ self.0
+ .iter()
+ .map(|cl| cl.estimate_cost(cost_model))
+ .sum::<f64>()
}
pub fn cost_breakdown<'a>(&self, cost_model: &'a CostModel) -> CostBreakdown<'a> {
cost_model
.by_op
.iter()
- .map(|(op_name, estimator)| {
+ .map(|(op, estimator)| {
(
- op_name,
+ op,
self.0
.iter()
- .map(|cl| estimator.estimatef(cl.avg_n()) * cl.op_count(op_name) as f64)
- .sum::<f64>()
- / self.0.len() as f64,
+ .map(|cl| cl.op_cost(op, estimator))
+ .sum::<f64>(),
)
})
.collect()
}
}
-impl CollectionLifetime {
- pub fn avg_n(&self) -> f64 {
- self.n as f64 / (self.op_counts.values().sum::<usize>() as f64)
- }
-
- pub fn op_count(&self, op: &str) -> usize {
+impl ProfilerPartition {
+ pub fn avg_op_count(&self, op: &str) -> f64 {
*self
- .op_counts
+ .avg_op_counts
.get(op)
.expect("invalid op passed to op_count")
}
pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
- let avg_n = self.avg_n();
- let mut acc = 0.0;
- for (op, estimator) in cost_model.by_op.iter() {
- acc += estimator.estimatef(avg_n) * self.op_count(op) as f64;
- }
+ cost_model
+ .by_op
+ .iter()
+ .map(|(op, estimator)| self.op_cost(op, estimator))
+ .sum::<f64>()
+ }
+
+ pub fn op_cost(&self, op: &str, estimator: &Estimator) -> f64 {
+ estimator.estimatef(self.avg_n) * self.avg_op_count(op) * self.occurences
+ }
- acc
+ fn add_lifetime(&mut self, (n, ops): (f64, HashMap<String, usize>)) {
+ self.avg_n = self.avg_n + (n - self.avg_n) / (self.occurences + 1.0);
+ for (op, count) in ops {
+ let count = count as f64;
+ self.avg_op_counts
+ .entry(op)
+ .and_modify(|avg| *avg = *avg + (count - *avg) / (self.occurences + 1.0))
+ .or_insert(count);
+ }
+ self.occurences += 1.0;
}
}
@@ -217,25 +230,24 @@ impl State {
name, profiler_out_dir
);
- let mut command = Command::new("cargo");
- command
+ let child = Command::new("cargo")
.current_dir(&project.source_dir)
.args(["bench", "--bench", name])
- .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()); // Where profiler info gets outputted
-
- if log_enabled!(Level::Debug) {
- command.stderr(Stdio::inherit()).stdout(Stdio::inherit());
- } else {
- command.stderr(Stdio::null()).stdout(Stdio::null());
- };
- let output = command.output()?;
+ .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
+ .stdout(Stdio::piped())
+ .stderr(if log_enabled!(Level::Debug) {
+ Stdio::inherit()
+ } else {
+ Stdio::null()
+ })
+ .spawn()
+ .context("Error running bench command")?;
- if !output.status.success() {
- bail!("Error running benchmark");
- }
+ tee_output(child)?;
let mut con_type_results = HashMap::new();
for dir in read_dir(&profiler_out_dir)? {
+ // each directory is named with a numeric index into `con_types`, identifying the container type
let dir = dir?;
let con_type: String = con_types[dir
.file_name()
@@ -244,24 +256,63 @@ impl State {
.parse::<usize>()
.unwrap()]
.to_string();
- let mut acc = Vec::default();
- for file in read_dir(dir.path())? {
- let file = file?;
- let mut contents = String::new();
- File::open(file.path())?.read_to_string(&mut contents)?;
-
- acc.push(parse_output(&contents)?);
- }
- con_type_results.insert(con_type, ProfilerInfo(acc));
+ let partitions = read_dir(dir.path())?
+ .map(|f| -> Result<String> {
+ // read file contents
+ let mut contents = String::new();
+ File::open(f?.path())?.read_to_string(&mut contents)?;
+ Ok(contents)
+ })
+ .map(|contents| parse_output(&contents?))
+ .fold(Ok(vec![]), partition_costs)?;
+
+ con_type_results.insert(con_type, ProfilerInfo(partitions));
}
Ok(con_type_results)
}
}
+type CollectionLifetime = (f64, HashMap<OpName, usize>);
+
+/// Attempt to compress an iterator of collection lifetimes into as few partitions as possible
+fn partition_costs(
+ acc: Result<Vec<ProfilerPartition>>,
+ cl: Result<CollectionLifetime>,
+) -> Result<Vec<ProfilerPartition>> {
+ // error short circuiting
+ let (mut acc, (n, ops)) = (acc?, cl?);
+
+ // attempt to find a partition with a close enough n value
+ let (closest_idx, closest_delta) =
+ acc.iter()
+ .enumerate()
+ .fold((0, f64::MAX), |acc @ (_, val), (idx, partition)| {
+ let delta = (partition.avg_n - n).abs();
+ if delta < val {
+ (idx, delta)
+ } else {
+ acc
+ }
+ });
+
+ if closest_delta < 100.0 {
+ acc[closest_idx].add_lifetime((n, ops));
+ } else {
+ // add a new partition
+ acc.push(ProfilerPartition {
+ occurences: 1.0,
+ avg_n: n,
+ avg_op_counts: ops.into_iter().map(|(k, v)| (k, v as f64)).collect(),
+ })
+ }
+
+ Ok(acc)
+}
+
/// Parse the output of the profiler
-fn parse_output(contents: &str) -> Result<CollectionLifetime> {
+fn parse_output(contents: &str) -> Result<(f64, HashMap<OpName, usize>)> {
let mut lines = contents.lines().map(usize::from_str);
let missing_line_err = || anyhow!("wrong number of lines in ");
let n = lines.next().ok_or_else(missing_line_err)??;
@@ -307,5 +358,198 @@ fn parse_output(contents: &str) -> Result<CollectionLifetime> {
lines.next().ok_or_else(missing_line_err)??,
);
- Ok(CollectionLifetime { n, op_counts })
+ Ok((n as f64, op_counts))
+}
+
+#[cfg(test)]
+mod tests {
+ use std::collections::HashMap;
+
+ use crate::{
+ cost::{CostModel, Estimator},
+ profiler::partition_costs,
+ };
+
+ use super::{ProfilerInfo, ProfilerPartition};
+
+ const EPSILON: f64 = 1e-5;
+ fn assert_feq(left: f64, right: f64, msg: &'static str) {
+ assert!((left - right).abs() < EPSILON, "{}", msg);
+ }
+
+ fn linear_estimator() -> Estimator {
+ Estimator {
+ coeffs: [0.0, 1.0, 0.0, 0.0],
+ transform_x: (0.0, 1.0),
+ transform_y: (0.0, 1.0),
+ }
+ }
+
+ #[test]
+ fn test_cost_single_partition() {
+ let info = ProfilerInfo(vec![ProfilerPartition {
+ occurences: 1.0,
+ avg_n: 100.0,
+ avg_op_counts: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 100.0);
+ map
+ },
+ }]);
+
+ let model = CostModel {
+ by_op: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), linear_estimator());
+ map
+ },
+ };
+
+ let cost = dbg!(info.estimate_cost(&model));
+ assert_feq(cost, 10_000.0, "per op = 100 * 100 ops");
+ }
+
+ #[test]
+ fn test_cost_multi_partitions_sums() {
+ let info = ProfilerInfo(vec![
+ ProfilerPartition {
+ occurences: 1.0,
+ avg_n: 100.0,
+ avg_op_counts: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 100.0);
+ map
+ },
+ },
+ ProfilerPartition {
+ occurences: 1.0,
+ avg_n: 10.0,
+ avg_op_counts: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 10.0);
+ map
+ },
+ },
+ ]);
+
+ let model = CostModel {
+ by_op: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), linear_estimator());
+ map
+ },
+ };
+
+ let cost = dbg!(info.estimate_cost(&model));
+ assert_feq(cost, 10_100.0, "100ns/op * 100 ops + 10ns/op * 10 ops");
+ }
+
+ #[test]
+ fn test_cost_multi_partitions_sums_weighted() {
+ let info = ProfilerInfo(vec![
+ ProfilerPartition {
+ occurences: 2.0,
+ avg_n: 100.0,
+ avg_op_counts: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 100.0);
+ map
+ },
+ },
+ ProfilerPartition {
+ occurences: 1.0,
+ avg_n: 10.0,
+ avg_op_counts: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 10.0);
+ map
+ },
+ },
+ ]);
+
+ let model = CostModel {
+ by_op: {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), linear_estimator());
+ map
+ },
+ };
+
+ let cost = dbg!(info.estimate_cost(&model));
+ assert_feq(cost, 20_100.0, "100ns/op * 100 ops * 2 + 10ns/op * 10 ops");
+ }
+
+ #[test]
+ fn test_partition_costs_merges_duplicates() {
+ let cl = (100.0, {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 10);
+ map
+ });
+ let outp = vec![Ok(cl.clone()), Ok(cl)]
+ .into_iter()
+ .fold(Ok(vec![]), partition_costs)
+ .unwrap();
+
+ assert_eq!(outp.len(), 1, "merged duplicates");
+ assert_eq!(outp[0].occurences, 2.0, "weight updated");
+ assert_feq(outp[0].avg_n, 100.0, "average n correct");
+ assert_feq(
+ *outp[0].avg_op_counts.get("insert").unwrap(),
+ 10.0,
+ "average n correct",
+ );
+ }
+
+ #[test]
+ fn test_partition_costs_merges_close() {
+ let outp = vec![
+ Ok((100.0, {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 50);
+ map
+ })),
+ Ok((110.0, {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 100);
+ map
+ })),
+ ]
+ .into_iter()
+ .fold(Ok(vec![]), partition_costs)
+ .unwrap();
+
+ assert_eq!(outp.len(), 1, "merged duplicates");
+ assert_eq!(outp[0].occurences, 2.0, "weight updated");
+ assert_feq(outp[0].avg_n, 105.0, "average n correct");
+ assert_feq(
+ *outp[0].avg_op_counts.get("insert").unwrap(),
+ 75.0,
+ "average n correct",
+ );
+ }
+ #[test]
+ fn test_partition_costs_keeps_separate() {
+ let outp = vec![
+ Ok((100.0, {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 10);
+ map
+ })),
+ Ok((999999.0, {
+ let mut map = HashMap::new();
+ map.insert("insert".to_string(), 10);
+ map
+ })),
+ ]
+ .into_iter()
+ .fold(Ok(vec![]), partition_costs)
+ .unwrap();
+
+ assert_eq!(
+ outp.len(),
+ 2,
+ "large difference in n values causes partition"
+ );
+ }
}
diff --git a/src/crates/cli/src/util.rs b/src/crates/cli/src/display.rs
index 499f4ba..2ce9039 100644
--- a/src/crates/cli/src/util.rs
+++ b/src/crates/cli/src/display.rs
@@ -1,5 +1,6 @@
use std::{collections::HashSet, fmt::Display, hash::Hash, iter::once};
+use candelabra::profiler::ProfilerInfo;
use tabled::{builder::Builder, settings::Style};
// Print the given 2D map as a table, where the first key is the left-most column, and the second key the column index
@@ -40,3 +41,17 @@ where
println!("{}", builder.build().with(Style::sharp()));
}
+
+pub fn display_profiler_info(profile_info: ProfilerInfo) {
+ print_table(profile_info.0.into_iter().enumerate().map(|(i, p)| {
+ (
+ i,
+ [
+ ("n".to_string(), p.avg_n),
+ ("occurences".to_string(), p.occurences),
+ ]
+ .into_iter()
+ .chain(p.avg_op_counts.into_iter()),
+ )
+ }))
+}
diff --git a/src/crates/cli/src/estimate.rs b/src/crates/cli/src/estimate.rs
index 6915afd..6f4716f 100644
--- a/src/crates/cli/src/estimate.rs
+++ b/src/crates/cli/src/estimate.rs
@@ -4,7 +4,7 @@ use anyhow::{anyhow, bail, Result};
use argh::FromArgs;
use log::info;
-use crate::{util::print_table, State};
+use crate::{display::print_table, State};
/// Estimate the cost of a given set of assignments for a specific project, and detail how that was reached.
#[derive(FromArgs)]
diff --git a/src/crates/cli/src/main.rs b/src/crates/cli/src/main.rs
index 81d484d..04bcc96 100644
--- a/src/crates/cli/src/main.rs
+++ b/src/crates/cli/src/main.rs
@@ -4,12 +4,12 @@ use candelabra::{Paths, Project};
use log::info;
mod candidates;
+mod display;
mod estimate;
mod library;
mod model;
mod profile;
mod select;
-mod util;
#[derive(FromArgs)]
/// Find the best performing container type using primrose
diff --git a/src/crates/cli/src/profile.rs b/src/crates/cli/src/profile.rs
index 0971771..efb1571 100644
--- a/src/crates/cli/src/profile.rs
+++ b/src/crates/cli/src/profile.rs
@@ -2,7 +2,7 @@ use anyhow::Result;
use argh::FromArgs;
use log::info;
-use crate::{util::print_table, State};
+use crate::{display::display_profiler_info, State};
/// Profile the selected projects and print the results
#[derive(FromArgs)]
@@ -16,12 +16,10 @@ impl State {
let all_info = self.inner.profiler_info(proj)?;
- print_table(all_info.iter().map(|(con_type_name, profile_info)| {
- (
- con_type_name,
- profile_info.0.iter().flat_map(|cl| cl.op_counts.iter()),
- )
- }));
+ for (con_type_name, profile_info) in all_info {
+ info!("{}:", con_type_name);
+ display_profiler_info(profile_info);
+ }
}
Ok(())
}
diff --git a/src/crates/library/src/profiler.rs b/src/crates/library/src/profiler.rs
index ba8c357..585d745 100644
--- a/src/crates/library/src/profiler.rs
+++ b/src/crates/library/src/profiler.rs
@@ -10,7 +10,7 @@ use crate::traits::{Container, Indexable, Mapping, Stack};
pub struct ProfilerWrapper<const ID: usize, T, E> {
inner: T,
- sum_ns: usize,
+ max_n: usize,
n_contains: usize,
n_insert: usize,
n_clear: usize,
@@ -28,7 +28,7 @@ impl<const ID: usize, T: Default, E> Default for ProfilerWrapper<ID, T, E> {
fn default() -> Self {
Self {
inner: T::default(),
- sum_ns: 0,
+ max_n: 0,
n_contains: 0,
n_insert: 0,
n_clear: 0,
@@ -46,13 +46,13 @@ impl<const ID: usize, T: Default, E> Default for ProfilerWrapper<ID, T, E> {
impl<const ID: usize, T: Container<E>, E> ProfilerWrapper<ID, T, E> {
fn add_n(&mut self) {
- self.sum_ns += self.inner.len();
+ self.max_n = self.max_n.max(self.inner.len());
}
}
impl<const ID: usize, T: Mapping<K, V>, K, V> ProfilerWrapper<ID, T, (K, V)> {
fn add_n_map(&mut self) {
- self.sum_ns += self.inner.len();
+ self.max_n = self.max_n.max(self.inner.len());
}
}
@@ -175,7 +175,7 @@ impl<const ID: usize, T, E> Drop for ProfilerWrapper<ID, T, E> {
}
let mut f = File::create(format!("{}/{}", dir, unix_time.as_nanos())).unwrap();
- writeln!(f, "{}", self.sum_ns).unwrap();
+ writeln!(f, "{}", self.max_n).unwrap();
writeln!(f, "{}", self.n_contains).unwrap();
writeln!(f, "{}", self.n_insert).unwrap();
writeln!(f, "{}", self.n_clear).unwrap();