From 6c8407043120f3855dd0229c0f838041c7f0eb38 Mon Sep 17 00:00:00 2001
From: Aria Shrimpton
Date: Wed, 31 Jan 2024 17:43:05 +0000
Subject: lints & refactors

---
 src/crates/candelabra/src/profiler.rs      | 555 ----------------------------
 src/crates/candelabra/src/profiler/info.rs | 372 +++++++++++++++++++
 src/crates/candelabra/src/profiler/mod.rs  | 200 +++++++++++
 src/crates/cli/src/display.rs              |   2 +-
 4 files changed, 573 insertions(+), 556 deletions(-)
 delete mode 100644 src/crates/candelabra/src/profiler.rs
 create mode 100644 src/crates/candelabra/src/profiler/info.rs
 create mode 100644 src/crates/candelabra/src/profiler/mod.rs

diff --git a/src/crates/candelabra/src/profiler.rs b/src/crates/candelabra/src/profiler.rs
deleted file mode 100644
index 4677bbc..0000000
--- a/src/crates/candelabra/src/profiler.rs
+++ /dev/null
@@ -1,555 +0,0 @@
-//! Profiling applications for info about container usage
-
-use anyhow::{anyhow, Context, Result};
-use camino::{Utf8Path, Utf8PathBuf};
-use log::{debug, log_enabled, trace, warn, Level};
-use primrose::ContainerSelector;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::io::Write;
-use std::str::FromStr;
-use std::{
-    fs::{read_dir, File},
-    io::Read,
-    process::{Command, Stdio},
-};
-use tempfile::tempdir;
-
-use crate::cache::{gen_tree_hash, FileCache};
-use crate::candidates::ConTypeName;
-use crate::cost::benchmark::{tee_output, OpName};
-use crate::cost::{Cost, CostModel, Estimator};
-use crate::project::Project;
-use crate::{Paths, State};
-
-#[derive(Debug, Serialize, Deserialize)]
-pub(crate) struct CacheEntry {
-    proj_hash: u64,
-    proj_location: Utf8PathBuf,
-    info: HashMap<ConTypeName, ProfilerInfo>,
-}
-
-/// The information we get from profiling.
-/// Rather than keeping all results, we split them into 'similar enough' partitions,
-/// with the idea that each partition will probably have the same best implementation.
-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
-pub struct ProfilerInfo(pub Vec<ProfilerPartition>);
-
-/// A vector of container lifetimes which have similar characteristics
-#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
-pub struct ProfilerPartition {
-    pub occurences: f64,
-    pub avg_n: f64,
-    pub avg_op_counts: HashMap<OpName, f64>,
-}
-
-/// Breakdown of a cost value
-pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
-
-impl ProfilerInfo {
-    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
-        self.0
-            .iter()
-            .map(|cl| cl.estimate_cost(cost_model))
-            .sum::<f64>()
-    }
-
-    pub fn cost_breakdown<'a>(&self, cost_model: &'a CostModel) -> CostBreakdown<'a> {
-        cost_model
-            .by_op
-            .iter()
-            .map(|(op, estimator)| {
-                (
-                    op,
-                    self.0
-                        .iter()
-                        .map(|cl| cl.op_cost(op, estimator))
-                        .sum::<f64>(),
-                )
-            })
-            .collect()
-    }
-}
-
-impl ProfilerPartition {
-    pub fn avg_op_count(&self, op: &str) -> f64 {
-        *self
-            .avg_op_counts
-            .get(op)
-            .expect("invalid op passed to op_count")
-    }
-
-    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
-        cost_model
-            .by_op
-            .iter()
-            .map(|(op, estimator)| self.op_cost(op, estimator))
-            .sum::<f64>()
-    }
-
-    pub fn op_cost(&self, op: &str, estimator: &Estimator) -> f64 {
-        estimator.estimatef(self.avg_n) * self.avg_op_count(op) * self.occurences
-    }
-
-    fn add_lifetime(&mut self, (n, ops): (f64, HashMap<OpName, usize>)) {
-        self.avg_n = self.avg_n + (n - self.avg_n) / (self.occurences + 1.0);
-        for (op, count) in ops {
-            let count = count as f64;
-            self.avg_op_counts
-                .entry(op)
-                .and_modify(|avg| *avg = *avg + (count - *avg) / (self.occurences + 1.0))
-                .or_insert(count);
-        }
-        self.occurences += 1.0;
-    }
-}
-
-impl State {
-    pub(crate) fn profiler_info_cache(paths: &Paths) -> Result<FileCache<String, CacheEntry>> {
-        FileCache::new(
-            paths.target_dir.join("candelabra").join("profiler_info"),
-            |_, v: &CacheEntry| {
-                let proj_hash = gen_tree_hash(&v.proj_location).unwrap_or(0);
-                v.proj_hash == proj_hash
-            },
-        )
-    }
-
-    /// Get or calculate profiler info for the given project.
-    /// Results are cached by the modification time of the project's source tree
-    pub fn profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
-        match self.profiler_info_cache.find(&project.name)? {
-            Some(x) => Ok(x.info),
-            None => {
-                let info = self.calc_profiler_info(project)?;
-
-                let proj_hash = gen_tree_hash(&project.source_dir)
-                    .context("Error generating project directory hash")?;
-                if let Err(e) = self.profiler_info_cache.put(
-                    &project.name,
-                    &CacheEntry {
-                        proj_hash,
-                        proj_location: project.source_dir.clone(),
-                        info: info.clone(),
-                    },
-                ) {
-                    warn!("Error caching profiler info for {}: {}", &project.name, e);
-                }
-
-                Ok(info)
-            }
-        }
-    }
-
-    /// Calculate profiler info for the given project.
-    fn calc_profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
-        let candidate_list = self.project_candidate_list(project)?;
-        let con_types = candidate_list
-            .iter()
-            .flat_map(|(_, con_types)| con_types.iter())
-            .map(|(id, _)| id)
-            .collect::<Vec<_>>();
-
-        self.project_profiling_prep(project, &con_types)?;
-        let mut acc = HashMap::new();
-        for name in project.benchmarks.iter() {
-            for (con_type, new_results) in self
-                .profile_benchmark(project, name, &con_types)
-                .with_context(|| format!("Error profiling benchmark {}", name))?
-            {
-                acc.entry(con_type)
-                    .and_modify(|pi: &mut ProfilerInfo| pi.0.extend(new_results.0.iter().cloned()))
-                    .or_insert(new_results);
-            }
-        }
-
-        Ok(acc)
-    }
-
-    /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper.
-    fn project_profiling_prep(&self, project: &Project, con_types: &[&String]) -> Result<()> {
-        for (file, candidates) in self.project_candidate_list(project)? {
-            self.file_profiling_prep(&file, &candidates, con_types)
-                .with_context(|| format!("error preparing {} for profiling", file))?;
-        }
-
-        Ok(())
-    }
-
-    /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper.
-    fn file_profiling_prep(
-        &self,
-        file: &Utf8Path,
-        candidates: &[(String, Vec<String>)],
-        con_types: &[&String],
-    ) -> Result<()> {
-        debug!("Setting up {} for profiling", file);
-
-        let selector = ContainerSelector::from_path(
-            file.as_std_path(),
-            self.paths.library_src.as_std_path(),
-            self.model_size,
-        )
-        .context("error creating container selector")?;
-
-        let chosen = candidates
-            .iter()
-            .map(|(dest_name, impls)| (dest_name, &impls[0]))
-            .collect::<Vec<_>>();
-
-        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| {
-            (
-                *d,
-                con_types.iter().position(|id| id == d).unwrap(),
-                c.as_str(),
-            )
-        }));
-
-        let new_path = file.to_string().replace(".pr", "");
-
-        trace!("New code: {}", new_code);
-        trace!("New path: {}", new_path);
-
-        let mut f = File::create(new_path).context("error creating new source file")?;
-        f.write_all(new_code.as_bytes())
-            .context("error writing new code")?;
-
-        Ok(())
-    }
-
-    /// Run the given benchmark on the project, and parse the resulting profiling information.
-    fn profile_benchmark(
-        &self,
-        project: &Project,
-        name: &str,
-        con_types: &[&String],
-    ) -> Result<HashMap<String, ProfilerInfo>> {
-        let profiler_out_dir = tempdir()?;
-        debug!(
-            "Running benchmark {} with out dir {:?}",
-            name, profiler_out_dir
-        );
-
-        let child = Command::new("cargo")
-            .current_dir(&project.source_dir)
-            .args(["bench", "--bench", name])
-            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
-            .stdout(Stdio::piped())
-            .stderr(if log_enabled!(Level::Debug) {
-                Stdio::inherit()
-            } else {
-                Stdio::null()
-            })
-            .spawn()
-            .context("Error running bench command")?;
-
-        tee_output(child)?;
-
-        let mut con_type_results = HashMap::new();
-        for dir in read_dir(&profiler_out_dir)? {
-            // each directory has an index, corresponding to the container type name
-            let dir = dir?;
-            let con_type: String = con_types[dir
-                .file_name()
-                .into_string()
-                .unwrap()
-                .parse::<usize>()
-                .unwrap()]
-            .to_string();
-
-            let partitions = read_dir(dir.path())?
-                .map(|f| -> Result<String> {
-                    // read file contents
-                    let mut contents = String::new();
-                    File::open(f?.path())?.read_to_string(&mut contents)?;
-                    Ok(contents)
-                })
-                .map(|contents| parse_output(&contents?))
-                .fold(Ok(vec![]), partition_costs)?;
-
-            con_type_results.insert(con_type, ProfilerInfo(partitions));
-        }
-
-        Ok(con_type_results)
-    }
-}
-
-type CollectionLifetime = (f64, HashMap<OpName, usize>);
-
-/// Attempt to compress an iterator of collection lifetimes into as few partitions as possible
-fn partition_costs(
-    acc: Result<Vec<ProfilerPartition>>,
-    cl: Result<CollectionLifetime>,
-) -> Result<Vec<ProfilerPartition>> {
-    // error short circuiting
-    let (mut acc, (n, ops)) = (acc?, cl?);
-
-    // attempt to find a partition with a close enough n value
-    let (closest_idx, closest_delta) =
-        acc.iter()
-            .enumerate()
-            .fold((0, f64::MAX), |acc @ (_, val), (idx, partition)| {
-                let delta = (partition.avg_n - n).abs();
-                if delta < val {
-                    (idx, delta)
-                } else {
-                    acc
-                }
-            });
-
-    if closest_delta < 100.0 {
-        acc[closest_idx].add_lifetime((n, ops));
-    } else {
-        // add a new partition
-        acc.push(ProfilerPartition {
-            occurences: 1.0,
-            avg_n: n,
-            avg_op_counts: ops.into_iter().map(|(k, v)| (k, v as f64)).collect(),
-        })
-    }
-
-    Ok(acc)
-}
-
-/// Parse the output of the profiler
-fn parse_output(contents: &str) -> Result<(f64, HashMap<OpName, usize>)> {
-    let mut lines = contents.lines().map(usize::from_str);
-    let missing_line_err = || anyhow!("wrong number of lines in profiler output");
-    let n = lines.next().ok_or_else(missing_line_err)??;
-    let mut op_counts = HashMap::new();
-    op_counts.insert(
-        "contains".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "insert".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "clear".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "remove".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "first".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "last".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "nth".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "push".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "pop".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-    op_counts.insert(
-        "get".to_string(),
-        lines.next().ok_or_else(missing_line_err)??,
-    );
-
-    Ok((n as f64, op_counts))
-}
-
-#[cfg(test)]
-mod tests {
-    use std::collections::HashMap;
-
-    use crate::{
-        cost::{CostModel, Estimator},
-        profiler::partition_costs,
-    };
-
-    use super::{ProfilerInfo, ProfilerPartition};
-
-    const EPSILON: f64 = 1e-5;
-    fn assert_feq(left: f64, right: f64, msg: &'static str) {
-        assert!((left - right).abs() < EPSILON, "{}", msg);
-    }
-
-    fn linear_estimator() -> Estimator {
-        Estimator {
-            coeffs: [0.0, 1.0, 0.0, 0.0],
-            transform_x: (0.0, 1.0),
-            transform_y: (0.0, 1.0),
-        }
-    }
-
-    #[test]
-    fn test_cost_single_partition() {
-        let info = ProfilerInfo(vec![ProfilerPartition {
-            occurences: 1.0,
-            avg_n: 100.0,
-            avg_op_counts: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 100.0);
-                map
-            },
-        }]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 10_000.0, "per op = 100 * 100 ops");
-    }
-
-    #[test]
-    fn test_cost_multi_partitions_sums() {
-        let info = ProfilerInfo(vec![
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 100.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 100.0);
-                    map
-                },
-            },
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 10.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 10.0);
-                    map
-                },
-            },
-        ]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 10_100.0, "100ns/op * 100 ops + 10ns/op * 10 ops");
-    }
-
-    #[test]
-    fn test_cost_multi_partitions_sums_weighted() {
-        let info = ProfilerInfo(vec![
-            ProfilerPartition {
-                occurences: 2.0,
-                avg_n: 100.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 100.0);
-                    map
-                },
-            },
-            ProfilerPartition {
-                occurences: 1.0,
-                avg_n: 10.0,
-                avg_op_counts: {
-                    let mut map = HashMap::new();
-                    map.insert("insert".to_string(), 10.0);
-                    map
-                },
-            },
-        ]);
-
-        let model = CostModel {
-            by_op: {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), linear_estimator());
-                map
-            },
-        };
-
-        let cost = dbg!(info.estimate_cost(&model));
-        assert_feq(cost, 20_100.0, "100ns/op * 100 ops * 2 + 10ns/op * 10 ops");
-    }
-
-    #[test]
-    fn test_partition_costs_merges_duplicates() {
-        let cl = (100.0, {
-            let mut map = HashMap::new();
-            map.insert("insert".to_string(), 10);
-            map
-        });
-        let outp = vec![Ok(cl.clone()), Ok(cl)]
-            .into_iter()
-            .fold(Ok(vec![]), partition_costs)
-            .unwrap();
-
-        assert_eq!(outp.len(), 1, "merged duplicates");
-        assert_eq!(outp[0].occurences, 2.0, "weight updated");
-        assert_feq(outp[0].avg_n, 100.0, "average n correct");
-        assert_feq(
-            *outp[0].avg_op_counts.get("insert").unwrap(),
-            10.0,
-            "average op count correct",
-        );
-    }
-
-    #[test]
-    fn test_partition_costs_merges_close() {
-        let outp = vec![
-            Ok((100.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 50);
-                map
-            })),
-            Ok((110.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 100);
-                map
-            })),
-        ]
-        .into_iter()
-        .fold(Ok(vec![]), partition_costs)
-        .unwrap();
-
-        assert_eq!(outp.len(), 1, "merged duplicates");
-        assert_eq!(outp[0].occurences, 2.0, "weight updated");
-        assert_feq(outp[0].avg_n, 105.0, "average n correct");
-        assert_feq(
-            *outp[0].avg_op_counts.get("insert").unwrap(),
-            75.0,
-            "average op count correct",
-        );
-    }
-    #[test]
-    fn test_partition_costs_keeps_separate() {
-        let outp = vec![
-            Ok((100.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 10);
-                map
-            })),
-            Ok((999999.0, {
-                let mut map = HashMap::new();
-                map.insert("insert".to_string(), 10);
-                map
-            })),
-        ]
-        .into_iter()
-        .fold(Ok(vec![]), partition_costs)
-        .unwrap();
-
-        assert_eq!(
-            outp.len(),
-            2,
-            "large difference in n values causes partition"
-        );
-    }
-}
diff --git a/src/crates/candelabra/src/profiler/info.rs b/src/crates/candelabra/src/profiler/info.rs
new file mode 100644
index 0000000..dc9a03c
--- /dev/null
+++ b/src/crates/candelabra/src/profiler/info.rs
@@ -0,0 +1,372 @@
+use std::collections::HashMap;
+use std::str::FromStr;
+
+use anyhow::{anyhow, Result};
+use serde::{Deserialize, Serialize};
+
+use crate::cost::{benchmark::OpName, Cost, CostModel, Estimator};
+
+/// The information we get from profiling.
+/// Rather than keeping all results, we split them into 'similar enough' partitions,
+/// with the idea that each partition will probably have the same best implementation.
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct ProfilerInfo(pub Vec<ProfilerPartition>);
+
+/// A vector of container lifetimes which have similar usage characteristics
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct ProfilerPartition {
+    pub occurences: f64,
+    pub avg_n: f64,
+    pub avg_op_counts: HashMap<OpName, f64>,
+}
+
+/// Lifetime of a single allocated collection.
+type CollectionLifetime = (f64, HashMap<OpName, usize>);
+
+/// Breakdown of a cost value by operation
+pub type CostBreakdown<'a> = HashMap<&'a OpName, Cost>;
+
+impl ProfilerInfo {
+    pub fn from(iter: impl Iterator<Item = Result<String>>) -> Result<Self> {
+        Ok(Self(
+            iter.map(|contents| parse_output(&contents?))
+                .fold(Ok(vec![]), partition_costs)?,
+        ))
+    }
+
+    /// Estimate the cost of using the implementation with the given cost model
+    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
+        self.0
+            .iter()
+            .map(|cl| cl.estimate_cost(cost_model))
+            .sum::<f64>()
+    }
+
+    /// Get a breakdown of the cost by operation
+    pub fn cost_breakdown<'a>(&self, cost_model: &'a CostModel) -> CostBreakdown<'a> {
+        cost_model
+            .by_op
+            .iter()
+            .map(|(op, estimator)| {
+                (
+                    op,
+                    self.0
+                        .iter()
+                        .map(|cl| cl.op_cost(op, estimator))
+                        .sum::<f64>(),
+                )
+            })
+            .collect()
+    }
+}
+
+impl ProfilerPartition {
+    pub fn avg_op_count(&self, op: &str) -> f64 {
+        *self
+            .avg_op_counts
+            .get(op)
+            .expect("invalid op passed to op_count")
+    }
+
+    pub fn estimate_cost(&self, cost_model: &CostModel) -> f64 {
+        cost_model
+            .by_op
+            .iter()
+            .map(|(op, estimator)| self.op_cost(op, estimator))
+            .sum::<f64>()
+    }
+
+    pub fn op_cost(&self, op: &str, estimator: &Estimator) -> f64 {
+        estimator.estimatef(self.avg_n) * self.avg_op_count(op) * self.occurences
+    }
+
+    fn add_lifetime(&mut self, (n, ops): (f64, HashMap<OpName, usize>)) {
+        self.avg_n = self.avg_n + (n - self.avg_n) / (self.occurences + 1.0);
+        for (op, count) in ops {
+            let count = count as f64;
+            self.avg_op_counts
+                .entry(op)
+                .and_modify(|avg| *avg = *avg + (count - *avg) / (self.occurences + 1.0))
+                .or_insert(count);
+        }
+        self.occurences += 1.0;
+    }
+}
+
+/// Attempt to compress an iterator of collection lifetimes into as few partitions as possible
+fn partition_costs(
+    acc: Result<Vec<ProfilerPartition>>,
+    cl: Result<CollectionLifetime>,
+) -> Result<Vec<ProfilerPartition>> {
+    // error short circuiting
+    let (mut acc, (n, ops)) = (acc?, cl?);
+
+    // attempt to find a partition with a close enough n value
+    let (closest_idx, closest_delta) =
+        acc.iter()
+            .enumerate()
+            .fold((0, f64::MAX), |acc @ (_, val), (idx, partition)| {
+                let delta = (partition.avg_n - n).abs();
+                if delta < val {
+                    (idx, delta)
+                } else {
+                    acc
+                }
+            });
+
+    if closest_delta < 100.0 {
+        acc[closest_idx].add_lifetime((n, ops));
+    } else {
+        // add a new partition
+        acc.push(ProfilerPartition {
+            occurences: 1.0,
+            avg_n: n,
+            avg_op_counts: ops.into_iter().map(|(k, v)| (k, v as f64)).collect(),
+        })
+    }
+
+    Ok(acc)
+}
+
+/// Parse the output of the profiler
+fn parse_output(contents: &str) -> Result<(f64, HashMap<OpName, usize>)> {
+    let mut lines = contents.lines().map(usize::from_str);
+    let missing_line_err = || anyhow!("wrong number of lines in profiler output");
+    let n = lines.next().ok_or_else(missing_line_err)??;
+    let mut op_counts = HashMap::new();
+    op_counts.insert(
+        "contains".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "insert".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "clear".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "remove".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "first".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "last".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "nth".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "push".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "pop".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+    op_counts.insert(
+        "get".to_string(),
+        lines.next().ok_or_else(missing_line_err)??,
+    );
+
+    Ok((n as f64, op_counts))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use crate::{
+        cost::{CostModel, Estimator},
+        profiler::info::partition_costs,
+    };
+
+    use super::{ProfilerInfo, ProfilerPartition};
+
+    const EPSILON: f64 = 1e-5;
+    fn assert_feq(left: f64, right: f64, msg: &'static str) {
+        assert!((left - right).abs() < EPSILON, "{}", msg);
+    }
+
+    fn linear_estimator() -> Estimator {
+        Estimator {
+            coeffs: [0.0, 1.0, 0.0, 0.0],
+            transform_x: (0.0, 1.0),
+            transform_y: (0.0, 1.0),
+        }
+    }
+
+    #[test]
+    fn test_cost_single_partition() {
+        let info = ProfilerInfo(vec![ProfilerPartition {
+            occurences: 1.0,
+            avg_n: 100.0,
+            avg_op_counts: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 100.0);
+                map
+            },
+        }]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 10_000.0, "per op = 100 * 100 ops");
+    }
+
+    #[test]
+    fn test_cost_multi_partitions_sums() {
+        let info = ProfilerInfo(vec![
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 100.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 100.0);
+                    map
+                },
+            },
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 10.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 10.0);
+                    map
+                },
+            },
+        ]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 10_100.0, "100ns/op * 100 ops + 10ns/op * 10 ops");
+    }
+
+    #[test]
+    fn test_cost_multi_partitions_sums_weighted() {
+        let info = ProfilerInfo(vec![
+            ProfilerPartition {
+                occurences: 2.0,
+                avg_n: 100.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 100.0);
+                    map
+                },
+            },
+            ProfilerPartition {
+                occurences: 1.0,
+                avg_n: 10.0,
+                avg_op_counts: {
+                    let mut map = HashMap::new();
+                    map.insert("insert".to_string(), 10.0);
+                    map
+                },
+            },
+        ]);
+
+        let model = CostModel {
+            by_op: {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), linear_estimator());
+                map
+            },
+        };
+
+        let cost = dbg!(info.estimate_cost(&model));
+        assert_feq(cost, 20_100.0, "100ns/op * 100 ops * 2 + 10ns/op * 10 ops");
+    }
+
+    #[test]
+    fn test_partition_costs_merges_duplicates() {
+        let cl = (100.0, {
+            let mut map = HashMap::new();
+            map.insert("insert".to_string(), 10);
+            map
+        });
+        let outp = vec![Ok(cl.clone()), Ok(cl)]
+            .into_iter()
+            .fold(Ok(vec![]), partition_costs)
+            .unwrap();
+
+        assert_eq!(outp.len(), 1, "merged duplicates");
+        assert_eq!(outp[0].occurences, 2.0, "weight updated");
+        assert_feq(outp[0].avg_n, 100.0, "average n correct");
+        assert_feq(
+            *outp[0].avg_op_counts.get("insert").unwrap(),
+            10.0,
+            "average op count correct",
+        );
+    }
+
+    #[test]
+    fn test_partition_costs_merges_close() {
+        let outp = vec![
+            Ok((100.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 50);
+                map
+            })),
+            Ok((110.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 100);
+                map
+            })),
+        ]
+        .into_iter()
+        .fold(Ok(vec![]), partition_costs)
+        .unwrap();
+
+        assert_eq!(outp.len(), 1, "merged duplicates");
+        assert_eq!(outp[0].occurences, 2.0, "weight updated");
+        assert_feq(outp[0].avg_n, 105.0, "average n correct");
+        assert_feq(
+            *outp[0].avg_op_counts.get("insert").unwrap(),
+            75.0,
+            "average op count correct",
+        );
+    }
+    #[test]
+    fn test_partition_costs_keeps_separate() {
+        let outp = vec![
+            Ok((100.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 10);
+                map
+            })),
+            Ok((999999.0, {
+                let mut map = HashMap::new();
+                map.insert("insert".to_string(), 10);
+                map
+            })),
+        ]
+        .into_iter()
+        .fold(Ok(vec![]), partition_costs)
+        .unwrap();
+
+        assert_eq!(
+            outp.len(),
+            2,
+            "large difference in n values causes partition"
+        );
+    }
+}
diff --git a/src/crates/candelabra/src/profiler/mod.rs b/src/crates/candelabra/src/profiler/mod.rs
new file mode 100644
index 0000000..568929b
--- /dev/null
+++ b/src/crates/candelabra/src/profiler/mod.rs
@@ -0,0 +1,200 @@
+//! Profiling applications for info about container usage
+
+mod info;
+
+use anyhow::{Context, Result};
+use camino::{Utf8Path, Utf8PathBuf};
+use log::{debug, log_enabled, trace, warn, Level};
+use primrose::ContainerSelector;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::io::Write;
+use std::{
+    fs::{read_dir, File},
+    io::Read,
+    process::{Command, Stdio},
+};
+use tempfile::tempdir;
+
+use crate::cache::{gen_tree_hash, FileCache};
+use crate::candidates::ConTypeName;
+use crate::cost::benchmark::tee_output;
+use crate::project::Project;
+use crate::{Paths, State};
+
+pub use self::info::{ProfilerInfo, ProfilerPartition};
+
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct CacheEntry {
+    proj_hash: u64,
+    proj_location: Utf8PathBuf,
+    info: HashMap<ConTypeName, ProfilerInfo>,
+}
+
+impl State {
+    pub(crate) fn profiler_info_cache(paths: &Paths) -> Result<FileCache<String, CacheEntry>> {
+        FileCache::new(
+            paths.target_dir.join("candelabra").join("profiler_info"),
+            |_, v: &CacheEntry| {
+                let proj_hash = gen_tree_hash(&v.proj_location).unwrap_or(0);
+                v.proj_hash == proj_hash
+            },
+        )
+    }
+
+    /// Get or calculate profiler info for the given project.
+    /// Results are cached by the modification time of the project's source tree
+    pub fn profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
+        match self.profiler_info_cache.find(&project.name)? {
+            Some(x) => Ok(x.info),
+            None => {
+                let info = self.calc_profiler_info(project)?;
+
+                let proj_hash = gen_tree_hash(&project.source_dir)
+                    .context("Error generating project directory hash")?;
+                if let Err(e) = self.profiler_info_cache.put(
+                    &project.name,
+                    &CacheEntry {
+                        proj_hash,
+                        proj_location: project.source_dir.clone(),
+                        info: info.clone(),
+                    },
+                ) {
+                    warn!("Error caching profiler info for {}: {}", &project.name, e);
+                }
+
+                Ok(info)
+            }
+        }
+    }
+
+    /// Calculate profiler info for the given project.
+    fn calc_profiler_info(&self, project: &Project) -> Result<HashMap<ConTypeName, ProfilerInfo>> {
+        let candidate_list = self.project_candidate_list(project)?;
+        let con_types = candidate_list
+            .iter()
+            .flat_map(|(_, con_types)| con_types.iter())
+            .map(|(id, _)| id)
+            .collect::<Vec<_>>();
+
+        self.project_profiling_prep(project, &con_types)?;
+        let mut acc = HashMap::new();
+        for name in project.benchmarks.iter() {
+            for (con_type, new_results) in self
+                .profile_benchmark(project, name, &con_types)
+                .with_context(|| format!("Error profiling benchmark {}", name))?
+            {
+                acc.entry(con_type)
+                    .and_modify(|pi: &mut ProfilerInfo| pi.0.extend(new_results.0.iter().cloned()))
+                    .or_insert(new_results);
+            }
+        }
+
+        Ok(acc)
+    }
+
+    /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn project_profiling_prep(&self, project: &Project, con_types: &[&String]) -> Result<()> {
+        for (file, candidates) in self.project_candidate_list(project)? {
+            self.file_profiling_prep(&file, &candidates, con_types)
+                .with_context(|| format!("error preparing {} for profiling", file))?;
+        }
+
+        Ok(())
+    }
+
+    /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper.
+    fn file_profiling_prep(
+        &self,
+        file: &Utf8Path,
+        candidates: &[(String, Vec<String>)],
+        con_types: &[&String],
+    ) -> Result<()> {
+        debug!("Setting up {} for profiling", file);
+
+        let selector = ContainerSelector::from_path(
+            file.as_std_path(),
+            self.paths.library_src.as_std_path(),
+            self.model_size,
+        )
+        .context("error creating container selector")?;
+
+        let chosen = candidates
+            .iter()
+            .map(|(dest_name, impls)| (dest_name, &impls[0]))
+            .collect::<Vec<_>>();
+
+        let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| {
+            (
+                *d,
+                con_types.iter().position(|id| id == d).unwrap(),
+                c.as_str(),
+            )
+        }));
+
+        let new_path = file.to_string().replace(".pr", "");
+
+        trace!("New code: {}", new_code);
+        trace!("New path: {}", new_path);
+
+        let mut f = File::create(new_path).context("error creating new source file")?;
+        f.write_all(new_code.as_bytes())
+            .context("error writing new code")?;
+
+        Ok(())
+    }
+
+    /// Run the given benchmark on the project, and parse the resulting profiling information.
+    fn profile_benchmark(
+        &self,
+        project: &Project,
+        name: &str,
+        con_types: &[&String],
+    ) -> Result<HashMap<String, ProfilerInfo>> {
+        let profiler_out_dir = tempdir()?;
+        debug!(
+            "Running benchmark {} with out dir {:?}",
+            name, profiler_out_dir
+        );
+
+        let child = Command::new("cargo")
+            .current_dir(&project.source_dir)
+            .args(["bench", "--bench", name])
+            .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted
+            .stdout(Stdio::piped())
+            .stderr(if log_enabled!(Level::Debug) {
+                Stdio::inherit()
+            } else {
+                Stdio::null()
+            })
+            .spawn()
+            .context("Error running bench command")?;
+
+        tee_output(child)?;
+
+        let mut con_type_results = HashMap::new();
+        for dir in read_dir(&profiler_out_dir)? {
+            // each directory has an index, corresponding to the container type name
+            let dir = dir?;
+            let con_type: String = con_types[dir
+                .file_name()
+                .into_string()
+                .unwrap()
+                .parse::<usize>()
+                .unwrap()]
+            .to_string();
+
+            con_type_results.insert(
+                con_type,
+                ProfilerInfo::from(read_dir(dir.path())?.map(|f| -> Result<String> {
+                    // read file contents
+                    let mut contents = String::new();
+                    File::open(f?.path())?.read_to_string(&mut contents)?;
+                    Ok(contents)
+                }))?,
+            );
+        }
+
+        Ok(con_type_results)
+    }
+}
diff --git a/src/crates/cli/src/display.rs b/src/crates/cli/src/display.rs
index 2ce9039..2debede 100644
--- a/src/crates/cli/src/display.rs
+++ b/src/crates/cli/src/display.rs
@@ -51,7 +51,7 @@ pub fn display_profiler_info(profile_info: ProfilerInfo) {
                 ("occurences".to_string(), p.occurences),
             ]
             .into_iter()
-            .chain(p.avg_op_counts.into_iter()),
+            .chain(p.avg_op_counts),
         )
     }))
 }
-- 
cgit v1.2.3
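
For readers tracing the data flow: parse_output above fixes the on-disk format the profiler wrapper is expected to write. Each file holds eleven integers, one per line: the lifetime's n value first, then the counts for contains, insert, clear, remove, first, last, nth, push, pop and get, in that order. A file recording one collection lifetime might therefore look like the following (values invented purely for illustration):

1000
12
1000
1
0
3
3
240
1000
87
419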
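
ProfilerPartition::add_lifetime avoids storing every observed lifetime by folding each new sample into a running mean: avg_n (and each avg_op_count) is updated as m <- m + (x - m) / (k + 1), with the occurences field standing in for k. A self-contained sketch of that update, separate from the crate's code, showing it agrees with the batch mean:

// Standalone demonstration of the incremental-mean update used by
// `add_lifetime`; not part of the candelabra crate itself.
fn main() {
    let samples = [100.0_f64, 110.0, 90.0, 105.0];

    let mut avg = 0.0;
    let mut occurences = 0.0; // same spelling as the struct field
    for x in samples {
        // m <- m + (x - m) / (k + 1)
        avg += (x - avg) / (occurences + 1.0);
        occurences += 1.0;
    }

    // Computing the mean in one pass over all samples gives the same result.
    let batch = samples.iter().sum::<f64>() / samples.len() as f64;
    assert!((avg - batch).abs() < 1e-9);
    println!("running mean = {avg}, batch mean = {batch}");
}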
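
The cost calculation itself is visible in op_cost: each partition contributes estimator(avg_n) * avg_op_count(op) * occurences per operation, and estimate_cost sums this over all operations and all partitions. Working through test_cost_multi_partitions_sums_weighted with the linear estimator (estimated cost n at size n): the first partition gives 100 * 100 * 2 = 20,000, the second gives 10 * 10 * 1 = 100, for a total of 20,100, which is exactly the value the test asserts.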