From 9da1961a33b20cc64d920ae82f6cc49c42d0c728 Mon Sep 17 00:00:00 2001 From: Aria Date: Thu, 14 Dec 2023 18:42:24 +0000 Subject: refactor(candelabra): split cli, reduce duplication introduce an invalidation function to the cache helper to get rid of repetitive code split cli and candelabra out to separate crates move most top-level operations into the State struct --- src/Cargo.toml | 5 +- src/crates/candelabra/Cargo.toml | 19 ++++ src/crates/candelabra/src/cache.rs | 126 +++++++++++++++++++++ src/crates/candelabra/src/candidates.rs | 116 ++++++++++++++++++++ src/crates/candelabra/src/cost/benchmark.rs | 164 ++++++++++++++++++++++++++++ src/crates/candelabra/src/cost/fit.rs | 79 ++++++++++++++ src/crates/candelabra/src/cost/mod.rs | 117 ++++++++++++++++++++ src/crates/candelabra/src/lib.rs | 41 +++++++ src/crates/candelabra/src/paths.rs | 47 ++++++++ src/crates/candelabra/src/profiler.rs | 132 ++++++++++++++++++++++ src/crates/candelabra/src/project.rs | 35 ++++++ src/crates/cli/Cargo.toml | 17 +-- src/crates/cli/src/cache.rs | 111 ------------------- src/crates/cli/src/candidates.rs | 136 ----------------------- src/crates/cli/src/cmd.rs | 39 ------- src/crates/cli/src/cost/benchmark.rs | 164 ---------------------------- src/crates/cli/src/cost/fit.rs | 79 -------------- src/crates/cli/src/cost/mod.rs | 121 -------------------- src/crates/cli/src/main.rs | 143 +++++------------------- src/crates/cli/src/paths.rs | 47 -------- src/crates/cli/src/profiler/mod.rs | 121 -------------------- src/crates/cli/src/project.rs | 35 ------ 22 files changed, 911 insertions(+), 983 deletions(-) create mode 100644 src/crates/candelabra/Cargo.toml create mode 100644 src/crates/candelabra/src/cache.rs create mode 100644 src/crates/candelabra/src/candidates.rs create mode 100644 src/crates/candelabra/src/cost/benchmark.rs create mode 100644 src/crates/candelabra/src/cost/fit.rs create mode 100644 src/crates/candelabra/src/cost/mod.rs create mode 100644 src/crates/candelabra/src/lib.rs create mode 100644 src/crates/candelabra/src/paths.rs create mode 100644 src/crates/candelabra/src/profiler.rs create mode 100644 src/crates/candelabra/src/project.rs delete mode 100644 src/crates/cli/src/cache.rs delete mode 100644 src/crates/cli/src/candidates.rs delete mode 100644 src/crates/cli/src/cmd.rs delete mode 100644 src/crates/cli/src/cost/benchmark.rs delete mode 100644 src/crates/cli/src/cost/fit.rs delete mode 100644 src/crates/cli/src/cost/mod.rs delete mode 100644 src/crates/cli/src/paths.rs delete mode 100644 src/crates/cli/src/profiler/mod.rs delete mode 100644 src/crates/cli/src/project.rs diff --git a/src/Cargo.toml b/src/Cargo.toml index d3658ac..3b0a7ba 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -3,8 +3,9 @@ resolver = "2" members = [ "crates/primrose", "crates/library", - "crates/cli", - "crates/benchmarker" + "crates/benchmarker", + "crates/candelabra", + "crates/cli" ] [workspace.dependencies] diff --git a/src/crates/candelabra/Cargo.toml b/src/crates/candelabra/Cargo.toml new file mode 100644 index 0000000..909e577 --- /dev/null +++ b/src/crates/candelabra/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "candelabra" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +log = { workspace = true } +primrose = { path = "../primrose" } +anyhow = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +camino = "1.1.6" +cargo_metadata = "0.18.1" +glob = "0.3.1" +tempfile = "3" +nalgebra = "0.32.3" +polars = { version = "0.35.4", features = ["describe"] } diff --git a/src/crates/candelabra/src/cache.rs b/src/crates/candelabra/src/cache.rs new file mode 100644 index 0000000..424b2e7 --- /dev/null +++ b/src/crates/candelabra/src/cache.rs @@ -0,0 +1,126 @@ +//! Common utilities for caching results +use std::{ + cell::RefCell, + collections::hash_map::DefaultHasher, + fs::{create_dir_all, metadata, remove_file, File}, + hash::{Hash, Hasher}, + marker::PhantomData, +}; + +use anyhow::{anyhow, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use glob::glob; +use log::{debug, warn}; +use serde::{Deserialize, Serialize}; +use serde_json::{from_reader, to_writer}; + +/// A filesystem-based K/V cache +/// This doesn't deal with key invalidation or anything, just the filesystem/serialisation stuff +pub struct FileCache { + base_dir: Utf8PathBuf, + validator: RefCell bool>>, + _data: PhantomData<(&'static K, V, V)>, +} + +impl Deserialize<'a>> FileCache { + /// Create a new file store in the given directory. + pub fn new( + base_dir: Utf8PathBuf, + validator: impl FnMut(&K, &V) -> bool + 'static, + ) -> Result { + create_dir_all(base_dir.as_std_path()).context("Error creating cache directory")?; + Ok(Self { + base_dir, + validator: RefCell::new(Box::new(validator)), + _data: PhantomData, + }) + } + + /// Store the given value with the given `key` + pub fn put(&self, key: &K, value: &V) -> Result<()> { + let path = self.path_for(key); + let mut file = File::create(path)?; + to_writer(&mut file, value)?; + + Ok(()) + } + + /// Attempt to load cache entry with the given `key` + /// `Ok(None)` indicates no valid cache entry, while `Err(e)` indicates an IO error + /// Invalid cache entries will be deleted. + pub fn find(&self, key: &K) -> Result> { + let path = self.path_for(key); + if !path.exists() { + return Ok(None); + } + + let file = File::open(&path).context("Error opening cache entry")?; + let contents: V = match self.attempt_load(key, &file) { + Ok(x) => x, + Err(e) => { + debug!("Invalid cache entry: {}", e); + if let Err(e) = self.remove(key) { + warn!("Error deleting invalid cache entry: {}", e); + } + + return Ok(None); + } + }; + + Ok(Some(contents)) + } + + fn attempt_load(&self, key: &K, f: &File) -> Result { + let c = from_reader(f)?; + if (self.validator.borrow_mut())(key, &c) { + Ok(c) + } else { + Err(anyhow!("validation function said no")) + } + } + + /// Remove value for the given key + pub fn remove(&self, key: &K) -> Result<()> { + Ok(remove_file(self.path_for(key))?) + } + + /// Get the path for a given key + fn path_for(&self, key: &K) -> Utf8PathBuf { + // Sanitise key name + let key = key.to_string(); + let mut san = String::with_capacity(key.len()); + for chr in key.chars() { + if chr == '/' + || chr == '\\' + || chr == ':' + || chr == '*' + || chr == '?' + || chr == '"' + || chr == '<' + || chr == '>' + || chr == '|' + { + san += "_"; + } else { + san.push(chr); + } + } + if san.is_empty() { + san += "_"; + } + self.base_dir.join(san) // TODO: santisation + } +} + +/// Generate a hash from the current state of the given directory +/// This is built from the modification time of all files in that directory and all children. +pub fn gen_tree_hash(dir: &Utf8Path) -> Result { + let mut hasher = DefaultHasher::new(); + + for f in glob(&format!("{}/**/*", dir)).unwrap() { + let modified = metadata(f?)?.modified()?; + modified.hash(&mut hasher); + } + + Ok(hasher.finish()) +} diff --git a/src/crates/candelabra/src/candidates.rs b/src/crates/candelabra/src/candidates.rs new file mode 100644 index 0000000..0d76862 --- /dev/null +++ b/src/crates/candelabra/src/candidates.rs @@ -0,0 +1,116 @@ +//! Generating and caching primrose candidate results + +use std::{collections::HashMap, fs::metadata, time::SystemTime}; + +use anyhow::{Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use log::{debug, warn}; +use primrose::ContainerSelector; +use serde::{Deserialize, Serialize}; + +use crate::{ + cache::{gen_tree_hash, FileCache}, + paths::Paths, + project::Project, + State, +}; + +/// Names a container type we want to select. +pub type ConTypeName = String; + +/// Name of a container implementation we are considering +pub type ImplName = String; + +/// A list of candidate container types +pub type Candidates = HashMap>; + +/// A list of candidates for each selection site, and each file in a given project +pub type ProjectCandidateList = Vec<(Utf8PathBuf, Vec<(ConTypeName, Vec)>)>; + +/// Info for getting & caching candidate types +pub struct CandidatesStore { + pub store: FileCache, + pub lib_hash: u64, +} + +/// Entry in the benchmark cache +#[derive(Serialize, Deserialize, Debug)] +pub struct CacheEntry { + lib_hash: u64, + mod_time: SystemTime, + value: Candidates, +} + +impl CandidatesStore { + /// Create a new store, using the given paths. + /// Benchmarks are cached in `paths.target_dir / candelabra / primrose_results` + pub fn new(paths: &Paths) -> Result { + let base_dir = paths.target_dir.join("candelabra").join("primrose_results"); + + let lib_hash = + gen_tree_hash(&paths.library_crate).context("Error generating library hash")?; + debug!("Initialised candidate cacher with hash {}", lib_hash); + + Ok(Self { + store: FileCache::new(base_dir, move |k, v: &CacheEntry| { + let mod_time = metadata(k) + .map(|m| m.modified()) + .unwrap_or(Ok(SystemTime::UNIX_EPOCH)) + .unwrap(); + v.lib_hash == lib_hash && v.mod_time == mod_time + })?, + lib_hash, + }) + } +} + +impl State { + /// Run primrose on all files in the given project. + /// Returns a list of all candidates for each container type in each file. + pub fn project_candidate_list(&self, project: &Project) -> Result { + let mut all_candidates = Vec::new(); + for file in project.find_primrose_files()? { + let result = match self.candidates.store.find(&file)? { + Some(x) => x.value, + None => self.calc_candidates(&file)?, + }; + + let mut typs = Vec::new(); + for (con_type_id, candidates) in result { + typs.push((con_type_id.clone(), candidates)); + } + all_candidates.push((file, typs)); + } + + Ok(all_candidates) + } + + /// Find candidate types for every selection site in a given path + fn calc_candidates(&self, path: &Utf8Path) -> Result { + let selector = ContainerSelector::from_path( + path.as_std_path(), + self.paths.library_src.as_std_path(), + self.model_size, + ) + .with_context(|| format!("error getting container selector for {}", path))?; + + let candidates: Candidates = selector + .find_all_candidates()? + .into_iter() + .map(|(k, v)| (k.to_string(), v)) + .collect(); + + let mod_time = metadata(path)?.modified()?; + if let Err(e) = self.candidates.store.put( + path, + &CacheEntry { + lib_hash: self.candidates.lib_hash, + value: candidates.clone(), + mod_time, + }, + ) { + warn!("Error caching candidates for {}: {}", path, e); + } + Ok(candidates) + } +} diff --git a/src/crates/candelabra/src/cost/benchmark.rs b/src/crates/candelabra/src/cost/benchmark.rs new file mode 100644 index 0000000..a1e0e18 --- /dev/null +++ b/src/crates/candelabra/src/cost/benchmark.rs @@ -0,0 +1,164 @@ +//! Benchmarking of container types + +use std::{ + collections::HashMap, + fs::{copy, create_dir, File}, + io::Write, + process::Command, + time::Duration, +}; + +use anyhow::{bail, Context, Result}; +use log::{debug, log_enabled, Level}; +use primrose::{LibSpec, LibSpecs}; +use serde::{Deserialize, Serialize}; +use tempfile::{tempdir, TempDir}; + +use crate::paths::Paths; + +/// The name of the element type we use for benchmarking +pub const ELEM_TYPE: &str = "usize"; + +/// String representation of the array of N values we use for benchmarking +pub const NS: &str = "[8, 256, 1024, 65536]"; + +/// Results for a whole suite of benchmarks +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Results { + /// Results for each collection operation + pub by_op: HashMap>, +} + +/// Name of an operation +pub type OpName = String; + +/// The first key in the tuple is the `n` of the container before the benchmark was taken, and the second the results of the benchmark. +pub type Observation = (usize, BenchmarkResult); + +/// Results for a single benchmark +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct BenchmarkResult { + /// Number of times the benchmark was run + pub times: usize, + + /// The minimum time taken + pub min: Duration, + + /// The maximum time taken + pub max: Duration, + + /// The average (mean) time taken + pub avg: Duration, +} + +/// Run benchmarks for the given container type, returning the results. +/// Panics if the given name is not in the library specs. +pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result { + let lib_spec = lib_specs + .get(name) + .expect("name passed to benchmarkspec not in libspecs"); + + // Generate crate & source + let crate_ = prepare_crate(name, paths, lib_spec)?; + + // Build and run + debug!("Building and running benchmarks for {}", name); + let run_output = Command::new("cargo") + .args(["run", "--release", "--", "--bench"]) + .current_dir(crate_.path()) + .env("CARGO_TARGET_DIR", &paths.target_dir) // Share target directory + .output() + .context("Error running build command")?; + + if !run_output.status.success() { + bail!("Error result from benchmark. Output: {:?}", run_output); + } + + if log_enabled!(Level::Debug) { + if let Ok(stdout) = String::from_utf8(run_output.stdout.clone()) { + debug!("stdout: {:?}", stdout); + } + if let Ok(stderr) = String::from_utf8(run_output.stderr.clone()) { + debug!("stderr: {:?}", stderr); + } + } + + // Deserialise benchmark results + todo!() +} + +fn prepare_crate(name: &str, paths: &Paths, lib_spec: &LibSpec) -> Result { + // Directory we will create the crate in + let crate_tempdir = tempdir()?; + let crate_dir = crate_tempdir.path(); + debug!("Preparing benchmark crate for {} in {:?}", name, crate_dir); + + // Write the manifest + let mut manifest = + File::create(crate_dir.join("Cargo.toml")).context("Error creating Cargo.toml")?; + manifest + .write_all( + format!( + " +[package] +name = \"bench\" +version = \"0.1.0\" +edition = \"2021\" + +[dependencies] +candelabra-benchmarker = {{ path = \"{}\" }} +primrose-library = {{ path = \"{}\" }} +", + paths.benchmarker_crate, paths.library_crate, + ) + .as_bytes(), + ) + .context("Error writing Cargo.toml")?; + + // Ensure we use the right toolchain + let orig_toolchain_file = paths.base.join("rust-toolchain.toml"); + copy(orig_toolchain_file, crate_dir.join("rust-toolchain.toml")) + .context("Error writing rust-toolchain.toml")?; + + // Generate the code for running our benchmarks + let mut benchmark_statements = String::new(); + + // Add benchmarks for implemented traits + let implemented_traits = lib_spec.interface_provide_map.keys(); + for tr in implemented_traits { + benchmark_statements += &format!( + "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(c, &NS);", + tr.to_lowercase(), + name, + ELEM_TYPE, + ); + } + + // Write the benchmarking source, using our generated benchmarker code. + let src_dir = crate_dir.join("src"); + create_dir(&src_dir).context("Error creating src directory")?; + + let mut src_file = File::create(src_dir.join("main.rs")).context("Error creating main.rs")?; + src_file + .write_all( + format!( + " +use candelabra_benchmarker::criterion::{{criterion_group, criterion_main, Criterion}}; + +const NS: &[usize] = &{}; + +fn run_benches(c: &mut Criterion) {{ + {} +}} + +criterion_group!(benches, run_benches); +criterion_main!(benches); +", + NS, benchmark_statements + ) + .as_bytes(), + ) + .context("Error writing to main.rs")?; + + Ok(crate_tempdir) +} diff --git a/src/crates/candelabra/src/cost/fit.rs b/src/crates/candelabra/src/cost/fit.rs new file mode 100644 index 0000000..ace3634 --- /dev/null +++ b/src/crates/candelabra/src/cost/fit.rs @@ -0,0 +1,79 @@ +//! Fitting a 3rd-order polynomial to benchmark results +//! Based on code from al-jshen: + +use super::benchmark::Observation; +use na::{Dyn, MatrixXx4, OVector}; +use serde::{Deserialize, Serialize}; + +/// Estimates durations using a 3rd-order polynomial. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct Estimator([f64; 4]); + +/// Approximate cost of an action. +/// This is an approximation for the number of nanoseconds it would take. +pub type Cost = f64; + +impl Estimator { + /// Fit from the given set of observations, using the least squared method. + pub fn fit(results: &[Observation]) -> Self { + let (xs, ys) = Self::to_data(results); + + let xv = vandermonde(&xs); + let xtx = xv.transpose() * xv.clone(); + let xtxinv = xtx.try_inverse().unwrap(); + let xty = xv.transpose() * ys; + let coeffs = xtxinv * xty; + + Self(coeffs.into()) + } + + /// Calculate the residual sum of squares for the given data. + pub fn rss(&self, results: &[Observation]) -> f64 { + // TODO: there's a more efficient way to do this / bulk esimations + let (xs, ys) = Self::to_data(results); + + xs.iter() + .zip(ys.iter()) + .map(|(x, y)| (y - self.estimatef(*x)).powi(2)) + .sum() + } + + /// Estimate the cost of a given operation at the given `n`. + pub fn estimate(&self, n: usize) -> Cost { + self.estimatef(n as f64) + } + + /// Estimate the cost of a given operation at the given `n`. + pub fn estimatef(&self, n: f64) -> Cost { + let [a, b, c, d] = self.0; + a + b * n + c * n.powi(2) + d * n.powi(3) + } + + /// Convert a list of observations to the format we use internally. + fn to_data(results: &[Observation]) -> (Vec, OVector) { + let xs = results.iter().map(|(n, _)| *n as f64).collect::>(); + let ys = OVector::::from_iterator( + results.len(), + results + .iter() + .map(|(_, results)| results.avg.as_nanos() as f64), + ); + + (xs, ys) + } +} + +/// Calculate a Vandermode matrix with 4 columns. +/// https://en.wikipedia.org/wiki/Vandermonde_matrix +fn vandermonde(xs: &[f64]) -> MatrixXx4 { + let mut mat = MatrixXx4::repeat(xs.len(), 1.0); + + for (row, x) in xs.iter().enumerate() { + // First column is all 1s so skip + for col in 1..=3 { + mat[(row, col)] = x.powi(col as i32); + } + } + + mat +} diff --git a/src/crates/candelabra/src/cost/mod.rs b/src/crates/candelabra/src/cost/mod.rs new file mode 100644 index 0000000..18bcb79 --- /dev/null +++ b/src/crates/candelabra/src/cost/mod.rs @@ -0,0 +1,117 @@ +//! Generating, caching, and using cost models +mod benchmark; +mod fit; + +pub use benchmark::{BenchmarkResult, Results as BenchmarkResults}; +pub use fit::Estimator; + +use std::collections::HashMap; + +use anyhow::{anyhow, Context, Result}; + +use benchmark::Results; +use log::{debug, warn}; +use primrose::{LibSpec, LibSpecs}; +use serde::{Deserialize, Serialize}; + +use crate::{ + cache::{gen_tree_hash, FileCache}, + cost::benchmark::run_benchmarks, + paths::Paths, + State, +}; + +/// Cost model for a container, capable of estimating cost of each supported operation. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CostModel { + by_op: HashMap, +} + +/// Information for getting & caching cost information for container implementations. +pub struct ResultsStore { + store: FileCache, + lib_specs: LibSpecs, + lib_hash: u64, +} + +/// Entry in the cost info cache +#[derive(Serialize, Deserialize)] +struct CacheEntry { + /// Hash of the primrose library at the time measurements were taken + lib_hash: u64, + + /// The resulting cost model + model: CostModel, + + /// The raw benchmark results + results: Results, +} + +impl ResultsStore { + /// Create a new store, using the given paths. + /// Benchmarks are cached in `paths.target_dir / candelabra / benchmark_results` + pub fn new(paths: &Paths) -> Result { + let lib_specs = + LibSpec::read_all(paths.library_src.as_std_path()).map_err(|e| anyhow!("{}", e))?; + + // TODO: this should be home folder or smth + let base_dir = paths + .target_dir + .join("candelabra") + .join("benchmark_results"); + + // TODO: Doesn't take NS or ELEM_TYPE into account + let lib_hash = + gen_tree_hash(&paths.library_crate).context("Error generating library hash")?; + + debug!("Initialised benchmark cacher with hash {}", lib_hash); + + Ok(Self { + store: FileCache::new(base_dir, move |_, v: &CacheEntry| v.lib_hash == lib_hash)?, + lib_specs, + lib_hash, + }) + } +} + +impl State { + /// Get or calculate the cost model for the given type. + /// Will panic if `name` is not in library specs. + pub fn cost_model(&self, name: &str) -> Result { + match self.results.store.find(&name)? { + Some(x) => Ok(x.model), + None => self.calc_cost_model(&name), + } + } + + /// Calculate cost information for the given type + /// Will panic if `name` is not in library specs. + fn calc_cost_model(&self, name: &str) -> Result { + let results = run_benchmarks(name, &self.paths, &self.results.lib_specs)?; + let model = build_cost_model(results.clone())?; + if let Err(e) = self.results.store.put( + name, + &CacheEntry { + lib_hash: self.results.lib_hash, + model: model.clone(), + results: results.clone(), + }, + ) { + warn!("Error caching benchmark outputs for {}: {}", name, e); + } + Ok(model) + } +} + +fn build_cost_model(results: Results) -> Result { + Ok(CostModel { + by_op: results + .by_op + .into_iter() + .map(|(op, os)| { + debug!("Fitting op {} with {} observations", op, os.len()); + (op, Estimator::fit(&os)) + }) + .collect(), + }) +} diff --git a/src/crates/candelabra/src/lib.rs b/src/crates/candelabra/src/lib.rs new file mode 100644 index 0000000..2836e78 --- /dev/null +++ b/src/crates/candelabra/src/lib.rs @@ -0,0 +1,41 @@ +use anyhow::Result; + +use crate::{candidates::CandidatesStore, cost::ResultsStore}; + +extern crate nalgebra as na; + +mod cache; +pub mod candidates; +pub mod cost; +pub mod profiler; + +mod paths; +mod project; +pub use paths::Paths; +pub use project::Project; + +/// Shared state for program execution +pub struct State { + /// Paths used throughout execution + paths: Paths, + + /// Cache for candidate types for primrose files & annotations + candidates: CandidatesStore, + + /// Results and cost models + results: ResultsStore, + + /// The model size used for primrose operations + model_size: usize, +} + +impl State { + pub fn new(paths: Paths) -> Result { + Ok(Self { + candidates: CandidatesStore::new(&paths)?, + results: ResultsStore::new(&paths)?, + model_size: 3, // TODO + paths, + }) + } +} diff --git a/src/crates/candelabra/src/paths.rs b/src/crates/candelabra/src/paths.rs new file mode 100644 index 0000000..d25c174 --- /dev/null +++ b/src/crates/candelabra/src/paths.rs @@ -0,0 +1,47 @@ +use camino::Utf8PathBuf; +use std::{env, path::PathBuf}; + +/// Paths used throughout execution +#[derive(Debug, Clone)] +pub struct Paths { + pub base: Utf8PathBuf, + pub library_crate: Utf8PathBuf, + pub library_src: Utf8PathBuf, + pub benchmarker_crate: Utf8PathBuf, + pub target_dir: Utf8PathBuf, +} + +impl Paths { + pub fn from_base(base: Utf8PathBuf) -> Self { + Paths { + library_crate: base.join("crates").join("library"), + library_src: base.join("crates").join("library").join("src"), + benchmarker_crate: base.join("crates").join("benchmarker"), + target_dir: base.join("target"), + base, + } + } +} + +impl Default for Paths { + fn default() -> Self { + let path = if let Ok(var) = env::var("CANDELABRA_SRC_DIR") { + var.into() + } else { + // Most the time this won't work, but it's worth a shot. + let mut path = PathBuf::from(file!()); + path.pop(); // main.rs + path.pop(); // src + path.pop(); // candelabra-cli + path.pop(); // crates + if path.components().count() == 0 { + path.push("."); + } + path + }; + + Paths::from_base(path.canonicalize().expect( + "candelabra source directory not found. please specify it with CANDELABRA_SRC_DIR", + ).try_into().expect("candelabra source directory has non-utf8 components in it (???)")) + } +} diff --git a/src/crates/candelabra/src/profiler.rs b/src/crates/candelabra/src/profiler.rs new file mode 100644 index 0000000..b240258 --- /dev/null +++ b/src/crates/candelabra/src/profiler.rs @@ -0,0 +1,132 @@ +//! Profiling applications for info about container usage + +use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8Path; +use log::{debug, trace}; +use polars::prelude::*; +use primrose::ContainerSelector; +use std::io::Write; +use std::str::FromStr; +use std::{ + fs::{read_dir, File}, + io::Read, + process::{Command, Stdio}, +}; +use tempfile::tempdir; + +use crate::project::Project; +use crate::State; + +/// The information we get from profiling. +pub type ProfilerInfo = DataFrame; + +impl State { + /// Get/calculate profiler info for the given project. + pub fn calc_profiler_info(&self, project: &Project) -> Result { + self.project_profiling_prep(project)?; + project + .benchmarks + .iter() + .map(|name| { + self.profile_benchmark(project, name) + .with_context(|| format!("Error profiling benchmark {}", name)) + }) + .reduce(|acc, df| acc?.vstack(&df?).map_err(Into::into)) + .ok_or(anyhow!("nothing to run or types are not used"))? + } + + /// Prepare the given project to be profiled, by replacing all candidate types with the profiler wrapper. + fn project_profiling_prep(&self, project: &Project) -> Result<()> { + for (file, candidates) in self.project_candidate_list(project)? { + self.file_profiling_prep(&file, &candidates) + .with_context(|| format!("error preparing {} for profiling", file))?; + } + + Ok(()) + } + + /// Prepare the given file to be profiled, by replacing all candidate types with the profiler wrapper. + fn file_profiling_prep( + &self, + file: &Utf8Path, + candidates: &[(String, Vec)], + ) -> Result<()> { + debug!("Setting up {} for profiling", file); + + let selector = ContainerSelector::from_path( + file.as_std_path(), + self.paths.library_src.as_std_path(), + self.model_size, + ) + .context("error creating container selector")?; + + let chosen = candidates + .iter() + .map(|(dest_name, impls)| (dest_name, &impls[0])) + .collect::>(); + + let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| (*d, c.as_str()))); + + let new_path = file.to_string().replace(".pr", ""); + + trace!("New code: {}", new_code); + trace!("New path: {}", new_path); + + let mut f = File::create(new_path).context("error creating new source file")?; + f.write_all(new_code.as_bytes()) + .context("error writing new code")?; + + Ok(()) + } + + /// Run the given benchmark on the project, and parse the resulting profiling information. + fn profile_benchmark(&self, project: &Project, name: &str) -> Result { + let profiler_out_dir = tempdir()?; + debug!( + "Running benchmark {} with out dir {:?}", + name, profiler_out_dir + ); + + let output = Command::new("cargo") + .current_dir(&project.source_dir) + .args(["bench", "--bench", name]) + .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted + .stderr(Stdio::inherit()) + .stdout(Stdio::inherit()) + .output()?; + + if !output.status.success() { + bail!("Error running benchmark"); + } + + let mut info = ProfilerInfo::default(); + for file in read_dir(&profiler_out_dir)? { + let file = file?; + let mut contents = String::new(); + File::open(file.path())?.read_to_string(&mut contents)?; + + info = info.vstack(&parse_output(&contents)?)?; + } + + Ok(info) + } +} + +/// Parse the output of the profiler +fn parse_output(contents: &str) -> Result { + let mut lines = contents.lines().map(i32::from_str); + let missing_line_err = || anyhow!("wrong number of lines in "); + + Ok(df!( + "n" => &[lines.next().ok_or_else(missing_line_err)??], + "contains" => &[lines.next().ok_or_else(missing_line_err)??], + "insert" => &[lines.next().ok_or_else(missing_line_err)??], + "clear" => &[lines.next().ok_or_else(missing_line_err)??], + "remove" => &[lines.next().ok_or_else(missing_line_err)??], + "first" => &[lines.next().ok_or_else(missing_line_err)??], + "last" => &[lines.next().ok_or_else(missing_line_err)??], + "nth" => &[lines.next().ok_or_else(missing_line_err)??], + "push" => &[lines.next().ok_or_else(missing_line_err)??], + "pop" => &[lines.next().ok_or_else(missing_line_err)??], + )?) +} diff --git a/src/crates/candelabra/src/project.rs b/src/crates/candelabra/src/project.rs new file mode 100644 index 0000000..cc8b4a2 --- /dev/null +++ b/src/crates/candelabra/src/project.rs @@ -0,0 +1,35 @@ +use anyhow::{Context, Result}; +use cargo_metadata::{camino::Utf8PathBuf, Package, Target}; +use glob::glob; + +/// A single package or crate that we wish to process. +#[derive(Debug, Clone)] +pub struct Project { + pub name: String, + pub benchmarks: Vec, + pub source_dir: Utf8PathBuf, +} + +impl Project { + pub fn new(package: Package) -> Self { + Project { + name: package.name.clone(), + source_dir: package.manifest_path.parent().unwrap().to_path_buf(), + benchmarks: package + .targets + .into_iter() + .filter(Target::is_bench) + .map(|t| t.name) + .collect(), + } + } + + /// Find all primrose files (`.pr.rs`) in this project. + pub fn find_primrose_files(&self) -> Result> { + glob(&format!("{}/**/*.pr.rs", self.source_dir)) + .unwrap() + .flat_map(|p| p.map(|p| p.try_into())) + .collect::, _>>() + .context("error finding primrose files in project") + } +} diff --git a/src/crates/cli/Cargo.toml b/src/crates/cli/Cargo.toml index 0a3a643..c0180ee 100644 --- a/src/crates/cli/Cargo.toml +++ b/src/crates/cli/Cargo.toml @@ -1,22 +1,9 @@ [package] -name = "candelabra-cli" +name = "cli" version = "0.1.0" edition = "2021" -default-run = "candelabra-cli" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -log = { workspace = true } -env_logger = { workspace = true } -primrose = { path = "../primrose" } -anyhow = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -camino = "1.1.6" -cargo_metadata = "0.18.1" -argh = "0.1.12" -glob = "0.3.1" -tempfile = "3" -nalgebra = "0.32.3" -polars = { version = "0.35.4", features = ["describe"] } +candelabra = { path = "../candelabra" } \ No newline at end of file diff --git a/src/crates/cli/src/cache.rs b/src/crates/cli/src/cache.rs deleted file mode 100644 index 4775a0f..0000000 --- a/src/crates/cli/src/cache.rs +++ /dev/null @@ -1,111 +0,0 @@ -//! Common utilities for caching results -use std::{ - collections::hash_map::DefaultHasher, - fs::{create_dir_all, metadata, remove_file, File}, - hash::{Hash, Hasher}, - marker::PhantomData, -}; - -use anyhow::{Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use glob::glob; -use log::{debug, warn}; -use serde::{Deserialize, Serialize}; -use serde_json::{from_reader, to_writer}; - -/// A filesystem-based K/V cache -/// This doesn't deal with key invalidation or anything, just the filesystem/serialisation stuff -pub struct FileCache { - base_dir: Utf8PathBuf, - _data: PhantomData<(&'static K, V, VR)>, -} - -impl Deserialize<'a>, VR: Serialize> FileCache { - /// Create a new file store in the given directory. - pub fn new(base_dir: Utf8PathBuf) -> Result { - create_dir_all(base_dir.as_std_path()).context("Error creating cache directory")?; - Ok(Self { - base_dir, - _data: PhantomData, - }) - } - - /// Store the given value with the given `key` - pub fn put(&self, key: &K, value: &VR) -> Result<()> { - let path = self.path_for(key); - let mut file = File::create(path)?; - to_writer(&mut file, value)?; - - Ok(()) - } - - /// Attempt to load cache entry with the given `key` - /// `Ok(None)` indicates no valid cache entry, while `Err(e)` indicates an IO error - /// Invalid cache entries will be deleted. - pub fn find(&self, key: &K) -> Result> { - let path = self.path_for(key); - if !path.exists() { - return Ok(None); - } - - let file = File::open(&path).context("Error opening cache entry")?; - let contents: V = match from_reader(file) { - Ok(x) => x, - Err(e) => { - debug!("Invalid cache entry: {}", e); - if let Err(e) = self.remove(key) { - warn!("Error deleting invalid cache entry: {}", e); - } - - return Ok(None); - } - }; - - Ok(Some(contents)) - } - - /// Remove value for the given key - pub fn remove(&self, key: &K) -> Result<()> { - Ok(remove_file(self.path_for(key))?) - } - - /// Get the path for a given key - fn path_for(&self, key: &K) -> Utf8PathBuf { - // Sanitise key name - let key = key.to_string(); - let mut san = String::with_capacity(key.len()); - for chr in key.chars() { - if chr == '/' - || chr == '\\' - || chr == ':' - || chr == '*' - || chr == '?' - || chr == '"' - || chr == '<' - || chr == '>' - || chr == '|' - { - san += "_"; - } else { - san.push(chr); - } - } - if san.is_empty() { - san += "_"; - } - self.base_dir.join(san) // TODO: santisation - } -} - -/// Generate a hash from the current state of the given directory -/// This is built from the modification time of all files in that directory and all children. -pub fn gen_tree_hash(dir: &Utf8Path) -> Result { - let mut hasher = DefaultHasher::new(); - - for f in glob(&format!("{}/**/*", dir)).unwrap() { - let modified = metadata(f?)?.modified()?; - modified.hash(&mut hasher); - } - - Ok(hasher.finish()) -} diff --git a/src/crates/cli/src/candidates.rs b/src/crates/cli/src/candidates.rs deleted file mode 100644 index ab713d7..0000000 --- a/src/crates/cli/src/candidates.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Generating and caching primrose candidate results - -use std::{collections::HashMap, fs::metadata, time::SystemTime}; - -use anyhow::{Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use log::{debug, warn}; -use primrose::ContainerSelector; -use serde::{Deserialize, Serialize}; - -use crate::{ - cache::{gen_tree_hash, FileCache}, - paths::Paths, - project::Project, - State, -}; - -// TODO: Make this adjustable -/// The size of the model used by primrose -const MODEL_SIZE: usize = 3; - -/// Names a container type we want to select. -pub type ConTypeName = String; - -/// Name of a container implementation we are considering -pub type ImplName = String; - -/// A list of candidate container types -pub type Candidates = HashMap>; - -/// Entry in the benchmark cache -#[derive(Serialize, Deserialize, Debug)] -struct CacheEntry { - lib_hash: u64, - mod_time: SystemTime, - value: Candidates, -} - -/// Gets/retrieves candidate container types for primrose files. -/// This caches results, and invalidates them when the file changes. -pub struct CandidatesStore { - paths: Paths, - store: FileCache, - lib_hash: u64, -} - -impl CandidatesStore { - /// Create a new store, using the given paths. - /// Benchmarks are cached in `paths.target_dir / candelabra / primrose_results` - pub fn new(paths: &Paths) -> Result { - let base_dir = paths.target_dir.join("candelabra").join("primrose_results"); - - let lib_hash = - gen_tree_hash(&paths.library_crate).context("Error generating library hash")?; - - debug!("Initialised candidate cacher with hash {}", lib_hash); - - Ok(Self { - store: FileCache::new(base_dir)?, - paths: paths.clone(), - lib_hash, - }) - } - - /// Get benchmark results for the given type, using cached results if possible and persisting the results for later. - /// Will panic if `name` is not in library specs. - pub fn get(&self, src: &Utf8Path) -> Result { - if let Some(results) = self.find(src)? { - debug!("Cache hit for {} candidates", src); - Ok(results) - } else { - debug!("Cache miss for {} candidates", src); - let selector = ContainerSelector::from_path( - src.as_std_path(), - self.paths.library_src.as_std_path(), - MODEL_SIZE, - ) - .with_context(|| format!("error getting container selector for {}", src))?; - - let candidates = selector - .find_all_candidates()? - .into_iter() - .map(|(k, v)| (k.to_string(), v)) - .collect(); - - if let Err(e) = self.put(src, &candidates) { - warn!("Error caching candidates for {}: {}", src, e); - } - Ok(candidates) - } - } - - /// Attempt to find an up-to-date set of results with the given key - fn find(&self, src: &Utf8Path) -> Result> { - let mod_time = metadata(src)?.modified()?; - Ok(self - .store - .find(src)? - .filter(|e| e.lib_hash == self.lib_hash && e.mod_time == mod_time) - .map(|e| e.value)) - } - - /// Store a new set of results with the given key - fn put(&self, src: &Utf8Path, results: &Candidates) -> Result<()> { - let mod_time = metadata(src)?.modified()?; - self.store.put( - src, - &CacheEntry { - lib_hash: self.lib_hash, - value: results.clone(), - mod_time, - }, - ) - } -} - -pub type ProjectCandidateList = Vec<(Utf8PathBuf, Vec<(ConTypeName, Vec)>)>; - -impl State { - /// Run primrose on all files in the given project. - /// Returns a list of all candidates for each container type in each file. - pub fn get_all_candidates(&self, project: &Project) -> Result { - let mut all_candidates = Vec::new(); - for file in project.find_primrose_files()? { - let result = self.candidates.get(&file)?; - - let mut typs = Vec::new(); - for (con_type_id, candidates) in result { - typs.push((con_type_id.clone(), candidates)); - } - all_candidates.push((file, typs)); - } - - Ok(all_candidates) - } -} diff --git a/src/crates/cli/src/cmd.rs b/src/crates/cli/src/cmd.rs deleted file mode 100644 index 7f9857d..0000000 --- a/src/crates/cli/src/cmd.rs +++ /dev/null @@ -1,39 +0,0 @@ -use argh::FromArgs; - -#[derive(FromArgs)] -/// Find the best performing container type using primrose -pub struct Args { - /// path to Cargo.toml - #[argh(option)] - pub manifest_path: Option, - - /// project to run on, if in a workspace - #[argh(option, short = 'p')] - pub project: Option, - - #[argh(subcommand)] - pub cmd: Subcommand, -} - -#[derive(FromArgs)] -#[argh(subcommand)] -pub enum Subcommand { - Model(ModelSubcommand), - Candidates(CandidatesSubcommand), - Profile(ProfileSubcommand), -} - -#[derive(FromArgs)] -/// Show the cost model for the given implementation -#[argh(subcommand, name = "cost-model")] -pub struct ModelSubcommand {} - -#[derive(FromArgs)] -/// Show the candidate types selected by primrose -#[argh(subcommand, name = "candidates")] -pub struct CandidatesSubcommand {} - -#[derive(FromArgs)] -/// Show the profiling information generated from benchmarks -#[argh(subcommand, name = "profile")] -pub struct ProfileSubcommand {} diff --git a/src/crates/cli/src/cost/benchmark.rs b/src/crates/cli/src/cost/benchmark.rs deleted file mode 100644 index a1e0e18..0000000 --- a/src/crates/cli/src/cost/benchmark.rs +++ /dev/null @@ -1,164 +0,0 @@ -//! Benchmarking of container types - -use std::{ - collections::HashMap, - fs::{copy, create_dir, File}, - io::Write, - process::Command, - time::Duration, -}; - -use anyhow::{bail, Context, Result}; -use log::{debug, log_enabled, Level}; -use primrose::{LibSpec, LibSpecs}; -use serde::{Deserialize, Serialize}; -use tempfile::{tempdir, TempDir}; - -use crate::paths::Paths; - -/// The name of the element type we use for benchmarking -pub const ELEM_TYPE: &str = "usize"; - -/// String representation of the array of N values we use for benchmarking -pub const NS: &str = "[8, 256, 1024, 65536]"; - -/// Results for a whole suite of benchmarks -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct Results { - /// Results for each collection operation - pub by_op: HashMap>, -} - -/// Name of an operation -pub type OpName = String; - -/// The first key in the tuple is the `n` of the container before the benchmark was taken, and the second the results of the benchmark. -pub type Observation = (usize, BenchmarkResult); - -/// Results for a single benchmark -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct BenchmarkResult { - /// Number of times the benchmark was run - pub times: usize, - - /// The minimum time taken - pub min: Duration, - - /// The maximum time taken - pub max: Duration, - - /// The average (mean) time taken - pub avg: Duration, -} - -/// Run benchmarks for the given container type, returning the results. -/// Panics if the given name is not in the library specs. -pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result { - let lib_spec = lib_specs - .get(name) - .expect("name passed to benchmarkspec not in libspecs"); - - // Generate crate & source - let crate_ = prepare_crate(name, paths, lib_spec)?; - - // Build and run - debug!("Building and running benchmarks for {}", name); - let run_output = Command::new("cargo") - .args(["run", "--release", "--", "--bench"]) - .current_dir(crate_.path()) - .env("CARGO_TARGET_DIR", &paths.target_dir) // Share target directory - .output() - .context("Error running build command")?; - - if !run_output.status.success() { - bail!("Error result from benchmark. Output: {:?}", run_output); - } - - if log_enabled!(Level::Debug) { - if let Ok(stdout) = String::from_utf8(run_output.stdout.clone()) { - debug!("stdout: {:?}", stdout); - } - if let Ok(stderr) = String::from_utf8(run_output.stderr.clone()) { - debug!("stderr: {:?}", stderr); - } - } - - // Deserialise benchmark results - todo!() -} - -fn prepare_crate(name: &str, paths: &Paths, lib_spec: &LibSpec) -> Result { - // Directory we will create the crate in - let crate_tempdir = tempdir()?; - let crate_dir = crate_tempdir.path(); - debug!("Preparing benchmark crate for {} in {:?}", name, crate_dir); - - // Write the manifest - let mut manifest = - File::create(crate_dir.join("Cargo.toml")).context("Error creating Cargo.toml")?; - manifest - .write_all( - format!( - " -[package] -name = \"bench\" -version = \"0.1.0\" -edition = \"2021\" - -[dependencies] -candelabra-benchmarker = {{ path = \"{}\" }} -primrose-library = {{ path = \"{}\" }} -", - paths.benchmarker_crate, paths.library_crate, - ) - .as_bytes(), - ) - .context("Error writing Cargo.toml")?; - - // Ensure we use the right toolchain - let orig_toolchain_file = paths.base.join("rust-toolchain.toml"); - copy(orig_toolchain_file, crate_dir.join("rust-toolchain.toml")) - .context("Error writing rust-toolchain.toml")?; - - // Generate the code for running our benchmarks - let mut benchmark_statements = String::new(); - - // Add benchmarks for implemented traits - let implemented_traits = lib_spec.interface_provide_map.keys(); - for tr in implemented_traits { - benchmark_statements += &format!( - "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(c, &NS);", - tr.to_lowercase(), - name, - ELEM_TYPE, - ); - } - - // Write the benchmarking source, using our generated benchmarker code. - let src_dir = crate_dir.join("src"); - create_dir(&src_dir).context("Error creating src directory")?; - - let mut src_file = File::create(src_dir.join("main.rs")).context("Error creating main.rs")?; - src_file - .write_all( - format!( - " -use candelabra_benchmarker::criterion::{{criterion_group, criterion_main, Criterion}}; - -const NS: &[usize] = &{}; - -fn run_benches(c: &mut Criterion) {{ - {} -}} - -criterion_group!(benches, run_benches); -criterion_main!(benches); -", - NS, benchmark_statements - ) - .as_bytes(), - ) - .context("Error writing to main.rs")?; - - Ok(crate_tempdir) -} diff --git a/src/crates/cli/src/cost/fit.rs b/src/crates/cli/src/cost/fit.rs deleted file mode 100644 index f4372f1..0000000 --- a/src/crates/cli/src/cost/fit.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Fitting a 3rd-order polynomial to benchmark results -//! Based on code from al-jshen: https://github.com/al-jshen/compute/tree/master - -use super::benchmark::Observation; -use na::{Dyn, MatrixXx4, OVector}; -use serde::{Deserialize, Serialize}; - -/// Estimates durations using a 3rd-order polynomial. -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct Estimator([f64; 4]); - -/// Approximate cost of an action. -/// This is an approximation for the number of nanoseconds it would take. -pub type Cost = f64; - -impl Estimator { - /// Fit from the given set of observations, using the least squared method. - pub fn fit(results: &[Observation]) -> Self { - let (xs, ys) = Self::to_data(results); - - let xv = vandermonde(&xs); - let xtx = xv.transpose() * xv.clone(); - let xtxinv = xtx.try_inverse().unwrap(); - let xty = xv.transpose() * ys; - let coeffs = xtxinv * xty; - - Self(coeffs.into()) - } - - /// Calculate the residual sum of squares for the given data. - pub fn rss(&self, results: &[Observation]) -> f64 { - // TODO: there's a more efficient way to do this / bulk esimations - let (xs, ys) = Self::to_data(results); - - xs.iter() - .zip(ys.iter()) - .map(|(x, y)| (y - self.estimatef(*x)).powi(2)) - .sum() - } - - /// Estimate the cost of a given operation at the given `n`. - pub fn estimate(&self, n: usize) -> Cost { - self.estimatef(n as f64) - } - - /// Estimate the cost of a given operation at the given `n`. - pub fn estimatef(&self, n: f64) -> Cost { - let [a, b, c, d] = self.0; - a + b * n + c * n.powi(2) + d * n.powi(3) - } - - /// Convert a list of observations to the format we use internally. - fn to_data(results: &[Observation]) -> (Vec, OVector) { - let xs = results.iter().map(|(n, _)| *n as f64).collect::>(); - let ys = OVector::::from_iterator( - results.len(), - results - .iter() - .map(|(_, results)| results.avg.as_nanos() as f64), - ); - - (xs, ys) - } -} - -/// Calculate a Vandermode matrix with 4 columns. -/// https://en.wikipedia.org/wiki/Vandermonde_matrix -fn vandermonde(xs: &[f64]) -> MatrixXx4 { - let mut mat = MatrixXx4::repeat(xs.len(), 1.0); - - for (row, x) in xs.iter().enumerate() { - // First column is all 1s so skip - for col in 1..=3 { - mat[(row, col)] = x.powi(col as i32); - } - } - - mat -} diff --git a/src/crates/cli/src/cost/mod.rs b/src/crates/cli/src/cost/mod.rs deleted file mode 100644 index f3cad13..0000000 --- a/src/crates/cli/src/cost/mod.rs +++ /dev/null @@ -1,121 +0,0 @@ -//! Generating, caching, and using cost models -pub mod benchmark; -pub mod fit; - -use std::collections::HashMap; - -use anyhow::{anyhow, Context, Result}; - -use benchmark::Results; -use log::{debug, warn}; -use primrose::{LibSpec, LibSpecs}; -use serde::{Deserialize, Serialize}; - -use self::fit::Estimator; -use crate::{ - cache::{gen_tree_hash, FileCache}, - cost::benchmark::run_benchmarks, - paths::Paths, -}; - -/// Cost model for a container, capable of estimating cost of each supported operation. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CostModel { - by_op: HashMap, -} - -/// Entry in the benchmark cache -#[derive(Serialize, Deserialize)] -struct CacheEntry { - lib_hash: u64, - model: CostModel, - results: Results, -} - -/// Gets/retrieves benchmark results for container implementations. -/// This caches results, and invalidates them when the library or parameters change. -pub struct ResultsStore { - paths: Paths, - store: FileCache, - lib_specs: LibSpecs, - lib_hash: u64, -} - -impl ResultsStore { - /// Create a new store, using the given paths. - /// Benchmarks are cached in `paths.target_dir / candelabra / benchmark_results` - pub fn new(paths: &Paths) -> Result { - let lib_specs = - LibSpec::read_all(paths.library_src.as_std_path()).map_err(|e| anyhow!("{}", e))?; - - // TODO: this should be home folder or smth - let base_dir = paths - .target_dir - .join("candelabra") - .join("benchmark_results"); - - // TODO: Doesn't take NS or ELEM_TYPE into account - let lib_hash = - gen_tree_hash(&paths.library_crate).context("Error generating library hash")?; - - debug!("Initialised benchmark cacher with hash {}", lib_hash); - - Ok(Self { - store: FileCache::new(base_dir)?, - paths: paths.clone(), - lib_specs, - lib_hash, - }) - } - - /// Get benchmark results for the given type, using cached results if possible and persisting the results for later. - /// Will panic if `name` is not in library specs. - pub fn get(&self, name: &str) -> Result { - if let Some(results) = self.find(name)? { - debug!("Cache hit for {} benchmarks", name); - Ok(results) - } else { - debug!("Cache miss for {} benchmarks", name); - let results = run_benchmarks(name, &self.paths, &self.lib_specs)?; - let model = build_cost_model(results.clone())?; - if let Err(e) = self.put(name, &model, &results) { - warn!("Error caching benchmark outputs for {}: {}", name, e); - } - Ok(model) - } - } - - /// Attempt to find an up-to-date set of results with the given key - fn find(&self, name: &str) -> Result> { - Ok(self - .store - .find(name)? - .filter(|e| e.lib_hash == self.lib_hash) - .map(|e| e.model)) - } - - /// Store a new set of results with the given key - fn put(&self, name: &str, model: &CostModel, results: &Results) -> Result<()> { - self.store.put( - name, - &CacheEntry { - lib_hash: self.lib_hash, - model: model.clone(), - results: results.clone(), - }, - ) - } -} - -fn build_cost_model(results: Results) -> Result { - Ok(CostModel { - by_op: results - .by_op - .into_iter() - .map(|(op, os)| { - debug!("Fitting op {} with {} observations", op, os.len()); - (op, Estimator::fit(&os)) - }) - .collect(), - }) -} diff --git a/src/crates/cli/src/main.rs b/src/crates/cli/src/main.rs index 8827084..5bd0f34 100644 --- a/src/crates/cli/src/main.rs +++ b/src/crates/cli/src/main.rs @@ -1,115 +1,32 @@ -use anyhow::{anyhow, Context, Result}; -use candidates::CandidatesStore; -use cmd::{CandidatesSubcommand, ModelSubcommand, ProfileSubcommand}; -use cost::ResultsStore; -use log::info; -use polars::prelude::*; -use project::Project; - -use crate::{ - cmd::{Args, Subcommand}, - paths::Paths, -}; - -extern crate nalgebra as na; - -mod cache; -mod candidates; -mod cmd; -mod cost; -mod paths; -mod profiler; -mod project; - -fn main() -> Result<()> { - env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); - - let args: Args = argh::from_env(); - - // Build shared state - let paths = Paths::default(); - info!("Using source dir: {:?}", &paths.base); - let state = State { - candidates: CandidatesStore::new(&paths).context("error creating candidate store")?, - results: ResultsStore::new(&paths).context("error creating result store")?, - paths, - model_size: 3, // TODO - }; - - let projects = get_projects(&args).context("failed to find project paths")?; - match args.cmd { - Subcommand::Model(c) => state.cmd_model(projects, c), - Subcommand::Candidates(c) => state.cmd_candidates(projects, c), - Subcommand::Profile(c) => state.cmd_profile(projects, c), - } +fn main() { + println!("Hello, world!"); } -/// Shared state for program execution -pub struct State { - /// Paths used throughout execution - paths: Paths, - - /// Candidate types for primrose files & annotations - candidates: CandidatesStore, - - /// Results and cost models - results: ResultsStore, - - /// The model size used for primrose operations - model_size: usize, -} - -impl State { - pub fn cmd_model(&self, projects: Vec, c: ModelSubcommand) -> Result<()> { - todo!() - } - - pub fn cmd_candidates(&self, projects: Vec, c: CandidatesSubcommand) -> Result<()> { - todo!() - } - - pub fn cmd_profile(&self, projects: Vec, c: ProfileSubcommand) -> Result<()> { - for project in projects { - info!("Profiling project {}", project.name); - let inf = self - .profile_all(&project) - .with_context(|| format!("Error profiling project {}", project.name))?; - - // TODO: More useful output - info!("{:?}", inf); - info!("{} samples", inf.shape().0); - info!("{:?}", inf.describe(None)); - } - - Ok(()) - } -} - -fn get_projects(args: &Args) -> Result> { - let mut cmd = cargo_metadata::MetadataCommand::new(); - if let Some(p) = &args.manifest_path { - cmd.manifest_path(p); - } - - let metadata = cmd.exec().context("failed to get manifest metadata")?; - - if let Some(p) = &args.project { - // Select a specific project - Ok(vec![metadata - .packages - .iter() - .find(|pkg| pkg.name == *p) - .map(|pkg| Project::new(pkg.clone())) - .ok_or_else(|| { - anyhow!("specified project does not exist") - })?]) - } else { - // Default to all workspace members - Ok(metadata - .workspace_members - .iter() - .flat_map(|member| metadata.packages.iter().find(|pkg| pkg.id == *member)) - .map(|pkg| Project::new(pkg.clone())) - .collect()) - } -} +// fn get_projects(args: &Args) -> Result> { +// let mut cmd = cargo_metadata::MetadataCommand::new(); +// if let Some(p) = &args.manifest_path { +// cmd.manifest_path(p); +// } + +// let metadata = cmd.exec().context("failed to get manifest metadata")?; + +// if let Some(p) = &args.project { +// // Select a specific project +// Ok(vec![metadata +// .packages +// .iter() +// .find(|pkg| pkg.name == *p) +// .map(|pkg| Project::new(pkg.clone())) +// .ok_or_else(|| { +// anyhow!("specified project does not exist") +// })?]) +// } else { +// // Default to all workspace members +// Ok(metadata +// .workspace_members +// .iter() +// .flat_map(|member| metadata.packages.iter().find(|pkg| pkg.id == *member)) +// .map(|pkg| Project::new(pkg.clone())) +// .collect()) +// } +// } diff --git a/src/crates/cli/src/paths.rs b/src/crates/cli/src/paths.rs deleted file mode 100644 index 2b44400..0000000 --- a/src/crates/cli/src/paths.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::{env, path::PathBuf}; - -use camino::Utf8PathBuf; - -#[derive(Debug, Clone)] -pub struct Paths { - pub base: Utf8PathBuf, - pub library_crate: Utf8PathBuf, - pub library_src: Utf8PathBuf, - pub benchmarker_crate: Utf8PathBuf, - pub target_dir: Utf8PathBuf, -} - -impl Paths { - fn from_base(base: Utf8PathBuf) -> Self { - Paths { - library_crate: base.join("crates").join("library"), - library_src: base.join("crates").join("library").join("src"), - benchmarker_crate: base.join("crates").join("benchmarker"), - target_dir: base.join("target"), - base, - } - } -} - -impl Default for Paths { - fn default() -> Self { - let path = if let Ok(var) = env::var("CANDELABRA_SRC_DIR") { - var.into() - } else { - // Most the time this won't work, but it's worth a shot. - let mut path = PathBuf::from(file!()); - path.pop(); // main.rs - path.pop(); // src - path.pop(); // candelabra-cli - path.pop(); // crates - if path.components().count() == 0 { - path.push("."); - } - path - }; - - Paths::from_base(path.canonicalize().expect( - "candelabra source directory not found. please specify it with CANDELABRA_SRC_DIR", - ).try_into().expect("candelabra source directory has non-utf8 components in it (???)")) - } -} diff --git a/src/crates/cli/src/profiler/mod.rs b/src/crates/cli/src/profiler/mod.rs deleted file mode 100644 index 24ae544..0000000 --- a/src/crates/cli/src/profiler/mod.rs +++ /dev/null @@ -1,121 +0,0 @@ -use anyhow::{anyhow, bail, Context, Result}; -use camino::Utf8Path; -use log::{debug, trace}; -use polars::prelude::*; -use primrose::ContainerSelector; -use std::io::Write; -use std::str::FromStr; -use std::{ - fs::{read_dir, File}, - io::Read, - process::{Command, Stdio}, -}; -use tempfile::tempdir; - -use crate::project::Project; -use crate::State; - -pub type ProfilerInfo = DataFrame; - -impl State { - /// Profile all benchmarks for the given project - pub fn profile_all(&self, project: &Project) -> Result { - self.prepare_for_profiling(project)?; - project - .benchmarks - .iter() - .map(|name| { - self.profile_benchmark(project, name) - .with_context(|| format!("Error profiling benchmark {}", name)) - }) - .reduce(|acc, df| acc?.vstack(&df?).map_err(Into::into)) - .ok_or(anyhow!("nothing to run or types are not used"))? - } - - fn prepare_for_profiling(&self, project: &Project) -> Result<()> { - for (file, candidates) in self.get_all_candidates(project)? { - self.prepare_file(&file, &candidates) - .with_context(|| format!("error preparing {} for profiling", file))?; - } - - Ok(()) - } - - fn prepare_file(&self, file: &Utf8Path, candidates: &[(String, Vec)]) -> Result<()> { - debug!("Setting up {} for profiling", file); - - let selector = ContainerSelector::from_path( - file.as_std_path(), - self.paths.library_src.as_std_path(), - self.model_size, - ) - .context("error creating container selector")?; - - let chosen = candidates - .iter() - .map(|(dest_name, impls)| (dest_name, &impls[0])) - .collect::>(); - - let new_code = selector.gen_profiling_file(chosen.iter().map(|(d, c)| (*d, c.as_str()))); - - let new_path = file.to_string().replace(".pr", ""); - - trace!("New code: {}", new_code); - trace!("New path: {}", new_path); - - let mut f = File::create(new_path).context("error creating new source file")?; - f.write_all(new_code.as_bytes()) - .context("error writing new code")?; - - Ok(()) - } - - fn profile_benchmark(&self, project: &Project, name: &str) -> Result { - let profiler_out_dir = tempdir()?; - debug!( - "Running benchmark {} with out dir {:?}", - name, profiler_out_dir - ); - - let output = Command::new("cargo") - .current_dir(&project.source_dir) - .args(["bench", "--bench", name]) - .env("PROFILER_OUT_DIR", profiler_out_dir.as_ref()) // Where profiler info gets outputted - .stderr(Stdio::inherit()) - .stdout(Stdio::inherit()) - .output()?; - - if !output.status.success() { - bail!("Error running benchmark"); - } - - let mut info = ProfilerInfo::default(); - for file in read_dir(&profiler_out_dir)? { - let file = file?; - let mut contents = String::new(); - File::open(file.path())?.read_to_string(&mut contents)?; - - info = info.vstack(&parse_output(&contents)?)?; - } - - Ok(info) - } -} - -fn parse_output(contents: &str) -> Result { - let mut lines = contents.lines().map(i32::from_str); - let missing_line_err = || anyhow!("wrong number of lines in "); - - Ok(df!( - "n" => &[lines.next().ok_or_else(missing_line_err)??], - "contains" => &[lines.next().ok_or_else(missing_line_err)??], - "insert" => &[lines.next().ok_or_else(missing_line_err)??], - "clear" => &[lines.next().ok_or_else(missing_line_err)??], - "remove" => &[lines.next().ok_or_else(missing_line_err)??], - "first" => &[lines.next().ok_or_else(missing_line_err)??], - "last" => &[lines.next().ok_or_else(missing_line_err)??], - "nth" => &[lines.next().ok_or_else(missing_line_err)??], - "push" => &[lines.next().ok_or_else(missing_line_err)??], - "pop" => &[lines.next().ok_or_else(missing_line_err)??], - )?) -} diff --git a/src/crates/cli/src/project.rs b/src/crates/cli/src/project.rs deleted file mode 100644 index cc8b4a2..0000000 --- a/src/crates/cli/src/project.rs +++ /dev/null @@ -1,35 +0,0 @@ -use anyhow::{Context, Result}; -use cargo_metadata::{camino::Utf8PathBuf, Package, Target}; -use glob::glob; - -/// A single package or crate that we wish to process. -#[derive(Debug, Clone)] -pub struct Project { - pub name: String, - pub benchmarks: Vec, - pub source_dir: Utf8PathBuf, -} - -impl Project { - pub fn new(package: Package) -> Self { - Project { - name: package.name.clone(), - source_dir: package.manifest_path.parent().unwrap().to_path_buf(), - benchmarks: package - .targets - .into_iter() - .filter(Target::is_bench) - .map(|t| t.name) - .collect(), - } - } - - /// Find all primrose files (`.pr.rs`) in this project. - pub fn find_primrose_files(&self) -> Result> { - glob(&format!("{}/**/*.pr.rs", self.source_dir)) - .unwrap() - .flat_map(|p| p.map(|p| p.try_into())) - .collect::, _>>() - .context("error finding primrose files in project") - } -} -- cgit v1.2.3