feat(cli): caching of cost models

author: Aria <me@aria.rip> 2023-11-11 17:17:01 +0000
committer: Aria <me@aria.rip> 2023-11-15 20:24:46 +0000
commit: e2bbf5e43ff548808c25d3edf8b9254197a7af06 (patch)
tree: d61569ef9b86812904a3f2b2ff7766357ac6d8ed /src
parent: edafbd35b2296d896eeab9d2f2ae7b682c440702 (diff)
3 files changed, 47 insertions, 22 deletions
diff --git a/src/crates/cli/src/cache.rs b/src/crates/cli/src/cache.rs
index 598cad1..4775a0f 100644
--- a/src/crates/cli/src/cache.rs
+++ b/src/crates/cli/src/cache.rs
@@ -15,12 +15,12 @@ use serde_json::{from_reader, to_writer};
 
 /// A filesystem-based K/V cache
 /// This doesn't deal with key invalidation or anything, just the filesystem/serialisation stuff
-pub struct FileCache<K: 'static + ?Sized, V> {
+pub struct FileCache<K: 'static + ?Sized, V, VR = V> {
     base_dir: Utf8PathBuf,
-    _data: PhantomData<(&'static K, V)>,
+    _data: PhantomData<(&'static K, V, VR)>,
 }
 
-impl<K: ?Sized + ToString, V: Serialize + for<'a> Deserialize<'a>> FileCache<K, V> {
+impl<K: ?Sized + ToString, V: for<'a> Deserialize<'a>, VR: Serialize> FileCache<K, V, VR> {
     /// Create a new file store in the given directory.
     pub fn new(base_dir: Utf8PathBuf) -> Result<Self> {
         create_dir_all(base_dir.as_std_path()).context("Error creating cache directory")?;
@@ -31,7 +31,7 @@ impl<K: ?Sized + ToString, V: Serialize + for<'a> Deserialize<'a>> FileCache<K,
     }
 
     /// Store the given value with the given `key`
-    pub fn put(&self, key: &K, value: &V) -> Result<()> {
+    pub fn put(&self, key: &K, value: &VR) -> Result<()> {
         let path = self.path_for(key);
         let mut file = File::create(path)?;
         to_writer(&mut file, value)?;
diff --git a/src/crates/cli/src/cost/mod.rs b/src/crates/cli/src/cost/mod.rs
index c407d9e..7f8d473 100644
--- a/src/crates/cli/src/cost/mod.rs
+++ b/src/crates/cli/src/cost/mod.rs
@@ -2,10 +2,11 @@
 pub mod benchmark;
 pub mod fit;
 
+use std::collections::HashMap;
+
 use anyhow::{anyhow, Context, Result};
 
 use candelabra_benchmarker::Results;
-
 use log::{debug, warn};
 use primrose::{LibSpec, LibSpecs};
 use serde::{Deserialize, Serialize};
@@ -16,18 +17,33 @@ use crate::{
     paths::Paths,
 };
 
+use self::fit::Estimator;
+
+/// Cost model for a container, capable of estimating cost of each supported operation.
+#[derive(Serialize, Deserialize)]
+pub struct CostModel {
+    by_op: HashMap<String, Estimator>,
+}
+
 /// Entry in the benchmark cache
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Deserialize)]
 struct CacheEntry {
     lib_hash: u64,
-    value: Results,
+    model: CostModel,
+}
+
+/// Entry in the cost model cache that borrows the model, so writing to the cache doesn't require cloning it
+#[derive(Serialize)]
+struct CacheEntryBorrowed<'a> {
+    lib_hash: u64,
+    model: &'a CostModel,
 }
 
 /// Gets/retrieves benchmark results for container implementations.
 /// This caches results, and invalidates them when the library or parameters change.
 pub struct ResultsStore<'a> {
     paths: &'a Paths,
-    store: FileCache<str, CacheEntry>,
+    store: FileCache<str, CacheEntry, CacheEntryBorrowed<'a>>,
     lib_specs: LibSpecs,
     lib_hash: u64,
 }
@@ -61,37 +77,51 @@ impl<'a> ResultsStore<'a> {
 
     /// Get benchmark results for the given type, using cached results if possible and persisting the results for later.
     /// Will panic if `name` is not in library specs.
-    pub fn get(&self, name: &str) -> Result<Results> {
+    pub fn get(&self, name: &str) -> Result<CostModel> {
         if let Some(results) = self.find(name)? {
             debug!("Cache hit for {} benchmarks", name);
             Ok(results)
         } else {
             debug!("Cache miss for {} benchmarks", name);
             let results = run_benchmarks(name, self.paths, &self.lib_specs)?;
-            if let Err(e) = self.put(name, &results) {
+            let model = build_cost_model(results)?;
+            if let Err(e) = self.put(name, &model) {
                 warn!("Error caching benchmark outputs for {}: {}", name, e);
             }
-            Ok(results)
+            Ok(model)
         }
     }
 
     /// Attempt to find an up-to-date set of results with the given key
-    fn find(&self, name: &str) -> Result<Option<Results>> {
+    fn find(&self, name: &str) -> Result<Option<CostModel>> {
         Ok(self
             .store
             .find(name)?
             .filter(|e| e.lib_hash == self.lib_hash)
-            .map(|e| e.value))
+            .map(|e| e.model))
     }
 
     /// Store a new set of results with the given key
-    fn put(&self, name: &str, results: &Results) -> Result<()> {
+    fn put(&self, name: &str, model: &CostModel) -> Result<()> {
         self.store.put(
             name,
-            &CacheEntry {
+            &CacheEntryBorrowed {
                 lib_hash: self.lib_hash,
-                value: results.clone(),
+                model,
             },
         )
     }
 }
+
+fn build_cost_model(results: Results) -> Result<CostModel> {
+    Ok(CostModel {
+        by_op: results
+            .by_op
+            .into_iter()
+            .map(|(op, os)| {
+                debug!("Fitting op {} with {} observations", op, os.len());
+                (op, fit::fit(&os))
+            })
+            .collect(),
+    })
+}
diff --git a/src/crates/cli/src/main.rs b/src/crates/cli/src/main.rs
index b674ac1..42bb63d 100644
--- a/src/crates/cli/src/main.rs
+++ b/src/crates/cli/src/main.rs
@@ -60,12 +60,7 @@ fn main() -> Result<()> {
 
     info!("Found all candidate types. Running benchmarks");
     for typ in seen_types.into_iter() {
-        let results = benchmarks.get(&typ).context("Error running benchmark")?;
-
-        for (op, results) in results.by_op.iter() {
-            debug!("Fitting curve for op {}", op);
-            fit(results);
-        }
+        let results = benchmarks.get(&typ).context("Error building cost model")?;
     }
 
     Ok(())
author	Aria <me@aria.rip>	2023-11-11 17:17:01 +0000
committer	Aria <me@aria.rip>	2023-11-15 20:24:46 +0000
commit	e2bbf5e43ff548808c25d3edf8b9254197a7af06 (patch)
tree	d61569ef9b86812904a3f2b2ff7766357ac6d8ed /src
parent	edafbd35b2296d896eeab9d2f2ae7b682c440702 (diff)