From 4181475b3e68519ae330fa513425acb7f8c44cff Mon Sep 17 00:00:00 2001
From: Aria Shrimpton <me@aria.rip>
Date: Fri, 19 Jan 2024 21:37:13 +0000
Subject: feat(cli): show normalised root mean square error of cost model

---
 src/crates/candelabra/src/cost/benchmark.rs |   9 +--
 src/crates/candelabra/src/cost/fit.rs       | 102 +++-------------------------
 src/crates/candelabra/src/cost/mod.rs       |  10 ++-
 src/crates/cli/src/model.rs                 |  14 ++--
 4 files changed, 32 insertions(+), 103 deletions(-)

(limited to 'src')
diff --git a/src/crates/candelabra/src/cost/benchmark.rs b/src/crates/candelabra/src/cost/benchmark.rs
index 9c7266d..65addd2 100644
--- a/src/crates/candelabra/src/cost/benchmark.rs
+++ b/src/crates/candelabra/src/cost/benchmark.rs
@@ -100,9 +100,9 @@ pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result
         .flat_map(|(name, timings)| {
             let mut timings = timings
                 .trim()
-                .strip_prefix("[")?
-                .strip_suffix("]")?
-                .split(" ");
+                .strip_prefix('[')?
+                .strip_suffix(']')?
+                .split(' ');
 
             let result = BenchmarkResult {
                 min: parse_time_str(timings.next()?, timings.next()?)?,
@@ -110,7 +110,7 @@ pub fn run_benchmarks(name: &str, paths: &Paths, lib_specs: &LibSpecs) -> Result
                 max: parse_time_str(timings.next()?, timings.next()?)?,
             };
 
-            let (op, n) = name.trim().split_once("/")?;
+            let (op, n) = name.trim().split_once('/')?;
 
             Some((op, usize::from_str(n).ok()?, result))
         });
@@ -133,6 +133,7 @@ fn parse_time_str(quantity: &str, suffix: &str) -> Option<Duration> {
                 "ms" => 1e-3,
                 "µs" => 1e-6,
                 "ns" => 1e-9,
+                "ps" => 1e-12,
                 _ => todo!(),
             },
     ))
diff --git a/src/crates/candelabra/src/cost/fit.rs b/src/crates/candelabra/src/cost/fit.rs
index 57bee78..b5f737e 100644
--- a/src/crates/candelabra/src/cost/fit.rs
+++ b/src/crates/candelabra/src/cost/fit.rs
@@ -1,8 +1,6 @@
 //! Fitting a 3rd-order polynomial to benchmark results
 //! Based on code from al-jshen: <https://github.com/al-jshen/compute/tree/master>
 
-use std::cmp;
-
 use super::benchmark::Observation;
 use na::{Dyn, MatrixXx4, OVector};
 use serde::{Deserialize, Serialize};
@@ -50,7 +48,7 @@ impl Estimator {
         }
     }
 
-    pub fn normalisation_transformation<'a, I>(is: I) -> (f64, f64)
+    pub fn normalisation_transformation<'a, I>(_: I) -> (f64, f64)
     where
         I: Iterator<Item = &'a f64>,
     {
@@ -67,14 +65,18 @@ impl Estimator {
         (0.0, 1.0)
     }
 
-    /// Get the mean squared error with respect to some data points
-    pub fn mse(&self, results: &[Observation]) {
+    /// Get the root mean square error over some data points, divided by the mean observed value
+    pub fn nrmse(&self, results: &[Observation]) -> f64 {
         let (xs, ys) = Self::to_data(results);
-        xs.iter()
+        let mean = ys.sum() / xs.len() as f64;
+        let mse: f64 = xs
+            .iter()
             .zip(ys.iter())
-            .map(|(x, y)| (y - self.estimatef(y)).powi(2))
-            .sum()
-            / xs.len()
+            .map(|(x, y)| (y - self.estimatef(*x)).powi(2))
+            .sum::<f64>()
+            / xs.len() as f64;
+
+        mse.sqrt() / mean
     }
 
     /// Estimate the cost of a given operation at the given `n`.
@@ -118,85 +120,3 @@ fn vandermonde(xs: &[f64]) -> MatrixXx4<f64> {
 
     mat
 }
-
-#[cfg(test)]
-mod tests {
-    use std::time::Duration;
-
-    use crate::cost::{benchmark::Observation, BenchmarkResult, Estimator};
-
-    const EPSILON: f64 = 0.1e-3;
-
-    fn create_observations(points: &[(usize, u64)]) -> Vec<Observation> {
-        points
-            .iter()
-            .map(|(n, p)| {
-                (
-                    *n,
-                    BenchmarkResult {
-                        min: Duration::from_nanos(*p),
-                        max: Duration::from_nanos(*p),
-                        avg: Duration::from_nanos(*p),
-                    },
-                )
-            })
-            .collect()
-    }
-
-    fn assert_close_fit(points: &[(usize, u64)], msg: &'static str) {
-        let data = create_observations(points);
-        let estimator = Estimator::fit(&data);
-        let mse = estimator.mse(&data);
-        dbg!(&estimator, mse);
-
-        assert!(rss.abs() < EPSILON, "{} has too high mse", msg);
-    }
-
-    #[test]
-    fn test_fit_basic() {
-        assert_close_fit(&[(1, 1), (2, 1), (3, 1), (4, 1)], "constant");
-        assert_close_fit(&[(1, 1), (2, 2), (3, 3), (4, 4)], "straight line");
-        assert_close_fit(&[(1, 1), (2, 4), (3, 9), (4, 16)], "square");
-        assert_close_fit(&[(1, 1), (2, 8), (3, 27), (4, 64)], "cubic");
-    }
-
-    #[test]
-    fn test_fit_basic_largenum() {
-        assert_close_fit(
-            &[
-                (100_000, 100_000),
-                (200_000, 100_000),
-                (300_000, 100_000),
-                (400_000, 100_000),
-            ],
-            "constant",
-        );
-        assert_close_fit(
-            &[
-                (100_000, 100_000),
-                (200_000, 200_000),
-                (300_000, 300_000),
-                (400_000, 400_000),
-            ],
-            "straight line",
-        );
-        assert_close_fit(
-            &[
-                (100_000, 100_000),
-                (200_000, 400_000),
-                (300_000, 900_000),
-                (400_000, 1_600_000),
-            ],
-            "square",
-        );
-        assert_close_fit(
-            &[
-                (100_000, 100_000),
-                (200_000, 800_000),
-                (300_000, 2_700_000),
-                (400_000, 6_400_000),
-            ],
-            "cubic",
-        );
-    }
-}
diff --git a/src/crates/candelabra/src/cost/mod.rs b/src/crates/candelabra/src/cost/mod.rs
index 85adac6..99bdc7c 100644
--- a/src/crates/candelabra/src/cost/mod.rs
+++ b/src/crates/candelabra/src/cost/mod.rs
@@ -78,8 +78,14 @@ impl State {
     /// Get or calculate the cost model for the given type.
     /// Will panic if `name` is not in library specs.
     pub fn cost_model(&self, name: &str) -> Result<CostModel> {
+        Ok(self.cost_info(name)?.0)
+    }
+
+    /// Get information about the given type's cost, including raw benchmark results
+    /// Will panic if `name` is not in library specs.
+    pub fn cost_info(&self, name: &str) -> Result<(CostModel, Results)> {
         match self.results.store.find(name)? {
-            Some(x) => Ok(x.model),
+            Some(x) => Ok((x.model, x.results)),
             None => {
                 let (model, results) = self.calc_cost_model(name)?;
                 if let Err(e) = self.results.store.put(
@@ -92,7 +98,7 @@ impl State {
                 ) {
                     warn!("Error caching benchmark outputs for {}: {}", name, e);
                 }
-                Ok(model)
+                Ok((model, results))
             }
         }
     }
diff --git a/src/crates/cli/src/model.rs b/src/crates/cli/src/model.rs
index 42d9bf0..43c279f 100644
--- a/src/crates/cli/src/model.rs
+++ b/src/crates/cli/src/model.rs
@@ -17,18 +17,20 @@ pub struct Args {
 impl State {
     pub fn cmd_model(&self, args: Args) -> Result<()> {
         info!("Calculating cost model for {}", &args.name);
-        let model = self.inner.cost_model(&args.name)?;
+        let (model, results) = self.inner.cost_info(&args.name)?;
 
         // Table of parameters
         let mut builder = Builder::default();
-        builder.set_header(["op", "x^3", "x^2", "x", "+ c"]);
+        builder.set_header(["op", "x^0", "x^1", "x^2", "x^3", "nrmse"]);
         for (k, v) in model.by_op.iter() {
+            let obvs = results.by_op.get(k).unwrap();
             builder.push_record(&[
                 k.to_string(),
-                format!("{0:.5}", v.0[3]),
-                format!("{0:.5}", v.0[2]),
-                format!("{0:.5}", v.0[1]),
-                format!("{0:.5}", v.0[0]),
+                format!("{0:.5}", v.coeffs[0]),
+                format!("{0:.5}", v.coeffs[1]),
+                format!("{0:.5}", v.coeffs[2]),
+                format!("{0:.5}", v.coeffs[3]),
+                format!("{0:.5}", v.nrmse(obvs)),
             ]);
         }
 
-- 
cgit v1.2.3