2 files changed, 71 insertions, 28 deletions
diff --git a/src/crates/cli/Cargo.toml b/src/crates/cli/Cargo.toml
index 6b3ae92..5db5481 100644
--- a/src/crates/cli/Cargo.toml
+++ b/src/crates/cli/Cargo.toml
@@ -19,4 +19,4 @@ cargo_metadata = "0.18.1"
 argh = "0.1.12"
 glob = "0.3.1"
 tempfile = "3"
-friedrich = "0.5.0"
+nalgebra = "0.32.3"
diff --git a/src/crates/cli/src/cost/fit.rs b/src/crates/cli/src/cost/fit.rs
index e6b3e32..746c2a5 100644
--- a/src/crates/cli/src/cost/fit.rs
+++ b/src/crates/cli/src/cost/fit.rs
@@ -1,34 +1,77 @@
-//! Fitting a curve to benchmark results
-
-use std::time::Duration;
+//! Fitting a 3rd-order polynomial to benchmark results.
+//! Based on code from al-jshen: <https://github.com/al-jshen/compute/tree/master>
 
 use candelabra_benchmarker::Observation;
-use friedrich::{gaussian_process::GaussianProcess, kernel::Kernel, prior::Prior};
-
-/// Fit a curve to the given set of observations.
-pub fn fit(results: &Vec<Observation>) -> impl Estimator {
-    let xs = results
-        .iter()
-        .map(|(n, _)| vec![*n as f64])
-        .collect::<Vec<_>>();
-
-    let ys = results
-        .iter()
-        .map(|(_, results)| results.avg.as_nanos() as f64)
-        .collect::<Vec<_>>();
-
-    // TODO: Should be able to incorporate the min/max into this
-    GaussianProcess::default(xs, ys)
-}
+use na::{Dyn, MatrixXx4, OVector};
+
+/// Estimates durations using a 3rd-order polynomial, storing its coefficients from `n^0` up to `n^3`.
+pub struct Estimator([f64; 4]);
+
+/// Approximate cost of an action, as returned by [`Estimator::estimate`].
+/// This is an approximation of the number of nanoseconds it would take.
+pub type Cost = f64;
+
+impl Estimator {
+    /// Fit from the given set of observations, using the least squares method (panics if `X^T X` is singular).
+    pub fn fit(results: &[Observation]) -> Self {
+        let (xs, ys) = Self::to_data(results);
 
-/// Can estimate a duration for a given `n`.
-pub trait Estimator {
-    /// Estimate the duration for `n`.
-    fn estimate(&self, n: usize) -> Duration;
+        let xv = vandermonde(&xs);
+        let xtx = xv.transpose() * &xv;
+        let xtxinv = xtx.try_inverse().expect("X^T X must be invertible");
+        let xty = xv.transpose() * ys;
+        let coeffs = xtxinv * xty;
+
+        Self(coeffs.into())
+    }
+
+    /// Calculate the residual sum of squares of this fit against the given observations.
+    pub fn rss(&self, results: &[Observation]) -> f64 {
+        // TODO: there's a more efficient way to do this / bulk estimations
+        let (xs, ys) = Self::to_data(results);
+
+        xs.iter()
+            .zip(ys.iter())
+            .map(|(x, y)| (y - self.estimatef(*x)).powi(2))
+            .sum()
+    }
+
+    /// Estimate the cost, in approximate nanoseconds, of a given operation at the given `n`.
+    pub fn estimate(&self, n: usize) -> Cost {
+        self.estimatef(n as f64)
+    }
+
+    /// Like [`Self::estimate`], but for a fractional `n`: evaluates `a + b*n + c*n^2 + d*n^3`.
+    pub fn estimatef(&self, n: f64) -> Cost {
+        let [a, b, c, d] = self.0;
+        a + b * n + c * n.powi(2) + d * n.powi(3)
+    }
+
+    /// Convert observations to the format we use internally: the `n`s, and the average times in nanoseconds.
+    fn to_data(results: &[Observation]) -> (Vec<f64>, OVector<f64, Dyn>) {
+        let xs = results.iter().map(|(n, _)| *n as f64).collect::<Vec<_>>();
+        let ys = OVector::<f64, Dyn>::from_iterator(
+            results.len(),
+            results
+                .iter()
+                .map(|(_, results)| results.avg.as_nanos() as f64),
+        );
+
+        (xs, ys)
+    }
 }
 
-impl<K: Kernel, P: Prior> Estimator for GaussianProcess<K, P> {
-    fn estimate(&self, n: usize) -> Duration {
-        Duration::from_nanos(self.predict(&vec![n as f64]) as u64)
+/// Calculate a Vandermonde matrix with 4 columns: row `i` holds `xs[i]` raised to the powers 0 through 3.
+/// <https://en.wikipedia.org/wiki/Vandermonde_matrix>
+fn vandermonde(xs: &[f64]) -> MatrixXx4<f64> {
+    let mut mat = MatrixXx4::repeat(xs.len(), 1.0);
+
+    for (row, x) in xs.iter().enumerate() {
+        // Column 0 (the 0th power) is already all 1s from `repeat`, so start at column 1
+        for col in 1..=3 {
+            mat[(row, col)] = x.powi(col as i32);
+        }
     }
+
+    mat
 }