diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/crates/cli/Cargo.toml | 2 | ||||
-rw-r--r-- | src/crates/cli/src/cost/fit.rs | 97 |
2 files changed, 71 insertions, 28 deletions
diff --git a/src/crates/cli/Cargo.toml b/src/crates/cli/Cargo.toml index 6b3ae92..5db5481 100644 --- a/src/crates/cli/Cargo.toml +++ b/src/crates/cli/Cargo.toml @@ -19,4 +19,4 @@ cargo_metadata = "0.18.1" argh = "0.1.12" glob = "0.3.1" tempfile = "3" -friedrich = "0.5.0" +nalgebra = "0.32.3" diff --git a/src/crates/cli/src/cost/fit.rs b/src/crates/cli/src/cost/fit.rs index e6b3e32..746c2a5 100644 --- a/src/crates/cli/src/cost/fit.rs +++ b/src/crates/cli/src/cost/fit.rs @@ -1,34 +1,77 @@ -//! Fitting a curve to benchmark results - -use std::time::Duration; +//! Fitting a 3rd-order polynomial to benchmark results +//! Based on code from al-jshen: https://github.com/al-jshen/compute/tree/master use candelabra_benchmarker::Observation; -use friedrich::{gaussian_process::GaussianProcess, kernel::Kernel, prior::Prior}; - -/// Fit a curve to the given set of observations. -pub fn fit(results: &Vec<Observation>) -> impl Estimator { - let xs = results - .iter() - .map(|(n, _)| vec![*n as f64]) - .collect::<Vec<_>>(); - - let ys = results - .iter() - .map(|(_, results)| results.avg.as_nanos() as f64) - .collect::<Vec<_>>(); - - // TODO: Should be able to incorporate the min/max into this - GaussianProcess::default(xs, ys) -} +use na::{Dyn, MatrixXx4, OVector}; + +/// Estimates durations using a 3rd-order polynomial. +pub struct Estimator([f64; 4]); + +/// Approximate cost of an action. +/// This is an approximation for the number of nanoseconds it would take. +pub type Cost = f64; + +impl Estimator { + /// Fit from the given set of observations, using the least squares method. + pub fn fit(results: &[Observation]) -> Self { + let (xs, ys) = Self::to_data(results); -/// Can estimate a duration for a given `n`. -pub trait Estimator { - /// Estimate the duration for `n`. 
- fn estimate(&self, n: usize) -> Duration; + let xv = vandermonde(&xs); + let xtx = xv.transpose() * xv.clone(); + let xtxinv = xtx.try_inverse().unwrap(); + let xty = xv.transpose() * ys; + let coeffs = xtxinv * xty; + + Self(coeffs.into()) + } + + /// Calculate the residual sum of squares for the given data. + pub fn rss(&self, results: &Vec<Observation>) -> f64 { + // TODO: there's a more efficient way to do this / bulk estimations + let (xs, ys) = Self::to_data(results); + + xs.iter() + .zip(ys.iter()) + .map(|(x, y)| (y - self.estimatef(*x)).powi(2)) + .sum() + } + + /// Estimate the cost of a given operation at the given `n`. + pub fn estimate(&self, n: usize) -> Cost { + self.estimatef(n as f64) + } + + /// Estimate the cost of a given operation at the given `n`, taking `n` as a float. + pub fn estimatef(&self, n: f64) -> Cost { + let [a, b, c, d] = self.0; + a + b * n + c * n.powi(2) + d * n.powi(3) + } + + /// Convert a list of observations to the format we use internally. + fn to_data(results: &[Observation]) -> (Vec<f64>, OVector<f64, Dyn>) { + let xs = results.iter().map(|(n, _)| *n as f64).collect::<Vec<_>>(); + let ys = OVector::<f64, Dyn>::from_iterator( + results.len(), + results + .iter() + .map(|(_, results)| results.avg.as_nanos() as f64), + ); + + (xs, ys) + } } -impl<K: Kernel, P: Prior> Estimator for GaussianProcess<K, P> { - fn estimate(&self, n: usize) -> Duration { - Duration::from_nanos(self.predict(&vec![n as f64]) as u64) +/// Calculate a Vandermonde matrix with 4 columns. +/// https://en.wikipedia.org/wiki/Vandermonde_matrix +fn vandermonde(xs: &[f64]) -> MatrixXx4<f64> { + let mut mat = MatrixXx4::repeat(xs.len(), 1.0); + + for (row, x) in xs.iter().enumerate() { + // First column is all 1s so skip + for col in 1..=3 { + mat[(row, col)] = x.powi(col as i32); + } } + + mat } |