diff options
author | Aria Shrimpton <me@aria.rip> | 2024-01-20 17:09:06 +0000 |
---|---|---|
committer | Aria Shrimpton <me@aria.rip> | 2024-01-20 17:12:17 +0000 |
commit | ce4ae28bc7864c5c9c2d42f29ba025d28b42772d (patch) | |
tree | 5a88a95e0f16a89cd1f5c0e0acddb2f04403326c /src/crates | |
parent | ac6280b2a254272c1ecb5b508947bcdb84d31519 (diff) |
fix(benchmarker): more precise benchmarking without criterion
Diffstat (limited to 'src/crates')
-rw-r--r-- | src/crates/benchmarker/Cargo.toml | 13 | ||||
-rw-r--r-- | src/crates/benchmarker/benches/linked_list.rs | 9 | ||||
-rw-r--r-- | src/crates/benchmarker/benches/vec.rs | 21 | ||||
-rw-r--r-- | src/crates/benchmarker/src/container.rs | 170 | ||||
-rw-r--r-- | src/crates/benchmarker/src/indexable.rs | 121 | ||||
-rw-r--r-- | src/crates/benchmarker/src/lib.rs | 122 | ||||
-rw-r--r-- | src/crates/benchmarker/src/stack.rs | 76 | ||||
-rw-r--r-- | src/crates/candelabra/src/cost/benchmark.rs | 41 | ||||
-rw-r--r-- | src/crates/candelabra/src/cost/fit.rs | 6 | ||||
-rw-r--r-- | src/crates/cli/src/model.rs | 20 |
10 files changed, 414 insertions, 185 deletions
diff --git a/src/crates/benchmarker/Cargo.toml b/src/crates/benchmarker/Cargo.toml index a42127b..3fffe5e 100644 --- a/src/crates/benchmarker/Cargo.toml +++ b/src/crates/benchmarker/Cargo.toml @@ -4,14 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -criterion = "0.3" - -log = { workspace = true } - -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } rand = { workspace = true } - primrose-library = { path = "../library" } [dev-dependencies] @@ -19,4 +12,8 @@ env_logger = { workspace = true } [[bench]] name = "vec" -harness = false
\ No newline at end of file +harness = false + +[[bench]] +name = "linked_list" +harness = false diff --git a/src/crates/benchmarker/benches/linked_list.rs b/src/crates/benchmarker/benches/linked_list.rs new file mode 100644 index 0000000..f2997e4 --- /dev/null +++ b/src/crates/benchmarker/benches/linked_list.rs @@ -0,0 +1,9 @@ +fn main() { + let ns = [ + 64, 128, 256, 512, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384, 24576, 32768, + 40960, 49152, 57344, 65536, + ]; + candelabra_benchmarker::benchmark_container::<std::collections::LinkedList<usize>, _>(&ns); + candelabra_benchmarker::benchmark_indexable::<std::collections::LinkedList<usize>, _>(&ns); + candelabra_benchmarker::benchmark_stack::<std::collections::LinkedList<usize>, _>(&ns); +} diff --git a/src/crates/benchmarker/benches/vec.rs b/src/crates/benchmarker/benches/vec.rs index ec572ca..0dd4637 100644 --- a/src/crates/benchmarker/benches/vec.rs +++ b/src/crates/benchmarker/benches/vec.rs @@ -1,14 +1,9 @@ -use criterion::{criterion_group, criterion_main, Criterion}; - -fn run_benches(c: &mut Criterion) { - candelabra_benchmarker::benchmark_container::<Vec<usize>, _>( - c, - &[ - 64, 128, 256, 512, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384, 24576, 32768, - 40960, 49152, 57344, 65536, - ], - ); +fn main() { + let ns = [ + 64, 128, 256, 512, 1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384, 24576, 32768, + 40960, 49152, 57344, 65536, + ]; + candelabra_benchmarker::benchmark_container::<Vec<usize>, _>(&ns); + candelabra_benchmarker::benchmark_indexable::<Vec<usize>, _>(&ns); + candelabra_benchmarker::benchmark_stack::<Vec<usize>, _>(&ns); } - -criterion_group!(benches, run_benches); -criterion_main!(benches); diff --git a/src/crates/benchmarker/src/container.rs b/src/crates/benchmarker/src/container.rs index 116620b..f1c9948 100644 --- a/src/crates/benchmarker/src/container.rs +++ b/src/crates/benchmarker/src/container.rs @@ -1,56 +1,132 @@ -use criterion::{BatchSize, Criterion}; use primrose_library::traits::{Container, Indexable}; use rand::{distributions::Standard, prelude::Distribution, random, thread_rng, Rng}; -use crate::bench_with_ns; +use crate::{benchmark_op, print_result}; -pub fn benchmark_container<T, E>(c: &mut Criterion, ns: &[usize]) +pub fn benchmark_container<T, E>(ns: &[usize]) where T: Container<E> + Indexable<E> + Default + Clone, - E: Clone, + E: Copy, Standard: Distribution<E>, { - bench_with_ns::<T, E>(c, ns, "contains", |b, container| { - b.iter_batched_ref( - || { - // TODO: maybe we should actually just test the worst case? (at the end) - // we also don't actually test misses yet. - let mut container = container.clone(); - let mut rng = thread_rng(); - let pivot = rng.gen_range(0..container.len()); - let chosen = container.nth(pivot).unwrap().clone(); - - (container, chosen) - }, - |(c, search)| c.contains(search), - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "insert", |b, container| { - b.iter_batched_ref( - || container.clone(), - |c| c.insert(random()), - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "remove", |b, container| { - b.iter_batched_ref( - || { - let mut container = container.clone(); - let mut rng = thread_rng(); - let pivot = rng.gen_range(0..container.len()); - let chosen = container.nth(pivot).unwrap().clone(); - - (container, chosen) - }, - |(c, chosen)| c.remove(chosen.clone()), - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "clear", |b, container| { - b.iter_batched_ref(|| container.clone(), |c| c.clear(), BatchSize::LargeInput); - }); + for n in ns { + scenario_populate::<T, E>(*n); + scenario_contains::<T, E>(*n); + scenario_remove::<T, E>(*n); + scenario_clear::<T, E>(*n); + } +} + +fn scenario_populate<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let mut results = benchmark_op( + || (T::default(), (0..n).map(|_| random()).collect::<Vec<E>>()), + |(c, xs)| { + for i in 0..n { + c.insert(xs[i]); + } + }, + ); + + // Since we've repeated n times in each run + results.min_nanos /= n as f64; + results.avg_nanos /= n as f64; + results.max_nanos /= n as f64; + + print_result("insert", n, results); +} + +fn scenario_contains<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let results = benchmark_op( + || { + let mut rng = thread_rng(); + let mut c = T::default(); + + // decide where the element that we will search for will be + let pivot = rng.gen_range(0..n); + + // insert the element at pivot, and keep track of what it is + for _ in 0..pivot { + c.insert(random()); + } + let chosen = rng.gen(); + c.insert(chosen.clone()); + for _ in pivot..n { + c.insert(random()); + } + + (c, chosen) + }, + |(c, search)| { + c.contains(search); + }, + ); + + print_result("contains", n, results); +} + +fn scenario_remove<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let results = benchmark_op( + || { + let mut rng = thread_rng(); + let mut c = T::default(); + + // decide where the element that we will search for will be + let pivot = rng.gen_range(0..n); + + // insert the element at pivot, and keep track of what it is + for _ in 0..pivot { + c.insert(random()); + } + let chosen = rng.gen(); + c.insert(chosen.clone()); + for _ in pivot..n { + c.insert(random()); + } + + (c, chosen) + }, + |(c, remove)| { + c.remove(*remove); + }, + ); + + print_result("remove", n, results); +} + +fn scenario_clear<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let results = benchmark_op( + || { + let mut c = T::default(); + for _ in 0..n { + c.insert(random()); + } + + c + }, + |c| { + c.clear(); + }, + ); + + print_result("clear", n, results); } diff --git a/src/crates/benchmarker/src/indexable.rs b/src/crates/benchmarker/src/indexable.rs index e993c93..196de6c 100644 --- a/src/crates/benchmarker/src/indexable.rs +++ b/src/crates/benchmarker/src/indexable.rs @@ -1,46 +1,91 @@ -use criterion::{black_box, BatchSize, Criterion}; +use std::hint::black_box; + use primrose_library::traits::{Container, Indexable}; -use rand::{distributions::Standard, prelude::Distribution, thread_rng, Rng}; +use rand::{distributions::Standard, prelude::Distribution, random, thread_rng, Rng}; -use crate::bench_with_ns; +use crate::{benchmark_op, print_result}; -pub fn benchmark_indexable<T, E>(c: &mut Criterion, ns: &[usize]) +pub fn benchmark_indexable<T, E>(ns: &[usize]) where T: Indexable<E> + Container<E> + Default + Clone, - E: Clone, + E: Copy, + Standard: Distribution<E>, +{ + for n in ns { + scenario_first::<T, E>(*n); + scenario_last::<T, E>(*n); + scenario_nth::<T, E>(*n); + } +} + +fn scenario_first<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let results = benchmark_op( + || { + let mut c = T::default(); + for _ in 0..n { + c.insert(random()); + } + + c + }, + |c| { + let v = black_box(c.first()); + black_box(v); + }, + ); + + print_result("first", n, results); +} + +fn scenario_last<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, Standard: Distribution<E>, { - bench_with_ns::<T, E>(c, ns, "first", |b, container| { - b.iter_batched_ref( - || container.clone(), - |c| { - black_box(c.first()); - }, - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "last", |b, container| { - b.iter_batched_ref( - || container.clone(), - |c| { - black_box(c.last()); - }, - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "nth", |b, container| { - b.iter_batched_ref( - || { - let mut container = container.clone(); - let i = thread_rng().gen_range(0..container.len()); - (container, i) - }, - |(c, i)| { - black_box(c.nth(*i)); - }, - BatchSize::LargeInput, - ); - }); + let results = benchmark_op( + || { + let mut c = T::default(); + for _ in 0..n { + c.insert(random()); + } + + c + }, + |c| { + let v = black_box(c.last()); + black_box(v); + }, + ); + + print_result("last", n, results); +} + +fn scenario_nth<T, E>(n: usize) +where + T: Container<E> + Indexable<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let results = benchmark_op( + || { + let mut c = T::default(); + for _ in 0..n { + c.insert(random()); + } + + (c, thread_rng().gen_range(0..n)) + }, + |(c, idx)| { + let v = black_box(c.nth(*idx)); + black_box(v); + }, + ); + + print_result("nth", n, results); } diff --git a/src/crates/benchmarker/src/lib.rs b/src/crates/benchmarker/src/lib.rs index f3868f1..3018f56 100644 --- a/src/crates/benchmarker/src/lib.rs +++ b/src/crates/benchmarker/src/lib.rs @@ -2,37 +2,105 @@ mod container; mod indexable; mod stack; -use std::time::Duration; - -pub use criterion; +use std::{ + hint::black_box, + time::{Duration, Instant}, +}; pub use container::*; -use criterion::{Bencher, BenchmarkId, Criterion}; pub use indexable::*; -use primrose_library::traits::Container; -use rand::{distributions::Standard, prelude::Distribution, random}; pub use stack::*; -pub fn bench_with_ns<T, E>( - c: &mut Criterion, - ns: &[usize], - name: &str, - mut f: impl FnMut(&mut Bencher<'_>, &T), -) where - T: Container<E> + Default + Clone, - E: Clone, - Standard: Distribution<E>, -{ - let mut g = c.benchmark_group(name); - // HACK: speeding this up makes testing a lot easier. to be seen if this is still as reliable though - g.measurement_time(Duration::from_secs(1)); - g.warm_up_time(Duration::from_millis(500)); - for n in ns { - let mut container = T::default(); - for _ in 0..*n { - container.insert(random::<E>()); - } - - g.bench_with_input(BenchmarkId::from_parameter(n), &container, |b, n| f(b, n)); +const WARM_UP_TIME: Duration = Duration::from_millis(500); +const MEASUREMENT_TIME: Duration = Duration::from_secs(1); + +struct BenchmarkResult { + min_nanos: f64, + avg_nanos: f64, + max_nanos: f64, +} + +fn print_result(op: &str, n: usize, measurement: BenchmarkResult) { + println!( + "{}/{} time: [{:.3} ns {:.3} ns {:.3} ns]", + op, n, measurement.min_nanos, measurement.avg_nanos, measurement.max_nanos + ) +} + +/// Benchmark an operation for approx 5 seconds, returning the results. +/// +/// `setup` is used to create the thing `op` acts on, and `undo` is called between each run to undo `op`. +/// If `undo` is invalid, this will return garbage results. +/// +/// Warm-up for the setup is done beforehand. +fn benchmark_op<T, R>( + mut setup: impl FnMut() -> T, + mut op: impl FnMut(&mut T) -> R, +) -> BenchmarkResult { + let mut times = 0; + let mut min = f64::MAX; + let mut max = f64::MIN; + let mut sum = 0.0; + + let warmup_end = Instant::now() + WARM_UP_TIME; + + // Run warmup + while Instant::now() < warmup_end { + let mut target = black_box(setup()); + black_box(op(&mut target)); + } + + // Benchmarking loop + let loop_end = Instant::now() + MEASUREMENT_TIME; + while Instant::now() < loop_end { + let mut target = black_box(setup()); + + let start = Instant::now(); + black_box(op(&mut target)); + let end = Instant::now(); + drop(target); + + let duration = (end - start).as_nanos() as f64; + + min = min.min(duration); + max = max.max(duration); + sum += duration; + times += 1; + } + + BenchmarkResult { + min_nanos: min, + max_nanos: max, + avg_nanos: sum / times as f64, + } +} + +#[cfg(test)] +mod tests { + use super::benchmark_op; + use std::time::Duration; + + #[test] + fn benchmark_op_resets_properly() { + benchmark_op( + || false, + |b| { + assert!(!(*b)); + *b = true; + }, + ); + } + + #[test] + fn benchmark_op_times_properly() { + let results = benchmark_op(|| (), |_| std::thread::sleep(Duration::from_millis(5))); + + let avg_millis = results.avg_nanos / (10.0_f64).powi(6); + dbg!(avg_millis); + + assert!( + (avg_millis - 5.0).abs() < 0.1, + "sleeping for 5ms takes roughly 5ms" + ) } } diff --git a/src/crates/benchmarker/src/stack.rs b/src/crates/benchmarker/src/stack.rs index 499c997..2f82e0f 100644 --- a/src/crates/benchmarker/src/stack.rs +++ b/src/crates/benchmarker/src/stack.rs @@ -1,24 +1,70 @@ -use criterion::{BatchSize, Criterion}; +use std::hint::black_box; + use primrose_library::traits::{Container, Stack}; use rand::{distributions::Standard, prelude::Distribution, random}; -use crate::bench_with_ns; +use crate::{benchmark_op, print_result}; -pub fn benchmark_stack<T, E>(c: &mut Criterion, ns: &[usize]) +pub fn benchmark_stack<T, E>(ns: &[usize]) where T: Stack<E> + Container<E> + Default + Clone, - E: Clone, + E: Copy, Standard: Distribution<E>, { - bench_with_ns::<T, E>(c, ns, "push", |b, container| { - b.iter_batched_ref( - || (container.clone(), random::<E>()), - |(c, e)| c.push(e.clone()), - BatchSize::LargeInput, - ); - }); - - bench_with_ns::<T, E>(c, ns, "pop", |b, container| { - b.iter_batched_ref(|| container.clone(), |c| c.pop(), BatchSize::LargeInput); - }); + for n in ns { + scenario_populate::<T, E>(*n); + scenario_drain::<T, E>(*n); + } +} + +fn scenario_populate<T, E>(n: usize) +where + T: Stack<E> + Container<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let mut results = benchmark_op( + || (T::default(), (0..n).map(|_| random()).collect::<Vec<E>>()), + |(c, xs)| { + for i in 0..n { + c.push(xs[i]); + } + }, + ); + + // Since we've repeated n times in each run + results.min_nanos /= n as f64; + results.avg_nanos /= n as f64; + results.max_nanos /= n as f64; + + print_result("push", n, results); +} + +fn scenario_drain<T, E>(n: usize) +where + T: Stack<E> + Container<E> + Default + Clone, + E: Copy, + Standard: Distribution<E>, +{ + let mut results = benchmark_op( + || { + let mut c = T::default(); + for _ in 0..n { + c.push(random()); + } + c + }, + |c| { + for _ in 0..n { + black_box(c.pop()); + } + }, + ); + + // Since we've repeated n times in each run + results.min_nanos /= n as f64; + results.avg_nanos /= n as f64; + results.max_nanos /= n as f64; + + print_result("pop", n, results); } diff --git a/src/crates/candelabra/src/cost/benchmark.rs b/src/crates/candelabra/src/cost/benchmark.rs index 2d3470a..bafe9bd 100644 --- a/src/crates/candelabra/src/cost/benchmark.rs +++ b/src/crates/candelabra/src/cost/benchmark.rs @@ -8,7 +8,6 @@ use std::{ fs::{copy, create_dir, File}, io::Write, process::Command, - time::Duration, }; use anyhow::{bail, Context, Result}; @@ -19,6 +18,8 @@ use tempfile::{tempdir, TempDir}; use crate::paths::Paths; +use super::Cost; + /// The name of the element type we use for benchmarking pub const ELEM_TYPE: &str = "usize"; @@ -44,14 +45,14 @@ pub type Observation = (usize, BenchmarkResult); /// Results for a single benchmark #[derive(Serialize, Deserialize, Debug, Clone)] pub struct BenchmarkResult { - /// The minimum time taken - pub min: Duration, + /// The minimum cost + pub min: Cost, - /// The maximum time taken - pub max: Duration, + /// The maximum cost + pub max: Cost, - /// The average (mean) time taken - pub avg: Duration, + /// The average (mean) cost + pub avg: Cost, } /// Run benchmarks for the given container type, returning the results. @@ -144,17 +145,17 @@ pub(crate) fn parse_criterion_output( }) } -fn parse_time_str(quantity: &str, suffix: &str) -> Option<Duration> { - Some(Duration::from_secs_f32( - f32::from_str(quantity).ok()? +fn parse_time_str(quantity: &str, suffix: &str) -> Option<Cost> { + Some( + f64::from_str(quantity).ok()? * match suffix { - "ms" => 1e-3, - "µs" => 1e-6, - "ns" => 1e-9, - "ps" => 1e-12, + "ms" => 1e6, + "µs" => 1e3, + "ns" => 1.0, + "ps" => 1e-3, _ => todo!(), }, - )) + ) } fn prepare_crate(name: &str, paths: &Paths, lib_spec: &LibSpec) -> Result<TempDir> { @@ -197,7 +198,7 @@ primrose-library = {{ path = \"{}\" }} let implemented_traits = lib_spec.interface_provide_map.keys(); for tr in implemented_traits { benchmark_statements += &format!( - "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(c, &NS);", + "candelabra_benchmarker::benchmark_{}::<{}<{}>, _>(&NS);", tr.to_lowercase(), name, ELEM_TYPE, @@ -213,16 +214,10 @@ primrose-library = {{ path = \"{}\" }} .write_all( format!( " -use candelabra_benchmarker::criterion::{{criterion_group, criterion_main, Criterion}}; - const NS: &[usize] = &{}; - -fn run_benches(c: &mut Criterion) {{ +fn main() {{ {} }} - -criterion_group!(benches, run_benches); -criterion_main!(benches); ", NS, benchmark_statements ) diff --git a/src/crates/candelabra/src/cost/fit.rs b/src/crates/candelabra/src/cost/fit.rs index b5f737e..dada377 100644 --- a/src/crates/candelabra/src/cost/fit.rs +++ b/src/crates/candelabra/src/cost/fit.rs @@ -89,7 +89,7 @@ impl Estimator { let [a, b, c, d] = self.coeffs; n = (n + self.transform_x.0) * self.transform_x.1; let raw = a + b * n + c * n.powi(2) + d * n.powi(3); - (raw / self.transform_y.1) - self.transform_y.0 + ((raw / self.transform_y.1) - self.transform_y.0).max(0.0) // can't be below 0 } /// Convert a list of observations to the format we use internally. @@ -97,9 +97,7 @@ impl Estimator { let xs = results.iter().map(|(n, _)| *n as f64).collect::<Vec<_>>(); let ys = OVector::<f64, Dyn>::from_iterator( results.len(), - results - .iter() - .map(|(_, results)| results.avg.as_nanos() as f64), + results.iter().map(|(_, results)| results.avg), ); (xs, ys) diff --git a/src/crates/cli/src/model.rs b/src/crates/cli/src/model.rs index 43c279f..b1dcb67 100644 --- a/src/crates/cli/src/model.rs +++ b/src/crates/cli/src/model.rs @@ -26,11 +26,11 @@ impl State { let obvs = results.by_op.get(k).unwrap(); builder.push_record(&[ k.to_string(), - format!("{0:.5}", v.coeffs[3]), - format!("{0:.5}", v.coeffs[2]), - format!("{0:.5}", v.coeffs[1]), - format!("{0:.5}", v.coeffs[0]), - format!("{0:.5}", v.nrmse(obvs)), + format!("{0}", v.coeffs[0]), + format!("{0}", v.coeffs[1]), + format!("{0}", v.coeffs[2]), + format!("{0}", v.coeffs[3]), + format!("{0}", v.nrmse(obvs)), ]); } @@ -38,14 +38,14 @@ impl State { // Table of example cost estimates let mut builder = Builder::default(); - builder.set_header(["op", "n = 1", "n = 100", "n = 1_000", "n = 10_000"]); + builder.set_header(["op", "n = 8", "n = 64", "n = 512", "n = 4096"]); for (k, v) in model.by_op.iter() { builder.push_record(&[ k.to_string(), - format!("{0:.3}", v.estimate(1)), - format!("{0:.3}", v.estimate(100)), - format!("{0:.3}", v.estimate(1_000)), - format!("{0:.3}", v.estimate(10_000)), + format!("{0:.3}", v.estimate(8)), + format!("{0:.3}", v.estimate(64)), + format!("{0:.3}", v.estimate(512)), + format!("{0:.3}", v.estimate(4096)), ]); } |