use std::f64;
use std::fmt::{self, Display, Formatter};
use std::time::*;
const BENCH_TIME_MAX_DESPERATION: Duration = Duration::from_secs(120);
const BENCH_TIME_MAX: Duration = Duration::from_secs(10);
const BENCH_TIME_MIN: Duration = Duration::from_millis(1);
#[derive(Debug, PartialEq, Clone)]
pub struct Stats {
pub ns_per_iter: f64,
pub goodness_of_fit: f64,
pub iterations: usize,
pub samples: usize,
}
impl Display for Stats {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
if self.ns_per_iter.is_nan() {
write!(
f,
"Only generated {} sample(s) - we can't fit a regression line to that! \
Try making your benchmark faster.",
self.samples
)
} else {
let per_iter = Duration::from_nanos(self.ns_per_iter as u64);
let per_iter = format!("{:?}", per_iter);
write!(
f,
"{:>11} (R²={:.3}, {} iterations in {} samples)",
per_iter, self.goodness_of_fit, self.iterations, self.samples
)
}
}
}
pub fn bench<F, O>(mut f: F) -> Stats
where
F: FnMut() -> O,
{
bench_env((), |_| f())
}
pub fn bench_env<F, I, O>(env: I, f: F) -> Stats
where
F: FnMut(&mut I) -> O,
I: Clone,
{
bench_gen_env(move || env.clone(), f)
}
pub fn bench_gen_env<G, F, I, O>(mut gen_env: G, mut f: F) -> Stats
where
G: FnMut() -> I,
F: FnMut(&mut I) -> O,
{
let mut data = Vec::new();
let bench_start = Instant::now();
for iters in slow_fib(BENCH_SCALE_TIME) {
let mut xs = std::iter::repeat_with(&mut gen_env)
.take(iters)
.collect::<Vec<I>>();
let iter_start = Instant::now();
for x in &mut xs {
pretend_to_use(f(x));
}
let time = iter_start.elapsed();
data.push((iters, time));
let elapsed = bench_start.elapsed();
if elapsed > BENCH_TIME_MIN && data.len() > 3 {
let (grad, r2) = regression(&data[1..]);
let stats = Stats {
ns_per_iter: grad,
goodness_of_fit: r2,
iterations: data[1..].iter().map(|&(x, _)| x).sum(),
samples: data[1..].len(),
};
if elapsed > BENCH_TIME_MAX || r2 > 0.99 {
return stats;
}
} else if elapsed > BENCH_TIME_MAX {
let total_time: f64 = data.iter().map(|(_, t)| t.as_nanos() as f64).sum();
let iterations = data.iter().map(|&(x, _)| x).sum();
return Stats {
ns_per_iter: total_time / iterations as f64,
iterations,
goodness_of_fit: 0.0,
samples: data.len(),
};
}
}
unreachable!()
}
#[derive(Debug, PartialEq, Clone)]
pub struct ScalingStats {
pub scaling: Scaling,
pub goodness_of_fit: f64,
pub iterations: usize,
pub samples: usize,
}
#[derive(Debug, PartialEq, Clone)]
pub struct Scaling {
pub power: usize,
pub exponential: usize,
pub ns_per_scale: f64,
}
impl Display for ScalingStats {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(
f,
"{} (R²={:.3}, {} iterations in {} samples)",
self.scaling, self.goodness_of_fit, self.iterations, self.samples
)
}
}
impl Display for Scaling {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let per_iter = Duration::from_nanos(self.ns_per_scale as u64);
let per_iter = if self.ns_per_scale < 1.0 {
format!("{:.2}ns", self.ns_per_scale)
} else if self.ns_per_scale < 10.0 {
format!("{:.1}ns", self.ns_per_scale)
} else {
format!("{:?}", per_iter)
};
if self.exponential == 1 {
match self.power {
0 => write!(f, "{:>8}/iter", per_iter),
1 => write!(f, "{:>8}/N ", per_iter),
2 => write!(f, "{:>8}/N² ", per_iter),
3 => write!(f, "{:>8}/N³ ", per_iter),
4 => write!(f, "{:>8}/N⁴ ", per_iter),
5 => write!(f, "{:>8}/N⁵ ", per_iter),
6 => write!(f, "{:>8}/N⁶ ", per_iter),
7 => write!(f, "{:>8}/N⁷ ", per_iter),
8 => write!(f, "{:>8}/N⁸ ", per_iter),
9 => write!(f, "{:>8}/N⁹ ", per_iter),
_ => write!(f, "{:>8}/N^{}", per_iter, self.power),
}
} else {
match self.power {
0 => write!(f, "{:>8}/{}ᴺ", per_iter, self.exponential),
1 => write!(f, "{:>8}/(N{}ᴺ) ", per_iter, self.exponential),
2 => write!(f, "{:>8}/(N²{}ᴺ) ", per_iter, self.exponential),
3 => write!(f, "{:>8}/(N³{}ᴺ) ", per_iter, self.exponential),
4 => write!(f, "{:>8}/(N⁴{}ᴺ) ", per_iter, self.exponential),
5 => write!(f, "{:>8}/(N⁵{}ᴺ) ", per_iter, self.exponential),
6 => write!(f, "{:>8}/(N⁶{}ᴺ) ", per_iter, self.exponential),
7 => write!(f, "{:>8}/(N⁷{}ᴺ) ", per_iter, self.exponential),
8 => write!(f, "{:>8}/(N⁸{}ᴺ) ", per_iter, self.exponential),
9 => write!(f, "{:>8}/(N⁹{}ᴺ) ", per_iter, self.exponential),
_ => write!(f, "{:>8}/(N^{}{}ᴺ)", per_iter, self.power, self.exponential),
}
}
}
}
pub fn bench_scaling<F, O>(f: F, nmin: usize) -> ScalingStats
where
F: Fn(usize) -> O,
{
let mut data = Vec::new();
let bench_start = Instant::now();
for iters in slow_fib(BENCH_SCALE_TIME) {
let n = if nmin > 0 { iters * nmin } else { iters };
let xs = vec![n; iters];
let iter_start = Instant::now();
for x in xs.into_iter() {
pretend_to_use(f(x));
}
let time = iter_start.elapsed();
data.push((n, iters, time));
let elapsed = bench_start.elapsed();
if elapsed > BENCH_TIME_MIN {
let stats = compute_scaling_gen(&data);
if elapsed > BENCH_TIME_MAX_DESPERATION
|| (elapsed > BENCH_TIME_MAX && stats.goodness_of_fit > 0.0)
|| stats.goodness_of_fit > 0.99
{
return stats;
}
}
}
unreachable!()
}
pub fn bench_scaling_gen<G, F, I, O>(mut gen_env: G, f: F, nmin: usize) -> ScalingStats
where
G: FnMut(usize) -> I,
F: Fn(&mut I) -> O,
{
let mut data = Vec::new();
let bench_start = Instant::now();
let mut am_slow = false;
for iters in slow_fib(BENCH_SCALE_TIME) {
let n = if nmin > 0 { iters * nmin } else { iters };
let iters = if am_slow { 1 + (iters & 1) } else { iters };
let mut xs = std::iter::repeat_with(|| gen_env(n))
.take(iters)
.collect::<Vec<I>>();
let iter_start = Instant::now();
for x in &mut xs {
pretend_to_use(f(x));
}
let time = iter_start.elapsed();
if !am_slow && iters == 1 && time > Duration::from_micros(1) {
am_slow = true;
}
data.push((n, iters, time));
let elapsed = bench_start.elapsed();
if elapsed > BENCH_TIME_MIN {
let stats = compute_scaling_gen(&data);
if elapsed > BENCH_TIME_MAX_DESPERATION
|| (elapsed > BENCH_TIME_MAX && stats.goodness_of_fit > 0.0)
|| stats.goodness_of_fit > 0.99
{
return stats;
}
}
}
println!("how did I get here?!");
unreachable!()
}
fn compute_scaling_gen(data: &[(usize, usize, Duration)]) -> ScalingStats {
let num_n = {
let mut ns = data.iter().map(|(n, _, _)| *n).collect::<Vec<_>>();
ns.dedup();
ns.len()
};
let mut stats = Vec::new();
let mut best = 0;
let mut second_best = 0;
for i in 1..num_n / 2 + 2 {
for power in 0..i {
let exponential = i - power;
let pdata: Vec<_> = data[1..]
.iter()
.map(|&(n, i, t)| {
(
(exponential as f64).powi(n as i32)
* (n as f64).powi(power as i32)
* (i as f64),
t,
)
})
.collect();
let (grad, r2) = fregression(&pdata);
stats.push(ScalingStats {
scaling: Scaling {
power,
exponential,
ns_per_scale: grad,
},
goodness_of_fit: r2,
iterations: data[1..].iter().map(|&(x, _, _)| x).sum(),
samples: data[1..].len(),
});
if r2 > stats[best].goodness_of_fit || stats[best].goodness_of_fit.is_nan() {
second_best = best;
best = stats.len() - 1;
}
}
}
if num_n < 10 || stats[second_best].goodness_of_fit == stats[best].goodness_of_fit {
stats[best].goodness_of_fit = 0.0;
} else {
}
stats[best].clone()
}
fn regression(data: &[(usize, Duration)]) -> (f64, f64) {
if data.len() < 2 {
return (f64::NAN, f64::NAN);
}
let data: Vec<_> = data
.iter()
.map(|&(x, y)| (x as f64, y.as_nanos() as f64))
.collect();
let n = data.len() as f64;
let nxbar = data.iter().map(|&(x, _)| x).sum::<f64>(); let nybar = data.iter().map(|&(_, y)| y).sum::<f64>(); let nxxbar = data.iter().map(|&(x, _)| x * x).sum::<f64>(); let nyybar = data.iter().map(|&(_, y)| y * y).sum::<f64>(); let nxybar = data.iter().map(|&(x, y)| x * y).sum::<f64>();
let ncovar = nxybar - ((nxbar * nybar) / n);
let nxvar = nxxbar - ((nxbar * nxbar) / n);
let nyvar = nyybar - ((nybar * nybar) / n);
let gradient = ncovar / nxvar;
let r2 = (ncovar * ncovar) / (nxvar * nyvar);
assert!(r2.is_nan() || r2 <= 1.0);
(gradient, r2)
}
fn fregression(data: &[(f64, Duration)]) -> (f64, f64) {
if data.len() < 2 {
return (f64::NAN, f64::NAN);
}
let data: Vec<_> = data
.iter()
.map(|&(x, y)| (x as f64, y.as_nanos() as f64))
.collect();
let n = data.len() as f64;
let xbar = data.iter().map(|&(x, _)| x).sum::<f64>() / n;
let xvar = data.iter().map(|&(x, _)| (x - xbar).powi(2)).sum::<f64>() / n;
let ybar = data.iter().map(|&(_, y)| y).sum::<f64>() / n;
let yvar = data.iter().map(|&(_, y)| (y - ybar).powi(2)).sum::<f64>() / n;
let covar = data
.iter()
.map(|&(x, y)| (x - xbar) * (y - ybar))
.sum::<f64>()
/ n;
let gradient = covar / xvar;
let r2 = covar.powi(2) / (xvar * yvar);
assert!(r2.is_nan() || r2 <= 1.0);
(gradient, r2)
}
fn pretend_to_use<T>(dummy: T) -> T {
unsafe {
let ret = ::std::ptr::read_volatile(&dummy);
::std::mem::forget(dummy);
ret
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use std::time::Duration;
fn fib(n: usize) -> usize {
let mut i = 0;
let mut sum = 0;
let mut last = 0;
let mut curr = 1usize;
while i < n - 1 {
sum = curr.wrapping_add(last);
last = curr;
curr = sum;
i += 1;
}
sum
}
#[test]
#[ignore]
fn doctests_again() {
println!();
println!("fib 200: {}", bench(|| fib(200)));
println!("fib 500: {}", bench(|| fib(500)));
println!("fib scaling: {}", bench_scaling(|n| fib(n), 0));
println!("reverse: {}", bench_env(vec![0; 100], |xs| xs.reverse()));
println!("sort: {}", bench_env(vec![0; 100], |xs| xs.sort()));
println!("fib 1: {}", bench(|| fib(500)));
println!(
"fib 2: {}",
bench(|| {
fib(500);
})
);
println!(
"fib 3: {}",
bench_env(0, |x| {
*x = fib(500);
})
);
}
#[test]
fn scales_o_one() {
println!();
let stats = bench_scaling(|_| thread::sleep(Duration::from_millis(10)), 1);
println!("O(N): {}", stats);
assert_eq!(stats.scaling.power, 0);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1e7);
assert!((stats.scaling.ns_per_scale - 1e7).abs() < 1e6);
assert!(format!("{}", stats).contains("samples"));
}
#[test]
fn scales_o_n() {
println!();
let stats = bench_scaling(|n| thread::sleep(Duration::from_millis(10 * n as u64)), 1);
println!("O(N): {}", stats);
assert_eq!(stats.scaling.power, 1);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1e7);
assert!((stats.scaling.ns_per_scale - 1e7).abs() < 1e5);
println!("Summing integers");
let stats = bench_scaling_gen(
|n| (0..n as u64).collect::<Vec<_>>(),
|v| v.iter().cloned().sum::<u64>(),
1,
);
println!("O(N): {}", stats);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1e7);
assert_eq!(stats.scaling.power, 1);
}
#[test]
fn scales_o_n_log_n_looks_like_n() {
println!("Sorting integers");
let stats = bench_scaling_gen(
|n| {
(0..n as u64)
.map(|i| (i * 13 + 5) % 137)
.collect::<Vec<_>>()
},
|v| v.sort(),
1,
);
println!("O(N log N): {}", stats);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1e7);
assert_eq!(stats.scaling.power, 1);
}
#[test]
fn scales_o_2_to_the_n() {
println!();
let stats = bench_scaling(|n| thread::sleep(Duration::from_nanos((1 << n) as u64)), 1);
println!("O(2ᴺ): {}", stats);
assert_eq!(stats.scaling.power, 0);
assert_eq!(stats.scaling.exponential, 2);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1.0);
assert!((stats.scaling.ns_per_scale - 1.0).abs() < 0.2);
}
#[test]
fn scales_o_n_square() {
println!();
let stats = bench_scaling(
|n| thread::sleep(Duration::from_millis(10 * (n * n) as u64)),
1,
);
println!("O(N): {}", stats);
assert_eq!(stats.scaling.power, 2);
println!(" error: {:e}", stats.scaling.ns_per_scale - 1e7);
assert!((stats.scaling.ns_per_scale - 1e7).abs() < 1e5);
}
#[test]
fn very_quick() {
println!();
println!("very quick: {}", bench(|| {}));
}
#[test]
fn very_slow() {
println!();
let stats = bench(|| thread::sleep(Duration::from_millis(400)));
println!("very slow: {}", stats);
assert!(stats.ns_per_iter > 399.0e6);
assert_eq!(3, stats.samples);
}
#[test]
fn painfully_slow() {
println!();
let stats = bench(|| thread::sleep(Duration::from_secs(11)));
println!("painfully slow: {}", stats);
println!("ns {}", stats.ns_per_iter);
assert!(stats.ns_per_iter > 11.0e9);
assert_eq!(1, stats.iterations);
}
#[test]
fn sadly_slow() {
println!();
let stats = bench(|| thread::sleep(Duration::from_secs(6)));
println!("sadly slow: {}", stats);
println!("ns {}", stats.ns_per_iter);
assert!(stats.ns_per_iter > 6.0e9);
assert_eq!(2, stats.iterations);
}
#[test]
fn test_sleep() {
println!();
println!(
"sleep 1 ms: {}",
bench(|| thread::sleep(Duration::from_millis(1)))
);
}
#[test]
fn noop() {
println!();
println!("noop base: {}", bench(|| {}));
println!("noop 0: {}", bench_env(vec![0u64; 0], |_| {}));
println!("noop 16: {}", bench_env(vec![0u64; 16], |_| {}));
println!("noop 64: {}", bench_env(vec![0u64; 64], |_| {}));
println!("noop 256: {}", bench_env(vec![0u64; 256], |_| {}));
println!("noop 512: {}", bench_env(vec![0u64; 512], |_| {}));
}
#[test]
fn ret_value() {
println!();
println!(
"no ret 32: {}",
bench_env(vec![0u64; 32], |x| { x.clone() })
);
println!("return 32: {}", bench_env(vec![0u64; 32], |x| x.clone()));
println!(
"no ret 256: {}",
bench_env(vec![0u64; 256], |x| { x.clone() })
);
println!(
"return 256: {}",
bench_env(vec![0u64; 256], |x| x.clone())
);
println!(
"no ret 1024: {}",
bench_env(vec![0u64; 1024], |x| { x.clone() })
);
println!(
"return 1024: {}",
bench_env(vec![0u64; 1024], |x| x.clone())
);
println!(
"no ret 4096: {}",
bench_env(vec![0u64; 4096], |x| { x.clone() })
);
println!(
"return 4096: {}",
bench_env(vec![0u64; 4096], |x| x.clone())
);
println!(
"no ret 50000: {}",
bench_env(vec![0u64; 50000], |x| { x.clone() })
);
println!(
"return 50000: {}",
bench_env(vec![0u64; 50000], |x| x.clone())
);
}
}
const BENCH_SCALE_TIME: usize = 25;
fn slow_fib(scale_time: usize) -> impl Iterator<Item = usize> {
#[derive(Debug)]
struct SlowFib {
which: usize,
buffer: Vec<usize>,
}
impl Iterator for SlowFib {
type Item = usize;
fn next(&mut self) -> Option<usize> {
let oldwhich = self.which;
self.which = (self.which + 1) % self.buffer.len();
self.buffer[self.which] = self.buffer[oldwhich] + self.buffer[self.which];
Some(self.buffer[self.which])
}
}
assert!(scale_time > 3);
let mut buffer = vec![1; scale_time];
buffer[1] = 0;
buffer[2] = 0;
SlowFib { which: 0, buffer }
}
#[test]
fn test_fib() {
let mut prev = 1;
for x in slow_fib(25).take(200) {
let rat = x as f64 / prev as f64;
println!("ratio: {}/{} = {}", prev, x, rat);
prev = x;
}
let five: Vec<_> = slow_fib(25).take(5).collect();
assert_eq!(&five, &[1, 1, 2, 3, 4]);
let more: Vec<_> = slow_fib(25).take(32).collect();
assert_eq!(
&more,
&[
1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 28, 31, 35, 40, 46,
]
);
let previous_sequence: Vec<_> = (0..32).map(|n| (1.1f64).powi(n).round() as usize).collect();
assert_eq!(
&previous_sequence,
&[
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9, 10, 11, 12, 13,
14, 16, 17, 19,
]
);
let previous_sequence: Vec<_> = (20..40)
.map(|n| (1.1f64).powi(n).round() as usize)
.collect();
assert_eq!(
&previous_sequence,
&[7, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 23, 26, 28, 31, 34, 37, 41,]
);
}