use crate::connection::{SamplingMethod, Throughput};
use crate::estimate::{build_change_estimates, build_estimates, ConfidenceInterval, Estimate};
use crate::estimate::{
ChangeDistributions, ChangeEstimates, ChangePointEstimates, Distributions, Estimates,
PointEstimates,
};
use crate::report::MeasurementData;
use crate::stats::bivariate::regression::Slope;
use crate::stats::bivariate::Data;
use crate::stats::univariate::outliers::tukey;
use crate::stats::univariate::Sample;
use crate::stats::{Distribution, Tails};
use std::time::Duration;
/// Settings consumed by the analysis routines in this module.
///
/// Only some of the fields are read by the code visible here; the remaining
/// ones are carried along for other parts of the crate.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
/// Confidence level handed to the confidence-interval and estimate builders.
pub confidence_level: f64,
/// Not read by the analysis code shown here; presumably the time budget for
/// collecting samples — TODO confirm against the measurement code.
pub measurement_time: Duration,
/// Copied verbatim into the comparison data as its `noise_threshold`.
pub noise_threshold: f64,
/// Number of resamples requested from every bootstrap call in this module.
pub nresamples: usize,
/// Not read by the analysis code shown here; presumably the number of samples
/// to collect per benchmark — TODO confirm.
pub sample_size: usize,
/// Copied verbatim into the comparison data as its `significance_threshold`.
pub significance_level: f64,
/// Not read by the analysis code shown here; presumably the warm-up duration
/// before measuring — TODO confirm.
pub warm_up_time: Duration,
}
/// Borrowed view over the raw numbers recorded for one benchmark run.
///
/// `iteration_count` and `sample_values` are consumed pairwise (element `i` of
/// one goes with element `i` of the other) by the analysis code.
pub struct MeasuredValues<'a> {
/// Iteration count for each sample; used as the divisor when a per-iteration
/// average is derived from `sample_values`.
pub iteration_count: &'a [f64],
/// Measured value for each sample, paired with `iteration_count`.
pub sample_values: &'a [f64],
/// Per-sample averages. Read for the new measurement only; for a baseline
/// measurement `compare` recomputes the averages from the two fields above.
pub avg_values: &'a [f64],
}
/// Analyses one benchmark measurement and, when a baseline is supplied,
/// compares it against that baseline.
///
/// Steps, in order:
/// 1. wrap the averages in a `Sample` and the raw pairs in a `Data`;
/// 2. label the averages with the Tukey classifier;
/// 3. bootstrap the absolute statistics via `estimates`;
/// 4. for linear sampling, add the regression slope and its distribution;
/// 5. if `old_sample` is present, run `compare` and derive a two-tailed
///    p-value from the resulting t distribution.
///
/// The returned `MeasurementData` borrows the slices of `new_sample`.
pub(crate) fn analysis<'a>(
    config: &BenchmarkConfig,
    throughput: Option<Throughput>,
    new_sample: MeasuredValues<'a>,
    old_sample: Option<(MeasuredValues<'a>, &'a Estimates)>,
    sampling_method: SamplingMethod,
) -> MeasurementData<'a> {
    let iteration_counts = new_sample.iteration_count;
    let sample_values = new_sample.sample_values;

    let avg_sample = Sample::new(new_sample.avg_values);
    let regression_input = Data::new(iteration_counts, sample_values);

    let avg_times = tukey::classify(avg_sample);
    let (mut distributions, mut absolute_estimates) = estimates(avg_sample, config);

    // The slope is only filled in for linear sampling; otherwise both slots
    // keep whatever `estimates` produced for them.
    if sampling_method.is_linear() {
        let (slope_distribution, slope_estimate) = regression(&regression_input, config);
        absolute_estimates.slope = Some(slope_estimate);
        distributions.slope = Some(slope_distribution);
    }

    let comparison = old_sample.map(|(base_sample, base_estimates)| {
        let (t_value, t_distribution, relative_estimates, relative_distributions, base_avg_times) =
            compare(avg_sample, &base_sample, config);
        let p_value = t_distribution.p_value(t_value, &Tails::Two);

        crate::report::ComparisonData {
            p_value,
            t_distribution,
            t_value,
            relative_estimates,
            relative_distributions,
            significance_threshold: config.significance_level,
            noise_threshold: config.noise_threshold,
            base_iter_counts: base_sample.iteration_count.iter().copied().collect(),
            base_sample_times: base_sample.sample_values.iter().copied().collect(),
            base_avg_times,
            base_estimates: base_estimates.clone(),
        }
    });

    MeasurementData {
        data: Data::new(iteration_counts, sample_values),
        avg_times,
        absolute_estimates,
        distributions,
        comparison,
        throughput,
    }
}
/// Fits a regression slope to `data` and bootstraps its distribution.
///
/// Returns the bootstrapped slope distribution together with an `Estimate`
/// whose point value is the slope fitted to the full data set, whose interval
/// comes from the distribution at `config.confidence_level`, and whose
/// standard error is the distribution's standard deviation.
fn regression(
    data: &Data<'_, f64, f64>,
    config: &BenchmarkConfig,
) -> (Distribution<f64>, Estimate) {
    let confidence_level = config.confidence_level;

    // The bootstrap hands back a one-element tuple; only its first member is used.
    let bootstrapped = elapsed!(
        "Bootstrapped linear regression",
        data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,))
    );
    let slope_distribution = bootstrapped.0;

    let fitted = Slope::fit(data);
    let (lower_bound, upper_bound) = slope_distribution.confidence_interval(confidence_level);
    let standard_error = slope_distribution.std_dev(None);

    let slope_estimate = Estimate {
        confidence_interval: ConfidenceInterval {
            confidence_level,
            lower_bound,
            upper_bound,
        },
        point_estimate: fitted.0,
        standard_error,
    };

    (slope_distribution, slope_estimate)
}
/// Computes point estimates and bootstrapped distributions for the mean,
/// standard deviation, median and median absolute deviation of `avg_times`.
///
/// The `slope` distribution is left as `None`; the caller fills it in when it
/// applies. The final `Estimates` are produced by `build_estimates` at
/// `config.confidence_level`.
fn estimates(avg_times: &Sample<f64>, config: &BenchmarkConfig) -> (Distributions, Estimates) {
    /// Returns `(mean, std_dev, median, median_abs_dev)` for one sample.
    fn summarize(sample: &Sample<f64>) -> (f64, f64, f64, f64) {
        let mean = sample.mean();
        let median = sample.percentiles().median();
        (
            mean,
            sample.std_dev(Some(mean)),
            median,
            sample.median_abs_dev(Some(median)),
        )
    }

    // Point estimates from the sample itself.
    let (mean, std_dev, median, median_abs_dev) = summarize(avg_times);
    let points = PointEstimates {
        mean,
        median,
        std_dev,
        median_abs_dev,
    };

    // Distributions of the same four statistics over the resamples.
    let (mean_dist, std_dev_dist, median_dist, mad_dist) = elapsed!(
        "Bootstrapping the absolute statistics.",
        avg_times.bootstrap(config.nresamples, summarize)
    );
    let distributions = Distributions {
        mean: mean_dist,
        slope: None,
        median: median_dist,
        median_abs_dev: mad_dist,
        std_dev: std_dev_dist,
    };

    let built = build_estimates(&distributions, &points, config.confidence_level);
    (distributions, built)
}
/// Compares the new averages against a baseline measurement.
///
/// The baseline averages are recomputed here as `sample_value / iteration_count`
/// for each pair in `old_values`; its `avg_values` field is not read.
///
/// Returns, in order: the t statistic, the bootstrapped t distribution, the
/// relative-change estimates, their distributions, and the recomputed
/// baseline averages.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))]
pub(crate) fn compare(
    new_avg_times: &Sample<f64>,
    old_values: &MeasuredValues,
    config: &BenchmarkConfig,
) -> (
    f64,
    Distribution<f64>,
    ChangeEstimates,
    ChangeDistributions,
    Vec<f64>,
) {
    let base_avg_values: Vec<f64> = old_values
        .iteration_count
        .iter()
        .zip(old_values.sample_values.iter())
        .map(|(&count, &elapsed)| elapsed / count)
        .collect();
    let base_sample = Sample::new(&base_avg_values);

    let (t_statistic, t_distribution) = t_test(new_avg_times, base_sample, config);
    let (change_estimates, change_distributions) =
        difference_estimates(new_avg_times, base_sample, config);

    (
        t_statistic,
        t_distribution,
        change_estimates,
        change_distributions,
        base_avg_values,
    )
}
/// Computes the t statistic between the two samples and a bootstrapped
/// distribution of that statistic.
///
/// Non-finite values are removed from the bootstrapped distribution before it
/// is returned, so it may hold fewer than `config.nresamples` entries.
fn t_test(
    avg_times: &Sample<f64>,
    base_avg_times: &Sample<f64>,
    config: &BenchmarkConfig,
) -> (f64, Distribution<f64>) {
    let t_statistic = avg_times.t(base_avg_times);

    let bootstrapped = elapsed!(
        "Bootstrapping the T distribution",
        crate::stats::univariate::mixed::bootstrap(
            avg_times,
            base_avg_times,
            config.nresamples,
            |a, b| (a.t(b),)
        )
    );
    let raw_distribution = bootstrapped.0;

    // Keep only finite t values (presumably a resample can yield NaN or
    // infinity when its variance degenerates — TODO confirm).
    let finite_values: Vec<f64> = raw_distribution
        .iter()
        .copied()
        .filter(|t| t.is_finite())
        .collect();
    let t_distribution = Distribution::from(finite_values.into_boxed_slice());

    (t_statistic, t_distribution)
}
/// Estimates the relative change of the mean and of the median between the
/// new sample and the baseline.
///
/// Each change is expressed as `new / base - 1`. The distributions come from
/// a two-sample bootstrap; the point values are computed from the full
/// samples, and both are combined by `build_change_estimates` at
/// `config.confidence_level`.
fn difference_estimates(
    avg_times: &Sample<f64>,
    base_avg_times: &Sample<f64>,
    config: &BenchmarkConfig,
) -> (ChangeEstimates, ChangeDistributions) {
    /// Returns `(relative mean change, relative median change)` of `new` versus `base`.
    fn relative_change(new: &Sample<f64>, base: &Sample<f64>) -> (f64, f64) {
        let mean_change = new.mean() / base.mean() - 1.;
        let median_change = new.percentiles().median() / base.percentiles().median() - 1.;
        (mean_change, median_change)
    }

    let (mean_dist, median_dist) = elapsed!(
        "Bootstrapping the relative statistics",
        crate::stats::univariate::bootstrap(
            avg_times,
            base_avg_times,
            config.nresamples,
            relative_change
        )
    );
    let change_distributions = ChangeDistributions {
        mean: mean_dist,
        median: median_dist,
    };

    let (mean, median) = relative_change(avg_times, base_avg_times);
    let change_estimates = build_change_estimates(
        &change_distributions,
        &ChangePointEstimates { mean, median },
        config.confidence_level,
    );

    (change_estimates, change_distributions)
}