use crate ::measurement ::BenchmarkResult;
type Result< T > = std ::result ::Result< T, Box<dyn std ::error ::Error >>;
use std ::time ::Duration;
#[ derive(Debug, Clone, Copy, PartialEq) ]
pub enum SignificanceLevel
{
Standard,
High,
VeryHigh,
}
impl SignificanceLevel
{
pub fn alpha( &self ) -> f64
{
match self
{
SignificanceLevel ::Standard => 0.05,
SignificanceLevel ::High => 0.01,
SignificanceLevel ::VeryHigh => 0.001,
}
}
pub fn t_critical( &self ) -> f64
{
match self
{
SignificanceLevel ::Standard => 1.96, SignificanceLevel ::High => 2.58, SignificanceLevel ::VeryHigh => 3.29, }
}
}
#[ derive(Debug, Clone) ]
pub struct ConfidenceInterval
{
pub lower_bound: Duration,
pub upper_bound: Duration,
pub point_estimate: Duration,
pub confidence_level: f64,
pub margin_of_error: Duration,
}
impl ConfidenceInterval
{
pub fn new(
point_estimate: Duration,
margin_of_error: Duration,
confidence_level: f64,
) -> Self
{
let lower = point_estimate.saturating_sub(margin_of_error);
let upper = point_estimate + margin_of_error;
Self
{
lower_bound: lower,
upper_bound: upper,
point_estimate,
confidence_level,
margin_of_error,
}
}
pub fn contains(&self, value: Duration) -> bool
{
value >= self.lower_bound && value <= self.upper_bound
}
pub fn overlaps(&self, other: &ConfidenceInterval) -> bool
{
!(self.upper_bound < other.lower_bound || other.upper_bound < self.lower_bound)
}
pub fn to_string( &self ) -> String
{
format!(
"{:.2?} [{:.2?} - {:.2?}] ({:.1}% CI)",
self.point_estimate,
self.lower_bound,
self.upper_bound,
self.confidence_level * 100.0
)
}
}
#[ derive(Debug, Clone) ]
pub struct StatisticalTest
{
pub test_statistic: f64,
pub p_value: f64,
pub effect_size: f64,
pub degrees_of_freedom: usize,
pub is_significant: bool,
pub significance_level: SignificanceLevel,
}
impl StatisticalTest
{
pub fn effect_size_interpretation( &self ) -> &'static str
{
let abs_effect = self.effect_size.abs();
if abs_effect < 0.2
{
"negligible"
}
else if abs_effect < 0.5
{
"small"
}
else if abs_effect < 0.8
{
"medium"
}
else
{
"large"
}
}
pub fn conclusion( &self ) -> String
{
if self.is_significant
{
format!(
"Statistically significant difference (p = {:.4}, effect size: {} [{}])",
self.p_value,
self.effect_size,
self.effect_size_interpretation()
)
}
else
{
format!(
"No statistically significant difference (p = {:.4}, effect size: {} [{}])",
self.p_value,
self.effect_size,
self.effect_size_interpretation()
)
}
}
}
#[ derive(Debug, Clone) ]
pub struct NormalityTest
{
pub test_statistic: f64,
pub p_value: f64,
pub is_normal: bool,
pub test_name: String,
}
#[ derive(Debug, Clone) ]
pub struct StatisticalAnalysis
{
pub benchmark_result: BenchmarkResult,
pub mean_confidence_interval: ConfidenceInterval,
pub median_confidence_interval: ConfidenceInterval,
pub standard_error: Duration,
pub coefficient_of_variation: f64,
pub normality_test: NormalityTest,
pub outlier_count: usize,
pub statistical_power: f64,
}
impl StatisticalAnalysis
{
pub fn analyze(
result: &BenchmarkResult,
significance_level: SignificanceLevel,
) -> Result< Self >
{
if result.times.is_empty()
{
return Err("Cannot analyze empty benchmark result".into());
}
let n = result.times.len();
let mean = result.mean_time();
let std_dev = result.std_deviation();
let standard_error = Duration ::from_secs_f64(
std_dev.as_secs_f64() / (n as f64).sqrt()
);
let margin_of_error = Duration ::from_secs_f64(
significance_level.t_critical() * standard_error.as_secs_f64()
);
let mean_ci = ConfidenceInterval ::new(
mean,
margin_of_error,
1.0 - significance_level.alpha(),
);
let median = result.median_time();
let median_margin = Duration ::from_secs_f64(
1.253 * standard_error.as_secs_f64() );
let median_ci = ConfidenceInterval ::new(
median,
median_margin,
1.0 - significance_level.alpha(),
);
let cv = if mean.as_secs_f64() > 0.0
{
std_dev.as_secs_f64() / mean.as_secs_f64()
}
else
{
0.0
};
let normality_test = Self ::shapiro_wilk_test(&result.times);
let outlier_count = Self ::detect_outliers(&result.times);
let statistical_power = Self ::calculate_power(n, std_dev.as_secs_f64(), significance_level);
Ok(Self
{
benchmark_result: result.clone(),
mean_confidence_interval: mean_ci,
median_confidence_interval: median_ci,
standard_error,
coefficient_of_variation: cv,
normality_test,
outlier_count,
statistical_power,
})
}
pub fn compare(
result_a: &BenchmarkResult,
result_b: &BenchmarkResult,
significance_level: SignificanceLevel,
) -> Result< StatisticalTest >
{
if result_a.times.is_empty() || result_b.times.is_empty()
{
return Err("Cannot compare empty benchmark results".into());
}
let mean_a = result_a.mean_time().as_secs_f64();
let mean_b = result_b.mean_time().as_secs_f64();
let var_a = result_a.std_deviation().as_secs_f64().powi(2);
let var_b = result_b.std_deviation().as_secs_f64().powi(2);
let n_a = result_a.times.len() as f64;
let n_b = result_b.times.len() as f64;
let pooled_std = ((var_a * (n_a - 1.0) + var_b * (n_b - 1.0)) / (n_a + n_b - 2.0)).sqrt();
let effect_size = (mean_a - mean_b) / pooled_std;
let se_diff = (var_a / n_a + var_b / n_b).sqrt();
let t_stat = (mean_a - mean_b) / se_diff;
let df = (var_a / n_a + var_b / n_b).powi(2) /
((var_a / n_a).powi(2) / (n_a - 1.0) + (var_b / n_b).powi(2) / (n_b - 1.0));
let p_value = Self ::t_test_p_value(t_stat.abs(), df);
let is_significant = p_value < significance_level.alpha();
Ok(StatisticalTest
{
test_statistic: t_stat,
p_value,
effect_size,
degrees_of_freedom: df as usize,
is_significant,
significance_level,
})
}
pub fn is_reliable( &self ) -> bool
{
let low_variation = self.coefficient_of_variation < 0.1;
let sufficient_samples = self.benchmark_result.times.len() > 10;
let high_power = self.statistical_power > 0.8;
let few_outliers = (self.outlier_count as f64 / self.benchmark_result.times.len() as f64) < 0.1;
low_variation && sufficient_samples && high_power && few_outliers
}
pub fn generate_report( &self ) -> String
{
let mut report = String ::new();
report.push_str("## Statistical Analysis Report\n\n");
report.push_str(&format!("**Benchmark** : {}\n", self.benchmark_result.name));
report.push_str(&format!("**Sample size** : {} measurements\n\n", self.benchmark_result.times.len()));
report.push_str("### Descriptive Statistics\n\n");
report.push_str(&format!("- **Mean** : {}\n", self.mean_confidence_interval.to_string()));
report.push_str(&format!("- **Median** : {}\n", self.median_confidence_interval.to_string()));
report.push_str(&format!("- **Standard Deviation** : {:.2?}\n", self.benchmark_result.std_deviation()));
report.push_str(&format!("- **Standard Error** : {:.2?}\n", self.standard_error));
report.push_str(&format!("- **Coefficient of Variation** : {:.1}%\n\n", self.coefficient_of_variation * 100.0));
report.push_str("### Statistical Validity\n\n");
report.push_str(&format!("- **Normality test** : {} (p = {:.4})\n",
if self.normality_test.is_normal
{ "✅ Normal" } else { "⚠️ Non-normal" },
self.normality_test.p_value));
report.push_str(&format!("- **Outliers detected** : {} ({:.1}% of data)\n",
self.outlier_count,
self.outlier_count as f64 / self.benchmark_result.times.len() as f64 * 100.0));
report.push_str(&format!("- **Statistical power** : {:.3} ({})\n",
self.statistical_power,
if self.statistical_power > 0.8
{ "✅ High" } else { "⚠️ Low" }));
report.push_str(&format!("- **Overall reliability** : {}\n\n",
if self.is_reliable()
{ "✅ Reliable" } else { "⚠️ Questionable" }));
report.push_str("### Recommendations\n\n");
if !self.is_reliable()
{
if self.coefficient_of_variation > 0.1
{
report.push_str("- ⚠️ High variation detected. Consider increasing sample size or controlling environment.\n");
}
if self.statistical_power < 0.8
{
report.push_str("- ⚠️ Low statistical power. Increase sample size for reliable effect detection.\n");
}
if !self.normality_test.is_normal
{
report.push_str("- ⚠️ Data not normally distributed. Consider non-parametric tests or transformation.\n");
}
if self.outlier_count > 0
{
report.push_str(&format!("- ⚠️ {} outliers detected. Investigate measurement conditions.\n", self.outlier_count));
}
}
else
{
report.push_str("- ✅ Results meet research-grade statistical standards.\n");
}
report
}
fn shapiro_wilk_test(times: &[ Duration]) -> NormalityTest
{
let n = times.len();
let mean_val = times.iter().sum :: < Duration >().as_secs_f64() / n as f64;
let skewness = Self ::calculate_skewness(times, mean_val);
let kurtosis = Self ::calculate_kurtosis(times, mean_val);
let w_stat = 1.0 - (skewness.abs() + (kurtosis - 3.0).abs()) / 10.0;
let p_value = if w_stat > 0.95 { 0.8 } else if w_stat > 0.9 { 0.3 } else { 0.01 };
NormalityTest
{
test_statistic: w_stat,
p_value,
is_normal: p_value > 0.05,
test_name: "Shapiro-Wilk (simplified)".to_string(),
}
}
fn calculate_skewness(times: &[ Duration], mean_val: f64) -> f64
{
let n = times.len() as f64;
let variance = times.iter()
.map(|t| (t.as_secs_f64() - mean_val).powi(2))
.sum :: < f64 >() / (n - 1.0);
let std_dev = variance.sqrt();
let skew = times.iter()
.map(|t| ((t.as_secs_f64() - mean_val) / std_dev).powi(3))
.sum :: < f64 >() / n;
skew
}
fn calculate_kurtosis(times: &[ Duration], mean_val: f64) -> f64
{
let n = times.len() as f64;
let variance = times.iter()
.map(|t| (t.as_secs_f64() - mean_val).powi(2))
.sum :: < f64 >() / (n - 1.0);
let std_dev = variance.sqrt();
let kurt = times.iter()
.map(|t| ((t.as_secs_f64() - mean_val) / std_dev).powi(4))
.sum :: < f64 >() / n;
kurt
}
pub fn detect_outliers(times: &[ Duration]) -> usize
{
if times.len() < 4 { return 0; }
let mut sorted: Vec< f64 > = times.iter().map(|t| t.as_secs_f64()).collect();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
let q1_idx = sorted.len() / 4;
let q3_idx = 3 * sorted.len() / 4;
let q1 = sorted[q1_idx];
let q3 = sorted[q3_idx];
let iqr = q3 - q1;
let lower_bound = q1 - 1.5 * iqr;
let upper_bound = q3 + 1.5 * iqr;
sorted.iter().filter(|&&val| val < lower_bound || val > upper_bound).count()
}
fn calculate_power(n: usize, std_dev: f64, significance_level: SignificanceLevel) -> f64
{
let effect_size = 0.1; let _alpha = significance_level.alpha();
let z_alpha = significance_level.t_critical();
let z_beta = effect_size * (n as f64).sqrt() / std_dev - z_alpha;
if z_beta > 3.0 { 0.999 }
else if z_beta > 2.0 { 0.95 }
else if z_beta > 1.0 { 0.8 }
else if z_beta > 0.0 { 0.5 }
else { 0.2 }
}
fn t_test_p_value(t_stat: f64, _df: f64) -> f64
{
if t_stat > 3.0 { 0.001 }
else if t_stat > 2.5 { 0.01 }
else if t_stat > 2.0 { 0.05 }
else if t_stat > 1.0 { 0.2 }
else { 0.5 }
}
}