use crate ::measurement ::BenchmarkResult;

type Result< T > = std ::result ::Result< T, Box< dyn std ::error ::Error > >;
use std ::time ::Duration;

/// Significance level (α) used for statistical testing.
#[ derive(Debug, Clone, Copy, PartialEq) ]
pub enum SignificanceLevel
{
  /// α = 0.05 (95% confidence).
  Standard,
  /// α = 0.01 (99% confidence).
  High,
  /// α = 0.001 (99.9% confidence).
  VeryHigh,
}

impl SignificanceLevel
{
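  /// Two-sided significance threshold (α) for this level. A minimal
  /// sketch of the mapping (illustrative; assumes the enum is in scope):
  ///
  /// ```rust,ignore
  /// assert_eq!(SignificanceLevel ::Standard.alpha(), 0.05);
  /// assert_eq!(SignificanceLevel ::High.alpha(), 0.01);
  /// assert_eq!(SignificanceLevel ::VeryHigh.alpha(), 0.001);
  /// ```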
  pub fn alpha( &self ) -> f64
  {
    match self
    {
      SignificanceLevel ::Standard => 0.05,
      SignificanceLevel ::High => 0.01,
      SignificanceLevel ::VeryHigh => 0.001,
    }
  }

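  /// Two-sided critical value for this level, using the normal (z)
  /// approximation rather than Student's t. A rough sketch of how it
  /// scales a standard error into a margin of error (illustrative values):
  ///
  /// ```rust,ignore
  /// let se = 0.002_f64; // standard error in seconds, hypothetical
  /// let margin = SignificanceLevel ::Standard.t_critical() * se; // 1.96 * SE
  /// assert!((margin - 0.00392).abs() < 1e-9);
  /// ```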
  pub fn t_critical( &self ) -> f64
  {
    match self
    {
      SignificanceLevel ::Standard => 1.96,
      SignificanceLevel ::High => 2.58,
      SignificanceLevel ::VeryHigh => 3.29,
    }
  }
}

/// A confidence interval around a point estimate of a timing statistic.
#[ derive(Debug, Clone) ]
pub struct ConfidenceInterval
{
  /// Lower bound of the interval.
  pub lower_bound: Duration,
  /// Upper bound of the interval.
  pub upper_bound: Duration,
  /// The statistic the interval is centred on (e.g. mean or median).
  pub point_estimate: Duration,
  /// Confidence level as a fraction (e.g. 0.95 for 95%).
  pub confidence_level: f64,
  /// Half-width of the interval.
  pub margin_of_error: Duration,
}

impl ConfidenceInterval
{
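  /// Builds an interval around `point_estimate` with the given half-width.
  /// A minimal sketch (illustrative values; assumes the type is in scope):
  ///
  /// ```rust,ignore
  /// let ci = ConfidenceInterval ::new(
  ///   Duration ::from_millis(100), // point estimate
  ///   Duration ::from_millis(5),   // margin of error
  ///   0.95,                        // confidence level
  /// );
  /// assert_eq!(ci.lower_bound, Duration ::from_millis(95));
  /// assert_eq!(ci.upper_bound, Duration ::from_millis(105));
  /// assert!(ci.contains(Duration ::from_millis(100)));
  /// ```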
  pub fn new(
    point_estimate: Duration,
    margin_of_error: Duration,
    confidence_level: f64,
  ) -> Self
  {
    // Guard against underflow when the margin exceeds the estimate.
    let lower = point_estimate.saturating_sub(margin_of_error);
    let upper = point_estimate + margin_of_error;

    Self
    {
      lower_bound: lower,
      upper_bound: upper,
      point_estimate,
      confidence_level,
      margin_of_error,
    }
  }

  /// True if `value` lies within the interval (bounds inclusive).
  pub fn contains( &self, value: Duration ) -> bool
  {
    value >= self.lower_bound && value <= self.upper_bound
  }

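  /// True if the two intervals share any points. Non-overlapping intervals
  /// are a quick visual cue that two benchmarks likely differ. A sketch
  /// (illustrative values):
  ///
  /// ```rust,ignore
  /// let a = ConfidenceInterval ::new(Duration ::from_millis(100), Duration ::from_millis(5), 0.95);
  /// let b = ConfidenceInterval ::new(Duration ::from_millis(120), Duration ::from_millis(5), 0.95);
  /// assert!(!a.overlaps(&b)); // [95, 105] vs [115, 125]
  /// ```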
  pub fn overlaps( &self, other: &ConfidenceInterval ) -> bool
  {
    !(self.upper_bound < other.lower_bound || other.upper_bound < self.lower_bound)
  }
}

/// Renders as "point [lower - upper] (NN.N% CI)". Implemented as `Display`
/// rather than an inherent `to_string`, so the blanket `ToString` impl
/// still provides `.to_string()` for callers.
impl std ::fmt ::Display for ConfidenceInterval
{
  fn fmt( &self, f: &mut std ::fmt ::Formatter< '_ > ) -> std ::fmt ::Result
  {
    write!(
      f,
      "{:.2?} [{:.2?} - {:.2?}] ({:.1}% CI)",
      self.point_estimate,
      self.lower_bound,
      self.upper_bound,
      self.confidence_level * 100.0
    )
  }
}

/// Result of a two-sample statistical test between benchmark runs.
#[ derive(Debug, Clone) ]
pub struct StatisticalTest
{
  /// The t statistic of the test.
  pub test_statistic: f64,
  /// Approximate p-value for the test statistic.
  pub p_value: f64,
  /// Cohen's d effect size.
  pub effect_size: f64,
  /// Welch-Satterthwaite degrees of freedom (truncated).
  pub degrees_of_freedom: usize,
  /// Whether `p_value` falls below the chosen α.
  pub is_significant: bool,
  /// Significance level the test was run at.
  pub significance_level: SignificanceLevel,
}

impl StatisticalTest
{
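  /// Interprets `effect_size` using Cohen's conventional thresholds:
  /// |d| < 0.2 negligible, < 0.5 small, < 0.8 medium, otherwise large.
  /// A minimal sketch (illustrative field values):
  ///
  /// ```rust,ignore
  /// let test = StatisticalTest
  /// {
  ///   test_statistic: 2.1,
  ///   p_value: 0.04,
  ///   effect_size: -0.6,
  ///   degrees_of_freedom: 18,
  ///   is_significant: true,
  ///   significance_level: SignificanceLevel ::Standard,
  /// };
  /// assert_eq!(test.effect_size_interpretation(), "medium");
  /// ```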
  pub fn effect_size_interpretation( &self ) -> &'static str
  {
    let abs_effect = self.effect_size.abs();
    if abs_effect < 0.2
    {
      "negligible"
    }
    else if abs_effect < 0.5
    {
      "small"
    }
    else if abs_effect < 0.8
    {
      "medium"
    }
    else
    {
      "large"
    }
  }

  /// One-line human-readable verdict for the test.
  pub fn conclusion( &self ) -> String
  {
    let verdict = if self.is_significant
    {
      "Statistically significant difference"
    }
    else
    {
      "No statistically significant difference"
    };
    format!(
      "{} (p = {:.4}, effect size: {:.3} [{}])",
      verdict,
      self.p_value,
      self.effect_size,
      self.effect_size_interpretation()
    )
  }
}

/// Result of a normality check on the raw timing samples.
#[ derive(Debug, Clone) ]
pub struct NormalityTest
{
  /// The test statistic (W for Shapiro-Wilk style tests).
  pub test_statistic: f64,
  /// Approximate p-value.
  pub p_value: f64,
  /// Whether the data is consistent with a normal distribution (p > 0.05).
  pub is_normal: bool,
  /// Name of the test used.
  pub test_name: String,
}

/// Full statistical analysis of a single benchmark result.
#[ derive(Debug, Clone) ]
pub struct StatisticalAnalysis
{
  /// The benchmark result that was analyzed.
  pub benchmark_result: BenchmarkResult,
  /// Confidence interval for the mean.
  pub mean_confidence_interval: ConfidenceInterval,
  /// Confidence interval for the median.
  pub median_confidence_interval: ConfidenceInterval,
  /// Standard error of the mean.
  pub standard_error: Duration,
  /// Standard deviation divided by the mean (relative spread).
  pub coefficient_of_variation: f64,
  /// Normality check on the raw samples.
  pub normality_test: NormalityTest,
  /// Number of IQR outliers among the samples.
  pub outlier_count: usize,
  /// Estimated statistical power of the sample.
  pub statistical_power: f64,
}

impl StatisticalAnalysis
{
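  /// Computes confidence intervals, a normality heuristic, an outlier count,
  /// and a power estimate for one benchmark's samples. Usage sketch
  /// (`result` is a hypothetical, already-populated `BenchmarkResult`):
  ///
  /// ```rust,ignore
  /// let analysis = StatisticalAnalysis ::analyze(&result, SignificanceLevel ::Standard)?;
  /// println!("{}", analysis.generate_report());
  /// if !analysis.is_reliable()
  /// {
  ///   eprintln!("warning: benchmark statistics look unreliable");
  /// }
  /// ```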
  pub fn analyze(
    result: &BenchmarkResult,
    significance_level: SignificanceLevel,
  ) -> Result< Self >
  {
    if result.times.is_empty()
    {
      return Err("Cannot analyze empty benchmark result".into());
    }

    let n = result.times.len();
    let mean = result.mean_time();
    let std_dev = result.std_deviation();
    // Standard error of the mean: s / sqrt(n).
    let standard_error = Duration ::from_secs_f64(
      std_dev.as_secs_f64() / (n as f64).sqrt()
    );

    let margin_of_error = Duration ::from_secs_f64(
      significance_level.t_critical() * standard_error.as_secs_f64()
    );
    let mean_ci = ConfidenceInterval ::new(
      mean,
      margin_of_error,
      1.0 - significance_level.alpha(),
    );

    let median = result.median_time();
    // Margin for the median: t * SE_median, where under normality
    // SE_median ≈ sqrt(pi / 2) ≈ 1.253 × SE_mean.
    let median_margin = Duration ::from_secs_f64(
      significance_level.t_critical() * 1.253 * standard_error.as_secs_f64()
    );
    let median_ci = ConfidenceInterval ::new(
      median,
      median_margin,
      1.0 - significance_level.alpha(),
    );

    let cv = if mean.as_secs_f64() > 0.0
    {
      std_dev.as_secs_f64() / mean.as_secs_f64()
    }
    else
    {
      0.0
    };

    let normality_test = Self ::shapiro_wilk_test(&result.times);

    let outlier_count = Self ::detect_outliers(&result.times);

    let statistical_power = Self ::calculate_power(n, std_dev.as_secs_f64(), significance_level);

    Ok(Self
    {
      benchmark_result: result.clone(),
      mean_confidence_interval: mean_ci,
      median_confidence_interval: median_ci,
      standard_error,
      coefficient_of_variation: cv,
      normality_test,
      outlier_count,
      statistical_power,
    })
  }

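  /// Welch's t-test between two result sets, with Cohen's d as the effect
  /// size. Usage sketch (`result_a` and `result_b` are hypothetical
  /// populated results):
  ///
  /// ```rust,ignore
  /// let test = StatisticalAnalysis ::compare(&result_a, &result_b, SignificanceLevel ::Standard)?;
  /// println!("{}", test.conclusion());
  /// ```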
  pub fn compare(
    result_a: &BenchmarkResult,
    result_b: &BenchmarkResult,
    significance_level: SignificanceLevel,
  ) -> Result< StatisticalTest >
  {
    if result_a.times.is_empty() || result_b.times.is_empty()
    {
      return Err("Cannot compare empty benchmark results".into());
    }

    let mean_a = result_a.mean_time().as_secs_f64();
    let mean_b = result_b.mean_time().as_secs_f64();
    let var_a = result_a.std_deviation().as_secs_f64().powi(2);
    let var_b = result_b.std_deviation().as_secs_f64().powi(2);
    let n_a = result_a.times.len() as f64;
    let n_b = result_b.times.len() as f64;

    // Cohen's d: mean difference scaled by the pooled standard deviation.
    let pooled_std = ((var_a * (n_a - 1.0) + var_b * (n_b - 1.0)) / (n_a + n_b - 2.0)).sqrt();
    let effect_size = (mean_a - mean_b) / pooled_std;

    // Welch's t statistic: no equal-variance assumption.
    let se_diff = (var_a / n_a + var_b / n_b).sqrt();
    let t_stat = (mean_a - mean_b) / se_diff;

    // Welch-Satterthwaite approximation of the degrees of freedom.
    let df = (var_a / n_a + var_b / n_b).powi(2) /
      ((var_a / n_a).powi(2) / (n_a - 1.0) + (var_b / n_b).powi(2) / (n_b - 1.0));

    let p_value = Self ::t_test_p_value(t_stat.abs(), df);
    let is_significant = p_value < significance_level.alpha();

    Ok(StatisticalTest
    {
      test_statistic: t_stat,
      p_value,
      effect_size,
      degrees_of_freedom: df as usize,
      is_significant,
      significance_level,
    })
  }

  /// Heuristic reliability gate: low relative spread (CV < 10%), more than
  /// 10 samples, estimated power above 0.8, and fewer than 10% outliers.
  pub fn is_reliable( &self ) -> bool
  {
    let low_variation = self.coefficient_of_variation < 0.1;
    let sufficient_samples = self.benchmark_result.times.len() > 10;
    let high_power = self.statistical_power > 0.8;
    let few_outliers = (self.outlier_count as f64 / self.benchmark_result.times.len() as f64) < 0.1;

    low_variation && sufficient_samples && high_power && few_outliers
  }

  pub fn generate_report( &self ) -> String
  {
    let mut report = String ::new();

    report.push_str("## Statistical Analysis Report\n\n");
    report.push_str(&format!("**Benchmark** : {}\n", self.benchmark_result.name));
    report.push_str(&format!("**Sample size** : {} measurements\n\n", self.benchmark_result.times.len()));

    report.push_str("### Descriptive Statistics\n\n");
    report.push_str(&format!("- **Mean** : {}\n", self.mean_confidence_interval.to_string()));
    report.push_str(&format!("- **Median** : {}\n", self.median_confidence_interval.to_string()));
    report.push_str(&format!("- **Standard Deviation** : {:.2?}\n", self.benchmark_result.std_deviation()));
    report.push_str(&format!("- **Standard Error** : {:.2?}\n", self.standard_error));
    report.push_str(&format!("- **Coefficient of Variation** : {:.1}%\n\n", self.coefficient_of_variation * 100.0));

    report.push_str("### Statistical Validity\n\n");
    report.push_str(&format!("- **Normality test** : {} (p = {:.4})\n",
      if self.normality_test.is_normal { "✅ Normal" } else { "⚠️ Non-normal" },
      self.normality_test.p_value));
    report.push_str(&format!("- **Outliers detected** : {} ({:.1}% of data)\n",
      self.outlier_count,
      self.outlier_count as f64 / self.benchmark_result.times.len() as f64 * 100.0));
    report.push_str(&format!("- **Statistical power** : {:.3} ({})\n",
      self.statistical_power,
      if self.statistical_power > 0.8 { "✅ High" } else { "⚠️ Low" }));
    report.push_str(&format!("- **Overall reliability** : {}\n\n",
      if self.is_reliable() { "✅ Reliable" } else { "⚠️ Questionable" }));

    report.push_str("### Recommendations\n\n");
    if !self.is_reliable()
    {
      if self.coefficient_of_variation > 0.1
      {
        report.push_str("- ⚠️ High variation detected. Consider increasing the sample size or controlling the environment.\n");
      }
      if self.statistical_power < 0.8
      {
        report.push_str("- ⚠️ Low statistical power. Increase the sample size for reliable effect detection.\n");
      }
      if !self.normality_test.is_normal
      {
        report.push_str("- ⚠️ Data not normally distributed. Consider non-parametric tests or a transformation.\n");
      }
      if self.outlier_count > 0
      {
        report.push_str(&format!("- ⚠️ {} outliers detected. Investigate measurement conditions.\n", self.outlier_count));
      }
    }
    else
    {
      report.push_str("- ✅ Results meet research-grade statistical standards.\n");
    }

    report
  }

  /// Simplified normality heuristic. This is not a full Shapiro-Wilk
  /// implementation: it scores the sample by how far its skewness and
  /// kurtosis deviate from the normal distribution's values (0 and 3).
  fn shapiro_wilk_test(times: &[ Duration ]) -> NormalityTest
  {
    let n = times.len();
    let mean_val = times.iter().sum :: < Duration >().as_secs_f64() / n as f64;

    let skewness = Self ::calculate_skewness(times, mean_val);
    let kurtosis = Self ::calculate_kurtosis(times, mean_val);

    // Pseudo W statistic and a coarse p-value bucketing.
    let w_stat = 1.0 - (skewness.abs() + (kurtosis - 3.0).abs()) / 10.0;
    let p_value = if w_stat > 0.95 { 0.8 } else if w_stat > 0.9 { 0.3 } else { 0.01 };

    NormalityTest
    {
      test_statistic: w_stat,
      p_value,
      is_normal: p_value > 0.05,
      test_name: "Shapiro-Wilk (simplified)".to_string(),
    }
  }

  /// Sample skewness: third standardized moment of the sample.
  fn calculate_skewness(times: &[ Duration ], mean_val: f64) -> f64
  {
    let n = times.len() as f64;
    // Sample variance (n - 1 denominator).
    let variance = times.iter()
      .map(|t| (t.as_secs_f64() - mean_val).powi(2))
      .sum :: < f64 >() / (n - 1.0);
    let std_dev = variance.sqrt();

    times.iter()
      .map(|t| ((t.as_secs_f64() - mean_val) / std_dev).powi(3))
      .sum :: < f64 >() / n
  }

  /// Sample kurtosis: fourth standardized moment (≈ 3 for normal data).
  fn calculate_kurtosis(times: &[ Duration ], mean_val: f64) -> f64
  {
    let n = times.len() as f64;
    let variance = times.iter()
      .map(|t| (t.as_secs_f64() - mean_val).powi(2))
      .sum :: < f64 >() / (n - 1.0);
    let std_dev = variance.sqrt();

    times.iter()
      .map(|t| ((t.as_secs_f64() - mean_val) / std_dev).powi(4))
      .sum :: < f64 >() / n
  }

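  /// Counts outliers using Tukey's fences: values more than 1.5 × IQR
  /// below Q1 or above Q3. A worked sketch (illustrative data: ten samples
  /// around 10-19 ms plus one 500 ms spike, which is the single outlier):
  ///
  /// ```rust,ignore
  /// let mut times: Vec< Duration > = (10u64..20).map(Duration ::from_millis).collect();
  /// times.push(Duration ::from_millis(500));
  /// assert_eq!(StatisticalAnalysis ::detect_outliers(&times), 1);
  /// ```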
  pub fn detect_outliers(times: &[ Duration ]) -> usize
  {
    // Quartiles are ill-defined for very small samples.
    if times.len() < 4 { return 0; }

    let mut sorted: Vec< f64 > = times.iter().map(|t| t.as_secs_f64()).collect();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());

    // Rough quartiles by index (adequate for benchmark-sized samples).
    let q1_idx = sorted.len() / 4;
    let q3_idx = 3 * sorted.len() / 4;
    let q1 = sorted[q1_idx];
    let q3 = sorted[q3_idx];
    let iqr = q3 - q1;

    // Tukey's fences: 1.5 × IQR beyond each quartile.
    let lower_bound = q1 - 1.5 * iqr;
    let upper_bound = q3 + 1.5 * iqr;

    sorted.iter().filter(|&&val| val < lower_bound || val > upper_bound).count()
  }

  /// Coarse power estimate for detecting a fixed 0.1 s effect at the given
  /// significance level, bucketed rather than computed from the normal CDF.
  fn calculate_power(n: usize, std_dev: f64, significance_level: SignificanceLevel) -> f64
  {
    // Assumed minimum effect of interest, in seconds.
    let effect_size = 0.1;
    let z_alpha = significance_level.t_critical();
    let z_beta = effect_size * (n as f64).sqrt() / std_dev - z_alpha;

    if z_beta > 3.0 { 0.999 }
    else if z_beta > 2.0 { 0.95 }
    else if z_beta > 1.0 { 0.8 }
    else if z_beta > 0.0 { 0.5 }
    else { 0.2 }
  }

  /// Coarse p-value lookup for a two-sided t-test. The degrees of freedom
  /// are currently ignored; the thresholds approximate the normal distribution.
  fn t_test_p_value(t_stat: f64, _df: f64) -> f64
  {
    if t_stat > 3.0 { 0.001 }
    else if t_stat > 2.5 { 0.01 }
    else if t_stat > 2.0 { 0.05 }
    else if t_stat > 1.0 { 0.2 }
    else { 0.5 }
  }
}
