#![allow(clippy::cast_precision_loss)]
/// Settings shared by the analysis entry points in this module.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// Significance level: a result is flagged significant when its p-value is below this.
    pub alpha: f64,
    /// When `true`, the entry points look at the skewness of the control sample before
    /// choosing a test; when `false`, skew-driven selection is skipped.
    pub auto_detect_skew: bool,
}

impl Default for AnalysisConfig {
    /// Returns a configuration with `alpha = 0.05` and skew detection switched on.
    fn default() -> Self {
        let alpha = 0.05;
        let auto_detect_skew = true;
        Self {
            alpha,
            auto_detect_skew,
        }
    }
}
/// Outcome of comparing a treatment sample against a control sample.
#[derive(Debug, Clone)]
pub struct AnalysisResult {
    /// Central value reported for the control group: the arithmetic mean for the plain
    /// t-test, `exp` of the mean log for the log-transformed test, and the median when
    /// `analyze_with_auto_select` falls back to Mann–Whitney.
    pub control_mean: f64,
    /// Central value reported for the treatment group (same convention as `control_mean`).
    pub treatment_mean: f64,
    /// Relative change of treatment versus control for the t-test variants; for the
    /// Mann–Whitney path this carries the effect size reported by `mann_whitney_u`.
    pub effect_size: f64,
    /// Two-sided p-value of the selected test.
    pub p_value: f64,
    /// Whether `p_value` fell below the significance level used by the producing function.
    pub significant: bool,
    /// Which statistical procedure produced this result.
    pub method: TestMethod,
}
/// Identifies the statistical procedure that produced a result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TestMethod {
    /// Two-sample test on the raw observations (see `analyze_t_test`).
    TTest,
    /// The same test applied to the natural logarithm of every observation
    /// (see `analyze_log_transform`).
    LogTransformTTest,
    /// Rank-based Mann–Whitney U test (see `mann_whitney_u`).
    MannWhitneyU,
}
/// Compares `treatment` against `control`, switching to a log-transformed test when the
/// control sample is strongly skewed.
///
/// The log-transformed test is used only when all of the following hold:
/// * `config.auto_detect_skew` is set,
/// * the absolute skewness of `control` exceeds `1.0`, and
/// * every observation in both samples is strictly positive.
///
/// In every other case the plain t-test on the raw values is used. The positivity check
/// matters because the logarithm of zero or of a negative number is not a finite real
/// value; without it a skewed sample containing such values would yield NaN statistics.
///
/// `config.alpha` is forwarded as the significance level.
#[must_use]
pub fn analyze(control: &[f64], treatment: &[f64], config: &AnalysisConfig) -> AnalysisResult {
    let strongly_skewed = config.auto_detect_skew && compute_skewness(control).abs() > 1.0;
    // Evaluated lazily: only scanned when the skew test already asked for a log transform.
    let log_is_defined = || control.iter().chain(treatment).all(|&x| x > 0.0);

    if strongly_skewed && log_is_defined() {
        analyze_log_transform(control, treatment, config.alpha)
    } else {
        analyze_t_test(control, treatment, config.alpha)
    }
}
/// Runs the two-sample test on the natural logarithm of every observation.
///
/// The p-value and significance flag come straight from the log-space test. The reported
/// `control_mean` / `treatment_mean` are `exp` of the log-space means (i.e. geometric means
/// of the raw data), and `effect_size` is their ratio minus one.
///
/// No input validation is performed: observations that are zero or negative produce
/// non-finite logarithms, which then propagate into the returned statistics.
#[must_use]
pub fn analyze_log_transform(control: &[f64], treatment: &[f64], alpha: f64) -> AnalysisResult {
    let to_logs = |sample: &[f64]| -> Vec<f64> { sample.iter().map(|v| v.ln()).collect() };
    let log_space = analyze_t_test(&to_logs(control), &to_logs(treatment), alpha);

    // Back-transform the log-space means once and reuse them below.
    let control_geo = log_space.control_mean.exp();
    let treatment_geo = log_space.treatment_mean.exp();

    AnalysisResult {
        control_mean: control_geo,
        treatment_mean: treatment_geo,
        effect_size: treatment_geo / control_geo - 1.0,
        p_value: log_space.p_value,
        significant: log_space.significant,
        method: TestMethod::LogTransformTTest,
    }
}
/// Two-sample comparison of means using unpooled (per-group) sample variances.
///
/// The test statistic is `(mean(treatment) - mean(control)) / se`, where
/// `se = sqrt(var1 / n1 + var2 / n2)` and the variances use the `n - 1` denominator.
/// The two-sided p-value is taken from the standard normal distribution via `normal_cdf`,
/// not from Student's t distribution, so it is an approximation for small samples.
///
/// `effect_size` is the relative change `(mean2 - mean1) / mean1`.
///
/// Edge cases:
/// * When both samples are constant and have the same mean, the statistic would be `0 / 0`;
///   this is reported as `p_value = 1.0` (no evidence of a difference) rather than NaN.
/// * When the means are equal the effect size is `0.0`, even if the control mean is zero.
/// * A sample with fewer than two observations has an undefined variance; the returned
///   `p_value` is then NaN and `significant` is `false`.
#[must_use]
pub fn analyze_t_test(control: &[f64], treatment: &[f64], alpha: f64) -> AnalysisResult {
    let n1 = control.len() as f64;
    let n2 = treatment.len() as f64;
    let mean1 = control.iter().sum::<f64>() / n1;
    let mean2 = treatment.iter().sum::<f64>() / n2;
    let var1 = control.iter().map(|x| (x - mean1).powi(2)).sum::<f64>() / (n1 - 1.0);
    let var2 = treatment.iter().map(|x| (x - mean2).powi(2)).sum::<f64>() / (n2 - 1.0);
    let se = (var1 / n1 + var2 / n2).sqrt();
    let diff = mean2 - mean1;

    let p_value = if diff == 0.0 && se == 0.0 {
        // Identical constant samples: avoid the 0/0 statistic and report "no difference".
        1.0
    } else {
        let t_stat = diff / se;
        2.0 * (1.0 - normal_cdf(t_stat.abs()))
    };

    // Equal means are a zero relative change even when the baseline itself is zero.
    let effect_size = if diff == 0.0 { 0.0 } else { diff / mean1 };

    AnalysisResult {
        control_mean: mean1,
        treatment_mean: mean2,
        effect_size,
        p_value,
        significant: p_value < alpha,
        method: TestMethod::TTest,
    }
}
/// Returns the skewness of `data`: the mean of the cubed standardized deviations, using the
/// population (divide-by-`n`) standard deviation.
///
/// Returns `0.0` when `data` is empty or when its standard deviation is below `1e-10`
/// (effectively constant data), since the standardized deviations are undefined there.
fn compute_skewness(data: &[f64]) -> f64 {
    if data.is_empty() {
        // Without this guard the mean would be 0/0 and NaN would leak to the caller.
        return 0.0;
    }

    let n = data.len() as f64;
    let mean = data.iter().sum::<f64>() / n;
    let std_dev = (data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n).sqrt();
    if std_dev < 1e-10 {
        return 0.0;
    }

    data.iter()
        .map(|x| ((x - mean) / std_dev).powi(3))
        .sum::<f64>()
        / n
}
/// Qualitative label for the magnitude of an effect size, as assigned by
/// `interpret_effect_size`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectSizeInterpretation {
    /// Magnitude below 0.1.
    Negligible,
    /// Magnitude of at least 0.1 and below 0.3.
    Small,
    /// Magnitude of at least 0.3 and below 0.5.
    Medium,
    /// Everything else (0.5 and above).
    Large,
}
/// Full output of `mann_whitney_u`.
#[derive(Debug, Clone)]
pub struct MannWhitneyResult {
    /// The smaller of the two U statistics.
    pub u_statistic: f64,
    /// Standardized U under the normal approximation (`0.0` when the spread is zero).
    pub z_score: f64,
    /// Two-sided p-value derived from `z_score`.
    pub p_value: f64,
    /// Whether `p_value` is below the fixed 0.05 level used by `mann_whitney_u`.
    pub significant: bool,
    /// Effect size computed as `1 - 2U / (n1 * n2)`.
    pub effect_size: f64,
    /// Qualitative label for the magnitude of `effect_size`.
    pub effect_interpretation: EffectSizeInterpretation,
    /// Always `TestMethod::MannWhitneyU` for results produced by `mann_whitney_u`.
    pub method: TestMethod,
}
/// Mann–Whitney U test comparing `control` and `treatment` using the normal approximation.
///
/// Both samples are pooled, sorted and ranked (tied values share their average rank). The
/// reported `u_statistic` is the smaller of the two U values, `z_score` standardizes it
/// against its null mean `n1 * n2 / 2`, and `p_value` is the two-sided normal tail
/// probability.
///
/// The standard deviation used for the z-score includes the usual tie correction
/// `sigma^2 = n1 * n2 / 12 * ((n + 1) - sum(t^3 - t) / (n * (n - 1)))`, where `t` is the size
/// of each group of tied values and `n = n1 + n2`. With no ties this reduces to
/// `n1 * n2 * (n + 1) / 12`. No continuity correction is applied.
///
/// `effect_size` is `1 - 2U / (n1 * n2)`. Because `U` is the smaller statistic, this is a
/// non-negative magnitude and carries no direction. When either sample is empty there are
/// no pairs to compare and the effect size is reported as `0.0`.
///
/// `significant` always uses a fixed 0.05 level; callers needing another level should
/// compare `p_value` themselves.
#[must_use]
pub fn mann_whitney_u(control: &[f64], treatment: &[f64]) -> MannWhitneyResult {
    let n1 = control.len();
    let n2 = treatment.len();
    let pair_count = (n1 * n2) as f64;
    let total = (n1 + n2) as f64;

    // Tag each observation with its group (0 = control, 1 = treatment) before pooling.
    let mut combined: Vec<(f64, usize)> = control
        .iter()
        .map(|&x| (x, 0))
        .chain(treatment.iter().map(|&x| (x, 1)))
        .collect();
    combined.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
    let ranks = assign_ranks_with_ties(&combined);

    let r1: f64 = ranks
        .iter()
        .filter(|(_, group)| *group == 0)
        .map(|(rank, _)| rank)
        .sum();
    let u1 = r1 - (n1 * (n1 + 1)) as f64 / 2.0;
    let u2 = pair_count - u1;
    let u_statistic = u1.min(u2);

    // Sum of t^3 - t over runs of tied observations. Members of a tie group share one
    // averaged rank, and neighbouring groups differ by at least 1, so a tolerance of 0.5
    // separates the runs unambiguously.
    let mut tie_term = 0.0_f64;
    let mut run_start = 0;
    while run_start < ranks.len() {
        let run_rank = ranks[run_start].0;
        let run_len = ranks[run_start..]
            .iter()
            .take_while(|(rank, _)| (rank - run_rank).abs() < 0.5)
            .count();
        let t = run_len as f64;
        tie_term += t * t * t - t;
        run_start += run_len;
    }
    let tie_adjustment = if total > 1.0 {
        tie_term / (total * (total - 1.0))
    } else {
        0.0
    };

    let mu = pair_count / 2.0;
    let sigma = (pair_count * (total + 1.0 - tie_adjustment) / 12.0).sqrt();
    // A zero (or non-finite) spread means the statistic cannot be standardized.
    let z_score = if sigma > 0.0 {
        (u_statistic - mu) / sigma
    } else {
        0.0
    };
    let p_value = 2.0 * (1.0 - normal_cdf(z_score.abs()));

    // With an empty group the ratio would be 0/0; NaN would then be labelled `Large`.
    let effect_size = if pair_count > 0.0 {
        1.0 - (2.0 * u_statistic) / pair_count
    } else {
        0.0
    };
    let effect_interpretation = interpret_effect_size(effect_size.abs());

    MannWhitneyResult {
        u_statistic,
        z_score,
        p_value,
        significant: p_value < 0.05,
        effect_size,
        effect_interpretation,
        method: TestMethod::MannWhitneyU,
    }
}
/// Assigns 1-based ranks to an already sorted slice of `(value, group)` pairs, giving every
/// member of a run of tied values the average of the ranks the run occupies.
///
/// A value joins the current run when it equals the run's first value or lies within
/// `1e-10` of it. The output has one `(rank, group)` entry per input entry, in input order.
///
/// The run always contains its first element, so the scan makes progress even when the
/// first value is NaN or infinite (where `value - value` is NaN and a pure difference test
/// would never match, stalling the loop).
fn assign_ranks_with_ties(sorted: &[(f64, usize)]) -> Vec<(f64, usize)> {
    let mut ranks = Vec::with_capacity(sorted.len());
    let mut start = 0;
    while start < sorted.len() {
        let value = sorted[start].0;
        // Exact equality covers infinities, whose difference is NaN rather than zero.
        let tied = |candidate: f64| candidate == value || (candidate - value).abs() < 1e-10;

        let mut end = start + 1;
        while end < sorted.len() && tied(sorted[end].0) {
            end += 1;
        }

        // The run occupies ranks start+1 ..= end; their mean is the midpoint.
        let avg_rank = (start + 1 + end) as f64 / 2.0;
        ranks.extend(sorted[start..end].iter().map(|&(_, group)| (avg_rank, group)));
        start = end;
    }
    ranks
}
/// Maps an effect-size magnitude `r` onto a qualitative label using the cut-offs
/// 0.1, 0.3 and 0.5 (each bucket excludes its upper bound).
///
/// Any value that fails all three comparisons — including NaN — is labelled `Large`.
fn interpret_effect_size(r: f64) -> EffectSizeInterpretation {
    match r {
        magnitude if magnitude < 0.1 => EffectSizeInterpretation::Negligible,
        magnitude if magnitude < 0.3 => EffectSizeInterpretation::Small,
        magnitude if magnitude < 0.5 => EffectSizeInterpretation::Medium,
        _ => EffectSizeInterpretation::Large,
    }
}
/// If the smaller of the two groups has fewer observations than this,
/// `analyze_with_auto_select` uses the Mann–Whitney U test instead of a parametric test.
const MIN_PARAMETRIC_SAMPLE_SIZE: usize = 15;
/// Absolute skewness of the control sample above which `analyze_with_auto_select`
/// treats the data as skewed (when skew detection is enabled).
const SKEWNESS_THRESHOLD: f64 = 1.0;
/// Picks a test based on sample size, skewness and sign of the data, then runs it.
///
/// Selection rules, in order:
/// 1. If the smaller group has fewer than `MIN_PARAMETRIC_SAMPLE_SIZE` observations, use the
///    Mann–Whitney U test.
/// 2. Otherwise, if skew detection is disabled or the absolute skewness of `control` does
///    not exceed `SKEWNESS_THRESHOLD`, use the plain t-test.
/// 3. Otherwise, if every observation in both samples is strictly positive, use the
///    log-transformed t-test.
/// 4. Otherwise fall back to the Mann–Whitney U test.
///
/// For the Mann–Whitney paths, `control_mean` / `treatment_mean` hold the sample medians and
/// `significant` is decided by comparing the p-value with `config.alpha`, so the configured
/// level is honoured on every path (the flag inside `MannWhitneyResult` is fixed at 0.05
/// and is deliberately not reused here).
#[must_use]
pub fn analyze_with_auto_select(
    control: &[f64],
    treatment: &[f64],
    config: &AnalysisConfig,
) -> AnalysisResult {
    let smaller_group = control.len().min(treatment.len());

    if smaller_group >= MIN_PARAMETRIC_SAMPLE_SIZE {
        let skewed =
            config.auto_detect_skew && compute_skewness(control).abs() > SKEWNESS_THRESHOLD;
        if !skewed {
            return analyze_t_test(control, treatment, config.alpha);
        }

        let all_positive =
            control.iter().all(|&x| x > 0.0) && treatment.iter().all(|&x| x > 0.0);
        if all_positive {
            return analyze_log_transform(control, treatment, config.alpha);
        }
    }

    // Reached for small samples and for skewed data that cannot be log-transformed.
    let mw = mann_whitney_u(control, treatment);
    AnalysisResult {
        control_mean: median(control),
        treatment_mean: median(treatment),
        effect_size: mw.effect_size,
        p_value: mw.p_value,
        significant: mw.p_value < config.alpha,
        method: TestMethod::MannWhitneyU,
    }
}
/// Returns the median of `data`, or `0.0` for an empty slice.
///
/// The input is copied and sorted; for an even number of elements the two middle values
/// are averaged. Values that cannot be ordered against each other are treated as equal
/// during sorting.
fn median(data: &[f64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    let mut ordered: Vec<f64> = data.to_vec();
    ordered.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(std::cmp::Ordering::Equal));

    let mid = ordered.len() / 2;
    match ordered.len() % 2 {
        0 => (ordered[mid - 1] + ordered[mid]) / 2.0,
        _ => ordered[mid],
    }
}
/// Approximates the standard normal cumulative distribution function at `x`.
///
/// Uses the identity `Phi(x) = 0.5 * (1 + erf(x / sqrt(2)))` with the five-term rational
/// approximation of `erf` from Abramowitz & Stegun, formula 7.1.26:
/// `erf(z) ~= 1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-z^2)` with
/// `t = 1 / (1 + p*z)` for `z >= 0`, extended to negative arguments by odd symmetry.
///
/// The constants are the published ones, including `p = 0.3275911`.
#[allow(clippy::unreadable_literal)]
fn normal_cdf(x: f64) -> f64 {
    const P: f64 = 0.327_591_1;
    // Polynomial coefficients a5..a1, ordered for Horner evaluation.
    const COEFFS: [f64; 5] = [
        1.061_405_429,
        -1.453_152_027,
        1.421_413_741,
        -0.284_496_736,
        0.254_829_592,
    ];

    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let z = x.abs() / std::f64::consts::SQRT_2;
    let t = 1.0 / (1.0 + P * z);
    // Horner's scheme: ((((a5*t + a4)*t + a3)*t + a2)*t + a1).
    let poly = COEFFS.iter().fold(0.0, |acc, &coeff| acc * t + coeff);
    let erf = 1.0 - poly * t * (-z * z).exp();
    0.5 * (1.0 + sign * erf)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with the 0.05 level and skew detection on, as used by the
    /// auto-select tests.
    fn skew_aware_config() -> AnalysisConfig {
        AnalysisConfig {
            alpha: 0.05,
            auto_detect_skew: true,
        }
    }

    /// Two identical samples must not look different.
    #[test]
    fn test_mann_whitney_identical_samples() {
        let sample = [1.0, 2.0, 3.0, 4.0, 5.0];
        let outcome = mann_whitney_u(&sample, &sample);
        assert!(!outcome.significant);
        assert!(outcome.effect_size.abs() < 0.1);
    }

    /// Non-overlapping samples give U = 0 and a large effect.
    #[test]
    fn test_mann_whitney_completely_separated() {
        let low = [1.0, 2.0, 3.0, 4.0, 5.0];
        let high = [10.0, 11.0, 12.0, 13.0, 14.0];
        let outcome = mann_whitney_u(&low, &high);
        assert!(outcome.significant);
        assert!(outcome.effect_size.abs() > 0.8);
        assert_eq!(outcome.u_statistic, 0.0);
    }

    /// Tied observations still yield a p-value in (0, 1].
    #[test]
    fn test_mann_whitney_handles_ties() {
        let with_ties_a = [1.0, 2.0, 2.0, 3.0, 3.0];
        let with_ties_b = [2.0, 2.0, 3.0, 4.0, 5.0];
        let outcome = mann_whitney_u(&with_ties_a, &with_ties_b);
        assert!(outcome.p_value > 0.0);
        assert!(outcome.p_value <= 1.0);
    }

    /// A half-step shift is classified as a small or negligible effect.
    #[test]
    fn test_mann_whitney_effect_size_interpretation() {
        let base = [1.0, 2.0, 3.0, 4.0, 5.0];
        let shifted = [1.5, 2.5, 3.5, 4.5, 5.5];
        let outcome = mann_whitney_u(&base, &shifted);
        let label = outcome.effect_interpretation;
        assert!(matches!(
            label,
            EffectSizeInterpretation::Small | EffectSizeInterpretation::Negligible
        ));
    }

    /// The result is tagged with the Mann–Whitney method.
    #[test]
    fn test_mann_whitney_returns_correct_method() {
        let low = [1.0, 2.0, 3.0];
        let high = [4.0, 5.0, 6.0];
        let outcome = mann_whitney_u(&low, &high);
        assert_eq!(outcome.method, TestMethod::MannWhitneyU);
    }

    /// Groups below the parametric minimum go through Mann–Whitney.
    #[test]
    fn test_auto_select_uses_mann_whitney_for_small_samples() {
        let base = [1.0, 2.0, 3.0, 4.0, 5.0];
        let shifted = [2.0, 3.0, 4.0, 5.0, 6.0];
        let outcome = analyze_with_auto_select(&base, &shifted, &skew_aware_config());
        assert_eq!(outcome.method, TestMethod::MannWhitneyU);
    }

    /// Skewed, latency-like data is handled by a skew-tolerant method.
    #[test]
    fn test_auto_select_uses_log_transform_for_latency_like_data() {
        let slow = [
            10.0, 12.0, 11.0, 15.0, 100.0, 13.0, 14.0, 11.0, 12.0, 10.0, 11.0, 12.0, 13.0, 14.0,
            15.0, 16.0, 17.0, 18.0, 19.0, 200.0,
        ];
        let fast = [
            8.0, 9.0, 10.0, 11.0, 50.0, 9.0, 10.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            8.0, 9.0, 10.0, 11.0, 80.0,
        ];
        let outcome = analyze_with_auto_select(&slow, &fast, &skew_aware_config());
        assert!(matches!(
            outcome.method,
            TestMethod::LogTransformTTest | TestMethod::MannWhitneyU
        ));
    }

    /// A tiny shift relative to the spread is not significant.
    #[test]
    fn test_t_test_no_difference() {
        let base = [1.0, 2.0, 3.0, 4.0, 5.0];
        let nudged = [1.1, 2.1, 3.1, 4.1, 5.1];
        let outcome = analyze_t_test(&base, &nudged, 0.05);
        assert!(!outcome.significant);
    }

    /// A shift far larger than the spread is significant.
    #[test]
    fn test_t_test_significant_difference() {
        let low = [1.0, 2.0, 3.0, 4.0, 5.0];
        let high = [10.0, 11.0, 12.0, 13.0, 14.0];
        let outcome = analyze_t_test(&low, &high, 0.05);
        assert!(outcome.significant);
    }

    /// The log-transformed test reports the lower treatment centre and its own method tag.
    #[test]
    fn test_log_transform_latency() {
        let slow = [10.0, 12.0, 15.0, 100.0, 11.0];
        let fast = [8.0, 9.0, 10.0, 50.0, 8.5];
        let outcome = analyze_log_transform(&slow, &fast, 0.05);
        assert!(outcome.treatment_mean < outcome.control_mean);
        assert_eq!(outcome.method, TestMethod::LogTransformTTest);
    }

    /// A control sample with one extreme value triggers the log transform.
    #[test]
    fn test_auto_detect_skewness() {
        let with_outlier = [1.0, 1.1, 1.2, 1.3, 100.0];
        let without_outlier = [1.0, 1.1, 1.2, 1.3, 1.4];
        let cfg = AnalysisConfig::default();
        let outcome = analyze(&with_outlier, &without_outlier, &cfg);
        assert_eq!(outcome.method, TestMethod::LogTransformTTest);
    }

    /// Evenly spread data stays on the plain t-test.
    #[test]
    fn test_normal_data_uses_t_test() {
        let base = [1.0, 2.0, 3.0, 4.0, 5.0];
        let shifted = [2.0, 3.0, 4.0, 5.0, 6.0];
        let cfg = AnalysisConfig::default();
        let outcome = analyze(&base, &shifted, &cfg);
        assert_eq!(outcome.method, TestMethod::TTest);
    }

    /// Skewness is near zero for symmetric data and above one for a heavy right tail.
    #[test]
    fn test_skewness_calculation() {
        let symmetric = [1.0, 2.0, 3.0, 4.0, 5.0];
        let right_tailed = [1.0, 1.0, 1.0, 1.0, 100.0];
        assert!(compute_skewness(&symmetric).abs() < 0.5);
        assert!(compute_skewness(&right_tailed) > 1.0);
    }
}