use crate::core::error::{Error, Result};
use crate::dataframe::DataFrame;
use crate::series::Series;
use crate::stats::distributions::{ChiSquared, Distribution, FDistribution, Normal, TDistribution};
use crate::utils::rand_compat::{thread_rng, GenRangeCompat};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::f64::consts::PI;
/// Outcome of a hypothesis test, shared by every test function in this module.
///
/// Fields that a particular test does not compute are `None`; test-specific
/// numeric extras are stored in `additional_info`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestResult {
/// Value of the test statistic (for example t, F, chi-square or W).
pub statistic: f64,
/// P-value associated with `statistic`.
pub p_value: f64,
/// Degrees of freedom of the reference distribution, when the test reports one.
pub degrees_of_freedom: Option<f64>,
/// Critical value reported alongside the statistic, when the test provides one.
pub critical_value: Option<f64>,
/// Numeric effect size, when the test computes one.
pub effect_size: Option<f64>,
/// Qualitative label for `effect_size` (such as "Small" or "Large").
pub effect_size_interpretation: Option<String>,
/// Confidence interval as a pair of bounds, when the test computes one.
pub confidence_interval: Option<(f64, f64)>,
/// Human-readable name of the test that produced this result.
pub test_name: String,
/// Alternative hypothesis the p-value refers to.
pub alternative: AlternativeHypothesis,
/// Whether the null hypothesis is rejected; the tests in this module set it to `p_value < 0.05`.
pub reject_null: bool,
/// Named numeric by-products of the test (sample means, sums of squares, counts, ...).
pub additional_info: HashMap<String, f64>,
}
/// Direction of the alternative hypothesis, which selects the tail(s) used for the p-value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlternativeHypothesis {
/// Both tails: the t-based tests in this module compute `2 * (1 - cdf(|statistic|))`.
TwoSided,
/// Upper tail: `1 - cdf(statistic)`.
Greater,
/// Lower tail: `cdf(statistic)`.
Less,
}
/// A standardized effect size, tagged with the statistic it represents.
///
/// The variant determines which benchmark thresholds
/// [`EffectSize::interpretation`] applies to the wrapped value.
#[derive(Debug, Clone)]
pub enum EffectSize {
    /// Cohen's d.
    CohensD(f64),
    /// Pearson correlation coefficient r.
    PearsonR(f64),
    /// Eta squared.
    EtaSquared(f64),
    /// Partial eta squared.
    PartialEtaSquared(f64),
    /// Omega squared.
    OmegaSquared(f64),
    /// Cramér's V.
    CramersV(f64),
    /// Glass's delta.
    GlassDelta(f64),
    /// Hedges' g.
    HedgesG(f64),
}

impl EffectSize {
    /// Returns the wrapped numeric value, whatever the variant.
    pub fn value(&self) -> f64 {
        match *self {
            EffectSize::CohensD(v)
            | EffectSize::PearsonR(v)
            | EffectSize::EtaSquared(v)
            | EffectSize::PartialEtaSquared(v)
            | EffectSize::OmegaSquared(v)
            | EffectSize::CramersV(v)
            | EffectSize::GlassDelta(v)
            | EffectSize::HedgesG(v) => v,
        }
    }

    /// Classifies the magnitude (absolute value) of the effect as
    /// `"Negligible"`, `"Small"`, `"Medium"` or `"Large"`.
    ///
    /// Each family has three thresholds that are the *lower* bounds of the
    /// small, medium and large bands:
    ///
    /// * d-type measures (Cohen's d, Glass's delta, Hedges' g): 0.2 / 0.5 / 0.8
    /// * correlation-type measures (Pearson r, Cramér's V): 0.1 / 0.3 / 0.5
    /// * variance-explained measures (eta², partial eta², omega²): 0.01 / 0.06 / 0.14
    pub fn interpretation(&self) -> String {
        let [small, medium, large] = match self {
            EffectSize::CohensD(_) | EffectSize::GlassDelta(_) | EffectSize::HedgesG(_) => {
                [0.2, 0.5, 0.8]
            }
            EffectSize::PearsonR(_) | EffectSize::CramersV(_) => [0.1, 0.3, 0.5],
            // The thresholds are lower bounds here too, so values below 0.01
            // are negligible and "Large" starts at 0.14, consistent with the
            // other families.
            EffectSize::EtaSquared(_)
            | EffectSize::PartialEtaSquared(_)
            | EffectSize::OmegaSquared(_) => [0.01, 0.06, 0.14],
        };

        let magnitude = self.value().abs();
        let label = if magnitude < small {
            "Negligible"
        } else if magnitude < medium {
            "Small"
        } else if magnitude < large {
            "Medium"
        } else {
            "Large"
        };
        label.to_string()
    }
}
pub fn one_sample_ttest(
data: &[f64],
hypothesized_mean: f64,
alternative: AlternativeHypothesis,
) -> Result<TestResult> {
if data.is_empty() {
return Err(Error::InvalidValue("Data cannot be empty".into()));
}
let n = data.len() as f64;
let sample_mean = data.iter().sum::<f64>() / n;
let sample_std = {
let variance = data.iter().map(|&x| (x - sample_mean).powi(2)).sum::<f64>() / (n - 1.0);
variance.sqrt()
};
let standard_error = sample_std / n.sqrt();
let t_statistic = (sample_mean - hypothesized_mean) / standard_error;
let df = n - 1.0;
let t_dist = TDistribution::new(df)?;
let p_value = match alternative {
AlternativeHypothesis::TwoSided => 2.0 * (1.0 - t_dist.cdf(t_statistic.abs())),
AlternativeHypothesis::Greater => 1.0 - t_dist.cdf(t_statistic),
AlternativeHypothesis::Less => t_dist.cdf(t_statistic),
};
let critical_value = match alternative {
AlternativeHypothesis::TwoSided => t_dist.inverse_cdf(0.975),
AlternativeHypothesis::Greater => t_dist.inverse_cdf(0.95),
AlternativeHypothesis::Less => t_dist.inverse_cdf(0.05),
};
let cohens_d = (sample_mean - hypothesized_mean) / sample_std;
let effect_size = EffectSize::CohensD(cohens_d);
let margin_of_error = critical_value * standard_error;
let ci = (
(sample_mean - hypothesized_mean) - margin_of_error,
(sample_mean - hypothesized_mean) + margin_of_error,
);
let mut additional_info = HashMap::new();
additional_info.insert("sample_mean".to_string(), sample_mean);
additional_info.insert("sample_std".to_string(), sample_std);
additional_info.insert("standard_error".to_string(), standard_error);
additional_info.insert("hypothesized_mean".to_string(), hypothesized_mean);
Ok(TestResult {
statistic: t_statistic,
p_value,
degrees_of_freedom: Some(df),
critical_value: Some(critical_value),
effect_size: Some(effect_size.value()),
effect_size_interpretation: Some(effect_size.interpretation()),
confidence_interval: Some(ci),
test_name: "One-sample t-test".to_string(),
alternative,
reject_null: p_value < 0.05,
additional_info,
})
}
pub fn independent_ttest(
group1: &[f64],
group2: &[f64],
alternative: AlternativeHypothesis,
equal_variances: bool,
) -> Result<TestResult> {
if group1.is_empty() || group2.is_empty() {
return Err(Error::InvalidValue("Both groups must contain data".into()));
}
let n1 = group1.len() as f64;
let n2 = group2.len() as f64;
let mean1 = group1.iter().sum::<f64>() / n1;
let mean2 = group2.iter().sum::<f64>() / n2;
let var1 = group1.iter().map(|&x| (x - mean1).powi(2)).sum::<f64>() / (n1 - 1.0);
let var2 = group2.iter().map(|&x| (x - mean2).powi(2)).sum::<f64>() / (n2 - 1.0);
let (t_statistic, df, standard_error) = if equal_variances {
let pooled_var = ((n1 - 1.0) * var1 + (n2 - 1.0) * var2) / (n1 + n2 - 2.0);
let se = (pooled_var * (1.0 / n1 + 1.0 / n2)).sqrt();
let t = (mean1 - mean2) / se;
let degrees_freedom = n1 + n2 - 2.0;
(t, degrees_freedom, se)
} else {
let se = (var1 / n1 + var2 / n2).sqrt();
let t = (mean1 - mean2) / se;
let numerator = (var1 / n1 + var2 / n2).powi(2);
let denominator = (var1 / n1).powi(2) / (n1 - 1.0) + (var2 / n2).powi(2) / (n2 - 1.0);
let degrees_freedom = numerator / denominator;
(t, degrees_freedom, se)
};
let t_dist = TDistribution::new(df)?;
let p_value = match alternative {
AlternativeHypothesis::TwoSided => 2.0 * (1.0 - t_dist.cdf(t_statistic.abs())),
AlternativeHypothesis::Greater => 1.0 - t_dist.cdf(t_statistic),
AlternativeHypothesis::Less => t_dist.cdf(t_statistic),
};
let critical_value = match alternative {
AlternativeHypothesis::TwoSided => t_dist.inverse_cdf(0.975),
AlternativeHypothesis::Greater => t_dist.inverse_cdf(0.95),
AlternativeHypothesis::Less => t_dist.inverse_cdf(0.05),
};
let pooled_std = if equal_variances {
let pooled_var = ((n1 - 1.0) * var1 + (n2 - 1.0) * var2) / (n1 + n2 - 2.0);
pooled_var.sqrt()
} else {
((var1 + var2) / 2.0).sqrt()
};
let cohens_d = (mean1 - mean2) / pooled_std;
let effect_size = EffectSize::CohensD(cohens_d);
let margin_of_error = critical_value * standard_error;
let ci = (
(mean1 - mean2) - margin_of_error,
(mean1 - mean2) + margin_of_error,
);
let mut additional_info = HashMap::new();
additional_info.insert("mean1".to_string(), mean1);
additional_info.insert("mean2".to_string(), mean2);
additional_info.insert("var1".to_string(), var1);
additional_info.insert("var2".to_string(), var2);
additional_info.insert("n1".to_string(), n1);
additional_info.insert("n2".to_string(), n2);
additional_info.insert("pooled_std".to_string(), pooled_std);
let test_name = if equal_variances {
"Independent samples t-test (equal variances)".to_string()
} else {
"Welch's t-test (unequal variances)".to_string()
};
Ok(TestResult {
statistic: t_statistic,
p_value,
degrees_of_freedom: Some(df),
critical_value: Some(critical_value),
effect_size: Some(effect_size.value()),
effect_size_interpretation: Some(effect_size.interpretation()),
confidence_interval: Some(ci),
test_name,
alternative,
reject_null: p_value < 0.05,
additional_info,
})
}
/// Performs a paired-samples t-test on matched `before` / `after` observations.
///
/// The test is a one-sample t-test (against zero) on the element-wise
/// differences `before[i] - after[i]`. The returned result is the one produced
/// by [`one_sample_ttest`], renamed and extended with the means of both inputs
/// and of the differences.
///
/// # Errors
///
/// Returns [`Error::DimensionMismatch`] when the slices differ in length,
/// [`Error::InvalidValue`] when they are empty, and any error reported by
/// [`one_sample_ttest`].
pub fn paired_ttest(
    before: &[f64],
    after: &[f64],
    alternative: AlternativeHypothesis,
) -> Result<TestResult> {
    let pairs = before.len();
    if pairs != after.len() {
        return Err(Error::DimensionMismatch(
            "Before and after groups must have same length".into(),
        ));
    }
    if pairs == 0 {
        return Err(Error::InvalidValue("Data cannot be empty".into()));
    }

    let differences: Vec<f64> = before.iter().zip(after).map(|(b, a)| b - a).collect();
    let mut outcome = one_sample_ttest(&differences, 0.0, alternative)?;

    let mean_of = |values: &[f64]| values.iter().sum::<f64>() / values.len() as f64;
    outcome.test_name = "Paired samples t-test".to_string();
    let extras = [
        ("mean_before", mean_of(before)),
        ("mean_after", mean_of(after)),
        ("mean_difference", mean_of(&differences)),
    ];
    for (key, mean) in extras.iter() {
        outcome.additional_info.insert(key.to_string(), *mean);
    }
    Ok(outcome)
}
/// Performs a one-way analysis of variance across `groups`.
///
/// The total sum of squares is split into a between-group part
/// (`df = k - 1`) and a within-group part (`df = N - k`), and the F statistic
/// is the ratio of the two mean squares. The p-value is the upper tail of the
/// F distribution and the critical value is its 0.95 quantile.
///
/// The reported effect size is eta squared (`SS_between / SS_total`); omega
/// squared and the ANOVA table entries are stored in `additional_info`.
/// `degrees_of_freedom` holds the between-group degrees of freedom only; the
/// within-group value is available as `additional_info["df_within"]`.
///
/// # Errors
///
/// Returns [`Error::InvalidValue`] when fewer than two groups are supplied,
/// when any group is empty, or when every group holds a single observation
/// (no within-group degrees of freedom are left). Errors from constructing the
/// F distribution are propagated.
pub fn one_way_anova(groups: &[&[f64]]) -> Result<TestResult> {
    if groups.is_empty() {
        return Err(Error::InvalidValue("At least one group is required".into()));
    }
    if groups.len() < 2 {
        return Err(Error::InvalidValue(
            "At least two groups are required for ANOVA".into(),
        ));
    }
    if let Some(i) = groups.iter().position(|group| group.is_empty()) {
        return Err(Error::InvalidValue(format!("Group {} is empty", i)));
    }

    let n_total: usize = groups.iter().map(|group| group.len()).sum();
    // Every group is non-empty, so equality means one observation per group:
    // the within-group mean square would be 0 / 0.
    if n_total <= groups.len() {
        return Err(Error::InvalidValue(
            "At least one group must contain more than one observation".into(),
        ));
    }
    let k = groups.len() as f64;
    let n = n_total as f64;

    let overall_mean = groups.iter().flat_map(|group| group.iter()).sum::<f64>() / n;

    let mut ss_between = 0.0;
    let mut ss_within = 0.0;
    for group in groups {
        let group_mean = group.iter().sum::<f64>() / group.len() as f64;
        ss_between += group.len() as f64 * (group_mean - overall_mean).powi(2);
        for &value in group.iter() {
            ss_within += (value - group_mean).powi(2);
        }
    }
    let ss_total = ss_between + ss_within;

    let df_between = k - 1.0;
    let df_within = n - k;
    let ms_between = ss_between / df_between;
    let ms_within = ss_within / df_within;
    let f_statistic = ms_between / ms_within;

    let f_dist = FDistribution::new(df_between, df_within)?;
    let p_value = 1.0 - f_dist.cdf(f_statistic);
    let critical_value = f_dist.inverse_cdf(0.95);

    let eta_squared = ss_between / ss_total;
    let omega_squared = (ss_between - df_between * ms_within) / (ss_total + ms_within);
    let effect_size = EffectSize::EtaSquared(eta_squared);

    let mut additional_info = HashMap::new();
    additional_info.insert("ss_between".to_string(), ss_between);
    additional_info.insert("ss_within".to_string(), ss_within);
    additional_info.insert("ss_total".to_string(), ss_total);
    additional_info.insert("ms_between".to_string(), ms_between);
    additional_info.insert("ms_within".to_string(), ms_within);
    additional_info.insert("df_between".to_string(), df_between);
    additional_info.insert("df_within".to_string(), df_within);
    additional_info.insert("eta_squared".to_string(), eta_squared);
    additional_info.insert("omega_squared".to_string(), omega_squared);
    additional_info.insert("n_groups".to_string(), k);
    additional_info.insert("n_total".to_string(), n);

    Ok(TestResult {
        statistic: f_statistic,
        p_value,
        degrees_of_freedom: Some(df_between),
        critical_value: Some(critical_value),
        effect_size: Some(effect_size.value()),
        effect_size_interpretation: Some(effect_size.interpretation()),
        confidence_interval: None,
        test_name: "One-way ANOVA".to_string(),
        // The F test only looks at the upper tail.
        alternative: AlternativeHypothesis::Greater,
        reject_null: p_value < 0.05,
        additional_info,
    })
}
/// Performs Pearson's chi-square test of independence on a contingency table.
///
/// `observed[i][j]` is the frequency in row `i`, column `j`. Expected
/// frequencies are `row_total * col_total / grand_total`; cells whose expected
/// frequency is zero contribute nothing to the statistic. The degrees of
/// freedom are `(rows - 1) * (cols - 1)`, the p-value is the upper tail of the
/// chi-square distribution and the critical value is its 0.95 quantile.
///
/// The effect size is Cramér's V. `additional_info` records the table
/// dimensions, the smallest expected frequency and, when that frequency is
/// below 5, a `"warning_low_expected"` flag set to `1.0`.
///
/// # Errors
///
/// Returns [`Error::InvalidValue`] when the table is empty, has fewer than two
/// rows or two columns (zero degrees of freedom), contains a negative or
/// non-finite frequency, or sums to zero. Returns
/// [`Error::DimensionMismatch`] when the rows differ in length. Errors from
/// constructing the chi-square distribution are propagated.
pub fn chi_square_independence(observed: &[Vec<f64>]) -> Result<TestResult> {
    if observed.is_empty() || observed[0].is_empty() {
        return Err(Error::InvalidValue(
            "Contingency table cannot be empty".into(),
        ));
    }
    let rows = observed.len();
    let cols = observed[0].len();
    if observed.iter().any(|row| row.len() != cols) {
        return Err(Error::DimensionMismatch(
            "All rows must have the same length".into(),
        ));
    }
    // A single row or column leaves zero degrees of freedom and makes the
    // Cramér's V denominator zero.
    if rows < 2 || cols < 2 {
        return Err(Error::InvalidValue(
            "Contingency table must have at least 2 rows and 2 columns".into(),
        ));
    }

    let mut row_totals = vec![0.0; rows];
    let mut col_totals = vec![0.0; cols];
    let mut grand_total = 0.0;
    for (i, row) in observed.iter().enumerate() {
        for (j, &value) in row.iter().enumerate() {
            // NaN compares false against 0.0, so it needs its own check.
            if !value.is_finite() {
                return Err(Error::InvalidValue(
                    "All frequencies must be finite".into(),
                ));
            }
            if value < 0.0 {
                return Err(Error::InvalidValue(
                    "All frequencies must be non-negative".into(),
                ));
            }
            row_totals[i] += value;
            col_totals[j] += value;
            grand_total += value;
        }
    }
    if grand_total == 0.0 {
        return Err(Error::InvalidValue("Total frequency cannot be zero".into()));
    }

    let mut chi_square = 0.0;
    let mut min_expected = f64::INFINITY;
    for (row, &row_total) in observed.iter().zip(&row_totals) {
        for (&count, &col_total) in row.iter().zip(&col_totals) {
            let expected = (row_total * col_total) / grand_total;
            if expected < min_expected {
                min_expected = expected;
            }
            if expected > 0.0 {
                chi_square += (count - expected).powi(2) / expected;
            }
        }
    }

    let df = (rows - 1) * (cols - 1);
    let chi_sq_dist = ChiSquared::new(df as f64)?;
    let p_value = 1.0 - chi_sq_dist.cdf(chi_square);
    let critical_value = chi_sq_dist.inverse_cdf(0.95);

    let cramers_v = (chi_square / (grand_total * ((rows.min(cols) - 1) as f64))).sqrt();
    let effect_size = EffectSize::CramersV(cramers_v);

    let mut additional_info = HashMap::new();
    additional_info.insert("degrees_of_freedom".to_string(), df as f64);
    additional_info.insert("grand_total".to_string(), grand_total);
    additional_info.insert("min_expected_frequency".to_string(), min_expected);
    additional_info.insert("cramers_v".to_string(), cramers_v);
    additional_info.insert("n_rows".to_string(), rows as f64);
    additional_info.insert("n_cols".to_string(), cols as f64);
    if min_expected < 5.0 {
        additional_info.insert("warning_low_expected".to_string(), 1.0);
    }

    Ok(TestResult {
        statistic: chi_square,
        p_value,
        degrees_of_freedom: Some(df as f64),
        critical_value: Some(critical_value),
        effect_size: Some(effect_size.value()),
        effect_size_interpretation: Some(effect_size.interpretation()),
        confidence_interval: None,
        test_name: "Chi-square test of independence".to_string(),
        // The chi-square test only looks at the upper tail.
        alternative: AlternativeHypothesis::Greater,
        reject_null: p_value < 0.05,
        additional_info,
    })
}
/// Tests whether the Pearson correlation between `x` and `y` differs from zero.
///
/// The correlation `r` is converted to `t = r * sqrt((n - 2) / (1 - r^2))`
/// with `n - 2` degrees of freedom. When `1 - r^2` is below `1e-10` the
/// statistic is set to plus or minus infinity and the p-value to exactly 0 or
/// 1, depending on the sign of `r` and on `alternative`.
///
/// The confidence interval for `r` comes from the Fisher z-transform with a
/// fixed normal quantile of 1.96, independent of `alternative`. For
/// `|r| >= 0.999999` a narrow interval of half-width `1e-6` is reported
/// instead. Both `r` and the interval bounds are kept inside `[-1, 1]`.
///
/// # Errors
///
/// Returns [`Error::DimensionMismatch`] when the inputs differ in length and
/// [`Error::InvalidValue`] when fewer than 3 points are given or when
/// `sqrt(Sxx * Syy)` is below `1e-10` (treated as zero variance). Errors from
/// constructing the t distribution are propagated.
pub fn correlation_test(
    x: &[f64],
    y: &[f64],
    alternative: AlternativeHypothesis,
) -> Result<TestResult> {
    if x.len() != y.len() {
        return Err(Error::DimensionMismatch(
            "X and Y must have the same length".into(),
        ));
    }
    if x.len() < 3 {
        return Err(Error::InvalidValue(
            "At least 3 data points are required".into(),
        ));
    }

    let n = x.len() as f64;
    let mean_x = x.iter().sum::<f64>() / n;
    let mean_y = y.iter().sum::<f64>() / n;

    let mut sum_xy = 0.0;
    let mut sum_xx = 0.0;
    let mut sum_yy = 0.0;
    for (&xi, &yi) in x.iter().zip(y.iter()) {
        let dx = xi - mean_x;
        let dy = yi - mean_y;
        sum_xy += dx * dy;
        sum_xx += dx * dx;
        sum_yy += dy * dy;
    }

    let denominator = (sum_xx * sum_yy).sqrt();
    if denominator < 1e-10 {
        return Err(Error::InvalidValue(
            "Cannot compute correlation: zero variance".into(),
        ));
    }
    // For (nearly) collinear data rounding can leave the ratio a few ulps
    // outside [-1, 1]; clamp so that r and r^2 stay valid.
    let r = (sum_xy / denominator).clamp(-1.0, 1.0);

    let denominator_t = 1.0 - r.powi(2);
    let t_statistic = if denominator_t < 1e-10 {
        if r > 0.0 {
            f64::INFINITY
        } else {
            f64::NEG_INFINITY
        }
    } else {
        r * ((n - 2.0) / denominator_t).sqrt()
    };

    let df = n - 2.0;
    let t_dist = TDistribution::new(df)?;
    let p_value = if t_statistic.is_infinite() {
        // Perfect correlation: the tail probability is decided by the sign.
        let positive = t_statistic > 0.0;
        match alternative {
            AlternativeHypothesis::TwoSided => 0.0,
            AlternativeHypothesis::Greater => {
                if positive {
                    0.0
                } else {
                    1.0
                }
            }
            AlternativeHypothesis::Less => {
                if positive {
                    1.0
                } else {
                    0.0
                }
            }
        }
    } else {
        match alternative {
            AlternativeHypothesis::TwoSided => 2.0 * (1.0 - t_dist.cdf(t_statistic.abs())),
            AlternativeHypothesis::Greater => 1.0 - t_dist.cdf(t_statistic),
            AlternativeHypothesis::Less => t_dist.cdf(t_statistic),
        }
    };
    let critical_value = match alternative {
        AlternativeHypothesis::TwoSided => t_dist.inverse_cdf(0.975),
        AlternativeHypothesis::Greater => t_dist.inverse_cdf(0.95),
        AlternativeHypothesis::Less => t_dist.inverse_cdf(0.05),
    };

    let effect_size = EffectSize::PearsonR(r);

    let (ci_lower, ci_upper) = if r.abs() >= 0.999999 {
        // The Fisher transform diverges as |r| approaches 1, so report a
        // narrow fixed-width interval instead.
        let margin = 1e-6;
        let r_clamped = r.clamp(-0.999999, 0.999999);
        (r_clamped - margin, r_clamped + margin)
    } else {
        let z_r = 0.5 * ((1.0 + r) / (1.0 - r)).ln();
        let se_z = 1.0 / (n - 3.0).sqrt();
        let z_critical = 1.96;
        (
            (z_r - z_critical * se_z).tanh(),
            (z_r + z_critical * se_z).tanh(),
        )
    };
    // A correlation cannot leave [-1, 1], so neither may its interval.
    let ci = (ci_lower.clamp(-1.0, 1.0), ci_upper.clamp(-1.0, 1.0));

    let mut additional_info = HashMap::new();
    additional_info.insert("correlation".to_string(), r);
    additional_info.insert("n".to_string(), n);
    additional_info.insert("mean_x".to_string(), mean_x);
    additional_info.insert("mean_y".to_string(), mean_y);
    additional_info.insert("r_squared".to_string(), r.powi(2));

    Ok(TestResult {
        statistic: t_statistic,
        p_value,
        degrees_of_freedom: Some(df),
        critical_value: Some(critical_value),
        effect_size: Some(effect_size.value()),
        effect_size_interpretation: Some(effect_size.interpretation()),
        confidence_interval: Some(ci),
        test_name: "Pearson correlation test".to_string(),
        alternative,
        reject_null: p_value < 0.05,
        additional_info,
    })
}
/// Rough approximation of the Shapiro-Wilk normality test.
///
/// The statistic has the Shapiro-Wilk form `W = (sum a_i (x_(n-i) - x_(i)))^2 / SS`
/// over the sorted data, but the weights are a fixed heuristic (`0.7071` for
/// the outermost pair and `0.5 / (i + 1)` for the others) rather than the
/// tabulated Shapiro-Wilk coefficients. The p-value is heuristic as well:
/// `0.01` whenever `W <= 0.95`, otherwise an upper normal tail of
/// `(ln W + 1) * sqrt(n)`, clamped to `[0.001, 1.0]`. Treat both numbers as
/// indicative only.
///
/// `critical_value` is the fixed `0.95` threshold used above, and
/// `additional_info["note"]` is always set to `1.0` (presumably a marker that
/// the result is approximate; confirm against consumers of this field).
///
/// # Errors
///
/// Returns [`Error::InvalidValue`] when fewer than 3 or more than 5000
/// observations are supplied, or when all observations are identical (the
/// statistic would be 0/0). Errors from constructing the normal distribution
/// are propagated.
pub fn shapiro_wilk_test(data: &[f64]) -> Result<TestResult> {
    let n = data.len();
    if n < 3 {
        return Err(Error::InvalidValue(
            "At least 3 observations required for Shapiro-Wilk test".into(),
        ));
    }
    if n > 5000 {
        return Err(Error::InvalidValue(
            "Shapiro-Wilk test not reliable for n > 5000".into(),
        ));
    }

    let mut sorted_data = data.to_vec();
    sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // Compare the extremes rather than testing `variance == 0.0`: rounding in
    // the mean can leave a tiny non-zero variance for constant data.
    if sorted_data[0] == sorted_data[n - 1] {
        return Err(Error::InvalidValue(
            "Shapiro-Wilk test requires at least two distinct values".into(),
        ));
    }

    let mean = data.iter().sum::<f64>() / n as f64;
    let variance = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
    let std_dev = variance.sqrt();

    // Weighted sum of the symmetric order-statistic differences.
    let mut w_numerator = 0.0;
    for i in 0..n / 2 {
        let coeff = if i == 0 {
            0.7071
        } else {
            0.5 / (i as f64 + 1.0)
        };
        w_numerator += coeff * (sorted_data[n - 1 - i] - sorted_data[i]);
    }
    // (n - 1) * variance is the sum of squared deviations from the mean.
    let w_denominator = (n - 1) as f64 * variance;
    let w_statistic = w_numerator.powi(2) / w_denominator;

    let raw_p_value = if w_statistic > 0.95 {
        let normalized = (w_statistic.ln() + 1.0) * (n as f64).sqrt();
        1.0 - Normal::new(0.0, 1.0)?.cdf(normalized)
    } else {
        0.01
    };
    // Report and decide on the same, clamped, p-value.
    let p_value = raw_p_value.max(0.001).min(1.0);

    let mut additional_info = HashMap::new();
    additional_info.insert("n".to_string(), n as f64);
    additional_info.insert("mean".to_string(), mean);
    additional_info.insert("std_dev".to_string(), std_dev);
    additional_info.insert("note".to_string(), 1.0);

    Ok(TestResult {
        statistic: w_statistic,
        p_value,
        degrees_of_freedom: None,
        critical_value: Some(0.95),
        effect_size: None,
        effect_size_interpretation: None,
        confidence_interval: None,
        test_name: "Shapiro-Wilk normality test (approximation)".to_string(),
        alternative: AlternativeHypothesis::Greater,
        reject_null: p_value < 0.05,
        additional_info,
    })
}
/// Procedure used by [`adjust_p_values`] to correct for multiple comparisons.
#[derive(Debug, Clone)]
pub enum MultipleComparisonCorrection {
    /// Leave the p-values untouched.
    None,
    /// Multiply every p-value by the number of tests.
    Bonferroni,
    /// Holm's step-down variant of the Bonferroni correction.
    HolmBonferroni,
    /// Benjamini-Hochberg step-up procedure.
    BenjaminiHochberg,
    /// Benjamini-Yekutieli step-up procedure (Benjamini-Hochberg scaled by the
    /// harmonic sum `1 + 1/2 + ... + 1/n`).
    BenjaminiYekutieli,
}

/// Adjusts `p_values` for multiple comparisons with the chosen `method`.
///
/// The output has the same length and order as the input, and every adjusted
/// value is capped at `1.0`. An empty input yields an empty output for any
/// method.
pub fn adjust_p_values(p_values: &[f64], method: MultipleComparisonCorrection) -> Result<Vec<f64>> {
    /// Pairs each p-value with its input position and stably sorts the pairs
    /// by p-value, ascending or descending.
    fn ranked(p_values: &[f64], descending: bool) -> Vec<(usize, f64)> {
        let mut pairs: Vec<(usize, f64)> = p_values.iter().copied().enumerate().collect();
        pairs.sort_by(|lhs, rhs| {
            let ordering = if descending {
                rhs.1.partial_cmp(&lhs.1)
            } else {
                lhs.1.partial_cmp(&rhs.1)
            };
            ordering.unwrap_or(std::cmp::Ordering::Equal)
        });
        pairs
    }

    /// Step-up pass shared by the two Benjamini procedures: walk from the
    /// largest p-value down, carrying a running minimum so the adjusted values
    /// stay monotone. `raw(p, divisor)` is the uncapped adjusted value, where
    /// `divisor` is `n - rank` in the descending order.
    fn step_up(p_values: &[f64], raw: impl Fn(f64, f64) -> f64) -> Vec<f64> {
        let total = p_values.len();
        let mut adjusted = vec![0.0; total];
        let mut ceiling = 1.0;
        for (rank, &(position, p)) in ranked(p_values, true).iter().enumerate() {
            ceiling = raw(p, (total - rank) as f64).min(1.0).min(ceiling);
            adjusted[position] = ceiling;
        }
        adjusted
    }

    let total = p_values.len();
    if total == 0 {
        return Ok(Vec::new());
    }
    let count = total as f64;

    let adjusted = match method {
        MultipleComparisonCorrection::None => p_values.to_vec(),
        MultipleComparisonCorrection::Bonferroni => {
            p_values.iter().map(|&p| (p * count).min(1.0)).collect()
        }
        MultipleComparisonCorrection::HolmBonferroni => {
            // Step-down pass: walk from the smallest p-value up, carrying a
            // running maximum so the adjusted values stay monotone.
            let mut adjusted = vec![0.0; total];
            let mut floor = 0.0;
            for (rank, &(position, p)) in ranked(p_values, false).iter().enumerate() {
                floor = (p * (total - rank) as f64).min(1.0).max(floor);
                adjusted[position] = floor;
            }
            adjusted
        }
        MultipleComparisonCorrection::BenjaminiHochberg => {
            step_up(p_values, |p, divisor| p * count / divisor)
        }
        MultipleComparisonCorrection::BenjaminiYekutieli => {
            let harmonic: f64 = (1..=total).map(|i| 1.0 / i as f64).sum();
            step_up(p_values, |p, divisor| p * count * harmonic / divisor)
        }
    };
    Ok(adjusted)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A sample centred on the hypothesized mean must not reject the null.
    #[test]
    fn test_one_sample_ttest() {
        let sample = [1.0, 2.0, 3.0, 4.0, 5.0];
        let outcome = one_sample_ttest(&sample, 3.0, AlternativeHypothesis::TwoSided)
            .expect("operation should succeed");

        assert_eq!(outcome.test_name, "One-sample t-test");
        assert!(outcome.p_value > 0.05);
        assert!(!outcome.reject_null);
    }

    /// The pooled-variance test reports a name, an effect size and an interval.
    #[test]
    fn test_independent_ttest() {
        let first = [1.0, 2.0, 3.0, 4.0, 5.0];
        let second = [3.0, 4.0, 5.0, 6.0, 7.0];
        let outcome = independent_ttest(&first, &second, AlternativeHypothesis::TwoSided, true)
            .expect("operation should succeed");

        assert!(outcome.test_name.contains("t-test"));
        assert!(outcome.effect_size.is_some());
        assert!(outcome.confidence_interval.is_some());
    }

    /// Perfectly linear data gives a correlation of one and rejects the null.
    #[test]
    fn test_correlation_test() {
        let xs = [1.0, 2.0, 3.0, 4.0, 5.0];
        let ys = [2.0, 4.0, 6.0, 8.0, 10.0];
        let outcome = correlation_test(&xs, &ys, AlternativeHypothesis::TwoSided)
            .expect("operation should succeed");

        assert_eq!(outcome.test_name, "Pearson correlation test");
        let deviation = outcome.additional_info["correlation"] - 1.0;
        assert!(deviation.abs() < 1e-10);
        assert!(outcome.reject_null);
    }

    /// A 2x3 table yields degrees of freedom and an effect size.
    #[test]
    fn test_chi_square_independence() {
        let table = [vec![10.0, 15.0, 25.0], vec![20.0, 10.0, 15.0]];
        let outcome = chi_square_independence(&table).expect("operation should succeed");

        assert_eq!(outcome.test_name, "Chi-square test of independence");
        assert!(outcome.degrees_of_freedom.is_some());
        assert!(outcome.effect_size.is_some());
    }

    /// Bonferroni multiplies each p-value by the number of tests.
    #[test]
    fn test_multiple_comparison_bonferroni() {
        let raw = [0.01, 0.02, 0.03, 0.04, 0.05];
        let corrected = adjust_p_values(&raw, MultipleComparisonCorrection::Bonferroni)
            .expect("operation should succeed");

        for &(index, expected) in [(0_usize, 0.05), (1, 0.10), (4, 0.25)].iter() {
            assert!((corrected[index] - expected).abs() < 1e-10);
        }
    }

    /// Labels follow the per-family benchmark thresholds.
    #[test]
    fn test_effect_size_interpretation() {
        assert_eq!(EffectSize::CohensD(0.3).interpretation(), "Small");
        assert_eq!(EffectSize::CohensD(1.0).interpretation(), "Large");
        assert_eq!(EffectSize::PearsonR(0.4).interpretation(), "Medium");
    }
}