use crate::common::{StatError, TailType, TestResult, calculate_p};
use statrs::distribution::ChiSquared;
use std::f64;
pub fn independence(contingency_table: &[Vec<f64>], alpha: f64) -> Result<TestResult, StatError> {
let num_rows = contingency_table.len();
if num_rows < 2 {
return Err(StatError::ComputeError("At least two rows required".into()));
}
let num_cols = contingency_table[0].len();
if num_cols < 2 || !contingency_table.iter().all(|row| row.len() == num_cols) {
return Err(StatError::ComputeError(
"All rows must have equal and ≥2 columns".into(),
));
}
let total: f64 = contingency_table.iter().flatten().sum();
if total == 0.0 {
return Err(StatError::ComputeError(
"Total frequency must be greater than zero".into(),
));
}
let mut expected = vec![vec![0.0; num_cols]; num_rows];
let row_totals: Vec<f64> = contingency_table
.iter()
.map(|row| row.iter().sum())
.collect();
let col_totals: Vec<f64> = (0..num_cols)
.map(|j| contingency_table.iter().map(|row| row[j]).sum())
.collect();
for i in 0..num_rows {
for j in 0..num_cols {
expected[i][j] = row_totals[i] * col_totals[j] / total;
}
}
let test_statistic = contingency_table
.iter()
.enumerate()
.map(|(i, row)| {
row.iter()
.enumerate()
.map(|(j, &obs)| {
let exp = expected[i][j];
if exp == 0.0 {
0.0
} else {
(obs - exp).powi(2) / exp
}
})
.sum::<f64>()
})
.sum::<f64>();
let df = (num_rows - 1) * (num_cols - 1);
let chi_distribution = ChiSquared::new(df as f64)
.map_err(|e| StatError::ComputeError(format!("Chi-squared distribution error: {e}")))?;
let p_value = calculate_p(test_statistic, TailType::Right, &chi_distribution);
let reject_null = p_value < alpha;
Ok(TestResult {
test_statistic,
p_value,
confidence_interval: (f64::NAN, f64::NAN),
null_hypothesis: "H0: Variables are independent".into(),
alt_hypothesis: "Ha: Variables are not independent".into(),
reject_null,
})
}
pub fn goodness_of_fit<O, E, T, U>(
observed: O,
expected: E,
alpha: f64,
) -> Result<TestResult, StatError>
where
O: IntoIterator<Item = T>,
E: IntoIterator<Item = U>,
T: Into<f64>,
U: Into<f64>,
{
let observed: Vec<f64> = observed.into_iter().map(|x| x.into()).collect();
let expected: Vec<f64> = expected.into_iter().map(|x| x.into()).collect();
if observed.len() != expected.len() {
return Err(StatError::ComputeError(
"Observed and expected lengths must match".into(),
));
}
if observed.len() < 2 {
return Err(StatError::ComputeError(
"At least two categories required".into(),
));
}
let test_statistic: f64 = observed
.iter()
.zip(expected.iter())
.map(|(&obs, &exp)| {
if exp == 0.0 {
0.0
} else {
(obs - exp).powi(2) / exp
}
})
.sum();
let df = (observed.len() - 1) as f64;
let chi_distribution = ChiSquared::new(df)
.map_err(|e| StatError::ComputeError(format!("Chi-squared distribution error: {e}")))?;
let p_value = calculate_p(test_statistic, TailType::Right, &chi_distribution);
let reject_null = p_value < alpha;
Ok(TestResult {
test_statistic,
p_value,
confidence_interval: (f64::NAN, f64::NAN),
null_hypothesis: "H0: Observed distribution matches expected distribution".into(),
alt_hypothesis: "Ha: Observed distribution does not match expected distribution".into(),
reject_null,
})
}