cbtop/statistics/
comparison.rs1use std::cmp::Ordering;
4
5use super::analysis::EffectSize;
6use super::helpers::{normal_cdf, percentile};
7
8fn sample_stats(samples: &[f64]) -> (f64, f64, f64) {
18 let n = samples.len() as f64;
19 let mean = samples.iter().sum::<f64>() / n;
20 let divisor = (n - 1.0).max(1.0);
21 let var = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / divisor;
22 (n, mean, var)
23}
24
25fn two_tailed_p(abs_stat: f64) -> f64 {
28 2.0 * (1.0 - normal_cdf(abs_stat))
29}
30
31fn sort_finite(samples: &[f64]) -> Option<Vec<f64>> {
35 let mut v: Vec<f64> = samples.iter().copied().filter(|x| x.is_finite()).collect();
36 if v.is_empty() {
37 return None;
38 }
39 v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
40 Some(v)
41}
42
43#[derive(Debug, Clone)]
45pub struct ComparisonResult {
46 pub t_statistic: f64,
48 pub p_value: f64,
50 pub effect_size: EffectSize,
52 pub statistically_significant: bool,
54 pub practically_significant: bool,
56 pub degrees_of_freedom: f64,
58}
59
60impl ComparisonResult {
61 pub fn welch_t_test(sample1: &[f64], sample2: &[f64]) -> Option<Self> {
63 if sample1.len() < 2 || sample2.len() < 2 {
64 return None;
65 }
66
67 let (n1, mean1, var1) = sample_stats(sample1);
68 let (n2, mean2, var2) = sample_stats(sample2);
69
70 let se1 = var1 / n1;
71 let se2 = var2 / n2;
72 let se_diff = (se1 + se2).sqrt();
73
74 if se_diff == 0.0 {
75 return None;
76 }
77
78 let t = (mean1 - mean2) / se_diff;
79
80 let df = (se1 + se2).powi(2) / (se1.powi(2) / (n1 - 1.0) + se2.powi(2) / (n2 - 1.0));
82
83 let p_value = two_tailed_p(t.abs());
84 let effect_size = EffectSize::cohens_d(sample1, sample2)?;
85
86 Some(Self {
87 t_statistic: t,
88 p_value,
89 effect_size: effect_size.clone(),
90 statistically_significant: p_value < 0.05,
91 practically_significant: effect_size.is_significant(),
92 degrees_of_freedom: df,
93 })
94 }
95
96 pub fn is_meaningful(&self) -> bool {
98 self.statistically_significant && self.practically_significant
99 }
100}
101
102#[derive(Debug, Clone)]
104pub struct MannWhitneyResult {
105 pub u_statistic: f64,
107 pub p_value: f64,
109 pub effect_size: f64,
111 pub significant: bool,
113}
114
115impl MannWhitneyResult {
116 pub fn test(sample1: &[f64], sample2: &[f64]) -> Option<Self> {
118 if sample1.is_empty() || sample2.is_empty() {
119 return None;
120 }
121
122 let n1 = sample1.len();
123 let n2 = sample2.len();
124
125 let mut combined: Vec<(f64, usize)> = sample1
127 .iter()
128 .map(|&x| (x, 0))
129 .chain(sample2.iter().map(|&x| (x, 1)))
130 .collect();
131
132 combined.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
133
134 let mut ranks: Vec<f64> = vec![0.0; combined.len()];
136 let mut i = 0;
137 while i < combined.len() {
138 let mut j = i;
139 while j < combined.len() && combined[j].0 == combined[i].0 {
140 j += 1;
141 }
142 let avg_rank = (i + j + 1) as f64 / 2.0;
143 for k in i..j {
144 ranks[k] = avg_rank;
145 }
146 i = j;
147 }
148
149 let r1: f64 = combined
151 .iter()
152 .enumerate()
153 .filter(|(_, (_, group))| *group == 0)
154 .map(|(idx, _)| ranks[idx])
155 .sum();
156
157 let u1 = r1 - (n1 * (n1 + 1)) as f64 / 2.0;
159 let u2 = (n1 * n2) as f64 - u1;
160 let u = u1.min(u2);
161
162 let mean_u = (n1 * n2) as f64 / 2.0;
164 let std_u = ((n1 * n2 * (n1 + n2 + 1)) as f64 / 12.0).sqrt();
165
166 let z = if std_u > 0.0 {
167 (u - mean_u) / std_u
168 } else {
169 0.0
170 };
171 let p_value = two_tailed_p(z.abs());
172
173 let effect_size = 1.0 - (2.0 * u) / (n1 * n2) as f64;
175
176 Some(Self {
177 u_statistic: u,
178 p_value,
179 effect_size,
180 significant: p_value < 0.05,
181 })
182 }
183}
184
185#[derive(Debug, Clone)]
187pub struct OutlierFilter {
188 pub lower_fence: f64,
190 pub upper_fence: f64,
192 pub q1: f64,
194 pub q3: f64,
196 pub iqr: f64,
198 pub multiplier: f64,
200}
201
202impl OutlierFilter {
203 pub fn new(samples: &[f64]) -> Option<Self> {
205 Self::with_multiplier(samples, 1.5)
206 }
207
208 pub fn with_multiplier(samples: &[f64], multiplier: f64) -> Option<Self> {
210 let sorted = sort_finite(samples)?;
211
212 let q1 = percentile(&sorted, 0.25);
213 let q3 = percentile(&sorted, 0.75);
214 let iqr = q3 - q1;
215
216 let lower_fence = q1 - multiplier * iqr;
217 let upper_fence = q3 + multiplier * iqr;
218
219 Some(Self {
220 lower_fence,
221 upper_fence,
222 q1,
223 q3,
224 iqr,
225 multiplier,
226 })
227 }
228
229 pub fn is_outlier(&self, value: f64) -> bool {
231 value < self.lower_fence || value > self.upper_fence
232 }
233
234 pub fn filter(&self, samples: &[f64]) -> Vec<f64> {
236 samples
237 .iter()
238 .copied()
239 .filter(|&x| !self.is_outlier(x))
240 .collect()
241 }
242
243 pub fn count_outliers(&self, samples: &[f64]) -> usize {
245 samples.iter().filter(|&&x| self.is_outlier(x)).count()
246 }
247}