1pub struct StatisticalAnalyzer;
5
6impl StatisticalAnalyzer {
7 pub fn welch_t_test(sample1: &[f64], sample2: &[f64]) -> TestResult {
11 if sample1.len() < 2 || sample2.len() < 2 {
12 return TestResult {
13 statistic: 0.0,
14 p_value: 1.0,
15 significant: false,
16 effect_size: 0.0,
17 };
18 }
19
20 let n1 = sample1.len() as f64;
21 let n2 = sample2.len() as f64;
22
23 let mean1 = sample1.iter().sum::<f64>() / n1;
24 let mean2 = sample2.iter().sum::<f64>() / n2;
25
26 let var1 = sample1.iter().map(|x| (x - mean1).powi(2)).sum::<f64>() / (n1 - 1.0);
27 let var2 = sample2.iter().map(|x| (x - mean2).powi(2)).sum::<f64>() / (n2 - 1.0);
28
29 let se = ((var1 / n1) + (var2 / n2)).sqrt();
30 if se == 0.0 {
31 return TestResult {
32 statistic: 0.0,
33 p_value: 1.0,
34 significant: false,
35 effect_size: 0.0,
36 };
37 }
38
39 let t = (mean1 - mean2) / se;
40
41 let df_num = ((var1 / n1) + (var2 / n2)).powi(2);
43 let df_denom = ((var1 / n1).powi(2) / (n1 - 1.0)) + ((var2 / n2).powi(2) / (n2 - 1.0));
44 let df = df_num / df_denom;
45
46 let p_value = Self::t_to_p(t.abs(), df);
48
49 let pooled_std = f64::midpoint(var1, var2).sqrt();
51 let effect_size = if pooled_std > 0.0 {
52 (mean1 - mean2).abs() / pooled_std
53 } else {
54 0.0
55 };
56
57 TestResult {
58 statistic: t,
59 p_value,
60 significant: p_value < 0.05,
61 effect_size,
62 }
63 }
64
65 pub fn mann_whitney_u(sample1: &[f64], sample2: &[f64]) -> TestResult {
67 if sample1.is_empty() || sample2.is_empty() {
68 return TestResult {
69 statistic: 0.0,
70 p_value: 1.0,
71 significant: false,
72 effect_size: 0.0,
73 };
74 }
75
76 let n1 = sample1.len();
77 let n2 = sample2.len();
78
79 let mut u = 0.0;
81 for &x in sample1 {
82 for &y in sample2 {
83 if x > y {
84 u += 1.0;
85 } else if (x - y).abs() < 1e-10 {
86 u += 0.5;
87 }
88 }
89 }
90
91 let mu = (n1 * n2) as f64 / 2.0;
93 let sigma = ((n1 * n2 * (n1 + n2 + 1)) as f64 / 12.0).sqrt();
94
95 let z = if sigma > 0.0 { (u - mu) / sigma } else { 0.0 };
96 let p_value = 2.0 * Self::normal_cdf(-z.abs());
97
98 let effect_size = z.abs() / ((n1 + n2) as f64).sqrt();
100
101 TestResult {
102 statistic: u,
103 p_value,
104 significant: p_value < 0.05,
105 effect_size,
106 }
107 }
108
109 pub fn anova(groups: &[Vec<f64>]) -> TestResult {
111 if groups.len() < 2 {
112 return TestResult {
113 statistic: 0.0,
114 p_value: 1.0,
115 significant: false,
116 effect_size: 0.0,
117 };
118 }
119
120 let k = groups.len() as f64;
121 let n_total: usize = groups.iter().map(Vec::len).sum();
122
123 let grand_mean: f64 = groups.iter().flat_map(|g| g.iter()).sum::<f64>() / n_total as f64;
125
126 let ss_between: f64 = groups
128 .iter()
129 .map(|g| {
130 let group_mean = g.iter().sum::<f64>() / g.len() as f64;
131 g.len() as f64 * (group_mean - grand_mean).powi(2)
132 })
133 .sum();
134
135 let ss_within: f64 = groups
137 .iter()
138 .map(|g| {
139 let group_mean = g.iter().sum::<f64>() / g.len() as f64;
140 g.iter().map(|x| (x - group_mean).powi(2)).sum::<f64>()
141 })
142 .sum();
143
144 let df_between = k - 1.0;
145 let df_within = n_total as f64 - k;
146
147 let ms_between = ss_between / df_between;
148 let ms_within = ss_within / df_within;
149
150 let f = if ms_within > 0.0 {
151 ms_between / ms_within
152 } else {
153 0.0
154 };
155
156 let p_value = Self::f_to_p(f, df_between, df_within);
158
159 let effect_size = ss_between / (ss_between + ss_within);
161
162 TestResult {
163 statistic: f,
164 p_value,
165 significant: p_value < 0.05,
166 effect_size,
167 }
168 }
169
170 fn t_to_p(t: f64, df: f64) -> f64 {
172 if df > 30.0 {
174 2.0 * Self::normal_cdf(-t)
175 } else {
176 let adjusted_t = t * (1.0 + 1.0 / (4.0 * df));
178 2.0 * Self::normal_cdf(-adjusted_t)
179 }
180 }
181
182 fn f_to_p(f: f64, df1: f64, _df2: f64) -> f64 {
184 let chi_approx = f * df1;
186 Self::chi_square_p(chi_approx, df1)
187 }
188
189 fn chi_square_p(x: f64, df: f64) -> f64 {
191 if df > 30.0 {
193 let z = (2.0 * x).sqrt() - (2.0 * df - 1.0).sqrt();
194 Self::normal_cdf(-z)
195 } else {
196 let mean = df;
198 let std = (2.0 * df).sqrt();
199 Self::normal_cdf(-(x - mean) / std)
200 }
201 }
202
203 fn normal_cdf(x: f64) -> f64 {
205 0.5 * (1.0 + Self::erf(x / std::f64::consts::SQRT_2))
207 }
208
209 fn erf(x: f64) -> f64 {
211 let a1 = 0.254829592;
213 let a2 = -0.284496736;
214 let a3 = 1.421413741;
215 let a4 = -1.453152027;
216 let a5 = 1.061405429;
217 let p = 0.3275911;
218
219 let sign = if x < 0.0 { -1.0 } else { 1.0 };
220 let x = x.abs();
221
222 let t = 1.0 / (1.0 + p * x);
223 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
224
225 sign * y
226 }
227}
228
229#[derive(Debug, Clone)]
231pub struct TestResult {
232 pub statistic: f64,
234 pub p_value: f64,
236 pub significant: bool,
238 pub effect_size: f64,
240}
241
242impl TestResult {
243 pub fn effect_interpretation(&self) -> &'static str {
245 if self.effect_size < 0.2 {
246 "negligible"
247 } else if self.effect_size < 0.5 {
248 "small"
249 } else if self.effect_size < 0.8 {
250 "medium"
251 } else {
252 "large"
253 }
254 }
255}
256
257impl std::fmt::Display for TestResult {
258 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
259 write!(
260 f,
261 "statistic={:.3}, p={:.4}, significant={}, effect_size={:.3} ({})",
262 self.statistic,
263 self.p_value,
264 self.significant,
265 self.effect_size,
266 self.effect_interpretation()
267 )
268 }
269}
270
271pub fn confidence_interval(sample: &[f64], confidence: f64) -> (f64, f64) {
273 if sample.len() < 2 {
274 let mean = sample.first().copied().unwrap_or(0.0);
275 return (mean, mean);
276 }
277
278 let n = sample.len() as f64;
279 let mean = sample.iter().sum::<f64>() / n;
280 let std = (sample.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)).sqrt();
281 let se = std / n.sqrt();
282
283 let z = match confidence {
285 c if c >= 0.99 => 2.576,
286 c if c >= 0.95 => 1.96,
287 c if c >= 0.90 => 1.645,
288 _ => 1.96,
289 };
290
291 (mean - z * se, mean + z * se)
292}
293
294#[cfg(test)]
295mod tests {
296 use super::*;
297
298 #[test]
299 fn test_welch_t_test_significant() {
300 let sample1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
301 let sample2 = vec![6.0, 7.0, 8.0, 9.0, 10.0];
302
303 let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
304 assert!(result.significant);
305 assert!(result.p_value < 0.05);
306 }
307
308 #[test]
309 fn test_welch_t_test_not_significant() {
310 let sample1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
311 let sample2 = vec![1.5, 2.5, 3.5, 4.5, 5.5];
312
313 let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
314 assert!(result.p_value > 0.01);
316 }
317
318 #[test]
319 fn test_mann_whitney() {
320 let sample1 = vec![1.0, 2.0, 3.0];
321 let sample2 = vec![4.0, 5.0, 6.0];
322
323 let result = StatisticalAnalyzer::mann_whitney_u(&sample1, &sample2);
324 assert!(result.statistic >= 0.0);
325 }
326
327 #[test]
328 fn test_anova() {
329 let groups = vec![
330 vec![1.0, 2.0, 3.0],
331 vec![4.0, 5.0, 6.0],
332 vec![7.0, 8.0, 9.0],
333 ];
334
335 let result = StatisticalAnalyzer::anova(&groups);
336 assert!(result.significant);
337 }
338
339 #[test]
340 fn test_confidence_interval() {
341 let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
342 let (lower, upper) = confidence_interval(&sample, 0.95);
343
344 assert!(lower < 3.0); assert!(upper > 3.0);
346 }
347
348 #[test]
349 fn test_effect_interpretation() {
350 let small = TestResult {
351 statistic: 0.0,
352 p_value: 0.0,
353 significant: false,
354 effect_size: 0.3,
355 };
356 assert_eq!(small.effect_interpretation(), "small");
357
358 let large = TestResult {
359 statistic: 0.0,
360 p_value: 0.0,
361 significant: false,
362 effect_size: 1.0,
363 };
364 assert_eq!(large.effect_interpretation(), "large");
365 }
366
367 #[test]
368 fn test_effect_interpretation_all_levels() {
369 let negligible = TestResult {
371 statistic: 0.0,
372 p_value: 0.0,
373 significant: false,
374 effect_size: 0.1,
375 };
376 assert_eq!(negligible.effect_interpretation(), "negligible");
377
378 let medium = TestResult {
380 statistic: 0.0,
381 p_value: 0.0,
382 significant: false,
383 effect_size: 0.6,
384 };
385 assert_eq!(medium.effect_interpretation(), "medium");
386 }
387
388 #[test]
389 fn test_welch_t_test_small_samples() {
390 let sample1 = vec![1.0];
391 let sample2 = vec![2.0];
392
393 let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
394 assert_eq!(result.p_value, 1.0);
395 assert!(!result.significant);
396 }
397
398 #[test]
399 fn test_mann_whitney_empty_samples() {
400 let result = StatisticalAnalyzer::mann_whitney_u(&[], &[1.0, 2.0]);
401 assert_eq!(result.p_value, 1.0);
402 assert!(!result.significant);
403 }
404
405 #[test]
406 fn test_mann_whitney_ties() {
407 let sample1 = vec![1.0, 2.0, 3.0];
409 let sample2 = vec![2.0, 3.0, 4.0]; let result = StatisticalAnalyzer::mann_whitney_u(&sample1, &sample2);
412 assert!(result.statistic >= 0.0);
413 }
414
415 #[test]
416 fn test_anova_single_group() {
417 let groups = vec![vec![1.0, 2.0, 3.0]];
418
419 let result = StatisticalAnalyzer::anova(&groups);
420 assert_eq!(result.p_value, 1.0);
421 }
422
423 #[test]
424 fn test_anova_identical_groups() {
425 let groups = vec![
426 vec![5.0, 5.0, 5.0],
427 vec![5.0, 5.0, 5.0],
428 vec![5.0, 5.0, 5.0],
429 ];
430
431 let result = StatisticalAnalyzer::anova(&groups);
432 assert!(!result.significant || result.statistic.abs() < 0.001);
434 }
435
436 #[test]
437 fn test_confidence_interval_single_sample() {
438 let sample = vec![5.0];
439 let (lower, upper) = confidence_interval(&sample, 0.95);
440 assert_eq!(lower, 5.0);
441 assert_eq!(upper, 5.0);
442 }
443
444 #[test]
445 fn test_confidence_interval_99() {
446 let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
447 let (lower_95, upper_95) = confidence_interval(&sample, 0.95);
448 let (lower_99, upper_99) = confidence_interval(&sample, 0.99);
449
450 assert!(lower_99 < lower_95);
452 assert!(upper_99 > upper_95);
453 }
454
455 #[test]
456 fn test_confidence_interval_90() {
457 let sample = vec![1.0, 2.0, 3.0, 4.0, 5.0];
458 let (lower_95, upper_95) = confidence_interval(&sample, 0.95);
459 let (lower_90, upper_90) = confidence_interval(&sample, 0.90);
460
461 assert!(lower_90 > lower_95);
463 assert!(upper_90 < upper_95);
464 }
465
466 #[test]
467 fn test_test_result_to_string() {
468 let result = TestResult {
469 statistic: 2.5,
470 p_value: 0.025,
471 significant: true,
472 effect_size: 0.8,
473 };
474
475 let s = result.to_string();
476 assert!(s.contains("statistic=2.5"));
477 assert!(s.contains("p=0.0250"));
478 assert!(s.contains("significant=true"));
479 assert!(s.contains("large"));
480 }
481
482 #[test]
483 fn test_welch_t_test_zero_variance() {
484 let sample1 = vec![5.0, 5.0, 5.0, 5.0, 5.0];
486 let sample2 = vec![10.0, 10.0, 10.0, 10.0, 10.0];
487
488 let result = StatisticalAnalyzer::welch_t_test(&sample1, &sample2);
489 assert_eq!(result.p_value, 1.0);
491 }
492
493 #[test]
494 fn test_normal_cdf_properties() {
495 let cdf_0 = StatisticalAnalyzer::normal_cdf(0.0);
497 assert!((cdf_0 - 0.5).abs() < 0.01);
498
499 let cdf_large = StatisticalAnalyzer::normal_cdf(3.0);
501 assert!(cdf_large > 0.99);
502
503 let cdf_neg = StatisticalAnalyzer::normal_cdf(-3.0);
505 assert!(cdf_neg < 0.01);
506 }
507}