Skip to main content

cbtop/statistics/
analysis.rs

//! Core statistical analysis types and confidence intervals.
2
3use super::helpers::bootstrap_ci;
4
/// Qualitative magnitude of an effect size, using Cohen's conventional cut-offs.
///
/// The variants are ordered from weakest to strongest effect; the thresholds
/// below apply to the absolute value of Cohen's d.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectCategory {
    /// `|d| < 0.2`: negligible practical significance.
    Negligible,
    /// `0.2 <= |d| < 0.5`: small effect.
    Small,
    /// `0.5 <= |d| < 0.8`: medium effect.
    Medium,
    /// `|d| >= 0.8`: large effect.
    Large,
}
17
18impl EffectCategory {
19    /// Get description of effect category
20    pub fn description(&self) -> &'static str {
21        match self {
22            EffectCategory::Negligible => "negligible practical significance",
23            EffectCategory::Small => "small effect",
24            EffectCategory::Medium => "medium effect",
25            EffectCategory::Large => "large effect",
26        }
27    }
28
29    /// Categorize effect size from Cohen's d value
30    pub fn from_cohens_d(d: f64) -> Self {
31        let abs_d = d.abs();
32        if abs_d < 0.2 {
33            EffectCategory::Negligible
34        } else if abs_d < 0.5 {
35            EffectCategory::Small
36        } else if abs_d < 0.8 {
37            EffectCategory::Medium
38        } else {
39            EffectCategory::Large
40        }
41    }
42}
43
/// Summary statistics for a set of samples, together with a confidence
/// interval around the mean.
#[derive(Debug, Clone)]
pub struct StatisticalAnalysis {
    /// Arithmetic mean of the samples.
    pub mean: f64,
    /// Sample standard deviation (`n - 1` denominator).
    pub std_dev: f64,
    /// Standard error of the mean: `std_dev / sqrt(n)`.
    pub std_error: f64,
    /// Lower bound of the confidence interval.
    pub ci_lower: f64,
    /// Upper bound of the confidence interval.
    pub ci_upper: f64,
    /// Confidence level the interval was requested at (e.g. `0.95` for 95%).
    pub confidence_level: f64,
    /// Number of samples the statistics were computed from.
    pub n: usize,
    /// Coefficient of variation in percent: `std_dev / |mean| * 100`,
    /// or `0.0` when the mean is exactly zero.
    pub cv_percent: f64,
}
64
65impl StatisticalAnalysis {
66    /// Compute statistical analysis from samples
67    pub fn from_samples(samples: &[f64], confidence_level: f64) -> Option<Self> {
68        if samples.is_empty() {
69            return None;
70        }
71
72        // Filter out NaN/Inf
73        let valid: Vec<f64> = samples.iter().copied().filter(|x| x.is_finite()).collect();
74
75        if valid.is_empty() {
76            return None;
77        }
78
79        let n = valid.len();
80        let mean = valid.iter().sum::<f64>() / n as f64;
81
82        if n == 1 {
83            return Some(Self {
84                mean,
85                std_dev: 0.0,
86                std_error: 0.0,
87                ci_lower: mean,
88                ci_upper: mean,
89                confidence_level,
90                n,
91                cv_percent: 0.0,
92            });
93        }
94
95        let variance = valid.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
96        let std_dev = variance.sqrt();
97        let std_error = std_dev / (n as f64).sqrt();
98
99        // Bootstrap confidence interval
100        let (ci_lower, ci_upper) = bootstrap_ci(&valid, confidence_level, 10000);
101
102        let cv_percent = if mean != 0.0 {
103            (std_dev / mean.abs()) * 100.0
104        } else {
105            0.0
106        };
107
108        Some(Self {
109            mean,
110            std_dev,
111            std_error,
112            ci_lower,
113            ci_upper,
114            confidence_level,
115            n,
116            cv_percent,
117        })
118    }
119
120    /// Compute with default 95% confidence level
121    pub fn from_samples_default(samples: &[f64]) -> Option<Self> {
122        Self::from_samples(samples, 0.95)
123    }
124
125    /// Get CI width
126    pub fn ci_width(&self) -> f64 {
127        self.ci_upper - self.ci_lower
128    }
129
130    /// Check if CI is narrow (< 10% of mean)
131    pub fn ci_is_narrow(&self) -> bool {
132        if self.mean == 0.0 {
133            return self.ci_width() < 0.1;
134        }
135        (self.ci_width() / self.mean.abs()) < 0.1
136    }
137}
138
139/// Effect size calculation result
140#[derive(Debug, Clone)]
141pub struct EffectSize {
142    /// Cohen's d value
143    pub cohens_d: f64,
144    /// Effect category
145    pub category: EffectCategory,
146    /// 95% CI lower for effect size
147    pub ci_lower: f64,
148    /// 95% CI upper for effect size
149    pub ci_upper: f64,
150}
151
152impl EffectSize {
153    /// Calculate Cohen's d between two samples
154    pub fn cohens_d(sample1: &[f64], sample2: &[f64]) -> Option<Self> {
155        if sample1.is_empty() || sample2.is_empty() {
156            return None;
157        }
158
159        let n1 = sample1.len() as f64;
160        let n2 = sample2.len() as f64;
161
162        let mean1 = sample1.iter().sum::<f64>() / n1;
163        let mean2 = sample2.iter().sum::<f64>() / n2;
164
165        let var1 = sample1.iter().map(|x| (x - mean1).powi(2)).sum::<f64>() / (n1 - 1.0).max(1.0);
166        let var2 = sample2.iter().map(|x| (x - mean2).powi(2)).sum::<f64>() / (n2 - 1.0).max(1.0);
167
168        // Pooled standard deviation
169        let pooled_var = ((n1 - 1.0) * var1 + (n2 - 1.0) * var2) / (n1 + n2 - 2.0).max(1.0);
170        let pooled_std = pooled_var.sqrt();
171
172        if pooled_std == 0.0 {
173            return Some(Self {
174                cohens_d: 0.0,
175                category: EffectCategory::Negligible,
176                ci_lower: 0.0,
177                ci_upper: 0.0,
178            });
179        }
180
181        let d = (mean1 - mean2) / pooled_std;
182        let category = EffectCategory::from_cohens_d(d);
183
184        // Approximate 95% CI for Cohen's d using non-central t approximation
185        let se_d = ((n1 + n2) / (n1 * n2) + d.powi(2) / (2.0 * (n1 + n2))).sqrt();
186        let ci_lower = d - 1.96 * se_d;
187        let ci_upper = d + 1.96 * se_d;
188
189        Some(Self {
190            cohens_d: d,
191            category,
192            ci_lower,
193            ci_upper,
194        })
195    }
196
197    /// Check if effect is practically significant
198    pub fn is_significant(&self) -> bool {
199        self.category != EffectCategory::Negligible
200    }
201}