1use crate::consts::Const;
2use std::collections::HashMap;
3
/// Namespace-style carrier for statistical routines: all functionality is
/// exposed as associated functions (`Stats::mean`, `Stats::median`, ...);
/// the struct itself holds no state.
#[derive(Debug, Clone)]
pub struct Stats;
6
7impl Stats {
8 pub fn mean(data: &[f64]) -> Option<f64> {
10 if data.is_empty() {
11 return None;
12 }
13 Some(data.iter().sum::<f64>() / data.len() as f64)
14 }
15
16 pub fn median(data: &[f64]) -> Option<f64> {
17 if data.is_empty() {
18 return None;
19 }
20 let mut sorted = data.to_vec();
21 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
22 let mid = sorted.len() / 2;
23 if sorted.len() % 2 == 0 {
24 Some((sorted[mid - 1] + sorted[mid]) / 2.0)
25 } else {
26 Some(sorted[mid])
27 }
28 }
29
30 pub fn mode(data: &[f64]) -> Option<Vec<f64>> {
31 if data.is_empty() {
32 return None;
33 }
34 let mut freq_map: HashMap<String, usize> = HashMap::new();
35
36 for value in data {
38 let key = format!("{:.10}", value);
39 *freq_map.entry(key).or_insert(0) += 1;
40 }
41
42 let max_freq = freq_map.values().max()?;
43 let modes: Vec<f64> = freq_map
44 .iter()
45 .filter(|(_, &count)| count == *max_freq)
46 .map(|(key, _)| key.parse::<f64>().unwrap())
47 .collect();
48
49 Some(modes)
50 }
51
52 pub fn variance(data: &[f64]) -> Option<f64> {
54 if data.len() < 2 {
55 return None;
56 }
57 let mean = Stats::mean(data)?;
58 let squared_diff_sum: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum();
59 Some(squared_diff_sum / (data.len() - 1) as f64)
60 }
61
62 pub fn std_dev(data: &[f64]) -> Option<f64> {
63 Some(Stats::variance(data)?.sqrt())
64 }
65
66 pub fn range(data: &[f64]) -> Option<f64> {
67 if data.is_empty() {
68 return None;
69 }
70 let min = data.iter().min_by(|a, b| a.partial_cmp(b).unwrap())?;
71 let max = data.iter().max_by(|a, b| a.partial_cmp(b).unwrap())?;
72 Some(max - min)
73 }
74
    /// Computes the three quartiles `(Q1, Q2, Q3)` of `data`.
    ///
    /// Uses the "exclusive" method: for an odd-length sample the middle
    /// element belongs to neither half when computing Q1/Q3.
    /// Returns `None` for empty input; panics if `data` contains NaN
    /// (comparison `unwrap`).
    pub fn quartiles(data: &[f64]) -> Option<(f64, f64, f64)> {
        if data.is_empty() {
            return None;
        }
        let mut sorted = data.to_vec();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());

        // Q2 is the median of the whole sorted sample.
        let q2 = Stats::median(&sorted)?;
        // Split at len/2: for odd lengths, `upper` begins with the middle
        // element; `lower` never contains it.
        let (lower, upper) = sorted.split_at(sorted.len() / 2);
        let q1 = Stats::median(lower)?;
        // For odd lengths skip the middle element so it is excluded from the
        // upper half as well (exclusive quartile method).
        let q3 = Stats::median(if sorted.len() % 2 == 0 {
            upper
        } else {
            &upper[1..]
        })?;

        Some((q1, q2, q3))
    }
94
95 pub fn percentile(data: &[f64], p: f64) -> Option<f64> {
96 if data.is_empty() || p < 0.0 || p > 100.0 {
97 return None;
98 }
99 let mut sorted = data.to_vec();
100 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
101
102 let rank = p / 100.0 * (sorted.len() - 1) as f64;
103 let lower_idx = rank.floor() as usize;
104 let upper_idx = rank.ceil() as usize;
105
106 if lower_idx == upper_idx {
107 Some(sorted[lower_idx])
108 } else {
109 let weight = rank - lower_idx as f64;
110 Some(sorted[lower_idx] * (1.0 - weight) + sorted[upper_idx] * weight)
111 }
112 }
113
114 pub fn skewness(data: &[f64]) -> Option<f64> {
116 if data.len() < 3 {
117 return None;
118 }
119 let mean = Stats::mean(data)?;
120 let std_dev = Stats::std_dev(data)?;
121 let n = data.len() as f64;
122
123 let m3 = data.iter().map(|&x| (x - mean).powi(3)).sum::<f64>() / n;
124
125 Some(m3 / std_dev.powi(3))
126 }
127
128 pub fn kurtosis(data: &[f64]) -> Option<f64> {
129 if data.len() < 4 {
130 return None;
131 }
132 let mean = Stats::mean(data)?;
133 let std_dev = Stats::std_dev(data)?;
134 let n = data.len() as f64;
135
136 let m4 = data.iter().map(|&x| (x - mean).powi(4)).sum::<f64>() / n;
137
138 Some(m4 / std_dev.powi(4) - 3.0) }
140
141 pub fn covariance(x: &[f64], y: &[f64]) -> Option<f64> {
143 if x.len() != y.len() || x.is_empty() {
144 return None;
145 }
146 let mean_x = Stats::mean(x)?;
147 let mean_y = Stats::mean(y)?;
148 let n = x.len() as f64;
149
150 let sum = x
151 .iter()
152 .zip(y.iter())
153 .map(|(&xi, &yi)| (xi - mean_x) * (yi - mean_y))
154 .sum::<f64>();
155
156 Some(sum / (n - 1.0))
157 }
158
159 pub fn correlation(x: &[f64], y: &[f64]) -> Option<f64> {
160 let cov = Stats::covariance(x, y)?;
161 let std_x = Stats::std_dev(x)?;
162 let std_y = Stats::std_dev(y)?;
163
164 Some(cov / (std_x * std_y))
165 }
166
167 pub fn z_scores(data: &[f64]) -> Option<Vec<f64>> {
169 let mean = Stats::mean(data)?;
170 let std_dev = Stats::std_dev(data)?;
171
172 Some(data.iter().map(|&x| (x - mean) / std_dev).collect())
173 }
174
175 pub fn mad(data: &[f64]) -> Option<f64> {
177 let median = Stats::median(data)?;
179 let deviations: Vec<f64> = data.iter().map(|&x| (x - median).abs()).collect();
180 Stats::median(&deviations)
181 }
182
183 pub fn winsorized_mean(data: &[f64], trim: f64) -> Option<f64> {
184 if data.is_empty() || trim < 0.0 || trim > 0.5 {
185 return None;
186 }
187 let mut sorted = data.to_vec();
188 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
189
190 let n = sorted.len();
191 let k = (n as f64 * trim).floor() as usize;
192
193 let lower = sorted[k];
194 let upper = sorted[n - k - 1];
195
196 let sum: f64 = sorted.iter().map(|&x| x.max(lower).min(upper)).sum();
197
198 Some(sum / n as f64)
199 }
200
201 pub fn moving_average(data: &[f64], window: usize) -> Option<Vec<f64>> {
202 if window == 0 || window > data.len() {
203 return None;
204 }
205
206 let mut result = Vec::with_capacity(data.len() - window + 1);
207 for i in 0..=data.len() - window {
208 let mean = Stats::mean(&data[i..i + window])?;
209 result.push(mean);
210 }
211 Some(result)
212 }
213
214 pub fn exponential_smoothing(data: &[f64], alpha: f64) -> Option<Vec<f64>> {
215 if data.is_empty() || alpha < 0.0 || alpha > 1.0 {
216 return None;
217 }
218
219 let mut result = Vec::with_capacity(data.len());
220 result.push(data[0]);
221
222 for i in 1..data.len() {
223 let smoothed = alpha * data[i] + (1.0 - alpha) * result[i - 1];
224 result.push(smoothed);
225 }
226 Some(result)
227 }
228
229 pub fn bonferroni_correction(p_values: &[f64]) -> Option<Vec<f64>> {
231 if p_values.is_empty() {
232 return None;
233 }
234 let n = p_values.len() as f64;
235 Some(p_values.iter().map(|&p| (p * n).min(1.0)).collect())
236 }
237
238 pub fn benjamini_hochberg(p_values: &[f64]) -> Option<Vec<f64>> {
239 if p_values.is_empty() {
240 return None;
241 }
242
243 let mut indexed_p: Vec<(usize, &f64)> = p_values.iter().enumerate().collect();
245 indexed_p.sort_by(|a, b| a.1.partial_cmp(b.1).unwrap());
246
247 let n = p_values.len();
248 let mut adjusted = vec![0.0; n];
249
250 let mut prev = 1.0;
252 for (rank, (orig_idx, &p)) in indexed_p.iter().enumerate().rev() {
253 let adj_p = (p * n as f64 / (rank + 1) as f64).min(prev);
254 adjusted[*orig_idx] = adj_p;
255 prev = adj_p;
256 }
257
258 Some(adjusted)
259 }
260
261 pub fn kernel_density_estimation(data: &[f64], x: f64, bandwidth: f64) -> Option<f64> {
263 if data.is_empty() || bandwidth <= 0.0 {
264 return None;
265 }
266
267 let n = data.len() as f64;
268 let density: f64 = data
269 .iter()
270 .map(|&xi| {
271 let u = (x - xi) / bandwidth;
272 Stats::gaussian_kernel(u) / bandwidth
273 })
274 .sum::<f64>()
275 / n;
276
277 Some(density)
278 }
279
280 fn gaussian_kernel(u: f64) -> f64 {
281 (-0.5 * u * u).exp() / (2.0 * Const::PI).sqrt()
282 }
283
284 pub fn bayesian_update(prior: f64, likelihood: f64, evidence: f64) -> Option<f64> {
286 if evidence == 0.0 || prior < 0.0 || prior > 1.0 || likelihood < 0.0 {
287 return None;
288 }
289 Some(prior * likelihood / evidence)
290 }
291
292 pub fn entropy(probabilities: &[f64]) -> Option<f64> {
294 if probabilities.is_empty()
295 || probabilities.iter().any(|&p| p < 0.0 || p > 1.0)
296 || (probabilities.iter().sum::<f64>() - 1.0).abs() > 1e-10
297 {
298 return None;
299 }
300
301 Some(
302 -probabilities
303 .iter()
304 .filter(|&&p| p > 0.0)
305 .map(|&p| p * p.log2())
306 .sum::<f64>(),
307 )
308 }
309
310 pub fn kullback_leibler_divergence(p: &[f64], q: &[f64]) -> Option<f64> {
311 if p.len() != q.len() || p.is_empty() {
312 return None;
313 }
314
315 Some(
316 p.iter()
317 .zip(q.iter())
318 .filter(|(&pi, &qi)| pi > 0.0 && qi > 0.0)
319 .map(|(&pi, &qi)| pi * (pi / qi).log2())
320 .sum(),
321 )
322 }
323}
324
/// Computes a full set of descriptive statistics for `data` in one call.
///
/// Returns `None` as soon as any component statistic is unavailable; since
/// `Stats::kurtosis` requires at least 4 samples, the practical minimum
/// input length is 4.
pub fn summary_statistics(data: &[f64]) -> Option<SummaryStats> {
    Some(SummaryStats {
        count: data.len(),
        mean: Stats::mean(data)?,
        median: Stats::median(data)?,
        mode: Stats::mode(data)?,
        variance: Stats::variance(data)?,
        std_dev: Stats::std_dev(data)?,
        skewness: Stats::skewness(data)?,
        kurtosis: Stats::kurtosis(data)?,
        range: Stats::range(data)?,
        quartiles: Stats::quartiles(data)?,
    })
}
340
/// Bundle of descriptive statistics produced by [`summary_statistics`].
#[derive(Debug)]
pub struct SummaryStats {
    /// Number of samples.
    pub count: usize,
    /// Arithmetic mean.
    pub mean: f64,
    /// Middle value of the sorted sample.
    pub median: f64,
    /// Most frequent value(s).
    pub mode: Vec<f64>,
    /// Sample variance (n − 1 denominator).
    pub variance: f64,
    /// Sample standard deviation.
    pub std_dev: f64,
    /// Third standardized moment.
    pub skewness: f64,
    /// Excess kurtosis (normal distribution scores 0).
    pub kurtosis: f64,
    /// max − min.
    pub range: f64,
    /// (Q1, Q2, Q3).
    pub quartiles: (f64, f64, f64),
}
354
355impl Stats {
357 pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
358 let exponent = -(x - mean).powi(2) / (2.0 * std_dev.powi(2));
359 (1.0 / (std_dev * (2.0 * Const::PI).sqrt())) * exponent.exp()
360 }
361
362 pub fn cohens_d(data1: &[f64], data2: &[f64]) -> Option<f64> {
364 let mean1 = Stats::mean(data1)?;
365 let mean2 = Stats::mean(data2)?;
366 let var1 = Stats::variance(data1)?;
367 let var2 = Stats::variance(data2)?;
368 let pooled_std = ((var1 + var2) / 2.0).sqrt();
369 Some((mean1 - mean2) / pooled_std)
370 }
371
372 pub fn linear_regression(x: &[f64], y: &[f64]) -> Option<(f64, f64)> {
374 if x.len() != y.len() || x.is_empty() {
376 return None;
377 }
378
379 let mean_x = Stats::mean(x)?;
380 let mean_y = Stats::mean(y)?;
381 let cov = Stats::covariance(x, y)?;
382 let var_x = Stats::variance(x)?;
383
384 let slope = cov / var_x;
385 let intercept = mean_y - slope * mean_x;
386
387 Some((slope, intercept))
388 }
389
390 pub fn autocorrelation(data: &[f64], lag: usize) -> Option<f64> {
392 if lag >= data.len() {
393 return None;
394 }
395
396 let mean = Stats::mean(data)?;
397 let n = data.len() - lag;
398
399 let numerator: f64 = (0..n)
400 .map(|i| (data[i] - mean) * (data[i + lag] - mean))
401 .sum();
402 let denominator: f64 = data.iter().map(|&x| (x - mean).powi(2)).sum();
403
404 Some(numerator / denominator)
405 }
406
407 pub fn binomial_pmf(k: u64, n: u64, p: f64) -> f64 {
408 if p < 0.0 || p > 1.0 {
409 return 0.0;
410 }
411 let combinations = Stats::combinations(n, k);
412 combinations as f64 * p.powi(k as i32) * (1.0 - p).powi((n - k) as i32)
413 }
414
415 fn combinations(n: u64, k: u64) -> u64 {
417 if k > n {
418 return 0;
419 }
420 let k = k.min(n - k);
421 let mut result = 1;
422 for i in 0..k {
423 result = result * (n - i) / (i + 1);
424 }
425 result
426 }
427}