Skip to main content

sciforge_lib/maths/statistics/
descriptive.rs

/// Arithmetic mean of `data`.
///
/// The sum of all elements is divided by the element count. No guard is
/// applied for an empty slice: that case is a zero-by-zero division and
/// therefore evaluates to `NaN`.
pub fn mean(data: &[f64]) -> f64 {
    let total: f64 = data.iter().sum();
    let count = data.len() as f64;
    total / count
}
4
5pub fn variance(data: &[f64]) -> f64 {
6    let m = mean(data);
7    data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / data.len() as f64
8}
9
10pub fn std_dev(data: &[f64]) -> f64 {
11    variance(data).sqrt()
12}
13
14pub fn sample_variance(data: &[f64]) -> f64 {
15    let m = mean(data);
16    data.iter().map(|x| (x - m).powi(2)).sum::<f64>() / (data.len() - 1) as f64
17}
18
19pub fn sample_std_dev(data: &[f64]) -> f64 {
20    sample_variance(data).sqrt()
21}
22
/// Median of `data`.
///
/// The slice is sorted in place as a side effect. With an even number of
/// elements the two middle values are averaged.
///
/// # Returns
///
/// The median, or `NaN` for an empty slice.
///
/// # NaN handling
///
/// Ordering uses [`f64::total_cmp`], so `NaN` elements cannot cause a panic;
/// they are placed at one end of the slice (which end depends on their sign
/// bit) and only influence the result if they land on a middle position.
pub fn median(data: &mut [f64]) -> f64 {
    // An empty slice has no middle element; indexing `n / 2 - 1` would
    // underflow, so report "undefined" as NaN instead.
    if data.is_empty() {
        return f64::NAN;
    }
    // Elements that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort is indistinguishable from a stable one here.
    data.sort_unstable_by(f64::total_cmp);
    let mid = data.len() / 2;
    if data.len() % 2 == 1 {
        data[mid]
    } else {
        (data[mid - 1] + data[mid]) / 2.0
    }
}
32
/// Percentile of `data` using linear interpolation between the two closest
/// ranks.
///
/// The slice is sorted in place as a side effect.
///
/// # Parameters
///
/// * `data` - observations; reordered (sorted ascending) by this call.
/// * `p` - requested quantile as a fraction: `0.0` selects the smallest
///   element and `1.0` the largest. Values outside `[0.0, 1.0]` are clamped
///   to that range.
///
/// # Returns
///
/// The interpolated value, or `NaN` when `data` is empty or `p` is `NaN`.
///
/// # NaN handling
///
/// Ordering uses [`f64::total_cmp`], so `NaN` elements cannot cause a panic;
/// they are placed at one end of the sorted slice.
pub fn percentile(data: &mut [f64], p: f64) -> f64 {
    data.sort_unstable_by(f64::total_cmp);
    // `data.len() - 1` would underflow on an empty slice, and a NaN fraction
    // has no meaningful rank, so both cases are reported as NaN.
    if data.is_empty() || p.is_nan() {
        return f64::NAN;
    }
    let last = data.len() - 1;
    // Clamping keeps the fractional rank inside `0..=last`, so neither index
    // below can run past the end of the slice.
    let rank = p.clamp(0.0, 1.0) * last as f64;
    let lo = rank.floor() as usize;
    let hi = (rank.ceil() as usize).min(last);
    if lo == hi {
        data[lo]
    } else {
        data[lo] + (rank - lo as f64) * (data[hi] - data[lo])
    }
}
44
/// Most frequent value in `data`.
///
/// The input is copied and sorted; consecutive values are then grouped into
/// runs. A value joins the current run when it equals the run's first value
/// or differs from it by less than `1e-10`. The first value of the longest
/// run is returned; when several runs tie, the one with the smallest value
/// wins.
///
/// # Returns
///
/// The mode, or `NaN` for an empty slice.
///
/// # NaN handling
///
/// Ordering uses [`f64::total_cmp`], so `NaN` elements cannot cause a panic.
/// A `NaN` never matches any value, so each one forms its own run of
/// length one.
pub fn mode(data: &[f64]) -> f64 {
    let mut sorted = data.to_vec();
    sorted.sort_unstable_by(f64::total_cmp);

    // No elements means no mode; report that as NaN instead of indexing
    // into an empty vector.
    let Some((&first, rest)) = sorted.split_first() else {
        return f64::NAN;
    };

    let mut best = first;
    let mut best_count = 1_usize;
    let mut current = first;
    let mut current_count = 1_usize;
    for &v in rest {
        // The direct equality test covers repeated infinities, for which
        // `v - current` is NaN and the tolerance test alone would fail.
        if v == current || (v - current).abs() < 1e-10 {
            current_count += 1;
        } else {
            if current_count > best_count {
                best = current;
                best_count = current_count;
            }
            current = v;
            current_count = 1;
        }
    }
    // The final run is never closed inside the loop, so compare it here.
    if current_count > best_count {
        current
    } else {
        best
    }
}
70
71pub fn skewness(data: &[f64]) -> f64 {
72    let m = mean(data);
73    let s = std_dev(data);
74    let n = data.len() as f64;
75    data.iter().map(|x| ((x - m) / s).powi(3)).sum::<f64>() / n
76}
77
78pub fn kurtosis(data: &[f64]) -> f64 {
79    let m = mean(data);
80    let s = std_dev(data);
81    let n = data.len() as f64;
82    data.iter().map(|x| ((x - m) / s).powi(4)).sum::<f64>() / n - 3.0
83}
84
85pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
86    let mx = mean(x);
87    let my = mean(y);
88    let n = x.len() as f64;
89    x.iter()
90        .zip(y)
91        .map(|(a, b)| (a - mx) * (b - my))
92        .sum::<f64>()
93        / n
94}
95
96pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
97    let cov = covariance(x, y);
98    let sx = std_dev(x);
99    let sy = std_dev(y);
100    if sx < 1e-30 || sy < 1e-30 {
101        return 0.0;
102    }
103    cov / (sx * sy)
104}
105
/// Weighted mean of `data`: `sum(data_i * weights_i) / sum(weights_i)`.
///
/// # Returns
///
/// The weighted mean, or `NaN` when `data` and `weights` differ in length.
/// Weights that sum to zero (including the empty case) make the final
/// division non-finite.
pub fn weighted_mean(data: &[f64], weights: &[f64]) -> f64 {
    // `zip` stops at the shorter slice while the weight total would still
    // cover every weight, so a length mismatch used to bias the result
    // silently. Report it as NaN instead.
    if data.len() != weights.len() {
        return f64::NAN;
    }
    let total_weight: f64 = weights.iter().sum();
    let weighted_sum: f64 = data.iter().zip(weights).map(|(d, w)| d * w).sum();
    weighted_sum / total_weight
}
110
/// Geometric mean of `data`, computed as `exp(mean(ln(x)))`.
///
/// # Returns
///
/// The geometric mean. Because it goes through `ln`, a zero element drives
/// the result to `0.0` and a negative element makes it `NaN`. An empty
/// slice yields `NaN` (zero-by-zero division).
pub fn geometric_mean(data: &[f64]) -> f64 {
    let n = data.len() as f64;
    // The previous formulation split the log-sum into a Euclidean quotient
    // and a truncating `%` remainder. Those two disagree whenever the
    // log-sum is negative (i.e. typical inputs below 1.0), which produced
    // wrong results; dividing the log-sum directly is both correct and
    // needs only one pass over the data.
    let log_sum: f64 = data.iter().map(|x| x.ln()).sum();
    (log_sum / n).exp()
}
116
/// Harmonic mean of `data`: the element count divided by the sum of the
/// reciprocals of the elements.
///
/// No guard is applied for an empty slice; that case is a zero-by-zero
/// division and evaluates to `NaN`.
pub fn harmonic_mean(data: &[f64]) -> f64 {
    let reciprocal_total: f64 = data.iter().map(|value| value.recip()).sum();
    let count = data.len() as f64;
    count / reciprocal_total
}
121
/// Entropy of the distribution `probs`, `-sum(p * ln(p))`, using the
/// natural logarithm.
///
/// Only strictly positive entries contribute; zero, negative and `NaN`
/// entries are skipped. The entries are used as given, without any
/// normalization.
pub fn entropy(probs: &[f64]) -> f64 {
    let plogp_total: f64 = probs
        .iter()
        .copied()
        .filter(|p| *p > 0.0)
        .map(|p| p * p.ln())
        .sum();
    -plogp_total
}
129
/// Kullback-Leibler divergence `sum(p_i * ln(p_i / q_i))` between the
/// distributions `p` and `q`, using the natural logarithm.
///
/// Entries are paired positionally; if the slices differ in length the
/// surplus entries of the longer one are ignored. A pair contributes only
/// when both `p_i` and `q_i` are strictly positive; every other pair is
/// skipped, including pairs where `q_i` is zero while `p_i` is positive.
pub fn kl_divergence(p: &[f64], q: &[f64]) -> f64 {
    let contributions = p
        .iter()
        .copied()
        .zip(q.iter().copied())
        .filter(|&(pi, qi)| pi > 0.0 && qi > 0.0)
        .map(|(pi, qi)| pi * (pi / qi).ln());
    contributions.sum()
}