Skip to main content

oxihuman_core/
statistics_utils.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! Basic statistical utilities.
6
/// Computes the arithmetic mean of `data`.
///
/// An empty slice has no defined mean; `0.0` is returned in that case.
pub fn mean(data: &[f32]) -> f32 {
    match data.len() {
        0 => 0.0,
        count => data.iter().copied().sum::<f32>() / count as f32,
    }
}
14
15/// Population variance of a slice. Returns 0.0 for fewer than 2 elements.
16pub fn variance(data: &[f32]) -> f32 {
17    if data.len() < 2 {
18        return 0.0;
19    }
20    let m = mean(data);
21    data.iter().map(|&x| (x - m) * (x - m)).sum::<f32>() / data.len() as f32
22}
23
24/// Population standard deviation.
25pub fn std_dev(data: &[f32]) -> f32 {
26    variance(data).sqrt()
27}
28
/// Median of a slice. Returns 0.0 for empty input.
///
/// The input is left untouched: it is copied and the copy is sorted. For an
/// odd number of elements the middle value is returned; for an even number
/// the two middle values are averaged.
///
/// Elements are ordered with [`f32::total_cmp`], so the result does not depend
/// on the order of `data` even when it contains NaN. Under that ordering NaNs
/// with the sign bit clear sort after `+inf` and NaNs with the sign bit set
/// sort before `-inf`.
pub fn median(data: &[f32]) -> f32 {
    if data.is_empty() {
        return 0.0;
    }
    let mut sorted = data.to_vec();
    // A `partial_cmp` comparator that falls back to `Equal` is not a total
    // order once NaN is present; sorting with it gives an input-order-dependent
    // result and the standard library documents that it may panic.
    // Ties under `total_cmp` are bit-identical, so an unstable sort is enough.
    sorted.sort_unstable_by(f32::total_cmp);
    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
43
/// Minimum value. Returns `f32::MAX` for empty input.
///
/// Elements are combined with [`f32::min`], which returns the other operand
/// when one of them is NaN. NaN elements are therefore skipped unless every
/// element is NaN, in which case NaN is returned.
pub fn min_val(data: &[f32]) -> f32 {
    // Start from the first element rather than from `f32::MAX`: with a
    // `f32::MAX` seed, a slice holding only `+inf` would report `f32::MAX`,
    // a value that is not in the data.
    data.iter().copied().reduce(f32::min).unwrap_or(f32::MAX)
}
48
/// Maximum value. Returns `f32::MIN` for empty input.
///
/// Elements are combined with [`f32::max`], which returns the other operand
/// when one of them is NaN. NaN elements are therefore skipped unless every
/// element is NaN, in which case NaN is returned.
pub fn max_val(data: &[f32]) -> f32 {
    // Start from the first element rather than from `f32::MIN`: with a
    // `f32::MIN` seed, a slice holding only `-inf` would report `f32::MIN`,
    // a value that is not in the data.
    data.iter().copied().reduce(f32::max).unwrap_or(f32::MIN)
}
53
54/// Pearson correlation coefficient between two equal-length slices.
55/// Returns 0.0 if inputs are empty or have zero variance.
56pub fn pearson_r(x: &[f32], y: &[f32]) -> f32 {
57    let n = x.len().min(y.len());
58    if n == 0 {
59        return 0.0;
60    }
61    let mx = mean(&x[..n]);
62    let my = mean(&y[..n]);
63    let num: f32 = x[..n]
64        .iter()
65        .zip(y[..n].iter())
66        .map(|(&xi, &yi)| (xi - mx) * (yi - my))
67        .sum();
68    let dx: f32 = x[..n]
69        .iter()
70        .map(|&xi| (xi - mx) * (xi - mx))
71        .sum::<f32>()
72        .sqrt();
73    let dy: f32 = y[..n]
74        .iter()
75        .map(|&yi| (yi - my) * (yi - my))
76        .sum::<f32>()
77        .sqrt();
78    if dx < 1e-12 || dy < 1e-12 {
79        return 0.0;
80    }
81    num / (dx * dy)
82}
83
/// p-th percentile (0–100) using linear interpolation. Returns 0.0 for empty data.
///
/// The input is copied and the copy is sorted with [`f32::total_cmp`]. The
/// fractional rank `p / 100 * (len - 1)` is clamped to the valid index range,
/// so `p` below 0 gives the smallest element and `p` above 100 the largest.
/// A NaN `p` yields NaN. When the rank falls between two elements the result
/// is interpolated linearly between them.
pub fn percentile(data: &[f32], p: f32) -> f32 {
    if data.is_empty() {
        return 0.0;
    }
    if p.is_nan() {
        // A NaN rank identifies no position; propagate it rather than
        // silently picking an element.
        return f32::NAN;
    }
    let mut sorted = data.to_vec();
    // `total_cmp` is a total order even with NaN in the data, unlike a
    // `partial_cmp` comparator that falls back to `Equal`.
    sorted.sort_unstable_by(f32::total_cmp);
    let last = sorted.len() - 1;
    let rank = (p / 100.0 * last as f32).clamp(0.0, last as f32);
    // `last as f32` may round upwards for very long slices, so the integer
    // indices are clamped again before they are used.
    let lo = (rank.floor() as usize).min(last);
    let hi = (rank.ceil() as usize).min(last);
    if lo == hi {
        // The rank lands exactly on an element. Returning it directly avoids
        // evaluating `0 * (inf - inf)`, which is NaN for infinite elements.
        return sorted[lo];
    }
    let frac = rank - lo as f32;
    sorted[lo] + frac * (sorted[hi] - sorted[lo])
}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` lies strictly within `tol` of `expected`.
    fn assert_close(actual: f32, expected: f32, tol: f32) {
        assert!((actual - expected).abs() < tol);
    }

    #[test]
    fn test_mean_basic() {
        // mean of [1,2,3,4,5] is 3
        let samples = [1.0f32, 2.0, 3.0, 4.0, 5.0];
        assert_close(mean(&samples), 3.0, 1e-5);
    }

    #[test]
    fn test_mean_empty() {
        // an empty slice averages to 0
        assert_close(mean(&[]), 0.0, 1e-9);
    }

    #[test]
    fn test_variance_constant() {
        // identical values have no spread
        let samples = [5.0f32; 10];
        assert_close(variance(&samples), 0.0, 1e-9);
    }

    #[test]
    fn test_std_dev_known() {
        // std dev of [2,4,4,4,5,5,7,9] is 2
        let samples = [2.0f32, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        assert_close(std_dev(&samples), 2.0, 1e-4);
    }

    #[test]
    fn test_median_odd() {
        // median of [1,3,5] is 3, regardless of input order
        let samples = [5.0f32, 1.0, 3.0];
        assert_close(median(&samples), 3.0, 1e-5);
    }

    #[test]
    fn test_median_even() {
        // median of [1,2,3,4] is 2.5
        let samples = [4.0f32, 2.0, 1.0, 3.0];
        assert_close(median(&samples), 2.5, 1e-5);
    }

    #[test]
    fn test_min_max() {
        // smallest and largest element
        let samples = [3.0f32, 1.0, 4.0, 1.0, 5.0];
        assert_close(min_val(&samples), 1.0, 1e-6);
        assert_close(max_val(&samples), 5.0, 1e-6);
    }

    #[test]
    fn test_pearson_r_perfect() {
        // y = 2x + 3 is perfectly correlated with x, so r = 1
        let xs = (0..10).map(|i| i as f32).collect::<Vec<f32>>();
        let ys = xs.iter().map(|&v| v * 2.0 + 3.0).collect::<Vec<f32>>();
        assert_close(pearson_r(&xs, &ys), 1.0, 1e-4);
    }

    #[test]
    fn test_percentile_50() {
        // the 50th percentile of 1..=11 is its median, 6
        let samples = (1..=11).map(|i| i as f32).collect::<Vec<f32>>();
        assert_close(percentile(&samples, 50.0), 6.0, 1e-4);
    }
}