Skip to main content

radiate_core/stats/
distribution.rs

1use crate::Statistic;
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Clone, PartialEq, Default)]
7#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
8pub struct Distribution {
9    pub statistic: Statistic,
10    pub last_sequence: Vec<f32>,
11}
12
13impl Distribution {
14    #[inline(always)]
15    pub fn add(&mut self, value: &[f32]) {
16        self.clear();
17        for v in value {
18            self.statistic.add(*v);
19            self.last_sequence.push(*v);
20        }
21    }
22
23    pub fn last_sequence(&self) -> &[f32] {
24        &self.last_sequence
25    }
26
27    pub fn count(&self) -> i32 {
28        self.last_sequence.len() as i32
29    }
30
31    pub fn mean(&self) -> f32 {
32        self.statistic.mean()
33    }
34
35    pub fn variance(&self) -> f32 {
36        self.statistic.variance()
37    }
38
39    pub fn standard_deviation(&self) -> f32 {
40        self.statistic.std_dev()
41    }
42
43    pub fn skewness(&self) -> f32 {
44        self.statistic.skewness()
45    }
46
47    pub fn kurtosis(&self) -> f32 {
48        self.statistic.kurtosis()
49    }
50
51    pub fn min(&self) -> f32 {
52        self.statistic.min()
53    }
54
55    pub fn max(&self) -> f32 {
56        self.statistic.max()
57    }
58
59    pub fn clear(&mut self) {
60        self.statistic.clear();
61        self.last_sequence.clear();
62    }
63
64    pub fn log2(&self) -> f32 {
65        (self.last_sequence().len() as f32).log2()
66    }
67
68    #[inline(always)]
69    pub fn entropy(&self) -> f32 {
70        let bin_width = 0.01;
71        let mut counts = HashMap::new();
72
73        for &value in &self.last_sequence {
74            let bin = (value / bin_width).floor();
75            *counts.entry(bin as i32).or_insert(0) += 1;
76        }
77
78        let total = self.last_sequence.len() as f32;
79        if total == 0.0 {
80            return 0.0;
81        }
82
83        counts
84            .values()
85            .map(|&count| {
86                let p = count as f32 / total;
87                -p * p.log2()
88            })
89            .sum()
90    }
91
92    #[inline(always)]
93    pub fn percentile(&self, p: f32) -> f32 {
94        if p < 0.0 || p > 100.0 {
95            panic!("Percentile must be between 0 and 100");
96        }
97
98        let count = self.last_sequence.len() as f32;
99        if count == 0.0 {
100            return 0.0;
101        }
102
103        let index = (p / 100.0) * count;
104
105        let sorted_values = { &self.last_sequence };
106
107        let index = index as usize;
108
109        if index == 0 && !sorted_values.is_empty() {
110            return sorted_values[0];
111        } else if index == sorted_values.len() {
112            return sorted_values[sorted_values.len() - 1];
113        } else if index >= sorted_values.len() {
114            panic!(
115                "Index out of bounds for the sorted values {} >= {}",
116                index,
117                sorted_values.len()
118            );
119        }
120
121        sorted_values[index]
122    }
123}
124
125impl From<&[f32]> for Distribution {
126    fn from(value: &[f32]) -> Self {
127        let mut result = Distribution::default();
128        result.add(value);
129        result
130    }
131}
132
133impl From<Vec<f32>> for Distribution {
134    fn from(value: Vec<f32>) -> Self {
135        let mut result = Distribution::default();
136        result.add(&value);
137        result
138    }
139}
140
141impl From<&Vec<usize>> for Distribution {
142    fn from(value: &Vec<usize>) -> Self {
143        let mut dist = Distribution::default();
144        for v in value.iter().map(|&v| v as f32) {
145            dist.statistic.add(v);
146            dist.last_sequence.push(v);
147        }
148
149        dist
150    }
151}