radiate_core/stats/
distribution.rs

1use crate::Statistic;
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Clone, PartialEq, Default)]
7#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
8pub struct Distribution {
9    pub statistic: Statistic,
10    pub last_sequence: Vec<f32>,
11}
12
13impl Distribution {
14    #[inline(always)]
15    pub fn add(&mut self, value: &[f32]) {
16        self.clear();
17        for v in value {
18            self.statistic.add(*v);
19            self.last_sequence.push(*v);
20        }
21    }
22
23    pub fn last_sequence(&self) -> &[f32] {
24        &self.last_sequence
25    }
26
27    pub fn count(&self) -> i32 {
28        self.last_sequence.len() as i32
29    }
30
31    pub fn mean(&self) -> f32 {
32        self.statistic.mean()
33    }
34
35    pub fn variance(&self) -> f32 {
36        self.statistic.variance()
37    }
38
39    pub fn standard_deviation(&self) -> f32 {
40        self.statistic.std_dev()
41    }
42
43    pub fn skewness(&self) -> f32 {
44        self.statistic.skewness()
45    }
46
47    pub fn kurtosis(&self) -> f32 {
48        self.statistic.kurtosis()
49    }
50
51    pub fn min(&self) -> f32 {
52        self.statistic.min()
53    }
54
55    pub fn max(&self) -> f32 {
56        self.statistic.max()
57    }
58
59    pub fn clear(&mut self) {
60        self.last_sequence.clear();
61    }
62
63    pub fn log2(&self) -> f32 {
64        (self.last_sequence().len() as f32).log2()
65    }
66
67    #[inline(always)]
68    pub fn entropy(&self) -> f32 {
69        let bin_width = 0.01;
70        let mut counts = HashMap::new();
71
72        for &value in &self.last_sequence {
73            let bin = (value / bin_width).floor();
74            *counts.entry(bin as i32).or_insert(0) += 1;
75        }
76
77        let total = self.last_sequence.len() as f32;
78        if total == 0.0 {
79            return 0.0;
80        }
81
82        counts
83            .values()
84            .map(|&count| {
85                let p = count as f32 / total;
86                -p * p.log2()
87            })
88            .sum()
89    }
90
91    #[inline(always)]
92    pub fn percentile(&self, p: f32) -> f32 {
93        if p < 0.0 || p > 100.0 {
94            panic!("Percentile must be between 0 and 100");
95        }
96
97        let count = self.last_sequence.len() as f32;
98        if count == 0.0 {
99            return 0.0;
100        }
101
102        let index = (p / 100.0) * count;
103
104        let sorted_values = { &self.last_sequence };
105
106        let index = index as usize;
107
108        if index == 0 && !sorted_values.is_empty() {
109            return sorted_values[0];
110        } else if index == sorted_values.len() {
111            return sorted_values[sorted_values.len() - 1];
112        } else if index >= sorted_values.len() {
113            panic!(
114                "Index out of bounds for the sorted values {} >= {}",
115                index,
116                sorted_values.len()
117            );
118        }
119
120        sorted_values[index]
121    }
122}
123
124impl From<&[f32]> for Distribution {
125    fn from(value: &[f32]) -> Self {
126        let mut result = Distribution::default();
127        result.add(value);
128        result
129    }
130}
131
132impl From<Vec<f32>> for Distribution {
133    fn from(value: Vec<f32>) -> Self {
134        let mut result = Distribution::default();
135        result.add(&value);
136        result
137    }
138}
139
140impl From<&Vec<usize>> for Distribution {
141    fn from(value: &Vec<usize>) -> Self {
142        let mut dist = Distribution::default();
143        for v in value.iter().map(|&v| v as f32) {
144            dist.statistic.add(v);
145            dist.last_sequence.push(v);
146        }
147
148        dist
149    }
150}