radiate_core/stats/
distribution.rs

1use crate::Statistic;
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
5#[derive(Clone, PartialEq, Default)]
6#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
7pub struct Distribution {
8    pub statistic: Statistic,
9    pub last_sequence: Vec<f32>,
10}
11
12impl Distribution {
13    pub fn push(&mut self, value: f32) {
14        self.statistic.add(value);
15        self.last_sequence.push(value);
16    }
17
18    pub fn add(&mut self, value: &[f32]) {
19        self.clear();
20        for v in value {
21            self.statistic.add(*v);
22            self.last_sequence.push(*v);
23        }
24    }
25
26    pub fn last_sequence(&self) -> &Vec<f32> {
27        &self.last_sequence
28    }
29
30    pub fn count(&self) -> i32 {
31        self.statistic.count()
32    }
33
34    pub fn mean(&self) -> f32 {
35        self.statistic.mean()
36    }
37
38    pub fn variance(&self) -> f32 {
39        self.statistic.variance()
40    }
41
42    pub fn standard_deviation(&self) -> f32 {
43        self.statistic.std_dev()
44    }
45
46    pub fn skewness(&self) -> f32 {
47        self.statistic.skewness()
48    }
49
50    pub fn kurtosis(&self) -> f32 {
51        self.statistic.kurtosis()
52    }
53
54    pub fn min(&self) -> f32 {
55        self.statistic.min()
56    }
57
58    pub fn max(&self) -> f32 {
59        self.statistic.max()
60    }
61
62    pub fn clear(&mut self) {
63        // self.statistic.clear();
64        self.last_sequence.clear();
65    }
66
67    pub fn log2(&self) -> f32 {
68        (self.last_sequence().len() as f32).log2()
69    }
70
71    pub fn entropy(&self) -> f32 {
72        let bin_width = 0.01; // You can tune this for your resolution
73        let mut counts = std::collections::HashMap::new();
74
75        for &value in &self.last_sequence {
76            let bin = (value / bin_width).floor();
77            *counts.entry(bin as i32).or_insert(0usize) += 1;
78        }
79
80        let total = self.last_sequence.len() as f32;
81        if total == 0.0 {
82            return 0.0;
83        }
84
85        counts
86            .values()
87            .map(|&count| {
88                let p = count as f32 / total;
89                -p * p.log2()
90            })
91            .sum()
92    }
93
94    pub fn percentile(&self, p: f32) -> f32 {
95        if p < 0.0 || p > 100.0 {
96            panic!("Percentile must be between 0 and 100");
97        }
98
99        let count = self.last_sequence.len() as f32;
100        if count == 0 as f32 {
101            panic!("Cannot calculate percentile for an empty distribution");
102        }
103        let index = (p / 100.0) * count;
104        let sorted_values = {
105            let mut values = self.last_sequence.clone();
106            values.sort_by(|a, b| a.partial_cmp(b).unwrap());
107            values
108        };
109
110        let index = index as usize;
111        if index >= sorted_values.len() {
112            panic!("Index out of bounds for the sorted values");
113        }
114
115        sorted_values[index]
116    }
117}
118
119impl From<&[f32]> for Distribution {
120    fn from(value: &[f32]) -> Self {
121        let mut result = Distribution::default();
122        result.add(value);
123        result
124    }
125}
126
127impl From<Vec<f32>> for Distribution {
128    fn from(value: Vec<f32>) -> Self {
129        let mut result = Distribution::default();
130        result.add(&value);
131        result
132    }
133}
134
// NOTE: disabled alternative to `Distribution::entropy`. Instead of fixed-width bins it
// histograms `last_sequence` into `bin_count` equal-width bins spanning `[min, max]`.
//
// pub fn entropy(&self, bin_count: usize) -> f32 {
//     if self.last_sequence.is_empty() || bin_count == 0 {
//         return 0.0;
//     }

//     let min = self.min();
//     let max = self.max();

//     if (max - min).abs() < std::f32::EPSILON {
//         return 0.0; // All values are (almost) the same
//     }

//     let bin_width = (max - min) / bin_count as f32;
//     let mut bins = vec![0usize; bin_count];

//     for &v in &self.last_sequence {
//         let bin_index = ((v - min) / bin_width).floor() as usize;
//         let clamped = bin_index.min(bin_count - 1);
//         bins[clamped] += 1;
//     }

//     let total = self.last_sequence.len() as f32;
//     bins.iter()
//         .filter(|&&count| count > 0)
//         .map(|&count| {
//             let p = count as f32 / total;
//             -p * p.log2()
//         })
//         .sum()
// }