lmn_core/stats/distribution.rs
1// ── Distribution ──────────────────────────────────────────────────────────────
2
3/// Owns a sorted snapshot of observed values and answers arbitrary quantile queries.
4///
5/// Construction sorts the input once; queries are O(1) index lookups.
6/// Input is `f64` throughout — Duration callers convert to milliseconds at the call site.
7pub struct Distribution {
8 sorted: Vec<f64>,
9}
10
11impl Distribution {
12 /// Sorts `values` and takes ownership. O(n log n).
13 ///
14 /// Use `from_sorted` when values are already sorted to skip the sort step.
15 pub fn from_unsorted(mut values: Vec<f64>) -> Self {
16 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
17 Self { sorted: values }
18 }
19
20 /// Constructs a `Distribution` from an already-sorted `Vec<f64>`.
21 ///
22 /// Caller is responsible for ensuring the input is sorted in ascending order.
23 /// No sort is performed; construction is O(1).
24 pub fn from_sorted(values: Vec<f64>) -> Self {
25 Self { sorted: values }
26 }
27
28 /// Returns the value at quantile `p` in `[0.0, 1.0]` using the floor-index formula.
29 ///
30 /// `idx = (n as f64 * p).floor() as usize`, clamped to `[0, n-1]`.
31 /// Returns `0.0` for an empty distribution.
32 ///
33 /// This formula is equivalent to the integer formula `n * p / 100` (for integer
34 /// percentile values 0–100) used in the ASCII table renderer, ensuring both
35 /// output paths report identical percentile values.
36 pub fn quantile(&self, p: f64) -> f64 {
37 if self.sorted.is_empty() {
38 return 0.0;
39 }
40 let n = self.sorted.len();
41 let idx = ((n as f64 * p).floor() as usize).min(n - 1);
42 self.sorted[idx]
43 }
44
45 /// Returns the minimum value, or `0.0` for an empty distribution.
46 pub fn min(&self) -> f64 {
47 self.sorted.first().copied().unwrap_or(0.0)
48 }
49
50 /// Returns the maximum value, or `0.0` for an empty distribution.
51 pub fn max(&self) -> f64 {
52 self.sorted.last().copied().unwrap_or(0.0)
53 }
54
55 /// Returns the arithmetic mean, or `0.0` for an empty distribution.
56 pub fn mean(&self) -> f64 {
57 if self.sorted.is_empty() {
58 return 0.0;
59 }
60 self.sorted.iter().sum::<f64>() / self.sorted.len() as f64
61 }
62
63 /// Returns `true` if the distribution contains no values.
64 pub fn is_empty(&self) -> bool {
65 self.sorted.is_empty()
66 }
67
68 /// Returns the number of values in the distribution.
69 pub fn len(&self) -> usize {
70 self.sorted.len()
71 }
72
73 /// Returns the value at a pre-computed index into the sorted backing array.
74 ///
75 /// Use this when the caller has already computed the index via the integer formula
76 /// `(n * p / 100).min(n - 1)` and wants to avoid the redundant float round-trip
77 /// through `quantile(idx as f64 / n as f64)`.
78 ///
79 /// # Panics
80 /// Panics if `idx >= self.len()`. The caller is responsible for clamping the index
81 /// to `[0, n-1]` before calling this method.
82 pub fn value_at(&self, idx: usize) -> f64 {
83 self.sorted[idx]
84 }
85}
86
87// ── Tests ─────────────────────────────────────────────────────────────────────
88
89#[cfg(test)]
90mod tests {
91 use super::*;
92
93 // ── Distribution ──────────────────────────────────────────────────────────
94
95 #[test]
96 fn distribution_quantile_known_values() {
97 // 100-element uniform distribution: 1.0, 2.0, ..., 100.0
98 let values: Vec<f64> = (1..=100).map(|i| i as f64).collect();
99 let dist = Distribution::from_sorted(values);
100
101 // quantile(0.0): idx = floor(100 * 0.0) = 0 → value 1.0
102 assert_eq!(dist.quantile(0.0), 1.0);
103 // quantile(0.5): idx = floor(100 * 0.5) = 50 → value 51.0
104 assert_eq!(dist.quantile(0.5), 51.0);
105 // quantile(0.99): idx = floor(100 * 0.99) = 99 → value 100.0
106 assert_eq!(dist.quantile(0.99), 100.0);
107 // quantile(1.0): idx = floor(100 * 1.0) = 100, clamped to 99 → value 100.0
108 assert_eq!(dist.quantile(1.0), 100.0);
109 }
110
111 #[test]
112 fn distribution_empty_returns_zero() {
113 let dist = Distribution::from_sorted(vec![]);
114 assert_eq!(dist.quantile(0.5), 0.0);
115 assert_eq!(dist.min(), 0.0);
116 assert_eq!(dist.max(), 0.0);
117 assert_eq!(dist.mean(), 0.0);
118 }
119
120 #[test]
121 fn distribution_single_element() {
122 let dist = Distribution::from_sorted(vec![42.0]);
123 assert_eq!(dist.quantile(0.0), 42.0);
124 assert_eq!(dist.quantile(0.5), 42.0);
125 assert_eq!(dist.quantile(1.0), 42.0);
126 assert_eq!(dist.min(), 42.0);
127 assert_eq!(dist.max(), 42.0);
128 assert_eq!(dist.mean(), 42.0);
129 }
130
131 #[test]
132 fn distribution_quantile_boundary_p100() {
133 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
134 let dist = Distribution::from_sorted(values);
135 // Must not panic — must return last element
136 assert_eq!(dist.quantile(1.0), 10.0);
137 }
138
139 #[test]
140 fn distribution_avg_is_arithmetic_mean() {
141 let dist = Distribution::from_sorted(vec![1.0, 2.0, 3.0]);
142 assert_eq!(dist.mean(), 2.0);
143 }
144
145 #[test]
146 fn distribution_is_sorted_on_construction() {
147 // Unsorted input: [5.0, 1.0, 3.0, 2.0, 4.0]
148 let dist = Distribution::from_unsorted(vec![5.0, 1.0, 3.0, 2.0, 4.0]);
149 assert_eq!(dist.min(), 1.0);
150 assert_eq!(dist.max(), 5.0);
151 // quantile(0.5): idx = floor(5 * 0.5) = 2 → value 3.0
152 assert_eq!(dist.quantile(0.5), 3.0);
153 }
154
155 #[test]
156 fn distribution_len_and_is_empty() {
157 let empty = Distribution::from_sorted(vec![]);
158 assert!(empty.is_empty());
159 assert_eq!(empty.len(), 0);
160
161 let dist = Distribution::from_sorted(vec![1.0, 2.0, 3.0]);
162 assert!(!dist.is_empty());
163 assert_eq!(dist.len(), 3);
164 }
165
166 #[test]
167 fn distribution_value_at_returns_correct_element() {
168 // sorted: [10.0, 20.0, 30.0, 40.0, 50.0]
169 let dist = Distribution::from_sorted(vec![10.0, 20.0, 30.0, 40.0, 50.0]);
170 assert_eq!(dist.value_at(0), 10.0);
171 assert_eq!(dist.value_at(2), 30.0);
172 assert_eq!(dist.value_at(4), 50.0);
173 }
174}