Skip to main content

lmn_core/histogram/
numeric.rs

1use rand::Rng;
2use rand::SeedableRng;
3
4use crate::stats::Distribution;
5
6// ── NumericHistogramParams ────────────────────────────────────────────────────
7
8/// Parameters for constructing a `NumericHistogram`.
9pub struct NumericHistogramParams {
10    /// Maximum number of samples to retain.
11    /// Once this cap is reached, reservoir sampling is used.
12    pub max_samples: usize,
13}
14
15// ── NumericHistogram ──────────────────────────────────────────────────────────
16
17/// Bounded reservoir of float64 samples for response field statistics.
18///
19/// Fills up to `max_samples`, then applies Vitter's Algorithm R reservoir
20/// sampling so the retained set remains a uniform random sample of all
21/// observed values.
22///
23/// Uses `SmallRng` (a `Send`-safe PRNG) seeded from entropy at construction
24/// time, so instances can be sent across threads (e.g. moved into drain tasks).
25pub struct NumericHistogram {
26    samples: Vec<f64>,
27    max_samples: usize,
28    total_seen: usize,
29    rng: rand::rngs::SmallRng,
30}
31
32impl NumericHistogram {
33    /// Creates a new histogram with the given parameters.
34    pub fn new(params: NumericHistogramParams) -> Self {
35        Self {
36            samples: Vec::new(),
37            max_samples: params.max_samples,
38            total_seen: 0,
39            rng: rand::rngs::SmallRng::from_os_rng(),
40        }
41    }
42
43    /// Records a single float value.
44    pub fn record(&mut self, value: f64) {
45        self.total_seen += 1;
46        if self.samples.len() < self.max_samples {
47            self.samples.push(value);
48        } else {
49            let j = self.rng.random_range(0..self.total_seen);
50            if j < self.max_samples {
51                self.samples[j] = value;
52            }
53        }
54    }
55
56    /// Returns a sorted `Distribution` over the retained samples.
57    pub fn distribution(&self) -> Distribution {
58        Distribution::from_unsorted(self.samples.clone())
59    }
60
61    /// Returns `true` if no values have been recorded.
62    pub fn is_empty(&self) -> bool {
63        self.samples.is_empty()
64    }
65
66    /// Returns the total number of values observed (including those not retained).
67    pub fn total_seen(&self) -> usize {
68        self.total_seen
69    }
70}
71
72// ── Tests ─────────────────────────────────────────────────────────────────────
73
74#[cfg(test)]
75mod tests {
76    use super::*;
77
78    fn hist_with_cap(cap: usize) -> NumericHistogram {
79        NumericHistogram::new(NumericHistogramParams { max_samples: cap })
80    }
81
82    #[test]
83    fn record_fills_up_to_cap() {
84        let mut h = hist_with_cap(5);
85        for i in 0..5 {
86            h.record(i as f64);
87        }
88        assert_eq!(h.samples.len(), 5);
89    }
90
91    #[test]
92    fn record_beyond_cap_does_not_grow_vec() {
93        let mut h = hist_with_cap(3);
94        for i in 0..100 {
95            h.record(i as f64);
96        }
97        assert_eq!(h.samples.len(), 3);
98    }
99
100    #[test]
101    fn total_seen_always_increments() {
102        let mut h = hist_with_cap(2);
103        for i in 0..10 {
104            h.record(i as f64);
105        }
106        assert_eq!(h.total_seen(), 10);
107    }
108
109    #[test]
110    fn distribution_returns_correct_quantiles() {
111        let mut h = hist_with_cap(100);
112        // Record values 1.0 through 100.0
113        for i in 1..=100 {
114            h.record(i as f64);
115        }
116        let dist = h.distribution();
117        // Min should be 1.0, max should be 100.0
118        assert_eq!(dist.min(), 1.0);
119        assert_eq!(dist.max(), 100.0);
120        // With 100 values: p50 idx = floor(100 * 0.5) = 50 → 51st value in sorted order
121        let p50 = dist.quantile(0.5);
122        assert!((1.0..=100.0).contains(&p50), "p50={p50} out of range");
123    }
124
125    #[test]
126    fn is_empty_before_records() {
127        let h = hist_with_cap(10);
128        assert!(h.is_empty());
129    }
130}