use rand::Rng;
use rand::SeedableRng;
use crate::stats::Distribution;
/// Construction parameters for [`NumericHistogram`].
pub struct NumericHistogramParams {
/// Maximum number of recorded values the histogram keeps in memory at once.
/// A value of `0` means no samples are ever retained.
pub max_samples: usize,
}
/// Bounded-memory collector of `f64` observations.
///
/// Keeps at most `max_samples` values; once that many are held, each newly
/// recorded value randomly either replaces one retained value or is dropped.
pub struct NumericHistogram {
/// Currently retained values, in no particular order; never longer than `max_samples`.
samples: Vec<f64>,
/// Cap on `samples.len()`, copied from `NumericHistogramParams::max_samples`.
max_samples: usize,
/// Number of values passed to `record`, including those that were not retained.
total_seen: usize,
/// Random source used to pick which retained slot (if any) a new value overwrites.
rng: rand::rngs::SmallRng,
}
impl NumericHistogram {
    /// Creates an empty histogram that retains at most `params.max_samples` values.
    ///
    /// The internal random number generator is seeded via `SmallRng::from_os_rng`,
    /// so which values survive past the cap is not fixed by this type.
    pub fn new(params: NumericHistogramParams) -> Self {
        Self {
            samples: Vec::new(),
            max_samples: params.max_samples,
            total_seen: 0,
            rng: rand::rngs::SmallRng::from_os_rng(),
        }
    }

    /// Records one observation.
    ///
    /// While fewer than `max_samples` values are held, `value` is simply appended.
    /// After that, a slot index is drawn uniformly from `0..total_seen`; if it
    /// falls inside the retained buffer, `value` overwrites that slot, otherwise
    /// `value` is discarded. This is the classic reservoir-sampling replacement
    /// rule, giving every value seen so far the same chance of being retained.
    ///
    /// `total_seen` is incremented on every call, saturating at `usize::MAX`.
    pub fn record(&mut self, value: f64) {
        // Saturate instead of `+= 1`: a wrapped counter would become 0 in release
        // builds and make the range below empty, which `random_range` rejects
        // with a panic (debug builds would panic on the overflow itself).
        self.total_seen = self.total_seen.saturating_add(1);

        if self.samples.len() < self.max_samples {
            self.samples.push(value);
            return;
        }

        // With a zero cap nothing can ever be retained, so skip the RNG draw.
        if self.max_samples == 0 {
            return;
        }

        // The buffer is full here, so `samples.len() == max_samples` and a
        // successful `get_mut` is exactly the `slot < max_samples` case.
        let slot = self.rng.random_range(0..self.total_seen);
        if let Some(kept) = self.samples.get_mut(slot) {
            *kept = value;
        }
    }

    /// Builds a [`Distribution`] from a copy of the currently retained samples.
    ///
    /// The histogram itself is left untouched; the samples are cloned because
    /// `Distribution::from_unsorted` takes the vector by value.
    pub fn distribution(&self) -> Distribution {
        Distribution::from_unsorted(self.samples.clone())
    }

    /// Returns `true` when no samples are currently retained.
    ///
    /// Note that a histogram created with `max_samples == 0` stays empty no
    /// matter how many values have been recorded.
    pub fn is_empty(&self) -> bool {
        self.samples.is_empty()
    }

    /// Returns how many values have been passed to [`record`](Self::record),
    /// including values that were not retained.
    pub fn total_seen(&self) -> usize {
        self.total_seen
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a histogram that retains at most `cap` samples.
    fn hist_with_cap(cap: usize) -> NumericHistogram {
        let params = NumericHistogramParams { max_samples: cap };
        NumericHistogram::new(params)
    }

    /// Recording exactly `cap` values keeps all of them.
    #[test]
    fn record_fills_up_to_cap() {
        let mut hist = hist_with_cap(5);
        (0_i32..5).map(f64::from).for_each(|v| hist.record(v));
        assert_eq!(hist.samples.len(), 5);
    }

    /// Recording far more than `cap` values never grows the buffer past `cap`.
    #[test]
    fn record_beyond_cap_does_not_grow_vec() {
        let mut hist = hist_with_cap(3);
        (0_i32..100).map(f64::from).for_each(|v| hist.record(v));
        assert_eq!(hist.samples.len(), 3);
    }

    /// Every call to `record` is counted, whether or not the value is kept.
    #[test]
    fn total_seen_always_increments() {
        let mut hist = hist_with_cap(2);
        (0_i32..10).map(f64::from).for_each(|v| hist.record(v));
        assert_eq!(hist.total_seen(), 10);
    }

    /// With the cap equal to the input size, all values are kept, so the
    /// extremes are exact and the median lies inside the recorded range.
    #[test]
    fn distribution_returns_correct_quantiles() {
        let mut hist = hist_with_cap(100);
        (1_i32..=100).map(f64::from).for_each(|v| hist.record(v));

        let dist = hist.distribution();
        assert_eq!(dist.min(), 1.0);
        assert_eq!(dist.max(), 100.0);

        let p50 = dist.quantile(0.5);
        let recorded_range = 1.0..=100.0;
        assert!(recorded_range.contains(&p50), "p50={p50} out of range");
    }

    /// A freshly constructed histogram holds no samples.
    #[test]
    fn is_empty_before_records() {
        let hist = hist_with_cap(10);
        assert!(hist.is_empty());
    }
}