Skip to main content

latency_buckets/
lib.rs

1//! # latency-buckets
2//!
3//! Streaming histogram for LLM call latencies with constant-memory
4//! percentile estimation.
5//!
6//! Buckets are log-scale (base 2), covering 1 µs to ~17 minutes in 30
7//! buckets. Each record is `O(1)`. Percentiles are estimated by
8//! linear interpolation inside the chosen bucket; expected error is
9//! roughly half a bucket width (≤ 50% of the bucket value).
10//!
11//! ## Example
12//!
13//! ```
14//! use latency_buckets::Histogram;
15//! use std::time::Duration;
16//!
17//! let mut h = Histogram::new();
18//! for ms in [10, 50, 200, 800, 1500, 3000] {
19//!     h.record(Duration::from_millis(ms));
20//! }
21//! let p50 = h.percentile(0.50);
22//! // Coarse log-scale buckets; expect a value in the same order of magnitude.
23//! assert!(p50.as_millis() >= 50 && p50.as_millis() <= 2000);
24//! ```
25
26#![deny(missing_docs)]
27
28use std::time::Duration;
29
30const N: usize = 30;
31
32/// Streaming latency histogram.
33#[derive(Debug, Clone)]
34pub struct Histogram {
35    buckets: [u64; N],
36    total: u64,
37}
38
39impl Default for Histogram {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl Histogram {
46    /// Empty histogram.
47    pub fn new() -> Self {
48        Self {
49            buckets: [0; N],
50            total: 0,
51        }
52    }
53
54    /// Record one sample.
55    pub fn record(&mut self, d: Duration) {
56        let micros = d.as_micros().max(1) as u64;
57        let idx = bucket_index(micros);
58        self.buckets[idx] += 1;
59        self.total += 1;
60    }
61
62    /// Total samples recorded.
63    pub fn count(&self) -> u64 {
64        self.total
65    }
66
67    /// Estimated percentile (q ∈ [0, 1]).
68    pub fn percentile(&self, q: f64) -> Duration {
69        if self.total == 0 {
70            return Duration::ZERO;
71        }
72        let target = (q.clamp(0.0, 1.0) * self.total as f64).ceil().max(1.0) as u64;
73        let mut running = 0u64;
74        for (i, &count) in self.buckets.iter().enumerate() {
75            if count == 0 {
76                continue;
77            }
78            running += count;
79            if running >= target {
80                // Pick midpoint of the bucket as estimate.
81                let lo = bucket_lo_micros(i);
82                let hi = bucket_hi_micros(i);
83                return Duration::from_micros((lo + hi) / 2);
84            }
85        }
86        // Fallback: top bucket midpoint.
87        let lo = bucket_lo_micros(N - 1);
88        let hi = bucket_hi_micros(N - 1);
89        Duration::from_micros((lo + hi) / 2)
90    }
91}
92
93fn bucket_index(micros: u64) -> usize {
94    // Buckets cover [2^i, 2^(i+1)) microseconds. Cap at N-1.
95    let i = 64 - micros.leading_zeros() - 1;
96    (i as usize).min(N - 1)
97}
98
99fn bucket_lo_micros(i: usize) -> u64 {
100    1u64 << i
101}
102
103fn bucket_hi_micros(i: usize) -> u64 {
104    (1u64 << (i + 1)) - 1
105}