latency_buckets/lib.rs
1//! # latency-buckets
2//!
3//! Streaming histogram for LLM call latencies with constant-memory
4//! percentile estimation.
5//!
6//! Buckets are log-scale (base 2), covering 1 µs to ~17 minutes in 30
7//! buckets. Each record is `O(1)`. Percentiles are estimated by
8//! linear interpolation inside the chosen bucket; expected error is
9//! roughly half a bucket width (≤ 50% of the bucket value).
10//!
11//! ## Example
12//!
13//! ```
14//! use latency_buckets::Histogram;
15//! use std::time::Duration;
16//!
17//! let mut h = Histogram::new();
18//! for ms in [10, 50, 200, 800, 1500, 3000] {
19//! h.record(Duration::from_millis(ms));
20//! }
21//! let p50 = h.percentile(0.50);
22//! // Coarse log-scale buckets; expect a value in the same order of magnitude.
23//! assert!(p50.as_millis() >= 50 && p50.as_millis() <= 2000);
24//! ```
25
26#![deny(missing_docs)]
27
28use std::time::Duration;
29
30const N: usize = 30;
31
32/// Streaming latency histogram.
33#[derive(Debug, Clone)]
34pub struct Histogram {
35 buckets: [u64; N],
36 total: u64,
37}
38
39impl Default for Histogram {
40 fn default() -> Self {
41 Self::new()
42 }
43}
44
45impl Histogram {
46 /// Empty histogram.
47 pub fn new() -> Self {
48 Self {
49 buckets: [0; N],
50 total: 0,
51 }
52 }
53
54 /// Record one sample.
55 pub fn record(&mut self, d: Duration) {
56 let micros = d.as_micros().max(1) as u64;
57 let idx = bucket_index(micros);
58 self.buckets[idx] += 1;
59 self.total += 1;
60 }
61
62 /// Total samples recorded.
63 pub fn count(&self) -> u64 {
64 self.total
65 }
66
67 /// Estimated percentile (q ∈ [0, 1]).
68 pub fn percentile(&self, q: f64) -> Duration {
69 if self.total == 0 {
70 return Duration::ZERO;
71 }
72 let target = (q.clamp(0.0, 1.0) * self.total as f64).ceil().max(1.0) as u64;
73 let mut running = 0u64;
74 for (i, &count) in self.buckets.iter().enumerate() {
75 if count == 0 {
76 continue;
77 }
78 running += count;
79 if running >= target {
80 // Pick midpoint of the bucket as estimate.
81 let lo = bucket_lo_micros(i);
82 let hi = bucket_hi_micros(i);
83 return Duration::from_micros((lo + hi) / 2);
84 }
85 }
86 // Fallback: top bucket midpoint.
87 let lo = bucket_lo_micros(N - 1);
88 let hi = bucket_hi_micros(N - 1);
89 Duration::from_micros((lo + hi) / 2)
90 }
91}
92
93fn bucket_index(micros: u64) -> usize {
94 // Buckets cover [2^i, 2^(i+1)) microseconds. Cap at N-1.
95 let i = 64 - micros.leading_zeros() - 1;
96 (i as usize).min(N - 1)
97}
98
99fn bucket_lo_micros(i: usize) -> u64 {
100 1u64 << i
101}
102
103fn bucket_hi_micros(i: usize) -> u64 {
104 (1u64 << (i + 1)) - 1
105}