latency-buckets 0.1.0

Streaming histogram + percentile estimator for LLM call latencies. Fixed log-scale buckets, O(1) record, p50/p90/p95/p99 in microseconds. Zero deps.
Documentation
//! # latency-buckets
//!
//! Streaming histogram for LLM call latencies with constant-memory
//! percentile estimation.
//!
//! Buckets are log-scale (base 2), covering 1 µs to ~17 minutes in 30
//! buckets. Each record is `O(1)`. Percentiles are estimated by
//! linear interpolation inside the chosen bucket; expected error is
//! roughly half a bucket width (≤ 50% of the bucket value).
//!
//! ## Example
//!
//! ```
//! use latency_buckets::Histogram;
//! use std::time::Duration;
//!
//! let mut h = Histogram::new();
//! for ms in [10, 50, 200, 800, 1500, 3000] {
//!     h.record(Duration::from_millis(ms));
//! }
//! let p50 = h.percentile(0.50);
//! // Coarse log-scale buckets; expect a value in the same order of magnitude.
//! assert!(p50.as_millis() >= 50 && p50.as_millis() <= 2000);
//! ```

#![deny(missing_docs)]

use std::time::Duration;

const N: usize = 30;

/// Streaming latency histogram.
#[derive(Debug, Clone)]
pub struct Histogram {
    buckets: [u64; N],
    total: u64,
}

impl Default for Histogram {
    fn default() -> Self {
        Self::new()
    }
}

impl Histogram {
    /// Empty histogram.
    pub fn new() -> Self {
        Self {
            buckets: [0; N],
            total: 0,
        }
    }

    /// Record one sample.
    pub fn record(&mut self, d: Duration) {
        let micros = d.as_micros().max(1) as u64;
        let idx = bucket_index(micros);
        self.buckets[idx] += 1;
        self.total += 1;
    }

    /// Total samples recorded.
    pub fn count(&self) -> u64 {
        self.total
    }

    /// Estimated percentile (q ∈ [0, 1]).
    pub fn percentile(&self, q: f64) -> Duration {
        if self.total == 0 {
            return Duration::ZERO;
        }
        let target = (q.clamp(0.0, 1.0) * self.total as f64).ceil().max(1.0) as u64;
        let mut running = 0u64;
        for (i, &count) in self.buckets.iter().enumerate() {
            if count == 0 {
                continue;
            }
            running += count;
            if running >= target {
                // Pick midpoint of the bucket as estimate.
                let lo = bucket_lo_micros(i);
                let hi = bucket_hi_micros(i);
                return Duration::from_micros((lo + hi) / 2);
            }
        }
        // Fallback: top bucket midpoint.
        let lo = bucket_lo_micros(N - 1);
        let hi = bucket_hi_micros(N - 1);
        Duration::from_micros((lo + hi) / 2)
    }
}

fn bucket_index(micros: u64) -> usize {
    // Buckets cover [2^i, 2^(i+1)) microseconds. Cap at N-1.
    let i = 64 - micros.leading_zeros() - 1;
    (i as usize).min(N - 1)
}

fn bucket_lo_micros(i: usize) -> u64 {
    1u64 << i
}

fn bucket_hi_micros(i: usize) -> u64 {
    (1u64 << (i + 1)) - 1
}