Skip to main content

brainos_observe/
sampling.rs

1//! Log sampling for high-volume, low-information log lines.
2//!
3//! Some call sites fire on every iteration of a hot loop — a resource
4//! sampler heartbeat, a per-request embedding probe — where logging *every*
5//! occurrence drowns the signal and inflates log volume without adding
6//! information. [`LogSampler`] gates such sites so only 1 in N lines is
7//! emitted, while the value behind each line (a metric, an event) is still
8//! recorded every time.
9//!
10//! It is deliberately tiny and dependency-free: a single relaxed atomic
11//! counter. Sampling decisions don't need cross-thread ordering guarantees —
12//! an occasional off-by-one under concurrency only shifts *which* line is
13//! kept, never the long-run 1-in-N rate.
14
15use std::sync::atomic::{AtomicU64, Ordering};
16
/// A 1-in-N gate for high-volume log lines.
///
/// Construct once per high-volume site (or share via `Arc`) and call
/// [`LogSampler::should_emit`] at the log point:
///
/// ```
/// use brainos_observe::LogSampler;
///
/// let sampler = LogSampler::one_in(100);
/// for i in 0..250 {
///     if sampler.should_emit() {
///         // ~1 in 100 of these run; the other 99 are skipped.
///         let _ = i;
///     }
/// }
/// ```
#[derive(Debug)]
pub struct LogSampler {
    /// Emit one line for every `one_in_n` calls. Clamped to a minimum of 1,
    /// so a misconfigured `0` means "emit everything" rather than "never".
    one_in_n: u64,
    /// Number of [`LogSampler::should_emit`] calls made so far. Starts at 0
    /// and is bumped by one on every call, whether or not that call emits.
    count: AtomicU64,
}

impl LogSampler {
    /// Emit 1 in `n` calls. `n <= 1` disables sampling (every call emits).
    #[must_use]
    pub fn one_in(n: u32) -> Self {
        Self {
            // `u64::from` is a lossless widening. The clamp turns a
            // misconfigured 0 into 1 so that it means "emit everything".
            one_in_n: u64::from(n).max(1),
            count: AtomicU64::new(0),
        }
    }

    /// A sampler that emits on every call — the explicit "sampling off" form.
    #[must_use]
    pub fn unsampled() -> Self {
        Self::one_in(1)
    }

    /// Whether this call should emit its log line.
    ///
    /// Returns `true` on the 1st call and every `one_in_n`-th call
    /// thereafter, so a freshly built sampler always logs its first
    /// occurrence (you never lose the first instance of a newly-hot site).
    ///
    /// Every call advances the internal counter, including calls whose
    /// result is discarded — hence the `#[must_use]`.
    #[must_use = "each call advances the sample counter; discarding the result skews which lines are kept"]
    pub fn should_emit(&self) -> bool {
        // `fetch_add` returns the value *before* the increment, so the very
        // first call sees 0, which is a multiple of every rate. Relaxed is
        // enough: only the counter value itself matters here, not its
        // ordering relative to other memory operations.
        let seen_before = self.count.fetch_add(1, Ordering::Relaxed);
        seen_before.is_multiple_of(self.one_in_n)
    }

    /// The configured rate (post-clamp); 1 means unsampled.
    #[must_use]
    pub fn rate(&self) -> u64 {
        self.one_in_n
    }
}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// A rate of 1 lets every single call through.
    #[test]
    fn rate_one_emits_every_call() {
        let sampler = LogSampler::one_in(1);
        for _ in 0..10 {
            assert!(sampler.should_emit());
        }
    }

    /// A configured rate of 0 is clamped to 1 instead of silencing the site.
    #[test]
    fn zero_is_clamped_to_emit_everything() {
        let sampler = LogSampler::one_in(0);
        let clamped_rate = sampler.rate();
        assert_eq!(clamped_rate, 1);
        let first_call = sampler.should_emit();
        assert!(first_call);
    }

    /// At 1-in-4 the calls numbered 0 and 4 emit; 1, 2, 3 and 5 do not.
    #[test]
    fn emits_first_then_every_nth() {
        let sampler = LogSampler::one_in(4);
        let mut observed = Vec::with_capacity(6);
        for _ in 0..6 {
            observed.push(sampler.should_emit());
        }
        let expected = vec![true, false, false, false, true, false];
        assert_eq!(observed, expected, "first call emits, then every 4th");
    }

    /// Over many calls the emitted fraction is exactly 1/N.
    #[test]
    fn long_run_rate_is_one_in_n() {
        let sampler = LogSampler::one_in(10);
        let mut emitted: usize = 0;
        for _ in 0..1000 {
            if sampler.should_emit() {
                emitted += 1;
            }
        }
        assert_eq!(emitted, 100, "1000 calls at 1-in-10 → 100 emitted");
    }

    /// `unsampled()` is shorthand for a rate of 1.
    #[test]
    fn unsampled_is_rate_one() {
        let sampler = LogSampler::unsampled();
        assert_eq!(sampler.rate(), 1);
    }
}