brainos_observe/sampling.rs
1//! Log sampling for high-volume, low-information log lines.
2//!
3//! Some call sites fire on every iteration of a hot loop — a resource
4//! sampler heartbeat, a per-request embedding probe — where logging *every*
5//! occurrence drowns the signal and inflates log volume without adding
6//! information. [`LogSampler`] gates such sites so only 1 in N lines is
7//! emitted, while the value behind each line (a metric, an event) is still
8//! recorded every time.
9//!
10//! It is deliberately tiny and dependency-free: a single relaxed atomic
11//! counter. Sampling decisions don't need cross-thread ordering guarantees —
12//! an occasional off-by-one under concurrency only shifts *which* line is
13//! kept, never the long-run 1-in-N rate.
14
15use std::sync::atomic::{AtomicU64, Ordering};
16
/// A 1-in-N gate for high-volume log lines.
///
/// Construct one per high-volume site — as a `static`, a struct field, or
/// shared via `Arc` — and call [`LogSampler::should_emit`] at the log point:
///
/// ```
/// use brainos_observe::LogSampler;
///
/// let sampler = LogSampler::one_in(100);
/// for i in 0..250 {
///     if sampler.should_emit() {
///         // Runs on iterations 0, 100 and 200; the rest are skipped.
///         let _ = i;
///     }
/// }
/// ```
#[derive(Debug)]
pub struct LogSampler {
    /// Emit one line for every `one_in_n` calls. Always at least 1: the
    /// constructor maps a misconfigured `0` to "emit everything" rather
    /// than "never".
    one_in_n: u64,
    /// Number of [`LogSampler::should_emit`] calls made so far. Wraps on
    /// overflow, as `fetch_add` does.
    count: AtomicU64,
}

impl LogSampler {
    /// Emit 1 in `n` calls. `n <= 1` disables sampling (every call emits).
    ///
    /// This is a `const fn`, so a sampler can live directly in a `static`
    /// next to the hot call site without any lazy-initialisation wrapper.
    ///
    /// # Examples
    ///
    /// ```
    /// use brainos_observe::LogSampler;
    ///
    /// static HEARTBEAT_LOG: LogSampler = LogSampler::one_in(100);
    ///
    /// // The very first occurrence is always emitted.
    /// assert!(HEARTBEAT_LOG.should_emit());
    /// ```
    #[must_use]
    pub const fn one_in(n: u32) -> Self {
        // `Ord::max` is not callable from a `const fn`, so clamp by hand.
        // The `as` cast is a lossless u32 -> u64 widening.
        let one_in_n = if n == 0 { 1 } else { n as u64 };
        Self {
            one_in_n,
            count: AtomicU64::new(0),
        }
    }

    /// A sampler that emits on every call — the explicit "sampling off" form.
    #[must_use]
    pub const fn unsampled() -> Self {
        Self::one_in(1)
    }

    /// Whether this call should emit its log line.
    ///
    /// Returns `true` on the 1st call and on every `one_in_n`-th call after
    /// it, so a freshly built sampler always logs its first occurrence (you
    /// never lose the first instance of a newly-hot site).
    ///
    /// Every call advances the internal counter, including calls whose
    /// result is discarded — hence `#[must_use]`.
    #[must_use]
    pub fn should_emit(&self) -> bool {
        // Relaxed is enough: the atomic read-modify-write hands each caller
        // a distinct counter value, and nothing else is synchronised
        // through this counter.
        let seen = self.count.fetch_add(1, Ordering::Relaxed);
        seen.is_multiple_of(self.one_in_n)
    }

    /// The configured rate (post-clamp); 1 means unsampled.
    #[must_use]
    pub const fn rate(&self) -> u64 {
        self.one_in_n
    }
}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// A rate of 1 is "sampling off": no call is ever suppressed.
    #[test]
    fn rate_one_emits_every_call() {
        let sampler = LogSampler::one_in(1);
        for _ in 0..10 {
            assert!(sampler.should_emit());
        }
    }

    /// A misconfigured rate of 0 is treated as 1 instead of silencing the site.
    #[test]
    fn zero_is_clamped_to_emit_everything() {
        let sampler = LogSampler::one_in(0);
        assert_eq!(sampler.rate(), 1);
        assert!(sampler.should_emit());
    }

    /// The first call emits; after that only every 4th call does.
    #[test]
    fn emits_first_then_every_nth() {
        let sampler = LogSampler::one_in(4);
        // call index:  0     1      2      3      4     5
        let expected = [true, false, false, false, true, false];
        let observed: [bool; 6] = std::array::from_fn(|_| sampler.should_emit());
        assert_eq!(observed, expected, "first call emits, then every 4th");
    }

    /// Over many calls the emitted fraction is exactly 1 in N.
    #[test]
    fn long_run_rate_is_one_in_n() {
        let sampler = LogSampler::one_in(10);
        let mut emitted = 0usize;
        for _ in 0..1000 {
            if sampler.should_emit() {
                emitted += 1;
            }
        }
        assert_eq!(emitted, 100, "1000 calls at 1-in-10 → 100 emitted");
    }

    /// `unsampled()` is shorthand for a rate of 1.
    #[test]
    fn unsampled_is_rate_one() {
        let sampler = LogSampler::unsampled();
        assert_eq!(sampler.rate(), 1);
    }
}
110}