skeg_telemetry/lib.rs
1//! Zero-overhead telemetry for skeg.
2//!
3//! All public API entry points are `#[inline(always)]`. When neither the
4//! `stats` nor `http` feature is enabled, every call collapses to a no-op
5//! the compiler eliminates (verified with `cargo asm`).
6//!
7//! When `stats` is enabled (default), the static counters and histograms
8//! tick on the hot path with a single atomic fetch_add each. Reading the
9//! values is done by [`stats::dump_text`] (or the helper accessors on
10//! [`metrics`] / [`histograms`]); reading does not lock, and never blocks
11//! the hot path.
12//!
13//! When `http` is also enabled, [`http::serve_blocking`] runs a tiny
14//! HTTP server on a dedicated thread that serves `/metrics` in Prometheus
15//! text format. The server is purely a reader — it never writes through
16//! the hot path.
17//!
18//! # Hot-path cost budget
19//!
20//! - per-op counter tick: `AtomicU64::fetch_add(1, Relaxed)` ≈ 1–2 ns
21//! - per-op histogram tick: leading-zeros bucket pick + one `fetch_add` ≈ 3–5 ns
22//!
23//! The crate's `benches/overhead.rs` gates these with criterion; CI fails
24//! the build if any record path exceeds 50 ns.
25
26#![cfg_attr(not(any(feature = "stats", feature = "http")), allow(dead_code))]
27
28#[cfg(any(feature = "stats", feature = "http"))]
29pub mod dynamic;
30#[cfg(any(feature = "stats", feature = "http"))]
31pub mod histograms;
32#[cfg(any(feature = "stats", feature = "http"))]
33pub mod metrics;
34#[cfg(any(feature = "stats", feature = "http"))]
35pub mod stats;
36
37#[cfg(feature = "http")]
38pub mod http;
39
40// ───────────────────────────────────────────────────────────────────────────
41// Re-exports for the dynamic registry (v0.2.0). Downstream crates that need
42// their own metrics should reach for these instead of patching the closed
43// enums below; see [`dynamic`] for the design rationale and pool sizing.
44// ───────────────────────────────────────────────────────────────────────────
45
46#[cfg(any(feature = "stats", feature = "http"))]
47pub use dynamic::{DynHistogram, DynOp, register_counter, register_gauge, register_histogram};
48
49#[cfg(any(feature = "stats", feature = "http"))]
50pub use metrics::MAX_SHARDS;
51
/// Hot-path operations that telemetry counts and times.
///
/// The enum is `repr(usize)` with explicit discriminants so a variant can be
/// used directly as an index into the static metric arrays. When a new
/// hot-path operation needs counting, add its variant here; the array sizes
/// in [`metrics`] follow this enum.
#[repr(usize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Op {
    /// Scalar lookup: `GET key`.
    Get = 0,
    /// Scalar store: `SET key val` (group-committed downstream).
    Set = 1,
    /// Tombstone: `DEL key`.
    Del = 2,
    /// Vector store: `VSET name vec`.
    VSet = 3,
    /// Vector top-k search: `VSEARCH name vec k`.
    VSearch = 4,
    /// Vector tombstone: `VDEL name id`.
    VDel = 5,
    /// Round-trip probe: `PING`.
    Ping = 6,
}

impl Op {
    /// How many variants exist. If this grows, the array sizes in
    /// [`metrics`] must be updated as well.
    pub const COUNT: usize = 7;

    /// Every variant, listed in declaration order, so the dumpers can
    /// iterate without resorting to unsafe transmutes.
    pub const ALL: [Self; Self::COUNT] = [
        Self::Get,
        Self::Set,
        Self::Del,
        Self::VSet,
        Self::VSearch,
        Self::VDel,
        Self::Ping,
    ];

    /// Short lowercase name used as the metric label for this operation.
    #[inline]
    pub const fn name(self) -> &'static str {
        // Exhaustive on purpose: adding a variant without a label must not
        // compile.
        match self {
            Self::Get => "get",
            Self::Set => "set",
            Self::Del => "del",
            Self::VSet => "vset",
            Self::VSearch => "vsearch",
            Self::VDel => "vdel",
            Self::Ping => "ping",
        }
    }
}
106
107// ───────────────────────────────────────────────────────────────────────────
108// Public hot-path API.
// Every function is `#[inline(always)]`. With no telemetry feature enabled
// the body reduces to a `let _ = …` sink that only consumes the parameters
// (silencing unused-variable warnings), so the inlined call has no side
// effects and the compiler removes it.
112// ───────────────────────────────────────────────────────────────────────────
113
114/// Record completion of one operation, with its observed duration.
115///
116/// `shard_id` is the worker shard that handled the request (used to
117/// partition counters and avoid cross-core cache-line contention).
118#[inline(always)]
119pub fn record_op(op: Op, shard_id: u16, duration: core::time::Duration) {
120 #[cfg(any(feature = "stats", feature = "http"))]
121 {
122 metrics::tick_op(op, shard_id);
123 histograms::observe_us(op, duration.as_micros() as u64);
124 }
125 #[cfg(not(any(feature = "stats", feature = "http")))]
126 {
127 let _ = (op, shard_id, duration);
128 }
129}
130
131/// Set the current value of a gauge metric (overwrites; not a counter).
132#[inline(always)]
133pub fn set_gauge(g: Gauge, value: u64) {
134 #[cfg(any(feature = "stats", feature = "http"))]
135 {
136 metrics::set_gauge(g, value);
137 }
138 #[cfg(not(any(feature = "stats", feature = "http")))]
139 {
140 let _ = (g, value);
141 }
142}
143
144/// Increment a gauge by one. Pair with [`decr_gauge`] for "in flight"
145/// counters where the natural API is `incr` at the start of an
146/// operation and `decr` at the end.
147#[inline(always)]
148pub fn incr_gauge(g: Gauge) {
149 #[cfg(any(feature = "stats", feature = "http"))]
150 {
151 metrics::incr_gauge(g);
152 }
153 #[cfg(not(any(feature = "stats", feature = "http")))]
154 {
155 let _ = g;
156 }
157}
158
159/// Decrement a gauge by one. Safe to call when the gauge is already
160/// zero (wraps; pair calls correctly with [`incr_gauge`] for symmetry).
161#[inline(always)]
162pub fn decr_gauge(g: Gauge) {
163 #[cfg(any(feature = "stats", feature = "http"))]
164 {
165 metrics::decr_gauge(g);
166 }
167 #[cfg(not(any(feature = "stats", feature = "http")))]
168 {
169 let _ = g;
170 }
171}
172
173/// Increment a counter that is not tied to a specific operation.
174#[inline(always)]
175pub fn tick_counter(c: Counter) {
176 #[cfg(any(feature = "stats", feature = "http"))]
177 {
178 metrics::tick_counter(c, 1);
179 }
180 #[cfg(not(any(feature = "stats", feature = "http")))]
181 {
182 let _ = c;
183 }
184}
185
186/// Add a delta to a counter (for batch / amortised paths).
187#[inline(always)]
188pub fn add_counter(c: Counter, delta: u64) {
189 #[cfg(any(feature = "stats", feature = "http"))]
190 {
191 metrics::tick_counter(c, delta);
192 }
193 #[cfg(not(any(feature = "stats", feature = "http")))]
194 {
195 let _ = (c, delta);
196 }
197}
198
/// Counters that are not part of the per-op hot path.
///
/// `repr(usize)` with explicit discriminants, mirroring [`Op`].
#[repr(usize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Counter {
    /// Exported as `skeg_cache_hits_total`.
    CacheHits = 0,
    /// Exported as `skeg_cache_misses_total`.
    CacheMisses = 1,
    /// Exported as `skeg_cache_evictions_total`.
    CacheEvictions = 2,
    /// Exported as `skeg_compaction_runs_total`.
    CompactionRunsTotal = 3,
    /// Exported as `skeg_compaction_bytes_total`.
    CompactionBytesTotal = 4,
    /// Exported as `skeg_vlog_syncs_total`.
    VlogSyncs = 5,
    /// Exported as `skeg_vlog_group_commit_batches_total`.
    VlogGroupCommitBatches = 6,
}

impl Counter {
    /// How many variants exist.
    pub const COUNT: usize = 7;

    /// Every variant, listed in declaration order.
    pub const ALL: [Self; Self::COUNT] = [
        Self::CacheHits,
        Self::CacheMisses,
        Self::CacheEvictions,
        Self::CompactionRunsTotal,
        Self::CompactionBytesTotal,
        Self::VlogSyncs,
        Self::VlogGroupCommitBatches,
    ];

    /// Full metric name under which this counter is reported.
    #[inline]
    pub const fn name(self) -> &'static str {
        // Exhaustive on purpose: a new variant must be given a name.
        match self {
            Self::CacheHits => "skeg_cache_hits_total",
            Self::CacheMisses => "skeg_cache_misses_total",
            Self::CacheEvictions => "skeg_cache_evictions_total",
            Self::CompactionRunsTotal => "skeg_compaction_runs_total",
            Self::CompactionBytesTotal => "skeg_compaction_bytes_total",
            Self::VlogSyncs => "skeg_vlog_syncs_total",
            Self::VlogGroupCommitBatches => "skeg_vlog_group_commit_batches_total",
        }
    }
}
237
/// Gauges: point-in-time readings that may go up or down.
///
/// Wiring status (as of v0.2.1):
/// - `VlogLiveBytes`           set in the `skeg-server` `STATS` handler
/// - `VlogSegmentsLive`        set in the `skeg-server` `STATS` handler
/// - `VlogTotalBytes`          set in the `skeg-server` `STATS` handler
/// - `CompactionInProgress`    driven by an RAII guard in `vlog::compact_segment`
/// - `VlogSegmentsCompacting`  driven by an RAII guard in `vlog::compact_segment`
/// - `VindexSizeBytes`         set in the `skeg-server` `STATS` handler
/// - `VindexVectors`           set in the `skeg-server` `STATS` handler
///
/// The vlog-segment and vindex gauges are refreshed on each `STATS` call
/// (cheap arithmetic, no allocation). The compaction gauges go through
/// `incr`/`decr` instead, so their count stays accurate between polls.
#[repr(usize)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Gauge {
    /// Exported as `skeg_vlog_segments_live`.
    VlogSegmentsLive = 0,
    /// Exported as `skeg_vlog_segments_compacting`.
    VlogSegmentsCompacting = 1,
    /// Exported as `skeg_vlog_live_bytes`.
    VlogLiveBytes = 2,
    /// Exported as `skeg_vlog_total_bytes`.
    VlogTotalBytes = 3,
    /// Exported as `skeg_compaction_in_progress`.
    CompactionInProgress = 4,
    /// Exported as `skeg_vindex_size_bytes`.
    VindexSizeBytes = 5,
    /// Exported as `skeg_vindex_vectors`.
    VindexVectors = 6,
}

impl Gauge {
    /// How many variants exist.
    pub const COUNT: usize = 7;

    /// Every variant, listed in declaration order.
    pub const ALL: [Self; Self::COUNT] = [
        Self::VlogSegmentsLive,
        Self::VlogSegmentsCompacting,
        Self::VlogLiveBytes,
        Self::VlogTotalBytes,
        Self::CompactionInProgress,
        Self::VindexSizeBytes,
        Self::VindexVectors,
    ];

    /// Full metric name under which this gauge is reported.
    #[inline]
    pub const fn name(self) -> &'static str {
        // Exhaustive on purpose: a new variant must be given a name.
        match self {
            Self::VlogSegmentsLive => "skeg_vlog_segments_live",
            Self::VlogSegmentsCompacting => "skeg_vlog_segments_compacting",
            Self::VlogLiveBytes => "skeg_vlog_live_bytes",
            Self::VlogTotalBytes => "skeg_vlog_total_bytes",
            Self::CompactionInProgress => "skeg_compaction_in_progress",
            Self::VindexSizeBytes => "skeg_vindex_size_bytes",
            Self::VindexVectors => "skeg_vindex_vectors",
        }
    }
}
289
/// Placeholder marker for the zero-cost guarantee; it asserts nothing.
///
/// The initializer is the unit value, so this constant always evaluates and
/// cannot detect a regression in `record_op` or any other entry point. That
/// the feature-less build collapses to no-ops is established outside the
/// type system: the crate-level docs cite `cargo asm` inspection and the
/// criterion gate in `benches/overhead.rs`.
///
/// What the compiler does guarantee is narrower: the enums and the public
/// function signatures in this file are not feature-gated, so they exist in
/// every configuration and consumer code needs no `#[cfg]` gates.
#[allow(dead_code)]
const _ASSERT_ZERO_COST: () = ();