Skip to main content

musefs_core/
telemetry.rs

1//! Runtime telemetry surface: plain-data snapshot types and Prometheus
2//! exposition-format rendering for the `.musefs-metrics/metrics` virtual file
3//! (#394). All rendering lives here (most of the data is core-owned and this is
4//! unit-testable without a mount); `musefs-fuse` gathers the fuse-side half and
5//! the optional allocator/syscall probes, then calls [`render_prometheus`].
6
7use std::fmt::Write;
8
/// Snapshot of the telemetry owned by the core crate: the file-handle slab
/// count, the header and size caches, read-ahead accounting, the virtual-tree
/// footprint, and refresh health. Produced by `Musefs::telemetry`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CoreTelemetry {
    /// Open file handles in the core slab.
    pub handles_open: u64,
    /// Resolved-file entries in the header cache.
    pub cache_header_entries: u64,
    /// Resident inline bytes in the header cache.
    pub cache_header_bytes: u64,
    /// Header-cache byte budget.
    pub cache_header_bytes_max: u64,
    /// Raw header-cache key hits (a hit may still trigger a content-version
    /// rebuild).
    pub cache_header_hits: u64,
    /// Raw header-cache key misses.
    pub cache_header_misses: u64,
    /// Entries in the getattr size cache.
    pub cache_size_entries: u64,
    /// Backing read-ahead RAM budget (0 when read-ahead is off).
    pub readahead_budget_bytes: u64,
    /// Bytes currently held across all read-ahead buffers.
    pub readahead_charged_bytes: u64,
    /// Live virtual-tree inodes.
    pub tree_nodes: u64,
    /// Interned paths in the inode allocator.
    pub inode_paths: u64,
    /// Refresh generation (bumped on each non-empty refresh).
    pub refresh_generation: u64,
    /// Polls that took the changelog-gap full-rebuild path.
    pub refresh_gap_fallbacks: u64,
    /// `true` if a poisoned-lock recovery left a full rebuild pending.
    pub refresh_needs_rebuild: bool,
}
28
/// Kernel-passthrough telemetry. Carried as an `Option` inside
/// [`FuseTelemetry`], where it is `None` off Linux.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PassthroughTelemetry {
    /// `true` if kernel passthrough is sticky-disabled.
    pub disabled: bool,
    /// Live kernel-passthrough backing registrations.
    pub active: u64,
}
35
36/// Fuse-owned telemetry: uptime, the read/dir-handle gates and their caps, the
37/// worker pool, and (Linux only) passthrough state.
38#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
39pub struct FuseTelemetry {
40    pub uptime_seconds: u64,
41    pub reads_inflight: u64,
42    pub reads_inflight_max: u64,
43    pub read_errors: u64,
44    pub dir_handles: u64,
45    pub dir_handles_max: u64,
46    pub pool_workers: u64,
47    pub pool_active: u64,
48    pub pool_queued: u64,
49    pub passthrough: Option<PassthroughTelemetry>,
50}
51
/// Allocator statistics reported by jemalloc; only available on a build with
/// the `jemalloc` feature enabled.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct AllocatorStats {
    /// jemalloc bytes allocated and in use.
    pub allocated: u64,
    /// jemalloc resident bytes (RSS proxy).
    pub resident: u64,
    /// jemalloc bytes in active pages.
    pub active: u64,
    /// jemalloc retained (lazily-purgeable) bytes.
    pub retained: u64,
}
60
/// Append one gauge sample to `out`: a `# HELP` line, a `# TYPE … gauge` line,
/// and the `name value` sample line, each newline-terminated.
fn gauge(out: &mut String, name: &str, help: &str, val: u64) {
    // Formatting into a `String` cannot fail, so the results are discarded.
    let _ = writeln!(out, "# HELP {name} {help}");
    let _ = writeln!(out, "# TYPE {name} gauge");
    let _ = writeln!(out, "{name} {val}");
}
67
/// Append one counter sample to `out`: a `# HELP` line, a `# TYPE … counter`
/// line, and the `name value` sample line, each newline-terminated.
fn counter(out: &mut String, name: &str, help: &str, val: u64) {
    // Formatting into a `String` cannot fail, so the results are discarded.
    let _ = writeln!(out, "# HELP {name} {help}");
    let _ = writeln!(out, "# TYPE {name} counter");
    let _ = writeln!(out, "{name} {val}");
}
74
75/// Render a full Prometheus exposition-format document. Feature-gated blocks
76/// (`alloc`, `syscalls`) are omitted entirely when their `Option` is `None`.
77pub fn render_prometheus(
78    core: &CoreTelemetry,
79    fuse: &FuseTelemetry,
80    alloc: Option<&AllocatorStats>,
81    syscalls: Option<&crate::metrics::Snapshot>,
82) -> String {
83    let mut out = String::with_capacity(4096);
84
85    gauge(
86        &mut out,
87        "musefs_uptime_seconds",
88        "Seconds since the mount started.",
89        fuse.uptime_seconds,
90    );
91    gauge(
92        &mut out,
93        "musefs_handles_open",
94        "Open file handles in the core slab.",
95        core.handles_open,
96    );
97
98    gauge(
99        &mut out,
100        "musefs_reads_inflight",
101        "Foreground reads queued/in-flight.",
102        fuse.reads_inflight,
103    );
104    gauge(
105        &mut out,
106        "musefs_reads_inflight_max",
107        "Cap before reads are rejected with EAGAIN.",
108        fuse.reads_inflight_max,
109    );
110    counter(
111        &mut out,
112        "musefs_read_errors_total",
113        "Reads that failed: EAGAIN load-sheds plus error replies from the read worker.",
114        fuse.read_errors,
115    );
116    gauge(
117        &mut out,
118        "musefs_dir_handles",
119        "Open directory-listing snapshots.",
120        fuse.dir_handles,
121    );
122    gauge(
123        &mut out,
124        "musefs_dir_handles_max",
125        "Cap before opendir is rejected with ENFILE.",
126        fuse.dir_handles_max,
127    );
128
129    gauge(
130        &mut out,
131        "musefs_pool_workers",
132        "Worker-pool size.",
133        fuse.pool_workers,
134    );
135    gauge(
136        &mut out,
137        "musefs_pool_active",
138        "Workers currently running a job.",
139        fuse.pool_active,
140    );
141    gauge(
142        &mut out,
143        "musefs_pool_queued",
144        "Jobs waiting in the worker-pool queue.",
145        fuse.pool_queued,
146    );
147
148    gauge(
149        &mut out,
150        "musefs_cache_header_entries",
151        "Resolved-file entries in the header cache.",
152        core.cache_header_entries,
153    );
154    gauge(
155        &mut out,
156        "musefs_cache_header_bytes",
157        "Resident inline bytes in the header cache.",
158        core.cache_header_bytes,
159    );
160    gauge(
161        &mut out,
162        "musefs_cache_header_bytes_max",
163        "Header-cache byte budget.",
164        core.cache_header_bytes_max,
165    );
166    counter(
167        &mut out,
168        "musefs_cache_header_hits_total",
169        "Raw header-cache key hits; a hit may still trigger a content-version rebuild.",
170        core.cache_header_hits,
171    );
172    counter(
173        &mut out,
174        "musefs_cache_header_misses_total",
175        "Raw header-cache key misses.",
176        core.cache_header_misses,
177    );
178    gauge(
179        &mut out,
180        "musefs_cache_size_entries",
181        "Entries in the getattr size cache.",
182        core.cache_size_entries,
183    );
184
185    gauge(
186        &mut out,
187        "musefs_readahead_budget_bytes",
188        "Backing read-ahead RAM budget (0 when read-ahead is off).",
189        core.readahead_budget_bytes,
190    );
191    gauge(
192        &mut out,
193        "musefs_readahead_charged_bytes",
194        "Bytes currently held across all read-ahead buffers.",
195        core.readahead_charged_bytes,
196    );
197
198    gauge(
199        &mut out,
200        "musefs_tree_nodes",
201        "Live virtual-tree inodes.",
202        core.tree_nodes,
203    );
204    gauge(
205        &mut out,
206        "musefs_inode_paths",
207        "Interned paths in the inode allocator.",
208        core.inode_paths,
209    );
210
211    gauge(
212        &mut out,
213        "musefs_refresh_generation",
214        "Refresh generation (bumped on each non-empty refresh).",
215        core.refresh_generation,
216    );
217    counter(
218        &mut out,
219        "musefs_refresh_gap_fallbacks_total",
220        "Polls that took the changelog-gap full-rebuild path.",
221        core.refresh_gap_fallbacks,
222    );
223    gauge(
224        &mut out,
225        "musefs_refresh_needs_rebuild",
226        "1 if a poisoned-lock recovery left a full rebuild pending.",
227        u64::from(core.refresh_needs_rebuild),
228    );
229
230    if let Some(pt) = fuse.passthrough {
231        gauge(
232            &mut out,
233            "musefs_passthrough_disabled",
234            "1 if kernel passthrough is sticky-disabled.",
235            u64::from(pt.disabled),
236        );
237        gauge(
238            &mut out,
239            "musefs_passthrough_active",
240            "Live kernel-passthrough backing registrations.",
241            pt.active,
242        );
243    }
244
245    if let Some(a) = alloc {
246        gauge(
247            &mut out,
248            "musefs_alloc_allocated_bytes",
249            "jemalloc bytes allocated and in use.",
250            a.allocated,
251        );
252        gauge(
253            &mut out,
254            "musefs_alloc_resident_bytes",
255            "jemalloc resident bytes (RSS proxy).",
256            a.resident,
257        );
258        gauge(
259            &mut out,
260            "musefs_alloc_active_bytes",
261            "jemalloc bytes in active pages.",
262            a.active,
263        );
264        gauge(
265            &mut out,
266            "musefs_alloc_retained_bytes",
267            "jemalloc retained (lazily-purgeable) bytes.",
268            a.retained,
269        );
270    }
271
272    if let Some(s) = syscalls {
273        counter(
274            &mut out,
275            "musefs_backing_opens_total",
276            "Serve-path backing-file opens.",
277            s.opens,
278        );
279        counter(
280            &mut out,
281            "musefs_backing_stats_total",
282            "Serve-path metadata syscalls.",
283            s.stats,
284        );
285        counter(
286            &mut out,
287            "musefs_backing_preads_total",
288            "Serve-path positioned backing reads.",
289            s.preads,
290        );
291        counter(
292            &mut out,
293            "musefs_backing_pread_bytes_total",
294            "Serve-path backing bytes attempted.",
295            s.pread_bytes,
296        );
297        counter(
298            &mut out,
299            "musefs_art_chunks_total",
300            "Art-blob chunks streamed from the DB.",
301            s.art_chunks,
302        );
303        counter(
304            &mut out,
305            "musefs_binary_tag_chunks_total",
306            "Binary-tag chunks streamed from the DB.",
307            s.binary_tag_chunks,
308        );
309        counter(
310            &mut out,
311            "musefs_scan_opens_total",
312            "Scan-path backing-file opens.",
313            s.scan_opens,
314        );
315        counter(
316            &mut out,
317            "musefs_scan_preads_total",
318            "Scan-path positioned reads.",
319            s.scan_preads,
320        );
321        counter(
322            &mut out,
323            "musefs_scan_bytes_total",
324            "Scan-path bytes read.",
325            s.scan_bytes_read,
326        );
327        counter(
328            &mut out,
329            "musefs_readahead_hits_total",
330            "Reads served wholly from a read-ahead buffer (no backing pread).",
331            s.readahead_hits,
332        );
333        counter(
334            &mut out,
335            "musefs_readahead_misses_total",
336            "Reads that missed the read-ahead buffer and hit the backing file.",
337            s.readahead_misses,
338        );
339    }
340
341    out.push('\n');
342    out
343}
344
#[cfg(test)]
mod tests {
    // Unit tests for `render_prometheus`. They check that each metric family
    // appears with the right TYPE and value, and that every optional block
    // (passthrough, allocator, syscalls) is emitted or omitted as a whole.
    use super::*;

    /// A fully-populated core snapshot with distinct, easy-to-grep values.
    fn sample_core() -> CoreTelemetry {
        CoreTelemetry {
            handles_open: 3,
            cache_header_entries: 7,
            cache_header_bytes: 4096,
            cache_header_bytes_max: 64 * 1024 * 1024,
            cache_header_hits: 100,
            cache_header_misses: 5,
            cache_size_entries: 9,
            readahead_budget_bytes: 67_108_864,
            readahead_charged_bytes: 8192,
            tree_nodes: 42,
            inode_paths: 50,
            refresh_generation: 2,
            refresh_gap_fallbacks: 1,
            refresh_needs_rebuild: false,
        }
    }

    /// A fully-populated fuse snapshot, with the passthrough block present.
    fn sample_fuse() -> FuseTelemetry {
        FuseTelemetry {
            uptime_seconds: 60,
            reads_inflight: 1,
            reads_inflight_max: 1024,
            read_errors: 7,
            dir_handles: 2,
            dir_handles_max: 1024,
            pool_workers: 8,
            pool_active: 1,
            pool_queued: 0,
            passthrough: Some(PassthroughTelemetry {
                disabled: false,
                active: 4,
            }),
        }
    }

    #[test]
    fn renders_core_and_fuse_gauges() {
        let out = render_prometheus(&sample_core(), &sample_fuse(), None, None);
        assert!(out.contains("# TYPE musefs_handles_open gauge\nmusefs_handles_open 3\n"));
        assert!(out.contains("musefs_reads_inflight 1\n"));
        assert!(out.contains("musefs_reads_inflight_max 1024\n"));
        assert!(
            out.contains("# TYPE musefs_read_errors_total counter\nmusefs_read_errors_total 7\n")
        );
        assert!(out.contains("musefs_pool_queued 0\n"));
        assert!(out.contains("musefs_readahead_budget_bytes 67108864\n"));
        assert!(out.contains("musefs_readahead_charged_bytes 8192\n"));
        assert!(out.contains("musefs_tree_nodes 42\n"));
        // counter type for hit/miss
        assert!(out.contains(
            "# TYPE musefs_cache_header_hits_total counter\nmusefs_cache_header_hits_total 100\n"
        ));
        assert!(out.contains(
            "# TYPE musefs_cache_header_misses_total counter\nmusefs_cache_header_misses_total 5\n"
        ));
        // the remaining always-present counter
        assert!(out.contains(
            "# TYPE musefs_refresh_gap_fallbacks_total counter\nmusefs_refresh_gap_fallbacks_total 1\n"
        ));
        assert!(out.contains("musefs_refresh_needs_rebuild 0\n"));
    }

    #[test]
    fn passthrough_block_present_when_some_absent_when_none() {
        let with = render_prometheus(&sample_core(), &sample_fuse(), None, None);
        assert!(with.contains("musefs_passthrough_active 4\n"));
        assert!(with.contains("musefs_passthrough_disabled 0\n"));

        let mut f = sample_fuse();
        f.passthrough = None;
        let without = render_prometheus(&sample_core(), &f, None, None);
        assert!(!without.contains("musefs_passthrough"));
    }

    #[test]
    fn alloc_and_syscall_blocks_are_omitted_when_none() {
        let out = render_prometheus(&sample_core(), &sample_fuse(), None, None);
        assert!(!out.contains("musefs_alloc_"));
        // The syscall block spans several name families, not just
        // `musefs_backing_`; every one of them must be absent.
        assert!(!out.contains("musefs_backing_"));
        assert!(!out.contains("musefs_art_chunks_total"));
        assert!(!out.contains("musefs_binary_tag_chunks_total"));
        assert!(!out.contains("musefs_scan_"));
        // `musefs_readahead_{budget,charged}_bytes` are core gauges and stay
        // present; only the hit/miss counters belong to the syscall block.
        assert!(!out.contains("musefs_readahead_hits_total"));
        assert!(!out.contains("musefs_readahead_misses_total"));
    }

    #[test]
    fn alloc_block_present_when_some() {
        let a = AllocatorStats {
            allocated: 1,
            resident: 2,
            active: 3,
            retained: 4,
        };
        let out = render_prometheus(&sample_core(), &sample_fuse(), Some(&a), None);
        assert!(out.contains("musefs_alloc_allocated_bytes 1\n"));
        assert!(out.contains("musefs_alloc_resident_bytes 2\n"));
        assert!(out.contains("musefs_alloc_active_bytes 3\n"));
        assert!(out.contains("musefs_alloc_retained_bytes 4\n"));
    }

    #[test]
    fn syscall_block_present_when_some() {
        let s = crate::metrics::Snapshot {
            opens: 11,
            preads: 22,
            readahead_hits: 33,
            readahead_misses: 44,
            scan_opens: 55,
            art_chunks: 66,
            binary_tag_chunks: 77,
            ..crate::metrics::Snapshot::default()
        };
        let out = render_prometheus(&sample_core(), &sample_fuse(), None, Some(&s));
        assert!(out.contains(
            "# TYPE musefs_backing_opens_total counter\nmusefs_backing_opens_total 11\n"
        ));
        assert!(out.contains("musefs_backing_preads_total 22\n"));
        assert!(out.contains(
            "# TYPE musefs_readahead_hits_total counter\nmusefs_readahead_hits_total 33\n"
        ));
        assert!(out.contains("musefs_readahead_misses_total 44\n"));
        // Non-`backing` families of the syscall block.
        assert!(out.contains("musefs_scan_opens_total 55\n"));
        assert!(out.contains("musefs_art_chunks_total 66\n"));
        assert!(out.contains("musefs_binary_tag_chunks_total 77\n"));
    }

    #[test]
    fn refresh_needs_rebuild_true_renders_as_one() {
        let mut c = sample_core();
        c.refresh_needs_rebuild = true;
        let out = render_prometheus(&c, &sample_fuse(), None, None);
        assert!(out.contains("musefs_refresh_needs_rebuild 1\n"));
    }
}