proc_daemon/
metrics.rs

1//! Metrics collection and monitoring for proc-daemon.
2//!
3//! This module provides optional metrics collection capabilities for monitoring
4//! daemon performance, subsystem health, and resource usage.
5
6use crate::pool::StringPool;
7use parking_lot::RwLock;
8use std::collections::HashMap;
9use std::fmt::Write as _;
10use std::sync::atomic::{AtomicU64, Ordering};
11use std::sync::Arc;
12use std::time::{Duration, Instant};
13
14// Metrics error handling
15
16/// Metrics collector for daemon monitoring.
17#[derive(Debug, Clone)]
18pub struct MetricsCollector {
19    inner: Arc<MetricsInner>,
20    /// String pool for metric names to avoid allocations on hot paths
21    string_pool: Arc<StringPool>,
22}
23
24impl MetricsSnapshot {
25    /// Render metrics in Prometheus exposition format (text/plain; version=0.0.4)
26    #[must_use]
27    pub fn render_prometheus(&self) -> String {
28        let mut out = String::with_capacity(1024);
29        // Uptime
30        out.push_str("# HELP proc_uptime_seconds Daemon uptime in seconds\n");
31        out.push_str("# TYPE proc_uptime_seconds gauge\n");
32        let _ = writeln!(out, "proc_uptime_seconds {}", self.uptime.as_secs_f64());
33
34        // Gauges
35        for (k, v) in &self.gauges {
36            out.push('#');
37            out.push_str(" TYPE ");
38            out.push_str(k);
39            out.push_str(" gauge\n");
40            out.push_str(k);
41            out.push(' ');
42            out.push_str(&v.to_string());
43            out.push('\n');
44        }
45
46        // Counters
47        for (k, v) in &self.counters {
48            out.push('#');
49            out.push_str(" TYPE ");
50            out.push_str(k);
51            out.push_str(" counter\n");
52            out.push_str(k);
53            out.push(' ');
54            out.push_str(&v.to_string());
55            out.push('\n');
56        }
57
58        // Histograms (expose count and sum of seconds)
59        for (k, durations) in &self.histograms {
60            let count = durations.len() as u64;
61            let sum: f64 = durations.iter().map(std::time::Duration::as_secs_f64).sum();
62            let count_name = format!("{k}_count");
63            let sum_name = format!("{k}_sum");
64
65            out.push('#');
66            out.push_str(" TYPE ");
67            out.push_str(&count_name);
68            out.push_str(" counter\n");
69            out.push_str(&count_name);
70            out.push(' ');
71            out.push_str(&count.to_string());
72            out.push('\n');
73
74            out.push('#');
75            out.push_str(" TYPE ");
76            out.push_str(&sum_name);
77            out.push_str(" counter\n");
78            out.push_str(&sum_name);
79            out.push(' ');
80            out.push_str(&sum.to_string());
81            out.push('\n');
82        }
83
84        out
85    }
86}
87
88#[derive(Debug)]
89struct MetricsInner {
90    counters: RwLock<HashMap<String, AtomicU64>>,
91    gauges: RwLock<HashMap<String, AtomicU64>>,
92    histograms: RwLock<HashMap<String, Vec<Duration>>>,
93    start_time: Instant,
94}
95
96impl MetricsCollector {
97    /// Create a new metrics collector.
98    #[must_use]
99    pub fn new() -> Self {
100        Self {
101            inner: Arc::new(MetricsInner {
102                counters: RwLock::new(HashMap::new()),
103                gauges: RwLock::new(HashMap::new()),
104                histograms: RwLock::new(HashMap::new()),
105                start_time: Instant::now(),
106            }),
107            // Create a string pool optimized for metric names (typically short strings)
108            string_pool: Arc::new(StringPool::new(50, 200, 64)),
109        }
110    }
111
112    /// Increment a counter by the given value.
113    pub fn increment_counter(&self, name: &str, value: u64) {
114        let counters = self.inner.counters.read();
115        if let Some(counter) = counters.get(name) {
116            counter.fetch_add(value, Ordering::AcqRel);
117        } else {
118            drop(counters);
119            // Use string pool to avoid allocation
120            let pooled_name = self.string_pool.get_with_value(name);
121            let mut counters = self.inner.counters.write();
122            counters
123                .entry(pooled_name.to_string())
124                .or_insert_with(|| AtomicU64::new(0))
125                .fetch_add(value, Ordering::AcqRel);
126        }
127    }
128
129    /// Set a gauge to the given value.
130    pub fn set_gauge(&self, name: &str, value: u64) {
131        let gauges = self.inner.gauges.read();
132        if let Some(gauge) = gauges.get(name) {
133            gauge.store(value, Ordering::Release);
134        } else {
135            drop(gauges);
136            // Use string pool to avoid allocation
137            let pooled_name = self.string_pool.get_with_value(name);
138            let mut gauges = self.inner.gauges.write();
139            gauges
140                .entry(pooled_name.to_string())
141                .or_insert_with(|| AtomicU64::new(0))
142                .store(value, Ordering::Release);
143        }
144    }
145
146    /// Record a histogram value.
147    pub fn record_histogram(&self, name: &str, duration: Duration) {
148        // Use string pool to avoid allocation
149        let pooled_name = self.string_pool.get_with_value(name);
150        let mut histograms = self.inner.histograms.write();
151        histograms
152            .entry(pooled_name.to_string())
153            .or_insert_with(|| Vec::with_capacity(64)) // Pre-allocate vector to avoid frequent reallocations
154            .push(duration);
155    }
156
157    /// Get current metric values.
158    #[must_use]
159    pub fn get_metrics(&self) -> MetricsSnapshot {
160        let counters: HashMap<String, u64> = self
161            .inner
162            .counters
163            .read()
164            .iter()
165            .map(|(k, v)| (k.clone(), v.load(Ordering::Acquire)))
166            .collect();
167
168        let gauges: HashMap<String, u64> = self
169            .inner
170            .gauges
171            .read()
172            .iter()
173            .map(|(k, v)| (k.clone(), v.load(Ordering::Acquire)))
174            .collect();
175
176        let histograms: HashMap<String, Vec<Duration>> = self.inner.histograms.read().clone();
177
178        MetricsSnapshot {
179            uptime: self.inner.start_time.elapsed(),
180            counters,
181            gauges,
182            histograms,
183            timestamp: Instant::now(),
184        }
185    }
186
187    /// Reset all metrics.
188    pub fn reset(&self) {
189        self.inner.counters.write().clear();
190        self.inner.gauges.write().clear();
191        self.inner.histograms.write().clear();
192    }
193}
194
195impl Default for MetricsCollector {
196    fn default() -> Self {
197        Self::new()
198    }
199}
200
/// Point-in-time copy of the collector's metrics.
///
/// A snapshot owns its data: it is a plain value that can be cloned, inspected
/// or rendered without touching the collector again.
#[derive(Debug, Clone)]
pub struct MetricsSnapshot {
    /// Time elapsed since the collector was created
    pub uptime: Duration,
    /// Counter values keyed by metric name
    pub counters: HashMap<String, u64>,
    /// Gauge values keyed by metric name
    pub gauges: HashMap<String, u64>,
    /// Recorded durations keyed by histogram name
    pub histograms: HashMap<String, Vec<Duration>>,
    /// Instant at which the snapshot was taken
    pub timestamp: Instant,
}
215
216/// Timer for measuring operation duration.
217#[derive(Debug)]
218pub struct Timer {
219    collector: MetricsCollector,
220    name: Arc<str>, // Use Arc<str> instead of String to avoid clone during drop
221    start: Instant,
222}
223
224impl Timer {
225    /// Create a new timer for the given metric.
226    #[must_use]
227    pub fn new(collector: MetricsCollector, name: impl AsRef<str>) -> Self {
228        // Create an Arc<str> directly from the input name
229        // This avoids holding a reference to the collector's string pool
230        let name_arc: Arc<str> = Arc::from(name.as_ref());
231
232        Self {
233            collector,
234            name: name_arc,
235            start: Instant::now(),
236        }
237    }
238
239    /// Stop the timer and record the duration.
240    pub fn stop(self) {
241        let duration = self.start.elapsed();
242        self.collector
243            .record_histogram(self.name.as_ref(), duration);
244    }
245}
246
247impl Drop for Timer {
248    fn drop(&mut self) {
249        let duration = self.start.elapsed();
250        // Use the Arc<str> directly to avoid allocation
251        self.collector
252            .record_histogram(self.name.as_ref(), duration);
253    }
254}
255
/// Time a block of code and record its duration on a collector.
///
/// Takes the collector, the metric name and the block to run. A timer is
/// created on a clone of the collector, the block is evaluated, and the
/// block's value becomes the value of the macro invocation. The timer is
/// dropped once the block has finished.
#[macro_export]
macro_rules! time_block {
    ($collector:expr, $metric:expr, $block:block) => {{
        // The metric is handed over as-is; no `to_string()` is needed here.
        let _guard = $crate::metrics::Timer::new($collector.clone(), $metric);
        $block
    }};
}
265
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Counters accumulate, gauges hold the value that was set, and histograms
    /// keep every recorded sample.
    #[test]
    fn test_metrics_collector() {
        let metrics = MetricsCollector::new();

        // Counter: two increments add up.
        for delta in [5, 3] {
            metrics.increment_counter("test_counter", delta);
        }

        // Gauge: stores the value it was given.
        metrics.set_gauge("test_gauge", 42);

        // Histogram: one entry per recorded duration.
        for millis in [100, 200] {
            metrics.record_histogram("test_histogram", Duration::from_millis(millis));
        }

        let MetricsSnapshot {
            counters,
            gauges,
            histograms,
            ..
        } = metrics.get_metrics();

        assert_eq!(counters.get("test_counter"), Some(&8));
        assert_eq!(gauges.get("test_gauge"), Some(&42));
        assert_eq!(histograms.get("test_histogram").unwrap().len(), 2);
    }

    /// Dropping a timer records one sample that is at least as long as the
    /// time spent while it was alive.
    #[test]
    fn test_timer() {
        let metrics = MetricsCollector::new();
        let pause = Duration::from_millis(10);

        let guard = Timer::new(metrics.clone(), "test_timer");
        std::thread::sleep(pause);
        drop(guard);

        let snapshot = metrics.get_metrics();
        let samples = snapshot.histograms.get("test_timer").unwrap();
        assert_eq!(samples.len(), 1);
        assert!(samples[0] >= pause);
    }
}