Skip to main content

proc_daemon/
metrics.rs

1//! Metrics collection and monitoring for proc-daemon.
2//!
3//! This module provides optional metrics collection capabilities for monitoring
4//! daemon performance, subsystem health, and resource usage.
5
6use crate::pool::StringPool;
7use parking_lot::RwLock;
8use std::collections::{HashMap, VecDeque};
9use std::fmt::Write as _;
10use std::sync::atomic::{AtomicU64, Ordering};
11use std::sync::Arc;
12use std::time::{Duration, Instant};
13
14// Metrics error handling
15
/// Metrics collector for daemon monitoring.
///
/// Both fields are `Arc`s, so cloning a collector is cheap and every clone
/// reads and writes the same underlying metric storage.
#[derive(Debug, Clone)]
pub struct MetricsCollector {
    /// Shared storage for counters, gauges, histograms and the start time.
    inner: Arc<MetricsInner>,
    /// String pool for metric names to avoid allocations on hot paths
    string_pool: Arc<StringPool>,
}
23
24impl MetricsSnapshot {
25    /// Render metrics in Prometheus exposition format (text/plain; version=0.0.4)
26    #[must_use]
27    pub fn render_prometheus(&self) -> String {
28        let mut out = String::with_capacity(1024);
29        // Uptime
30        out.push_str("# HELP proc_uptime_seconds Daemon uptime in seconds\n");
31        out.push_str("# TYPE proc_uptime_seconds gauge\n");
32        let _ = writeln!(out, "proc_uptime_seconds {}", self.uptime.as_secs_f64());
33
34        // Gauges
35        for (k, v) in &self.gauges {
36            out.push('#');
37            out.push_str(" TYPE ");
38            out.push_str(k);
39            out.push_str(" gauge\n");
40            out.push_str(k);
41            out.push(' ');
42            out.push_str(&v.to_string());
43            out.push('\n');
44        }
45
46        // Counters
47        for (k, v) in &self.counters {
48            out.push('#');
49            out.push_str(" TYPE ");
50            out.push_str(k);
51            out.push_str(" counter\n");
52            out.push_str(k);
53            out.push(' ');
54            out.push_str(&v.to_string());
55            out.push('\n');
56        }
57
58        // Histograms (expose count and sum of seconds)
59        for (k, durations) in &self.histograms {
60            let count = durations.len() as u64;
61            let sum: f64 = durations.iter().map(std::time::Duration::as_secs_f64).sum();
62            let count_name = format!("{k}_count");
63            let sum_name = format!("{k}_sum");
64
65            out.push('#');
66            out.push_str(" TYPE ");
67            out.push_str(&count_name);
68            out.push_str(" counter\n");
69            out.push_str(&count_name);
70            out.push(' ');
71            out.push_str(&count.to_string());
72            out.push('\n');
73
74            out.push('#');
75            out.push_str(" TYPE ");
76            out.push_str(&sum_name);
77            out.push_str(" counter\n");
78            out.push_str(&sum_name);
79            out.push(' ');
80            out.push_str(&sum.to_string());
81            out.push('\n');
82        }
83
84        out
85    }
86}
87
/// Shared state behind a `MetricsCollector`.
#[derive(Debug)]
struct MetricsInner {
    /// Counter values keyed by metric name.
    counters: RwLock<HashMap<String, AtomicU64>>,
    /// Gauge values keyed by metric name.
    gauges: RwLock<HashMap<String, AtomicU64>>,
    /// Recorded durations keyed by metric name, oldest sample at the front.
    histograms: RwLock<HashMap<String, VecDeque<Duration>>>,
    /// Per-histogram sample limit; once reached, recording a new sample
    /// evicts the oldest one.
    max_histogram_samples: usize,
    /// Instant the collector was created; snapshots report uptime relative to it.
    start_time: Instant,
}
96
97impl MetricsCollector {
98    const DEFAULT_MAX_HISTOGRAM_SAMPLES: usize = 2048;
99
100    /// Create a new metrics collector.
101    #[must_use]
102    pub fn new() -> Self {
103        Self {
104            inner: Arc::new(MetricsInner {
105                counters: RwLock::new(HashMap::new()),
106                gauges: RwLock::new(HashMap::new()),
107                histograms: RwLock::new(HashMap::new()),
108                max_histogram_samples: Self::DEFAULT_MAX_HISTOGRAM_SAMPLES,
109                start_time: Instant::now(),
110            }),
111            // Create a string pool optimized for metric names (typically short strings)
112            string_pool: Arc::new(StringPool::new(50, 200, 64)),
113        }
114    }
115
116    /// Increment a counter by the given value.
117    pub fn increment_counter(&self, name: &str, value: u64) {
118        // Fast path: try read lock first
119        let counters = self.inner.counters.read();
120        if let Some(counter) = counters.get(name) {
121            counter.fetch_add(value, Ordering::Release);
122            return;
123        }
124        drop(counters);
125
126        // Slow path: need to create metric (happens once per unique metric)
127        let pooled_name = self.string_pool.get_with_value(name);
128        let mut counters = self.inner.counters.write();
129        counters
130            .entry(pooled_name.to_string())
131            .or_insert_with(|| AtomicU64::new(0))
132            .fetch_add(value, Ordering::Release);
133    }
134
135    /// Set a gauge to the given value.
136    pub fn set_gauge(&self, name: &str, value: u64) {
137        // Fast path: try read lock first
138        let gauges = self.inner.gauges.read();
139        if let Some(gauge) = gauges.get(name) {
140            gauge.store(value, Ordering::Relaxed);
141            return;
142        }
143        drop(gauges);
144
145        // Slow path: need to create metric (happens once per unique metric)
146        let pooled_name = self.string_pool.get_with_value(name);
147        let mut gauges = self.inner.gauges.write();
148        gauges
149            .entry(pooled_name.to_string())
150            .or_insert_with(|| AtomicU64::new(0))
151            .store(value, Ordering::Relaxed);
152    }
153
154    /// Record a histogram value.
155    pub fn record_histogram(&self, name: &str, duration: Duration) {
156        // Use string pool to avoid allocation
157        let pooled_name = self.string_pool.get_with_value(name);
158        let mut histograms = self.inner.histograms.write();
159        let max_samples = self.inner.max_histogram_samples;
160        let entry = histograms
161            .entry(pooled_name.to_string())
162            .or_insert_with(|| VecDeque::with_capacity(64));
163
164        if entry.len() >= max_samples {
165            let _ = entry.pop_front();
166        }
167        entry.push_back(duration);
168        drop(histograms);
169    }
170
171    /// Get current metric values.
172    #[must_use]
173    pub fn get_metrics(&self) -> MetricsSnapshot {
174        let counters: HashMap<String, u64> = self
175            .inner
176            .counters
177            .read()
178            .iter()
179            .map(|(k, v)| (k.clone(), v.load(Ordering::Relaxed)))
180            .collect();
181
182        let gauges: HashMap<String, u64> = self
183            .inner
184            .gauges
185            .read()
186            .iter()
187            .map(|(k, v)| (k.clone(), v.load(Ordering::Relaxed)))
188            .collect();
189
190        let histograms: HashMap<String, Vec<Duration>> = self
191            .inner
192            .histograms
193            .read()
194            .iter()
195            .map(|(k, v)| (k.clone(), v.iter().copied().collect()))
196            .collect();
197
198        MetricsSnapshot {
199            uptime: self.inner.start_time.elapsed(),
200            counters,
201            gauges,
202            histograms,
203            timestamp: Instant::now(),
204        }
205    }
206
207    /// Reset all metrics.
208    pub fn reset(&self) {
209        self.inner.counters.write().clear();
210        self.inner.gauges.write().clear();
211        self.inner.histograms.write().clear();
212    }
213}
214
215impl Default for MetricsCollector {
216    fn default() -> Self {
217        Self::new()
218    }
219}
220
/// Snapshot of current metrics.
///
/// A plain owned copy of the collector's values; it does not change after
/// it has been taken.
#[derive(Debug, Clone)]
pub struct MetricsSnapshot {
    /// Daemon uptime
    pub uptime: Duration,
    /// Counter metrics
    pub counters: HashMap<String, u64>,
    /// Gauge metrics
    pub gauges: HashMap<String, u64>,
    /// Histogram metrics: the retained samples per metric name, oldest first
    pub histograms: HashMap<String, Vec<Duration>>,
    /// Timestamp when snapshot was taken
    pub timestamp: Instant,
}
235
/// Timer for measuring operation duration.
///
/// When the timer is dropped, the time elapsed since `start` is recorded in
/// the collector's histogram named `name`.
#[derive(Debug)]
pub struct Timer {
    /// Collector that receives the measured duration.
    collector: MetricsCollector,
    /// Name of the histogram metric the duration is recorded under.
    name: Arc<str>, // Use Arc<str> instead of String to avoid clone during drop
    /// Instant at which the timer was created.
    start: Instant,
}
243
244impl Timer {
245    /// Create a new timer for the given metric.
246    #[must_use]
247    pub fn new(collector: MetricsCollector, name: impl AsRef<str>) -> Self {
248        // Create an Arc<str> directly from the input name
249        // This avoids holding a reference to the collector's string pool
250        let name_arc: Arc<str> = Arc::from(name.as_ref());
251
252        Self {
253            collector,
254            name: name_arc,
255            start: Instant::now(),
256        }
257    }
258
259    /// Stop the timer and record the duration.
260    pub fn stop(self) {
261        let duration = self.start.elapsed();
262        self.collector
263            .record_histogram(self.name.as_ref(), duration);
264    }
265}
266
267impl Drop for Timer {
268    fn drop(&mut self) {
269        let duration = self.start.elapsed();
270        // Use the Arc<str> directly to avoid allocation
271        self.collector
272            .record_histogram(self.name.as_ref(), duration);
273    }
274}
275
/// Macro for timing code blocks.
///
/// Takes a collector expression, a metric name expression and a block.
/// It creates a `Timer` from a clone of the collector, evaluates the block,
/// and yields the block's value. The timer is bound to a local that lives
/// until the end of the expansion, so it is dropped only after the block has
/// been evaluated.
#[macro_export]
macro_rules! time_block {
    ($collector:expr, $metric:expr, $block:block) => {{
        // Pass metric directly to avoid to_string() allocation
        // Bound to a named local (not `_`) so the timer stays alive across `$block`.
        let _timer = $crate::metrics::Timer::new($collector.clone(), $metric);
        $block
    }};
}
285
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Counters accumulate, gauges keep the last value written, and
    /// histograms keep one entry per recorded duration.
    #[test]
    fn test_metrics_collector() {
        let metrics = MetricsCollector::new();

        // Counter: two increments on the same name add up.
        for delta in [5, 3] {
            metrics.increment_counter("test_counter", delta);
        }

        // Gauge: a single write.
        metrics.set_gauge("test_gauge", 42);

        // Histogram: two samples under one name.
        for millis in [100, 200] {
            metrics.record_histogram("test_histogram", Duration::from_millis(millis));
        }

        let snap = metrics.get_metrics();

        assert_eq!(snap.counters.get("test_counter").copied(), Some(8));
        assert_eq!(snap.gauges.get("test_gauge").copied(), Some(42));
        let recorded = snap.histograms.get("test_histogram").unwrap();
        assert_eq!(recorded.len(), 2);
    }

    /// Dropping a `Timer` records one sample at least as long as the time
    /// spent while it was alive.
    #[test]
    fn test_timer() {
        let metrics = MetricsCollector::new();
        let pause = Duration::from_millis(10);

        let guard = Timer::new(metrics.clone(), "test_timer");
        std::thread::sleep(pause);
        drop(guard);

        let snap = metrics.get_metrics();
        let recorded = snap.histograms.get("test_timer").unwrap();
        assert_eq!(recorded.len(), 1);
        assert!(recorded[0] >= pause);
    }
}