Skip to main content

heliosdb_proxy/plugins/
metrics.rs

1//! Plugin Metrics
2//!
3//! Metrics collection and reporting for the plugin system.
4
5use std::collections::HashMap;
6use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::{Duration, Instant};
8
9use parking_lot::RwLock;
10
11use super::HookType;
12
13/// Plugin metrics collector
14pub struct PluginMetrics {
15    /// Per-plugin statistics
16    plugin_stats: RwLock<HashMap<String, PluginStatsInner>>,
17
18    /// Global counters
19    global: GlobalMetrics,
20
21    /// Hook latency histograms
22    hook_latencies: RwLock<HashMap<HookType, LatencyHistogram>>,
23
24    /// Creation time
25    created_at: Instant,
26}
27
28impl PluginMetrics {
29    /// Create a new metrics collector
30    pub fn new() -> Self {
31        Self {
32            plugin_stats: RwLock::new(HashMap::new()),
33            global: GlobalMetrics::new(),
34            hook_latencies: RwLock::new(HashMap::new()),
35            created_at: Instant::now(),
36        }
37    }
38
39    /// Record a hook call
40    pub fn record_hook_call(
41        &self,
42        plugin_name: &str,
43        hook: HookType,
44        latency: Duration,
45        success: bool,
46    ) {
47        // Update global counters
48        self.global.total_calls.fetch_add(1, Ordering::Relaxed);
49        if !success {
50            self.global.total_errors.fetch_add(1, Ordering::Relaxed);
51        }
52
53        // Update plugin-specific stats
54        {
55            let mut stats = self.plugin_stats.write();
56            let entry = stats
57                .entry(plugin_name.to_string())
58                .or_insert_with(PluginStatsInner::new);
59
60            entry.total_calls += 1;
61            if success {
62                entry.successful_calls += 1;
63            } else {
64                entry.failed_calls += 1;
65            }
66            entry.total_latency += latency;
67
68            if latency > entry.max_latency {
69                entry.max_latency = latency;
70            }
71            if entry.min_latency == Duration::ZERO || latency < entry.min_latency {
72                entry.min_latency = latency;
73            }
74
75            // Update per-hook stats
76            let hook_entry = entry
77                .hook_stats
78                .entry(hook)
79                .or_insert_with(HookStatsInner::new);
80            hook_entry.calls += 1;
81            hook_entry.latency += latency;
82            if !success {
83                hook_entry.errors += 1;
84            }
85        }
86
87        // Update hook latency histogram
88        {
89            let mut histograms = self.hook_latencies.write();
90            let histogram = histograms
91                .entry(hook)
92                .or_insert_with(LatencyHistogram::new);
93            histogram.record(latency);
94        }
95    }
96
97    /// Record a plugin load
98    pub fn record_plugin_load(&self, plugin_name: &str) {
99        self.global.plugins_loaded.fetch_add(1, Ordering::Relaxed);
100
101        let mut stats = self.plugin_stats.write();
102        let entry = stats
103            .entry(plugin_name.to_string())
104            .or_insert_with(PluginStatsInner::new);
105        entry.loaded_at = Some(Instant::now());
106    }
107
108    /// Record a plugin unload
109    pub fn record_plugin_unload(&self, plugin_name: &str) {
110        self.global.plugins_unloaded.fetch_add(1, Ordering::Relaxed);
111
112        let mut stats = self.plugin_stats.write();
113        if let Some(entry) = stats.get_mut(plugin_name) {
114            entry.unloaded_at = Some(Instant::now());
115        }
116    }
117
118    /// Record a plugin error
119    pub fn record_plugin_error(&self, plugin_name: &str, _error: &str) {
120        self.global.total_errors.fetch_add(1, Ordering::Relaxed);
121
122        let mut stats = self.plugin_stats.write();
123        let entry = stats
124            .entry(plugin_name.to_string())
125            .or_insert_with(PluginStatsInner::new);
126        entry.error_count += 1;
127    }
128
129    /// Get plugin statistics
130    pub fn get_plugin_stats(&self, plugin_name: &str) -> PluginStats {
131        let stats = self.plugin_stats.read();
132        stats
133            .get(plugin_name)
134            .map(|s| s.to_public())
135            .unwrap_or_default()
136    }
137
138    /// Get all plugin statistics
139    pub fn get_all_stats(&self) -> HashMap<String, PluginStats> {
140        let stats = self.plugin_stats.read();
141        stats
142            .iter()
143            .map(|(name, s)| (name.clone(), s.to_public()))
144            .collect()
145    }
146
147    /// Get total calls
148    pub fn total_calls(&self) -> u64 {
149        self.global.total_calls.load(Ordering::Relaxed)
150    }
151
152    /// Get total errors
153    pub fn total_errors(&self) -> u64 {
154        self.global.total_errors.load(Ordering::Relaxed)
155    }
156
157    /// Get average latency across all plugins
158    pub fn avg_latency(&self) -> Duration {
159        let stats = self.plugin_stats.read();
160        let mut total_latency = Duration::ZERO;
161        let mut total_calls = 0u64;
162
163        for s in stats.values() {
164            total_latency += s.total_latency;
165            total_calls += s.total_calls;
166        }
167
168        if total_calls == 0 {
169            Duration::ZERO
170        } else {
171            total_latency / total_calls as u32
172        }
173    }
174
175    /// Get hook latency
176    pub fn get_hook_latency(&self, hook: HookType) -> HookLatency {
177        let histograms = self.hook_latencies.read();
178        histograms
179            .get(&hook)
180            .map(|h| h.to_latency())
181            .unwrap_or_default()
182    }
183
184    /// Get uptime
185    pub fn uptime(&self) -> Duration {
186        self.created_at.elapsed()
187    }
188
189    /// Reset all metrics
190    pub fn reset(&self) {
191        self.global.total_calls.store(0, Ordering::Relaxed);
192        self.global.total_errors.store(0, Ordering::Relaxed);
193        self.plugin_stats.write().clear();
194        self.hook_latencies.write().clear();
195    }
196}
197
198impl Default for PluginMetrics {
199    fn default() -> Self {
200        Self::new()
201    }
202}
203
/// Atomic counters aggregated across all plugins.
struct GlobalMetrics {
    /// Hook calls recorded.
    total_calls: AtomicU64,
    /// Errors recorded.
    total_errors: AtomicU64,
    /// Plugin load events recorded.
    plugins_loaded: AtomicU64,
    /// Plugin unload events recorded.
    plugins_unloaded: AtomicU64,
}

impl GlobalMetrics {
    /// Builds a counter set with every counter at zero.
    fn new() -> Self {
        let zeroed = || AtomicU64::new(0);
        GlobalMetrics {
            total_calls: zeroed(),
            total_errors: zeroed(),
            plugins_loaded: zeroed(),
            plugins_unloaded: zeroed(),
        }
    }
}
222
223/// Internal plugin statistics
224struct PluginStatsInner {
225    total_calls: u64,
226    successful_calls: u64,
227    failed_calls: u64,
228    error_count: u64,
229    total_latency: Duration,
230    min_latency: Duration,
231    max_latency: Duration,
232    hook_stats: HashMap<HookType, HookStatsInner>,
233    loaded_at: Option<Instant>,
234    unloaded_at: Option<Instant>,
235}
236
237impl PluginStatsInner {
238    fn new() -> Self {
239        Self {
240            total_calls: 0,
241            successful_calls: 0,
242            failed_calls: 0,
243            error_count: 0,
244            total_latency: Duration::ZERO,
245            min_latency: Duration::ZERO,
246            max_latency: Duration::ZERO,
247            hook_stats: HashMap::new(),
248            loaded_at: None,
249            unloaded_at: None,
250        }
251    }
252
253    fn to_public(&self) -> PluginStats {
254        PluginStats {
255            total_calls: self.total_calls,
256            successful_calls: self.successful_calls,
257            failed_calls: self.failed_calls,
258            error_count: self.error_count,
259            avg_latency: if self.total_calls > 0 {
260                self.total_latency / self.total_calls as u32
261            } else {
262                Duration::ZERO
263            },
264            min_latency: self.min_latency,
265            max_latency: self.max_latency,
266            uptime: self.loaded_at.map(|t| t.elapsed()),
267        }
268    }
269}
270
/// Internal statistics for one hook type of one plugin.
struct HookStatsInner {
    /// Calls recorded for this hook.
    calls: u64,
    /// Calls that reported failure.
    errors: u64,
    /// Sum of the latencies of all recorded calls.
    latency: Duration,
}

impl HookStatsInner {
    /// Creates an entry with zero calls, zero errors and zero latency.
    fn new() -> Self {
        let (calls, errors) = (0, 0);
        HookStatsInner {
            calls,
            errors,
            latency: Duration::ZERO,
        }
    }
}
287
/// Snapshot of one plugin's statistics as exposed to callers.
#[derive(Debug, Clone, Default)]
pub struct PluginStats {
    /// Number of hook calls recorded.
    pub total_calls: u64,

    /// Number of hook calls that succeeded.
    pub successful_calls: u64,

    /// Number of hook calls that failed.
    pub failed_calls: u64,

    /// Number of errors reported for the plugin.
    pub error_count: u64,

    /// Mean latency per hook call.
    pub avg_latency: Duration,

    /// Smallest latency observed.
    pub min_latency: Duration,

    /// Largest latency observed.
    pub max_latency: Duration,

    /// Plugin uptime, if a load time is known.
    pub uptime: Option<Duration>,
}

impl PluginStats {
    /// Returns the fraction of calls that succeeded, in `0.0..=1.0`.
    ///
    /// With no calls recorded the rate is reported as `1.0`.
    pub fn success_rate(&self) -> f64 {
        match self.total_calls {
            0 => 1.0,
            total => self.successful_calls as f64 / total as f64,
        }
    }
}
326
/// Hook latency statistics
#[derive(Debug, Clone, Default)]
pub struct HookLatency {
    /// Total calls
    pub count: u64,

    /// Average latency
    pub avg: Duration,

    /// P50 latency
    pub p50: Duration,

    /// P90 latency
    pub p90: Duration,

    /// P99 latency
    pub p99: Duration,

    /// Maximum latency
    pub max: Duration,
}

/// Latency histogram backed by a bounded, sorted sample set.
///
/// `count`, `sum` and `max` cover every sample ever recorded; percentiles are
/// estimated from the retained samples only.
struct LatencyHistogram {
    /// Retained samples, kept in ascending order.
    latencies: Vec<Duration>,

    /// Number of samples recorded, including ones no longer retained.
    count: u64,

    /// Largest latency recorded.
    max: Duration,

    /// Sum of every latency recorded; divided by `count` for the average.
    sum: Duration,
}

impl LatencyHistogram {
    /// Upper bound on retained samples before the set is thinned.
    const MAX_SAMPLES: usize = 10_000;

    /// Creates an empty histogram.
    fn new() -> Self {
        Self {
            latencies: Vec::new(),
            count: 0,
            max: Duration::ZERO,
            sum: Duration::ZERO,
        }
    }

    /// Records one latency sample.
    fn record(&mut self, latency: Duration) {
        self.count += 1;
        self.sum += latency;
        self.max = self.max.max(latency);

        // Insert at the sorted position instead of re-sorting the whole
        // vector on every call.
        let position = self.latencies.partition_point(|&sample| sample <= latency);
        self.latencies.insert(position, latency);

        // Limit size to prevent memory growth. Keeping every second sample
        // of the sorted set (the even indices, which include the smallest
        // sample) halves memory while preserving the shape of the
        // distribution. Draining the front of a sorted vector would discard
        // exactly the smallest samples and skew every percentile upward.
        if self.latencies.len() > Self::MAX_SAMPLES {
            let mut keep = false;
            self.latencies.retain(|_| {
                keep = !keep;
                keep
            });
        }
    }

    /// Returns the retained sample at index `floor(len * p / 100)`, clamped
    /// to the last sample, or `Duration::ZERO` for an empty histogram.
    fn percentile(&self, p: f64) -> Duration {
        if self.latencies.is_empty() {
            return Duration::ZERO;
        }
        let idx = ((self.latencies.len() as f64) * p / 100.0) as usize;
        let idx = idx.min(self.latencies.len() - 1);
        self.latencies[idx]
    }

    /// Summarises the histogram as a [`HookLatency`].
    ///
    /// `count` and `avg` cover every recorded sample, so they stay correct
    /// after the retained set has been thinned.
    fn to_latency(&self) -> HookLatency {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        let avg = if self.count == 0 {
            Duration::ZERO
        } else {
            // Nanosecond arithmetic with the 64-bit count avoids a truncating
            // `as u32` divisor.
            let mean_nanos = self.sum.as_nanos() / u128::from(self.count);
            // The mean never exceeds the sum, so the seconds part fits in
            // u64; the remainder is below one second and fits in u32.
            Duration::new(
                (mean_nanos / NANOS_PER_SEC) as u64,
                (mean_nanos % NANOS_PER_SEC) as u32,
            )
        };

        HookLatency {
            count: self.count,
            avg,
            p50: self.percentile(50.0),
            p90: self.percentile(90.0),
            p99: self.percentile(99.0),
            max: self.max,
        }
    }
}
411
412/// Metrics exporter for Prometheus format
413pub struct MetricsExporter {
414    metrics: std::sync::Arc<PluginMetrics>,
415    prefix: String,
416}
417
418impl MetricsExporter {
419    /// Create a new exporter
420    pub fn new(metrics: std::sync::Arc<PluginMetrics>, prefix: &str) -> Self {
421        Self {
422            metrics,
423            prefix: prefix.to_string(),
424        }
425    }
426
427    /// Export metrics in Prometheus format
428    pub fn export(&self) -> String {
429        let mut output = String::new();
430
431        // Global metrics
432        output.push_str(&format!(
433            "# HELP {}_total_calls Total hook calls\n",
434            self.prefix
435        ));
436        output.push_str(&format!(
437            "# TYPE {}_total_calls counter\n",
438            self.prefix
439        ));
440        output.push_str(&format!(
441            "{}_total_calls {}\n",
442            self.prefix,
443            self.metrics.total_calls()
444        ));
445
446        output.push_str(&format!(
447            "# HELP {}_total_errors Total errors\n",
448            self.prefix
449        ));
450        output.push_str(&format!(
451            "# TYPE {}_total_errors counter\n",
452            self.prefix
453        ));
454        output.push_str(&format!(
455            "{}_total_errors {}\n",
456            self.prefix,
457            self.metrics.total_errors()
458        ));
459
460        // Per-plugin metrics
461        let all_stats = self.metrics.get_all_stats();
462        for (name, stats) in all_stats {
463            let name_label = name.replace('-', "_");
464
465            output.push_str(&format!(
466                "{}_plugin_calls{{plugin=\"{}\"}} {}\n",
467                self.prefix, name_label, stats.total_calls
468            ));
469
470            output.push_str(&format!(
471                "{}_plugin_errors{{plugin=\"{}\"}} {}\n",
472                self.prefix, name_label, stats.error_count
473            ));
474
475            output.push_str(&format!(
476                "{}_plugin_latency_avg_us{{plugin=\"{}\"}} {}\n",
477                self.prefix,
478                name_label,
479                stats.avg_latency.as_micros()
480            ));
481        }
482
483        output
484    }
485}
486
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a duration of `us` microseconds.
    fn micros(us: u64) -> Duration {
        Duration::from_micros(us)
    }

    /// A fresh collector reports zero calls and zero errors.
    #[test]
    fn test_plugin_metrics_new() {
        let collector = PluginMetrics::new();
        assert_eq!(collector.total_calls(), 0);
        assert_eq!(collector.total_errors(), 0);
    }

    /// A successful call shows up globally and in the plugin's statistics.
    #[test]
    fn test_record_hook_call() {
        let collector = PluginMetrics::new();
        collector.record_hook_call("test-plugin", HookType::PreQuery, micros(50), true);

        assert_eq!(collector.total_calls(), 1);
        assert_eq!(collector.total_errors(), 0);

        let snapshot = collector.get_plugin_stats("test-plugin");
        assert_eq!(snapshot.total_calls, 1);
        assert_eq!(snapshot.successful_calls, 1);
    }

    /// A failed call is counted as an error and as a failed call.
    #[test]
    fn test_record_hook_call_error() {
        let collector = PluginMetrics::new();
        collector.record_hook_call("test-plugin", HookType::PreQuery, micros(50), false);

        assert_eq!(collector.total_calls(), 1);
        assert_eq!(collector.total_errors(), 1);

        let snapshot = collector.get_plugin_stats("test-plugin");
        assert_eq!(snapshot.failed_calls, 1);
    }

    /// The success rate is successful calls over total calls.
    #[test]
    fn test_plugin_stats_success_rate() {
        let snapshot = PluginStats {
            total_calls: 100,
            successful_calls: 90,
            failed_calls: 10,
            ..Default::default()
        };

        let deviation = (snapshot.success_rate() - 0.9).abs();
        assert!(deviation < 0.001);
    }

    /// Default statistics have no calls and a success rate of one.
    #[test]
    fn test_plugin_stats_default() {
        let snapshot = PluginStats::default();
        assert_eq!(snapshot.total_calls, 0);
        assert_eq!(snapshot.success_rate(), 1.0);
    }

    /// Percentiles of 1..=100 microseconds land at or above their rank.
    #[test]
    fn test_latency_histogram() {
        let mut hist = LatencyHistogram::new();
        (1..=100).for_each(|us| hist.record(micros(us)));

        let summary = hist.to_latency();
        assert_eq!(summary.count, 100);
        assert!(summary.p50 >= micros(50));
        assert!(summary.p99 >= micros(99));
    }

    /// Hook latency is available through the collector after recording.
    #[test]
    fn test_get_hook_latency() {
        let collector = PluginMetrics::new();
        for step in 1..=10 {
            collector.record_hook_call("test", HookType::PreQuery, micros(step * 10), true);
        }

        let summary = collector.get_hook_latency(HookType::PreQuery);
        assert_eq!(summary.count, 10);
        assert!(summary.avg > Duration::ZERO);
    }

    /// The overall average is the mean of the recorded latencies.
    #[test]
    fn test_avg_latency() {
        let collector = PluginMetrics::new();
        for us in [100, 200] {
            collector.record_hook_call("p1", HookType::PreQuery, micros(us), true);
        }

        assert_eq!(collector.avg_latency(), micros(150));
    }

    /// Resetting clears the recorded call count.
    #[test]
    fn test_reset() {
        let collector = PluginMetrics::new();
        collector.record_hook_call("test", HookType::PreQuery, micros(50), true);
        assert_eq!(collector.total_calls(), 1);

        collector.reset();
        assert_eq!(collector.total_calls(), 0);
    }

    /// The exporter emits both global and per-plugin metric names.
    #[test]
    fn test_metrics_exporter() {
        let collector = std::sync::Arc::new(PluginMetrics::new());
        collector.record_hook_call("test", HookType::PreQuery, micros(50), true);

        let rendered = MetricsExporter::new(collector, "helios_plugin").export();

        assert!(rendered.contains("helios_plugin_total_calls"));
        assert!(rendered.contains("helios_plugin_plugin_calls"));
    }

    /// Loading a plugin gives it an uptime; unloading must not panic.
    #[test]
    fn test_record_plugin_load_unload() {
        let collector = PluginMetrics::new();
        collector.record_plugin_load("test-plugin");

        let snapshot = collector.get_plugin_stats("test-plugin");
        assert!(snapshot.uptime.is_some());

        collector.record_plugin_unload("test-plugin");
    }
}