mockforge_observability/prometheus/
metrics.rs

1//! Prometheus metrics definitions and registry
2
3use once_cell::sync::Lazy;
4use prometheus::{
5    Gauge, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
6    Opts, Registry,
7};
8use std::sync::Arc;
9use tracing::debug;
10
/// Global metrics registry for MockForge
///
/// Bundles a Prometheus [`Registry`] together with handles to every metric that
/// `MetricsRegistry::new` creates and registers in it. The struct is `Clone`; the
/// registry itself is held behind an `Arc`.
#[derive(Clone)]
pub struct MetricsRegistry {
    /// Prometheus registry that all metrics below are registered with.
    registry: Arc<Registry>,

    // Request metrics by protocol
    /// Total number of requests; labels: `protocol`, `method`, `status`.
    pub requests_total: IntCounterVec,
    /// Request duration in seconds; labels: `protocol`, `method`.
    pub requests_duration_seconds: HistogramVec,
    /// Number of requests currently being processed; label: `protocol`.
    pub requests_in_flight: IntGaugeVec,

    // Request metrics by path (endpoint-specific)
    /// Total number of requests; labels: `path`, `method`, `status`.
    pub requests_by_path_total: IntCounterVec,
    /// Request duration in seconds; labels: `path`, `method`.
    pub request_duration_by_path_seconds: HistogramVec,
    /// Average request latency in seconds; labels: `path`, `method`.
    pub average_latency_by_path_seconds: GaugeVec,

    // Workspace-specific metrics
    /// Total number of requests; labels: `workspace_id`, `method`, `status`.
    pub workspace_requests_total: IntCounterVec,
    /// Request duration in seconds; labels: `workspace_id`, `method`.
    pub workspace_requests_duration_seconds: HistogramVec,
    /// Number of active routes in each workspace; label: `workspace_id`.
    pub workspace_active_routes: IntGaugeVec,
    /// Total number of errors; labels: `workspace_id`, `error_type`.
    pub workspace_errors_total: IntCounterVec,

    // Error metrics
    /// Total number of errors; labels: `protocol`, `error_type`.
    pub errors_total: IntCounterVec,
    /// Error rate (0.0 to 1.0); label: `protocol`.
    pub error_rate: GaugeVec,

    // Plugin metrics
    /// Total number of plugin executions; labels: `plugin_name`, `status`.
    pub plugin_executions_total: IntCounterVec,
    /// Plugin execution duration in seconds; label: `plugin_name`.
    pub plugin_execution_duration_seconds: HistogramVec,
    /// Total number of plugin errors; labels: `plugin_name`, `error_type`.
    pub plugin_errors_total: IntCounterVec,

    // WebSocket specific metrics
    /// Number of active WebSocket connections.
    pub ws_connections_active: IntGauge,
    /// Total number of WebSocket connections established.
    pub ws_connections_total: IntCounter,
    /// WebSocket connection duration in seconds; label: `status`.
    pub ws_connection_duration_seconds: HistogramVec,
    /// Total number of WebSocket messages sent.
    pub ws_messages_sent: IntCounter,
    /// Total number of WebSocket messages received.
    pub ws_messages_received: IntCounter,
    /// Total number of WebSocket errors.
    pub ws_errors_total: IntCounter,

    // SMTP specific metrics
    /// Number of active SMTP connections.
    pub smtp_connections_active: IntGauge,
    /// Total number of SMTP connections.
    pub smtp_connections_total: IntCounter,
    /// Total number of SMTP messages received.
    pub smtp_messages_received_total: IntCounter,
    /// Total number of SMTP messages stored in mailbox.
    pub smtp_messages_stored_total: IntCounter,
    /// Total number of SMTP errors; label: `error_type`.
    pub smtp_errors_total: IntCounterVec,

    // MQTT specific metrics
    /// Number of active MQTT client connections.
    pub mqtt_connections_active: IntGauge,
    /// Total number of MQTT client connections established.
    pub mqtt_connections_total: IntCounter,
    /// Total number of MQTT messages published.
    pub mqtt_messages_published_total: IntCounter,
    /// Total number of MQTT messages received.
    pub mqtt_messages_received_total: IntCounter,
    /// Number of active MQTT topics.
    pub mqtt_topics_active: IntGauge,
    /// Number of active MQTT subscriptions.
    pub mqtt_subscriptions_active: IntGauge,
    /// Number of retained MQTT messages.
    pub mqtt_retained_messages: IntGauge,
    /// Total number of MQTT errors; label: `error_type`.
    pub mqtt_errors_total: IntCounterVec,

    // System metrics
    /// Memory usage in bytes.
    pub memory_usage_bytes: Gauge,
    /// CPU usage percentage.
    pub cpu_usage_percent: Gauge,
    /// Number of active threads.
    pub thread_count: Gauge,
    /// Server uptime in seconds.
    pub uptime_seconds: Gauge,

    // Scenario metrics (for Phase 4)
    /// Active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos).
    pub active_scenario_mode: IntGauge,
    /// Total number of chaos mode triggers.
    pub chaos_triggers_total: IntCounter,
}
76
77impl MetricsRegistry {
78    /// Create a new metrics registry with all metrics initialized
79    pub fn new() -> Self {
80        let registry = Registry::new();
81
82        // Request metrics
83        let requests_total = IntCounterVec::new(
84            Opts::new(
85                "mockforge_requests_total",
86                "Total number of requests by protocol, method, and status",
87            ),
88            &["protocol", "method", "status"],
89        )
90        .expect("Failed to create requests_total metric");
91
92        let requests_duration_seconds = HistogramVec::new(
93            HistogramOpts::new("mockforge_request_duration_seconds", "Request duration in seconds")
94                .buckets(vec![
95                    0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
96                ]),
97            &["protocol", "method"],
98        )
99        .expect("Failed to create requests_duration_seconds metric");
100
101        let requests_in_flight = IntGaugeVec::new(
102            Opts::new(
103                "mockforge_requests_in_flight",
104                "Number of requests currently being processed",
105            ),
106            &["protocol"],
107        )
108        .expect("Failed to create requests_in_flight metric");
109
110        // Error metrics
111        let errors_total = IntCounterVec::new(
112            Opts::new(
113                "mockforge_errors_total",
114                "Total number of errors by protocol and error type",
115            ),
116            &["protocol", "error_type"],
117        )
118        .expect("Failed to create errors_total metric");
119
120        let error_rate = GaugeVec::new(
121            Opts::new("mockforge_error_rate", "Error rate by protocol (0.0 to 1.0)"),
122            &["protocol"],
123        )
124        .expect("Failed to create error_rate metric");
125
126        // Plugin metrics
127        let plugin_executions_total = IntCounterVec::new(
128            Opts::new("mockforge_plugin_executions_total", "Total number of plugin executions"),
129            &["plugin_name", "status"],
130        )
131        .expect("Failed to create plugin_executions_total metric");
132
133        let plugin_execution_duration_seconds = HistogramVec::new(
134            HistogramOpts::new(
135                "mockforge_plugin_execution_duration_seconds",
136                "Plugin execution duration in seconds",
137            )
138            .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0]),
139            &["plugin_name"],
140        )
141        .expect("Failed to create plugin_execution_duration_seconds metric");
142
143        let plugin_errors_total = IntCounterVec::new(
144            Opts::new("mockforge_plugin_errors_total", "Total number of plugin errors"),
145            &["plugin_name", "error_type"],
146        )
147        .expect("Failed to create plugin_errors_total metric");
148
149        // WebSocket metrics
150        // Path-based request metrics
151        let requests_by_path_total = IntCounterVec::new(
152            Opts::new(
153                "mockforge_requests_by_path_total",
154                "Total number of requests by path, method, and status",
155            ),
156            &["path", "method", "status"],
157        )
158        .expect("Failed to create requests_by_path_total metric");
159
160        let request_duration_by_path_seconds = HistogramVec::new(
161            HistogramOpts::new(
162                "mockforge_request_duration_by_path_seconds",
163                "Request duration by path in seconds",
164            )
165            .buckets(vec![
166                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
167            ]),
168            &["path", "method"],
169        )
170        .expect("Failed to create request_duration_by_path_seconds metric");
171
172        let average_latency_by_path_seconds = GaugeVec::new(
173            Opts::new(
174                "mockforge_average_latency_by_path_seconds",
175                "Average request latency by path in seconds",
176            ),
177            &["path", "method"],
178        )
179        .expect("Failed to create average_latency_by_path_seconds metric");
180
181        // Workspace-specific metrics
182        let workspace_requests_total = IntCounterVec::new(
183            Opts::new(
184                "mockforge_workspace_requests_total",
185                "Total number of requests by workspace, method, and status",
186            ),
187            &["workspace_id", "method", "status"],
188        )
189        .expect("Failed to create workspace_requests_total metric");
190
191        let workspace_requests_duration_seconds = HistogramVec::new(
192            HistogramOpts::new(
193                "mockforge_workspace_request_duration_seconds",
194                "Request duration by workspace in seconds",
195            )
196            .buckets(vec![
197                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
198            ]),
199            &["workspace_id", "method"],
200        )
201        .expect("Failed to create workspace_requests_duration_seconds metric");
202
203        let workspace_active_routes = IntGaugeVec::new(
204            Opts::new(
205                "mockforge_workspace_active_routes",
206                "Number of active routes in each workspace",
207            ),
208            &["workspace_id"],
209        )
210        .expect("Failed to create workspace_active_routes metric");
211
212        let workspace_errors_total = IntCounterVec::new(
213            Opts::new("mockforge_workspace_errors_total", "Total number of errors by workspace"),
214            &["workspace_id", "error_type"],
215        )
216        .expect("Failed to create workspace_errors_total metric");
217
218        // WebSocket metrics
219        let ws_connections_active = IntGauge::new(
220            "mockforge_ws_connections_active",
221            "Number of active WebSocket connections",
222        )
223        .expect("Failed to create ws_connections_active metric");
224
225        let ws_connections_total = IntCounter::new(
226            "mockforge_ws_connections_total",
227            "Total number of WebSocket connections established",
228        )
229        .expect("Failed to create ws_connections_total metric");
230
231        let ws_connection_duration_seconds = HistogramVec::new(
232            HistogramOpts::new(
233                "mockforge_ws_connection_duration_seconds",
234                "WebSocket connection duration in seconds",
235            )
236            .buckets(vec![1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 600.0, 1800.0, 3600.0]),
237            &["status"],
238        )
239        .expect("Failed to create ws_connection_duration_seconds metric");
240
241        let ws_messages_sent = IntCounter::new(
242            "mockforge_ws_messages_sent_total",
243            "Total number of WebSocket messages sent",
244        )
245        .expect("Failed to create ws_messages_sent metric");
246
247        let ws_messages_received = IntCounter::new(
248            "mockforge_ws_messages_received_total",
249            "Total number of WebSocket messages received",
250        )
251        .expect("Failed to create ws_messages_received metric");
252
253        let ws_errors_total =
254            IntCounter::new("mockforge_ws_errors_total", "Total number of WebSocket errors")
255                .expect("Failed to create ws_errors_total metric");
256
257        // SMTP metrics
258        let smtp_connections_active =
259            IntGauge::new("mockforge_smtp_connections_active", "Number of active SMTP connections")
260                .expect("Failed to create smtp_connections_active metric");
261
262        let smtp_connections_total =
263            IntCounter::new("mockforge_smtp_connections_total", "Total number of SMTP connections")
264                .expect("Failed to create smtp_connections_total metric");
265
266        let smtp_messages_received_total = IntCounter::new(
267            "mockforge_smtp_messages_received_total",
268            "Total number of SMTP messages received",
269        )
270        .expect("Failed to create smtp_messages_received_total metric");
271
272        let smtp_messages_stored_total = IntCounter::new(
273            "mockforge_smtp_messages_stored_total",
274            "Total number of SMTP messages stored in mailbox",
275        )
276        .expect("Failed to create smtp_messages_stored_total metric");
277
278        let smtp_errors_total = IntCounterVec::new(
279            Opts::new("mockforge_smtp_errors_total", "Total number of SMTP errors by type"),
280            &["error_type"],
281        )
282        .expect("Failed to create smtp_errors_total metric");
283
284        // MQTT metrics
285        let mqtt_connections_active = IntGauge::new(
286            "mockforge_mqtt_connections_active",
287            "Number of active MQTT client connections",
288        )
289        .expect("Failed to create mqtt_connections_active metric");
290
291        let mqtt_connections_total = IntCounter::new(
292            "mockforge_mqtt_connections_total",
293            "Total number of MQTT client connections established",
294        )
295        .expect("Failed to create mqtt_connections_total metric");
296
297        let mqtt_messages_published_total = IntCounter::new(
298            "mockforge_mqtt_messages_published_total",
299            "Total number of MQTT messages published",
300        )
301        .expect("Failed to create mqtt_messages_published_total metric");
302
303        let mqtt_messages_received_total = IntCounter::new(
304            "mockforge_mqtt_messages_received_total",
305            "Total number of MQTT messages received",
306        )
307        .expect("Failed to create mqtt_messages_received_total metric");
308
309        let mqtt_topics_active =
310            IntGauge::new("mockforge_mqtt_topics_active", "Number of active MQTT topics")
311                .expect("Failed to create mqtt_topics_active metric");
312
313        let mqtt_subscriptions_active = IntGauge::new(
314            "mockforge_mqtt_subscriptions_active",
315            "Number of active MQTT subscriptions",
316        )
317        .expect("Failed to create mqtt_subscriptions_active metric");
318
319        let mqtt_retained_messages =
320            IntGauge::new("mockforge_mqtt_retained_messages", "Number of retained MQTT messages")
321                .expect("Failed to create mqtt_retained_messages metric");
322
323        let mqtt_errors_total = IntCounterVec::new(
324            Opts::new("mockforge_mqtt_errors_total", "Total number of MQTT errors by type"),
325            &["error_type"],
326        )
327        .expect("Failed to create mqtt_errors_total metric");
328
329        // System metrics
330        let memory_usage_bytes =
331            Gauge::new("mockforge_memory_usage_bytes", "Memory usage in bytes")
332                .expect("Failed to create memory_usage_bytes metric");
333
334        let cpu_usage_percent = Gauge::new("mockforge_cpu_usage_percent", "CPU usage percentage")
335            .expect("Failed to create cpu_usage_percent metric");
336
337        let thread_count = Gauge::new("mockforge_thread_count", "Number of active threads")
338            .expect("Failed to create thread_count metric");
339
340        let uptime_seconds = Gauge::new("mockforge_uptime_seconds", "Server uptime in seconds")
341            .expect("Failed to create uptime_seconds metric");
342
343        // Scenario metrics
344        let active_scenario_mode = IntGauge::new(
345            "mockforge_active_scenario_mode",
346            "Active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)",
347        )
348        .expect("Failed to create active_scenario_mode metric");
349
350        let chaos_triggers_total = IntCounter::new(
351            "mockforge_chaos_triggers_total",
352            "Total number of chaos mode triggers",
353        )
354        .expect("Failed to create chaos_triggers_total metric");
355
356        // Register all metrics
357        registry
358            .register(Box::new(requests_total.clone()))
359            .expect("Failed to register requests_total");
360        registry
361            .register(Box::new(requests_duration_seconds.clone()))
362            .expect("Failed to register requests_duration_seconds");
363        registry
364            .register(Box::new(requests_in_flight.clone()))
365            .expect("Failed to register requests_in_flight");
366        registry
367            .register(Box::new(requests_by_path_total.clone()))
368            .expect("Failed to register requests_by_path_total");
369        registry
370            .register(Box::new(request_duration_by_path_seconds.clone()))
371            .expect("Failed to register request_duration_by_path_seconds");
372        registry
373            .register(Box::new(average_latency_by_path_seconds.clone()))
374            .expect("Failed to register average_latency_by_path_seconds");
375        registry
376            .register(Box::new(workspace_requests_total.clone()))
377            .expect("Failed to register workspace_requests_total");
378        registry
379            .register(Box::new(workspace_requests_duration_seconds.clone()))
380            .expect("Failed to register workspace_requests_duration_seconds");
381        registry
382            .register(Box::new(workspace_active_routes.clone()))
383            .expect("Failed to register workspace_active_routes");
384        registry
385            .register(Box::new(workspace_errors_total.clone()))
386            .expect("Failed to register workspace_errors_total");
387        registry
388            .register(Box::new(errors_total.clone()))
389            .expect("Failed to register errors_total");
390        registry
391            .register(Box::new(error_rate.clone()))
392            .expect("Failed to register error_rate");
393        registry
394            .register(Box::new(plugin_executions_total.clone()))
395            .expect("Failed to register plugin_executions_total");
396        registry
397            .register(Box::new(plugin_execution_duration_seconds.clone()))
398            .expect("Failed to register plugin_execution_duration_seconds");
399        registry
400            .register(Box::new(plugin_errors_total.clone()))
401            .expect("Failed to register plugin_errors_total");
402        registry
403            .register(Box::new(ws_connections_active.clone()))
404            .expect("Failed to register ws_connections_active");
405        registry
406            .register(Box::new(ws_connections_total.clone()))
407            .expect("Failed to register ws_connections_total");
408        registry
409            .register(Box::new(ws_connection_duration_seconds.clone()))
410            .expect("Failed to register ws_connection_duration_seconds");
411        registry
412            .register(Box::new(ws_messages_sent.clone()))
413            .expect("Failed to register ws_messages_sent");
414        registry
415            .register(Box::new(ws_messages_received.clone()))
416            .expect("Failed to register ws_messages_received");
417        registry
418            .register(Box::new(ws_errors_total.clone()))
419            .expect("Failed to register ws_errors_total");
420        registry
421            .register(Box::new(smtp_connections_active.clone()))
422            .expect("Failed to register smtp_connections_active");
423        registry
424            .register(Box::new(smtp_connections_total.clone()))
425            .expect("Failed to register smtp_connections_total");
426        registry
427            .register(Box::new(smtp_messages_received_total.clone()))
428            .expect("Failed to register smtp_messages_received_total");
429        registry
430            .register(Box::new(smtp_messages_stored_total.clone()))
431            .expect("Failed to register smtp_messages_stored_total");
432        registry
433            .register(Box::new(smtp_errors_total.clone()))
434            .expect("Failed to register smtp_errors_total");
435        registry
436            .register(Box::new(mqtt_connections_active.clone()))
437            .expect("Failed to register mqtt_connections_active");
438        registry
439            .register(Box::new(mqtt_connections_total.clone()))
440            .expect("Failed to register mqtt_connections_total");
441        registry
442            .register(Box::new(mqtt_messages_published_total.clone()))
443            .expect("Failed to register mqtt_messages_published_total");
444        registry
445            .register(Box::new(mqtt_messages_received_total.clone()))
446            .expect("Failed to register mqtt_messages_received_total");
447        registry
448            .register(Box::new(mqtt_topics_active.clone()))
449            .expect("Failed to register mqtt_topics_active");
450        registry
451            .register(Box::new(mqtt_subscriptions_active.clone()))
452            .expect("Failed to register mqtt_subscriptions_active");
453        registry
454            .register(Box::new(mqtt_retained_messages.clone()))
455            .expect("Failed to register mqtt_retained_messages");
456        registry
457            .register(Box::new(mqtt_errors_total.clone()))
458            .expect("Failed to register mqtt_errors_total");
459        registry
460            .register(Box::new(memory_usage_bytes.clone()))
461            .expect("Failed to register memory_usage_bytes");
462        registry
463            .register(Box::new(cpu_usage_percent.clone()))
464            .expect("Failed to register cpu_usage_percent");
465        registry
466            .register(Box::new(thread_count.clone()))
467            .expect("Failed to register thread_count");
468        registry
469            .register(Box::new(uptime_seconds.clone()))
470            .expect("Failed to register uptime_seconds");
471        registry
472            .register(Box::new(active_scenario_mode.clone()))
473            .expect("Failed to register active_scenario_mode");
474        registry
475            .register(Box::new(chaos_triggers_total.clone()))
476            .expect("Failed to register chaos_triggers_total");
477
478        debug!("Initialized Prometheus metrics registry");
479
480        Self {
481            registry: Arc::new(registry),
482            requests_total,
483            requests_duration_seconds,
484            requests_in_flight,
485            requests_by_path_total,
486            request_duration_by_path_seconds,
487            average_latency_by_path_seconds,
488            workspace_requests_total,
489            workspace_requests_duration_seconds,
490            workspace_active_routes,
491            workspace_errors_total,
492            errors_total,
493            error_rate,
494            plugin_executions_total,
495            plugin_execution_duration_seconds,
496            plugin_errors_total,
497            ws_connections_active,
498            ws_connections_total,
499            ws_connection_duration_seconds,
500            ws_messages_sent,
501            ws_messages_received,
502            ws_errors_total,
503            smtp_connections_active,
504            smtp_connections_total,
505            smtp_messages_received_total,
506            smtp_messages_stored_total,
507            smtp_errors_total,
508            mqtt_connections_active,
509            mqtt_connections_total,
510            mqtt_messages_published_total,
511            mqtt_messages_received_total,
512            mqtt_topics_active,
513            mqtt_subscriptions_active,
514            mqtt_retained_messages,
515            mqtt_errors_total,
516            memory_usage_bytes,
517            cpu_usage_percent,
518            thread_count,
519            uptime_seconds,
520            active_scenario_mode,
521            chaos_triggers_total,
522        }
523    }
524
525    /// Get the underlying Prometheus registry
526    pub fn registry(&self) -> &Registry {
527        &self.registry
528    }
529
    /// Check if the registry is initialized
    ///
    /// Always returns `true`: the body performs no check and inspects no state.
    pub fn is_initialized(&self) -> bool {
        true
    }
534
535    /// Record an HTTP request
536    pub fn record_http_request(&self, method: &str, status: u16, duration_seconds: f64) {
537        let status_str = status.to_string();
538        self.requests_total.with_label_values(&["http", method, &status_str]).inc();
539        self.requests_duration_seconds
540            .with_label_values(&["http", method])
541            .observe(duration_seconds);
542    }
543
544    /// Record a gRPC request
545    pub fn record_grpc_request(&self, method: &str, status: &str, duration_seconds: f64) {
546        self.requests_total.with_label_values(&["grpc", method, status]).inc();
547        self.requests_duration_seconds
548            .with_label_values(&["grpc", method])
549            .observe(duration_seconds);
550    }
551
552    /// Record a WebSocket message
553    pub fn record_ws_message_sent(&self) {
554        self.ws_messages_sent.inc();
555    }
556
557    /// Record a WebSocket message received
558    pub fn record_ws_message_received(&self) {
559        self.ws_messages_received.inc();
560    }
561
562    /// Record a GraphQL request
563    pub fn record_graphql_request(&self, operation: &str, status: u16, duration_seconds: f64) {
564        let status_str = status.to_string();
565        self.requests_total
566            .with_label_values(&["graphql", operation, &status_str])
567            .inc();
568        self.requests_duration_seconds
569            .with_label_values(&["graphql", operation])
570            .observe(duration_seconds);
571    }
572
573    /// Record a plugin execution
574    pub fn record_plugin_execution(&self, plugin_name: &str, success: bool, duration_seconds: f64) {
575        let status = if success { "success" } else { "failure" };
576        self.plugin_executions_total.with_label_values(&[plugin_name, status]).inc();
577        self.plugin_execution_duration_seconds
578            .with_label_values(&[plugin_name])
579            .observe(duration_seconds);
580    }
581
582    /// Increment in-flight requests
583    pub fn increment_in_flight(&self, protocol: &str) {
584        self.requests_in_flight.with_label_values(&[protocol]).inc();
585    }
586
587    /// Decrement in-flight requests
588    pub fn decrement_in_flight(&self, protocol: &str) {
589        self.requests_in_flight.with_label_values(&[protocol]).dec();
590    }
591
592    /// Record an error
593    pub fn record_error(&self, protocol: &str, error_type: &str) {
594        self.errors_total.with_label_values(&[protocol, error_type]).inc();
595    }
596
597    /// Update memory usage
598    pub fn update_memory_usage(&self, bytes: f64) {
599        self.memory_usage_bytes.set(bytes);
600    }
601
602    /// Update CPU usage
603    pub fn update_cpu_usage(&self, percent: f64) {
604        self.cpu_usage_percent.set(percent);
605    }
606
607    /// Set active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)
608    pub fn set_scenario_mode(&self, mode: i64) {
609        self.active_scenario_mode.set(mode);
610    }
611
612    /// Record a chaos trigger
613    pub fn record_chaos_trigger(&self) {
614        self.chaos_triggers_total.inc();
615    }
616
617    /// Record an HTTP request with path information
618    pub fn record_http_request_with_path(
619        &self,
620        path: &str,
621        method: &str,
622        status: u16,
623        duration_seconds: f64,
624    ) {
625        // Normalize path to avoid cardinality explosion
626        let normalized_path = normalize_path(path);
627        let status_str = status.to_string();
628
629        // Record by path
630        self.requests_by_path_total
631            .with_label_values(&[normalized_path.as_str(), method, status_str.as_str()])
632            .inc();
633        self.request_duration_by_path_seconds
634            .with_label_values(&[normalized_path.as_str(), method])
635            .observe(duration_seconds);
636
637        // Update average latency (simple moving average approximation)
638        // Note: For production use, consider using a proper moving average or quantiles
639        let current = self
640            .average_latency_by_path_seconds
641            .with_label_values(&[normalized_path.as_str(), method])
642            .get();
643        let new_avg = if current == 0.0 {
644            duration_seconds
645        } else {
646            (current * 0.95) + (duration_seconds * 0.05)
647        };
648        self.average_latency_by_path_seconds
649            .with_label_values(&[normalized_path.as_str(), method])
650            .set(new_avg);
651
652        // Also record in the general metrics
653        self.record_http_request(method, status, duration_seconds);
654    }
655
656    /// Record a WebSocket connection established
657    pub fn record_ws_connection_established(&self) {
658        self.ws_connections_total.inc();
659        self.ws_connections_active.inc();
660    }
661
662    /// Record a WebSocket connection closed
663    pub fn record_ws_connection_closed(&self, duration_seconds: f64, status: &str) {
664        self.ws_connections_active.dec();
665        self.ws_connection_duration_seconds
666            .with_label_values(&[status])
667            .observe(duration_seconds);
668    }
669
670    /// Record a WebSocket error
671    pub fn record_ws_error(&self) {
672        self.ws_errors_total.inc();
673    }
674
675    /// Record an SMTP connection established
676    pub fn record_smtp_connection_established(&self) {
677        self.smtp_connections_total.inc();
678        self.smtp_connections_active.inc();
679    }
680
681    /// Record an SMTP connection closed
682    pub fn record_smtp_connection_closed(&self) {
683        self.smtp_connections_active.dec();
684    }
685
686    /// Record an SMTP message received
687    pub fn record_smtp_message_received(&self) {
688        self.smtp_messages_received_total.inc();
689    }
690
691    /// Record an SMTP message stored
692    pub fn record_smtp_message_stored(&self) {
693        self.smtp_messages_stored_total.inc();
694    }
695
696    /// Record an SMTP error
697    pub fn record_smtp_error(&self, error_type: &str) {
698        self.smtp_errors_total.with_label_values(&[error_type]).inc();
699    }
700
701    /// Update thread count
702    pub fn update_thread_count(&self, count: f64) {
703        self.thread_count.set(count);
704    }
705
706    /// Update uptime
707    pub fn update_uptime(&self, seconds: f64) {
708        self.uptime_seconds.set(seconds);
709    }
710
711    // ==================== Workspace-specific metrics ====================
712
713    /// Record a workspace request
714    pub fn record_workspace_request(
715        &self,
716        workspace_id: &str,
717        method: &str,
718        status: u16,
719        duration_seconds: f64,
720    ) {
721        let status_str = status.to_string();
722        self.workspace_requests_total
723            .with_label_values(&[workspace_id, method, &status_str])
724            .inc();
725        self.workspace_requests_duration_seconds
726            .with_label_values(&[workspace_id, method])
727            .observe(duration_seconds);
728    }
729
730    /// Update workspace active routes count
731    pub fn update_workspace_active_routes(&self, workspace_id: &str, count: i64) {
732        self.workspace_active_routes.with_label_values(&[workspace_id]).set(count);
733    }
734
735    /// Record a workspace error
736    pub fn record_workspace_error(&self, workspace_id: &str, error_type: &str) {
737        self.workspace_errors_total.with_label_values(&[workspace_id, error_type]).inc();
738    }
739
740    /// Increment workspace active routes
741    pub fn increment_workspace_routes(&self, workspace_id: &str) {
742        self.workspace_active_routes.with_label_values(&[workspace_id]).inc();
743    }
744
745    /// Decrement workspace active routes
746    pub fn decrement_workspace_routes(&self, workspace_id: &str) {
747        self.workspace_active_routes.with_label_values(&[workspace_id]).dec();
748    }
749}
750
751/// Normalize path to avoid high cardinality
752///
753/// This function replaces dynamic path segments (IDs, UUIDs, etc.) with placeholders
754/// to prevent metric explosion.
755fn normalize_path(path: &str) -> String {
756    let mut segments: Vec<&str> = path.split('/').collect();
757
758    for segment in &mut segments {
759        // Replace UUIDs, numeric IDs, or hex strings with :id placeholder
760        if is_uuid(segment)
761            || segment.parse::<i64>().is_ok()
762            || (segment.len() > 8 && segment.chars().all(|c| c.is_ascii_hexdigit()))
763        {
764            *segment = ":id";
765        }
766    }
767
768    segments.join("/")
769}
770
/// Check whether `s` is a UUID in the canonical hyphenated form
/// (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, 8-4-4-4-12 hex digits).
///
/// Returns `true` only when `s` is exactly 36 bytes long, has `-` at byte offsets
/// 8, 13, 18 and 23, and an ASCII hex digit (either case) at every other position.
/// Checking only the length and the dash count would also accept ordinary slugs such
/// as `product-launch-announcement-for-2024`.
fn is_uuid(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(index, &byte)| match index {
            // Group separators of the 8-4-4-4-12 layout.
            8 | 13 | 18 | 23 => byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}
775
776impl Default for MetricsRegistry {
777    fn default() -> Self {
778        Self::new()
779    }
780}
781
782/// Global metrics registry instance
783static GLOBAL_REGISTRY: Lazy<MetricsRegistry> = Lazy::new(MetricsRegistry::new);
784
785/// Get the global metrics registry
786pub fn get_global_registry() -> &'static MetricsRegistry {
787    &GLOBAL_REGISTRY
788}
789
/// Unit tests for the metrics registry and the path-normalization helpers.
///
/// Most recorder tests are smoke tests (the call must not panic). The
/// workspace tests additionally read the public metric vectors back so that a
/// recorder writing to the wrong label set is actually detected.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_metrics_registry_creation() {
        let registry = MetricsRegistry::new();
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_record_http_request() {
        let registry = MetricsRegistry::new();
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);

        // Smoke test: recording must not panic
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_global_registry() {
        let registry = get_global_registry();
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_plugin_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_plugin_execution("test-plugin", true, 0.025);
        registry.record_plugin_execution("test-plugin", false, 0.050);
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_websocket_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_ws_connection_established();
        registry.record_ws_connection_closed(120.5, "normal");
        registry.record_ws_error();
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_path_normalization() {
        assert_eq!(normalize_path("/api/users/123"), "/api/users/:id");
        assert_eq!(
            normalize_path("/api/users/550e8400-e29b-41d4-a716-446655440000"),
            "/api/users/:id"
        );
        assert_eq!(normalize_path("/api/users/abc123def456"), "/api/users/:id");
        assert_eq!(normalize_path("/api/users/list"), "/api/users/list");
    }

    #[test]
    fn test_path_normalization_edge_cases() {
        // Every dynamic segment is replaced, not just the last one
        assert_eq!(normalize_path("/api/users/123/posts/456"), "/api/users/:id/posts/:id");

        // Static segments that merely contain digits are left alone
        assert_eq!(normalize_path("/api/v1/users"), "/api/v1/users");

        // Hex strings are only treated as IDs when longer than eight characters
        assert_eq!(normalize_path("/api/deadbeef"), "/api/deadbeef");

        // Empty segments survive the split/join round trip
        assert_eq!(normalize_path("/"), "/");
        assert_eq!(normalize_path(""), "");
    }

    #[test]
    fn test_path_based_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_path("/api/users/123", "GET", 200, 0.045);
        registry.record_http_request_with_path("/api/users/456", "GET", 200, 0.055);
        registry.record_http_request_with_path("/api/posts", "POST", 201, 0.123);
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_smtp_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_smtp_connection_established();
        registry.record_smtp_message_received();
        registry.record_smtp_message_stored();
        registry.record_smtp_connection_closed();
        registry.record_smtp_error("timeout");
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_system_metrics() {
        let registry = MetricsRegistry::new();
        registry.update_memory_usage(1024.0 * 1024.0 * 100.0); // 100 MB
        registry.update_cpu_usage(45.5);
        registry.update_thread_count(25.0);
        registry.update_uptime(3600.0); // 1 hour
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_workspace_metrics() {
        let registry = MetricsRegistry::new();
        let active_routes =
            |workspace: &str| registry.workspace_active_routes.with_label_values(&[workspace]).get();
        let errors = |workspace: &str, kind: &str| {
            registry.workspace_errors_total.with_label_values(&[workspace, kind]).get()
        };

        // Record workspace requests
        registry.record_workspace_request("workspace1", "GET", 200, 0.045);
        registry.record_workspace_request("workspace1", "POST", 201, 0.123);
        registry.record_workspace_request("workspace2", "GET", 200, 0.055);

        // Absolute updates must be readable back from the per-workspace gauge
        registry.update_workspace_active_routes("workspace1", 10);
        registry.update_workspace_active_routes("workspace2", 5);
        assert_eq!(active_routes("workspace1"), 10);
        assert_eq!(active_routes("workspace2"), 5);

        // Errors are counted per (workspace, error type) pair
        registry.record_workspace_error("workspace1", "validation");
        registry.record_workspace_error("workspace2", "timeout");
        assert_eq!(errors("workspace1", "validation"), 1);
        assert_eq!(errors("workspace2", "timeout"), 1);
        assert_eq!(errors("workspace1", "timeout"), 0);

        // Increment/decrement adjust the gauge relative to its current value
        registry.increment_workspace_routes("workspace1");
        assert_eq!(active_routes("workspace1"), 11);
        registry.decrement_workspace_routes("workspace1");
        assert_eq!(active_routes("workspace1"), 10);

        // Adjusting one workspace must not touch the other
        assert_eq!(active_routes("workspace2"), 5);

        assert!(registry.is_initialized());
    }

    #[test]
    fn test_workspace_metrics_isolation() {
        let registry = MetricsRegistry::new();

        // Ensure metrics for different workspaces are independent
        registry.record_workspace_request("ws1", "GET", 200, 0.1);
        registry.record_workspace_request("ws2", "GET", 200, 0.2);

        registry.update_workspace_active_routes("ws1", 5);
        registry.update_workspace_active_routes("ws2", 10);

        // Each workspace keeps its own gauge value
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws1"]).get(), 5);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws2"]).get(), 10);

        assert!(registry.is_initialized());
    }
}