mockforge_observability/prometheus/
metrics.rs

1//! Prometheus metrics definitions and registry
2
3use once_cell::sync::Lazy;
4use prometheus::{
5    Gauge, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
6    Opts, Registry,
7};
8use std::sync::Arc;
9use tracing::debug;
10
/// Global metrics registry for MockForge
///
/// Bundles a Prometheus [`Registry`] together with a handle to every metric
/// registered in it. The struct derives `Clone`; the registry itself is held
/// behind an [`Arc`].
#[derive(Clone)]
pub struct MetricsRegistry {
    /// Prometheus registry that the metrics below are registered with in `new`.
    registry: Arc<Registry>,

    // Request metrics by protocol
    /// `mockforge_requests_total`: request counter labelled by `protocol`, `method`, `status`.
    pub requests_total: IntCounterVec,
    /// `mockforge_request_duration_seconds`: request duration histogram labelled by
    /// `protocol`, `method`.
    pub requests_duration_seconds: HistogramVec,
    /// `mockforge_requests_in_flight`: requests currently being processed, labelled by `protocol`.
    pub requests_in_flight: IntGaugeVec,

    // Request metrics by path (endpoint-specific)
    /// `mockforge_requests_by_path_total`: request counter labelled by `path`, `method`, `status`.
    pub requests_by_path_total: IntCounterVec,
    /// `mockforge_request_duration_by_path_seconds`: request duration histogram labelled by
    /// `path`, `method`.
    pub request_duration_by_path_seconds: HistogramVec,
    /// `mockforge_average_latency_by_path_seconds`: average latency gauge labelled by
    /// `path`, `method`.
    pub average_latency_by_path_seconds: GaugeVec,

    // Workspace-specific metrics
    /// `mockforge_workspace_requests_total`: request counter labelled by `workspace_id`,
    /// `method`, `status`.
    pub workspace_requests_total: IntCounterVec,
    /// `mockforge_workspace_request_duration_seconds`: request duration histogram labelled by
    /// `workspace_id`, `method`.
    pub workspace_requests_duration_seconds: HistogramVec,
    /// `mockforge_workspace_active_routes`: active route count labelled by `workspace_id`.
    pub workspace_active_routes: IntGaugeVec,
    /// `mockforge_workspace_errors_total`: error counter labelled by `workspace_id`, `error_type`.
    pub workspace_errors_total: IntCounterVec,

    // Error metrics
    /// `mockforge_errors_total`: error counter labelled by `protocol`, `error_type`.
    pub errors_total: IntCounterVec,
    /// `mockforge_error_rate`: error rate (0.0 to 1.0) labelled by `protocol`.
    pub error_rate: GaugeVec,

    // Plugin metrics
    /// `mockforge_plugin_executions_total`: execution counter labelled by `plugin_name`, `status`.
    pub plugin_executions_total: IntCounterVec,
    /// `mockforge_plugin_execution_duration_seconds`: execution duration histogram labelled by
    /// `plugin_name`.
    pub plugin_execution_duration_seconds: HistogramVec,
    /// `mockforge_plugin_errors_total`: error counter labelled by `plugin_name`, `error_type`.
    pub plugin_errors_total: IntCounterVec,

    // WebSocket specific metrics
    /// `mockforge_ws_connections_active`: number of active WebSocket connections.
    pub ws_connections_active: IntGauge,
    /// `mockforge_ws_connections_total`: total WebSocket connections established.
    pub ws_connections_total: IntCounter,
    /// `mockforge_ws_connection_duration_seconds`: connection duration histogram labelled by
    /// `status`.
    pub ws_connection_duration_seconds: HistogramVec,
    /// `mockforge_ws_messages_sent_total`: total WebSocket messages sent.
    pub ws_messages_sent: IntCounter,
    /// `mockforge_ws_messages_received_total`: total WebSocket messages received.
    pub ws_messages_received: IntCounter,
    /// `mockforge_ws_errors_total`: total WebSocket errors.
    pub ws_errors_total: IntCounter,

    // SMTP specific metrics
    /// `mockforge_smtp_connections_active`: number of active SMTP connections.
    pub smtp_connections_active: IntGauge,
    /// `mockforge_smtp_connections_total`: total SMTP connections.
    pub smtp_connections_total: IntCounter,
    /// `mockforge_smtp_messages_received_total`: total SMTP messages received.
    pub smtp_messages_received_total: IntCounter,
    /// `mockforge_smtp_messages_stored_total`: total SMTP messages stored in the mailbox.
    pub smtp_messages_stored_total: IntCounter,
    /// `mockforge_smtp_errors_total`: SMTP error counter labelled by `error_type`.
    pub smtp_errors_total: IntCounterVec,

    // MQTT specific metrics
    /// `mockforge_mqtt_connections_active`: number of active MQTT client connections.
    pub mqtt_connections_active: IntGauge,
    /// `mockforge_mqtt_connections_total`: total MQTT client connections established.
    pub mqtt_connections_total: IntCounter,
    /// `mockforge_mqtt_messages_published_total`: total MQTT messages published.
    pub mqtt_messages_published_total: IntCounter,
    /// `mockforge_mqtt_messages_received_total`: total MQTT messages received.
    pub mqtt_messages_received_total: IntCounter,
    /// `mockforge_mqtt_topics_active`: number of active MQTT topics.
    pub mqtt_topics_active: IntGauge,
    /// `mockforge_mqtt_subscriptions_active`: number of active MQTT subscriptions.
    pub mqtt_subscriptions_active: IntGauge,
    /// `mockforge_mqtt_retained_messages`: number of retained MQTT messages.
    pub mqtt_retained_messages: IntGauge,
    /// `mockforge_mqtt_errors_total`: MQTT error counter labelled by `error_type`.
    pub mqtt_errors_total: IntCounterVec,

    // System metrics
    /// `mockforge_memory_usage_bytes`: memory usage in bytes.
    pub memory_usage_bytes: Gauge,
    /// `mockforge_cpu_usage_percent`: CPU usage percentage.
    pub cpu_usage_percent: Gauge,
    /// `mockforge_thread_count`: number of active threads.
    pub thread_count: Gauge,
    /// `mockforge_uptime_seconds`: server uptime in seconds.
    pub uptime_seconds: Gauge,

    // Scenario metrics (for Phase 4)
    /// `mockforge_active_scenario_mode`: active scenario mode
    /// (0=healthy, 1=degraded, 2=error, 3=chaos).
    pub active_scenario_mode: IntGauge,
    /// `mockforge_chaos_triggers_total`: total number of chaos mode triggers.
    pub chaos_triggers_total: IntCounter,

    // Business/SLO metrics
    /// `mockforge_service_availability`: availability (0.0 to 1.0) labelled by `protocol`.
    pub service_availability: GaugeVec,
    /// `mockforge_slo_compliance`: SLO compliance (0.0 to 1.0) labelled by `protocol`, `slo_type`.
    pub slo_compliance: GaugeVec,
    /// `mockforge_successful_request_rate`: success rate (0.0 to 1.0) labelled by `protocol`.
    pub successful_request_rate: GaugeVec,
    /// `mockforge_p95_latency_slo_compliance`: 1.0 = compliant, 0.0 = non-compliant, labelled by
    /// `protocol`.
    pub p95_latency_slo_compliance: GaugeVec,
    /// `mockforge_error_budget_remaining`: remaining error budget (0.0 to 1.0) labelled by
    /// `protocol`.
    pub error_budget_remaining: GaugeVec,

    // Marketplace metrics
    /// `mockforge_marketplace_publish_total`: publish counter labelled by `type`, `status`.
    pub marketplace_publish_total: IntCounterVec,
    /// `mockforge_marketplace_publish_duration_seconds`: publish duration histogram labelled by
    /// `type`.
    pub marketplace_publish_duration_seconds: HistogramVec,
    /// `mockforge_marketplace_download_total`: download counter labelled by `type`, `status`.
    pub marketplace_download_total: IntCounterVec,
    /// `mockforge_marketplace_download_duration_seconds`: download duration histogram labelled
    /// by `type`.
    pub marketplace_download_duration_seconds: HistogramVec,
    /// `mockforge_marketplace_search_total`: search counter labelled by `type`, `status`.
    pub marketplace_search_total: IntCounterVec,
    /// `mockforge_marketplace_search_duration_seconds`: search duration histogram labelled by
    /// `type`.
    pub marketplace_search_duration_seconds: HistogramVec,
    /// `mockforge_marketplace_errors_total`: error counter labelled by `type`, `error_code`.
    pub marketplace_errors_total: IntCounterVec,
    /// `mockforge_marketplace_items_total`: item count gauge labelled by `type`.
    pub marketplace_items_total: IntGaugeVec,
}
93
94impl MetricsRegistry {
95    /// Create a new metrics registry with all metrics initialized
96    pub fn new() -> Self {
97        let registry = Registry::new();
98
99        // Request metrics
100        let requests_total = IntCounterVec::new(
101            Opts::new(
102                "mockforge_requests_total",
103                "Total number of requests by protocol, method, and status",
104            ),
105            &["protocol", "method", "status"],
106        )
107        .expect("Failed to create requests_total metric");
108
109        let requests_duration_seconds = HistogramVec::new(
110            HistogramOpts::new("mockforge_request_duration_seconds", "Request duration in seconds")
111                .buckets(vec![
112                    0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
113                ]),
114            &["protocol", "method"],
115        )
116        .expect("Failed to create requests_duration_seconds metric");
117
118        let requests_in_flight = IntGaugeVec::new(
119            Opts::new(
120                "mockforge_requests_in_flight",
121                "Number of requests currently being processed",
122            ),
123            &["protocol"],
124        )
125        .expect("Failed to create requests_in_flight metric");
126
127        // Error metrics
128        let errors_total = IntCounterVec::new(
129            Opts::new(
130                "mockforge_errors_total",
131                "Total number of errors by protocol and error type",
132            ),
133            &["protocol", "error_type"],
134        )
135        .expect("Failed to create errors_total metric");
136
137        let error_rate = GaugeVec::new(
138            Opts::new("mockforge_error_rate", "Error rate by protocol (0.0 to 1.0)"),
139            &["protocol"],
140        )
141        .expect("Failed to create error_rate metric");
142
143        // Plugin metrics
144        let plugin_executions_total = IntCounterVec::new(
145            Opts::new("mockforge_plugin_executions_total", "Total number of plugin executions"),
146            &["plugin_name", "status"],
147        )
148        .expect("Failed to create plugin_executions_total metric");
149
150        let plugin_execution_duration_seconds = HistogramVec::new(
151            HistogramOpts::new(
152                "mockforge_plugin_execution_duration_seconds",
153                "Plugin execution duration in seconds",
154            )
155            .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0]),
156            &["plugin_name"],
157        )
158        .expect("Failed to create plugin_execution_duration_seconds metric");
159
160        let plugin_errors_total = IntCounterVec::new(
161            Opts::new("mockforge_plugin_errors_total", "Total number of plugin errors"),
162            &["plugin_name", "error_type"],
163        )
164        .expect("Failed to create plugin_errors_total metric");
165
166        // WebSocket metrics
167        // Path-based request metrics
168        let requests_by_path_total = IntCounterVec::new(
169            Opts::new(
170                "mockforge_requests_by_path_total",
171                "Total number of requests by path, method, and status",
172            ),
173            &["path", "method", "status"],
174        )
175        .expect("Failed to create requests_by_path_total metric");
176
177        let request_duration_by_path_seconds = HistogramVec::new(
178            HistogramOpts::new(
179                "mockforge_request_duration_by_path_seconds",
180                "Request duration by path in seconds",
181            )
182            .buckets(vec![
183                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
184            ]),
185            &["path", "method"],
186        )
187        .expect("Failed to create request_duration_by_path_seconds metric");
188
189        let average_latency_by_path_seconds = GaugeVec::new(
190            Opts::new(
191                "mockforge_average_latency_by_path_seconds",
192                "Average request latency by path in seconds",
193            ),
194            &["path", "method"],
195        )
196        .expect("Failed to create average_latency_by_path_seconds metric");
197
198        // Workspace-specific metrics
199        let workspace_requests_total = IntCounterVec::new(
200            Opts::new(
201                "mockforge_workspace_requests_total",
202                "Total number of requests by workspace, method, and status",
203            ),
204            &["workspace_id", "method", "status"],
205        )
206        .expect("Failed to create workspace_requests_total metric");
207
208        let workspace_requests_duration_seconds = HistogramVec::new(
209            HistogramOpts::new(
210                "mockforge_workspace_request_duration_seconds",
211                "Request duration by workspace in seconds",
212            )
213            .buckets(vec![
214                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
215            ]),
216            &["workspace_id", "method"],
217        )
218        .expect("Failed to create workspace_requests_duration_seconds metric");
219
220        let workspace_active_routes = IntGaugeVec::new(
221            Opts::new(
222                "mockforge_workspace_active_routes",
223                "Number of active routes in each workspace",
224            ),
225            &["workspace_id"],
226        )
227        .expect("Failed to create workspace_active_routes metric");
228
229        let workspace_errors_total = IntCounterVec::new(
230            Opts::new("mockforge_workspace_errors_total", "Total number of errors by workspace"),
231            &["workspace_id", "error_type"],
232        )
233        .expect("Failed to create workspace_errors_total metric");
234
235        // WebSocket metrics
236        let ws_connections_active = IntGauge::new(
237            "mockforge_ws_connections_active",
238            "Number of active WebSocket connections",
239        )
240        .expect("Failed to create ws_connections_active metric");
241
242        let ws_connections_total = IntCounter::new(
243            "mockforge_ws_connections_total",
244            "Total number of WebSocket connections established",
245        )
246        .expect("Failed to create ws_connections_total metric");
247
248        let ws_connection_duration_seconds = HistogramVec::new(
249            HistogramOpts::new(
250                "mockforge_ws_connection_duration_seconds",
251                "WebSocket connection duration in seconds",
252            )
253            .buckets(vec![1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 600.0, 1800.0, 3600.0]),
254            &["status"],
255        )
256        .expect("Failed to create ws_connection_duration_seconds metric");
257
258        let ws_messages_sent = IntCounter::new(
259            "mockforge_ws_messages_sent_total",
260            "Total number of WebSocket messages sent",
261        )
262        .expect("Failed to create ws_messages_sent metric");
263
264        let ws_messages_received = IntCounter::new(
265            "mockforge_ws_messages_received_total",
266            "Total number of WebSocket messages received",
267        )
268        .expect("Failed to create ws_messages_received metric");
269
270        let ws_errors_total =
271            IntCounter::new("mockforge_ws_errors_total", "Total number of WebSocket errors")
272                .expect("Failed to create ws_errors_total metric");
273
274        // SMTP metrics
275        let smtp_connections_active =
276            IntGauge::new("mockforge_smtp_connections_active", "Number of active SMTP connections")
277                .expect("Failed to create smtp_connections_active metric");
278
279        let smtp_connections_total =
280            IntCounter::new("mockforge_smtp_connections_total", "Total number of SMTP connections")
281                .expect("Failed to create smtp_connections_total metric");
282
283        let smtp_messages_received_total = IntCounter::new(
284            "mockforge_smtp_messages_received_total",
285            "Total number of SMTP messages received",
286        )
287        .expect("Failed to create smtp_messages_received_total metric");
288
289        let smtp_messages_stored_total = IntCounter::new(
290            "mockforge_smtp_messages_stored_total",
291            "Total number of SMTP messages stored in mailbox",
292        )
293        .expect("Failed to create smtp_messages_stored_total metric");
294
295        let smtp_errors_total = IntCounterVec::new(
296            Opts::new("mockforge_smtp_errors_total", "Total number of SMTP errors by type"),
297            &["error_type"],
298        )
299        .expect("Failed to create smtp_errors_total metric");
300
301        // MQTT metrics
302        let mqtt_connections_active = IntGauge::new(
303            "mockforge_mqtt_connections_active",
304            "Number of active MQTT client connections",
305        )
306        .expect("Failed to create mqtt_connections_active metric");
307
308        let mqtt_connections_total = IntCounter::new(
309            "mockforge_mqtt_connections_total",
310            "Total number of MQTT client connections established",
311        )
312        .expect("Failed to create mqtt_connections_total metric");
313
314        let mqtt_messages_published_total = IntCounter::new(
315            "mockforge_mqtt_messages_published_total",
316            "Total number of MQTT messages published",
317        )
318        .expect("Failed to create mqtt_messages_published_total metric");
319
320        let mqtt_messages_received_total = IntCounter::new(
321            "mockforge_mqtt_messages_received_total",
322            "Total number of MQTT messages received",
323        )
324        .expect("Failed to create mqtt_messages_received_total metric");
325
326        let mqtt_topics_active =
327            IntGauge::new("mockforge_mqtt_topics_active", "Number of active MQTT topics")
328                .expect("Failed to create mqtt_topics_active metric");
329
330        let mqtt_subscriptions_active = IntGauge::new(
331            "mockforge_mqtt_subscriptions_active",
332            "Number of active MQTT subscriptions",
333        )
334        .expect("Failed to create mqtt_subscriptions_active metric");
335
336        let mqtt_retained_messages =
337            IntGauge::new("mockforge_mqtt_retained_messages", "Number of retained MQTT messages")
338                .expect("Failed to create mqtt_retained_messages metric");
339
340        let mqtt_errors_total = IntCounterVec::new(
341            Opts::new("mockforge_mqtt_errors_total", "Total number of MQTT errors by type"),
342            &["error_type"],
343        )
344        .expect("Failed to create mqtt_errors_total metric");
345
346        // System metrics
347        let memory_usage_bytes =
348            Gauge::new("mockforge_memory_usage_bytes", "Memory usage in bytes")
349                .expect("Failed to create memory_usage_bytes metric");
350
351        let cpu_usage_percent = Gauge::new("mockforge_cpu_usage_percent", "CPU usage percentage")
352            .expect("Failed to create cpu_usage_percent metric");
353
354        let thread_count = Gauge::new("mockforge_thread_count", "Number of active threads")
355            .expect("Failed to create thread_count metric");
356
357        let uptime_seconds = Gauge::new("mockforge_uptime_seconds", "Server uptime in seconds")
358            .expect("Failed to create uptime_seconds metric");
359
360        // Scenario metrics
361        let active_scenario_mode = IntGauge::new(
362            "mockforge_active_scenario_mode",
363            "Active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)",
364        )
365        .expect("Failed to create active_scenario_mode metric");
366
367        let chaos_triggers_total = IntCounter::new(
368            "mockforge_chaos_triggers_total",
369            "Total number of chaos mode triggers",
370        )
371        .expect("Failed to create chaos_triggers_total metric");
372
373        // Business/SLO metrics
374        let service_availability = GaugeVec::new(
375            Opts::new(
376                "mockforge_service_availability",
377                "Service availability percentage (0.0 to 1.0) by protocol",
378            ),
379            &["protocol"],
380        )
381        .expect("Failed to create service_availability metric");
382
383        let slo_compliance = GaugeVec::new(
384            Opts::new(
385                "mockforge_slo_compliance",
386                "SLO compliance percentage (0.0 to 1.0) by protocol and slo_type",
387            ),
388            &["protocol", "slo_type"],
389        )
390        .expect("Failed to create slo_compliance metric");
391
392        let successful_request_rate = GaugeVec::new(
393            Opts::new(
394                "mockforge_successful_request_rate",
395                "Successful request rate (0.0 to 1.0) by protocol",
396            ),
397            &["protocol"],
398        )
399        .expect("Failed to create successful_request_rate metric");
400
401        let p95_latency_slo_compliance = GaugeVec::new(
402            Opts::new(
403                "mockforge_p95_latency_slo_compliance",
404                "P95 latency SLO compliance (1.0 = compliant, 0.0 = non-compliant) by protocol",
405            ),
406            &["protocol"],
407        )
408        .expect("Failed to create p95_latency_slo_compliance metric");
409
410        let error_budget_remaining = GaugeVec::new(
411            Opts::new(
412                "mockforge_error_budget_remaining",
413                "Remaining error budget percentage (0.0 to 1.0) by protocol",
414            ),
415            &["protocol"],
416        )
417        .expect("Failed to create error_budget_remaining metric");
418
419        // Marketplace metrics
420        let marketplace_publish_total = IntCounterVec::new(
421            Opts::new(
422                "mockforge_marketplace_publish_total",
423                "Total number of marketplace items published by type and status",
424            ),
425            &["type", "status"], // type: plugin, template, scenario; status: success, error
426        )
427        .expect("Failed to create marketplace_publish_total metric");
428
429        let marketplace_publish_duration_seconds = HistogramVec::new(
430            HistogramOpts::new(
431                "mockforge_marketplace_publish_duration_seconds",
432                "Marketplace publish operation duration in seconds",
433            )
434            .buckets(vec![0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0]),
435            &["type"], // type: plugin, template, scenario
436        )
437        .expect("Failed to create marketplace_publish_duration_seconds metric");
438
439        let marketplace_download_total = IntCounterVec::new(
440            Opts::new(
441                "mockforge_marketplace_download_total",
442                "Total number of marketplace items downloaded by type and status",
443            ),
444            &["type", "status"], // type: plugin, template, scenario; status: success, error
445        )
446        .expect("Failed to create marketplace_download_total metric");
447
448        let marketplace_download_duration_seconds = HistogramVec::new(
449            HistogramOpts::new(
450                "mockforge_marketplace_download_duration_seconds",
451                "Marketplace download operation duration in seconds",
452            )
453            .buckets(vec![0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]),
454            &["type"], // type: plugin, template, scenario
455        )
456        .expect("Failed to create marketplace_download_duration_seconds metric");
457
458        let marketplace_search_total = IntCounterVec::new(
459            Opts::new(
460                "mockforge_marketplace_search_total",
461                "Total number of marketplace searches by type and status",
462            ),
463            &["type", "status"], // type: plugin, template, scenario; status: success, error
464        )
465        .expect("Failed to create marketplace_search_total metric");
466
467        let marketplace_search_duration_seconds = HistogramVec::new(
468            HistogramOpts::new(
469                "mockforge_marketplace_search_duration_seconds",
470                "Marketplace search operation duration in seconds",
471            )
472            .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0]),
473            &["type"], // type: plugin, template, scenario
474        )
475        .expect("Failed to create marketplace_search_duration_seconds metric");
476
477        let marketplace_errors_total = IntCounterVec::new(
478            Opts::new(
479                "mockforge_marketplace_errors_total",
480                "Total number of marketplace errors by type and error_code",
481            ),
482            &["type", "error_code"], // type: plugin, template, scenario; error_code: validation_failed, not_found, etc.
483        )
484        .expect("Failed to create marketplace_errors_total metric");
485
486        let marketplace_items_total = IntGaugeVec::new(
487            Opts::new(
488                "mockforge_marketplace_items_total",
489                "Total number of marketplace items by type",
490            ),
491            &["type"], // type: plugin, template, scenario
492        )
493        .expect("Failed to create marketplace_items_total metric");
494
495        // Register all metrics
496        registry
497            .register(Box::new(requests_total.clone()))
498            .expect("Failed to register requests_total");
499        registry
500            .register(Box::new(requests_duration_seconds.clone()))
501            .expect("Failed to register requests_duration_seconds");
502        registry
503            .register(Box::new(requests_in_flight.clone()))
504            .expect("Failed to register requests_in_flight");
505        registry
506            .register(Box::new(requests_by_path_total.clone()))
507            .expect("Failed to register requests_by_path_total");
508        registry
509            .register(Box::new(request_duration_by_path_seconds.clone()))
510            .expect("Failed to register request_duration_by_path_seconds");
511        registry
512            .register(Box::new(average_latency_by_path_seconds.clone()))
513            .expect("Failed to register average_latency_by_path_seconds");
514        registry
515            .register(Box::new(workspace_requests_total.clone()))
516            .expect("Failed to register workspace_requests_total");
517        registry
518            .register(Box::new(workspace_requests_duration_seconds.clone()))
519            .expect("Failed to register workspace_requests_duration_seconds");
520        registry
521            .register(Box::new(workspace_active_routes.clone()))
522            .expect("Failed to register workspace_active_routes");
523        registry
524            .register(Box::new(workspace_errors_total.clone()))
525            .expect("Failed to register workspace_errors_total");
526        registry
527            .register(Box::new(errors_total.clone()))
528            .expect("Failed to register errors_total");
529        registry
530            .register(Box::new(error_rate.clone()))
531            .expect("Failed to register error_rate");
532        registry
533            .register(Box::new(plugin_executions_total.clone()))
534            .expect("Failed to register plugin_executions_total");
535        registry
536            .register(Box::new(plugin_execution_duration_seconds.clone()))
537            .expect("Failed to register plugin_execution_duration_seconds");
538        registry
539            .register(Box::new(plugin_errors_total.clone()))
540            .expect("Failed to register plugin_errors_total");
541        registry
542            .register(Box::new(ws_connections_active.clone()))
543            .expect("Failed to register ws_connections_active");
544        registry
545            .register(Box::new(ws_connections_total.clone()))
546            .expect("Failed to register ws_connections_total");
547        registry
548            .register(Box::new(ws_connection_duration_seconds.clone()))
549            .expect("Failed to register ws_connection_duration_seconds");
550        registry
551            .register(Box::new(ws_messages_sent.clone()))
552            .expect("Failed to register ws_messages_sent");
553        registry
554            .register(Box::new(ws_messages_received.clone()))
555            .expect("Failed to register ws_messages_received");
556        registry
557            .register(Box::new(ws_errors_total.clone()))
558            .expect("Failed to register ws_errors_total");
559        registry
560            .register(Box::new(smtp_connections_active.clone()))
561            .expect("Failed to register smtp_connections_active");
562        registry
563            .register(Box::new(smtp_connections_total.clone()))
564            .expect("Failed to register smtp_connections_total");
565        registry
566            .register(Box::new(smtp_messages_received_total.clone()))
567            .expect("Failed to register smtp_messages_received_total");
568        registry
569            .register(Box::new(smtp_messages_stored_total.clone()))
570            .expect("Failed to register smtp_messages_stored_total");
571        registry
572            .register(Box::new(smtp_errors_total.clone()))
573            .expect("Failed to register smtp_errors_total");
574        registry
575            .register(Box::new(mqtt_connections_active.clone()))
576            .expect("Failed to register mqtt_connections_active");
577        registry
578            .register(Box::new(mqtt_connections_total.clone()))
579            .expect("Failed to register mqtt_connections_total");
580        registry
581            .register(Box::new(mqtt_messages_published_total.clone()))
582            .expect("Failed to register mqtt_messages_published_total");
583        registry
584            .register(Box::new(mqtt_messages_received_total.clone()))
585            .expect("Failed to register mqtt_messages_received_total");
586        registry
587            .register(Box::new(mqtt_topics_active.clone()))
588            .expect("Failed to register mqtt_topics_active");
589        registry
590            .register(Box::new(mqtt_subscriptions_active.clone()))
591            .expect("Failed to register mqtt_subscriptions_active");
592        registry
593            .register(Box::new(mqtt_retained_messages.clone()))
594            .expect("Failed to register mqtt_retained_messages");
595        registry
596            .register(Box::new(mqtt_errors_total.clone()))
597            .expect("Failed to register mqtt_errors_total");
598        registry
599            .register(Box::new(memory_usage_bytes.clone()))
600            .expect("Failed to register memory_usage_bytes");
601        registry
602            .register(Box::new(cpu_usage_percent.clone()))
603            .expect("Failed to register cpu_usage_percent");
604        registry
605            .register(Box::new(thread_count.clone()))
606            .expect("Failed to register thread_count");
607        registry
608            .register(Box::new(uptime_seconds.clone()))
609            .expect("Failed to register uptime_seconds");
610        registry
611            .register(Box::new(active_scenario_mode.clone()))
612            .expect("Failed to register active_scenario_mode");
613        registry
614            .register(Box::new(chaos_triggers_total.clone()))
615            .expect("Failed to register chaos_triggers_total");
616        registry
617            .register(Box::new(service_availability.clone()))
618            .expect("Failed to register service_availability");
619        registry
620            .register(Box::new(slo_compliance.clone()))
621            .expect("Failed to register slo_compliance");
622        registry
623            .register(Box::new(successful_request_rate.clone()))
624            .expect("Failed to register successful_request_rate");
625        registry
626            .register(Box::new(p95_latency_slo_compliance.clone()))
627            .expect("Failed to register p95_latency_slo_compliance");
628        registry
629            .register(Box::new(error_budget_remaining.clone()))
630            .expect("Failed to register error_budget_remaining");
631        registry
632            .register(Box::new(marketplace_publish_total.clone()))
633            .expect("Failed to register marketplace_publish_total");
634        registry
635            .register(Box::new(marketplace_publish_duration_seconds.clone()))
636            .expect("Failed to register marketplace_publish_duration_seconds");
637        registry
638            .register(Box::new(marketplace_download_total.clone()))
639            .expect("Failed to register marketplace_download_total");
640        registry
641            .register(Box::new(marketplace_download_duration_seconds.clone()))
642            .expect("Failed to register marketplace_download_duration_seconds");
643        registry
644            .register(Box::new(marketplace_search_total.clone()))
645            .expect("Failed to register marketplace_search_total");
646        registry
647            .register(Box::new(marketplace_search_duration_seconds.clone()))
648            .expect("Failed to register marketplace_search_duration_seconds");
649        registry
650            .register(Box::new(marketplace_errors_total.clone()))
651            .expect("Failed to register marketplace_errors_total");
652        registry
653            .register(Box::new(marketplace_items_total.clone()))
654            .expect("Failed to register marketplace_items_total");
655
656        debug!("Initialized Prometheus metrics registry");
657
658        Self {
659            registry: Arc::new(registry),
660            requests_total,
661            requests_duration_seconds,
662            requests_in_flight,
663            requests_by_path_total,
664            request_duration_by_path_seconds,
665            average_latency_by_path_seconds,
666            workspace_requests_total,
667            workspace_requests_duration_seconds,
668            workspace_active_routes,
669            workspace_errors_total,
670            errors_total,
671            error_rate,
672            plugin_executions_total,
673            plugin_execution_duration_seconds,
674            plugin_errors_total,
675            ws_connections_active,
676            ws_connections_total,
677            ws_connection_duration_seconds,
678            ws_messages_sent,
679            ws_messages_received,
680            ws_errors_total,
681            smtp_connections_active,
682            smtp_connections_total,
683            smtp_messages_received_total,
684            smtp_messages_stored_total,
685            smtp_errors_total,
686            mqtt_connections_active,
687            mqtt_connections_total,
688            mqtt_messages_published_total,
689            mqtt_messages_received_total,
690            mqtt_topics_active,
691            mqtt_subscriptions_active,
692            mqtt_retained_messages,
693            mqtt_errors_total,
694            memory_usage_bytes,
695            cpu_usage_percent,
696            thread_count,
697            uptime_seconds,
698            active_scenario_mode,
699            chaos_triggers_total,
700            service_availability,
701            slo_compliance,
702            successful_request_rate,
703            p95_latency_slo_compliance,
704            error_budget_remaining,
705            marketplace_publish_total,
706            marketplace_publish_duration_seconds,
707            marketplace_download_total,
708            marketplace_download_duration_seconds,
709            marketplace_search_total,
710            marketplace_search_duration_seconds,
711            marketplace_errors_total,
712            marketplace_items_total,
713        }
714    }
715
716    /// Get the underlying Prometheus registry
717    pub fn registry(&self) -> &Registry {
718        &self.registry
719    }
720
    /// Check if the registry is initialized
    ///
    /// Always returns `true`: this method performs no check of its own and
    /// simply reports the constant.
    pub fn is_initialized(&self) -> bool {
        true
    }
725
726    /// Record an HTTP request
727    pub fn record_http_request(&self, method: &str, status: u16, duration_seconds: f64) {
728        let status_str = status.to_string();
729        self.requests_total.with_label_values(&["http", method, &status_str]).inc();
730        self.requests_duration_seconds
731            .with_label_values(&["http", method])
732            .observe(duration_seconds);
733    }
734
735    /// Record a gRPC request
736    pub fn record_grpc_request(&self, method: &str, status: &str, duration_seconds: f64) {
737        self.requests_total.with_label_values(&["grpc", method, status]).inc();
738        self.requests_duration_seconds
739            .with_label_values(&["grpc", method])
740            .observe(duration_seconds);
741    }
742
743    /// Record a WebSocket message
744    pub fn record_ws_message_sent(&self) {
745        self.ws_messages_sent.inc();
746    }
747
748    /// Record a WebSocket message received
749    pub fn record_ws_message_received(&self) {
750        self.ws_messages_received.inc();
751    }
752
753    /// Record a GraphQL request
754    pub fn record_graphql_request(&self, operation: &str, status: u16, duration_seconds: f64) {
755        let status_str = status.to_string();
756        self.requests_total
757            .with_label_values(&["graphql", operation, &status_str])
758            .inc();
759        self.requests_duration_seconds
760            .with_label_values(&["graphql", operation])
761            .observe(duration_seconds);
762    }
763
764    /// Record a plugin execution
765    pub fn record_plugin_execution(&self, plugin_name: &str, success: bool, duration_seconds: f64) {
766        let status = if success { "success" } else { "failure" };
767        self.plugin_executions_total.with_label_values(&[plugin_name, status]).inc();
768        self.plugin_execution_duration_seconds
769            .with_label_values(&[plugin_name])
770            .observe(duration_seconds);
771    }
772
773    /// Increment in-flight requests
774    pub fn increment_in_flight(&self, protocol: &str) {
775        self.requests_in_flight.with_label_values(&[protocol]).inc();
776    }
777
778    /// Decrement in-flight requests
779    pub fn decrement_in_flight(&self, protocol: &str) {
780        self.requests_in_flight.with_label_values(&[protocol]).dec();
781    }
782
783    /// Record an error
784    pub fn record_error(&self, protocol: &str, error_type: &str) {
785        self.errors_total.with_label_values(&[protocol, error_type]).inc();
786    }
787
788    /// Update memory usage
789    pub fn update_memory_usage(&self, bytes: f64) {
790        self.memory_usage_bytes.set(bytes);
791    }
792
793    /// Update CPU usage
794    pub fn update_cpu_usage(&self, percent: f64) {
795        self.cpu_usage_percent.set(percent);
796    }
797
798    /// Set active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)
799    pub fn set_scenario_mode(&self, mode: i64) {
800        self.active_scenario_mode.set(mode);
801    }
802
803    /// Record a chaos trigger
804    pub fn record_chaos_trigger(&self) {
805        self.chaos_triggers_total.inc();
806    }
807
808    /// Record an HTTP request with path information
809    pub fn record_http_request_with_path(
810        &self,
811        path: &str,
812        method: &str,
813        status: u16,
814        duration_seconds: f64,
815    ) {
816        // Normalize path to avoid cardinality explosion
817        let normalized_path = normalize_path(path);
818        let status_str = status.to_string();
819
820        // Record by path
821        self.requests_by_path_total
822            .with_label_values(&[normalized_path.as_str(), method, status_str.as_str()])
823            .inc();
824        self.request_duration_by_path_seconds
825            .with_label_values(&[normalized_path.as_str(), method])
826            .observe(duration_seconds);
827
828        // Update average latency (simple moving average approximation)
829        // Note: For production use, consider using a proper moving average or quantiles
830        let current = self
831            .average_latency_by_path_seconds
832            .with_label_values(&[normalized_path.as_str(), method])
833            .get();
834        let new_avg = if current == 0.0 {
835            duration_seconds
836        } else {
837            (current * 0.95) + (duration_seconds * 0.05)
838        };
839        self.average_latency_by_path_seconds
840            .with_label_values(&[normalized_path.as_str(), method])
841            .set(new_avg);
842
843        // Also record in the general metrics
844        self.record_http_request(method, status, duration_seconds);
845    }
846
847    /// Record a WebSocket connection established
848    pub fn record_ws_connection_established(&self) {
849        self.ws_connections_total.inc();
850        self.ws_connections_active.inc();
851    }
852
853    /// Record a WebSocket connection closed
854    pub fn record_ws_connection_closed(&self, duration_seconds: f64, status: &str) {
855        self.ws_connections_active.dec();
856        self.ws_connection_duration_seconds
857            .with_label_values(&[status])
858            .observe(duration_seconds);
859    }
860
861    /// Record a WebSocket error
862    pub fn record_ws_error(&self) {
863        self.ws_errors_total.inc();
864    }
865
866    /// Record an SMTP connection established
867    pub fn record_smtp_connection_established(&self) {
868        self.smtp_connections_total.inc();
869        self.smtp_connections_active.inc();
870    }
871
872    /// Record an SMTP connection closed
873    pub fn record_smtp_connection_closed(&self) {
874        self.smtp_connections_active.dec();
875    }
876
877    /// Record an SMTP message received
878    pub fn record_smtp_message_received(&self) {
879        self.smtp_messages_received_total.inc();
880    }
881
882    /// Record an SMTP message stored
883    pub fn record_smtp_message_stored(&self) {
884        self.smtp_messages_stored_total.inc();
885    }
886
887    /// Record an SMTP error
888    pub fn record_smtp_error(&self, error_type: &str) {
889        self.smtp_errors_total.with_label_values(&[error_type]).inc();
890    }
891
892    /// Update thread count
893    pub fn update_thread_count(&self, count: f64) {
894        self.thread_count.set(count);
895    }
896
897    /// Update uptime
898    pub fn update_uptime(&self, seconds: f64) {
899        self.uptime_seconds.set(seconds);
900    }
901
902    // ==================== Workspace-specific metrics ====================
903
904    /// Record a workspace request
905    pub fn record_workspace_request(
906        &self,
907        workspace_id: &str,
908        method: &str,
909        status: u16,
910        duration_seconds: f64,
911    ) {
912        let status_str = status.to_string();
913        self.workspace_requests_total
914            .with_label_values(&[workspace_id, method, &status_str])
915            .inc();
916        self.workspace_requests_duration_seconds
917            .with_label_values(&[workspace_id, method])
918            .observe(duration_seconds);
919    }
920
921    /// Update workspace active routes count
922    pub fn update_workspace_active_routes(&self, workspace_id: &str, count: i64) {
923        self.workspace_active_routes.with_label_values(&[workspace_id]).set(count);
924    }
925
926    /// Record a workspace error
927    pub fn record_workspace_error(&self, workspace_id: &str, error_type: &str) {
928        self.workspace_errors_total.with_label_values(&[workspace_id, error_type]).inc();
929    }
930
931    /// Increment workspace active routes
932    pub fn increment_workspace_routes(&self, workspace_id: &str) {
933        self.workspace_active_routes.with_label_values(&[workspace_id]).inc();
934    }
935
936    /// Decrement workspace active routes
937    pub fn decrement_workspace_routes(&self, workspace_id: &str) {
938        self.workspace_active_routes.with_label_values(&[workspace_id]).dec();
939    }
940
941    // ==================== Marketplace metrics ====================
942
943    /// Record a marketplace publish operation
944    pub fn record_marketplace_publish(&self, item_type: &str, success: bool, duration_seconds: f64) {
945        let status = if success { "success" } else { "error" };
946        self.marketplace_publish_total
947            .with_label_values(&[item_type, status])
948            .inc();
949        self.marketplace_publish_duration_seconds
950            .with_label_values(&[item_type])
951            .observe(duration_seconds);
952    }
953
954    /// Record a marketplace download operation
955    pub fn record_marketplace_download(&self, item_type: &str, success: bool, duration_seconds: f64) {
956        let status = if success { "success" } else { "error" };
957        self.marketplace_download_total
958            .with_label_values(&[item_type, status])
959            .inc();
960        self.marketplace_download_duration_seconds
961            .with_label_values(&[item_type])
962            .observe(duration_seconds);
963    }
964
965    /// Record a marketplace search operation
966    pub fn record_marketplace_search(&self, item_type: &str, success: bool, duration_seconds: f64) {
967        let status = if success { "success" } else { "error" };
968        self.marketplace_search_total
969            .with_label_values(&[item_type, status])
970            .inc();
971        self.marketplace_search_duration_seconds
972            .with_label_values(&[item_type])
973            .observe(duration_seconds);
974    }
975
976    /// Record a marketplace error
977    pub fn record_marketplace_error(&self, item_type: &str, error_code: &str) {
978        self.marketplace_errors_total
979            .with_label_values(&[item_type, error_code])
980            .inc();
981    }
982
983    /// Update the total number of marketplace items
984    pub fn update_marketplace_items_total(&self, item_type: &str, count: i64) {
985        self.marketplace_items_total
986            .with_label_values(&[item_type])
987            .set(count);
988    }
989}
990
991/// Normalize path to avoid high cardinality
992///
993/// This function replaces dynamic path segments (IDs, UUIDs, etc.) with placeholders
994/// to prevent metric explosion.
995fn normalize_path(path: &str) -> String {
996    let mut segments: Vec<&str> = path.split('/').collect();
997
998    for segment in &mut segments {
999        // Replace UUIDs, numeric IDs, or hex strings with :id placeholder
1000        if is_uuid(segment)
1001            || segment.parse::<i64>().is_ok()
1002            || (segment.len() > 8 && segment.chars().all(|c| c.is_ascii_hexdigit()))
1003        {
1004            *segment = ":id";
1005        }
1006    }
1007
1008    segments.join("/")
1009}
1010
/// Check whether `s` is a UUID in canonical hyphenated text form.
///
/// Accepts exactly 36 bytes laid out as `8-4-4-4-12`: hyphens at byte offsets
/// 8, 13, 18 and 23, and ASCII hex digits (either case) everywhere else.
///
/// Merely counting four hyphens in a 36-byte string is not enough: an ordinary
/// slug such as `product-catalog-export-weekly-rollup` has that shape too and
/// must not be mistaken for an identifier.
fn is_uuid(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(offset, &byte)| match offset {
            // Group separators of the 8-4-4-4-12 layout.
            8 | 13 | 18 | 23 => byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}
1015
1016impl Default for MetricsRegistry {
1017    fn default() -> Self {
1018        Self::new()
1019    }
1020}
1021
1022/// Global metrics registry instance
1023static GLOBAL_REGISTRY: Lazy<MetricsRegistry> = Lazy::new(MetricsRegistry::new);
1024
1025/// Get the global metrics registry
1026pub fn get_global_registry() -> &'static MetricsRegistry {
1027    &GLOBAL_REGISTRY
1028}
1029
/// Unit tests for the metrics registry.
///
/// Each test builds its own `MetricsRegistry` and checks the value actually
/// stored in the affected counter, gauge or histogram, so a recording method
/// that silently stops updating its metric makes the test fail.
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point metric readings.
    const TOLERANCE: f64 = 1e-9;

    /// Compare two floating-point readings within `TOLERANCE`.
    fn approx_eq(left: f64, right: f64) -> bool {
        (left - right).abs() < TOLERANCE
    }

    #[test]
    fn test_metrics_registry_creation() {
        let registry = MetricsRegistry::new();
        assert!(registry.is_initialized());

        // Nothing has been recorded yet on a fresh registry.
        assert_eq!(registry.ws_connections_total.get(), 0);
        assert_eq!(registry.ws_connections_active.get(), 0);
        assert_eq!(registry.smtp_connections_total.get(), 0);
    }

    #[test]
    fn test_record_http_request() {
        let registry = MetricsRegistry::new();
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);

        assert_eq!(registry.requests_total.with_label_values(&["http", "GET", "200"]).get(), 1);
        assert_eq!(registry.requests_total.with_label_values(&["http", "POST", "201"]).get(), 1);

        let get_latency = registry.requests_duration_seconds.with_label_values(&["http", "GET"]);
        assert_eq!(get_latency.get_sample_count(), 1);
        assert!(approx_eq(get_latency.get_sample_sum(), 0.045));

        let post_latency = registry.requests_duration_seconds.with_label_values(&["http", "POST"]);
        assert_eq!(post_latency.get_sample_count(), 1);
        assert!(approx_eq(post_latency.get_sample_sum(), 0.123));
    }

    #[test]
    fn test_global_registry() {
        let first = get_global_registry();
        let second = get_global_registry();

        assert!(first.is_initialized());
        // Both calls must hand out the very same instance.
        assert!(std::ptr::eq(first, second));
    }

    #[test]
    fn test_plugin_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_plugin_execution("test-plugin", true, 0.025);
        registry.record_plugin_execution("test-plugin", false, 0.050);

        let executions = &registry.plugin_executions_total;
        assert_eq!(executions.with_label_values(&["test-plugin", "success"]).get(), 1);
        assert_eq!(executions.with_label_values(&["test-plugin", "failure"]).get(), 1);

        let durations =
            registry.plugin_execution_duration_seconds.with_label_values(&["test-plugin"]);
        assert_eq!(durations.get_sample_count(), 2);
        assert!(approx_eq(durations.get_sample_sum(), 0.075));
    }

    #[test]
    fn test_websocket_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_ws_connection_established();
        assert_eq!(registry.ws_connections_active.get(), 1);

        registry.record_ws_connection_closed(120.5, "normal");
        registry.record_ws_error();

        assert_eq!(registry.ws_messages_sent.get(), 1);
        assert_eq!(registry.ws_messages_received.get(), 1);
        assert_eq!(registry.ws_connections_total.get(), 1);
        assert_eq!(registry.ws_connections_active.get(), 0);
        assert_eq!(registry.ws_errors_total.get(), 1);

        let lifetimes = registry.ws_connection_duration_seconds.with_label_values(&["normal"]);
        assert_eq!(lifetimes.get_sample_count(), 1);
        assert!(approx_eq(lifetimes.get_sample_sum(), 120.5));
    }

    #[test]
    fn test_path_normalization() {
        assert_eq!(normalize_path("/api/users/123"), "/api/users/:id");
        assert_eq!(
            normalize_path("/api/users/550e8400-e29b-41d4-a716-446655440000"),
            "/api/users/:id"
        );
        assert_eq!(normalize_path("/api/users/abc123def456"), "/api/users/:id");
        assert_eq!(normalize_path("/api/users/list"), "/api/users/list");

        // Short hex-looking words and structural edge cases are left untouched.
        assert_eq!(normalize_path("/api/deadbeef"), "/api/deadbeef");
        assert_eq!(normalize_path("/"), "/");
        assert_eq!(normalize_path(""), "");
    }

    #[test]
    fn test_path_based_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_path("/api/users/123", "GET", 200, 0.045);
        registry.record_http_request_with_path("/api/users/456", "GET", 200, 0.055);
        registry.record_http_request_with_path("/api/posts", "POST", 201, 0.123);

        // Both user requests collapse onto the same normalized path label.
        let by_path = &registry.requests_by_path_total;
        assert_eq!(by_path.with_label_values(&["/api/users/:id", "GET", "200"]).get(), 2);
        assert_eq!(by_path.with_label_values(&["/api/posts", "POST", "201"]).get(), 1);

        let user_latency = registry
            .request_duration_by_path_seconds
            .with_label_values(&["/api/users/:id", "GET"]);
        assert_eq!(user_latency.get_sample_count(), 2);
        assert!(approx_eq(user_latency.get_sample_sum(), 0.1));

        // First sample seeds the average; the second is blended in at 5%.
        let user_average = registry
            .average_latency_by_path_seconds
            .with_label_values(&["/api/users/:id", "GET"])
            .get();
        assert!(approx_eq(user_average, 0.045 * 0.95 + 0.055 * 0.05));

        // The protocol-level metrics are updated as well.
        assert_eq!(registry.requests_total.with_label_values(&["http", "GET", "200"]).get(), 2);
        assert_eq!(registry.requests_total.with_label_values(&["http", "POST", "201"]).get(), 1);
    }

    #[test]
    fn test_smtp_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_smtp_connection_established();
        assert_eq!(registry.smtp_connections_active.get(), 1);

        registry.record_smtp_message_received();
        registry.record_smtp_message_stored();
        registry.record_smtp_connection_closed();
        registry.record_smtp_error("timeout");

        assert_eq!(registry.smtp_connections_total.get(), 1);
        assert_eq!(registry.smtp_connections_active.get(), 0);
        assert_eq!(registry.smtp_messages_received_total.get(), 1);
        assert_eq!(registry.smtp_messages_stored_total.get(), 1);
        assert_eq!(registry.smtp_errors_total.with_label_values(&["timeout"]).get(), 1);
    }

    #[test]
    fn test_system_metrics() {
        let registry = MetricsRegistry::new();
        let hundred_mib = 1024.0 * 1024.0 * 100.0;
        registry.update_memory_usage(hundred_mib);
        registry.update_cpu_usage(45.5);
        registry.update_thread_count(25.0);
        registry.update_uptime(3600.0); // 1 hour

        assert!(approx_eq(registry.memory_usage_bytes.get(), hundred_mib));
        assert!(approx_eq(registry.cpu_usage_percent.get(), 45.5));
        assert!(approx_eq(registry.thread_count.get(), 25.0));
        assert!(approx_eq(registry.uptime_seconds.get(), 3600.0));
    }

    #[test]
    fn test_workspace_metrics() {
        let registry = MetricsRegistry::new();

        // Record workspace requests
        registry.record_workspace_request("workspace1", "GET", 200, 0.045);
        registry.record_workspace_request("workspace1", "POST", 201, 0.123);
        registry.record_workspace_request("workspace2", "GET", 200, 0.055);

        let requests = &registry.workspace_requests_total;
        assert_eq!(requests.with_label_values(&["workspace1", "GET", "200"]).get(), 1);
        assert_eq!(requests.with_label_values(&["workspace1", "POST", "201"]).get(), 1);
        assert_eq!(requests.with_label_values(&["workspace2", "GET", "200"]).get(), 1);

        // Update active routes
        registry.update_workspace_active_routes("workspace1", 10);
        registry.update_workspace_active_routes("workspace2", 5);

        // Record errors
        registry.record_workspace_error("workspace1", "validation");
        registry.record_workspace_error("workspace2", "timeout");

        let errors = &registry.workspace_errors_total;
        assert_eq!(errors.with_label_values(&["workspace1", "validation"]).get(), 1);
        assert_eq!(errors.with_label_values(&["workspace2", "timeout"]).get(), 1);

        // Increment followed by decrement must leave the gauge where it started.
        let routes = &registry.workspace_active_routes;
        registry.increment_workspace_routes("workspace1");
        assert_eq!(routes.with_label_values(&["workspace1"]).get(), 11);
        registry.decrement_workspace_routes("workspace1");
        assert_eq!(routes.with_label_values(&["workspace1"]).get(), 10);
        assert_eq!(routes.with_label_values(&["workspace2"]).get(), 5);
    }

    #[test]
    fn test_workspace_metrics_isolation() {
        let registry = MetricsRegistry::new();

        // Ensure metrics for different workspaces are independent
        registry.record_workspace_request("ws1", "GET", 200, 0.1);
        registry.record_workspace_request("ws2", "GET", 200, 0.2);

        registry.update_workspace_active_routes("ws1", 5);
        registry.update_workspace_active_routes("ws2", 10);

        let requests = &registry.workspace_requests_total;
        assert_eq!(requests.with_label_values(&["ws1", "GET", "200"]).get(), 1);
        assert_eq!(requests.with_label_values(&["ws2", "GET", "200"]).get(), 1);

        let durations = &registry.workspace_requests_duration_seconds;
        assert!(approx_eq(durations.with_label_values(&["ws1", "GET"]).get_sample_sum(), 0.1));
        assert!(approx_eq(durations.with_label_values(&["ws2", "GET"]).get_sample_sum(), 0.2));

        let routes = &registry.workspace_active_routes;
        assert_eq!(routes.with_label_values(&["ws1"]).get(), 5);
        assert_eq!(routes.with_label_values(&["ws2"]).get(), 10);
    }
}