mockforge_observability/prometheus/
metrics.rs

1//! Prometheus metrics definitions and registry
2
3use once_cell::sync::Lazy;
4use prometheus::{
5    Gauge, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
6    Opts, Registry,
7};
8use std::sync::Arc;
9use tracing::debug;
10
11/// Global metrics registry for MockForge
12#[derive(Clone)]
13pub struct MetricsRegistry {
14    registry: Arc<Registry>,
15
16    // Request metrics by protocol
17    pub requests_total: IntCounterVec,
18    pub requests_duration_seconds: HistogramVec,
19    pub requests_in_flight: IntGaugeVec,
20
21    // Request metrics by path (endpoint-specific)
22    pub requests_by_path_total: IntCounterVec,
23    pub request_duration_by_path_seconds: HistogramVec,
24    pub average_latency_by_path_seconds: GaugeVec,
25
26    // Workspace-specific metrics
27    pub workspace_requests_total: IntCounterVec,
28    pub workspace_requests_duration_seconds: HistogramVec,
29    pub workspace_active_routes: IntGaugeVec,
30    pub workspace_errors_total: IntCounterVec,
31
32    // Error metrics
33    pub errors_total: IntCounterVec,
34    pub error_rate: GaugeVec,
35
36    // Plugin metrics
37    pub plugin_executions_total: IntCounterVec,
38    pub plugin_execution_duration_seconds: HistogramVec,
39    pub plugin_errors_total: IntCounterVec,
40
41    // WebSocket specific metrics
42    pub ws_connections_active: IntGauge,
43    pub ws_connections_total: IntCounter,
44    pub ws_connection_duration_seconds: HistogramVec,
45    pub ws_messages_sent: IntCounter,
46    pub ws_messages_received: IntCounter,
47    pub ws_errors_total: IntCounter,
48
49    // SMTP specific metrics
50    pub smtp_connections_active: IntGauge,
51    pub smtp_connections_total: IntCounter,
52    pub smtp_messages_received_total: IntCounter,
53    pub smtp_messages_stored_total: IntCounter,
54    pub smtp_errors_total: IntCounterVec,
55
56    // MQTT specific metrics
57    pub mqtt_connections_active: IntGauge,
58    pub mqtt_connections_total: IntCounter,
59    pub mqtt_messages_published_total: IntCounter,
60    pub mqtt_messages_received_total: IntCounter,
61    pub mqtt_topics_active: IntGauge,
62    pub mqtt_subscriptions_active: IntGauge,
63    pub mqtt_retained_messages: IntGauge,
64    pub mqtt_errors_total: IntCounterVec,
65
66    // System metrics
67    pub memory_usage_bytes: Gauge,
68    pub cpu_usage_percent: Gauge,
69    pub thread_count: Gauge,
70    pub uptime_seconds: Gauge,
71
72    // Scenario metrics (for Phase 4)
73    pub active_scenario_mode: IntGauge,
74    pub chaos_triggers_total: IntCounter,
75
76    // Business/SLO metrics
77    pub service_availability: GaugeVec,
78    pub slo_compliance: GaugeVec,
79    pub successful_request_rate: GaugeVec,
80    pub p95_latency_slo_compliance: GaugeVec,
81    pub error_budget_remaining: GaugeVec,
82
83    // Marketplace metrics
84    pub marketplace_publish_total: IntCounterVec,
85    pub marketplace_publish_duration_seconds: HistogramVec,
86    pub marketplace_download_total: IntCounterVec,
87    pub marketplace_download_duration_seconds: HistogramVec,
88    pub marketplace_search_total: IntCounterVec,
89    pub marketplace_search_duration_seconds: HistogramVec,
90    pub marketplace_errors_total: IntCounterVec,
91    pub marketplace_items_total: IntGaugeVec,
92}
93
94impl MetricsRegistry {
95    /// Create a new metrics registry with all metrics initialized
96    pub fn new() -> Self {
97        let registry = Registry::new();
98
99        // Request metrics (with pillar label)
100        let requests_total = IntCounterVec::new(
101            Opts::new(
102                "mockforge_requests_total",
103                "Total number of requests by protocol, method, status, and pillar",
104            ),
105            &["protocol", "method", "status", "pillar"],
106        )
107        .expect("Failed to create requests_total metric");
108
109        let requests_duration_seconds = HistogramVec::new(
110            HistogramOpts::new(
111                "mockforge_request_duration_seconds",
112                "Request duration in seconds by protocol, method, and pillar",
113            )
114            .buckets(vec![
115                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
116            ]),
117            &["protocol", "method", "pillar"],
118        )
119        .expect("Failed to create requests_duration_seconds metric");
120
121        let requests_in_flight = IntGaugeVec::new(
122            Opts::new(
123                "mockforge_requests_in_flight",
124                "Number of requests currently being processed",
125            ),
126            &["protocol"],
127        )
128        .expect("Failed to create requests_in_flight metric");
129
130        // Error metrics (with pillar label)
131        let errors_total = IntCounterVec::new(
132            Opts::new(
133                "mockforge_errors_total",
134                "Total number of errors by protocol, error type, and pillar",
135            ),
136            &["protocol", "error_type", "pillar"],
137        )
138        .expect("Failed to create errors_total metric");
139
140        let error_rate = GaugeVec::new(
141            Opts::new("mockforge_error_rate", "Error rate by protocol (0.0 to 1.0)"),
142            &["protocol"],
143        )
144        .expect("Failed to create error_rate metric");
145
146        // Plugin metrics
147        let plugin_executions_total = IntCounterVec::new(
148            Opts::new("mockforge_plugin_executions_total", "Total number of plugin executions"),
149            &["plugin_name", "status"],
150        )
151        .expect("Failed to create plugin_executions_total metric");
152
153        let plugin_execution_duration_seconds = HistogramVec::new(
154            HistogramOpts::new(
155                "mockforge_plugin_execution_duration_seconds",
156                "Plugin execution duration in seconds",
157            )
158            .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0]),
159            &["plugin_name"],
160        )
161        .expect("Failed to create plugin_execution_duration_seconds metric");
162
163        let plugin_errors_total = IntCounterVec::new(
164            Opts::new("mockforge_plugin_errors_total", "Total number of plugin errors"),
165            &["plugin_name", "error_type"],
166        )
167        .expect("Failed to create plugin_errors_total metric");
168
169        // WebSocket metrics
170        // Path-based request metrics
171        let requests_by_path_total = IntCounterVec::new(
172            Opts::new(
173                "mockforge_requests_by_path_total",
174                "Total number of requests by path, method, and status",
175            ),
176            &["path", "method", "status"],
177        )
178        .expect("Failed to create requests_by_path_total metric");
179
180        let request_duration_by_path_seconds = HistogramVec::new(
181            HistogramOpts::new(
182                "mockforge_request_duration_by_path_seconds",
183                "Request duration by path in seconds",
184            )
185            .buckets(vec![
186                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
187            ]),
188            &["path", "method"],
189        )
190        .expect("Failed to create request_duration_by_path_seconds metric");
191
192        let average_latency_by_path_seconds = GaugeVec::new(
193            Opts::new(
194                "mockforge_average_latency_by_path_seconds",
195                "Average request latency by path in seconds",
196            ),
197            &["path", "method"],
198        )
199        .expect("Failed to create average_latency_by_path_seconds metric");
200
201        // Workspace-specific metrics
202        let workspace_requests_total = IntCounterVec::new(
203            Opts::new(
204                "mockforge_workspace_requests_total",
205                "Total number of requests by workspace, method, and status",
206            ),
207            &["workspace_id", "method", "status"],
208        )
209        .expect("Failed to create workspace_requests_total metric");
210
211        let workspace_requests_duration_seconds = HistogramVec::new(
212            HistogramOpts::new(
213                "mockforge_workspace_request_duration_seconds",
214                "Request duration by workspace in seconds",
215            )
216            .buckets(vec![
217                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
218            ]),
219            &["workspace_id", "method"],
220        )
221        .expect("Failed to create workspace_requests_duration_seconds metric");
222
223        let workspace_active_routes = IntGaugeVec::new(
224            Opts::new(
225                "mockforge_workspace_active_routes",
226                "Number of active routes in each workspace",
227            ),
228            &["workspace_id"],
229        )
230        .expect("Failed to create workspace_active_routes metric");
231
232        let workspace_errors_total = IntCounterVec::new(
233            Opts::new("mockforge_workspace_errors_total", "Total number of errors by workspace"),
234            &["workspace_id", "error_type"],
235        )
236        .expect("Failed to create workspace_errors_total metric");
237
238        // WebSocket metrics
239        let ws_connections_active = IntGauge::new(
240            "mockforge_ws_connections_active",
241            "Number of active WebSocket connections",
242        )
243        .expect("Failed to create ws_connections_active metric");
244
245        let ws_connections_total = IntCounter::new(
246            "mockforge_ws_connections_total",
247            "Total number of WebSocket connections established",
248        )
249        .expect("Failed to create ws_connections_total metric");
250
251        let ws_connection_duration_seconds = HistogramVec::new(
252            HistogramOpts::new(
253                "mockforge_ws_connection_duration_seconds",
254                "WebSocket connection duration in seconds",
255            )
256            .buckets(vec![1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 600.0, 1800.0, 3600.0]),
257            &["status"],
258        )
259        .expect("Failed to create ws_connection_duration_seconds metric");
260
261        let ws_messages_sent = IntCounter::new(
262            "mockforge_ws_messages_sent_total",
263            "Total number of WebSocket messages sent",
264        )
265        .expect("Failed to create ws_messages_sent metric");
266
267        let ws_messages_received = IntCounter::new(
268            "mockforge_ws_messages_received_total",
269            "Total number of WebSocket messages received",
270        )
271        .expect("Failed to create ws_messages_received metric");
272
273        let ws_errors_total =
274            IntCounter::new("mockforge_ws_errors_total", "Total number of WebSocket errors")
275                .expect("Failed to create ws_errors_total metric");
276
277        // SMTP metrics
278        let smtp_connections_active =
279            IntGauge::new("mockforge_smtp_connections_active", "Number of active SMTP connections")
280                .expect("Failed to create smtp_connections_active metric");
281
282        let smtp_connections_total =
283            IntCounter::new("mockforge_smtp_connections_total", "Total number of SMTP connections")
284                .expect("Failed to create smtp_connections_total metric");
285
286        let smtp_messages_received_total = IntCounter::new(
287            "mockforge_smtp_messages_received_total",
288            "Total number of SMTP messages received",
289        )
290        .expect("Failed to create smtp_messages_received_total metric");
291
292        let smtp_messages_stored_total = IntCounter::new(
293            "mockforge_smtp_messages_stored_total",
294            "Total number of SMTP messages stored in mailbox",
295        )
296        .expect("Failed to create smtp_messages_stored_total metric");
297
298        let smtp_errors_total = IntCounterVec::new(
299            Opts::new("mockforge_smtp_errors_total", "Total number of SMTP errors by type"),
300            &["error_type"],
301        )
302        .expect("Failed to create smtp_errors_total metric");
303
304        // MQTT metrics
305        let mqtt_connections_active = IntGauge::new(
306            "mockforge_mqtt_connections_active",
307            "Number of active MQTT client connections",
308        )
309        .expect("Failed to create mqtt_connections_active metric");
310
311        let mqtt_connections_total = IntCounter::new(
312            "mockforge_mqtt_connections_total",
313            "Total number of MQTT client connections established",
314        )
315        .expect("Failed to create mqtt_connections_total metric");
316
317        let mqtt_messages_published_total = IntCounter::new(
318            "mockforge_mqtt_messages_published_total",
319            "Total number of MQTT messages published",
320        )
321        .expect("Failed to create mqtt_messages_published_total metric");
322
323        let mqtt_messages_received_total = IntCounter::new(
324            "mockforge_mqtt_messages_received_total",
325            "Total number of MQTT messages received",
326        )
327        .expect("Failed to create mqtt_messages_received_total metric");
328
329        let mqtt_topics_active =
330            IntGauge::new("mockforge_mqtt_topics_active", "Number of active MQTT topics")
331                .expect("Failed to create mqtt_topics_active metric");
332
333        let mqtt_subscriptions_active = IntGauge::new(
334            "mockforge_mqtt_subscriptions_active",
335            "Number of active MQTT subscriptions",
336        )
337        .expect("Failed to create mqtt_subscriptions_active metric");
338
339        let mqtt_retained_messages =
340            IntGauge::new("mockforge_mqtt_retained_messages", "Number of retained MQTT messages")
341                .expect("Failed to create mqtt_retained_messages metric");
342
343        let mqtt_errors_total = IntCounterVec::new(
344            Opts::new("mockforge_mqtt_errors_total", "Total number of MQTT errors by type"),
345            &["error_type"],
346        )
347        .expect("Failed to create mqtt_errors_total metric");
348
349        // System metrics
350        let memory_usage_bytes =
351            Gauge::new("mockforge_memory_usage_bytes", "Memory usage in bytes")
352                .expect("Failed to create memory_usage_bytes metric");
353
354        let cpu_usage_percent = Gauge::new("mockforge_cpu_usage_percent", "CPU usage percentage")
355            .expect("Failed to create cpu_usage_percent metric");
356
357        let thread_count = Gauge::new("mockforge_thread_count", "Number of active threads")
358            .expect("Failed to create thread_count metric");
359
360        let uptime_seconds = Gauge::new("mockforge_uptime_seconds", "Server uptime in seconds")
361            .expect("Failed to create uptime_seconds metric");
362
363        // Scenario metrics
364        let active_scenario_mode = IntGauge::new(
365            "mockforge_active_scenario_mode",
366            "Active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)",
367        )
368        .expect("Failed to create active_scenario_mode metric");
369
370        let chaos_triggers_total = IntCounter::new(
371            "mockforge_chaos_triggers_total",
372            "Total number of chaos mode triggers",
373        )
374        .expect("Failed to create chaos_triggers_total metric");
375
376        // Business/SLO metrics
377        let service_availability = GaugeVec::new(
378            Opts::new(
379                "mockforge_service_availability",
380                "Service availability percentage (0.0 to 1.0) by protocol",
381            ),
382            &["protocol"],
383        )
384        .expect("Failed to create service_availability metric");
385
386        let slo_compliance = GaugeVec::new(
387            Opts::new(
388                "mockforge_slo_compliance",
389                "SLO compliance percentage (0.0 to 1.0) by protocol and slo_type",
390            ),
391            &["protocol", "slo_type"],
392        )
393        .expect("Failed to create slo_compliance metric");
394
395        let successful_request_rate = GaugeVec::new(
396            Opts::new(
397                "mockforge_successful_request_rate",
398                "Successful request rate (0.0 to 1.0) by protocol",
399            ),
400            &["protocol"],
401        )
402        .expect("Failed to create successful_request_rate metric");
403
404        let p95_latency_slo_compliance = GaugeVec::new(
405            Opts::new(
406                "mockforge_p95_latency_slo_compliance",
407                "P95 latency SLO compliance (1.0 = compliant, 0.0 = non-compliant) by protocol",
408            ),
409            &["protocol"],
410        )
411        .expect("Failed to create p95_latency_slo_compliance metric");
412
413        let error_budget_remaining = GaugeVec::new(
414            Opts::new(
415                "mockforge_error_budget_remaining",
416                "Remaining error budget percentage (0.0 to 1.0) by protocol",
417            ),
418            &["protocol"],
419        )
420        .expect("Failed to create error_budget_remaining metric");
421
422        // Marketplace metrics
423        let marketplace_publish_total = IntCounterVec::new(
424            Opts::new(
425                "mockforge_marketplace_publish_total",
426                "Total number of marketplace items published by type and status",
427            ),
428            &["type", "status"], // type: plugin, template, scenario; status: success, error
429        )
430        .expect("Failed to create marketplace_publish_total metric");
431
432        let marketplace_publish_duration_seconds = HistogramVec::new(
433            HistogramOpts::new(
434                "mockforge_marketplace_publish_duration_seconds",
435                "Marketplace publish operation duration in seconds",
436            )
437            .buckets(vec![0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0]),
438            &["type"], // type: plugin, template, scenario
439        )
440        .expect("Failed to create marketplace_publish_duration_seconds metric");
441
442        let marketplace_download_total = IntCounterVec::new(
443            Opts::new(
444                "mockforge_marketplace_download_total",
445                "Total number of marketplace items downloaded by type and status",
446            ),
447            &["type", "status"], // type: plugin, template, scenario; status: success, error
448        )
449        .expect("Failed to create marketplace_download_total metric");
450
451        let marketplace_download_duration_seconds = HistogramVec::new(
452            HistogramOpts::new(
453                "mockforge_marketplace_download_duration_seconds",
454                "Marketplace download operation duration in seconds",
455            )
456            .buckets(vec![0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]),
457            &["type"], // type: plugin, template, scenario
458        )
459        .expect("Failed to create marketplace_download_duration_seconds metric");
460
461        let marketplace_search_total = IntCounterVec::new(
462            Opts::new(
463                "mockforge_marketplace_search_total",
464                "Total number of marketplace searches by type and status",
465            ),
466            &["type", "status"], // type: plugin, template, scenario; status: success, error
467        )
468        .expect("Failed to create marketplace_search_total metric");
469
470        let marketplace_search_duration_seconds = HistogramVec::new(
471            HistogramOpts::new(
472                "mockforge_marketplace_search_duration_seconds",
473                "Marketplace search operation duration in seconds",
474            )
475            .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0]),
476            &["type"], // type: plugin, template, scenario
477        )
478        .expect("Failed to create marketplace_search_duration_seconds metric");
479
480        let marketplace_errors_total = IntCounterVec::new(
481            Opts::new(
482                "mockforge_marketplace_errors_total",
483                "Total number of marketplace errors by type and error_code",
484            ),
485            &["type", "error_code"], // type: plugin, template, scenario; error_code: validation_failed, not_found, etc.
486        )
487        .expect("Failed to create marketplace_errors_total metric");
488
489        let marketplace_items_total = IntGaugeVec::new(
490            Opts::new(
491                "mockforge_marketplace_items_total",
492                "Total number of marketplace items by type",
493            ),
494            &["type"], // type: plugin, template, scenario
495        )
496        .expect("Failed to create marketplace_items_total metric");
497
498        // Register all metrics
499        registry
500            .register(Box::new(requests_total.clone()))
501            .expect("Failed to register requests_total");
502        registry
503            .register(Box::new(requests_duration_seconds.clone()))
504            .expect("Failed to register requests_duration_seconds");
505        registry
506            .register(Box::new(requests_in_flight.clone()))
507            .expect("Failed to register requests_in_flight");
508        registry
509            .register(Box::new(requests_by_path_total.clone()))
510            .expect("Failed to register requests_by_path_total");
511        registry
512            .register(Box::new(request_duration_by_path_seconds.clone()))
513            .expect("Failed to register request_duration_by_path_seconds");
514        registry
515            .register(Box::new(average_latency_by_path_seconds.clone()))
516            .expect("Failed to register average_latency_by_path_seconds");
517        registry
518            .register(Box::new(workspace_requests_total.clone()))
519            .expect("Failed to register workspace_requests_total");
520        registry
521            .register(Box::new(workspace_requests_duration_seconds.clone()))
522            .expect("Failed to register workspace_requests_duration_seconds");
523        registry
524            .register(Box::new(workspace_active_routes.clone()))
525            .expect("Failed to register workspace_active_routes");
526        registry
527            .register(Box::new(workspace_errors_total.clone()))
528            .expect("Failed to register workspace_errors_total");
529        registry
530            .register(Box::new(errors_total.clone()))
531            .expect("Failed to register errors_total");
532        registry
533            .register(Box::new(error_rate.clone()))
534            .expect("Failed to register error_rate");
535        registry
536            .register(Box::new(plugin_executions_total.clone()))
537            .expect("Failed to register plugin_executions_total");
538        registry
539            .register(Box::new(plugin_execution_duration_seconds.clone()))
540            .expect("Failed to register plugin_execution_duration_seconds");
541        registry
542            .register(Box::new(plugin_errors_total.clone()))
543            .expect("Failed to register plugin_errors_total");
544        registry
545            .register(Box::new(ws_connections_active.clone()))
546            .expect("Failed to register ws_connections_active");
547        registry
548            .register(Box::new(ws_connections_total.clone()))
549            .expect("Failed to register ws_connections_total");
550        registry
551            .register(Box::new(ws_connection_duration_seconds.clone()))
552            .expect("Failed to register ws_connection_duration_seconds");
553        registry
554            .register(Box::new(ws_messages_sent.clone()))
555            .expect("Failed to register ws_messages_sent");
556        registry
557            .register(Box::new(ws_messages_received.clone()))
558            .expect("Failed to register ws_messages_received");
559        registry
560            .register(Box::new(ws_errors_total.clone()))
561            .expect("Failed to register ws_errors_total");
562        registry
563            .register(Box::new(smtp_connections_active.clone()))
564            .expect("Failed to register smtp_connections_active");
565        registry
566            .register(Box::new(smtp_connections_total.clone()))
567            .expect("Failed to register smtp_connections_total");
568        registry
569            .register(Box::new(smtp_messages_received_total.clone()))
570            .expect("Failed to register smtp_messages_received_total");
571        registry
572            .register(Box::new(smtp_messages_stored_total.clone()))
573            .expect("Failed to register smtp_messages_stored_total");
574        registry
575            .register(Box::new(smtp_errors_total.clone()))
576            .expect("Failed to register smtp_errors_total");
577        registry
578            .register(Box::new(mqtt_connections_active.clone()))
579            .expect("Failed to register mqtt_connections_active");
580        registry
581            .register(Box::new(mqtt_connections_total.clone()))
582            .expect("Failed to register mqtt_connections_total");
583        registry
584            .register(Box::new(mqtt_messages_published_total.clone()))
585            .expect("Failed to register mqtt_messages_published_total");
586        registry
587            .register(Box::new(mqtt_messages_received_total.clone()))
588            .expect("Failed to register mqtt_messages_received_total");
589        registry
590            .register(Box::new(mqtt_topics_active.clone()))
591            .expect("Failed to register mqtt_topics_active");
592        registry
593            .register(Box::new(mqtt_subscriptions_active.clone()))
594            .expect("Failed to register mqtt_subscriptions_active");
595        registry
596            .register(Box::new(mqtt_retained_messages.clone()))
597            .expect("Failed to register mqtt_retained_messages");
598        registry
599            .register(Box::new(mqtt_errors_total.clone()))
600            .expect("Failed to register mqtt_errors_total");
601        registry
602            .register(Box::new(memory_usage_bytes.clone()))
603            .expect("Failed to register memory_usage_bytes");
604        registry
605            .register(Box::new(cpu_usage_percent.clone()))
606            .expect("Failed to register cpu_usage_percent");
607        registry
608            .register(Box::new(thread_count.clone()))
609            .expect("Failed to register thread_count");
610        registry
611            .register(Box::new(uptime_seconds.clone()))
612            .expect("Failed to register uptime_seconds");
613        registry
614            .register(Box::new(active_scenario_mode.clone()))
615            .expect("Failed to register active_scenario_mode");
616        registry
617            .register(Box::new(chaos_triggers_total.clone()))
618            .expect("Failed to register chaos_triggers_total");
619        registry
620            .register(Box::new(service_availability.clone()))
621            .expect("Failed to register service_availability");
622        registry
623            .register(Box::new(slo_compliance.clone()))
624            .expect("Failed to register slo_compliance");
625        registry
626            .register(Box::new(successful_request_rate.clone()))
627            .expect("Failed to register successful_request_rate");
628        registry
629            .register(Box::new(p95_latency_slo_compliance.clone()))
630            .expect("Failed to register p95_latency_slo_compliance");
631        registry
632            .register(Box::new(error_budget_remaining.clone()))
633            .expect("Failed to register error_budget_remaining");
634        registry
635            .register(Box::new(marketplace_publish_total.clone()))
636            .expect("Failed to register marketplace_publish_total");
637        registry
638            .register(Box::new(marketplace_publish_duration_seconds.clone()))
639            .expect("Failed to register marketplace_publish_duration_seconds");
640        registry
641            .register(Box::new(marketplace_download_total.clone()))
642            .expect("Failed to register marketplace_download_total");
643        registry
644            .register(Box::new(marketplace_download_duration_seconds.clone()))
645            .expect("Failed to register marketplace_download_duration_seconds");
646        registry
647            .register(Box::new(marketplace_search_total.clone()))
648            .expect("Failed to register marketplace_search_total");
649        registry
650            .register(Box::new(marketplace_search_duration_seconds.clone()))
651            .expect("Failed to register marketplace_search_duration_seconds");
652        registry
653            .register(Box::new(marketplace_errors_total.clone()))
654            .expect("Failed to register marketplace_errors_total");
655        registry
656            .register(Box::new(marketplace_items_total.clone()))
657            .expect("Failed to register marketplace_items_total");
658
659        debug!("Initialized Prometheus metrics registry");
660
661        Self {
662            registry: Arc::new(registry),
663            requests_total,
664            requests_duration_seconds,
665            requests_in_flight,
666            requests_by_path_total,
667            request_duration_by_path_seconds,
668            average_latency_by_path_seconds,
669            workspace_requests_total,
670            workspace_requests_duration_seconds,
671            workspace_active_routes,
672            workspace_errors_total,
673            errors_total,
674            error_rate,
675            plugin_executions_total,
676            plugin_execution_duration_seconds,
677            plugin_errors_total,
678            ws_connections_active,
679            ws_connections_total,
680            ws_connection_duration_seconds,
681            ws_messages_sent,
682            ws_messages_received,
683            ws_errors_total,
684            smtp_connections_active,
685            smtp_connections_total,
686            smtp_messages_received_total,
687            smtp_messages_stored_total,
688            smtp_errors_total,
689            mqtt_connections_active,
690            mqtt_connections_total,
691            mqtt_messages_published_total,
692            mqtt_messages_received_total,
693            mqtt_topics_active,
694            mqtt_subscriptions_active,
695            mqtt_retained_messages,
696            mqtt_errors_total,
697            memory_usage_bytes,
698            cpu_usage_percent,
699            thread_count,
700            uptime_seconds,
701            active_scenario_mode,
702            chaos_triggers_total,
703            service_availability,
704            slo_compliance,
705            successful_request_rate,
706            p95_latency_slo_compliance,
707            error_budget_remaining,
708            marketplace_publish_total,
709            marketplace_publish_duration_seconds,
710            marketplace_download_total,
711            marketplace_download_duration_seconds,
712            marketplace_search_total,
713            marketplace_search_duration_seconds,
714            marketplace_errors_total,
715            marketplace_items_total,
716        }
717    }
718
719    /// Get the underlying Prometheus registry
720    pub fn registry(&self) -> &Registry {
721        &self.registry
722    }
723
    /// Check if the registry is initialized
    ///
    /// Always returns `true`: the body performs no check, so every existing
    /// `MetricsRegistry` value reports itself as initialized.
    pub fn is_initialized(&self) -> bool {
        true
    }
728
729    /// Record an HTTP request
730    pub fn record_http_request(&self, method: &str, status: u16, duration_seconds: f64) {
731        self.record_http_request_with_pillar(method, status, duration_seconds, "");
732    }
733
734    /// Record an HTTP request with pillar information
735    pub fn record_http_request_with_pillar(
736        &self,
737        method: &str,
738        status: u16,
739        duration_seconds: f64,
740        pillar: &str,
741    ) {
742        let status_str = status.to_string();
743        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
744        self.requests_total
745            .with_label_values(&["http", method, &status_str, pillar_label])
746            .inc();
747        self.requests_duration_seconds
748            .with_label_values(&["http", method, pillar_label])
749            .observe(duration_seconds);
750    }
751
752    /// Record a gRPC request
753    pub fn record_grpc_request(&self, method: &str, status: &str, duration_seconds: f64) {
754        self.record_grpc_request_with_pillar(method, status, duration_seconds, "");
755    }
756
757    /// Record a gRPC request with pillar information
758    pub fn record_grpc_request_with_pillar(
759        &self,
760        method: &str,
761        status: &str,
762        duration_seconds: f64,
763        pillar: &str,
764    ) {
765        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
766        self.requests_total
767            .with_label_values(&["grpc", method, status, pillar_label])
768            .inc();
769        self.requests_duration_seconds
770            .with_label_values(&["grpc", method, pillar_label])
771            .observe(duration_seconds);
772    }
773
    /// Record a WebSocket message sent
    ///
    /// Increments the `ws_messages_sent` counter by one.
    pub fn record_ws_message_sent(&self) {
        self.ws_messages_sent.inc();
    }
778
    /// Record a WebSocket message received
    ///
    /// Increments the `ws_messages_received` counter by one.
    pub fn record_ws_message_received(&self) {
        self.ws_messages_received.inc();
    }
783
784    /// Record a GraphQL request
785    pub fn record_graphql_request(&self, operation: &str, status: u16, duration_seconds: f64) {
786        let status_str = status.to_string();
787        self.requests_total
788            .with_label_values(&["graphql", operation, &status_str])
789            .inc();
790        self.requests_duration_seconds
791            .with_label_values(&["graphql", operation])
792            .observe(duration_seconds);
793    }
794
795    /// Record a plugin execution
796    pub fn record_plugin_execution(&self, plugin_name: &str, success: bool, duration_seconds: f64) {
797        let status = if success { "success" } else { "failure" };
798        self.plugin_executions_total.with_label_values(&[plugin_name, status]).inc();
799        self.plugin_execution_duration_seconds
800            .with_label_values(&[plugin_name])
801            .observe(duration_seconds);
802    }
803
804    /// Increment in-flight requests
805    pub fn increment_in_flight(&self, protocol: &str) {
806        self.requests_in_flight.with_label_values(&[protocol]).inc();
807    }
808
809    /// Decrement in-flight requests
810    pub fn decrement_in_flight(&self, protocol: &str) {
811        self.requests_in_flight.with_label_values(&[protocol]).dec();
812    }
813
814    /// Record an error
815    pub fn record_error(&self, protocol: &str, error_type: &str) {
816        self.record_error_with_pillar(protocol, error_type, "");
817    }
818
819    /// Record an error with pillar information
820    pub fn record_error_with_pillar(&self, protocol: &str, error_type: &str, pillar: &str) {
821        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
822        self.errors_total.with_label_values(&[protocol, error_type, pillar_label]).inc();
823    }
824
    /// Update memory usage
    ///
    /// Overwrites the `memory_usage_bytes` metric with `bytes`.
    pub fn update_memory_usage(&self, bytes: f64) {
        self.memory_usage_bytes.set(bytes);
    }
829
    /// Update CPU usage
    ///
    /// Overwrites the `cpu_usage_percent` metric with `percent`. The value is
    /// stored as given; no range check is applied here.
    pub fn update_cpu_usage(&self, percent: f64) {
        self.cpu_usage_percent.set(percent);
    }
834
    /// Set active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)
    ///
    /// Overwrites the `active_scenario_mode` metric with `mode`. The value is
    /// stored as given; this method does not validate it against the list above.
    pub fn set_scenario_mode(&self, mode: i64) {
        self.active_scenario_mode.set(mode);
    }
839
    /// Record a chaos trigger
    ///
    /// Increments the `chaos_triggers_total` metric by one.
    pub fn record_chaos_trigger(&self) {
        self.chaos_triggers_total.inc();
    }
844
845    /// Record an HTTP request with path information
846    pub fn record_http_request_with_path(
847        &self,
848        path: &str,
849        method: &str,
850        status: u16,
851        duration_seconds: f64,
852    ) {
853        self.record_http_request_with_path_and_pillar(path, method, status, duration_seconds, "");
854    }
855
856    /// Record an HTTP request with path and pillar information
857    pub fn record_http_request_with_path_and_pillar(
858        &self,
859        path: &str,
860        method: &str,
861        status: u16,
862        duration_seconds: f64,
863        pillar: &str,
864    ) {
865        // Normalize path to avoid cardinality explosion
866        let normalized_path = normalize_path(path);
867        let status_str = status.to_string();
868
869        // Record by path
870        self.requests_by_path_total
871            .with_label_values(&[normalized_path.as_str(), method, status_str.as_str()])
872            .inc();
873        self.request_duration_by_path_seconds
874            .with_label_values(&[normalized_path.as_str(), method])
875            .observe(duration_seconds);
876
877        // Update average latency (simple moving average approximation)
878        // Note: For production use, consider using a proper moving average or quantiles
879        let current = self
880            .average_latency_by_path_seconds
881            .with_label_values(&[normalized_path.as_str(), method])
882            .get();
883        let new_avg = if current == 0.0 {
884            duration_seconds
885        } else {
886            (current * 0.95) + (duration_seconds * 0.05)
887        };
888        self.average_latency_by_path_seconds
889            .with_label_values(&[normalized_path.as_str(), method])
890            .set(new_avg);
891
892        // Also record in the general metrics with pillar
893        self.record_http_request_with_pillar(method, status, duration_seconds, pillar);
894    }
895
896    /// Record a WebSocket connection established
897    pub fn record_ws_connection_established(&self) {
898        self.ws_connections_total.inc();
899        self.ws_connections_active.inc();
900    }
901
902    /// Record a WebSocket connection closed
903    pub fn record_ws_connection_closed(&self, duration_seconds: f64, status: &str) {
904        self.ws_connections_active.dec();
905        self.ws_connection_duration_seconds
906            .with_label_values(&[status])
907            .observe(duration_seconds);
908    }
909
    /// Record a WebSocket error
    ///
    /// Increments the `ws_errors_total` counter by one.
    pub fn record_ws_error(&self) {
        self.ws_errors_total.inc();
    }
914
915    /// Record an SMTP connection established
916    pub fn record_smtp_connection_established(&self) {
917        self.smtp_connections_total.inc();
918        self.smtp_connections_active.inc();
919    }
920
    /// Record an SMTP connection closed
    ///
    /// Decrements the `smtp_connections_active` gauge by one;
    /// `smtp_connections_total` is not changed.
    pub fn record_smtp_connection_closed(&self) {
        self.smtp_connections_active.dec();
    }
925
    /// Record an SMTP message received
    ///
    /// Increments the `smtp_messages_received_total` counter by one.
    pub fn record_smtp_message_received(&self) {
        self.smtp_messages_received_total.inc();
    }
930
    /// Record an SMTP message stored
    ///
    /// Increments the `smtp_messages_stored_total` counter by one.
    pub fn record_smtp_message_stored(&self) {
        self.smtp_messages_stored_total.inc();
    }
935
936    /// Record an SMTP error
937    pub fn record_smtp_error(&self, error_type: &str) {
938        self.smtp_errors_total.with_label_values(&[error_type]).inc();
939    }
940
    /// Update thread count
    ///
    /// Overwrites the `thread_count` metric with `count`.
    pub fn update_thread_count(&self, count: f64) {
        self.thread_count.set(count);
    }
945
    /// Update uptime
    ///
    /// Overwrites the `uptime_seconds` metric with `seconds`.
    pub fn update_uptime(&self, seconds: f64) {
        self.uptime_seconds.set(seconds);
    }
950
951    // ==================== Workspace-specific metrics ====================
952
953    /// Record a workspace request
954    pub fn record_workspace_request(
955        &self,
956        workspace_id: &str,
957        method: &str,
958        status: u16,
959        duration_seconds: f64,
960    ) {
961        let status_str = status.to_string();
962        self.workspace_requests_total
963            .with_label_values(&[workspace_id, method, &status_str])
964            .inc();
965        self.workspace_requests_duration_seconds
966            .with_label_values(&[workspace_id, method])
967            .observe(duration_seconds);
968    }
969
970    /// Update workspace active routes count
971    pub fn update_workspace_active_routes(&self, workspace_id: &str, count: i64) {
972        self.workspace_active_routes.with_label_values(&[workspace_id]).set(count);
973    }
974
975    /// Record a workspace error
976    pub fn record_workspace_error(&self, workspace_id: &str, error_type: &str) {
977        self.workspace_errors_total.with_label_values(&[workspace_id, error_type]).inc();
978    }
979
980    /// Increment workspace active routes
981    pub fn increment_workspace_routes(&self, workspace_id: &str) {
982        self.workspace_active_routes.with_label_values(&[workspace_id]).inc();
983    }
984
985    /// Decrement workspace active routes
986    pub fn decrement_workspace_routes(&self, workspace_id: &str) {
987        self.workspace_active_routes.with_label_values(&[workspace_id]).dec();
988    }
989
990    // ==================== Marketplace metrics ====================
991
992    /// Record a marketplace publish operation
993    pub fn record_marketplace_publish(
994        &self,
995        item_type: &str,
996        success: bool,
997        duration_seconds: f64,
998    ) {
999        let status = if success { "success" } else { "error" };
1000        self.marketplace_publish_total.with_label_values(&[item_type, status]).inc();
1001        self.marketplace_publish_duration_seconds
1002            .with_label_values(&[item_type])
1003            .observe(duration_seconds);
1004    }
1005
1006    /// Record a marketplace download operation
1007    pub fn record_marketplace_download(
1008        &self,
1009        item_type: &str,
1010        success: bool,
1011        duration_seconds: f64,
1012    ) {
1013        let status = if success { "success" } else { "error" };
1014        self.marketplace_download_total.with_label_values(&[item_type, status]).inc();
1015        self.marketplace_download_duration_seconds
1016            .with_label_values(&[item_type])
1017            .observe(duration_seconds);
1018    }
1019
1020    /// Record a marketplace search operation
1021    pub fn record_marketplace_search(&self, item_type: &str, success: bool, duration_seconds: f64) {
1022        let status = if success { "success" } else { "error" };
1023        self.marketplace_search_total.with_label_values(&[item_type, status]).inc();
1024        self.marketplace_search_duration_seconds
1025            .with_label_values(&[item_type])
1026            .observe(duration_seconds);
1027    }
1028
1029    /// Record a marketplace error
1030    pub fn record_marketplace_error(&self, item_type: &str, error_code: &str) {
1031        self.marketplace_errors_total.with_label_values(&[item_type, error_code]).inc();
1032    }
1033
1034    /// Update the total number of marketplace items
1035    pub fn update_marketplace_items_total(&self, item_type: &str, count: i64) {
1036        self.marketplace_items_total.with_label_values(&[item_type]).set(count);
1037    }
1038}
1039
1040/// Normalize path to avoid high cardinality
1041///
1042/// This function replaces dynamic path segments (IDs, UUIDs, etc.) with placeholders
1043/// to prevent metric explosion.
1044fn normalize_path(path: &str) -> String {
1045    let mut segments: Vec<&str> = path.split('/').collect();
1046
1047    for segment in &mut segments {
1048        // Replace UUIDs, numeric IDs, or hex strings with :id placeholder
1049        if is_uuid(segment)
1050            || segment.parse::<i64>().is_ok()
1051            || (segment.len() > 8 && segment.chars().all(|c| c.is_ascii_hexdigit()))
1052        {
1053            *segment = ":id";
1054        }
1055    }
1056
1057    segments.join("/")
1058}
1059
/// Check if a string is a UUID in canonical hyphenated form.
///
/// Accepts exactly 36 bytes laid out as `8-4-4-4-12`: hyphens at byte offsets
/// 8, 13, 18 and 23, and ASCII hexadecimal digits (either case) everywhere
/// else. Counting hyphens alone is not enough, because any 36-byte string
/// that happens to contain four of them would otherwise be treated as a UUID.
fn is_uuid(s: &str) -> bool {
    s.len() == 36
        && s.bytes().enumerate().all(|(idx, byte)| match idx {
            // Group separators of the 8-4-4-4-12 layout.
            8 | 13 | 18 | 23 => byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}
1064
1065impl Default for MetricsRegistry {
1066    fn default() -> Self {
1067        Self::new()
1068    }
1069}
1070
/// Global metrics registry instance
///
/// Wrapped in `Lazy`, so `MetricsRegistry::new` runs on first access rather
/// than at program start, and the resulting value is reused afterwards.
static GLOBAL_REGISTRY: Lazy<MetricsRegistry> = Lazy::new(MetricsRegistry::new);
1073
1074/// Get the global metrics registry
1075pub fn get_global_registry() -> &'static MetricsRegistry {
1076    &GLOBAL_REGISTRY
1077}
1078
#[cfg(test)]
mod tests {
    //! Unit tests for the recording helpers.
    //!
    //! Each test builds its own `MetricsRegistry` and reads the recorded values
    //! back from the public metric fields, so a helper that writes to the wrong
    //! metric or label set fails the test instead of passing silently.

    use super::*;

    /// Absolute tolerance used when comparing computed `f64` metric values.
    const TOLERANCE: f64 = 1e-9;

    #[test]
    fn test_metrics_registry_creation() {
        let registry = MetricsRegistry::new();
        assert!(registry.is_initialized());

        // Nothing has been recorded on a freshly built registry.
        assert_eq!(registry.ws_connections_total.get(), 0);
        assert_eq!(registry.ws_connections_active.get(), 0);
    }

    #[test]
    fn test_record_http_request() {
        let registry = MetricsRegistry::new();
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);

        // Requests recorded without a pillar are filed under "unknown".
        assert_eq!(
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get(),
            1
        );
        assert_eq!(
            registry.requests_total.with_label_values(&["http", "POST", "201", "unknown"]).get(),
            1
        );
        assert_eq!(
            registry
                .requests_duration_seconds
                .with_label_values(&["http", "GET", "unknown"])
                .get_sample_count(),
            1
        );
        assert_eq!(
            registry
                .requests_duration_seconds
                .with_label_values(&["http", "POST", "unknown"])
                .get_sample_count(),
            1
        );
    }

    #[test]
    fn test_record_http_request_with_pillar() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_pillar("GET", 200, 0.045, "reality");
        registry.record_http_request_with_pillar("POST", 201, 0.123, "contracts");

        assert_eq!(
            registry.requests_total.with_label_values(&["http", "GET", "200", "reality"]).get(),
            1
        );
        assert_eq!(
            registry.requests_total.with_label_values(&["http", "POST", "201", "contracts"]).get(),
            1
        );
        // A named pillar must not leak into the "unknown" series.
        assert_eq!(
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get(),
            0
        );
        assert_eq!(
            registry
                .requests_duration_seconds
                .with_label_values(&["http", "GET", "reality"])
                .get_sample_count(),
            1
        );
    }

    #[test]
    fn test_global_registry() {
        let registry = get_global_registry();
        assert!(registry.is_initialized());

        // Every call hands back the same process-wide instance.
        assert!(std::ptr::eq(registry, get_global_registry()));
    }

    #[test]
    fn test_plugin_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_plugin_execution("test-plugin", true, 0.025);
        registry.record_plugin_execution("test-plugin", false, 0.050);

        assert_eq!(
            registry.plugin_executions_total.with_label_values(&["test-plugin", "success"]).get(),
            1
        );
        assert_eq!(
            registry.plugin_executions_total.with_label_values(&["test-plugin", "failure"]).get(),
            1
        );
        // Both executions are timed, regardless of their outcome.
        assert_eq!(
            registry
                .plugin_execution_duration_seconds
                .with_label_values(&["test-plugin"])
                .get_sample_count(),
            2
        );
    }

    #[test]
    fn test_websocket_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_ws_connection_established();
        registry.record_ws_connection_closed(120.5, "normal");
        registry.record_ws_error();

        assert_eq!(registry.ws_messages_sent.get(), 1);
        assert_eq!(registry.ws_messages_received.get(), 1);
        assert_eq!(registry.ws_connections_total.get(), 1);
        // Opened once and closed once, so nothing is active any more.
        assert_eq!(registry.ws_connections_active.get(), 0);
        assert_eq!(registry.ws_errors_total.get(), 1);

        let lifetimes = registry.ws_connection_duration_seconds.with_label_values(&["normal"]);
        assert_eq!(lifetimes.get_sample_count(), 1);
        assert!((lifetimes.get_sample_sum() - 120.5).abs() < TOLERANCE);
    }

    #[test]
    fn test_path_normalization() {
        assert_eq!(normalize_path("/api/users/123"), "/api/users/:id");
        assert_eq!(
            normalize_path("/api/users/550e8400-e29b-41d4-a716-446655440000"),
            "/api/users/:id"
        );
        assert_eq!(normalize_path("/api/users/abc123def456"), "/api/users/:id");
        assert_eq!(normalize_path("/api/users/list"), "/api/users/list");

        // Several dynamic segments in one path are each replaced.
        assert_eq!(normalize_path("/api/users/123/posts/456"), "/api/users/:id/posts/:id");
        // Short alphanumeric segments such as version prefixes are kept.
        assert_eq!(normalize_path("/api/v1/users"), "/api/v1/users");
        // Degenerate inputs come back unchanged.
        assert_eq!(normalize_path(""), "");
        assert_eq!(normalize_path("/"), "/");
    }

    #[test]
    fn test_path_based_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_path("/api/users/123", "GET", 200, 0.045);
        registry.record_http_request_with_path("/api/users/456", "GET", 200, 0.055);
        registry.record_http_request_with_path("/api/posts", "POST", 201, 0.123);

        // Both user requests collapse onto the same normalized path label.
        assert_eq!(
            registry
                .requests_by_path_total
                .with_label_values(&["/api/users/:id", "GET", "200"])
                .get(),
            2
        );
        assert_eq!(
            registry.requests_by_path_total.with_label_values(&["/api/posts", "POST", "201"]).get(),
            1
        );
        assert_eq!(
            registry
                .request_duration_by_path_seconds
                .with_label_values(&["/api/users/:id", "GET"])
                .get_sample_count(),
            2
        );

        // First sample seeds the average; the second is blended in at 5%.
        let expected_average = 0.045 * 0.95 + 0.055 * 0.05;
        let average = registry
            .average_latency_by_path_seconds
            .with_label_values(&["/api/users/:id", "GET"])
            .get();
        assert!((average - expected_average).abs() < TOLERANCE);

        // Path-based recording also feeds the general request counter.
        assert_eq!(
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get(),
            2
        );
    }

    #[test]
    fn test_smtp_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_smtp_connection_established();
        registry.record_smtp_message_received();
        registry.record_smtp_message_stored();
        registry.record_smtp_connection_closed();
        registry.record_smtp_error("timeout");

        assert_eq!(registry.smtp_connections_total.get(), 1);
        // Opened once and closed once, so nothing is active any more.
        assert_eq!(registry.smtp_connections_active.get(), 0);
        assert_eq!(registry.smtp_messages_received_total.get(), 1);
        assert_eq!(registry.smtp_messages_stored_total.get(), 1);
        assert_eq!(registry.smtp_errors_total.with_label_values(&["timeout"]).get(), 1);
    }

    #[test]
    fn test_system_metrics() {
        let registry = MetricsRegistry::new();
        let hundred_mib = 1024.0 * 1024.0 * 100.0;
        registry.update_memory_usage(hundred_mib);
        registry.update_cpu_usage(45.5);
        registry.update_thread_count(25.0);
        registry.update_uptime(3600.0); // 1 hour

        assert!((registry.memory_usage_bytes.get() - hundred_mib).abs() < TOLERANCE);
        assert!((registry.cpu_usage_percent.get() - 45.5).abs() < TOLERANCE);
        assert!((registry.thread_count.get() - 25.0).abs() < TOLERANCE);
        assert!((registry.uptime_seconds.get() - 3600.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_workspace_metrics() {
        let registry = MetricsRegistry::new();

        // Record workspace requests
        registry.record_workspace_request("workspace1", "GET", 200, 0.045);
        registry.record_workspace_request("workspace1", "POST", 201, 0.123);
        registry.record_workspace_request("workspace2", "GET", 200, 0.055);

        // Update active routes
        registry.update_workspace_active_routes("workspace1", 10);
        registry.update_workspace_active_routes("workspace2", 5);

        // Record errors
        registry.record_workspace_error("workspace1", "validation");
        registry.record_workspace_error("workspace2", "timeout");

        // One increment followed by one decrement leaves the gauge unchanged.
        registry.increment_workspace_routes("workspace1");
        registry.decrement_workspace_routes("workspace1");

        assert_eq!(
            registry.workspace_requests_total.with_label_values(&["workspace1", "GET", "200"]).get(),
            1
        );
        assert_eq!(
            registry
                .workspace_requests_total
                .with_label_values(&["workspace1", "POST", "201"])
                .get(),
            1
        );
        assert_eq!(
            registry.workspace_requests_total.with_label_values(&["workspace2", "GET", "200"]).get(),
            1
        );
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace1"]).get(), 10);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace2"]).get(), 5);
        assert_eq!(
            registry.workspace_errors_total.with_label_values(&["workspace1", "validation"]).get(),
            1
        );
        assert_eq!(
            registry.workspace_errors_total.with_label_values(&["workspace2", "timeout"]).get(),
            1
        );
    }

    #[test]
    fn test_workspace_metrics_isolation() {
        let registry = MetricsRegistry::new();

        // Ensure metrics for different workspaces are independent
        registry.record_workspace_request("ws1", "GET", 200, 0.1);
        registry.record_workspace_request("ws2", "GET", 200, 0.2);

        registry.update_workspace_active_routes("ws1", 5);
        registry.update_workspace_active_routes("ws2", 10);

        // Each workspace keeps its own gauge value...
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws1"]).get(), 5);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws2"]).get(), 10);

        // ...its own request counter...
        assert_eq!(
            registry.workspace_requests_total.with_label_values(&["ws1", "GET", "200"]).get(),
            1
        );
        assert_eq!(
            registry.workspace_requests_total.with_label_values(&["ws2", "GET", "200"]).get(),
            1
        );

        // ...and its own latency observations.
        let ws1_timings =
            registry.workspace_requests_duration_seconds.with_label_values(&["ws1", "GET"]);
        let ws2_timings =
            registry.workspace_requests_duration_seconds.with_label_values(&["ws2", "GET"]);
        assert!((ws1_timings.get_sample_sum() - 0.1).abs() < TOLERANCE);
        assert!((ws2_timings.get_sample_sum() - 0.2).abs() < TOLERANCE);
    }
}