mockforge_observability/prometheus/
metrics.rs

1//! Prometheus metrics definitions and registry
2
3use once_cell::sync::Lazy;
4use prometheus::{
5    Gauge, GaugeVec, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
6    Opts, Registry,
7};
8use std::sync::Arc;
9use tracing::debug;
10
11/// Global metrics registry for MockForge
12#[derive(Clone)]
13pub struct MetricsRegistry {
14    registry: Arc<Registry>,
15
16    // Request metrics by protocol
17    pub requests_total: IntCounterVec,
18    pub requests_duration_seconds: HistogramVec,
19    pub requests_in_flight: IntGaugeVec,
20
21    // Request metrics by path (endpoint-specific)
22    pub requests_by_path_total: IntCounterVec,
23    pub request_duration_by_path_seconds: HistogramVec,
24    pub average_latency_by_path_seconds: GaugeVec,
25
26    // Workspace-specific metrics
27    pub workspace_requests_total: IntCounterVec,
28    pub workspace_requests_duration_seconds: HistogramVec,
29    pub workspace_active_routes: IntGaugeVec,
30    pub workspace_errors_total: IntCounterVec,
31
32    // Error metrics
33    pub errors_total: IntCounterVec,
34    pub error_rate: GaugeVec,
35
36    // Plugin metrics
37    pub plugin_executions_total: IntCounterVec,
38    pub plugin_execution_duration_seconds: HistogramVec,
39    pub plugin_errors_total: IntCounterVec,
40
41    // WebSocket specific metrics
42    pub ws_connections_active: IntGauge,
43    pub ws_connections_total: IntCounter,
44    pub ws_connection_duration_seconds: HistogramVec,
45    pub ws_messages_sent: IntCounter,
46    pub ws_messages_received: IntCounter,
47    pub ws_errors_total: IntCounter,
48
49    // SMTP specific metrics
50    pub smtp_connections_active: IntGauge,
51    pub smtp_connections_total: IntCounter,
52    pub smtp_messages_received_total: IntCounter,
53    pub smtp_messages_stored_total: IntCounter,
54    pub smtp_errors_total: IntCounterVec,
55
56    // MQTT specific metrics
57    pub mqtt_connections_active: IntGauge,
58    pub mqtt_connections_total: IntCounter,
59    pub mqtt_messages_published_total: IntCounter,
60    pub mqtt_messages_received_total: IntCounter,
61    pub mqtt_topics_active: IntGauge,
62    pub mqtt_subscriptions_active: IntGauge,
63    pub mqtt_retained_messages: IntGauge,
64    pub mqtt_errors_total: IntCounterVec,
65
66    // System metrics
67    pub memory_usage_bytes: Gauge,
68    pub cpu_usage_percent: Gauge,
69    pub thread_count: Gauge,
70    pub uptime_seconds: Gauge,
71
72    // Scenario metrics (for Phase 4)
73    pub active_scenario_mode: IntGauge,
74    pub chaos_triggers_total: IntCounter,
75
76    // Business/SLO metrics
77    pub service_availability: GaugeVec,
78    pub slo_compliance: GaugeVec,
79    pub successful_request_rate: GaugeVec,
80    pub p95_latency_slo_compliance: GaugeVec,
81    pub error_budget_remaining: GaugeVec,
82
83    // Marketplace metrics
84    pub marketplace_publish_total: IntCounterVec,
85    pub marketplace_publish_duration_seconds: HistogramVec,
86    pub marketplace_download_total: IntCounterVec,
87    pub marketplace_download_duration_seconds: HistogramVec,
88    pub marketplace_search_total: IntCounterVec,
89    pub marketplace_search_duration_seconds: HistogramVec,
90    pub marketplace_errors_total: IntCounterVec,
91    pub marketplace_items_total: IntGaugeVec,
92}
93
94impl MetricsRegistry {
95    /// Create a new metrics registry with all metrics initialized
96    pub fn new() -> Self {
97        let registry = Registry::new();
98
99        // Request metrics (with pillar label)
100        let requests_total = IntCounterVec::new(
101            Opts::new(
102                "mockforge_requests_total",
103                "Total number of requests by protocol, method, status, and pillar",
104            ),
105            &["protocol", "method", "status", "pillar"],
106        )
107        .expect("Failed to create requests_total metric");
108
109        let requests_duration_seconds = HistogramVec::new(
110            HistogramOpts::new(
111                "mockforge_request_duration_seconds",
112                "Request duration in seconds by protocol, method, and pillar",
113            )
114            .buckets(vec![
115                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
116            ]),
117            &["protocol", "method", "pillar"],
118        )
119        .expect("Failed to create requests_duration_seconds metric");
120
121        let requests_in_flight = IntGaugeVec::new(
122            Opts::new(
123                "mockforge_requests_in_flight",
124                "Number of requests currently being processed",
125            ),
126            &["protocol"],
127        )
128        .expect("Failed to create requests_in_flight metric");
129
130        // Error metrics (with pillar label)
131        let errors_total = IntCounterVec::new(
132            Opts::new(
133                "mockforge_errors_total",
134                "Total number of errors by protocol, error type, and pillar",
135            ),
136            &["protocol", "error_type", "pillar"],
137        )
138        .expect("Failed to create errors_total metric");
139
140        let error_rate = GaugeVec::new(
141            Opts::new("mockforge_error_rate", "Error rate by protocol (0.0 to 1.0)"),
142            &["protocol"],
143        )
144        .expect("Failed to create error_rate metric");
145
146        // Plugin metrics
147        let plugin_executions_total = IntCounterVec::new(
148            Opts::new("mockforge_plugin_executions_total", "Total number of plugin executions"),
149            &["plugin_name", "status"],
150        )
151        .expect("Failed to create plugin_executions_total metric");
152
153        let plugin_execution_duration_seconds = HistogramVec::new(
154            HistogramOpts::new(
155                "mockforge_plugin_execution_duration_seconds",
156                "Plugin execution duration in seconds",
157            )
158            .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0]),
159            &["plugin_name"],
160        )
161        .expect("Failed to create plugin_execution_duration_seconds metric");
162
163        let plugin_errors_total = IntCounterVec::new(
164            Opts::new("mockforge_plugin_errors_total", "Total number of plugin errors"),
165            &["plugin_name", "error_type"],
166        )
167        .expect("Failed to create plugin_errors_total metric");
168
169        // WebSocket metrics
170        // Path-based request metrics
171        let requests_by_path_total = IntCounterVec::new(
172            Opts::new(
173                "mockforge_requests_by_path_total",
174                "Total number of requests by path, method, and status",
175            ),
176            &["path", "method", "status"],
177        )
178        .expect("Failed to create requests_by_path_total metric");
179
180        let request_duration_by_path_seconds = HistogramVec::new(
181            HistogramOpts::new(
182                "mockforge_request_duration_by_path_seconds",
183                "Request duration by path in seconds",
184            )
185            .buckets(vec![
186                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
187            ]),
188            &["path", "method"],
189        )
190        .expect("Failed to create request_duration_by_path_seconds metric");
191
192        let average_latency_by_path_seconds = GaugeVec::new(
193            Opts::new(
194                "mockforge_average_latency_by_path_seconds",
195                "Average request latency by path in seconds",
196            ),
197            &["path", "method"],
198        )
199        .expect("Failed to create average_latency_by_path_seconds metric");
200
201        // Workspace-specific metrics
202        let workspace_requests_total = IntCounterVec::new(
203            Opts::new(
204                "mockforge_workspace_requests_total",
205                "Total number of requests by workspace, method, and status",
206            ),
207            &["workspace_id", "method", "status"],
208        )
209        .expect("Failed to create workspace_requests_total metric");
210
211        let workspace_requests_duration_seconds = HistogramVec::new(
212            HistogramOpts::new(
213                "mockforge_workspace_request_duration_seconds",
214                "Request duration by workspace in seconds",
215            )
216            .buckets(vec![
217                0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
218            ]),
219            &["workspace_id", "method"],
220        )
221        .expect("Failed to create workspace_requests_duration_seconds metric");
222
223        let workspace_active_routes = IntGaugeVec::new(
224            Opts::new(
225                "mockforge_workspace_active_routes",
226                "Number of active routes in each workspace",
227            ),
228            &["workspace_id"],
229        )
230        .expect("Failed to create workspace_active_routes metric");
231
232        let workspace_errors_total = IntCounterVec::new(
233            Opts::new("mockforge_workspace_errors_total", "Total number of errors by workspace"),
234            &["workspace_id", "error_type"],
235        )
236        .expect("Failed to create workspace_errors_total metric");
237
238        // WebSocket metrics
239        let ws_connections_active = IntGauge::new(
240            "mockforge_ws_connections_active",
241            "Number of active WebSocket connections",
242        )
243        .expect("Failed to create ws_connections_active metric");
244
245        let ws_connections_total = IntCounter::new(
246            "mockforge_ws_connections_total",
247            "Total number of WebSocket connections established",
248        )
249        .expect("Failed to create ws_connections_total metric");
250
251        let ws_connection_duration_seconds = HistogramVec::new(
252            HistogramOpts::new(
253                "mockforge_ws_connection_duration_seconds",
254                "WebSocket connection duration in seconds",
255            )
256            .buckets(vec![1.0, 5.0, 10.0, 30.0, 60.0, 300.0, 600.0, 1800.0, 3600.0]),
257            &["status"],
258        )
259        .expect("Failed to create ws_connection_duration_seconds metric");
260
261        let ws_messages_sent = IntCounter::new(
262            "mockforge_ws_messages_sent_total",
263            "Total number of WebSocket messages sent",
264        )
265        .expect("Failed to create ws_messages_sent metric");
266
267        let ws_messages_received = IntCounter::new(
268            "mockforge_ws_messages_received_total",
269            "Total number of WebSocket messages received",
270        )
271        .expect("Failed to create ws_messages_received metric");
272
273        let ws_errors_total =
274            IntCounter::new("mockforge_ws_errors_total", "Total number of WebSocket errors")
275                .expect("Failed to create ws_errors_total metric");
276
277        // SMTP metrics
278        let smtp_connections_active =
279            IntGauge::new("mockforge_smtp_connections_active", "Number of active SMTP connections")
280                .expect("Failed to create smtp_connections_active metric");
281
282        let smtp_connections_total =
283            IntCounter::new("mockforge_smtp_connections_total", "Total number of SMTP connections")
284                .expect("Failed to create smtp_connections_total metric");
285
286        let smtp_messages_received_total = IntCounter::new(
287            "mockforge_smtp_messages_received_total",
288            "Total number of SMTP messages received",
289        )
290        .expect("Failed to create smtp_messages_received_total metric");
291
292        let smtp_messages_stored_total = IntCounter::new(
293            "mockforge_smtp_messages_stored_total",
294            "Total number of SMTP messages stored in mailbox",
295        )
296        .expect("Failed to create smtp_messages_stored_total metric");
297
298        let smtp_errors_total = IntCounterVec::new(
299            Opts::new("mockforge_smtp_errors_total", "Total number of SMTP errors by type"),
300            &["error_type"],
301        )
302        .expect("Failed to create smtp_errors_total metric");
303
304        // MQTT metrics
305        let mqtt_connections_active = IntGauge::new(
306            "mockforge_mqtt_connections_active",
307            "Number of active MQTT client connections",
308        )
309        .expect("Failed to create mqtt_connections_active metric");
310
311        let mqtt_connections_total = IntCounter::new(
312            "mockforge_mqtt_connections_total",
313            "Total number of MQTT client connections established",
314        )
315        .expect("Failed to create mqtt_connections_total metric");
316
317        let mqtt_messages_published_total = IntCounter::new(
318            "mockforge_mqtt_messages_published_total",
319            "Total number of MQTT messages published",
320        )
321        .expect("Failed to create mqtt_messages_published_total metric");
322
323        let mqtt_messages_received_total = IntCounter::new(
324            "mockforge_mqtt_messages_received_total",
325            "Total number of MQTT messages received",
326        )
327        .expect("Failed to create mqtt_messages_received_total metric");
328
329        let mqtt_topics_active =
330            IntGauge::new("mockforge_mqtt_topics_active", "Number of active MQTT topics")
331                .expect("Failed to create mqtt_topics_active metric");
332
333        let mqtt_subscriptions_active = IntGauge::new(
334            "mockforge_mqtt_subscriptions_active",
335            "Number of active MQTT subscriptions",
336        )
337        .expect("Failed to create mqtt_subscriptions_active metric");
338
339        let mqtt_retained_messages =
340            IntGauge::new("mockforge_mqtt_retained_messages", "Number of retained MQTT messages")
341                .expect("Failed to create mqtt_retained_messages metric");
342
343        let mqtt_errors_total = IntCounterVec::new(
344            Opts::new("mockforge_mqtt_errors_total", "Total number of MQTT errors by type"),
345            &["error_type"],
346        )
347        .expect("Failed to create mqtt_errors_total metric");
348
349        // System metrics
350        let memory_usage_bytes =
351            Gauge::new("mockforge_memory_usage_bytes", "Memory usage in bytes")
352                .expect("Failed to create memory_usage_bytes metric");
353
354        let cpu_usage_percent = Gauge::new("mockforge_cpu_usage_percent", "CPU usage percentage")
355            .expect("Failed to create cpu_usage_percent metric");
356
357        let thread_count = Gauge::new("mockforge_thread_count", "Number of active threads")
358            .expect("Failed to create thread_count metric");
359
360        let uptime_seconds = Gauge::new("mockforge_uptime_seconds", "Server uptime in seconds")
361            .expect("Failed to create uptime_seconds metric");
362
363        // Scenario metrics
364        let active_scenario_mode = IntGauge::new(
365            "mockforge_active_scenario_mode",
366            "Active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)",
367        )
368        .expect("Failed to create active_scenario_mode metric");
369
370        let chaos_triggers_total = IntCounter::new(
371            "mockforge_chaos_triggers_total",
372            "Total number of chaos mode triggers",
373        )
374        .expect("Failed to create chaos_triggers_total metric");
375
376        // Business/SLO metrics
377        let service_availability = GaugeVec::new(
378            Opts::new(
379                "mockforge_service_availability",
380                "Service availability percentage (0.0 to 1.0) by protocol",
381            ),
382            &["protocol"],
383        )
384        .expect("Failed to create service_availability metric");
385
386        let slo_compliance = GaugeVec::new(
387            Opts::new(
388                "mockforge_slo_compliance",
389                "SLO compliance percentage (0.0 to 1.0) by protocol and slo_type",
390            ),
391            &["protocol", "slo_type"],
392        )
393        .expect("Failed to create slo_compliance metric");
394
395        let successful_request_rate = GaugeVec::new(
396            Opts::new(
397                "mockforge_successful_request_rate",
398                "Successful request rate (0.0 to 1.0) by protocol",
399            ),
400            &["protocol"],
401        )
402        .expect("Failed to create successful_request_rate metric");
403
404        let p95_latency_slo_compliance = GaugeVec::new(
405            Opts::new(
406                "mockforge_p95_latency_slo_compliance",
407                "P95 latency SLO compliance (1.0 = compliant, 0.0 = non-compliant) by protocol",
408            ),
409            &["protocol"],
410        )
411        .expect("Failed to create p95_latency_slo_compliance metric");
412
413        let error_budget_remaining = GaugeVec::new(
414            Opts::new(
415                "mockforge_error_budget_remaining",
416                "Remaining error budget percentage (0.0 to 1.0) by protocol",
417            ),
418            &["protocol"],
419        )
420        .expect("Failed to create error_budget_remaining metric");
421
422        // Marketplace metrics
423        let marketplace_publish_total = IntCounterVec::new(
424            Opts::new(
425                "mockforge_marketplace_publish_total",
426                "Total number of marketplace items published by type and status",
427            ),
428            &["type", "status"], // type: plugin, template, scenario; status: success, error
429        )
430        .expect("Failed to create marketplace_publish_total metric");
431
432        let marketplace_publish_duration_seconds = HistogramVec::new(
433            HistogramOpts::new(
434                "mockforge_marketplace_publish_duration_seconds",
435                "Marketplace publish operation duration in seconds",
436            )
437            .buckets(vec![0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0]),
438            &["type"], // type: plugin, template, scenario
439        )
440        .expect("Failed to create marketplace_publish_duration_seconds metric");
441
442        let marketplace_download_total = IntCounterVec::new(
443            Opts::new(
444                "mockforge_marketplace_download_total",
445                "Total number of marketplace items downloaded by type and status",
446            ),
447            &["type", "status"], // type: plugin, template, scenario; status: success, error
448        )
449        .expect("Failed to create marketplace_download_total metric");
450
451        let marketplace_download_duration_seconds = HistogramVec::new(
452            HistogramOpts::new(
453                "mockforge_marketplace_download_duration_seconds",
454                "Marketplace download operation duration in seconds",
455            )
456            .buckets(vec![0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]),
457            &["type"], // type: plugin, template, scenario
458        )
459        .expect("Failed to create marketplace_download_duration_seconds metric");
460
461        let marketplace_search_total = IntCounterVec::new(
462            Opts::new(
463                "mockforge_marketplace_search_total",
464                "Total number of marketplace searches by type and status",
465            ),
466            &["type", "status"], // type: plugin, template, scenario; status: success, error
467        )
468        .expect("Failed to create marketplace_search_total metric");
469
470        let marketplace_search_duration_seconds = HistogramVec::new(
471            HistogramOpts::new(
472                "mockforge_marketplace_search_duration_seconds",
473                "Marketplace search operation duration in seconds",
474            )
475            .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0]),
476            &["type"], // type: plugin, template, scenario
477        )
478        .expect("Failed to create marketplace_search_duration_seconds metric");
479
480        let marketplace_errors_total = IntCounterVec::new(
481            Opts::new(
482                "mockforge_marketplace_errors_total",
483                "Total number of marketplace errors by type and error_code",
484            ),
485            &["type", "error_code"], // type: plugin, template, scenario; error_code: validation_failed, not_found, etc.
486        )
487        .expect("Failed to create marketplace_errors_total metric");
488
489        let marketplace_items_total = IntGaugeVec::new(
490            Opts::new(
491                "mockforge_marketplace_items_total",
492                "Total number of marketplace items by type",
493            ),
494            &["type"], // type: plugin, template, scenario
495        )
496        .expect("Failed to create marketplace_items_total metric");
497
498        // Register all metrics
499        registry
500            .register(Box::new(requests_total.clone()))
501            .expect("Failed to register requests_total");
502        registry
503            .register(Box::new(requests_duration_seconds.clone()))
504            .expect("Failed to register requests_duration_seconds");
505        registry
506            .register(Box::new(requests_in_flight.clone()))
507            .expect("Failed to register requests_in_flight");
508        registry
509            .register(Box::new(requests_by_path_total.clone()))
510            .expect("Failed to register requests_by_path_total");
511        registry
512            .register(Box::new(request_duration_by_path_seconds.clone()))
513            .expect("Failed to register request_duration_by_path_seconds");
514        registry
515            .register(Box::new(average_latency_by_path_seconds.clone()))
516            .expect("Failed to register average_latency_by_path_seconds");
517        registry
518            .register(Box::new(workspace_requests_total.clone()))
519            .expect("Failed to register workspace_requests_total");
520        registry
521            .register(Box::new(workspace_requests_duration_seconds.clone()))
522            .expect("Failed to register workspace_requests_duration_seconds");
523        registry
524            .register(Box::new(workspace_active_routes.clone()))
525            .expect("Failed to register workspace_active_routes");
526        registry
527            .register(Box::new(workspace_errors_total.clone()))
528            .expect("Failed to register workspace_errors_total");
529        registry
530            .register(Box::new(errors_total.clone()))
531            .expect("Failed to register errors_total");
532        registry
533            .register(Box::new(error_rate.clone()))
534            .expect("Failed to register error_rate");
535        registry
536            .register(Box::new(plugin_executions_total.clone()))
537            .expect("Failed to register plugin_executions_total");
538        registry
539            .register(Box::new(plugin_execution_duration_seconds.clone()))
540            .expect("Failed to register plugin_execution_duration_seconds");
541        registry
542            .register(Box::new(plugin_errors_total.clone()))
543            .expect("Failed to register plugin_errors_total");
544        registry
545            .register(Box::new(ws_connections_active.clone()))
546            .expect("Failed to register ws_connections_active");
547        registry
548            .register(Box::new(ws_connections_total.clone()))
549            .expect("Failed to register ws_connections_total");
550        registry
551            .register(Box::new(ws_connection_duration_seconds.clone()))
552            .expect("Failed to register ws_connection_duration_seconds");
553        registry
554            .register(Box::new(ws_messages_sent.clone()))
555            .expect("Failed to register ws_messages_sent");
556        registry
557            .register(Box::new(ws_messages_received.clone()))
558            .expect("Failed to register ws_messages_received");
559        registry
560            .register(Box::new(ws_errors_total.clone()))
561            .expect("Failed to register ws_errors_total");
562        registry
563            .register(Box::new(smtp_connections_active.clone()))
564            .expect("Failed to register smtp_connections_active");
565        registry
566            .register(Box::new(smtp_connections_total.clone()))
567            .expect("Failed to register smtp_connections_total");
568        registry
569            .register(Box::new(smtp_messages_received_total.clone()))
570            .expect("Failed to register smtp_messages_received_total");
571        registry
572            .register(Box::new(smtp_messages_stored_total.clone()))
573            .expect("Failed to register smtp_messages_stored_total");
574        registry
575            .register(Box::new(smtp_errors_total.clone()))
576            .expect("Failed to register smtp_errors_total");
577        registry
578            .register(Box::new(mqtt_connections_active.clone()))
579            .expect("Failed to register mqtt_connections_active");
580        registry
581            .register(Box::new(mqtt_connections_total.clone()))
582            .expect("Failed to register mqtt_connections_total");
583        registry
584            .register(Box::new(mqtt_messages_published_total.clone()))
585            .expect("Failed to register mqtt_messages_published_total");
586        registry
587            .register(Box::new(mqtt_messages_received_total.clone()))
588            .expect("Failed to register mqtt_messages_received_total");
589        registry
590            .register(Box::new(mqtt_topics_active.clone()))
591            .expect("Failed to register mqtt_topics_active");
592        registry
593            .register(Box::new(mqtt_subscriptions_active.clone()))
594            .expect("Failed to register mqtt_subscriptions_active");
595        registry
596            .register(Box::new(mqtt_retained_messages.clone()))
597            .expect("Failed to register mqtt_retained_messages");
598        registry
599            .register(Box::new(mqtt_errors_total.clone()))
600            .expect("Failed to register mqtt_errors_total");
601        registry
602            .register(Box::new(memory_usage_bytes.clone()))
603            .expect("Failed to register memory_usage_bytes");
604        registry
605            .register(Box::new(cpu_usage_percent.clone()))
606            .expect("Failed to register cpu_usage_percent");
607        registry
608            .register(Box::new(thread_count.clone()))
609            .expect("Failed to register thread_count");
610        registry
611            .register(Box::new(uptime_seconds.clone()))
612            .expect("Failed to register uptime_seconds");
613        registry
614            .register(Box::new(active_scenario_mode.clone()))
615            .expect("Failed to register active_scenario_mode");
616        registry
617            .register(Box::new(chaos_triggers_total.clone()))
618            .expect("Failed to register chaos_triggers_total");
619        registry
620            .register(Box::new(service_availability.clone()))
621            .expect("Failed to register service_availability");
622        registry
623            .register(Box::new(slo_compliance.clone()))
624            .expect("Failed to register slo_compliance");
625        registry
626            .register(Box::new(successful_request_rate.clone()))
627            .expect("Failed to register successful_request_rate");
628        registry
629            .register(Box::new(p95_latency_slo_compliance.clone()))
630            .expect("Failed to register p95_latency_slo_compliance");
631        registry
632            .register(Box::new(error_budget_remaining.clone()))
633            .expect("Failed to register error_budget_remaining");
634        registry
635            .register(Box::new(marketplace_publish_total.clone()))
636            .expect("Failed to register marketplace_publish_total");
637        registry
638            .register(Box::new(marketplace_publish_duration_seconds.clone()))
639            .expect("Failed to register marketplace_publish_duration_seconds");
640        registry
641            .register(Box::new(marketplace_download_total.clone()))
642            .expect("Failed to register marketplace_download_total");
643        registry
644            .register(Box::new(marketplace_download_duration_seconds.clone()))
645            .expect("Failed to register marketplace_download_duration_seconds");
646        registry
647            .register(Box::new(marketplace_search_total.clone()))
648            .expect("Failed to register marketplace_search_total");
649        registry
650            .register(Box::new(marketplace_search_duration_seconds.clone()))
651            .expect("Failed to register marketplace_search_duration_seconds");
652        registry
653            .register(Box::new(marketplace_errors_total.clone()))
654            .expect("Failed to register marketplace_errors_total");
655        registry
656            .register(Box::new(marketplace_items_total.clone()))
657            .expect("Failed to register marketplace_items_total");
658
659        debug!("Initialized Prometheus metrics registry");
660
661        Self {
662            registry: Arc::new(registry),
663            requests_total,
664            requests_duration_seconds,
665            requests_in_flight,
666            requests_by_path_total,
667            request_duration_by_path_seconds,
668            average_latency_by_path_seconds,
669            workspace_requests_total,
670            workspace_requests_duration_seconds,
671            workspace_active_routes,
672            workspace_errors_total,
673            errors_total,
674            error_rate,
675            plugin_executions_total,
676            plugin_execution_duration_seconds,
677            plugin_errors_total,
678            ws_connections_active,
679            ws_connections_total,
680            ws_connection_duration_seconds,
681            ws_messages_sent,
682            ws_messages_received,
683            ws_errors_total,
684            smtp_connections_active,
685            smtp_connections_total,
686            smtp_messages_received_total,
687            smtp_messages_stored_total,
688            smtp_errors_total,
689            mqtt_connections_active,
690            mqtt_connections_total,
691            mqtt_messages_published_total,
692            mqtt_messages_received_total,
693            mqtt_topics_active,
694            mqtt_subscriptions_active,
695            mqtt_retained_messages,
696            mqtt_errors_total,
697            memory_usage_bytes,
698            cpu_usage_percent,
699            thread_count,
700            uptime_seconds,
701            active_scenario_mode,
702            chaos_triggers_total,
703            service_availability,
704            slo_compliance,
705            successful_request_rate,
706            p95_latency_slo_compliance,
707            error_budget_remaining,
708            marketplace_publish_total,
709            marketplace_publish_duration_seconds,
710            marketplace_download_total,
711            marketplace_download_duration_seconds,
712            marketplace_search_total,
713            marketplace_search_duration_seconds,
714            marketplace_errors_total,
715            marketplace_items_total,
716        }
717    }
718
719    /// Get the underlying Prometheus registry
720    pub fn registry(&self) -> &Registry {
721        &self.registry
722    }
723
724    /// Check if the registry is initialized
725    pub fn is_initialized(&self) -> bool {
726        true
727    }
728
729    /// Record an HTTP request
730    pub fn record_http_request(&self, method: &str, status: u16, duration_seconds: f64) {
731        self.record_http_request_with_pillar(method, status, duration_seconds, "");
732    }
733
734    /// Record an HTTP request with pillar information
735    pub fn record_http_request_with_pillar(
736        &self,
737        method: &str,
738        status: u16,
739        duration_seconds: f64,
740        pillar: &str,
741    ) {
742        let status_str = status.to_string();
743        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
744        self.requests_total
745            .with_label_values(&["http", method, &status_str, pillar_label])
746            .inc();
747        self.requests_duration_seconds
748            .with_label_values(&["http", method, pillar_label])
749            .observe(duration_seconds);
750    }
751
752    /// Record a gRPC request
753    pub fn record_grpc_request(&self, method: &str, status: &str, duration_seconds: f64) {
754        self.record_grpc_request_with_pillar(method, status, duration_seconds, "");
755    }
756
757    /// Record a gRPC request with pillar information
758    pub fn record_grpc_request_with_pillar(
759        &self,
760        method: &str,
761        status: &str,
762        duration_seconds: f64,
763        pillar: &str,
764    ) {
765        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
766        self.requests_total
767            .with_label_values(&["grpc", method, status, pillar_label])
768            .inc();
769        self.requests_duration_seconds
770            .with_label_values(&["grpc", method, pillar_label])
771            .observe(duration_seconds);
772    }
773
774    /// Record a WebSocket message
775    pub fn record_ws_message_sent(&self) {
776        self.ws_messages_sent.inc();
777    }
778
779    /// Record a WebSocket message received
780    pub fn record_ws_message_received(&self) {
781        self.ws_messages_received.inc();
782    }
783
784    /// Record a GraphQL request
785    pub fn record_graphql_request(&self, operation: &str, status: u16, duration_seconds: f64) {
786        let status_str = status.to_string();
787        // GraphQL requests are categorized under the "contracts" pillar
788        self.requests_total
789            .with_label_values(&["graphql", operation, &status_str, "contracts"])
790            .inc();
791        self.requests_duration_seconds
792            .with_label_values(&["graphql", operation, "contracts"])
793            .observe(duration_seconds);
794    }
795
796    /// Record a plugin execution
797    pub fn record_plugin_execution(&self, plugin_name: &str, success: bool, duration_seconds: f64) {
798        let status = if success { "success" } else { "failure" };
799        self.plugin_executions_total.with_label_values(&[plugin_name, status]).inc();
800        self.plugin_execution_duration_seconds
801            .with_label_values(&[plugin_name])
802            .observe(duration_seconds);
803    }
804
805    /// Increment in-flight requests
806    pub fn increment_in_flight(&self, protocol: &str) {
807        self.requests_in_flight.with_label_values(&[protocol]).inc();
808    }
809
810    /// Decrement in-flight requests
811    pub fn decrement_in_flight(&self, protocol: &str) {
812        self.requests_in_flight.with_label_values(&[protocol]).dec();
813    }
814
815    /// Record an error
816    pub fn record_error(&self, protocol: &str, error_type: &str) {
817        self.record_error_with_pillar(protocol, error_type, "");
818    }
819
820    /// Record an error with pillar information
821    pub fn record_error_with_pillar(&self, protocol: &str, error_type: &str, pillar: &str) {
822        let pillar_label = if pillar.is_empty() { "unknown" } else { pillar };
823        self.errors_total.with_label_values(&[protocol, error_type, pillar_label]).inc();
824    }
825
826    /// Update memory usage
827    pub fn update_memory_usage(&self, bytes: f64) {
828        self.memory_usage_bytes.set(bytes);
829    }
830
831    /// Update CPU usage
832    pub fn update_cpu_usage(&self, percent: f64) {
833        self.cpu_usage_percent.set(percent);
834    }
835
836    /// Set active scenario mode (0=healthy, 1=degraded, 2=error, 3=chaos)
837    pub fn set_scenario_mode(&self, mode: i64) {
838        self.active_scenario_mode.set(mode);
839    }
840
841    /// Record a chaos trigger
842    pub fn record_chaos_trigger(&self) {
843        self.chaos_triggers_total.inc();
844    }
845
846    /// Record an HTTP request with path information
847    pub fn record_http_request_with_path(
848        &self,
849        path: &str,
850        method: &str,
851        status: u16,
852        duration_seconds: f64,
853    ) {
854        self.record_http_request_with_path_and_pillar(path, method, status, duration_seconds, "");
855    }
856
857    /// Record an HTTP request with path and pillar information
858    pub fn record_http_request_with_path_and_pillar(
859        &self,
860        path: &str,
861        method: &str,
862        status: u16,
863        duration_seconds: f64,
864        pillar: &str,
865    ) {
866        // Normalize path to avoid cardinality explosion
867        let normalized_path = normalize_path(path);
868        let status_str = status.to_string();
869
870        // Record by path
871        self.requests_by_path_total
872            .with_label_values(&[normalized_path.as_str(), method, status_str.as_str()])
873            .inc();
874        self.request_duration_by_path_seconds
875            .with_label_values(&[normalized_path.as_str(), method])
876            .observe(duration_seconds);
877
878        // Update average latency (simple moving average approximation)
879        // Note: For production use, consider using a proper moving average or quantiles
880        let current = self
881            .average_latency_by_path_seconds
882            .with_label_values(&[normalized_path.as_str(), method])
883            .get();
884        let new_avg = if current == 0.0 {
885            duration_seconds
886        } else {
887            (current * 0.95) + (duration_seconds * 0.05)
888        };
889        self.average_latency_by_path_seconds
890            .with_label_values(&[normalized_path.as_str(), method])
891            .set(new_avg);
892
893        // Also record in the general metrics with pillar
894        self.record_http_request_with_pillar(method, status, duration_seconds, pillar);
895    }
896
897    /// Record a WebSocket connection established
898    pub fn record_ws_connection_established(&self) {
899        self.ws_connections_total.inc();
900        self.ws_connections_active.inc();
901    }
902
903    /// Record a WebSocket connection closed
904    pub fn record_ws_connection_closed(&self, duration_seconds: f64, status: &str) {
905        self.ws_connections_active.dec();
906        self.ws_connection_duration_seconds
907            .with_label_values(&[status])
908            .observe(duration_seconds);
909    }
910
911    /// Record a WebSocket error
912    pub fn record_ws_error(&self) {
913        self.ws_errors_total.inc();
914    }
915
916    /// Record an SMTP connection established
917    pub fn record_smtp_connection_established(&self) {
918        self.smtp_connections_total.inc();
919        self.smtp_connections_active.inc();
920    }
921
922    /// Record an SMTP connection closed
923    pub fn record_smtp_connection_closed(&self) {
924        self.smtp_connections_active.dec();
925    }
926
927    /// Record an SMTP message received
928    pub fn record_smtp_message_received(&self) {
929        self.smtp_messages_received_total.inc();
930    }
931
932    /// Record an SMTP message stored
933    pub fn record_smtp_message_stored(&self) {
934        self.smtp_messages_stored_total.inc();
935    }
936
937    /// Record an SMTP error
938    pub fn record_smtp_error(&self, error_type: &str) {
939        self.smtp_errors_total.with_label_values(&[error_type]).inc();
940    }
941
942    /// Update thread count
943    pub fn update_thread_count(&self, count: f64) {
944        self.thread_count.set(count);
945    }
946
947    /// Update uptime
948    pub fn update_uptime(&self, seconds: f64) {
949        self.uptime_seconds.set(seconds);
950    }
951
952    // ==================== Workspace-specific metrics ====================
953
954    /// Record a workspace request
955    pub fn record_workspace_request(
956        &self,
957        workspace_id: &str,
958        method: &str,
959        status: u16,
960        duration_seconds: f64,
961    ) {
962        let status_str = status.to_string();
963        self.workspace_requests_total
964            .with_label_values(&[workspace_id, method, &status_str])
965            .inc();
966        self.workspace_requests_duration_seconds
967            .with_label_values(&[workspace_id, method])
968            .observe(duration_seconds);
969    }
970
971    /// Update workspace active routes count
972    pub fn update_workspace_active_routes(&self, workspace_id: &str, count: i64) {
973        self.workspace_active_routes.with_label_values(&[workspace_id]).set(count);
974    }
975
976    /// Record a workspace error
977    pub fn record_workspace_error(&self, workspace_id: &str, error_type: &str) {
978        self.workspace_errors_total.with_label_values(&[workspace_id, error_type]).inc();
979    }
980
981    /// Increment workspace active routes
982    pub fn increment_workspace_routes(&self, workspace_id: &str) {
983        self.workspace_active_routes.with_label_values(&[workspace_id]).inc();
984    }
985
986    /// Decrement workspace active routes
987    pub fn decrement_workspace_routes(&self, workspace_id: &str) {
988        self.workspace_active_routes.with_label_values(&[workspace_id]).dec();
989    }
990
991    // ==================== Marketplace metrics ====================
992
993    /// Record a marketplace publish operation
994    pub fn record_marketplace_publish(
995        &self,
996        item_type: &str,
997        success: bool,
998        duration_seconds: f64,
999    ) {
1000        let status = if success { "success" } else { "error" };
1001        self.marketplace_publish_total.with_label_values(&[item_type, status]).inc();
1002        self.marketplace_publish_duration_seconds
1003            .with_label_values(&[item_type])
1004            .observe(duration_seconds);
1005    }
1006
1007    /// Record a marketplace download operation
1008    pub fn record_marketplace_download(
1009        &self,
1010        item_type: &str,
1011        success: bool,
1012        duration_seconds: f64,
1013    ) {
1014        let status = if success { "success" } else { "error" };
1015        self.marketplace_download_total.with_label_values(&[item_type, status]).inc();
1016        self.marketplace_download_duration_seconds
1017            .with_label_values(&[item_type])
1018            .observe(duration_seconds);
1019    }
1020
1021    /// Record a marketplace search operation
1022    pub fn record_marketplace_search(&self, item_type: &str, success: bool, duration_seconds: f64) {
1023        let status = if success { "success" } else { "error" };
1024        self.marketplace_search_total.with_label_values(&[item_type, status]).inc();
1025        self.marketplace_search_duration_seconds
1026            .with_label_values(&[item_type])
1027            .observe(duration_seconds);
1028    }
1029
1030    /// Record a marketplace error
1031    pub fn record_marketplace_error(&self, item_type: &str, error_code: &str) {
1032        self.marketplace_errors_total.with_label_values(&[item_type, error_code]).inc();
1033    }
1034
1035    /// Update the total number of marketplace items
1036    pub fn update_marketplace_items_total(&self, item_type: &str, count: i64) {
1037        self.marketplace_items_total.with_label_values(&[item_type]).set(count);
1038    }
1039}
1040
1041/// Normalize path to avoid high cardinality
1042///
1043/// This function replaces dynamic path segments (IDs, UUIDs, etc.) with placeholders
1044/// to prevent metric explosion.
1045fn normalize_path(path: &str) -> String {
1046    let mut segments: Vec<&str> = path.split('/').collect();
1047
1048    for segment in &mut segments {
1049        // Replace UUIDs, numeric IDs, or hex strings with :id placeholder
1050        if is_uuid(segment)
1051            || segment.parse::<i64>().is_ok()
1052            || (segment.len() > 8 && segment.chars().all(|c| c.is_ascii_hexdigit()))
1053        {
1054            *segment = ":id";
1055        }
1056    }
1057
1058    segments.join("/")
1059}
1060
/// Check whether `s` is a UUID in its hyphenated textual form.
///
/// Returns `true` only when `s` is exactly 36 bytes long, has `-` at byte
/// offsets 8, 13, 18 and 23, and has an ASCII hex digit (either case) at
/// every other offset, i.e. the `8-4-4-4-12` layout.
///
/// Counting dashes alone is not enough: an ordinary 36-character slug with
/// four dashes would be mistaken for a UUID, so the positions and the hex
/// digits are checked explicitly.
fn is_uuid(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(offset, &byte)| match offset {
            // Group separators of the 8-4-4-4-12 layout.
            8 | 13 | 18 | 23 => byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}
1065
1066impl Default for MetricsRegistry {
1067    fn default() -> Self {
1068        Self::new()
1069    }
1070}
1071
1072/// Global metrics registry instance
1073static GLOBAL_REGISTRY: Lazy<MetricsRegistry> = Lazy::new(MetricsRegistry::new);
1074
1075/// Get the global metrics registry
1076pub fn get_global_registry() -> &'static MetricsRegistry {
1077    &GLOBAL_REGISTRY
1078}
1079
#[cfg(test)]
mod tests {
    //! Unit tests for `MetricsRegistry`.
    //!
    //! Each test records through the public helpers and then reads the
    //! affected metric back, so a wrong label set or a missed update fails the
    //! test instead of passing silently.

    use super::*;

    /// Absolute tolerance used when comparing `f64` metric readings.
    const TOLERANCE: f64 = 1e-9;

    /// Returns `true` when `actual` is within `TOLERANCE` of `expected`.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    #[test]
    fn test_metrics_registry_creation() {
        let registry = MetricsRegistry::new();
        assert!(registry.is_initialized());
    }

    #[test]
    fn test_record_http_request() {
        let registry = MetricsRegistry::new();
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);

        // Requests recorded without a pillar land under the "unknown" label.
        let get_count =
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get();
        let post_count =
            registry.requests_total.with_label_values(&["http", "POST", "201", "unknown"]).get();
        assert_eq!(get_count, 1);
        assert_eq!(post_count, 1);

        let get_durations =
            registry.requests_duration_seconds.with_label_values(&["http", "GET", "unknown"]);
        assert_eq!(get_durations.get_sample_count(), 1);
        assert!(approx_eq(get_durations.get_sample_sum(), 0.045));
    }

    #[test]
    fn test_record_http_request_with_pillar() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_pillar("GET", 200, 0.045, "reality");
        registry.record_http_request_with_pillar("POST", 201, 0.123, "contracts");

        let reality_count =
            registry.requests_total.with_label_values(&["http", "GET", "200", "reality"]).get();
        let contracts_count =
            registry.requests_total.with_label_values(&["http", "POST", "201", "contracts"]).get();
        assert_eq!(reality_count, 1);
        assert_eq!(contracts_count, 1);

        // Nothing was recorded without a pillar, so "unknown" stays at zero.
        let unknown_count =
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get();
        assert_eq!(unknown_count, 0);

        let contracts_durations =
            registry.requests_duration_seconds.with_label_values(&["http", "POST", "contracts"]);
        assert_eq!(contracts_durations.get_sample_count(), 1);
    }

    #[test]
    fn test_global_registry() {
        let first = get_global_registry();
        let second = get_global_registry();
        assert!(first.is_initialized());
        // Both calls must hand out the same shared instance.
        assert!(std::ptr::eq(first, second));
    }

    #[test]
    fn test_plugin_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_plugin_execution("test-plugin", true, 0.025);
        registry.record_plugin_execution("test-plugin", false, 0.050);

        let successes =
            registry.plugin_executions_total.with_label_values(&["test-plugin", "success"]).get();
        let failures =
            registry.plugin_executions_total.with_label_values(&["test-plugin", "failure"]).get();
        assert_eq!(successes, 1);
        assert_eq!(failures, 1);

        let durations =
            registry.plugin_execution_duration_seconds.with_label_values(&["test-plugin"]);
        assert_eq!(durations.get_sample_count(), 2);
    }

    #[test]
    fn test_websocket_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_ws_connection_established();
        assert_eq!(registry.ws_connections_active.get(), 1);

        registry.record_ws_connection_closed(120.5, "normal");
        registry.record_ws_error();

        assert_eq!(registry.ws_messages_sent.get(), 1);
        assert_eq!(registry.ws_messages_received.get(), 1);
        assert_eq!(registry.ws_connections_total.get(), 1);
        assert_eq!(registry.ws_connections_active.get(), 0);
        assert_eq!(registry.ws_errors_total.get(), 1);

        let lifetimes = registry.ws_connection_duration_seconds.with_label_values(&["normal"]);
        assert_eq!(lifetimes.get_sample_count(), 1);
        assert!(approx_eq(lifetimes.get_sample_sum(), 120.5));
    }

    #[test]
    fn test_path_normalization() {
        assert_eq!(normalize_path("/api/users/123"), "/api/users/:id");
        assert_eq!(
            normalize_path("/api/users/550e8400-e29b-41d4-a716-446655440000"),
            "/api/users/:id"
        );
        assert_eq!(normalize_path("/api/users/abc123def456"), "/api/users/:id");
        assert_eq!(normalize_path("/api/users/list"), "/api/users/list");
    }

    #[test]
    fn test_path_based_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_http_request_with_path("/api/users/123", "GET", 200, 0.045);
        registry.record_http_request_with_path("/api/users/456", "GET", 200, 0.055);
        registry.record_http_request_with_path("/api/posts", "POST", 201, 0.123);

        // Both user requests collapse onto the same normalized path.
        let users_count = registry
            .requests_by_path_total
            .with_label_values(&["/api/users/:id", "GET", "200"])
            .get();
        let posts_count =
            registry.requests_by_path_total.with_label_values(&["/api/posts", "POST", "201"]).get();
        assert_eq!(users_count, 2);
        assert_eq!(posts_count, 1);

        let users_durations =
            registry.request_duration_by_path_seconds.with_label_values(&["/api/users/:id", "GET"]);
        assert_eq!(users_durations.get_sample_count(), 2);

        // First sample seeds the average; the second is blended in at 5%.
        let users_latency = registry
            .average_latency_by_path_seconds
            .with_label_values(&["/api/users/:id", "GET"])
            .get();
        assert!(approx_eq(users_latency, (0.045 * 0.95) + (0.055 * 0.05)));

        // Path-based recording also feeds the protocol-level counter.
        let general_count =
            registry.requests_total.with_label_values(&["http", "GET", "200", "unknown"]).get();
        assert_eq!(general_count, 2);
    }

    #[test]
    fn test_smtp_metrics() {
        let registry = MetricsRegistry::new();
        registry.record_smtp_connection_established();
        assert_eq!(registry.smtp_connections_active.get(), 1);

        registry.record_smtp_message_received();
        registry.record_smtp_message_stored();
        registry.record_smtp_connection_closed();
        registry.record_smtp_error("timeout");

        assert_eq!(registry.smtp_connections_total.get(), 1);
        assert_eq!(registry.smtp_connections_active.get(), 0);
        assert_eq!(registry.smtp_messages_received_total.get(), 1);
        assert_eq!(registry.smtp_messages_stored_total.get(), 1);
        assert_eq!(registry.smtp_errors_total.with_label_values(&["timeout"]).get(), 1);
    }

    #[test]
    fn test_system_metrics() {
        let registry = MetricsRegistry::new();
        let hundred_mb = 1024.0 * 1024.0 * 100.0;
        registry.update_memory_usage(hundred_mb);
        registry.update_cpu_usage(45.5);
        registry.update_thread_count(25.0);
        registry.update_uptime(3600.0); // 1 hour

        assert!(approx_eq(registry.memory_usage_bytes.get(), hundred_mb));
        assert!(approx_eq(registry.cpu_usage_percent.get(), 45.5));
        assert!(approx_eq(registry.thread_count.get(), 25.0));
        assert!(approx_eq(registry.uptime_seconds.get(), 3600.0));
    }

    #[test]
    fn test_workspace_metrics() {
        let registry = MetricsRegistry::new();

        // Record workspace requests
        registry.record_workspace_request("workspace1", "GET", 200, 0.045);
        registry.record_workspace_request("workspace1", "POST", 201, 0.123);
        registry.record_workspace_request("workspace2", "GET", 200, 0.055);

        let ws1_get = registry
            .workspace_requests_total
            .with_label_values(&["workspace1", "GET", "200"])
            .get();
        let ws1_post = registry
            .workspace_requests_total
            .with_label_values(&["workspace1", "POST", "201"])
            .get();
        let ws2_get = registry
            .workspace_requests_total
            .with_label_values(&["workspace2", "GET", "200"])
            .get();
        assert_eq!(ws1_get, 1);
        assert_eq!(ws1_post, 1);
        assert_eq!(ws2_get, 1);

        // Update active routes
        registry.update_workspace_active_routes("workspace1", 10);
        registry.update_workspace_active_routes("workspace2", 5);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace1"]).get(), 10);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace2"]).get(), 5);

        // Record errors
        registry.record_workspace_error("workspace1", "validation");
        registry.record_workspace_error("workspace2", "timeout");
        let ws1_errors = registry
            .workspace_errors_total
            .with_label_values(&["workspace1", "validation"])
            .get();
        let ws2_errors =
            registry.workspace_errors_total.with_label_values(&["workspace2", "timeout"]).get();
        assert_eq!(ws1_errors, 1);
        assert_eq!(ws2_errors, 1);

        // Test increment/decrement
        registry.increment_workspace_routes("workspace1");
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace1"]).get(), 11);
        registry.decrement_workspace_routes("workspace1");
        assert_eq!(registry.workspace_active_routes.with_label_values(&["workspace1"]).get(), 10);
    }

    #[test]
    fn test_workspace_metrics_isolation() {
        let registry = MetricsRegistry::new();

        // Ensure metrics for different workspaces are independent
        registry.record_workspace_request("ws1", "GET", 200, 0.1);
        registry.record_workspace_request("ws2", "GET", 200, 0.2);

        registry.update_workspace_active_routes("ws1", 5);
        registry.update_workspace_active_routes("ws2", 10);

        // Each workspace keeps its own gauge value.
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws1"]).get(), 5);
        assert_eq!(registry.workspace_active_routes.with_label_values(&["ws2"]).get(), 10);

        // Each workspace keeps its own duration samples.
        let ws1_durations =
            registry.workspace_requests_duration_seconds.with_label_values(&["ws1", "GET"]);
        let ws2_durations =
            registry.workspace_requests_duration_seconds.with_label_values(&["ws2", "GET"]);
        assert_eq!(ws1_durations.get_sample_count(), 1);
        assert_eq!(ws2_durations.get_sample_count(), 1);
        assert!(approx_eq(ws1_durations.get_sample_sum(), 0.1));
        assert!(approx_eq(ws2_durations.get_sample_sum(), 0.2));
    }
}