Skip to main content

mockforge_observability/prometheus/
exporter.rs

1//! Prometheus metrics exporter
2//!
3//! Provides HTTP endpoints for Prometheus to scrape metrics
4
5use axum::{
6    extract::State,
7    http::StatusCode,
8    response::{IntoResponse, Response},
9    routing::get,
10    Router,
11};
12use prometheus::{Encoder, TextEncoder};
13use std::sync::Arc;
14use tracing::{debug, error};
15
16use super::metrics::MetricsRegistry;
17
18/// Handler for the /metrics endpoint.
19///
20/// Gathers from two registries: the local `MetricsRegistry` (HTTP request
21/// counters, protocol metrics, etc., wired by the observability layer) and
22/// `prometheus::default_registry()` (where `mockforge-chaos` registers its
23/// `mockforge_chaos_*` counters via `register_counter_vec!`). Without this
24/// merge, chaos counters were silently dropped from `/metrics` even when
25/// they were being incremented — issue #79 follow-up: Srikanth ran a bench
26/// that triggered 80 fault injections but
27/// `curl /metrics | grep mockforge_chaos_` returned nothing because the
28/// two registries were disjoint.
29pub async fn metrics_handler(
30    State(registry): State<Arc<MetricsRegistry>>,
31) -> Result<impl IntoResponse, MetricsError> {
32    debug!("Serving Prometheus metrics");
33
34    let encoder = TextEncoder::new();
35    let mut metric_families = registry.registry().gather();
36    metric_families.extend(prometheus::default_registry().gather());
37
38    let mut buffer = Vec::new();
39    encoder.encode(&metric_families, &mut buffer).map_err(|e| {
40        error!("Failed to encode metrics: {}", e);
41        MetricsError::EncodingError(e.to_string())
42    })?;
43
44    let body = String::from_utf8(buffer).map_err(|e| {
45        error!("Failed to convert metrics to UTF-8: {}", e);
46        MetricsError::EncodingError(e.to_string())
47    })?;
48
49    Ok((
50        StatusCode::OK,
51        [("content-type", "text/plain; version=0.0.4; charset=utf-8")],
52        body,
53    ))
54}
55
56/// Health check endpoint for the metrics server
57pub async fn health_handler() -> impl IntoResponse {
58    (StatusCode::OK, "OK")
59}
60
61/// Create a router for the Prometheus metrics endpoint
62pub fn prometheus_router(registry: Arc<MetricsRegistry>) -> Router {
63    Router::new()
64        .route("/metrics", get(metrics_handler))
65        .route("/health", get(health_handler))
66        .with_state(registry)
67}
68
/// Error type for metrics operations.
///
/// Implements `IntoResponse`, so handlers can return it directly and have it
/// rendered as an HTTP error response.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MetricsError {
    /// The gathered metrics could not be rendered as text. The payload is
    /// the message of the underlying encoder or UTF-8 conversion error.
    EncodingError(String),
}
74
75impl IntoResponse for MetricsError {
76    fn into_response(self) -> Response {
77        let (status, message) = match self {
78            MetricsError::EncodingError(msg) => {
79                (StatusCode::INTERNAL_SERVER_ERROR, format!("Encoding error: {}", msg))
80            }
81        };
82
83        (status, message).into_response()
84    }
85}
86
87impl std::fmt::Display for MetricsError {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        match self {
90            MetricsError::EncodingError(msg) => write!(f, "Encoding error: {}", msg),
91        }
92    }
93}
94
95impl std::error::Error for MetricsError {}
96
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prometheus::MetricsRegistry;

    /// Builds a fresh registry wrapped the way the handler's `State` expects.
    fn new_registry() -> Arc<MetricsRegistry> {
        Arc::new(MetricsRegistry::new())
    }

    /// Runs `metrics_handler` against `registry` and returns the rendered
    /// response. Fails the test if the handler returns an error or the
    /// response status is not `200 OK`.
    async fn scrape(registry: Arc<MetricsRegistry>) -> Response {
        let response = metrics_handler(State(registry))
            .await
            .expect("metrics handler should succeed")
            .into_response();
        assert_eq!(response.status(), StatusCode::OK);
        response
    }

    /// Same as [`scrape`], for tests that only care that the scrape succeeds.
    async fn assert_scrape_ok(registry: Arc<MetricsRegistry>) {
        let _response = scrape(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler() {
        let registry = new_registry();

        // Record some test metrics
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);

        let response = scrape(registry).await;

        // The handler must advertise the Prometheus text exposition format.
        let content_type = response
            .headers()
            .get("content-type")
            .expect("content-type header should be set")
            .to_str()
            .expect("content-type header should be valid ASCII");
        assert_eq!(content_type, "text/plain; version=0.0.4; charset=utf-8");
    }

    #[tokio::test]
    async fn test_health_handler() {
        let response = health_handler().await.into_response();
        assert_eq!(response.status(), StatusCode::OK);
    }

    #[test]
    fn test_prometheus_router_creation() {
        // Construction itself is the assertion: building the router must not panic.
        let _router = prometheus_router(new_registry());
    }

    #[test]
    fn test_metrics_error_display() {
        let error = MetricsError::EncodingError("test error message".to_string());
        assert_eq!(error.to_string(), "Encoding error: test error message");
    }

    #[test]
    fn test_metrics_error_debug() {
        let error = MetricsError::EncodingError("test".to_string());
        let debug = format!("{:?}", error);
        assert!(debug.contains("EncodingError"));
        assert!(debug.contains("test"));
    }

    #[test]
    fn test_metrics_error_is_error_trait() {
        let error = MetricsError::EncodingError("test".to_string());
        // Verify it implements std::error::Error
        let _: &dyn std::error::Error = &error;
    }

    #[test]
    fn test_metrics_error_into_response() {
        let error = MetricsError::EncodingError("encoding failed".to_string());
        let response = error.into_response();
        assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR);
    }

    #[tokio::test]
    async fn test_metrics_handler_with_various_metrics() {
        let registry = new_registry();

        // Record various types of metrics
        registry.record_http_request("GET", 200, 0.045);
        registry.record_http_request("POST", 201, 0.123);
        registry.record_http_request("DELETE", 204, 0.015);
        registry.record_http_request("PUT", 500, 0.500);
        registry.record_grpc_request("ListUsers", "OK", 0.025);
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_plugin_execution("test-plugin", true, 0.010);
        registry.record_error("http", "timeout");
        registry.update_memory_usage(1024.0 * 1024.0 * 50.0);
        registry.update_cpu_usage(25.5);

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_empty_registry() {
        // No metrics recorded
        assert_scrape_ok(new_registry()).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_grpc_metrics() {
        let registry = new_registry();
        registry.record_grpc_request("GetUser", "OK", 0.025);
        registry.record_grpc_request("CreateUser", "INTERNAL", 0.150);
        registry.record_grpc_request_with_pillar("ListUsers", "OK", 0.050, "reality");

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_websocket_metrics() {
        let registry = new_registry();
        registry.record_ws_connection_established();
        registry.record_ws_message_sent();
        registry.record_ws_message_received();
        registry.record_ws_error();
        registry.record_ws_connection_closed(60.0, "normal");

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_smtp_metrics() {
        let registry = new_registry();
        registry.record_smtp_connection_established();
        registry.record_smtp_message_received();
        registry.record_smtp_message_stored();
        registry.record_smtp_error("auth_failed");
        registry.record_smtp_connection_closed();

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_marketplace_metrics() {
        let registry = new_registry();
        registry.record_marketplace_publish("plugin", true, 2.5);
        registry.record_marketplace_download("template", true, 0.5);
        registry.record_marketplace_search("scenario", true, 0.1);
        registry.record_marketplace_error("plugin", "validation_failed");
        registry.update_marketplace_items_total("plugin", 150);

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_workspace_metrics() {
        let registry = new_registry();
        registry.record_workspace_request("workspace-1", "GET", 200, 0.05);
        registry.update_workspace_active_routes("workspace-1", 10);
        registry.record_workspace_error("workspace-1", "timeout");
        registry.increment_workspace_routes("workspace-1");
        registry.decrement_workspace_routes("workspace-1");

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_scenario_metrics() {
        let registry = new_registry();
        registry.set_scenario_mode(0); // healthy
        registry.record_chaos_trigger();
        registry.set_scenario_mode(3); // chaos

        assert_scrape_ok(registry).await;
    }

    #[tokio::test]
    async fn test_metrics_handler_with_path_metrics() {
        let registry = new_registry();
        registry.record_http_request_with_path("/api/users/123", "GET", 200, 0.05);
        registry.record_http_request_with_path("/api/users/456", "GET", 200, 0.06);
        registry.record_http_request_with_path_and_pillar(
            "/api/items",
            "POST",
            201,
            0.1,
            "reality",
        );

        assert_scrape_ok(registry).await;
    }
}