Skip to main content

rmcp_server_kit/
metrics.rs

1//! Prometheus metrics for MCP servers.
2//!
3//! Provides a shared [`crate::metrics::McpMetrics`] registry with standard HTTP counters.
4//! The transport layer exposes these via a `/metrics` endpoint on a
5//! dedicated listener when `metrics_enabled` is true.
6//!
7//! # Public surface and the `prometheus` crate
8//!
9//! [`crate::metrics::McpMetrics::registry`] and the `IntCounterVec` / `HistogramVec` fields are
10//! intentionally exposed so downstream crates can register additional custom
11//! collectors against the same registry. This re-exports the [`prometheus`]
12//! crate types as part of `rmcp-server-kit`'s public API; pin the same major version to
13//! avoid type-identity mismatches when registering custom metrics.
14
15use std::sync::Arc;
16
17use prometheus::{
18    Encoder, HistogramOpts, HistogramVec, IntCounterVec, Registry, TextEncoder, opts,
19};
20
21use crate::error::McpxError;
22
/// Upper bounds, in seconds, of the default latency histogram buckets used
/// for HTTP request durations.
///
/// The ladder runs from one millisecond to five seconds so that both very
/// fast responses (for example health checks) and slow calls that wait on
/// outbound dependencies land in a meaningful bucket. Operators who need a
/// different layout can register their own histogram against
/// [`McpMetrics::registry`].
const HTTP_DURATION_BUCKETS: &[f64] = &[
    0.001, // 1 ms
    0.005, // 5 ms
    0.01,  // 10 ms
    0.025, // 25 ms
    0.05,  // 50 ms
    0.1,   // 100 ms
    0.25,  // 250 ms
    0.5,   // 500 ms
    1.0,   // 1 s
    2.5,   // 2.5 s
    5.0,   // 5 s
];
32
33/// Collected Prometheus metrics for an MCP server.
34#[derive(Clone, Debug)]
35#[non_exhaustive]
36pub struct McpMetrics {
37    /// Prometheus registry holding all counters and histograms.
38    pub registry: Registry,
39    /// Total HTTP requests by method, path, and status code.
40    pub http_requests_total: IntCounterVec,
41    /// HTTP request duration in seconds by method and path.
42    pub http_request_duration_seconds: HistogramVec,
43}
44
45impl McpMetrics {
46    /// Create a new metrics registry with default MCP counters.
47    ///
48    /// # Errors
49    ///
50    /// Returns [`McpxError::Metrics`] if counter registration fails (should
51    /// not happen unless duplicate registrations occur).
52    pub fn new() -> Result<Self, McpxError> {
53        let registry = Registry::new();
54
55        let http_requests_total = IntCounterVec::new(
56            opts!("rmcp_server_kit_http_requests_total", "Total HTTP requests"),
57            &["method", "path", "status"],
58        )
59        .map_err(|e| McpxError::Metrics(e.to_string()))?;
60        registry
61            .register(Box::new(http_requests_total.clone()))
62            .map_err(|e| McpxError::Metrics(e.to_string()))?;
63
64        let http_request_duration_seconds = HistogramVec::new(
65            HistogramOpts::new(
66                "rmcp_server_kit_http_request_duration_seconds",
67                "HTTP request duration in seconds",
68            )
69            .buckets(HTTP_DURATION_BUCKETS.to_vec()),
70            &["method", "path"],
71        )
72        .map_err(|e| McpxError::Metrics(e.to_string()))?;
73        registry
74            .register(Box::new(http_request_duration_seconds.clone()))
75            .map_err(|e| McpxError::Metrics(e.to_string()))?;
76
77        Ok(Self {
78            registry,
79            http_requests_total,
80            http_request_duration_seconds,
81        })
82    }
83
84    /// Encode all collected metrics as Prometheus text format.
85    #[must_use]
86    pub fn encode(&self) -> String {
87        let encoder = TextEncoder::new();
88        let metric_families = self.registry.gather();
89        let mut buf = Vec::new();
90        if let Err(e) = encoder.encode(&metric_families, &mut buf) {
91            tracing::warn!(error = %e, "prometheus encode failed");
92            return String::new();
93        }
94        // TextEncoder always produces valid UTF-8; fall back to empty on
95        // the near-impossible chance it doesn't.
96        String::from_utf8(buf).unwrap_or_default()
97    }
98}
99
100/// Spawn a dedicated HTTP listener that serves Prometheus metrics on `/metrics`.
101///
102/// # Errors
103///
104/// Returns [`McpxError::Startup`] if the TCP listener cannot bind or the
105/// underlying axum server fails.
106pub async fn serve_metrics(bind: String, metrics: Arc<McpMetrics>) -> Result<(), McpxError> {
107    let app = axum::Router::new().route(
108        "/metrics",
109        axum::routing::get(move || {
110            let m = Arc::clone(&metrics);
111            async move { m.encode() }
112        }),
113    );
114
115    let listener = tokio::net::TcpListener::bind(&bind)
116        .await
117        .map_err(|e| McpxError::Startup(format!("metrics bind {bind}: {e}")))?;
118    tracing::info!("metrics endpoint listening on http://{bind}/metrics");
119    axum::serve(listener, app)
120        .await
121        .map_err(|e| McpxError::Startup(format!("metrics serve: {e}")))?;
122    Ok(())
123}
124
#[cfg(test)]
mod tests {
    // Test code may unwrap, panic, index and print freely.
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::panic,
        clippy::indexing_slicing,
        clippy::unwrap_in_result,
        clippy::print_stdout,
        clippy::print_stderr
    )]
    use super::*;

    /// Once each default collector has a sample, the registry gathers
    /// exactly two metric families.
    #[test]
    fn new_creates_registry_with_counters() {
        let metrics = McpMetrics::new().unwrap();
        // Touch one series on each collector so both appear in the gather output.
        let requests = metrics
            .http_requests_total
            .with_label_values(&["GET", "/test", "200"]);
        requests.inc();
        let latency = metrics
            .http_request_duration_seconds
            .with_label_values(&["GET", "/test"]);
        latency.observe(0.1);

        let families = metrics.registry.gather();
        assert_eq!(families.len(), 2);
    }

    /// Encoding before any sample is recorded must not fail; the text is
    /// either empty or already mentions the crate's metric prefix.
    #[test]
    fn encode_empty_registry() {
        let metrics = McpMetrics::new().unwrap();
        let text = metrics.encode();
        let acceptable = text.is_empty() || text.contains("rmcp_server_kit_");
        assert!(acceptable);
    }

    /// A single increment is visible in the encoded text with its labels
    /// and a sample value of one.
    #[test]
    fn counter_increment_shows_in_encode() {
        let metrics = McpMetrics::new().unwrap();
        metrics
            .http_requests_total
            .with_label_values(&["GET", "/healthz", "200"])
            .inc();

        let text = metrics.encode();
        let expected_fragments = [
            "rmcp_server_kit_http_requests_total",
            "method=\"GET\"",
            "path=\"/healthz\"",
            "status=\"200\"",
            " 1", // count = 1
        ];
        for fragment in expected_fragments {
            assert!(text.contains(fragment));
        }
    }

    /// An observation makes the histogram and its labels show up in the
    /// encoded text.
    #[test]
    fn histogram_observe_shows_in_encode() {
        let metrics = McpMetrics::new().unwrap();
        metrics
            .http_request_duration_seconds
            .with_label_values(&["POST", "/mcp"])
            .observe(0.042);

        let text = metrics.encode();
        let expected_fragments = [
            "rmcp_server_kit_http_request_duration_seconds",
            "method=\"POST\"",
            "path=\"/mcp\"",
        ];
        for fragment in expected_fragments {
            assert!(text.contains(fragment));
        }
    }

    /// Repeated increments on the same series add up.
    #[test]
    fn multiple_increments_accumulate() {
        let metrics = McpMetrics::new().unwrap();
        let series = metrics
            .http_requests_total
            .with_label_values(&["POST", "/mcp", "200"]);
        for _ in 0..3 {
            series.inc();
        }

        let text = metrics.encode();
        assert!(text.contains(" 3")); // count = 3
    }

    /// A clone observes samples recorded through the original value.
    #[test]
    fn clone_shares_registry() {
        let original = McpMetrics::new().unwrap();
        let copy = original.clone();
        original
            .http_requests_total
            .with_label_values(&["GET", "/test", "200"])
            .inc();

        // Encode through the clone: it should report the increment made above.
        let text = copy.encode();
        assert!(text.contains(" 1"));
    }
}