mcpkit_server/
health.rs

1//! Health check utilities for MCP servers.
2//!
3//! This module provides a standardized health check mechanism for MCP servers,
4//! supporting both simple and detailed health status reporting.
5//!
6//! # Example
7//!
8//! ```rust
9//! use mcpkit_server::health::{HealthChecker, HealthStatus, ComponentHealth};
10//!
11//! // Create a health checker
12//! let mut checker = HealthChecker::new("my-mcp-server");
13//!
14//! // Add component checks
15//! checker.add_check("database", || {
16//!     // Your database health check logic
17//!     ComponentHealth::healthy()
18//! });
19//!
20//! checker.add_check("cache", || {
21//!     ComponentHealth::healthy().with_detail("hit_rate", "95%")
22//! });
23//!
//! // Run every registered check and collect the results into a report
//! let report = checker.check();
//! assert!(report.is_healthy());
27//! ```
28
29use std::collections::HashMap;
30use std::sync::Arc;
31use std::time::{Duration, Instant};
32
/// Overall health status of the service.
///
/// The enum carries no data, so it is cheap to copy and can be used as a
/// hash-map or hash-set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HealthStatus {
    /// All components are healthy.
    Healthy,
    /// Some components are degraded but the service is functional.
    Degraded,
    /// The service is unhealthy and may not function correctly.
    Unhealthy,
}

impl HealthStatus {
    /// Returns `true` only for [`HealthStatus::Healthy`].
    #[must_use]
    pub fn is_healthy(&self) -> bool {
        matches!(self, Self::Healthy)
    }

    /// Returns `true` only for [`HealthStatus::Degraded`].
    #[must_use]
    pub fn is_degraded(&self) -> bool {
        matches!(self, Self::Degraded)
    }

    /// Returns `true` only for [`HealthStatus::Unhealthy`].
    #[must_use]
    pub fn is_unhealthy(&self) -> bool {
        matches!(self, Self::Unhealthy)
    }

    /// Get the status as an HTTP status code.
    ///
    /// `Healthy` and `Degraded` both map to `200` (a degraded service is
    /// still operational); `Unhealthy` maps to `503`.
    #[must_use]
    pub fn http_status_code(&self) -> u16 {
        match self {
            Self::Healthy | Self::Degraded => 200,
            Self::Unhealthy => 503,
        }
    }

    /// Get the status as a lowercase string: `"healthy"`, `"degraded"` or
    /// `"unhealthy"`.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Healthy => "healthy",
            Self::Degraded => "degraded",
            Self::Unhealthy => "unhealthy",
        }
    }
}

impl std::fmt::Display for HealthStatus {
    /// Writes the same text as [`HealthStatus::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` (unlike `write!(f, "{}", ..)`) applies the caller's width,
        // fill and alignment flags, so `{:>10}` pads as it does for `&str`.
        f.pad(self.as_str())
    }
}
89
90/// Health status of a single component.
91#[derive(Debug, Clone)]
92pub struct ComponentHealth {
93    /// Component health status.
94    pub status: HealthStatus,
95    /// Optional message describing the status.
96    pub message: Option<String>,
97    /// Additional details about the component.
98    pub details: HashMap<String, String>,
99    /// Time taken to check this component.
100    pub check_duration: Duration,
101}
102
103impl ComponentHealth {
104    /// Create a healthy component status.
105    #[must_use]
106    pub fn healthy() -> Self {
107        Self {
108            status: HealthStatus::Healthy,
109            message: None,
110            details: HashMap::new(),
111            check_duration: Duration::ZERO,
112        }
113    }
114
115    /// Create a degraded component status.
116    #[must_use]
117    pub fn degraded(message: impl Into<String>) -> Self {
118        Self {
119            status: HealthStatus::Degraded,
120            message: Some(message.into()),
121            details: HashMap::new(),
122            check_duration: Duration::ZERO,
123        }
124    }
125
126    /// Create an unhealthy component status.
127    #[must_use]
128    pub fn unhealthy(message: impl Into<String>) -> Self {
129        Self {
130            status: HealthStatus::Unhealthy,
131            message: Some(message.into()),
132            details: HashMap::new(),
133            check_duration: Duration::ZERO,
134        }
135    }
136
137    /// Add a detail to the health status.
138    #[must_use]
139    pub fn with_detail(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
140        self.details.insert(key.into(), value.into());
141        self
142    }
143
144    /// Add multiple details to the health status.
145    #[must_use]
146    pub fn with_details(mut self, details: impl IntoIterator<Item = (String, String)>) -> Self {
147        self.details.extend(details);
148        self
149    }
150
151    /// Set the check duration.
152    #[must_use]
153    pub fn with_duration(mut self, duration: Duration) -> Self {
154        self.check_duration = duration;
155        self
156    }
157}
158
159impl Default for ComponentHealth {
160    fn default() -> Self {
161        Self::healthy()
162    }
163}
164
/// Type alias for health check functions.
///
/// A check is a reference-counted (`Arc`), `Send + Sync` closure that takes
/// no arguments and returns the [`ComponentHealth`] of one component.
pub type HealthCheckFn = Arc<dyn Fn() -> ComponentHealth + Send + Sync>;
167
168/// Detailed health check result.
169#[derive(Debug, Clone)]
170pub struct HealthReport {
171    /// Service name.
172    pub service: String,
173    /// Overall health status.
174    pub status: HealthStatus,
175    /// Service version (if available).
176    pub version: Option<String>,
177    /// How long the health check took.
178    pub check_duration: Duration,
179    /// Individual component health statuses.
180    pub components: HashMap<String, ComponentHealth>,
181    /// Timestamp of the health check.
182    pub timestamp: std::time::SystemTime,
183}
184
185impl HealthReport {
186    /// Check if the service is healthy.
187    #[must_use]
188    pub fn is_healthy(&self) -> bool {
189        self.status.is_healthy()
190    }
191
192    /// Get the number of healthy components.
193    #[must_use]
194    pub fn healthy_count(&self) -> usize {
195        self.components
196            .values()
197            .filter(|c| c.status.is_healthy())
198            .count()
199    }
200
201    /// Get the number of degraded components.
202    #[must_use]
203    pub fn degraded_count(&self) -> usize {
204        self.components
205            .values()
206            .filter(|c| c.status.is_degraded())
207            .count()
208    }
209
210    /// Get the number of unhealthy components.
211    #[must_use]
212    pub fn unhealthy_count(&self) -> usize {
213        self.components
214            .values()
215            .filter(|c| c.status.is_unhealthy())
216            .count()
217    }
218
219    /// Convert to a JSON-serializable structure.
220    #[must_use]
221    pub fn to_json(&self) -> serde_json::Value {
222        let components: HashMap<String, serde_json::Value> = self
223            .components
224            .iter()
225            .map(|(name, health)| {
226                let mut obj = serde_json::json!({
227                    "status": health.status.as_str(),
228                    "check_duration_ms": health.check_duration.as_millis(),
229                });
230
231                if let Some(msg) = &health.message {
232                    obj["message"] = serde_json::json!(msg);
233                }
234
235                if !health.details.is_empty() {
236                    obj["details"] = serde_json::json!(health.details);
237                }
238
239                (name.clone(), obj)
240            })
241            .collect();
242
243        let mut result = serde_json::json!({
244            "status": self.status.as_str(),
245            "service": self.service,
246            "check_duration_ms": self.check_duration.as_millis(),
247            "components": components,
248        });
249
250        if let Some(version) = &self.version {
251            result["version"] = serde_json::json!(version);
252        }
253
254        result
255    }
256}
257
258/// Health checker for MCP servers.
259///
260/// Provides a centralized way to register and execute health checks.
261#[derive(Default)]
262pub struct HealthChecker {
263    service_name: String,
264    version: Option<String>,
265    checks: HashMap<String, HealthCheckFn>,
266}
267
268impl HealthChecker {
269    /// Create a new health checker.
270    #[must_use]
271    pub fn new(service_name: impl Into<String>) -> Self {
272        Self {
273            service_name: service_name.into(),
274            version: None,
275            checks: HashMap::new(),
276        }
277    }
278
279    /// Set the service version.
280    #[must_use]
281    pub fn with_version(mut self, version: impl Into<String>) -> Self {
282        self.version = Some(version.into());
283        self
284    }
285
286    /// Add a health check for a component.
287    pub fn add_check<F>(&mut self, name: impl Into<String>, check: F)
288    where
289        F: Fn() -> ComponentHealth + Send + Sync + 'static,
290    {
291        self.checks.insert(name.into(), Arc::new(check));
292    }
293
294    /// Add a simple health check that just returns healthy.
295    pub fn add_simple_check(&mut self, name: impl Into<String>) {
296        self.add_check(name, ComponentHealth::healthy);
297    }
298
299    /// Run all health checks and return a report.
300    #[must_use]
301    pub fn check(&self) -> HealthReport {
302        let start = Instant::now();
303        let mut components = HashMap::new();
304        let mut overall_status = HealthStatus::Healthy;
305
306        for (name, check_fn) in &self.checks {
307            let check_start = Instant::now();
308            let mut result = check_fn();
309            result.check_duration = check_start.elapsed();
310
311            // Update overall status based on component status
312            match (&overall_status, &result.status) {
313                (HealthStatus::Healthy, HealthStatus::Degraded) => {
314                    overall_status = HealthStatus::Degraded;
315                }
316                (_, HealthStatus::Unhealthy) => {
317                    overall_status = HealthStatus::Unhealthy;
318                }
319                _ => {}
320            }
321
322            components.insert(name.clone(), result);
323        }
324
325        HealthReport {
326            service: self.service_name.clone(),
327            status: overall_status,
328            version: self.version.clone(),
329            check_duration: start.elapsed(),
330            components,
331            timestamp: std::time::SystemTime::now(),
332        }
333    }
334
335    /// Run a quick liveness check (just verifies the service is running).
336    #[must_use]
337    pub fn liveness(&self) -> bool {
338        true
339    }
340
341    /// Run a readiness check (verifies all components are ready).
342    #[must_use]
343    pub fn readiness(&self) -> bool {
344        self.check().is_healthy()
345    }
346}
347
348impl std::fmt::Debug for HealthChecker {
349    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
350        f.debug_struct("HealthChecker")
351            .field("service_name", &self.service_name)
352            .field("version", &self.version)
353            .field("check_count", &self.checks.len())
354            .finish()
355    }
356}
357
358/// Liveness probe response.
359#[derive(Debug, Clone)]
360pub struct LivenessResponse {
361    /// Whether the service is alive.
362    pub alive: bool,
363    /// Service name.
364    pub service: String,
365}
366
367impl LivenessResponse {
368    /// Create a new liveness response.
369    #[must_use]
370    pub fn new(service: impl Into<String>, alive: bool) -> Self {
371        Self {
372            alive,
373            service: service.into(),
374        }
375    }
376
377    /// Create an alive response.
378    #[must_use]
379    pub fn alive(service: impl Into<String>) -> Self {
380        Self::new(service, true)
381    }
382
383    /// Convert to JSON.
384    #[must_use]
385    pub fn to_json(&self) -> serde_json::Value {
386        serde_json::json!({
387            "alive": self.alive,
388            "service": self.service,
389        })
390    }
391}
392
393/// Readiness probe response.
394#[derive(Debug, Clone)]
395pub struct ReadinessResponse {
396    /// Whether the service is ready.
397    pub ready: bool,
398    /// Service name.
399    pub service: String,
400    /// Optional reason if not ready.
401    pub reason: Option<String>,
402}
403
404impl ReadinessResponse {
405    /// Create a new readiness response.
406    #[must_use]
407    pub fn new(service: impl Into<String>, ready: bool) -> Self {
408        Self {
409            ready,
410            service: service.into(),
411            reason: None,
412        }
413    }
414
415    /// Create a ready response.
416    #[must_use]
417    pub fn ready(service: impl Into<String>) -> Self {
418        Self::new(service, true)
419    }
420
421    /// Create a not-ready response with a reason.
422    #[must_use]
423    pub fn not_ready(service: impl Into<String>, reason: impl Into<String>) -> Self {
424        Self {
425            ready: false,
426            service: service.into(),
427            reason: Some(reason.into()),
428        }
429    }
430
431    /// Convert to JSON.
432    #[must_use]
433    pub fn to_json(&self) -> serde_json::Value {
434        let mut result = serde_json::json!({
435            "ready": self.ready,
436            "service": self.service,
437        });
438
439        if let Some(reason) = &self.reason {
440            result["reason"] = serde_json::json!(reason);
441        }
442
443        result
444    }
445}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// Only `Healthy` counts as healthy; only `Unhealthy` maps to 503.
    #[test]
    fn test_health_status() {
        assert!(HealthStatus::Healthy.is_healthy());
        assert!(!HealthStatus::Degraded.is_healthy());
        assert!(!HealthStatus::Unhealthy.is_healthy());

        assert_eq!(HealthStatus::Healthy.http_status_code(), 200);
        assert_eq!(HealthStatus::Degraded.http_status_code(), 200);
        assert_eq!(HealthStatus::Unhealthy.http_status_code(), 503);
    }

    /// Details added through the builder are stored on the component.
    #[test]
    fn test_component_health() {
        let component = ComponentHealth::healthy()
            .with_detail("connections", "10")
            .with_detail("memory_mb", "256");

        assert!(component.status.is_healthy());
        assert_eq!(
            component.details.get("connections"),
            Some(&"10".to_string())
        );
    }

    /// Two healthy checks produce a healthy report that counts both.
    #[test]
    fn test_health_checker() {
        let mut registry = HealthChecker::new("test-service").with_version("1.0.0");

        registry.add_check("component_a", ComponentHealth::healthy);
        registry.add_check("component_b", || {
            ComponentHealth::healthy().with_detail("status", "ok")
        });

        let outcome = registry.check();

        assert!(outcome.is_healthy());
        assert_eq!(outcome.healthy_count(), 2);
        assert_eq!(outcome.unhealthy_count(), 0);
    }

    /// One degraded check downgrades the whole report to `Degraded`.
    #[test]
    fn test_degraded_status() {
        let mut registry = HealthChecker::new("test-service");

        registry.add_check("healthy_component", ComponentHealth::healthy);
        registry.add_check("degraded_component", || {
            ComponentHealth::degraded("High latency detected")
        });

        let outcome = registry.check();

        assert!(!outcome.is_healthy());
        assert_eq!(outcome.status, HealthStatus::Degraded);
    }

    /// One unhealthy check makes the whole report `Unhealthy`.
    #[test]
    fn test_unhealthy_status() {
        let mut registry = HealthChecker::new("test-service");

        registry.add_check("healthy_component", ComponentHealth::healthy);
        registry.add_check("unhealthy_component", || {
            ComponentHealth::unhealthy("Connection refused")
        });

        let outcome = registry.check();

        assert!(outcome.status.is_unhealthy());
    }

    /// With a single healthy check both probes report success.
    #[test]
    fn test_liveness_and_readiness() {
        let mut registry = HealthChecker::new("test-service");
        registry.add_check("component", ComponentHealth::healthy);

        assert!(registry.liveness());
        assert!(registry.readiness());
    }

    /// The JSON form exposes status, service name and version.
    #[test]
    fn test_health_report_json() {
        let mut registry = HealthChecker::new("test-service").with_version("1.0.0");
        registry.add_check("database", ComponentHealth::healthy);

        let rendered = registry.check().to_json();

        assert_eq!(rendered["status"], "healthy");
        assert_eq!(rendered["service"], "test-service");
        assert_eq!(rendered["version"], "1.0.0");
    }
}