Skip to main content

fastapi_core/
health.rs

1//! Health check endpoint helpers for operations.
2//!
3//! Provides standard health check patterns:
4//! - Basic health: returns `{"status":"healthy"}` on `GET /health`
5//! - Detailed health with named checks, critical flags, and per-check latency
6//! - Kubernetes-style liveness (`/healthz`) and readiness (`/ready`) probes
7//!
8//! # Example
9//!
10//! ```ignore
11//! use fastapi_core::health::{HealthCheckRegistry, HealthStatus};
12//!
13//! let mut registry = HealthCheckRegistry::new();
14//! registry.add("database", true, || async {
15//!     // Check database connection
16//!     Ok(())
17//! });
18//! registry.add("cache", false, || async {
19//!     // Check cache connection (non-critical)
20//!     Ok(())
21//! });
22//!
23//! let result = futures_executor::block_on(registry.check_all());
24//! assert_eq!(result.status, HealthStatus::Healthy);
25//! ```
26
27use std::future::Future;
28use std::pin::Pin;
29use std::sync::Arc;
30use std::time::Instant;
31
32use crate::context::RequestContext;
33use crate::request::Request;
34use crate::response::{Response, ResponseBody, StatusCode};
35
/// Aggregate health verdict for the whole application.
///
/// The variants are ordered from best to worst outcome.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum HealthStatus {
    /// Every registered check succeeded.
    Healthy,
    /// One or more non-critical checks failed while every critical check succeeded.
    Degraded,
    /// One or more critical checks failed.
    Unhealthy,
}
46
47impl HealthStatus {
48    /// Returns the string representation.
49    #[must_use]
50    pub fn as_str(self) -> &'static str {
51        match self {
52            Self::Healthy => "healthy",
53            Self::Degraded => "degraded",
54            Self::Unhealthy => "unhealthy",
55        }
56    }
57
58    /// Returns the HTTP status code for this health status.
59    #[must_use]
60    pub fn status_code(self) -> StatusCode {
61        match self {
62            Self::Healthy | Self::Degraded => StatusCode::OK,
63            Self::Unhealthy => StatusCode::SERVICE_UNAVAILABLE,
64        }
65    }
66}
67
68impl std::fmt::Display for HealthStatus {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        f.write_str(self.as_str())
71    }
72}
73
/// Outcome of running one named health check.
#[derive(Clone, Debug)]
pub struct HealthCheckResult {
    /// Identifier the check was registered under.
    pub name: String,
    /// `true` when the check succeeded.
    pub passed: bool,
    /// `true` when the check is marked critical.
    pub critical: bool,
    /// How long the check took, in milliseconds.
    pub latency_ms: u64,
    /// Failure message; `None` when the check passed.
    pub error: Option<String>,
}
88
89/// Aggregate result of all health checks.
90#[derive(Debug, Clone)]
91pub struct HealthReport {
92    /// Overall status.
93    pub status: HealthStatus,
94    /// Individual check results.
95    pub checks: Vec<HealthCheckResult>,
96    /// Total time for all checks.
97    pub total_latency_ms: u64,
98}
99
100impl HealthReport {
101    /// Serialize the report to JSON bytes.
102    #[must_use]
103    pub fn to_json(&self) -> Vec<u8> {
104        let mut json = String::with_capacity(256);
105        json.push_str("{\"status\":\"");
106        json.push_str(self.status.as_str());
107        json.push_str("\",\"total_latency_ms\":");
108        json.push_str(&self.total_latency_ms.to_string());
109        json.push_str(",\"checks\":[");
110
111        for (i, check) in self.checks.iter().enumerate() {
112            if i > 0 {
113                json.push(',');
114            }
115            json.push_str("{\"name\":\"");
116            json.push_str(&check.name);
117            json.push_str("\",\"passed\":");
118            json.push_str(if check.passed { "true" } else { "false" });
119            json.push_str(",\"critical\":");
120            json.push_str(if check.critical { "true" } else { "false" });
121            json.push_str(",\"latency_ms\":");
122            json.push_str(&check.latency_ms.to_string());
123            if let Some(ref err) = check.error {
124                json.push_str(",\"error\":\"");
125                // Escape JSON special characters
126                for ch in err.chars() {
127                    match ch {
128                        '"' => json.push_str("\\\""),
129                        '\\' => json.push_str("\\\\"),
130                        '\n' => json.push_str("\\n"),
131                        '\r' => json.push_str("\\r"),
132                        '\t' => json.push_str("\\t"),
133                        c => json.push(c),
134                    }
135                }
136                json.push('"');
137            }
138            json.push('}');
139        }
140
141        json.push_str("]}");
142        json.into_bytes()
143    }
144}
145
/// Heap-allocated, sendable future produced by one run of a health check.
type CheckFuture = Pin<Box<dyn Future<Output = Result<(), String>> + Send>>;

/// Shared, thread-safe callable that starts a new health check run each time it is invoked.
type CheckFn = Arc<dyn Fn() -> CheckFuture + Send + Sync>;
149
150/// A single health check entry.
151struct HealthCheckEntry {
152    name: String,
153    critical: bool,
154    check_fn: CheckFn,
155}
156
157/// Registry of health checks.
158///
159/// Collects named health checks that can be evaluated together to produce
160/// a [`HealthReport`].
161///
162/// # Example
163///
164/// ```ignore
165/// let mut registry = HealthCheckRegistry::new();
166///
167/// // Critical check — failure makes the app "unhealthy"
168/// registry.add("database", true, || async {
169///     // db.ping().await?;
170///     Ok(())
171/// });
172///
173/// // Non-critical check — failure makes the app "degraded"
174/// registry.add("cache", false, || async {
175///     // cache.ping().await?;
176///     Ok(())
177/// });
178/// ```
179pub struct HealthCheckRegistry {
180    checks: Vec<HealthCheckEntry>,
181}
182
183impl HealthCheckRegistry {
184    /// Create a new empty registry.
185    #[must_use]
186    pub fn new() -> Self {
187        Self { checks: Vec::new() }
188    }
189
190    /// Add a named health check.
191    ///
192    /// # Parameters
193    ///
194    /// - `name`: Identifier for the check (e.g., "database", "cache").
195    /// - `critical`: If `true`, failure of this check makes the overall status `Unhealthy`.
196    ///   If `false`, failure makes it `Degraded` (unless another critical check fails).
197    /// - `check_fn`: Async function that returns `Ok(())` on success or `Err(message)` on failure.
198    pub fn add<F, Fut>(&mut self, name: impl Into<String>, critical: bool, check_fn: F)
199    where
200        F: Fn() -> Fut + Send + Sync + 'static,
201        Fut: Future<Output = Result<(), String>> + Send + 'static,
202    {
203        let check_fn = Arc::new(move || {
204            let fut = check_fn();
205            Box::pin(fut) as Pin<Box<dyn Future<Output = Result<(), String>> + Send>>
206        }) as CheckFn;
207
208        self.checks.push(HealthCheckEntry {
209            name: name.into(),
210            critical,
211            check_fn,
212        });
213    }
214
215    /// Run all health checks and produce a report.
216    pub async fn check_all(&self) -> HealthReport {
217        let total_start = Instant::now();
218        let mut results = Vec::with_capacity(self.checks.len());
219        let mut has_critical_failure = false;
220        let mut has_non_critical_failure = false;
221
222        for entry in &self.checks {
223            let start = Instant::now();
224            let outcome = (entry.check_fn)().await;
225            let latency_ms = start.elapsed().as_millis() as u64;
226
227            let (passed, error) = match outcome {
228                Ok(()) => (true, None),
229                Err(msg) => {
230                    if entry.critical {
231                        has_critical_failure = true;
232                    } else {
233                        has_non_critical_failure = true;
234                    }
235                    (false, Some(msg))
236                }
237            };
238
239            results.push(HealthCheckResult {
240                name: entry.name.clone(),
241                passed,
242                critical: entry.critical,
243                latency_ms,
244                error,
245            });
246        }
247
248        let status = if has_critical_failure {
249            HealthStatus::Unhealthy
250        } else if has_non_critical_failure {
251            HealthStatus::Degraded
252        } else {
253            HealthStatus::Healthy
254        };
255
256        HealthReport {
257            status,
258            checks: results,
259            total_latency_ms: total_start.elapsed().as_millis() as u64,
260        }
261    }
262
263    /// Returns the number of registered checks.
264    #[must_use]
265    pub fn len(&self) -> usize {
266        self.checks.len()
267    }
268
269    /// Returns true if no checks are registered.
270    #[must_use]
271    pub fn is_empty(&self) -> bool {
272        self.checks.is_empty()
273    }
274}
275
276impl Default for HealthCheckRegistry {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
282/// Create a basic health check handler that returns `{"status":"healthy"}`.
283///
284/// This is the simplest health check — always returns 200 OK. Useful as a
285/// basic liveness probe when you just need to confirm the process is running.
286pub fn basic_health_handler()
287-> impl Fn(&RequestContext, &mut Request) -> std::future::Ready<Response> + Send + Sync + 'static {
288    |_ctx: &RequestContext, _req: &mut Request| {
289        let body = br#"{"status":"healthy"}"#.to_vec();
290        std::future::ready(
291            Response::with_status(StatusCode::OK)
292                .header("Content-Type", b"application/json".to_vec())
293                .body(ResponseBody::Bytes(body)),
294        )
295    }
296}
297
298/// Create a detailed health check handler from a registry.
299///
300/// Runs all registered checks and returns:
301/// - 200 OK with `{"status":"healthy", ...}` when all checks pass
302/// - 200 OK with `{"status":"degraded", ...}` when non-critical checks fail
303/// - 503 Service Unavailable with `{"status":"unhealthy", ...}` when critical checks fail
304pub fn detailed_health_handler(
305    registry: Arc<HealthCheckRegistry>,
306) -> impl Fn(&RequestContext, &mut Request) -> Pin<Box<dyn Future<Output = Response> + Send>>
307+ Send
308+ Sync
309+ 'static {
310    move |_ctx: &RequestContext, _req: &mut Request| {
311        let registry = Arc::clone(&registry);
312        Box::pin(async move {
313            let report = registry.check_all().await;
314            let status_code = report.status.status_code();
315            let body = report.to_json();
316
317            Response::with_status(status_code)
318                .header("Content-Type", b"application/json".to_vec())
319                .header("Cache-Control", b"no-cache, no-store".to_vec())
320                .body(ResponseBody::Bytes(body))
321        })
322    }
323}
324
325/// Create a Kubernetes liveness probe handler.
326///
327/// Returns 200 OK if the process is alive. This is equivalent to `basic_health_handler()`
328/// but semantically represents a liveness check.
329pub fn liveness_handler()
330-> impl Fn(&RequestContext, &mut Request) -> std::future::Ready<Response> + Send + Sync + 'static {
331    basic_health_handler()
332}
333
334/// Create a Kubernetes readiness probe handler from a registry.
335///
336/// Runs all checks and returns:
337/// - 200 OK when the app is ready to serve traffic
338/// - 503 Service Unavailable when critical checks fail
339///
340/// Kubernetes will stop routing traffic to pods that fail readiness checks.
341pub fn readiness_handler(
342    registry: Arc<HealthCheckRegistry>,
343) -> impl Fn(&RequestContext, &mut Request) -> Pin<Box<dyn Future<Output = Response> + Send>>
344+ Send
345+ Sync
346+ 'static {
347    detailed_health_handler(registry)
348}
349
#[cfg(test)]
mod tests {
    use super::*;
    use crate::request::Method;

    /// Drives `handler` to completion against a synthetic `GET` request for `path`.
    fn run_handler_at<F, Fut>(handler: F, path: &'static str) -> Response
    where
        F: Fn(&RequestContext, &mut Request) -> Fut,
        Fut: Future<Output = Response>,
    {
        let ctx = RequestContext::new(asupersync::Cx::for_testing(), 1);
        let mut req = Request::new(Method::Get, path);
        futures_executor::block_on(handler(&ctx, &mut req))
    }

    /// Drives `handler` to completion against `GET /health`.
    fn run_handler<F, Fut>(handler: F) -> Response
    where
        F: Fn(&RequestContext, &mut Request) -> Fut,
        Fut: Future<Output = Response>,
    {
        run_handler_at(handler, "/health")
    }

    /// Returns the response body as text, panicking unless it is a `Bytes` body.
    fn body_text(resp: &Response) -> String {
        match resp.body_ref() {
            ResponseBody::Bytes(body) => std::str::from_utf8(body).unwrap().to_owned(),
            _ => panic!("expected Bytes body"),
        }
    }

    /// Runs every check in `registry` synchronously.
    fn run_checks(registry: &HealthCheckRegistry) -> HealthReport {
        futures_executor::block_on(registry.check_all())
    }

    /// Registry holding a single critical `db` check that passes or fails with "down".
    fn db_registry(db_is_up: bool) -> Arc<HealthCheckRegistry> {
        let mut registry = HealthCheckRegistry::new();
        registry.add("db", true, move || async move {
            if db_is_up {
                Ok(())
            } else {
                Err("down".to_string())
            }
        });
        Arc::new(registry)
    }

    /// Serializes a report that contains exactly `check`, reusing its latency as the total.
    fn single_check_json(status: HealthStatus, check: HealthCheckResult) -> String {
        let report = HealthReport {
            status,
            total_latency_ms: check.latency_ms,
            checks: vec![check],
        };
        String::from_utf8(report.to_json()).unwrap()
    }

    #[test]
    fn health_status_as_str() {
        let expected = [
            (HealthStatus::Healthy, "healthy"),
            (HealthStatus::Degraded, "degraded"),
            (HealthStatus::Unhealthy, "unhealthy"),
        ];
        for (status, label) in expected {
            assert_eq!(status.as_str(), label);
        }
    }

    #[test]
    fn health_status_display() {
        assert_eq!(format!("{}", HealthStatus::Healthy), "healthy");
        assert_eq!(format!("{}", HealthStatus::Unhealthy), "unhealthy");
    }

    #[test]
    fn health_status_code() {
        let expected = [
            (HealthStatus::Healthy, StatusCode::OK),
            (HealthStatus::Degraded, StatusCode::OK),
            (HealthStatus::Unhealthy, StatusCode::SERVICE_UNAVAILABLE),
        ];
        for (status, code) in expected {
            assert_eq!(status.status_code(), code);
        }
    }

    #[test]
    fn empty_registry_is_healthy() {
        let registry = HealthCheckRegistry::new();
        assert!(registry.is_empty());

        let report = run_checks(&registry);
        assert_eq!(report.status, HealthStatus::Healthy);
        assert!(report.checks.is_empty());
    }

    #[test]
    fn all_checks_pass_is_healthy() {
        let mut registry = HealthCheckRegistry::new();
        registry.add("database", true, || async { Ok(()) });
        registry.add("cache", false, || async { Ok(()) });

        let report = run_checks(&registry);
        assert_eq!(report.status, HealthStatus::Healthy);
        assert_eq!(report.checks.len(), 2);
        assert!(report.checks[0].passed);
        assert!(report.checks[1].passed);
    }

    #[test]
    fn non_critical_failure_is_degraded() {
        let mut registry = HealthCheckRegistry::new();
        registry.add("database", true, || async { Ok(()) });
        registry.add("cache", false, || async {
            Err("Cache connection refused".to_string())
        });

        let report = run_checks(&registry);
        assert_eq!(report.status, HealthStatus::Degraded);
        assert!(report.checks[0].passed);
        assert!(!report.checks[1].passed);
        assert_eq!(
            report.checks[1].error.as_deref(),
            Some("Cache connection refused")
        );
    }

    #[test]
    fn critical_failure_is_unhealthy() {
        let mut registry = HealthCheckRegistry::new();
        registry.add("database", true, || async {
            Err("Connection timeout".to_string())
        });
        registry.add("cache", false, || async { Ok(()) });

        let report = run_checks(&registry);
        assert_eq!(report.status, HealthStatus::Unhealthy);
        assert!(!report.checks[0].passed);
        assert!(report.checks[0].critical);
        assert!(report.checks[1].passed);
    }

    #[test]
    fn latency_is_measured() {
        let mut registry = HealthCheckRegistry::new();
        registry.add("fast", true, || async { Ok(()) });

        let report = run_checks(&registry);
        // Latencies are unsigned, so only an upper bound is meaningful: a
        // trivial check should finish in well under a second.
        assert!(report.checks[0].latency_ms < 1000);
        assert!(report.total_latency_ms < 1000);
    }

    #[test]
    fn report_to_json_healthy() {
        let json = single_check_json(
            HealthStatus::Healthy,
            HealthCheckResult {
                name: "database".to_string(),
                passed: true,
                critical: true,
                latency_ms: 5,
                error: None,
            },
        );

        assert!(json.contains("\"status\":\"healthy\""));
        assert!(json.contains("\"name\":\"database\""));
        assert!(json.contains("\"passed\":true"));
        assert!(json.contains("\"critical\":true"));
        assert!(json.contains("\"latency_ms\":5"));
    }

    #[test]
    fn report_to_json_unhealthy_with_error() {
        let json = single_check_json(
            HealthStatus::Unhealthy,
            HealthCheckResult {
                name: "database".to_string(),
                passed: false,
                critical: true,
                latency_ms: 3000,
                error: Some("Connection refused".to_string()),
            },
        );

        assert!(json.contains("\"status\":\"unhealthy\""));
        assert!(json.contains("\"passed\":false"));
        assert!(json.contains("\"error\":\"Connection refused\""));
    }

    #[test]
    fn report_to_json_escapes_special_chars() {
        let json = single_check_json(
            HealthStatus::Unhealthy,
            HealthCheckResult {
                name: "test".to_string(),
                passed: false,
                critical: true,
                latency_ms: 0,
                error: Some("Error with \"quotes\" and \\backslash".to_string()),
            },
        );

        assert!(json.contains(r#"\"quotes\""#));
        assert!(json.contains(r"\\backslash"));
    }

    #[test]
    fn basic_health_handler_returns_200() {
        let resp = run_handler(basic_health_handler());
        assert_eq!(resp.status(), StatusCode::OK);
        assert!(body_text(&resp).contains("\"status\":\"healthy\""));
    }

    #[test]
    fn detailed_health_handler_healthy() {
        let resp = run_handler(detailed_health_handler(db_registry(true)));
        assert_eq!(resp.status(), StatusCode::OK);

        let body = body_text(&resp);
        assert!(body.contains("\"status\":\"healthy\""));
        assert!(body.contains("\"name\":\"db\""));
    }

    #[test]
    fn detailed_health_handler_unhealthy_returns_503() {
        let resp = run_handler(detailed_health_handler(db_registry(false)));
        assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
        assert!(body_text(&resp).contains("\"status\":\"unhealthy\""));
    }

    #[test]
    fn detailed_health_handler_has_cache_headers() {
        let registry = HealthCheckRegistry::new();
        let resp = run_handler(detailed_health_handler(Arc::new(registry)));

        let has_no_cache = resp
            .headers()
            .iter()
            .any(|(n, v)| n.eq_ignore_ascii_case("cache-control") && v == b"no-cache, no-store");
        assert!(
            has_no_cache,
            "should have Cache-Control: no-cache, no-store"
        );
    }

    #[test]
    fn registry_len_and_is_empty() {
        let mut registry = HealthCheckRegistry::new();
        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);

        registry.add("db", true, || async { Ok(()) });
        assert!(!registry.is_empty());
        assert_eq!(registry.len(), 1);
    }

    #[test]
    fn liveness_handler_returns_200() {
        let resp = run_handler(liveness_handler());
        assert_eq!(resp.status(), StatusCode::OK);
    }

    #[test]
    fn readiness_handler_healthy_returns_200() {
        let resp = run_handler_at(readiness_handler(db_registry(true)), "/ready");
        assert_eq!(resp.status(), StatusCode::OK);
    }

    #[test]
    fn readiness_handler_unhealthy_returns_503() {
        let resp = run_handler_at(readiness_handler(db_registry(false)), "/ready");
        assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
    }

    #[test]
    fn multiple_checks_mixed_results() {
        let mut registry = HealthCheckRegistry::new();
        registry.add("database", true, || async { Ok(()) });
        registry.add("cache", false, || async {
            Err("Cache timeout".to_string())
        });
        registry.add("search", false, || async { Ok(()) });

        let report = run_checks(&registry);
        assert_eq!(report.status, HealthStatus::Degraded);
        assert_eq!(report.checks.len(), 3);

        let (database, cache, search) = (&report.checks[0], &report.checks[1], &report.checks[2]);

        // The critical database check passed.
        assert!(database.passed);
        assert!(database.critical);

        // The non-critical cache check failed and recorded its error.
        assert!(!cache.passed);
        assert!(!cache.critical);
        assert!(cache.error.is_some());

        // The search check passed.
        assert!(search.passed);
    }
}