1use axum::{extract::State, http::StatusCode, response::Json, routing::get};
15use serde::{Deserialize, Serialize};
16use std::sync::Arc;
17use std::time::{Duration, Instant};
18use tokio::sync::RwLock;
19use tracing::{debug, error, info, warn};
20
/// Lifecycle state of the service, as tracked by `HealthManager` and reported
/// by the health endpoints.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ServiceStatus {
    /// State assigned by the `HealthManager` constructors before the service
    /// has been marked ready.
    Initializing,
    /// The only state for which `is_ready` returns `true`.
    Ready,
    /// Set by `HealthManager::set_shutting_down` / `trigger_shutdown`;
    /// still counted as alive but no longer ready.
    ShuttingDown,
    /// The only state for which `is_alive` returns `false`.
    Failed,
}
33
34impl ServiceStatus {
35 pub fn is_ready(&self) -> bool {
37 matches!(self, ServiceStatus::Ready)
38 }
39
40 pub fn is_alive(&self) -> bool {
42 !matches!(self, ServiceStatus::Failed)
43 }
44}
45
46#[derive(Debug, Clone)]
48pub struct HealthManager {
49 status: Arc<RwLock<ServiceStatus>>,
51 start_time: Arc<Instant>,
53 init_deadline: Arc<Option<Instant>>,
55 shutdown_signal: Arc<RwLock<Option<tokio::sync::oneshot::Sender<()>>>>,
57}
58
59impl HealthManager {
60 pub fn new() -> Self {
62 Self {
63 status: Arc::new(RwLock::new(ServiceStatus::Initializing)),
64 start_time: Arc::new(Instant::now()),
65 init_deadline: Arc::new(None),
66 shutdown_signal: Arc::new(RwLock::new(None)),
67 }
68 }
69
70 pub fn with_init_timeout(timeout: Duration) -> Self {
72 let deadline = Instant::now() + timeout;
73 Self {
74 status: Arc::new(RwLock::new(ServiceStatus::Initializing)),
75 start_time: Arc::new(Instant::now()),
76 init_deadline: Arc::new(Some(deadline)),
77 shutdown_signal: Arc::new(RwLock::new(None)),
78 }
79 }
80
81 pub async fn set_ready(&self) {
83 let mut status = self.status.write().await;
84 *status = ServiceStatus::Ready;
85 info!("Service marked as ready");
86 }
87
88 pub async fn set_failed(&self, reason: &str) {
90 let mut status = self.status.write().await;
91 *status = ServiceStatus::Failed;
92 error!("Service marked as failed: {}", reason);
93 }
94
95 pub async fn set_shutting_down(&self) {
97 let mut status = self.status.write().await;
98 *status = ServiceStatus::ShuttingDown;
99 info!("Service marked as shutting down");
100 }
101
102 pub async fn get_status(&self) -> ServiceStatus {
104 *self.status.read().await
105 }
106
107 pub fn uptime_seconds(&self) -> u64 {
109 self.start_time.elapsed().as_secs()
110 }
111
112 pub fn is_init_timeout(&self) -> bool {
114 if let Some(deadline) = *self.init_deadline {
115 Instant::now() > deadline
116 } else {
117 false
118 }
119 }
120
121 pub async fn set_shutdown_signal(&self, sender: tokio::sync::oneshot::Sender<()>) {
123 let mut signal = self.shutdown_signal.write().await;
124 *signal = Some(sender);
125 }
126
127 pub async fn trigger_shutdown(&self) {
129 self.set_shutting_down().await;
130 let mut signal = self.shutdown_signal.write().await;
131 if let Some(sender) = signal.take() {
132 let _ = sender.send(());
133 info!("Graceful shutdown signal sent");
134 }
135 }
136}
137
138impl Default for HealthManager {
139 fn default() -> Self {
140 Self::new()
141 }
142}
143
144#[derive(Debug, Serialize, Deserialize)]
146pub struct HealthResponse {
147 pub status: String,
149 pub timestamp: String,
151 pub uptime_seconds: u64,
153 pub version: String,
155 #[serde(skip_serializing_if = "Option::is_none")]
157 pub details: Option<HealthDetails>,
158}
159
160#[derive(Debug, Serialize, Deserialize)]
162pub struct HealthDetails {
163 pub initialization: String,
165 #[serde(skip_serializing_if = "Option::is_none")]
167 pub connections: Option<u64>,
168 #[serde(skip_serializing_if = "Option::is_none")]
170 pub memory_bytes: Option<u64>,
171}
172
173async fn liveness_probe(
180 State(health): State<Arc<HealthManager>>,
181) -> Result<Json<HealthResponse>, StatusCode> {
182 let status = health.get_status().await;
183 let uptime = health.uptime_seconds();
184
185 if status.is_alive() {
187 let response = HealthResponse {
188 status: "alive".to_string(),
189 timestamp: chrono::Utc::now().to_rfc3339(),
190 uptime_seconds: uptime,
191 version: env!("CARGO_PKG_VERSION").to_string(),
192 details: None,
193 };
194 Ok(Json(response))
195 } else {
196 Err(StatusCode::SERVICE_UNAVAILABLE)
197 }
198}
199
200async fn readiness_probe(
207 State(health): State<Arc<HealthManager>>,
208) -> Result<Json<HealthResponse>, (StatusCode, Json<HealthResponse>)> {
209 let status = health.get_status().await;
210 let uptime = health.uptime_seconds();
211
212 if status.is_ready() {
214 let response = HealthResponse {
215 status: "ready".to_string(),
216 timestamp: chrono::Utc::now().to_rfc3339(),
217 uptime_seconds: uptime,
218 version: env!("CARGO_PKG_VERSION").to_string(),
219 details: Some(HealthDetails {
220 initialization: "complete".to_string(),
221 connections: None,
222 memory_bytes: None,
223 }),
224 };
225 Ok(Json(response))
226 } else {
227 let details = match status {
228 ServiceStatus::Initializing => {
229 if health.is_init_timeout() {
230 "initialization_timeout".to_string()
231 } else {
232 "initializing".to_string()
233 }
234 }
235 ServiceStatus::ShuttingDown => "shutting_down".to_string(),
236 ServiceStatus::Failed => "failed".to_string(),
237 ServiceStatus::Ready => unreachable!(),
238 };
239
240 let response = HealthResponse {
241 status: "not_ready".to_string(),
242 timestamp: chrono::Utc::now().to_rfc3339(),
243 uptime_seconds: uptime,
244 version: env!("CARGO_PKG_VERSION").to_string(),
245 details: Some(HealthDetails {
246 initialization: details,
247 connections: None,
248 memory_bytes: None,
249 }),
250 };
251
252 Err((StatusCode::SERVICE_UNAVAILABLE, Json(response)))
253 }
254}
255
256async fn startup_probe(
262 State(health): State<Arc<HealthManager>>,
263) -> Result<Json<HealthResponse>, StatusCode> {
264 let status = health.get_status().await;
265 let uptime = health.uptime_seconds();
266
267 match status {
269 ServiceStatus::Ready => {
270 let response = HealthResponse {
271 status: "startup_complete".to_string(),
272 timestamp: chrono::Utc::now().to_rfc3339(),
273 uptime_seconds: uptime,
274 version: env!("CARGO_PKG_VERSION").to_string(),
275 details: Some(HealthDetails {
276 initialization: "complete".to_string(),
277 connections: None,
278 memory_bytes: None,
279 }),
280 };
281 Ok(Json(response))
282 }
283 ServiceStatus::Initializing => {
284 if health.is_init_timeout() {
285 warn!("Startup probe: initialization timeout exceeded");
286 Err(StatusCode::SERVICE_UNAVAILABLE)
287 } else {
288 debug!("Startup probe: still initializing");
289 Err(StatusCode::SERVICE_UNAVAILABLE)
290 }
291 }
292 ServiceStatus::Failed => Err(StatusCode::SERVICE_UNAVAILABLE),
293 ServiceStatus::ShuttingDown => {
294 let response = HealthResponse {
296 status: "startup_complete".to_string(),
297 timestamp: chrono::Utc::now().to_rfc3339(),
298 uptime_seconds: uptime,
299 version: env!("CARGO_PKG_VERSION").to_string(),
300 details: Some(HealthDetails {
301 initialization: "complete".to_string(),
302 connections: None,
303 memory_bytes: None,
304 }),
305 };
306 Ok(Json(response))
307 }
308 }
309}
310
311async fn health_check(
316 State(health): State<Arc<HealthManager>>,
317) -> Result<Json<HealthResponse>, (StatusCode, Json<HealthResponse>)> {
318 let status = health.get_status().await;
319 let uptime = health.uptime_seconds();
320
321 if status.is_ready() {
322 let response = HealthResponse {
323 status: "healthy".to_string(),
324 timestamp: chrono::Utc::now().to_rfc3339(),
325 uptime_seconds: uptime,
326 version: env!("CARGO_PKG_VERSION").to_string(),
327 details: Some(HealthDetails {
328 initialization: "complete".to_string(),
329 connections: None,
330 memory_bytes: None,
331 }),
332 };
333 Ok(Json(response))
334 } else {
335 let status_str = match status {
336 ServiceStatus::Initializing => "initializing",
337 ServiceStatus::ShuttingDown => "shutting_down",
338 ServiceStatus::Failed => "failed",
339 ServiceStatus::Ready => unreachable!(),
340 };
341
342 let response = HealthResponse {
343 status: status_str.to_string(),
344 timestamp: chrono::Utc::now().to_rfc3339(),
345 uptime_seconds: uptime,
346 version: env!("CARGO_PKG_VERSION").to_string(),
347 details: Some(HealthDetails {
348 initialization: status_str.to_string(),
349 connections: None,
350 memory_bytes: None,
351 }),
352 };
353
354 Err((StatusCode::SERVICE_UNAVAILABLE, Json(response)))
355 }
356}
357
358pub fn health_router(health_manager: Arc<HealthManager>) -> axum::Router {
360 use axum::Router;
361 Router::new()
362 .route("/health", get(health_check))
363 .route("/health/live", get(liveness_probe))
364 .route("/health/ready", get(readiness_probe))
365 .route("/health/startup", get(startup_probe))
366 .with_state(health_manager)
367}
368
369pub fn health_router_with_prefix(health_manager: Arc<HealthManager>, prefix: &str) -> axum::Router {
371 use axum::Router;
372 Router::new()
373 .route(&format!("{}/health", prefix), get(health_check))
374 .route(&format!("{}/health/live", prefix), get(liveness_probe))
375 .route(&format!("{}/health/ready", prefix), get(readiness_probe))
376 .route(&format!("{}/health/startup", prefix), get(startup_probe))
377 .with_state(health_manager)
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use axum::body::Body;
    use axum::http::Request;
    use tower::ServiceExt;

    /// Sends a GET for `uri` through a fresh `health_router` built on `health`
    /// and returns the response status code.
    async fn probe_status(health: Arc<HealthManager>, uri: &str) -> StatusCode {
        let request = Request::builder().uri(uri).body(Body::empty()).unwrap();
        health_router(health)
            .oneshot(request)
            .await
            .unwrap()
            .status()
    }

    // ---- ServiceStatus ----

    #[test]
    fn test_service_status() {
        // (status, expected is_ready, expected is_alive)
        let expectations = [
            (ServiceStatus::Ready, true, true),
            (ServiceStatus::Initializing, false, true),
            (ServiceStatus::ShuttingDown, false, true),
            (ServiceStatus::Failed, false, false),
        ];

        for &(status, ready, alive) in &expectations {
            assert_eq!(status.is_ready(), ready);
            assert_eq!(status.is_alive(), alive);
        }
    }

    #[test]
    fn test_service_status_eq() {
        assert_eq!(ServiceStatus::Ready, ServiceStatus::Ready);
        assert_ne!(ServiceStatus::Ready, ServiceStatus::Failed);
        assert_eq!(ServiceStatus::Initializing, ServiceStatus::Initializing);
    }

    #[test]
    fn test_service_status_clone() {
        let original = ServiceStatus::Ready;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_service_status_debug() {
        let rendered = format!("{:?}", ServiceStatus::Ready);
        assert!(rendered.contains("Ready"));
    }

    // ---- HealthManager ----

    #[tokio::test]
    async fn test_health_manager_new() {
        let manager = HealthManager::new();
        assert_eq!(manager.get_status().await, ServiceStatus::Initializing);
    }

    #[tokio::test]
    async fn test_health_manager_default() {
        let manager = HealthManager::default();
        assert_eq!(manager.get_status().await, ServiceStatus::Initializing);
    }

    #[tokio::test]
    async fn test_health_manager_with_init_timeout() {
        let manager = HealthManager::with_init_timeout(Duration::from_secs(30));
        assert_eq!(manager.get_status().await, ServiceStatus::Initializing);
        assert!(!manager.is_init_timeout());
    }

    #[tokio::test]
    async fn test_health_manager_set_ready() {
        let manager = HealthManager::new();
        manager.set_ready().await;
        assert_eq!(manager.get_status().await, ServiceStatus::Ready);
    }

    #[tokio::test]
    async fn test_health_manager_set_failed() {
        let manager = HealthManager::new();
        manager.set_failed("test error").await;
        assert_eq!(manager.get_status().await, ServiceStatus::Failed);
    }

    #[tokio::test]
    async fn test_health_manager_set_shutting_down() {
        let manager = HealthManager::new();
        manager.set_shutting_down().await;
        assert_eq!(manager.get_status().await, ServiceStatus::ShuttingDown);
    }

    #[tokio::test]
    async fn test_health_manager_uptime() {
        let manager = HealthManager::new();
        // A freshly built manager has been up for far less than 5 seconds.
        assert!(manager.uptime_seconds() < 5);
    }

    #[tokio::test]
    async fn test_health_manager_clone() {
        let manager = HealthManager::new();
        manager.set_ready().await;
        let twin = manager.clone();
        assert_eq!(twin.get_status().await, ServiceStatus::Ready);
    }

    #[tokio::test]
    async fn test_health_manager_trigger_shutdown() {
        let manager = HealthManager::new();
        manager.set_ready().await;
        manager.trigger_shutdown().await;
        assert_eq!(manager.get_status().await, ServiceStatus::ShuttingDown);
    }

    // ---- HealthResponse ----

    #[test]
    fn test_health_response_serialization() {
        let response = HealthResponse {
            status: "healthy".to_string(),
            timestamp: "2024-01-15T10:30:00Z".to_string(),
            uptime_seconds: 3600,
            version: "1.0.0".to_string(),
            details: None,
        };

        let json = serde_json::to_string(&response).unwrap();
        for needle in &["healthy", "3600", "1.0.0"] {
            assert!(json.contains(needle));
        }
    }

    #[test]
    fn test_health_response_with_details() {
        let response = HealthResponse {
            status: "healthy".to_string(),
            timestamp: "2024-01-15T10:30:00Z".to_string(),
            uptime_seconds: 3600,
            version: "1.0.0".to_string(),
            details: Some(HealthDetails {
                initialization: "complete".to_string(),
                connections: Some(10),
                memory_bytes: Some(1024 * 1024),
            }),
        };

        let json = serde_json::to_string(&response).unwrap();
        assert!(json.contains("complete"));
        assert!(json.contains("connections"));
    }

    #[test]
    fn test_health_response_deserialization() {
        let json = r#"{
            "status": "healthy",
            "timestamp": "2024-01-15T10:30:00Z",
            "uptime_seconds": 7200,
            "version": "2.0.0"
        }"#;

        let parsed: HealthResponse = serde_json::from_str(json).unwrap();
        assert_eq!(parsed.status, "healthy");
        assert_eq!(parsed.uptime_seconds, 7200);
        assert_eq!(parsed.version, "2.0.0");
    }

    // ---- HealthDetails ----

    #[test]
    fn test_health_details_serialization() {
        let details = HealthDetails {
            initialization: "complete".to_string(),
            connections: Some(5),
            memory_bytes: Some(2048),
        };

        let json = serde_json::to_string(&details).unwrap();
        for needle in &["complete", "5", "2048"] {
            assert!(json.contains(needle));
        }
    }

    #[test]
    fn test_health_details_optional_fields() {
        let details = HealthDetails {
            initialization: "initializing".to_string(),
            connections: None,
            memory_bytes: None,
        };

        let json = serde_json::to_string(&details).unwrap();
        assert!(json.contains("initializing"));
        // `None` fields are skipped during serialization.
        assert!(!json.contains("connections"));
        assert!(!json.contains("memory_bytes"));
    }

    // ---- /health/live ----

    #[tokio::test]
    async fn test_liveness_probe_alive() {
        let health = Arc::new(HealthManager::new());
        health.set_ready().await;

        let code = probe_status(health.clone(), "/health/live").await;
        assert_eq!(code, StatusCode::OK);
    }

    #[tokio::test]
    async fn test_liveness_probe_failed() {
        let health = Arc::new(HealthManager::new());
        health.set_failed("test failure").await;

        let code = probe_status(health.clone(), "/health/live").await;
        assert_eq!(code, StatusCode::SERVICE_UNAVAILABLE);
    }

    #[tokio::test]
    async fn test_liveness_probe_initializing() {
        let health = Arc::new(HealthManager::new());

        let code = probe_status(health.clone(), "/health/live").await;
        assert_eq!(code, StatusCode::OK);
    }

    #[tokio::test]
    async fn test_liveness_probe_shutting_down() {
        let health = Arc::new(HealthManager::new());
        health.set_shutting_down().await;

        let code = probe_status(health.clone(), "/health/live").await;
        assert_eq!(code, StatusCode::OK);
    }

    // ---- /health/ready ----

    #[tokio::test]
    async fn test_readiness_probe_ready() {
        let health = Arc::new(HealthManager::new());
        health.set_ready().await;

        let code = probe_status(health.clone(), "/health/ready").await;
        assert_eq!(code, StatusCode::OK);
    }

    #[tokio::test]
    async fn test_readiness_probe_initializing() {
        let health = Arc::new(HealthManager::new());

        let code = probe_status(health.clone(), "/health/ready").await;
        assert_eq!(code, StatusCode::SERVICE_UNAVAILABLE);
    }

    #[tokio::test]
    async fn test_readiness_probe_shutting_down() {
        let health = Arc::new(HealthManager::new());
        health.set_shutting_down().await;

        let code = probe_status(health.clone(), "/health/ready").await;
        assert_eq!(code, StatusCode::SERVICE_UNAVAILABLE);
    }

    // ---- /health/startup ----

    #[tokio::test]
    async fn test_startup_probe_ready() {
        let health = Arc::new(HealthManager::new());
        health.set_ready().await;

        let code = probe_status(health.clone(), "/health/startup").await;
        assert_eq!(code, StatusCode::OK);
    }

    #[tokio::test]
    async fn test_startup_probe_initializing() {
        let health = Arc::new(HealthManager::new());

        let code = probe_status(health.clone(), "/health/startup").await;
        assert_eq!(code, StatusCode::SERVICE_UNAVAILABLE);
    }

    #[tokio::test]
    async fn test_startup_probe_shutting_down() {
        let health = Arc::new(HealthManager::new());
        health.set_ready().await;
        health.set_shutting_down().await;

        let code = probe_status(health.clone(), "/health/startup").await;
        assert_eq!(code, StatusCode::OK);
    }

    // ---- /health ----

    #[tokio::test]
    async fn test_health_check_ready() {
        let health = Arc::new(HealthManager::new());
        health.set_ready().await;

        let code = probe_status(health.clone(), "/health").await;
        assert_eq!(code, StatusCode::OK);
    }

    #[tokio::test]
    async fn test_health_check_not_ready() {
        let health = Arc::new(HealthManager::new());

        let code = probe_status(health.clone(), "/health").await;
        assert_eq!(code, StatusCode::SERVICE_UNAVAILABLE);
    }

    // ---- Router construction ----

    #[test]
    fn test_health_router_creation() {
        let health = Arc::new(HealthManager::new());
        // Construction alone must not panic.
        let _ = health_router(health);
    }

    #[test]
    fn test_health_router_with_prefix() {
        let health = Arc::new(HealthManager::new());
        // Construction alone must not panic.
        let _ = health_router_with_prefix(health, "/api/v1");
    }
}