1use axum::{
23 extract::State,
24 http::StatusCode,
25 response::{IntoResponse, Response},
26 routing::get,
27 Json, Router,
28};
29use parking_lot::RwLock;
30use serde::Serialize;
31use std::sync::atomic::{AtomicBool, Ordering};
32use std::sync::Arc;
33use std::time::{Duration, Instant};
34
/// Coarse health state reported for the whole service or for a single component.
///
/// Serialized as a lowercase string (`"up"`, `"down"`, `"unknown"`) because of
/// the `rename_all = "lowercase"` serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatus {
    /// The subject is healthy.
    Up,
    /// The subject is unhealthy.
    Down,
    /// The state is not known; in this file it is reported by the P2P check
    /// when P2P is not connected or not enabled.
    Unknown,
}
46
/// Health report for one component, embedded in the JSON health responses.
#[derive(Debug, Clone, Serialize)]
pub struct ComponentHealth {
    /// Current status of the component.
    pub status: HealthStatus,
    /// Latency in whole milliseconds; left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub latency_ms: Option<u64>,
    /// Arbitrary JSON details (this file stores a `reason`, a `peer_count` or a
    /// `connections` entry here); left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<serde_json::Value>,
}
59
60impl ComponentHealth {
61 pub fn up() -> Self {
63 Self {
64 status: HealthStatus::Up,
65 latency_ms: None,
66 details: None,
67 }
68 }
69
70 pub fn up_with_latency(latency: Duration) -> Self {
72 Self {
73 status: HealthStatus::Up,
74 latency_ms: Some(latency.as_millis() as u64),
75 details: None,
76 }
77 }
78
79 pub fn down() -> Self {
81 Self {
82 status: HealthStatus::Down,
83 latency_ms: None,
84 details: None,
85 }
86 }
87
88 pub fn down_with_reason(reason: &str) -> Self {
90 Self {
91 status: HealthStatus::Down,
92 latency_ms: None,
93 details: Some(serde_json::json!({ "reason": reason })),
94 }
95 }
96}
97
/// JSON body produced by the liveness handler (`/health/live`).
#[derive(Debug, Clone, Serialize)]
pub struct LivenessResponse {
    /// Status reported by the liveness handler.
    pub status: HealthStatus,
    /// Whole seconds elapsed since the `HealthState` was created.
    pub uptime_seconds: u64,
}
106
/// JSON body produced by the readiness handler (`/health/ready`).
#[derive(Debug, Clone, Serialize)]
pub struct ReadinessResponse {
    /// Overall readiness verdict.
    pub status: HealthStatus,
    /// Per-component reports backing the verdict.
    pub checks: ReadinessChecks,
}
115
/// Per-component health reports included in the readiness and health responses.
#[derive(Debug, Clone, Serialize)]
pub struct ReadinessChecks {
    /// Report for the storage component.
    pub storage: ComponentHealth,
    /// Report for the P2P component.
    pub p2p: ComponentHealth,
    /// Report for the realtime component.
    pub realtime: ComponentHealth,
}
126
/// JSON body produced by the startup handler (`/health/startup`).
#[derive(Debug, Clone, Serialize)]
pub struct StartupResponse {
    /// `Up` once startup has been marked complete, `Down` before that.
    pub status: HealthStatus,
    /// Startup duration in milliseconds; left out of the serialized output when `None`.
    // NOTE(review): the startup handler in this file always sets this to `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub startup_duration_ms: Option<u64>,
}
136
/// JSON body produced by the aggregate health handler (`/health`).
#[derive(Debug, Clone, Serialize)]
pub struct HealthResponse {
    /// Overall health verdict.
    pub status: HealthStatus,
    /// Version string; the handler fills it from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Whole seconds elapsed since the `HealthState` was created.
    pub uptime_seconds: u64,
    /// Per-component reports backing the verdict.
    pub checks: ReadinessChecks,
}
149
/// State handed to the health handlers.
///
/// The flags and component states sit behind `Arc`, so clones of a
/// `HealthState` observe and modify the same values; `start_time` is copied.
#[derive(Clone)]
pub struct HealthState {
    /// Instant captured when the state was created; the basis for uptime.
    start_time: Instant,
    /// Whether startup has been marked complete.
    startup_complete: Arc<AtomicBool>,
    /// Whether the service has been marked ready.
    ready: Arc<AtomicBool>,
    /// Last reported per-component states, guarded by a read/write lock.
    components: Arc<RwLock<ComponentStates>>,
}
162
/// Raw component flags and counters recorded through the `HealthState` setters.
///
/// The derived `Default` starts every flag at `false` and every counter at `0`.
#[derive(Default)]
struct ComponentStates {
    /// Last reported storage health.
    storage_healthy: bool,
    /// Last reported P2P connection flag.
    p2p_connected: bool,
    /// Peer count reported together with the P2P connection flag.
    p2p_peer_count: usize,
    /// Last reported realtime health.
    realtime_healthy: bool,
    /// Connection count reported together with the realtime health.
    websocket_connections: usize,
}
172
173impl Default for HealthState {
174 fn default() -> Self {
175 Self::new()
176 }
177}
178
179impl HealthState {
180 pub fn new() -> Self {
182 Self {
183 start_time: Instant::now(),
184 startup_complete: Arc::new(AtomicBool::new(false)),
185 ready: Arc::new(AtomicBool::new(false)),
186 components: Arc::new(RwLock::new(ComponentStates::default())),
187 }
188 }
189
190 pub fn uptime(&self) -> u64 {
192 self.start_time.elapsed().as_secs()
193 }
194
195 pub fn set_startup_complete(&self, complete: bool) {
197 self.startup_complete.store(complete, Ordering::SeqCst);
198 }
199
200 pub fn is_startup_complete(&self) -> bool {
202 self.startup_complete.load(Ordering::SeqCst)
203 }
204
205 pub fn set_ready(&self, ready: bool) {
207 self.ready.store(ready, Ordering::SeqCst);
208 }
209
210 pub fn is_ready(&self) -> bool {
212 self.ready.load(Ordering::SeqCst)
213 }
214
215 pub fn set_storage_healthy(&self, healthy: bool) {
217 self.components.write().storage_healthy = healthy;
218 }
219
220 pub fn set_p2p_connected(&self, connected: bool, peer_count: usize) {
222 let mut components = self.components.write();
223 components.p2p_connected = connected;
224 components.p2p_peer_count = peer_count;
225 }
226
227 pub fn set_realtime_healthy(&self, healthy: bool, connection_count: usize) {
229 let mut components = self.components.write();
230 components.realtime_healthy = healthy;
231 components.websocket_connections = connection_count;
232 }
233
234 fn storage_health(&self) -> ComponentHealth {
236 let components = self.components.read();
237 if components.storage_healthy {
238 ComponentHealth::up()
239 } else {
240 ComponentHealth::down()
241 }
242 }
243
244 fn p2p_health(&self) -> ComponentHealth {
246 let components = self.components.read();
247 if components.p2p_connected {
248 ComponentHealth {
249 status: HealthStatus::Up,
250 latency_ms: None,
251 details: Some(serde_json::json!({
252 "peer_count": components.p2p_peer_count
253 })),
254 }
255 } else {
256 ComponentHealth {
258 status: HealthStatus::Unknown,
259 latency_ms: None,
260 details: Some(serde_json::json!({
261 "reason": "P2P not connected or not enabled"
262 })),
263 }
264 }
265 }
266
267 fn realtime_health(&self) -> ComponentHealth {
269 let components = self.components.read();
270 ComponentHealth {
271 status: if components.realtime_healthy {
272 HealthStatus::Up
273 } else {
274 HealthStatus::Down
275 },
276 latency_ms: None,
277 details: Some(serde_json::json!({
278 "connections": components.websocket_connections
279 })),
280 }
281 }
282
283 fn readiness_checks(&self) -> ReadinessChecks {
285 ReadinessChecks {
286 storage: self.storage_health(),
287 p2p: self.p2p_health(),
288 realtime: self.realtime_health(),
289 }
290 }
291}
292
293pub fn health_routes<S>(state: HealthState) -> Router<S>
295where
296 S: Clone + Send + Sync + 'static,
297{
298 Router::new()
299 .route("/health", get(health_handler))
300 .route("/health/live", get(liveness_handler))
301 .route("/health/ready", get(readiness_handler))
302 .route("/health/startup", get(startup_handler))
303 .with_state(state)
304}
305
306async fn health_handler(State(state): State<HealthState>) -> Response {
308 let checks = state.readiness_checks();
309 let overall_status = if state.is_ready()
310 && checks.storage.status == HealthStatus::Up
311 && checks.realtime.status == HealthStatus::Up
312 {
313 HealthStatus::Up
314 } else {
315 HealthStatus::Down
316 };
317
318 let response = HealthResponse {
319 status: overall_status,
320 version: env!("CARGO_PKG_VERSION").to_string(),
321 uptime_seconds: state.uptime(),
322 checks,
323 };
324
325 let status_code = match overall_status {
326 HealthStatus::Up => StatusCode::OK,
327 _ => StatusCode::SERVICE_UNAVAILABLE,
328 };
329
330 (status_code, Json(response)).into_response()
331}
332
333async fn liveness_handler(State(state): State<HealthState>) -> Response {
335 let response = LivenessResponse {
336 status: HealthStatus::Up,
337 uptime_seconds: state.uptime(),
338 };
339
340 (StatusCode::OK, Json(response)).into_response()
341}
342
343async fn readiness_handler(State(state): State<HealthState>) -> Response {
345 if !state.is_ready() {
346 let response = ReadinessResponse {
347 status: HealthStatus::Down,
348 checks: state.readiness_checks(),
349 };
350 return (StatusCode::SERVICE_UNAVAILABLE, Json(response)).into_response();
351 }
352
353 let checks = state.readiness_checks();
354 let overall_status = if checks.storage.status == HealthStatus::Up {
355 HealthStatus::Up
356 } else {
357 HealthStatus::Down
358 };
359
360 let response = ReadinessResponse {
361 status: overall_status,
362 checks,
363 };
364
365 let status_code = match overall_status {
366 HealthStatus::Up => StatusCode::OK,
367 _ => StatusCode::SERVICE_UNAVAILABLE,
368 };
369
370 (status_code, Json(response)).into_response()
371}
372
373async fn startup_handler(State(state): State<HealthState>) -> Response {
375 if state.is_startup_complete() {
376 let response = StartupResponse {
377 status: HealthStatus::Up,
378 startup_duration_ms: None,
379 };
380 (StatusCode::OK, Json(response)).into_response()
381 } else {
382 let response = StartupResponse {
383 status: HealthStatus::Down,
384 startup_duration_ms: None,
385 };
386 (StatusCode::SERVICE_UNAVAILABLE, Json(response)).into_response()
387 }
388}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh state has both flags cleared; the setters turn them on.
    #[test]
    fn test_health_state() {
        let health = HealthState::new();
        assert!(!health.is_startup_complete());
        assert!(!health.is_ready());

        health.set_startup_complete(true);
        health.set_ready(true);
        assert!(health.is_startup_complete());
        assert!(health.is_ready());
    }

    /// The constructors set the expected status, and a reason yields details.
    #[test]
    fn test_component_health() {
        assert_eq!(ComponentHealth::up().status, HealthStatus::Up);

        let failed = ComponentHealth::down_with_reason("test failure");
        assert_eq!(failed.status, HealthStatus::Down);
        assert!(failed.details.is_some());
    }
}