1use crate::api::{ApiResponse, ApiState};
6use axum::{
7 extract::State,
8 http::StatusCode,
9 response::{IntoResponse, Response},
10};
11use serde::Serialize;
12use std::collections::HashMap;
13
/// Payload of the basic health endpoint, assembled by `health_check`.
#[derive(Debug, Serialize)]
pub struct HealthResponse {
    /// Overall status: `"healthy"` when every probed service reported
    /// `"healthy"`, otherwise `"degraded"`.
    pub status: String,
    /// RFC 3339 UTC timestamp taken while the response was being built.
    pub timestamp: String,
    /// Status string of each probed service, keyed by service name.
    pub services: HashMap<String, String>,
    /// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Human-readable uptime string.
    pub uptime: String,
}
28
/// Payload of the detailed health endpoint, assembled by
/// `detailed_health_check`.
#[derive(Debug, Serialize)]
pub struct DetailedHealthResponse {
    /// Overall status: `"healthy"` when every probed service reported
    /// `"healthy"`, otherwise `"degraded"`.
    pub status: String,
    /// RFC 3339 UTC timestamp taken while the response was being built.
    pub timestamp: String,
    /// Detailed result of each probed service, keyed by service name.
    pub services: HashMap<String, ServiceHealth>,
    /// Host-level memory, CPU, disk and network figures.
    pub system: SystemHealth,
    /// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
    pub version: String,
    /// Human-readable uptime string.
    pub uptime: String,
}
39
/// Detailed health entry for a single service in the detailed health response.
#[derive(Debug, Serialize)]
pub struct ServiceHealth {
    /// Status string reported by the probe (`"healthy"` or `"unhealthy"`).
    pub status: String,
    /// How long the probe took, in milliseconds.
    pub response_time_ms: u64,
    /// RFC 3339 UTC timestamp taken when this entry was created.
    pub last_check: String,
    /// Error description, if the probe failed; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Extra service-specific values; may be empty.
    pub details: HashMap<String, serde_json::Value>,
}
53
/// Host-level figures included in the detailed health response.
#[derive(Debug, Serialize)]
pub struct SystemHealth {
    /// Memory figures, as produced by `get_memory_info`.
    pub memory_usage: MemoryInfo,
    /// Global CPU usage as reported by `sysinfo` (presumably a percentage —
    /// confirm against the sysinfo documentation).
    pub cpu_usage: f64,
    /// Disk figures, as produced by `get_disk_info`.
    pub disk_usage: DiskInfo,
    /// Network figures, as produced by `get_network_info`.
    pub network: NetworkInfo,
}
62
/// Memory figures; each `*_mb` value is the corresponding `sysinfo` reading
/// divided by 1024 * 1024 (MiB, assuming sysinfo reports bytes).
#[derive(Debug, Serialize)]
pub struct MemoryInfo {
    /// Total memory.
    pub total_mb: u64,
    /// Used memory.
    pub used_mb: u64,
    /// Filled from sysinfo's *available* memory reading by `get_memory_info`.
    pub free_mb: u64,
    /// `used_mb / total_mb * 100`, or `0.0` when `total_mb` is zero.
    pub usage_percent: f64,
}
71
/// Disk figures summed over every disk `sysinfo` lists; each `*_gb` value is a
/// byte count divided by 1024^3 (GiB, assuming sysinfo reports bytes).
#[derive(Debug, Serialize)]
pub struct DiskInfo {
    /// Combined capacity of all listed disks.
    pub total_gb: u64,
    /// Combined capacity minus combined available space.
    pub used_gb: u64,
    /// `total_gb - used_gb`.
    pub free_gb: u64,
    /// Share of the combined capacity that is in use, as a percentage.
    pub usage_percent: f64,
}
80
/// Network figures included in the detailed health response.
#[derive(Debug, Serialize)]
pub struct NetworkInfo {
    /// NOTE(review): `get_network_info` always sets this to 0.
    pub requests_per_minute: u64,
    /// NOTE(review): `get_network_info` always sets this to 0.
    pub active_connections: u64,
    /// Sum of `total_transmitted()` over every interface `sysinfo` lists.
    pub bytes_sent: u64,
    /// Sum of `total_received()` over every interface `sysinfo` lists.
    pub bytes_received: u64,
}
89
/// Serializable collection of metrics with a timestamp.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; the `metrics` handler returns plain text instead.
#[derive(Debug, Serialize)]
pub struct MetricsResponse {
    /// The individual metric samples.
    pub metrics: Vec<Metric>,
    /// Timestamp string (presumably RFC 3339 like the health responses —
    /// TODO confirm at the construction site).
    pub timestamp: String,
}
96
/// A single metric sample.
///
/// NOTE(review): not constructed in the visible part of this file; the field
/// meanings below are inferred from names and should be confirmed.
#[derive(Debug, Serialize)]
pub struct Metric {
    /// Metric name.
    pub name: String,
    /// Sample value.
    pub value: f64,
    /// Label key/value pairs attached to the sample.
    pub labels: HashMap<String, String>,
    /// Help / description text (presumably).
    pub help: String,
    /// Metric kind as a string (presumably e.g. counter or gauge — TODO confirm).
    pub metric_type: String,
}
106
107pub async fn health_check(State(state): State<ApiState>) -> ApiResponse<HealthResponse> {
109 let mut services = std::collections::HashMap::new();
110 let mut overall_healthy = true;
111
112 let auth_health = check_auth_framework_health(&state.auth_framework).await;
114 services.insert("auth_framework".to_string(), auth_health.status.clone());
115 if auth_health.status != "healthy" {
116 overall_healthy = false;
117 }
118
119 let storage_health = check_storage_health(&state.auth_framework).await;
121 services.insert("storage".to_string(), storage_health.status.clone());
122 if storage_health.status != "healthy" {
123 overall_healthy = false;
124 }
125
126 let token_health = check_token_manager_health(&state.auth_framework).await;
128 services.insert("token_manager".to_string(), token_health.status.clone());
129 if token_health.status != "healthy" {
130 overall_healthy = false;
131 }
132
133 let memory_health = check_memory_health().await;
135 services.insert("memory".to_string(), memory_health.status.clone());
136 if memory_health.status != "healthy" {
137 overall_healthy = false;
138 }
139
140 let health = HealthResponse {
141 status: if overall_healthy {
142 "healthy".to_string()
143 } else {
144 "degraded".to_string()
145 },
146 timestamp: chrono::Utc::now().to_rfc3339(),
147 services,
148 version: env!("CARGO_PKG_VERSION").to_string(),
149 uptime: get_uptime().await,
150 };
151
152 ApiResponse::success(health)
153}
154
155pub async fn detailed_health_check(
157 State(state): State<ApiState>,
158) -> ApiResponse<DetailedHealthResponse> {
159 let mut services = HashMap::new();
160 let mut overall_healthy = true;
161
162 let auth_health = check_auth_framework_health(&state.auth_framework).await;
164 services.insert(
165 "auth_framework".to_string(),
166 ServiceHealth {
167 status: auth_health.status.clone(),
168 response_time_ms: auth_health.response_time_ms,
169 last_check: chrono::Utc::now().to_rfc3339(),
170 error: auth_health.error,
171 details: {
172 let mut details = HashMap::new();
173 if let Ok(stats) = state.auth_framework.get_stats().await {
174 details.insert(
175 "active_sessions".to_string(),
176 serde_json::Value::Number(serde_json::Number::from(stats.active_sessions)),
177 );
178 details.insert(
179 "auth_attempts".to_string(),
180 serde_json::Value::Number(serde_json::Number::from(stats.auth_attempts)),
181 );
182 details.insert(
183 "tokens_issued".to_string(),
184 serde_json::Value::Number(serde_json::Number::from(stats.tokens_issued)),
185 );
186 }
187 details
188 },
189 },
190 );
191 if auth_health.status != "healthy" {
192 overall_healthy = false;
193 }
194
195 let storage_health = check_storage_health(&state.auth_framework).await;
197 services.insert(
198 "storage".to_string(),
199 ServiceHealth {
200 status: storage_health.status.clone(),
201 response_time_ms: storage_health.response_time_ms,
202 last_check: chrono::Utc::now().to_rfc3339(),
203 error: storage_health.error,
204 details: HashMap::new(),
205 },
206 );
207 if storage_health.status != "healthy" {
208 overall_healthy = false;
209 }
210
211 let token_health = check_token_manager_health(&state.auth_framework).await;
213 services.insert(
214 "token_manager".to_string(),
215 ServiceHealth {
216 status: token_health.status.clone(),
217 response_time_ms: token_health.response_time_ms,
218 last_check: chrono::Utc::now().to_rfc3339(),
219 error: token_health.error,
220 details: HashMap::new(),
221 },
222 );
223 if token_health.status != "healthy" {
224 overall_healthy = false;
225 }
226
227 let system = SystemHealth {
228 memory_usage: get_memory_info().await,
229 cpu_usage: get_cpu_usage().await,
230 disk_usage: get_disk_info().await,
231 network: get_network_info().await,
232 };
233
234 let health = DetailedHealthResponse {
235 status: if overall_healthy {
236 "healthy".to_string()
237 } else {
238 "degraded".to_string()
239 },
240 timestamp: chrono::Utc::now().to_rfc3339(),
241 services,
242 system,
243 version: env!("CARGO_PKG_VERSION").to_string(),
244 uptime: get_uptime().await,
245 };
246
247 ApiResponse::success(health)
248}
249
250pub async fn metrics(State(state): State<ApiState>) -> impl IntoResponse {
252 let metrics_text = state.auth_framework.export_prometheus_metrics().await;
253
254 Response::builder()
255 .status(StatusCode::OK)
256 .header("content-type", "text/plain; version=0.0.4")
257 .body(metrics_text)
258 .expect("infallible: String body is always valid")
259}
260
261pub async fn readiness_check(State(state): State<ApiState>) -> impl IntoResponse {
263 let ready = state.auth_framework.get_stats().await.is_ok();
266
267 if ready {
268 (StatusCode::OK, "Ready").into_response()
269 } else {
270 (StatusCode::SERVICE_UNAVAILABLE, "Not Ready").into_response()
271 }
272}
273
274pub async fn liveness_check(State(state): State<ApiState>) -> impl IntoResponse {
276 state.auth_framework.get_performance_metrics().await;
279 (StatusCode::OK, "Alive").into_response()
280}
281
282async fn check_auth_framework_health(
284 auth_framework: &std::sync::Arc<crate::AuthFramework>,
285) -> ServiceHealthResult {
286 let start = std::time::Instant::now();
287
288 match auth_framework.get_stats().await {
290 Ok(_stats) => ServiceHealthResult {
291 status: "healthy".to_string(),
292 response_time_ms: start.elapsed().as_millis() as u64,
293 error: None,
294 },
295 Err(e) => {
296 tracing::warn!(error = %e, "Health check: framework error");
297 ServiceHealthResult {
298 status: "unhealthy".to_string(),
299 response_time_ms: start.elapsed().as_millis() as u64,
300 error: Some("Service check failed".to_string()),
301 }
302 }
303 }
304}
305
306async fn check_storage_health(
307 auth_framework: &std::sync::Arc<crate::AuthFramework>,
308) -> ServiceHealthResult {
309 let start = std::time::Instant::now();
310
311 match auth_framework.get_stats().await {
314 Ok(_) => ServiceHealthResult {
315 status: "healthy".to_string(),
316 response_time_ms: start.elapsed().as_millis() as u64,
317 error: None,
318 },
319 Err(e) => {
320 tracing::warn!(error = %e, "Health check: storage error");
321 ServiceHealthResult {
322 status: "unhealthy".to_string(),
323 response_time_ms: start.elapsed().as_millis() as u64,
324 error: Some("Service check failed".to_string()),
325 }
326 }
327 }
328}
329
330async fn check_token_manager_health(
331 auth_framework: &std::sync::Arc<crate::AuthFramework>,
332) -> ServiceHealthResult {
333 let start = std::time::Instant::now();
334
335 let test_token = auth_framework.token_manager().create_jwt_token(
337 "health_check_user",
338 vec!["health_check".to_string()],
339 Some(std::time::Duration::from_secs(1)),
340 );
341
342 match test_token {
343 Ok(token) => {
344 match auth_framework.token_manager().validate_jwt_token(&token) {
346 Ok(_) => ServiceHealthResult {
347 status: "healthy".to_string(),
348 response_time_ms: start.elapsed().as_millis() as u64,
349 error: None,
350 },
351 Err(e) => {
352 tracing::warn!(error = %e, "Health check: token validation error");
353 ServiceHealthResult {
354 status: "unhealthy".to_string(),
355 response_time_ms: start.elapsed().as_millis() as u64,
356 error: Some("Service check failed".to_string()),
357 }
358 }
359 }
360 }
361 Err(e) => {
362 tracing::warn!(error = %e, "Health check: token creation error");
363 ServiceHealthResult {
364 status: "unhealthy".to_string(),
365 response_time_ms: start.elapsed().as_millis() as u64,
366 error: Some("Service check failed".to_string()),
367 }
368 }
369 }
370}
371
372async fn check_memory_health() -> ServiceHealthResult {
373 let start = std::time::Instant::now();
374
375 let test_vec: Vec<u8> = vec![0; 1024]; ServiceHealthResult {
379 status: if test_vec.len() == 1024 {
380 "healthy".to_string()
381 } else {
382 "unhealthy".to_string()
383 },
384 response_time_ms: start.elapsed().as_millis() as u64,
385 error: None,
386 }
387}
388
/// Returns a human-readable uptime string.
///
/// The reference point is captured lazily on the first call to this function,
/// so the value is the time elapsed since that first call rather than since
/// process start.
///
/// A monotonic [`std::time::Instant`] is used as the reference point. The
/// wall clock (`SystemTime`) can be stepped by NTP or by an operator, which
/// would skew the reported uptime or make the elapsed-time computation fail.
///
/// Output formats (unchanged):
/// - `"{d} days, {h} hours, {m} minutes"` when at least one day has elapsed,
/// - `"{h} hours, {m} minutes"` when at least one hour has elapsed,
/// - `"{m} minutes"` otherwise.
async fn get_uptime() -> String {
    use std::sync::OnceLock;
    use std::time::Instant;

    const SECS_PER_MINUTE: u64 = 60;
    const SECS_PER_HOUR: u64 = 3600;
    const SECS_PER_DAY: u64 = 86400;

    static START_TIME: OnceLock<Instant> = OnceLock::new();

    // The first caller fixes the reference instant; later callers reuse it.
    let seconds = START_TIME.get_or_init(Instant::now).elapsed().as_secs();

    let days = seconds / SECS_PER_DAY;
    let hours = (seconds % SECS_PER_DAY) / SECS_PER_HOUR;
    let minutes = (seconds % SECS_PER_HOUR) / SECS_PER_MINUTE;

    if days > 0 {
        format!("{} days, {} hours, {} minutes", days, hours, minutes)
    } else if hours > 0 {
        format!("{} hours, {} minutes", hours, minutes)
    } else {
        format!("{} minutes", minutes)
    }
}
415
416async fn get_memory_info() -> MemoryInfo {
417 use sysinfo::System;
418 let mut sys = System::new();
419 sys.refresh_memory();
420
421 let total_mb = sys.total_memory() / (1024 * 1024);
422 let used_mb = sys.used_memory() / (1024 * 1024);
423 let free_mb = sys.available_memory() / (1024 * 1024);
424 let usage_percent = if total_mb > 0 {
425 (used_mb as f64 / total_mb as f64) * 100.0
426 } else {
427 0.0
428 };
429
430 MemoryInfo {
431 total_mb,
432 used_mb,
433 free_mb,
434 usage_percent,
435 }
436}
437
438async fn get_cpu_usage() -> f64 {
439 use sysinfo::System;
440 let mut sys = System::new();
441 sys.refresh_cpu_all();
442 tokio::time::sleep(std::time::Duration::from_millis(200)).await;
444 sys.refresh_cpu_all();
445 sys.global_cpu_usage() as f64
446}
447
448async fn get_disk_info() -> DiskInfo {
449 use sysinfo::Disks;
450 let disks = Disks::new_with_refreshed_list();
451 let (mut total, mut used) = (0u64, 0u64);
452 for disk in disks.list() {
453 total += disk.total_space();
454 used += disk.total_space() - disk.available_space();
455 }
456 let total_gb = total / (1024 * 1024 * 1024);
457 let used_gb = used / (1024 * 1024 * 1024);
458 let free_gb = total_gb.saturating_sub(used_gb);
459 let usage_percent = if total_gb > 0 {
460 (used_gb as f64 / total_gb as f64) * 100.0
461 } else {
462 0.0
463 };
464
465 DiskInfo {
466 total_gb,
467 used_gb,
468 free_gb,
469 usage_percent,
470 }
471}
472
473async fn get_network_info() -> NetworkInfo {
474 use sysinfo::Networks;
475 let networks = Networks::new_with_refreshed_list();
476 let (mut sent, mut received) = (0u64, 0u64);
477 for data in networks.list().values() {
478 sent += data.total_transmitted();
479 received += data.total_received();
480 }
481
482 NetworkInfo {
483 requests_per_minute: 0, active_connections: 0, bytes_sent: sent,
486 bytes_received: received,
487 }
488}
489
/// Internal result of a single service probe, produced by the `check_*`
/// functions in this file and consumed by the health handlers.
#[derive(Debug)]
struct ServiceHealthResult {
    /// `"healthy"` or `"unhealthy"`.
    pub status: String,
    /// Time the probe took, in milliseconds.
    pub response_time_ms: u64,
    /// Generic failure message when the probe failed, `None` otherwise.
    pub error: Option<String>,
}