1use serde::Serialize;
20use std::collections::HashMap;
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
26#[serde(rename_all = "lowercase")]
27pub enum HealthStatus {
28 Healthy,
30 Degraded,
32 Unhealthy,
34}
35
36impl HealthStatus {
37 pub fn as_str(&self) -> &'static str {
38 match self {
39 HealthStatus::Healthy => "healthy",
40 HealthStatus::Degraded => "degraded",
41 HealthStatus::Unhealthy => "unhealthy",
42 }
43 }
44}
45
46#[derive(Debug, Clone, Serialize)]
48pub struct ComponentCheck {
49 pub name: String,
50 pub status: HealthStatus,
51 #[serde(skip_serializing_if = "Option::is_none")]
52 pub message: Option<String>,
53 #[serde(skip_serializing_if = "Option::is_none")]
54 pub details: Option<serde_json::Value>,
55}
56
57#[derive(Debug, Clone, Serialize)]
59pub struct HealthReport {
60 pub status: HealthStatus,
61 pub uptime_secs: u64,
62 pub axon_version: String,
63 pub components: Vec<ComponentCheck>,
64}
65
/// Snapshot of runtime figures that `evaluate` turns into a [`HealthReport`].
///
/// The struct is plain data: callers fill it in and the check functions only
/// read from it. It derives `Debug` and `Clone` like the other public types in
/// this module so snapshots can be logged and reused.
#[derive(Debug, Clone)]
pub struct HealthInput {
    /// Uptime in seconds; copied verbatim into the report.
    pub uptime_secs: u64,
    /// Version string; copied verbatim into the report.
    pub axon_version: String,

    // --- supervisor ---
    /// Total number of daemons; compared against the `"dead"` count.
    pub daemon_count: usize,
    /// Daemon count per state name. Only the `"dead"` key influences the
    /// status; the whole map is echoed in the component details.
    pub daemon_state_counts: HashMap<String, usize>,

    // --- event bus (informational only) ---
    /// Reported as `events_published` in the event bus details.
    pub bus_events_published: u64,
    /// Reported as `subscriber_count` in the event bus details.
    pub bus_subscriber_count: usize,

    // --- session store (informational only) ---
    /// Reported as `memory_entries` in the session store details.
    pub session_memory_count: usize,
    /// Reported as `persistent_entries` in the session store details.
    pub session_store_count: usize,

    // --- version registry (informational only) ---
    /// Reported as `flows_tracked` in the version registry details.
    pub flows_tracked: usize,
    /// Reported as `versions_total` in the version registry details.
    pub versions_total: usize,

    // --- rate limiter ---
    /// When `false` the rate limiter check carries a `"disabled"` message.
    pub rate_limiter_enabled: bool,
    /// Reported as `max_requests` in the rate limiter details.
    pub rate_limiter_max_requests: u32,
    /// Reported as `window_secs` in the rate limiter details.
    pub rate_limiter_window_secs: u64,

    // --- request logger ---
    /// When `false` the request logger check is healthy with a `"disabled"` message.
    pub request_log_enabled: bool,
    /// Current number of log entries; numerator of the fill percentage.
    pub request_log_entries: usize,
    /// Log capacity; denominator of the fill percentage (zero skips the check).
    pub request_log_capacity: usize,

    // --- API keys ---
    /// Whether API keys are enabled; the "all keys revoked" check only applies when `true`.
    pub api_keys_enabled: bool,
    /// Number of active keys; zero with a non-zero total degrades the component.
    pub api_keys_active: usize,
    /// Total number of keys.
    pub api_keys_total: usize,

    // --- webhooks ---
    /// Reported as `active_webhooks` in the webhook details.
    pub webhooks_active: usize,
    /// Total number of webhooks; scales the tolerated failure count.
    pub webhooks_total: usize,
    /// Delivery failure count compared against five times `webhooks_total`.
    pub webhooks_total_failures: u64,

    // --- audit log (informational only) ---
    /// Reported as `buffered_entries` in the audit log details.
    pub audit_log_entries: usize,
    /// Reported as `total_recorded` in the audit log details.
    pub audit_log_total_recorded: u64,
}
97
98pub fn evaluate(input: &HealthInput) -> HealthReport {
102 let mut components = Vec::new();
103
104 components.push(check_event_bus(input));
106
107 components.push(check_supervisor(input));
109
110 components.push(check_session_store(input));
112
113 components.push(check_version_registry(input));
115
116 components.push(check_rate_limiter(input));
118
119 components.push(check_request_logger(input));
121
122 components.push(check_api_keys(input));
124
125 components.push(check_webhooks(input));
127
128 components.push(check_audit_log(input));
130
131 let status = aggregate_status(&components);
133
134 HealthReport {
135 status,
136 uptime_secs: input.uptime_secs,
137 axon_version: input.axon_version.clone(),
138 components,
139 }
140}
141
142pub fn liveness() -> serde_json::Value {
144 serde_json::json!({
145 "status": "alive"
146 })
147}
148
149pub fn readiness(input: &HealthInput) -> serde_json::Value {
151 let report = evaluate(input);
152 let ready = report.status != HealthStatus::Unhealthy;
153 serde_json::json!({
154 "ready": ready,
155 "status": report.status.as_str()
156 })
157}
158
159fn check_event_bus(input: &HealthInput) -> ComponentCheck {
162 let details = serde_json::json!({
163 "events_published": input.bus_events_published,
164 "subscriber_count": input.bus_subscriber_count,
165 });
166
167 ComponentCheck {
169 name: "event_bus".to_string(),
170 status: HealthStatus::Healthy,
171 message: None,
172 details: Some(details),
173 }
174}
175
176fn check_supervisor(input: &HealthInput) -> ComponentCheck {
177 let dead = input.daemon_state_counts.get("dead").copied().unwrap_or(0);
178 let total = input.daemon_count;
179
180 let details = serde_json::json!({
181 "daemon_count": total,
182 "states": input.daemon_state_counts,
183 });
184
185 let (status, message) = if dead > 0 && dead == total && total > 0 {
186 (HealthStatus::Unhealthy, Some(format!("all {} daemons dead", total)))
187 } else if dead > 0 {
188 (HealthStatus::Degraded, Some(format!("{} of {} daemons dead", dead, total)))
189 } else {
190 (HealthStatus::Healthy, None)
191 };
192
193 ComponentCheck {
194 name: "supervisor".to_string(),
195 status,
196 message,
197 details: Some(details),
198 }
199}
200
201fn check_session_store(input: &HealthInput) -> ComponentCheck {
202 let details = serde_json::json!({
203 "memory_entries": input.session_memory_count,
204 "persistent_entries": input.session_store_count,
205 });
206
207 ComponentCheck {
209 name: "session_store".to_string(),
210 status: HealthStatus::Healthy,
211 message: None,
212 details: Some(details),
213 }
214}
215
216fn check_version_registry(input: &HealthInput) -> ComponentCheck {
217 let details = serde_json::json!({
218 "flows_tracked": input.flows_tracked,
219 "versions_total": input.versions_total,
220 });
221
222 ComponentCheck {
223 name: "version_registry".to_string(),
224 status: HealthStatus::Healthy,
225 message: None,
226 details: Some(details),
227 }
228}
229
230fn check_rate_limiter(input: &HealthInput) -> ComponentCheck {
231 let details = serde_json::json!({
232 "enabled": input.rate_limiter_enabled,
233 "max_requests": input.rate_limiter_max_requests,
234 "window_secs": input.rate_limiter_window_secs,
235 });
236
237 ComponentCheck {
238 name: "rate_limiter".to_string(),
239 status: HealthStatus::Healthy,
240 message: if !input.rate_limiter_enabled { Some("disabled".to_string()) } else { None },
241 details: Some(details),
242 }
243}
244
245fn check_request_logger(input: &HealthInput) -> ComponentCheck {
246 let details = serde_json::json!({
247 "enabled": input.request_log_enabled,
248 "entries": input.request_log_entries,
249 "capacity": input.request_log_capacity,
250 });
251
252 let (status, message) = if !input.request_log_enabled {
254 (HealthStatus::Healthy, Some("disabled".to_string()))
255 } else if input.request_log_capacity > 0 && input.request_log_entries * 100 / input.request_log_capacity > 90 {
256 (HealthStatus::Degraded, Some(format!("buffer {}% full ({}/{})", input.request_log_entries * 100 / input.request_log_capacity, input.request_log_entries, input.request_log_capacity)))
257 } else {
258 (HealthStatus::Healthy, None)
259 };
260
261 ComponentCheck {
262 name: "request_logger".to_string(),
263 status,
264 message,
265 details: Some(details),
266 }
267}
268
269fn check_api_keys(input: &HealthInput) -> ComponentCheck {
270 let details = serde_json::json!({
271 "enabled": input.api_keys_enabled,
272 "active_keys": input.api_keys_active,
273 "total_keys": input.api_keys_total,
274 });
275
276 let (status, message) = if input.api_keys_enabled && input.api_keys_active == 0 && input.api_keys_total > 0 {
278 (HealthStatus::Degraded, Some("all keys revoked — only master token works".to_string()))
279 } else {
280 (HealthStatus::Healthy, None)
281 };
282
283 ComponentCheck {
284 name: "api_keys".to_string(),
285 status,
286 message,
287 details: Some(details),
288 }
289}
290
291fn check_webhooks(input: &HealthInput) -> ComponentCheck {
292 let details = serde_json::json!({
293 "active_webhooks": input.webhooks_active,
294 "total_webhooks": input.webhooks_total,
295 "total_failures": input.webhooks_total_failures,
296 });
297
298 let (status, message) = if input.webhooks_total > 0 && input.webhooks_total_failures > input.webhooks_total as u64 * 5 {
300 (HealthStatus::Degraded, Some(format!("{} delivery failures across {} webhooks", input.webhooks_total_failures, input.webhooks_total)))
301 } else {
302 (HealthStatus::Healthy, None)
303 };
304
305 ComponentCheck {
306 name: "webhooks".to_string(),
307 status,
308 message,
309 details: Some(details),
310 }
311}
312
313fn check_audit_log(input: &HealthInput) -> ComponentCheck {
314 let details = serde_json::json!({
315 "buffered_entries": input.audit_log_entries,
316 "total_recorded": input.audit_log_total_recorded,
317 });
318
319 ComponentCheck {
320 name: "audit_log".to_string(),
321 status: HealthStatus::Healthy,
322 message: None,
323 details: Some(details),
324 }
325}
326
327fn aggregate_status(components: &[ComponentCheck]) -> HealthStatus {
328 let mut worst = HealthStatus::Healthy;
329 for c in components {
330 match c.status {
331 HealthStatus::Unhealthy => return HealthStatus::Unhealthy,
332 HealthStatus::Degraded => worst = HealthStatus::Degraded,
333 HealthStatus::Healthy => {}
334 }
335 }
336 worst
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline input for which every component check is healthy.
    fn sample_input() -> HealthInput {
        let daemon_state_counts: HashMap<String, usize> = [("running", 2usize), ("waiting", 1usize)]
            .iter()
            .map(|&(state, count)| (state.to_string(), count))
            .collect();

        HealthInput {
            uptime_secs: 3600,
            axon_version: "0.31.0".to_string(),
            daemon_count: 3,
            daemon_state_counts,
            bus_events_published: 100,
            bus_subscriber_count: 3,
            session_memory_count: 5,
            session_store_count: 2,
            flows_tracked: 4,
            versions_total: 10,
            rate_limiter_enabled: true,
            rate_limiter_max_requests: 100,
            rate_limiter_window_secs: 60,
            request_log_enabled: true,
            request_log_entries: 50,
            request_log_capacity: 1000,
            api_keys_enabled: true,
            api_keys_active: 3,
            api_keys_total: 5,
            webhooks_active: 2,
            webhooks_total: 3,
            webhooks_total_failures: 0,
            audit_log_entries: 100,
            audit_log_total_recorded: 150,
        }
    }

    /// Baseline input with one extra daemon reported in the `"dead"` state.
    fn input_with_one_dead() -> HealthInput {
        let mut input = sample_input();
        input.daemon_state_counts.insert("dead".to_string(), 1);
        input
    }

    /// Baseline input whose `count` daemons are all in the `"dead"` state.
    fn input_with_all_dead(count: usize) -> HealthInput {
        let mut states = HashMap::new();
        states.insert("dead".to_string(), count);

        let mut input = sample_input();
        input.daemon_count = count;
        input.daemon_state_counts = states;
        input
    }

    /// Looks up a component by name, panicking when the report lacks it.
    fn component<'r>(report: &'r HealthReport, name: &str) -> &'r ComponentCheck {
        report
            .components
            .iter()
            .find(|c| c.name == name)
            .unwrap_or_else(|| panic!("component {} missing from report", name))
    }

    /// Returns the details of a named component, panicking when absent.
    fn details_of<'r>(report: &'r HealthReport, name: &str) -> &'r serde_json::Value {
        component(report, name)
            .details
            .as_ref()
            .unwrap_or_else(|| panic!("component {} missing details", name))
    }

    #[test]
    fn healthy_report_all_green() {
        let report = evaluate(&sample_input());
        assert_eq!(report.status, HealthStatus::Healthy);
        assert_eq!(report.components.len(), 9);
        for c in &report.components {
            assert_eq!(c.status, HealthStatus::Healthy, "component {} not healthy", c.name);
        }
    }

    #[test]
    fn degraded_when_some_daemons_dead() {
        let report = evaluate(&input_with_one_dead());
        assert_eq!(report.status, HealthStatus::Degraded);

        let sup = component(&report, "supervisor");
        assert_eq!(sup.status, HealthStatus::Degraded);
        assert!(sup.message.as_ref().unwrap().contains("1 of"));
    }

    #[test]
    fn unhealthy_when_all_daemons_dead() {
        let report = evaluate(&input_with_all_dead(3));
        assert_eq!(report.status, HealthStatus::Unhealthy);

        let sup = component(&report, "supervisor");
        assert_eq!(sup.status, HealthStatus::Unhealthy);
        assert!(sup.message.as_ref().unwrap().contains("all 3 daemons dead"));
    }

    #[test]
    fn healthy_when_no_daemons() {
        let mut input = sample_input();
        input.daemon_count = 0;
        input.daemon_state_counts.clear();

        assert_eq!(evaluate(&input).status, HealthStatus::Healthy);
    }

    #[test]
    fn liveness_always_alive() {
        let live = liveness();
        assert_eq!(live["status"], "alive");
    }

    #[test]
    fn readiness_true_when_healthy() {
        let ready = readiness(&sample_input());
        assert_eq!(ready["ready"], true);
        assert_eq!(ready["status"], "healthy");
    }

    #[test]
    fn readiness_true_when_degraded() {
        let ready = readiness(&input_with_one_dead());
        assert_eq!(ready["ready"], true);
        assert_eq!(ready["status"], "degraded");
    }

    #[test]
    fn readiness_false_when_unhealthy() {
        let ready = readiness(&input_with_all_dead(2));
        assert_eq!(ready["ready"], false);
        assert_eq!(ready["status"], "unhealthy");
    }

    #[test]
    fn report_includes_uptime_and_version() {
        let report = evaluate(&sample_input());
        assert_eq!(report.uptime_secs, 3600);
        assert_eq!(report.axon_version, "0.31.0");
    }

    #[test]
    fn component_details_present() {
        let report = evaluate(&sample_input());
        for c in &report.components {
            assert!(c.details.is_some(), "component {} missing details", c.name);
        }
    }

    #[test]
    fn event_bus_details_contain_counts() {
        let report = evaluate(&sample_input());
        let d = details_of(&report, "event_bus");
        assert_eq!(d["events_published"], 100);
        assert_eq!(d["subscriber_count"], 3);
    }

    #[test]
    fn supervisor_details_contain_states() {
        let report = evaluate(&sample_input());
        let d = details_of(&report, "supervisor");
        assert_eq!(d["daemon_count"], 3);
        assert!(d["states"].is_object());
    }

    #[test]
    fn session_store_details() {
        let report = evaluate(&sample_input());
        let d = details_of(&report, "session_store");
        assert_eq!(d["memory_entries"], 5);
        assert_eq!(d["persistent_entries"], 2);
    }

    #[test]
    fn version_registry_details() {
        let report = evaluate(&sample_input());
        let d = details_of(&report, "version_registry");
        assert_eq!(d["flows_tracked"], 4);
        assert_eq!(d["versions_total"], 10);
    }

    #[test]
    fn health_status_serialization() {
        let expectations = [
            (HealthStatus::Healthy, "\"healthy\""),
            (HealthStatus::Degraded, "\"degraded\""),
            (HealthStatus::Unhealthy, "\"unhealthy\""),
        ];
        for (status, expected) in expectations.iter() {
            let json = serde_json::to_string(status).unwrap();
            assert_eq!(json, *expected);
        }
    }

    #[test]
    fn full_report_serializable() {
        let report = evaluate(&sample_input());
        let json = serde_json::to_string(&report).unwrap();

        let expected_fragments = [
            "\"healthy\"",
            "\"event_bus\"",
            "\"supervisor\"",
            "\"session_store\"",
            "\"version_registry\"",
            "\"rate_limiter\"",
            "\"request_logger\"",
            "\"api_keys\"",
            "\"webhooks\"",
            "\"audit_log\"",
        ];
        for fragment in expected_fragments.iter() {
            assert!(json.contains(fragment));
        }
    }

    #[test]
    fn aggregate_picks_worst_status() {
        let bare = |name: &str, status: HealthStatus| ComponentCheck {
            name: name.into(),
            status,
            message: None,
            details: None,
        };

        let checks = vec![
            bare("a", HealthStatus::Healthy),
            bare("b", HealthStatus::Degraded),
            bare("c", HealthStatus::Healthy),
        ];
        assert_eq!(aggregate_status(&checks), HealthStatus::Degraded);

        let checks2 = vec![
            bare("a", HealthStatus::Degraded),
            bare("b", HealthStatus::Unhealthy),
        ];
        assert_eq!(aggregate_status(&checks2), HealthStatus::Unhealthy);
    }

    #[test]
    fn rate_limiter_details() {
        let report = evaluate(&sample_input());
        assert_eq!(component(&report, "rate_limiter").status, HealthStatus::Healthy);

        let d = details_of(&report, "rate_limiter");
        assert_eq!(d["enabled"], true);
        assert_eq!(d["max_requests"], 100);
        assert_eq!(d["window_secs"], 60);
    }

    #[test]
    fn rate_limiter_disabled_shows_message() {
        let mut input = sample_input();
        input.rate_limiter_enabled = false;

        let report = evaluate(&input);
        let rl = component(&report, "rate_limiter");
        assert_eq!(rl.status, HealthStatus::Healthy);
        assert_eq!(rl.message.as_deref(), Some("disabled"));
    }

    #[test]
    fn request_logger_degraded_when_buffer_full() {
        let mut input = sample_input();
        input.request_log_entries = 950;
        input.request_log_capacity = 1000;

        let report = evaluate(&input);
        let rl = component(&report, "request_logger");
        assert_eq!(rl.status, HealthStatus::Degraded);
        assert!(rl.message.as_ref().unwrap().contains("95%"));
    }

    #[test]
    fn request_logger_healthy_when_low_usage() {
        let report = evaluate(&sample_input());
        let rl = component(&report, "request_logger");
        assert_eq!(rl.status, HealthStatus::Healthy);
        assert!(rl.message.is_none());
    }

    #[test]
    fn api_keys_degraded_when_all_revoked() {
        let mut input = sample_input();
        input.api_keys_active = 0;
        input.api_keys_total = 3;

        let report = evaluate(&input);
        let ak = component(&report, "api_keys");
        assert_eq!(ak.status, HealthStatus::Degraded);
        assert!(ak.message.as_ref().unwrap().contains("all keys revoked"));
    }

    #[test]
    fn api_keys_healthy_when_disabled() {
        let mut input = sample_input();
        input.api_keys_enabled = false;
        input.api_keys_active = 0;
        input.api_keys_total = 0;

        let report = evaluate(&input);
        assert_eq!(component(&report, "api_keys").status, HealthStatus::Healthy);
    }

    #[test]
    fn webhooks_degraded_when_many_failures() {
        let mut input = sample_input();
        input.webhooks_total = 2;
        input.webhooks_total_failures = 20;

        let report = evaluate(&input);
        let wh = component(&report, "webhooks");
        assert_eq!(wh.status, HealthStatus::Degraded);
        assert!(wh.message.as_ref().unwrap().contains("20 delivery failures"));
    }

    #[test]
    fn webhooks_healthy_with_low_failures() {
        let report = evaluate(&sample_input());
        assert_eq!(component(&report, "webhooks").status, HealthStatus::Healthy);
    }

    #[test]
    fn audit_log_details() {
        let report = evaluate(&sample_input());
        assert_eq!(component(&report, "audit_log").status, HealthStatus::Healthy);

        let d = details_of(&report, "audit_log");
        assert_eq!(d["buffered_entries"], 100);
        assert_eq!(d["total_recorded"], 150);
    }
}