use trueno_cuda_edge::supervisor::{
GpuHealthMonitor, HealthAction, HeartbeatStatus, SupervisionStrategy, SupervisionTree,
};
/// Verifies which supervision strategies report themselves as isolated.
///
/// The test expects `OneForOne` to be isolated and both `OneForAll` and
/// `RestForOne` to be non-isolated.
#[test]
fn supervision_strategies() {
    // (label used in the failure message, strategy under test, expected answer)
    let expectations = [
        ("OneForOne", SupervisionStrategy::OneForOne, true),
        ("OneForAll", SupervisionStrategy::OneForAll, false),
        ("RestForOne", SupervisionStrategy::RestForOne, false),
    ];

    for (label, strategy, isolated) in expectations {
        assert_eq!(strategy.is_isolated(), isolated, "strategy {}", label);
    }
}
/// With a `OneForOne` tree built with size argument 4, a crash reported for
/// index 2 is expected to produce a `Restart` action listing only index 2.
#[test]
fn supervision_tree_operations() {
    let mut supervisor = SupervisionTree::new(SupervisionStrategy::OneForOne, 4);

    // NOTE(review): the second argument to `handle_crash` is passed as 0 here;
    // its meaning is not visible from this file — confirm against the API.
    let trueno_cuda_edge::supervisor::SupervisorAction::Restart(restarted) =
        supervisor.handle_crash(2, 0)
    else {
        panic!("Expected Restart action")
    };

    assert_eq!(restarted, vec![2]);
}
/// With a `OneForAll` tree built with size argument 3, a crash reported for
/// index 1 is expected to produce a `Restart` action listing indices 0, 1 and 2.
#[test]
fn one_for_all_restarts() {
    let mut supervisor = SupervisionTree::new(SupervisionStrategy::OneForAll, 3);

    // NOTE(review): the second argument to `handle_crash` is passed as 0 here;
    // its meaning is not visible from this file — confirm against the API.
    let trueno_cuda_edge::supervisor::SupervisorAction::Restart(restarted) =
        supervisor.handle_crash(1, 0)
    else {
        panic!("Expected Restart action")
    };

    assert_eq!(restarted, vec![0, 1, 2]);
}
/// Checks how a monitor built with `max_missed(3)` maps heartbeat statuses to
/// health actions.
///
/// Expected mapping: `Alive` and two missed beats are `Healthy`, three missed
/// beats trigger `RestartWorker`, and `Dead` triggers `Shutdown`.
#[test]
fn health_monitoring() {
    let monitor = GpuHealthMonitor::builder()
        .max_missed(3)
        .throttle_temp(85)
        .shutdown_temp(95)
        .build();

    // (label used in the failure message, heartbeat status, expected action)
    let cases = [
        ("Alive", HeartbeatStatus::Alive, HealthAction::Healthy),
        ("MissedBeats(2)", HeartbeatStatus::MissedBeats(2), HealthAction::Healthy),
        ("MissedBeats(3)", HeartbeatStatus::MissedBeats(3), HealthAction::RestartWorker),
        ("Dead", HeartbeatStatus::Dead, HealthAction::Shutdown),
    ];

    for (label, status, expected) in cases {
        assert_eq!(monitor.check_status(status), expected, "status {}", label);
    }
}
/// Checks how a monitor created with `GpuHealthMonitor::new(3, 85, 95)` maps
/// temperature readings to health actions.
///
/// Expected mapping: 70 is `Healthy`, 85 and 90 are `Throttle`, and 95 is
/// `Shutdown`.
#[test]
fn thermal_monitoring() {
    // NOTE(review): the arguments are presumably (max missed beats, throttle
    // temperature, shutdown temperature) — confirm against the constructor.
    let monitor = GpuHealthMonitor::new(3, 85, 95);

    // (temperature reading, expected action)
    let readings = [
        (70, HealthAction::Healthy),
        (85, HealthAction::Throttle),
        (90, HealthAction::Throttle),
        (95, HealthAction::Shutdown),
    ];

    for (temperature, expected) in readings {
        assert_eq!(
            monitor.check_temperature(temperature),
            expected,
            "temperature {}",
            temperature
        );
    }
}