use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::time::{Duration, Instant};
/// Limits and time windows that a [`MeltdownTracker`] applies when deciding
/// whether repeated restarts should trip a fuse.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct MeltdownPolicy {
/// Number of restarts tolerated in the child-level history; the tracker
/// reports [`MeltdownOutcome::ChildFuse`] once the count is strictly greater.
pub child_max_restarts: u32,
/// Restarts older than this (relative to the newest recorded restart) are
/// dropped from the child-level history before counting.
pub child_window: Duration,
/// Number of failures tolerated in the supervisor-level history; the tracker
/// reports [`MeltdownOutcome::SupervisorFuse`] once the count is strictly greater.
pub supervisor_max_failures: u32,
/// Restarts older than this (relative to the newest recorded restart) are
/// dropped from the supervisor-level history before counting.
pub supervisor_window: Duration,
/// Minimum time that must have passed since the most recent failure before
/// `MeltdownTracker::reset_if_stable` clears the recorded history.
pub reset_after: Duration,
}
impl MeltdownPolicy {
    /// Builds a policy from its five settings.
    ///
    /// The arguments are stored unchanged in the fields of the same name; no
    /// validation is performed. Being a `const fn`, this can also be used to
    /// declare a policy as a `const` or `static`.
    ///
    /// * `child_max_restarts` / `child_window` - restart count tolerated within
    ///   the child-level window.
    /// * `supervisor_max_failures` / `supervisor_window` - failure count
    ///   tolerated within the supervisor-level window.
    /// * `reset_after` - quiet period after which the history may be cleared.
    pub const fn new(
        child_max_restarts: u32,
        child_window: Duration,
        supervisor_max_failures: u32,
        supervisor_window: Duration,
        reset_after: Duration,
    ) -> Self {
        Self {
            child_max_restarts,
            child_window,
            supervisor_max_failures,
            supervisor_window,
            reset_after,
        }
    }
}
/// Verdict produced by [`MeltdownTracker::record_child_restart`] after a
/// restart has been added to the failure histories.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MeltdownOutcome {
/// Neither the child-level nor the supervisor-level limit is exceeded.
Continue,
/// The child-level history holds more than `child_max_restarts` entries
/// (and the supervisor-level limit is not exceeded).
ChildFuse,
/// The supervisor-level history holds more than `supervisor_max_failures`
/// entries; this is checked before the child-level limit.
SupervisorFuse,
}
/// Sliding-window record of restart timestamps, evaluated against a
/// [`MeltdownPolicy`].
#[derive(Debug, Clone)]
pub struct MeltdownTracker {
/// Limits and windows applied when pruning and evaluating the histories.
pub policy: MeltdownPolicy,
/// Restart timestamps retained for the child-level limit; new entries are
/// appended at the back and expired ones removed from the front.
child_failures: VecDeque<Instant>,
/// Restart timestamps retained for the supervisor-level limit; maintained
/// the same way as `child_failures` but with `supervisor_window`.
supervisor_failures: VecDeque<Instant>,
/// Timestamp passed to the most recent `record_child_restart` call, or
/// `None` if nothing has been recorded since construction or `clear`.
last_failure: Option<Instant>,
}
impl MeltdownTracker {
    /// Creates a tracker with empty failure histories, governed by `policy`.
    pub fn new(policy: MeltdownPolicy) -> Self {
        Self {
            policy,
            child_failures: VecDeque::new(),
            supervisor_failures: VecDeque::new(),
            last_failure: None,
        }
    }

    /// Records a child restart that happened at `now` and returns the
    /// resulting verdict.
    ///
    /// Expired entries are pruned from both histories first, then `now` is
    /// appended to both and remembered as the most recent failure. The
    /// returned [`MeltdownOutcome`] therefore already accounts for this
    /// restart.
    pub fn record_child_restart(&mut self, now: Instant) -> MeltdownOutcome {
        self.prune(now);
        self.child_failures.push_back(now);
        self.supervisor_failures.push_back(now);
        self.last_failure = Some(now);
        self.current_outcome()
    }

    /// Clears the histories if at least `policy.reset_after` has elapsed
    /// between the most recent failure and `now`.
    ///
    /// Returns `true` when the tracker was cleared, and `false` when no
    /// failure is on record or the quiet period has not yet elapsed.
    pub fn reset_if_stable(&mut self, now: Instant) -> bool {
        let Some(last_failure) = self.last_failure else {
            return false;
        };
        // Saturate instead of using `duration_since`: a `now` that precedes
        // the last failure counts as zero elapsed time and can never panic.
        let quiet_for = now.saturating_duration_since(last_failure);
        if quiet_for < self.policy.reset_after {
            return false;
        }
        self.clear();
        true
    }

    /// Forgets every recorded failure, returning the tracker to the state
    /// produced by [`MeltdownTracker::new`] (the policy is kept).
    pub fn clear(&mut self) {
        self.child_failures.clear();
        self.supervisor_failures.clear();
        self.last_failure = None;
    }

    /// Number of entries currently held in the child-level history.
    ///
    /// No pruning happens here, so the value reflects the state left by the
    /// last call to `record_child_restart` or `clear`.
    pub fn child_failure_count(&self) -> usize {
        self.child_failures.len()
    }

    /// Drops entries that have aged out of their respective windows.
    fn prune(&mut self, now: Instant) {
        prune_window(&mut self.child_failures, now, self.policy.child_window);
        prune_window(
            &mut self.supervisor_failures,
            now,
            self.policy.supervisor_window,
        );
    }

    /// Evaluates the histories against the policy limits.
    ///
    /// A fuse trips only when a count is strictly greater than its limit, and
    /// the supervisor-level check takes precedence over the child-level one.
    fn current_outcome(&self) -> MeltdownOutcome {
        // `u32 -> usize` is lossless on every target that provides
        // `std::time::Instant`, so these casts cannot truncate.
        let supervisor_limit = self.policy.supervisor_max_failures as usize;
        let child_limit = self.policy.child_max_restarts as usize;

        if self.supervisor_failures.len() > supervisor_limit {
            MeltdownOutcome::SupervisorFuse
        } else if self.child_failures.len() > child_limit {
            MeltdownOutcome::ChildFuse
        } else {
            MeltdownOutcome::Continue
        }
    }
}
/// Removes entries from the front of `entries` whose age relative to `now`
/// is strictly greater than `window`.
///
/// Scanning stops at the first entry that is still inside the window (an age
/// exactly equal to `window` is kept), so entries are expected to be ordered
/// oldest-first. An entry that lies after `now` is treated as having age zero.
fn prune_window(entries: &mut VecDeque<Instant>, now: Instant, window: Duration) {
    while let Some(&oldest) = entries.front() {
        // Saturating subtraction: never panics when `oldest` is later than `now`.
        if now.saturating_duration_since(oldest) <= window {
            break;
        }
        entries.pop_front();
    }
}