use std::collections::VecDeque;
use std::time::{Duration, Instant};
/// Tuning parameters for restart backoff and crash-loop detection.
///
/// The struct is `#[non_exhaustive]`, so code outside the defining crate
/// cannot build it with a struct literal and has to start from
/// [`BackoffConfig::default`].
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BackoffConfig {
    /// Delay applied after the first crash, and the value the delay returns
    /// to whenever the tracker resets it.
    pub initial: Duration,
    /// Multiplier applied to the delay after each crash that leads to a
    /// backoff.
    pub factor: f32,
    /// Upper bound for the delay produced by repeated multiplication.
    pub max: Duration,
    /// Number of crashes inside `crash_loop_window` at which the tracker
    /// switches to [`RestartState::CrashLooping`].
    pub crash_loop_threshold: u32,
    /// How far back a crash still counts towards `crash_loop_threshold`.
    pub crash_loop_window: Duration,
    /// How long the crash-looping gate stays closed once it is entered.
    pub crash_loop_cooldown: Duration,
    /// Minimum time since the last recorded start after which the delay and
    /// the crash history are reset.
    pub stable_run_threshold: Duration,
}
impl Default for BackoffConfig {
fn default() -> Self {
Self {
initial: Duration::from_millis(500),
factor: 2.0,
max: Duration::from_secs(60),
crash_loop_threshold: 5,
crash_loop_window: Duration::from_secs(60),
crash_loop_cooldown: Duration::from_secs(5 * 60),
stable_run_threshold: Duration::from_secs(60),
}
}
}
/// Gate that decides whether a restart may proceed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RestartState {
    /// No gate is active; a restart is always admissible.
    Idle,
    /// A crash was observed and restarts are held back until `until`.
    BackingOff {
        /// First instant at which the gate counts as open.
        until: Instant,
    },
    /// The crash-loop threshold was reached and restarts are held back for
    /// the cooldown period.
    CrashLooping {
        /// First instant at which the gate counts as open.
        until: Instant,
    },
}
impl RestartState {
    /// Reports whether a restart is allowed at `now`.
    ///
    /// `Idle` is always admissible. A gated state becomes admissible once
    /// `now` has reached its `until` instant (the boundary is inclusive).
    pub fn is_admissible(&self, now: Instant) -> bool {
        match self.release_at() {
            // No deadline means no gate.
            None => true,
            Some(deadline) => now >= deadline,
        }
    }

    /// Returns the instant at which the current gate opens, or `None` when
    /// the state is `Idle`.
    pub fn release_at(&self) -> Option<Instant> {
        match *self {
            RestartState::Idle => None,
            RestartState::BackingOff { until } => Some(until),
            RestartState::CrashLooping { until } => Some(until),
        }
    }
}
/// Tracks crashes and starts, and derives the current [`RestartState`] from
/// them.
#[derive(Debug, Clone)]
pub struct BackoffTracker {
    /// Settings the tracker was built with.
    config: BackoffConfig,
    /// Instants of recent crashes, oldest at the front.
    crash_history: VecDeque<Instant>,
    /// Delay that the next crash will apply.
    next_backoff: Duration,
    /// Instant of the most recently recorded start, if any.
    last_started: Option<Instant>,
    /// Gate currently in effect.
    state: RestartState,
}
impl Default for BackoffTracker {
fn default() -> Self {
Self::new(BackoffConfig::default())
}
}
impl BackoffTracker {
    /// Upper bound on the crash-history capacity reserved at construction.
    ///
    /// The history still grows on demand; the cap only keeps an extreme
    /// `crash_loop_threshold` from turning construction into a huge allocation.
    const MAX_PREALLOCATED_CRASHES: usize = 64;

    /// Creates a tracker in [`RestartState::Idle`] whose next backoff delay is
    /// `config.initial`.
    pub fn new(config: BackoffConfig) -> Self {
        let reserve =
            (config.crash_loop_threshold as usize).min(Self::MAX_PREALLOCATED_CRASHES);
        Self {
            next_backoff: config.initial,
            crash_history: VecDeque::with_capacity(reserve),
            last_started: None,
            state: RestartState::Idle,
            config,
        }
    }

    /// Returns the gate currently in effect.
    pub fn state(&self) -> RestartState {
        self.state
    }

    /// Records a start observed at `now`.
    ///
    /// If at least `stable_run_threshold` has elapsed since the previously
    /// recorded start, the backoff delay and crash history are reset first.
    /// The start time is then stored and the state returns to
    /// [`RestartState::Idle`].
    pub fn observe_start(&mut self, now: Instant) {
        self.reset_if_stable(now);
        self.last_started = Some(now);
        self.state = RestartState::Idle;
    }

    /// Records a clean exit observed at `now`.
    ///
    /// Applies the same stable-run reset as [`Self::observe_start`] and returns
    /// the state to [`RestartState::Idle`]. The recorded start time is left
    /// untouched.
    pub fn observe_clean_exit(&mut self, now: Instant) {
        self.reset_if_stable(now);
        self.state = RestartState::Idle;
    }

    /// Records a crash observed at `now` and closes the gate.
    ///
    /// Crashes older than `crash_loop_window` are dropped from the history and
    /// `now` is appended. If the history then holds at least
    /// `crash_loop_threshold` entries, the state becomes
    /// [`RestartState::CrashLooping`] for `crash_loop_cooldown` and the backoff
    /// delay is reset to `initial`. Otherwise the state becomes
    /// [`RestartState::BackingOff`] for the current delay, and the delay is
    /// then multiplied by `factor`, capped at `max`.
    pub fn observe_crash(&mut self, now: Instant) {
        self.prune_crash_history(now);
        self.crash_history.push_back(now);

        // Widen the threshold rather than truncating the length.
        if self.crash_history.len() >= self.config.crash_loop_threshold as usize {
            self.state = RestartState::CrashLooping {
                // An unrepresentable deadline falls back to `now`, which makes
                // the gate admissible immediately.
                until: now
                    .checked_add(self.config.crash_loop_cooldown)
                    .unwrap_or(now),
            };
            self.next_backoff = self.config.initial;
            return;
        }

        // Same fallback as above for an unrepresentable deadline.
        let until = now.checked_add(self.next_backoff).unwrap_or(now);
        self.state = RestartState::BackingOff { until };
        self.next_backoff = self.grown_backoff();
    }

    /// Opens the gate if its deadline has passed.
    ///
    /// Returns `true` only when a gated state was switched to
    /// [`RestartState::Idle`] by this call. Returns `false` when the tracker
    /// was already idle or the deadline lies after `now`.
    pub fn maybe_release(&mut self, now: Instant) -> bool {
        let gated = !matches!(self.state, RestartState::Idle);
        if gated && self.state.is_admissible(now) {
            self.state = RestartState::Idle;
            true
        } else {
            false
        }
    }

    /// Opens the gate unconditionally and forgets all backoff progress.
    ///
    /// Resets the delay to `initial` and clears the crash history. Returns
    /// whether a gate was in effect before the call.
    pub fn force_release(&mut self) -> bool {
        let was_gated = !matches!(self.state, RestartState::Idle);
        self.state = RestartState::Idle;
        self.next_backoff = self.config.initial;
        self.crash_history.clear();
        was_gated
    }

    /// Test-only view of the delay the next crash would apply.
    #[cfg(test)]
    pub(crate) fn current_window(&self) -> Duration {
        self.next_backoff
    }

    /// Resets the delay and crash history when the last recorded start is at
    /// least `stable_run_threshold` before `now`.
    fn reset_if_stable(&mut self, now: Instant) {
        if let Some(started) = self.last_started {
            if now.saturating_duration_since(started) >= self.config.stable_run_threshold {
                self.next_backoff = self.config.initial;
                self.crash_history.clear();
            }
        }
    }

    /// Drops crashes that fall outside `crash_loop_window` as seen from `now`.
    fn prune_crash_history(&mut self, now: Instant) {
        // When `now - window` is not representable the window reaches back
        // further than any recorded instant, so every crash is still inside
        // it. Substituting `now` as the cutoff would discard the entire
        // history and hide the crash loop.
        if let Some(cutoff) = now.checked_sub(self.config.crash_loop_window) {
            while self
                .crash_history
                .front()
                .is_some_and(|&seen| seen < cutoff)
            {
                self.crash_history.pop_front();
            }
        }
    }

    /// Computes the delay that follows the current one: at least `initial`,
    /// multiplied by `factor`, capped at `max`.
    fn grown_backoff(&self) -> Duration {
        let floor = self.config.initial.as_secs_f64();
        let ceiling = self.config.max.as_secs_f64();
        let scaled = self.next_backoff.as_secs_f64().max(floor) * f64::from(self.config.factor);
        // A negative or overflowing result (for example from a negative
        // factor) cannot be turned into a `Duration`. Use the longest
        // configured delay instead of panicking.
        Duration::try_from_secs_f64(scaled.min(ceiling)).unwrap_or(self.config.max)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the instant `secs` seconds after a base that is captured on
    /// first use and shared by every test in the process.
    fn t(secs: u64) -> Instant {
        static BASE: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();
        *BASE.get_or_init(Instant::now) + Duration::from_secs(secs)
    }

    #[test]
    fn fresh_tracker_is_idle_and_admissible() {
        let bt = BackoffTracker::default();
        let state = bt.state();
        assert_eq!(state, RestartState::Idle);
        assert!(state.is_admissible(t(0)));
    }

    #[test]
    fn first_crash_flips_to_backing_off_with_initial_window() {
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));

        let other = bt.state();
        let RestartState::BackingOff { until } = other else {
            panic!("expected BackingOff(500ms), got {other:?}");
        };
        assert_eq!(until, t(0) + Duration::from_millis(500));
    }

    #[test]
    fn consecutive_crashes_double_the_backoff_up_to_the_cap() {
        let mut bt = BackoffTracker::default();
        // Delays applied: 500ms, 1s, 2s, then 4s on the fourth crash.
        for secs in 0..4 {
            bt.observe_crash(t(secs));
        }

        let other = bt.state();
        let RestartState::BackingOff { until } = other else {
            panic!("expected BackingOff after 4 crashes, got {other:?}");
        };
        assert_eq!(until, t(3) + Duration::from_secs(4));
    }

    #[test]
    fn fifth_crash_within_window_flips_to_crash_looping() {
        let mut bt = BackoffTracker::default();
        for secs in 0..5 {
            bt.observe_crash(t(secs));
        }

        let other = bt.state();
        let RestartState::CrashLooping { until } = other else {
            panic!("expected CrashLooping after 5 crashes, got {other:?}");
        };
        assert_eq!(until, t(4) + Duration::from_secs(5 * 60));
    }

    #[test]
    fn crash_loop_count_slides_with_the_window() {
        let mut bt = BackoffTracker::default();
        for secs in 0..4 {
            bt.observe_crash(t(secs));
        }
        // The first four crashes are outside the window by now.
        bt.observe_crash(t(120));

        assert!(
            matches!(bt.state(), RestartState::BackingOff { .. }),
            "got {:?}",
            bt.state(),
        );
    }

    #[test]
    fn observe_start_after_stable_run_resets_the_window() {
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));
        bt.observe_start(t(1));
        bt.observe_start(t(121));
        assert_eq!(bt.current_window(), Duration::from_millis(500));
    }

    #[test]
    fn observe_start_before_stable_run_keeps_the_doubled_window() {
        let doubled = Duration::from_secs(1);
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));
        assert_eq!(bt.current_window(), doubled);
        bt.observe_start(t(1));
        bt.observe_start(t(5));
        assert_eq!(bt.current_window(), doubled);
    }

    #[test]
    fn maybe_release_flips_backing_off_to_idle_after_until_elapses() {
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));

        // Too early: the gate stays closed.
        assert!(!bt.maybe_release(t(0)));
        assert!(matches!(bt.state(), RestartState::BackingOff { .. }));

        // Past the 500ms deadline: the gate opens.
        let released = bt.maybe_release(t(0) + Duration::from_millis(600));
        assert!(released);
        assert_eq!(bt.state(), RestartState::Idle);
    }

    #[test]
    fn admissibility_predicate_honors_until_boundary() {
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));

        let RestartState::BackingOff { until } = bt.state() else {
            unreachable!()
        };
        let just_before = until - Duration::from_millis(1);
        assert!(!bt.state().is_admissible(just_before));
        assert!(bt.state().is_admissible(until));
    }

    #[test]
    fn backoff_caps_at_max() {
        let mut bt = BackoffTracker::default();
        // Fifteen crashes, 200 seconds apart.
        for secs in (0..3000).step_by(200) {
            bt.observe_crash(t(secs));
        }
        assert_eq!(bt.current_window(), Duration::from_secs(60));
    }

    #[test]
    fn force_release_clears_backing_off_gate_and_resets_window() {
        let initial = BackoffConfig::default().initial;
        let mut bt = BackoffTracker::default();
        bt.observe_crash(t(0));
        bt.observe_crash(t(1));
        assert!(matches!(bt.state(), RestartState::BackingOff { .. }));
        assert!(bt.current_window() > initial);

        let was_gated = bt.force_release();
        assert!(was_gated);
        assert!(matches!(bt.state(), RestartState::Idle));
        assert_eq!(bt.current_window(), initial);
    }

    #[test]
    fn force_release_returns_false_when_already_idle() {
        let mut bt = BackoffTracker::default();
        assert!(!bt.force_release());
        assert!(matches!(bt.state(), RestartState::Idle));
    }

    #[test]
    fn force_release_clears_crash_looping_gate() {
        let mut bt = BackoffTracker::new(BackoffConfig {
            crash_loop_threshold: 3,
            crash_loop_window: Duration::from_secs(60),
            ..BackoffConfig::default()
        });
        for secs in 0..3 {
            bt.observe_crash(t(secs));
        }
        assert!(matches!(bt.state(), RestartState::CrashLooping { .. }));
        assert!(bt.force_release());
        assert!(matches!(bt.state(), RestartState::Idle));
    }
}