use serde::{Deserialize, Serialize};
use crate::clock::Timestamp;
/// Tunable parameters for health scoring and cooldown escalation.
///
/// Scores live in `[0.0, 1.0]`, with `1.0` meaning fully healthy. Penalties and
/// the success boost are absolute amounts subtracted from / added to the score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthConfig {
/// Half-life, in seconds, with which the gap between the score and `1.0`
/// shrinks as time passes between observations. A value `<= 0.0` disables
/// this time-based recovery.
pub decay_half_life_seconds: f64,
/// Amount subtracted from the score when a rate-limited response is recorded
/// (the name suggests HTTP 429).
pub penalty_429: f32,
/// Amount subtracted from the score when a forbidden response is recorded
/// (the name suggests HTTP 403).
pub penalty_403: f32,
/// Amount subtracted from the score when a server error is recorded
/// (the name suggests HTTP 5xx).
pub penalty_5xx: f32,
/// Amount subtracted from the score when a timeout is recorded.
pub penalty_timeout: f32,
/// Amount added to the score for each recorded success (capped at `1.0`).
pub boost_success: f32,
/// Number of consecutive failures at which a cooldown starts being applied.
pub cooldown_trigger_count: u32,
/// Factor by which the cooldown grows for every consecutive failure beyond
/// `cooldown_trigger_count`.
pub cooldown_multiplier: f64,
/// Upper bound, in seconds, on any single cooldown.
pub max_cooldown_seconds: u64,
}
impl Default for HealthConfig {
fn default() -> Self {
Self {
decay_half_life_seconds: 300.0,
penalty_429: 0.25,
penalty_403: 0.50,
penalty_5xx: 0.10,
penalty_timeout: 0.20,
boost_success: 0.02,
cooldown_trigger_count: 3,
cooldown_multiplier: 2.0,
max_cooldown_seconds: 600,
}
}
}
/// Mutable health record for a single tracked target: a bounded score,
/// failure/cooldown bookkeeping, and a smoothed latency estimate.
pub(crate) struct HealthState {
/// Current health score; starts at `1.0` and is kept at or above `0.0`.
score: f32,
/// Failures recorded since the most recent success.
consecutive_failures: u32,
/// Length, in seconds, of the most recently computed cooldown (`0` if none yet).
current_cooldown_secs: u64,
/// Instant at which the latest cooldown ends; `None` until a cooldown is
/// first triggered.
cooldown_until: Option<Timestamp>,
/// Time of the most recently recorded observation; the reference point for
/// time-based score recovery.
last_observation: Timestamp,
/// Count of all recorded observations (successes and failures).
total_observations: u64,
/// Count of recorded successes.
total_successes: u64,
/// Exponentially weighted moving average of success latencies, in
/// milliseconds; `0.0` until the first sample is recorded.
ewma_latency_ms: f64,
}
impl HealthState {
    /// Weight given to the newest sample in the latency moving average.
    const LATENCY_EWMA_ALPHA: f64 = 0.3;

    /// Creates a state with a perfect score, no failures, no cooldown and no
    /// latency samples, using `now` as the reference point for later decay.
    pub(crate) fn new(now: Timestamp) -> Self {
        Self {
            score: 1.0,
            consecutive_failures: 0,
            current_cooldown_secs: 0,
            cooldown_until: None,
            last_observation: now,
            total_observations: 0,
            total_successes: 0,
            ewma_latency_ms: 0.0,
        }
    }

    /// Returns the current health score (`1.0` is fully healthy, `0.0` is the floor).
    pub(crate) fn score(&self) -> f32 {
        self.score
    }

    /// Returns `true` while `now` is strictly before the end of the active cooldown.
    pub(crate) fn is_in_cooldown(&self, now: Timestamp) -> bool {
        matches!(self.cooldown_until, Some(until) if now < until)
    }

    /// Returns the smoothed success latency in milliseconds (`0.0` before any success).
    pub(crate) fn latency_ms(&self) -> f64 {
        self.ewma_latency_ms
    }

    /// Records a successful call that took `latency_ms` milliseconds.
    ///
    /// Applies time-based recovery, adds `config.boost_success` (capped at
    /// `1.0`), resets the consecutive-failure streak and folds the latency
    /// into the moving average.
    pub(crate) fn record_success(
        &mut self,
        latency_ms: u64,
        now: Timestamp,
        config: &HealthConfig,
    ) {
        self.apply_decay(now, config);
        self.score = (self.score + config.boost_success).min(1.0);
        self.consecutive_failures = 0;
        self.total_observations = self.total_observations.saturating_add(1);
        // Must be bumped before `update_latency`, which uses it to detect the
        // first latency sample.
        self.total_successes = self.total_successes.saturating_add(1);
        self.update_latency(latency_ms, config);
        self.last_observation = now;
    }

    /// Records a rate-limited response, subtracting `config.penalty_429`.
    ///
    /// `default_cooldown_secs` is the base cooldown used once the failure
    /// streak reaches `config.cooldown_trigger_count`.
    pub(crate) fn record_rate_limited(
        &mut self,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.record_penalized(config.penalty_429, now, config, default_cooldown_secs);
    }

    /// Records a forbidden response, subtracting `config.penalty_403`.
    ///
    /// `default_cooldown_secs` is the base cooldown used once the failure
    /// streak reaches `config.cooldown_trigger_count`.
    pub(crate) fn record_forbidden(
        &mut self,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.record_penalized(config.penalty_403, now, config, default_cooldown_secs);
    }

    /// Records a server error, subtracting `config.penalty_5xx`.
    ///
    /// `default_cooldown_secs` is the base cooldown used once the failure
    /// streak reaches `config.cooldown_trigger_count`.
    pub(crate) fn record_server_error(
        &mut self,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.record_penalized(config.penalty_5xx, now, config, default_cooldown_secs);
    }

    /// Records a timeout, subtracting `config.penalty_timeout`.
    ///
    /// `default_cooldown_secs` is the base cooldown used once the failure
    /// streak reaches `config.cooldown_trigger_count`.
    pub(crate) fn record_timeout(
        &mut self,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.record_penalized(config.penalty_timeout, now, config, default_cooldown_secs);
    }

    /// Shared path for every failure kind: decay, subtract `penalty` (floored
    /// at `0.0`), count the observation, update the failure streak/cooldown,
    /// and move the decay reference point to `now`.
    fn record_penalized(
        &mut self,
        penalty: f32,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.apply_decay(now, config);
        self.score = (self.score - penalty).max(0.0);
        self.total_observations = self.total_observations.saturating_add(1);
        self.record_failure(now, config, default_cooldown_secs);
        self.last_observation = now;
    }

    /// Moves the score toward `1.0` according to the time elapsed since the
    /// last observation: the deficit `1.0 - score` halves every
    /// `config.decay_half_life_seconds`.
    ///
    /// Does nothing when no time has elapsed or when the half-life is
    /// non-positive or NaN.
    fn apply_decay(&mut self, now: Timestamp, config: &HealthConfig) {
        let half_life = config.decay_half_life_seconds;
        // The division converts the elapsed value to seconds, i.e. it is
        // treated as nanoseconds here.
        let elapsed_secs = now.duration_since(self.last_observation) as f64 / 1_000_000_000.0;
        // A NaN half-life would slip past a plain `<= 0.0` test and poison the
        // score with NaN, so it is rejected explicitly.
        if elapsed_secs <= 0.0 || half_life.is_nan() || half_life <= 0.0 {
            return;
        }
        let decay_factor = 0.5_f64.powf(elapsed_secs / half_life);
        let deficit = 1.0 - self.score;
        self.score = (1.0 - deficit * decay_factor as f32).clamp(0.0, 1.0);
    }

    /// Extends the consecutive-failure streak and, once it has reached
    /// `config.cooldown_trigger_count`, starts a cooldown from `now`.
    ///
    /// The cooldown is `default_cooldown_secs * cooldown_multiplier^excess`,
    /// where `excess` is the number of failures beyond the trigger count,
    /// capped at `config.max_cooldown_seconds`.
    fn record_failure(
        &mut self,
        now: Timestamp,
        config: &HealthConfig,
        default_cooldown_secs: u64,
    ) {
        self.consecutive_failures = self.consecutive_failures.saturating_add(1);
        if let Some(excess) = self
            .consecutive_failures
            .checked_sub(config.cooldown_trigger_count)
        {
            // Clamp before narrowing so a huge streak cannot wrap to a
            // negative exponent (which would shrink the cooldown).
            let exponent = excess.min(i32::MAX as u32) as i32;
            let multiplier = config.cooldown_multiplier.powi(exponent);
            // Float-to-int `as` saturates (and maps NaN to 0), so an
            // overflowing product simply hits the cap below.
            let cooldown_secs = (default_cooldown_secs as f64 * multiplier) as u64;
            self.current_cooldown_secs = cooldown_secs.min(config.max_cooldown_seconds);
            self.cooldown_until = Some(now.add_secs(self.current_cooldown_secs));
        }
    }

    /// Folds a latency sample into the moving average.
    ///
    /// The first success seeds the average directly. This is keyed on
    /// `total_successes` (already incremented by the caller) rather than
    /// `total_observations`, so failures recorded before the first success do
    /// not cause the first sample to be blended with the `0.0` placeholder.
    fn update_latency(&mut self, latency_ms: u64, _config: &HealthConfig) {
        let sample = latency_ms as f64;
        self.ewma_latency_ms = if self.total_successes <= 1 {
            sample
        } else {
            Self::LATENCY_EWMA_ALPHA * sample
                + (1.0 - Self::LATENCY_EWMA_ALPHA) * self.ewma_latency_ms
        };
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Timestamp` from a millisecond count (scaled by 1_000_000).
    fn ts(ms: u64) -> Timestamp {
        Timestamp(ms * 1_000_000)
    }

    /// A new state starts with a full score and no cooldown.
    #[test]
    fn initial_health_is_perfect() {
        let state = HealthState::new(ts(0));
        assert_eq!(state.score(), 1.0);
        assert!(!state.is_in_cooldown(ts(0)));
    }

    /// A success on an already-perfect state leaves the score at the top.
    #[test]
    fn success_maintains_health() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_success(100, ts(1_000), &cfg);
        assert!(state.score() >= 1.0);
    }

    /// One rate-limit lowers the score by roughly the configured penalty.
    #[test]
    fn rate_limit_reduces_health() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_rate_limited(ts(1_000), &cfg, 60);

        let expected = 1.0 - cfg.penalty_429;
        assert!(state.score() < 1.0);
        assert!((state.score() - expected).abs() < 0.01);
    }

    /// After a penalty, a later success leaves the score higher than before.
    #[test]
    fn health_decays_toward_full() {
        let cfg = HealthConfig {
            decay_half_life_seconds: 10.0,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));
        state.record_rate_limited(ts(0), &cfg, 60);
        let penalized = state.score();

        state.record_success(100, ts(10_000), &cfg);
        assert!(state.score() > penalized);
    }

    /// The cooldown only starts once the configured streak length is reached.
    #[test]
    fn consecutive_failures_trigger_cooldown() {
        let cfg = HealthConfig {
            cooldown_trigger_count: 3,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));

        // First two failures stay below the trigger count.
        state.record_rate_limited(ts(1_000), &cfg, 30);
        assert!(!state.is_in_cooldown(ts(1_000)));
        state.record_rate_limited(ts(2_000), &cfg, 30);
        assert!(!state.is_in_cooldown(ts(2_000)));

        // The third failure starts a 30 s cooldown at t = 3 s.
        state.record_rate_limited(ts(3_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(3_000)));
        assert!(state.is_in_cooldown(ts(32_000)));
        assert!(!state.is_in_cooldown(ts(34_000)));
    }

    /// Each failure past the trigger count multiplies the cooldown length.
    #[test]
    fn cooldown_grows_exponentially() {
        let cfg = HealthConfig {
            cooldown_trigger_count: 2,
            cooldown_multiplier: 2.0,
            max_cooldown_seconds: 600,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));

        state.record_rate_limited(ts(1_000), &cfg, 30);
        state.record_rate_limited(ts(2_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(2_000)));

        // One failure beyond the trigger: the cooldown doubles to 60 s.
        state.record_rate_limited(ts(33_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(92_000)));
    }

    /// The score never leaves `[0.0, 1.0]` under long failure or success runs.
    #[test]
    fn health_score_bounded() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));

        for when in (0..20).map(|step| ts(step * 1_000)) {
            state.record_rate_limited(when, &cfg, 60);
        }
        assert!(state.score() >= 0.0);

        for when in (20..40).map(|step| ts(step * 1_000)) {
            state.record_success(100, when, &cfg);
        }
        assert!(state.score() <= 1.0);
    }

    /// The first sample seeds the average; the second is blended with weight 0.3.
    #[test]
    fn ewma_latency_smooths() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));

        state.record_success(100, ts(1_000), &cfg);
        assert_eq!(state.latency_ms(), 100.0);

        state.record_success(200, ts(2_000), &cfg);
        assert!((state.latency_ms() - 130.0).abs() < 1.0);
    }

    /// A forbidden response applies the (larger) 403 penalty.
    #[test]
    fn forbidden_is_severe() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_forbidden(ts(1_000), &cfg, 60);

        let expected = 1.0 - cfg.penalty_403;
        assert!((state.score() - expected).abs() < 0.01);
    }
}