1use serde::{Deserialize, Serialize};
2
3use crate::clock::Timestamp;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct HealthConfig {
8 pub decay_half_life_seconds: f64,
10 pub penalty_429: f32,
12 pub penalty_403: f32,
14 pub penalty_5xx: f32,
16 pub penalty_timeout: f32,
18 pub boost_success: f32,
20 pub cooldown_trigger_count: u32,
22 pub cooldown_multiplier: f64,
24 pub max_cooldown_seconds: u64,
26}
27
28impl Default for HealthConfig {
29 fn default() -> Self {
30 Self {
31 decay_half_life_seconds: 300.0,
32 penalty_429: 0.25,
33 penalty_403: 0.50,
34 penalty_5xx: 0.10,
35 penalty_timeout: 0.20,
36 boost_success: 0.02,
37 cooldown_trigger_count: 3,
38 cooldown_multiplier: 2.0,
39 max_cooldown_seconds: 600,
40 }
41 }
42}
43
44pub(crate) struct HealthState {
46 score: f32,
48 consecutive_failures: u32,
50 current_cooldown_secs: u64,
52 cooldown_until: Option<Timestamp>,
54 last_observation: Timestamp,
56 total_observations: u64,
58 total_successes: u64,
60 ewma_latency_ms: f64,
62}
63
64impl HealthState {
65 pub(crate) fn new(now: Timestamp) -> Self {
66 Self {
67 score: 1.0,
68 consecutive_failures: 0,
69 current_cooldown_secs: 0,
70 cooldown_until: None,
71 last_observation: now,
72 total_observations: 0,
73 total_successes: 0,
74 ewma_latency_ms: 0.0,
75 }
76 }
77
78 pub(crate) fn score(&self) -> f32 {
80 self.score
81 }
82
83 pub(crate) fn is_in_cooldown(&self, now: Timestamp) -> bool {
85 match self.cooldown_until {
86 Some(until) => now < until,
87 None => false,
88 }
89 }
90
91 pub(crate) fn latency_ms(&self) -> f64 {
93 self.ewma_latency_ms
94 }
95
96 pub(crate) fn record_success(
98 &mut self,
99 latency_ms: u64,
100 now: Timestamp,
101 config: &HealthConfig,
102 ) {
103 self.apply_decay(now, config);
104 self.score = (self.score + config.boost_success).min(1.0);
105 self.consecutive_failures = 0;
106 self.total_observations += 1;
107 self.total_successes += 1;
108 self.update_latency(latency_ms, config);
109 self.last_observation = now;
110 }
111
112 pub(crate) fn record_rate_limited(
114 &mut self,
115 now: Timestamp,
116 config: &HealthConfig,
117 default_cooldown_secs: u64,
118 ) {
119 self.apply_decay(now, config);
120 self.score = (self.score - config.penalty_429).max(0.0);
121 self.total_observations += 1;
122 self.record_failure(now, config, default_cooldown_secs);
123 self.last_observation = now;
124 }
125
126 pub(crate) fn record_forbidden(
128 &mut self,
129 now: Timestamp,
130 config: &HealthConfig,
131 default_cooldown_secs: u64,
132 ) {
133 self.apply_decay(now, config);
134 self.score = (self.score - config.penalty_403).max(0.0);
135 self.total_observations += 1;
136 self.record_failure(now, config, default_cooldown_secs);
137 self.last_observation = now;
138 }
139
140 pub(crate) fn record_server_error(
142 &mut self,
143 now: Timestamp,
144 config: &HealthConfig,
145 default_cooldown_secs: u64,
146 ) {
147 self.apply_decay(now, config);
148 self.score = (self.score - config.penalty_5xx).max(0.0);
149 self.total_observations += 1;
150 self.record_failure(now, config, default_cooldown_secs);
151 self.last_observation = now;
152 }
153
154 pub(crate) fn record_timeout(
156 &mut self,
157 now: Timestamp,
158 config: &HealthConfig,
159 default_cooldown_secs: u64,
160 ) {
161 self.apply_decay(now, config);
162 self.score = (self.score - config.penalty_timeout).max(0.0);
163 self.total_observations += 1;
164 self.record_failure(now, config, default_cooldown_secs);
165 self.last_observation = now;
166 }
167
168 fn apply_decay(&mut self, now: Timestamp, config: &HealthConfig) {
170 let elapsed_secs = now.duration_since(self.last_observation) as f64 / 1_000_000_000.0;
171 if elapsed_secs <= 0.0 || config.decay_half_life_seconds <= 0.0 {
172 return;
173 }
174
175 let decay_factor = (0.5_f64).powf(elapsed_secs / config.decay_half_life_seconds);
177 let deficit = 1.0 - self.score;
179 self.score = 1.0 - deficit * decay_factor as f32;
180 self.score = self.score.clamp(0.0, 1.0);
181 }
182
183 fn record_failure(
185 &mut self,
186 now: Timestamp,
187 config: &HealthConfig,
188 default_cooldown_secs: u64,
189 ) {
190 self.consecutive_failures += 1;
191
192 if self.consecutive_failures >= config.cooldown_trigger_count {
193 let excess = self.consecutive_failures - config.cooldown_trigger_count;
195 let multiplier = config.cooldown_multiplier.powi(excess as i32);
196 let cooldown_secs = (default_cooldown_secs as f64 * multiplier) as u64;
197 self.current_cooldown_secs = cooldown_secs.min(config.max_cooldown_seconds);
198 self.cooldown_until = Some(now.add_secs(self.current_cooldown_secs));
199 }
200 }
201
202 fn update_latency(&mut self, latency_ms: u64, _config: &HealthConfig) {
204 const ALPHA: f64 = 0.3; if self.total_observations <= 1 {
206 self.ewma_latency_ms = latency_ms as f64;
207 } else {
208 self.ewma_latency_ms = ALPHA * latency_ms as f64 + (1.0 - ALPHA) * self.ewma_latency_ms;
209 }
210 }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Timestamp` from a millisecond offset.
    fn ts(ms: u64) -> Timestamp {
        Timestamp(ms * 1_000_000)
    }

    #[test]
    fn initial_health_is_perfect() {
        let state = HealthState::new(ts(0));
        assert_eq!(state.score(), 1.0);
        assert!(!state.is_in_cooldown(ts(0)));
    }

    #[test]
    fn success_maintains_health() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_success(100, ts(1_000), &cfg);
        // The score starts at the ceiling, so the boost cannot lower it.
        assert!(state.score() >= 1.0);
    }

    #[test]
    fn rate_limit_reduces_health() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_rate_limited(ts(1_000), &cfg, 60);
        assert!(state.score() < 1.0);
        let expected = 1.0 - cfg.penalty_429;
        assert!((state.score() - expected).abs() < 0.01);
    }

    #[test]
    fn health_decays_toward_full() {
        let cfg = HealthConfig {
            decay_half_life_seconds: 10.0,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));
        state.record_rate_limited(ts(0), &cfg, 60);
        let after_penalty = state.score();

        // One half-life later the deficit has halved before the boost lands.
        state.record_success(100, ts(10_000), &cfg);
        assert!(state.score() > after_penalty);
    }

    #[test]
    fn consecutive_failures_trigger_cooldown() {
        let cfg = HealthConfig {
            cooldown_trigger_count: 3,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));

        state.record_rate_limited(ts(1_000), &cfg, 30);
        assert!(!state.is_in_cooldown(ts(1_000)));

        state.record_rate_limited(ts(2_000), &cfg, 30);
        assert!(!state.is_in_cooldown(ts(2_000)));

        // The third failure starts a 30 s cooldown at t = 3 s, ending at t = 33 s.
        state.record_rate_limited(ts(3_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(3_000)));
        assert!(state.is_in_cooldown(ts(32_000)));
        assert!(!state.is_in_cooldown(ts(34_000)));
    }

    #[test]
    fn cooldown_grows_exponentially() {
        let cfg = HealthConfig {
            cooldown_trigger_count: 2,
            cooldown_multiplier: 2.0,
            max_cooldown_seconds: 600,
            ..Default::default()
        };
        let mut state = HealthState::new(ts(0));

        state.record_rate_limited(ts(1_000), &cfg, 30);
        // The second failure reaches the trigger: 30 s cooldown from t = 2 s.
        state.record_rate_limited(ts(2_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(2_000)));

        // One failure past the trigger doubles it: 60 s from t = 33 s.
        state.record_rate_limited(ts(33_000), &cfg, 30);
        assert!(state.is_in_cooldown(ts(92_000)));
    }

    #[test]
    fn health_score_bounded() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));

        // Repeated penalties must not push the score below zero.
        for tick in 0..20 {
            state.record_rate_limited(ts(tick * 1_000), &cfg, 60);
        }
        assert!(state.score() >= 0.0);

        // Repeated boosts must not push the score above one.
        for tick in 20..40 {
            state.record_success(100, ts(tick * 1_000), &cfg);
        }
        assert!(state.score() <= 1.0);
    }

    #[test]
    fn ewma_latency_smooths() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));

        state.record_success(100, ts(1_000), &cfg);
        assert_eq!(state.latency_ms(), 100.0);

        // 0.3 * 200 + 0.7 * 100 = 130
        state.record_success(200, ts(2_000), &cfg);
        assert!((state.latency_ms() - 130.0).abs() < 1.0);
    }

    #[test]
    fn forbidden_is_severe() {
        let cfg = HealthConfig::default();
        let mut state = HealthState::new(ts(0));
        state.record_forbidden(ts(1_000), &cfg, 60);
        let expected = 1.0 - cfg.penalty_403;
        assert!((state.score() - expected).abs() < 0.01);
    }
}