1use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
11use std::time::Duration;
12
/// Coarse health classification reported in a [`HealthSnapshot`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthStatus {
    /// Average latency and jitter are both under the lowest thresholds.
    Healthy,
    /// Latency or jitter is elevated but still under the upper thresholds.
    Degraded,
    /// Latency or jitter has reached or exceeded the upper thresholds.
    Unhealthy,
    /// No latency samples have been recorded, so nothing can be classified.
    Unknown,
}

/// Renders the status as its lowercase name.
impl std::fmt::Display for HealthStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match *self {
            Self::Healthy => "healthy",
            Self::Degraded => "degraded",
            Self::Unhealthy => "unhealthy",
            Self::Unknown => "unknown",
        };
        f.write_str(label)
    }
}
36
/// Point-in-time view of the recorded latency statistics, as returned by
/// [`snapshot`].
///
/// When no latency has been recorded, every duration is zero,
/// `sample_count` is `0`, and `health_status` is [`HealthStatus::Unknown`].
#[derive(Debug, Clone)]
pub struct HealthSnapshot {
    /// Mean of all recorded latencies (microsecond resolution, rounded down).
    pub avg_latency: Duration,
    /// Spread of the recorded latencies: the integer square root of their
    /// variance, in microseconds.
    pub jitter: Duration,
    /// Smallest latency recorded so far.
    pub min_latency: Duration,
    /// Largest latency recorded so far.
    pub max_latency: Duration,
    /// Most recently stored latency.
    pub last_latency: Duration,
    /// Number of latency samples recorded.
    pub sample_count: usize,
    /// Number of failures recorded via [`record_failure`].
    pub failure_count: usize,
    /// Classification derived from the average latency and jitter.
    pub health_status: HealthStatus,
}
49
/// Lock-free accumulator for latency statistics.
///
/// Every field is an independent atomic; all latency values are stored in
/// microseconds.
pub struct ApiHealth {
    /// Running sum of all recorded latencies.
    total_latency_us: AtomicU64,
    /// Running sum of the squared latencies (used to derive the variance).
    total_latency_sq_us: AtomicU64,
    /// Number of latency samples recorded.
    sample_count: AtomicUsize,
    /// Smallest latency seen; starts at `u64::MAX` until the first sample.
    min_latency_us: AtomicU64,
    /// Largest latency seen; starts at `0`.
    max_latency_us: AtomicU64,
    /// Latency of the most recently stored sample.
    last_latency_us: AtomicU64,
    /// Number of failures recorded.
    failure_count: AtomicUsize,
}
70
impl ApiHealth {
    /// Creates an empty tracker.
    ///
    /// This is a `const fn` so it can be used to initialise a `static`.
    const fn new() -> Self {
        Self {
            total_latency_us: AtomicU64::new(0),
            total_latency_sq_us: AtomicU64::new(0),
            sample_count: AtomicUsize::new(0),
            // Seeded with the largest value so the first `fetch_min` always
            // replaces it.
            min_latency_us: AtomicU64::new(u64::MAX),
            max_latency_us: AtomicU64::new(0),
            last_latency_us: AtomicU64::new(0),
            failure_count: AtomicUsize::new(0),
        }
    }
}
84
/// Process-wide tracker that the free functions in this file read and update.
static GLOBAL_HEALTH: ApiHealth = ApiHealth::new();
86
87pub fn record_latency(duration: Duration) {
90 let us = duration.as_micros() as u64;
91
92 GLOBAL_HEALTH
93 .total_latency_us
94 .fetch_add(us, Ordering::Relaxed);
95
96 let us_sq = us.saturating_mul(us);
99 GLOBAL_HEALTH
100 .total_latency_sq_us
101 .fetch_add(us_sq, Ordering::Relaxed);
102
103 GLOBAL_HEALTH.sample_count.fetch_add(1, Ordering::Relaxed);
104
105 GLOBAL_HEALTH
107 .min_latency_us
108 .fetch_min(us, Ordering::Relaxed);
109
110 GLOBAL_HEALTH
112 .max_latency_us
113 .fetch_max(us, Ordering::Relaxed);
114
115 GLOBAL_HEALTH.last_latency_us.store(us, Ordering::Relaxed);
117}
118
119pub fn record_failure() {
121 GLOBAL_HEALTH.failure_count.fetch_add(1, Ordering::Relaxed);
122}
123
124pub fn snapshot() -> HealthSnapshot {
126 let count = GLOBAL_HEALTH.sample_count.load(Ordering::Relaxed);
127 let failures = GLOBAL_HEALTH.failure_count.load(Ordering::Relaxed);
128
129 if count == 0 {
130 return HealthSnapshot {
131 avg_latency: Duration::ZERO,
132 jitter: Duration::ZERO,
133 min_latency: Duration::ZERO,
134 max_latency: Duration::ZERO,
135 last_latency: Duration::ZERO,
136 sample_count: 0,
137 failure_count: failures,
138 health_status: HealthStatus::Unknown,
139 };
140 }
141
142 let total_us = GLOBAL_HEALTH.total_latency_us.load(Ordering::Relaxed);
143 let total_sq_us = GLOBAL_HEALTH.total_latency_sq_us.load(Ordering::Relaxed);
144 let min_us = GLOBAL_HEALTH.min_latency_us.load(Ordering::Relaxed);
145 let max_us = GLOBAL_HEALTH.max_latency_us.load(Ordering::Relaxed);
146 let last_us = GLOBAL_HEALTH.last_latency_us.load(Ordering::Relaxed);
147
148 let avg_us = total_us / count as u64;
149
150 let mean_sq = total_sq_us / count as u64;
152 let sq_mean = avg_us.saturating_mul(avg_us);
153 let variance_us = mean_sq.saturating_sub(sq_mean);
154 let jitter_us = isqrt(variance_us);
155
156 let avg_ms = avg_us / 1000;
157 let jitter_ms = jitter_us / 1000;
158
159 let health_status = classify_health(avg_ms, jitter_ms);
160
161 HealthSnapshot {
162 avg_latency: Duration::from_micros(avg_us),
163 jitter: Duration::from_micros(jitter_us),
164 min_latency: Duration::from_micros(min_us),
165 max_latency: Duration::from_micros(max_us),
166 last_latency: Duration::from_micros(last_us),
167 sample_count: count,
168 failure_count: failures,
169 health_status,
170 }
171}
172
173pub fn status_line() -> String {
175 let snap = snapshot();
176 match snap.health_status {
177 HealthStatus::Unknown => "API: unknown (no samples)".to_string(),
178 _ => {
179 format!(
180 "API: {} (avg: {}, jitter: {})",
181 snap.health_status,
182 format_duration_ms(snap.avg_latency),
183 format_duration_ms(snap.jitter),
184 )
185 }
186 }
187}
188
189fn classify_health(avg_ms: u64, jitter_ms: u64) -> HealthStatus {
191 if avg_ms < 500 && jitter_ms < 200 {
192 HealthStatus::Healthy
193 } else if avg_ms < 2000 && jitter_ms < 500 {
194 HealthStatus::Degraded
195 } else {
196 HealthStatus::Unhealthy
197 }
198}
199
/// Integer square root: the largest `r` such that `r * r <= n`.
///
/// Uses Newton's iteration starting from `n` itself and stops as soon as the
/// next estimate is no longer smaller than the current one.
fn isqrt(n: u64) -> u64 {
    if n == 0 {
        return 0;
    }

    let mut estimate = n;
    let mut candidate = n.div_ceil(2);
    loop {
        // The estimates shrink on every step until the root is reached; the
        // first step that fails to shrink marks the answer.
        if candidate >= estimate {
            return estimate;
        }
        estimate = candidate;
        candidate = (estimate + n / estimate) / 2;
    }
}
213
/// Formats a duration for display.
///
/// Durations under one second are shown as whole milliseconds (`"340ms"`);
/// anything longer is shown in seconds with one decimal place (`"2.1s"`).
pub fn format_duration_ms(d: Duration) -> String {
    match d.as_millis() {
        whole_ms @ 0..=999 => format!("{}ms", whole_ms),
        _ => format!("{:.1}s", d.as_secs_f64()),
    }
}
223
// Unit tests. Statistics tests run against a locally constructed `ApiHealth`
// (via the helpers below) instead of the process-wide static, so each test
// starts from an empty tracker.
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a fresh, empty tracker for a single test.
    fn make_tracker() -> ApiHealth {
        ApiHealth::new()
    }

    /// Records one latency sample on `tracker`: sum, sum of squares, count,
    /// min, max, and last value.
    ///
    /// This is test-local logic that targets `tracker`; it does not call
    /// `record_latency`, so the two have to be kept in sync by hand.
    fn record_on(tracker: &ApiHealth, duration: Duration) {
        let us = duration.as_micros() as u64;
        tracker.total_latency_us.fetch_add(us, Ordering::Relaxed);
        let us_sq = us.saturating_mul(us);
        tracker
            .total_latency_sq_us
            .fetch_add(us_sq, Ordering::Relaxed);
        tracker.sample_count.fetch_add(1, Ordering::Relaxed);
        tracker.min_latency_us.fetch_min(us, Ordering::Relaxed);
        tracker.max_latency_us.fetch_max(us, Ordering::Relaxed);
        tracker.last_latency_us.store(us, Ordering::Relaxed);
    }

    /// Builds a `HealthSnapshot` from `tracker`.
    ///
    /// This is test-local logic that reads `tracker`; it does not call
    /// `snapshot`, so the two have to be kept in sync by hand.
    fn snapshot_of(tracker: &ApiHealth) -> HealthSnapshot {
        let count = tracker.sample_count.load(Ordering::Relaxed);
        let failures = tracker.failure_count.load(Ordering::Relaxed);

        // No samples: zeroed durations and an Unknown status.
        if count == 0 {
            return HealthSnapshot {
                avg_latency: Duration::ZERO,
                jitter: Duration::ZERO,
                min_latency: Duration::ZERO,
                max_latency: Duration::ZERO,
                last_latency: Duration::ZERO,
                sample_count: 0,
                failure_count: failures,
                health_status: HealthStatus::Unknown,
            };
        }

        let total_us = tracker.total_latency_us.load(Ordering::Relaxed);
        let total_sq_us = tracker.total_latency_sq_us.load(Ordering::Relaxed);
        let min_us = tracker.min_latency_us.load(Ordering::Relaxed);
        let max_us = tracker.max_latency_us.load(Ordering::Relaxed);
        let last_us = tracker.last_latency_us.load(Ordering::Relaxed);

        // Variance as (mean of squares) - (square of mean), in integer
        // microseconds; jitter is its integer square root.
        let avg_us = total_us / count as u64;
        let mean_sq = total_sq_us / count as u64;
        let sq_mean = avg_us.saturating_mul(avg_us);
        let variance_us = mean_sq.saturating_sub(sq_mean);
        let jitter_us = isqrt(variance_us);

        // Classification works on whole milliseconds.
        let avg_ms = avg_us / 1000;
        let jitter_ms = jitter_us / 1000;
        let health_status = classify_health(avg_ms, jitter_ms);

        HealthSnapshot {
            avg_latency: Duration::from_micros(avg_us),
            jitter: Duration::from_micros(jitter_us),
            min_latency: Duration::from_micros(min_us),
            max_latency: Duration::from_micros(max_us),
            last_latency: Duration::from_micros(last_us),
            sample_count: count,
            failure_count: failures,
            health_status,
        }
    }

    // An empty tracker reports Unknown with zeroed statistics.
    #[test]
    fn test_no_samples_unknown() {
        let tracker = make_tracker();
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.health_status, HealthStatus::Unknown);
        assert_eq!(snap.sample_count, 0);
        assert_eq!(snap.avg_latency, Duration::ZERO);
    }

    // One sample: avg == min == max, and there is no spread.
    #[test]
    fn test_single_sample() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_millis(100));
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.sample_count, 1);
        assert_eq!(snap.avg_latency.as_millis(), 100);
        assert_eq!(snap.min_latency.as_millis(), 100);
        assert_eq!(snap.max_latency.as_millis(), 100);
        assert_eq!(snap.jitter.as_millis(), 0);
        assert_eq!(snap.health_status, HealthStatus::Healthy);
    }

    // (100 + 200 + 300) / 3 = 200 ms.
    #[test]
    fn test_average_latency() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_millis(100));
        record_on(&tracker, Duration::from_millis(200));
        record_on(&tracker, Duration::from_millis(300));
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.sample_count, 3);
        assert_eq!(snap.avg_latency.as_millis(), 200);
        assert_eq!(snap.min_latency.as_millis(), 100);
        assert_eq!(snap.max_latency.as_millis(), 300);
    }

    // Samples of 100 ms and 300 ms have mean 200 ms and each deviates by
    // 100 ms, so the expected jitter is 100 ms (a 1 ms tolerance is allowed).
    #[test]
    fn test_jitter_calculation() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_millis(100));
        record_on(&tracker, Duration::from_millis(300));
        let snap = snapshot_of(&tracker);
        let jitter_ms = snap.jitter.as_millis();
        assert!(
            jitter_ms >= 99 && jitter_ms <= 101,
            "jitter was {}ms",
            jitter_ms
        );
    }

    // Healthy requires avg < 500 and jitter < 200; 499/199 is the last
    // healthy pair.
    #[test]
    fn test_healthy_classification() {
        assert_eq!(classify_health(200, 50), HealthStatus::Healthy);
        assert_eq!(classify_health(499, 199), HealthStatus::Healthy);
    }

    // Crossing either healthy bound (avg 500 or jitter 200) yields Degraded
    // while both values stay under the upper bounds.
    #[test]
    fn test_degraded_classification() {
        assert_eq!(classify_health(500, 50), HealthStatus::Degraded);
        assert_eq!(classify_health(1999, 499), HealthStatus::Degraded);
        assert_eq!(classify_health(200, 200), HealthStatus::Degraded);
    }

    // Reaching either upper bound (avg 2000 or jitter 500) yields Unhealthy.
    #[test]
    fn test_unhealthy_classification() {
        assert_eq!(classify_health(2000, 50), HealthStatus::Unhealthy);
        assert_eq!(classify_health(200, 500), HealthStatus::Unhealthy);
        assert_eq!(classify_health(5000, 1000), HealthStatus::Unhealthy);
    }

    // Failures are reported even when no latency sample exists.
    #[test]
    fn test_failure_count() {
        let tracker = make_tracker();
        tracker.failure_count.fetch_add(1, Ordering::Relaxed);
        tracker.failure_count.fetch_add(1, Ordering::Relaxed);
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.failure_count, 2);
    }

    // `last_latency` is the final sample recorded, not the min or max.
    #[test]
    fn test_last_latency_tracks_most_recent() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_millis(100));
        record_on(&tracker, Duration::from_millis(500));
        record_on(&tracker, Duration::from_millis(200));
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.last_latency.as_millis(), 200);
    }

    // Min and max are independent of the order in which samples arrive.
    #[test]
    fn test_min_max_tracking() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_millis(500));
        record_on(&tracker, Duration::from_millis(100));
        record_on(&tracker, Duration::from_millis(1000));
        record_on(&tracker, Duration::from_millis(200));
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.min_latency.as_millis(), 100);
        assert_eq!(snap.max_latency.as_millis(), 1000);
    }

    // Perfect squares map to their roots; other inputs round down.
    #[test]
    fn test_isqrt() {
        assert_eq!(isqrt(0), 0);
        assert_eq!(isqrt(1), 1);
        assert_eq!(isqrt(4), 2);
        assert_eq!(isqrt(9), 3);
        // 10 is not a perfect square: the result is rounded down.
        assert_eq!(isqrt(10), 3);
        assert_eq!(isqrt(100), 10);
        assert_eq!(isqrt(10000), 100);
    }

    // Below one second: whole milliseconds. From one second: one-decimal seconds.
    #[test]
    fn test_format_duration_ms() {
        assert_eq!(format_duration_ms(Duration::from_millis(0)), "0ms");
        assert_eq!(format_duration_ms(Duration::from_millis(340)), "340ms");
        assert_eq!(format_duration_ms(Duration::from_millis(999)), "999ms");
        assert_eq!(format_duration_ms(Duration::from_millis(1000)), "1.0s");
        assert_eq!(format_duration_ms(Duration::from_millis(2100)), "2.1s");
    }

    // Display renders each variant as its lowercase name.
    #[test]
    fn test_health_status_display() {
        assert_eq!(HealthStatus::Healthy.to_string(), "healthy");
        assert_eq!(HealthStatus::Degraded.to_string(), "degraded");
        assert_eq!(HealthStatus::Unhealthy.to_string(), "unhealthy");
        assert_eq!(HealthStatus::Unknown.to_string(), "unknown");
    }

    // A 100 s sample is 1e8 µs, whose square (1e16) still fits in a u64; the
    // average must come back intact.
    #[test]
    fn test_overflow_safety_large_latency() {
        let tracker = make_tracker();
        record_on(&tracker, Duration::from_secs(100));
        let snap = snapshot_of(&tracker);
        assert_eq!(snap.sample_count, 1);
        assert_eq!(snap.avg_latency.as_secs(), 100);
    }
}