chie_core/
degradation.rs

1//! Graceful degradation under resource pressure.
2//!
3//! This module implements strategies for gracefully degrading service quality when
4//! resources (CPU, memory, disk, bandwidth) are under pressure, rather than failing
5//! catastrophically. It allows the system to continue operating at reduced capacity
6//! while maintaining critical functionality.
7//!
8//! # Example
9//!
10//! ```rust
11//! use chie_core::degradation::{DegradationManager, ResourcePressure, ServiceDegradationLevel};
12//!
13//! # async fn example() {
14//! let mut manager = DegradationManager::new();
15//!
16//! // Report resource pressure
17//! manager.update_pressure(ResourcePressure {
18//!     cpu_usage: 0.95,
19//!     memory_usage: 0.85,
20//!     disk_usage: 0.90,
21//!     bandwidth_usage: 0.80,
22//! });
23//!
24//! // Get current degradation level
25//! let level = manager.current_level();
26//! match level {
27//!     ServiceDegradationLevel::Normal => {
28//!         // Operate normally
29//!     }
30//!     ServiceDegradationLevel::LightDegradation => {
31//!         // Reduce non-critical features
32//!     }
33//!     ServiceDegradationLevel::ModerateDegradation => {
34//!         // Focus on core functionality
35//!     }
36//!     ServiceDegradationLevel::SevereDegradation => {
37//!         // Minimal operations only
38//!     }
39//! }
40//!
41//! // Check if specific features should be disabled
42//! if manager.should_disable_prefetching() {
43//!     // Disable chunk prefetching
44//! }
45//! if manager.should_reduce_cache_size() {
46//!     // Reduce cache memory usage
47//! }
48//! # }
49//! ```
50
51use serde::{Deserialize, Serialize};
52use std::time::{Duration, Instant};
53
/// Snapshot of how heavily each tracked resource is being used.
///
/// Every field is a utilisation ratio where `0.0` means idle and `1.0` means
/// fully saturated. The fields are not validated or clamped individually;
/// only the combined [`overall_score`](Self::overall_score) is clamped.
///
/// The derived [`Default`] is the all-zero (idle) snapshot.
#[derive(Debug, Clone, Copy, Default)]
pub struct ResourcePressure {
    /// CPU usage (0.0 to 1.0).
    pub cpu_usage: f64,
    /// Memory usage (0.0 to 1.0).
    pub memory_usage: f64,
    /// Disk usage (0.0 to 1.0).
    pub disk_usage: f64,
    /// Bandwidth usage (0.0 to 1.0).
    pub bandwidth_usage: f64,
}

impl ResourcePressure {
    /// Weight of CPU usage in the overall score.
    const CPU_WEIGHT: f64 = 0.2;
    /// Weight of memory usage in the overall score.
    const MEMORY_WEIGHT: f64 = 0.3;
    /// Weight of disk usage in the overall score.
    const DISK_WEIGHT: f64 = 0.3;
    /// Weight of bandwidth usage in the overall score.
    const BANDWIDTH_WEIGHT: f64 = 0.2;
    /// A single resource above this ratio counts as critical.
    const CRITICAL_THRESHOLD: f64 = 0.95;

    /// Calculate the overall pressure score (0.0 to 1.0).
    ///
    /// The score is a weighted average of the four ratios in which memory and
    /// disk (0.3 each) count more than CPU and bandwidth (0.2 each). The
    /// result is clamped to `[0.0, 1.0]`; a NaN input yields a NaN score
    /// because `f64::clamp` propagates NaN.
    #[must_use]
    #[inline]
    pub fn overall_score(&self) -> f64 {
        // Summation order is kept fixed so results stay bit-for-bit stable.
        (self.cpu_usage * Self::CPU_WEIGHT
            + self.memory_usage * Self::MEMORY_WEIGHT
            + self.disk_usage * Self::DISK_WEIGHT
            + self.bandwidth_usage * Self::BANDWIDTH_WEIGHT)
            .clamp(0.0, 1.0)
    }

    /// Check whether any single resource is critically high.
    ///
    /// Returns `true` when at least one ratio is strictly greater than 0.95.
    /// A value of exactly 0.95 (or NaN) is not considered critical.
    #[must_use]
    #[inline]
    pub fn has_critical_resource(&self) -> bool {
        [
            self.cpu_usage,
            self.memory_usage,
            self.disk_usage,
            self.bandwidth_usage,
        ]
        .iter()
        .any(|&usage| usage > Self::CRITICAL_THRESHOLD)
    }
}
101
/// Service degradation levels.
///
/// Variants are declared from least to most severe and carry explicit
/// discriminants `0..=3`, so the derived ordering satisfies
/// `Normal < LightDegradation < ModerateDegradation < SevereDegradation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum ServiceDegradationLevel {
    /// Normal operation - all features enabled.
    Normal = 0,
    /// Light degradation - reduce non-critical features.
    LightDegradation = 1,
    /// Moderate degradation - focus on core functionality.
    ModerateDegradation = 2,
    /// Severe degradation - minimal operations only.
    SevereDegradation = 3,
}
114
115impl ServiceDegradationLevel {
116    /// Get degradation level from pressure score.
117    #[must_use]
118    #[inline]
119    pub fn from_pressure_score(score: f64) -> Self {
120        if score < 0.70 {
121            Self::Normal
122        } else if score < 0.80 {
123            Self::LightDegradation
124        } else if score < 0.90 {
125            Self::ModerateDegradation
126        } else {
127            Self::SevereDegradation
128        }
129    }
130
131    /// Get description of this degradation level.
132    #[must_use]
133    #[inline]
134    pub const fn description(&self) -> &'static str {
135        match self {
136            Self::Normal => "Operating normally",
137            Self::LightDegradation => "Light resource pressure - reducing non-critical features",
138            Self::ModerateDegradation => "Moderate resource pressure - core functionality only",
139            Self::SevereDegradation => "Severe resource pressure - minimal operations",
140        }
141    }
142
143    /// Check if this is a degraded state.
144    #[must_use]
145    #[inline]
146    pub const fn is_degraded(&self) -> bool {
147        !matches!(self, Self::Normal)
148    }
149}
150
/// Actions to take during degradation.
///
/// Each flag is an independent switch; `true` means the corresponding
/// load-shedding measure should be applied. Use
/// [`DegradationActions::for_level`] to obtain the flag set associated with a
/// given [`ServiceDegradationLevel`].
#[derive(Debug, Clone, Copy)]
pub struct DegradationActions {
    /// Disable chunk prefetching.
    pub disable_prefetching: bool,
    /// Reduce cache size.
    pub reduce_cache_size: bool,
    /// Disable analytics collection.
    pub disable_analytics: bool,
    /// Throttle bandwidth.
    pub throttle_bandwidth: bool,
    /// Pause garbage collection.
    pub pause_gc: bool,
    /// Disable backup operations.
    pub disable_backups: bool,
    /// Reduce connection pool size.
    pub reduce_connection_pool: bool,
    /// Reject new content pinning.
    pub reject_new_pins: bool,
}
171
172impl DegradationActions {
173    /// Get actions for a given degradation level.
174    #[must_use]
175    pub const fn for_level(level: ServiceDegradationLevel) -> Self {
176        match level {
177            ServiceDegradationLevel::Normal => Self {
178                disable_prefetching: false,
179                reduce_cache_size: false,
180                disable_analytics: false,
181                throttle_bandwidth: false,
182                pause_gc: false,
183                disable_backups: false,
184                reduce_connection_pool: false,
185                reject_new_pins: false,
186            },
187            ServiceDegradationLevel::LightDegradation => Self {
188                disable_prefetching: true,
189                reduce_cache_size: true,
190                disable_analytics: false,
191                throttle_bandwidth: false,
192                pause_gc: false,
193                disable_backups: false,
194                reduce_connection_pool: false,
195                reject_new_pins: false,
196            },
197            ServiceDegradationLevel::ModerateDegradation => Self {
198                disable_prefetching: true,
199                reduce_cache_size: true,
200                disable_analytics: true,
201                throttle_bandwidth: true,
202                pause_gc: true,
203                disable_backups: true,
204                reduce_connection_pool: true,
205                reject_new_pins: false,
206            },
207            ServiceDegradationLevel::SevereDegradation => Self {
208                disable_prefetching: true,
209                reduce_cache_size: true,
210                disable_analytics: true,
211                throttle_bandwidth: true,
212                pause_gc: true,
213                disable_backups: true,
214                reduce_connection_pool: true,
215                reject_new_pins: true,
216            },
217        }
218    }
219}
220
221/// Manages graceful degradation based on resource pressure.
222pub struct DegradationManager {
223    current_level: ServiceDegradationLevel,
224    current_pressure: ResourcePressure,
225    last_update: Instant,
226    pressure_history: Vec<(Instant, f64)>,
227    hysteresis_duration: Duration,
228}
229
230impl DegradationManager {
231    /// Create a new degradation manager.
232    #[must_use]
233    pub fn new() -> Self {
234        Self {
235            current_level: ServiceDegradationLevel::Normal,
236            current_pressure: ResourcePressure::default(),
237            last_update: Instant::now(),
238            pressure_history: Vec::new(),
239            hysteresis_duration: Duration::from_secs(60), // 1 minute hysteresis
240        }
241    }
242
243    /// Update resource pressure and recalculate degradation level.
244    pub fn update_pressure(&mut self, pressure: ResourcePressure) {
245        self.current_pressure = pressure;
246        self.last_update = Instant::now();
247
248        let score = pressure.overall_score();
249        self.pressure_history.push((Instant::now(), score));
250
251        // Keep only last 5 minutes of history
252        let cutoff = Instant::now() - Duration::from_secs(300);
253        self.pressure_history.retain(|(t, _)| *t > cutoff);
254
255        // Calculate new level with hysteresis to prevent flapping
256        let new_level = ServiceDegradationLevel::from_pressure_score(score);
257
258        // Only change level if sustained for hysteresis duration
259        if new_level != self.current_level {
260            let sustained = self.is_level_sustained(new_level);
261            if sustained {
262                self.current_level = new_level;
263            }
264        }
265    }
266
267    /// Check if a degradation level has been sustained.
268    fn is_level_sustained(&self, level: ServiceDegradationLevel) -> bool {
269        let cutoff = Instant::now() - self.hysteresis_duration;
270        let recent_scores: Vec<f64> = self
271            .pressure_history
272            .iter()
273            .filter(|(t, _)| *t > cutoff)
274            .map(|(_, s)| *s)
275            .collect();
276
277        if recent_scores.is_empty() {
278            return false;
279        }
280
281        // Check if all recent scores match this level
282        recent_scores
283            .iter()
284            .all(|&score| ServiceDegradationLevel::from_pressure_score(score) == level)
285    }
286
287    /// Get current degradation level.
288    #[must_use]
289    #[inline]
290    pub const fn current_level(&self) -> ServiceDegradationLevel {
291        self.current_level
292    }
293
294    /// Get current resource pressure.
295    #[must_use]
296    #[inline]
297    pub const fn current_pressure(&self) -> &ResourcePressure {
298        &self.current_pressure
299    }
300
301    /// Get recommended actions for current degradation level.
302    #[must_use]
303    pub const fn get_actions(&self) -> DegradationActions {
304        DegradationActions::for_level(self.current_level)
305    }
306
307    /// Check if prefetching should be disabled.
308    #[must_use]
309    #[inline]
310    pub fn should_disable_prefetching(&self) -> bool {
311        self.get_actions().disable_prefetching
312    }
313
314    /// Check if cache size should be reduced.
315    #[must_use]
316    #[inline]
317    pub fn should_reduce_cache_size(&self) -> bool {
318        self.get_actions().reduce_cache_size
319    }
320
321    /// Check if analytics should be disabled.
322    #[must_use]
323    #[inline]
324    pub fn should_disable_analytics(&self) -> bool {
325        self.get_actions().disable_analytics
326    }
327
328    /// Check if bandwidth should be throttled.
329    #[must_use]
330    #[inline]
331    pub fn should_throttle_bandwidth(&self) -> bool {
332        self.get_actions().throttle_bandwidth
333    }
334
335    /// Check if garbage collection should be paused.
336    #[must_use]
337    #[inline]
338    pub fn should_pause_gc(&self) -> bool {
339        self.get_actions().pause_gc
340    }
341
342    /// Check if new content pins should be rejected.
343    #[must_use]
344    #[inline]
345    pub fn should_reject_new_pins(&self) -> bool {
346        self.get_actions().reject_new_pins
347    }
348
349    /// Get time since last pressure update.
350    #[must_use]
351    pub fn time_since_update(&self) -> Duration {
352        Instant::now().duration_since(self.last_update)
353    }
354
355    /// Get average pressure score over the last duration.
356    #[must_use]
357    pub fn average_pressure_score(&self, duration: Duration) -> Option<f64> {
358        let cutoff = Instant::now() - duration;
359        let scores: Vec<f64> = self
360            .pressure_history
361            .iter()
362            .filter(|(t, _)| *t > cutoff)
363            .map(|(_, s)| *s)
364            .collect();
365
366        if scores.is_empty() {
367            None
368        } else {
369            Some(scores.iter().sum::<f64>() / scores.len() as f64)
370        }
371    }
372}
373
374impl Default for DegradationManager {
375    fn default() -> Self {
376        Self::new()
377    }
378}
379
// Unit tests for the pressure scoring, level mapping, action tables and the
// manager's bookkeeping.
#[cfg(test)]
mod tests {
    use super::*;

    /// The weighted score of a mixed sample lies between its extremes.
    #[test]
    fn test_resource_pressure_overall_score() {
        let pressure = ResourcePressure {
            cpu_usage: 0.5,
            memory_usage: 0.6,
            disk_usage: 0.7,
            bandwidth_usage: 0.4,
        };

        // 0.5*0.2 + 0.6*0.3 + 0.7*0.3 + 0.4*0.2 = 0.57
        let score = pressure.overall_score();
        assert!(score > 0.5 && score < 0.7);
    }

    /// One resource above 0.95 is enough to be critical; 0.5 everywhere is not.
    #[test]
    fn test_resource_pressure_critical() {
        let pressure = ResourcePressure {
            cpu_usage: 0.96,
            memory_usage: 0.5,
            disk_usage: 0.5,
            bandwidth_usage: 0.5,
        };

        assert!(pressure.has_critical_resource());

        let normal = ResourcePressure {
            cpu_usage: 0.5,
            memory_usage: 0.5,
            disk_usage: 0.5,
            bandwidth_usage: 0.5,
        };

        assert!(!normal.has_critical_resource());
    }

    /// One representative score per threshold band.
    #[test]
    fn test_degradation_level_from_score() {
        assert_eq!(
            ServiceDegradationLevel::from_pressure_score(0.5),
            ServiceDegradationLevel::Normal
        );
        assert_eq!(
            ServiceDegradationLevel::from_pressure_score(0.75),
            ServiceDegradationLevel::LightDegradation
        );
        assert_eq!(
            ServiceDegradationLevel::from_pressure_score(0.85),
            ServiceDegradationLevel::ModerateDegradation
        );
        assert_eq!(
            ServiceDegradationLevel::from_pressure_score(0.95),
            ServiceDegradationLevel::SevereDegradation
        );
    }

    /// Spot-checks the two extremes of the action table.
    #[test]
    fn test_degradation_actions() {
        let normal_actions = DegradationActions::for_level(ServiceDegradationLevel::Normal);
        assert!(!normal_actions.disable_prefetching);
        assert!(!normal_actions.reject_new_pins);

        let severe_actions =
            DegradationActions::for_level(ServiceDegradationLevel::SevereDegradation);
        assert!(severe_actions.disable_prefetching);
        assert!(severe_actions.reject_new_pins);
    }

    /// A reported sample is stored and readable through `current_pressure`.
    #[test]
    fn test_degradation_manager_update() {
        let mut manager = DegradationManager::new();

        assert_eq!(manager.current_level(), ServiceDegradationLevel::Normal);

        // Update with high pressure
        manager.update_pressure(ResourcePressure {
            cpu_usage: 0.95,
            memory_usage: 0.90,
            disk_usage: 0.92,
            bandwidth_usage: 0.88,
        });

        // The resulting level is deliberately not asserted here because it
        // depends on how the hysteresis check treats a single sample; only
        // the recorded pressure is verified.
        assert!(manager.current_pressure().overall_score() > 0.9);
    }

    /// The `should_*` helpers follow the manager's current level.
    #[test]
    fn test_degradation_manager_helpers() {
        let mut manager = DegradationManager::new();

        // Normal level
        assert!(!manager.should_disable_prefetching());
        assert!(!manager.should_reject_new_pins());

        // Force severe degradation by writing the private field directly,
        // bypassing `update_pressure` and its hysteresis check.
        manager.current_level = ServiceDegradationLevel::SevereDegradation;
        assert!(manager.should_disable_prefetching());
        assert!(manager.should_reject_new_pins());
        assert!(manager.should_pause_gc());
    }

    /// A single uniform sample yields an average equal to its score.
    #[test]
    fn test_average_pressure_score() {
        let mut manager = DegradationManager::new();

        manager.update_pressure(ResourcePressure {
            cpu_usage: 0.5,
            memory_usage: 0.5,
            disk_usage: 0.5,
            bandwidth_usage: 0.5,
        });

        let avg = manager.average_pressure_score(Duration::from_secs(60));
        assert!(avg.is_some());
        assert!((avg.unwrap() - 0.5).abs() < 0.1);
    }
}