// Source: fraiseql_server/encryption/refresh_trigger.rs

// Phase 12.4 Cycle 2: Automatic Refresh Triggers - GREEN
//! Automatic key refresh triggering with background job coordination,
//! TTL monitoring, and non-blocking refresh during operations.

use std::{
    sync::{
        Arc, Mutex, MutexGuard, PoisonError,
        atomic::{AtomicBool, AtomicU64, Ordering},
    },
    time::Instant,
};

use chrono::{DateTime, Timelike, Utc};

/// Lifecycle state of a refresh job.
///
/// The [`std::fmt::Display`] implementation renders each variant as a single
/// lowercase word (`idle`, `running`, `success`, `failed`, `stopped`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefreshJobStatus {
    /// The job has not been started.
    Idle,
    /// The job is currently executing.
    Running,
    /// The job finished without error.
    Success,
    /// The job finished with an error.
    Failed,
    /// The job was stopped or cancelled.
    Stopped,
}

impl std::fmt::Display for RefreshJobStatus {
    /// Writes the lowercase name of the status.
    ///
    /// The name is written verbatim; width and padding flags on the formatter
    /// are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Idle => "idle",
            Self::Running => "running",
            Self::Success => "success",
            Self::Failed => "failed",
            Self::Stopped => "stopped",
        };
        f.write_str(label)
    }
}

/// Settings that control automatic key refresh.
///
/// Create one with [`RefreshConfig::new`] (or [`Default::default`]) and adjust it
/// with the chainable `with_*` methods.
#[derive(Debug, Clone)]
pub struct RefreshConfig {
    /// Whether automatic refresh is enabled (default: `true`).
    pub enabled:                   bool,
    /// Interval between checks, in hours (default: 24).
    pub check_interval_hours:      u32,
    /// Percentage of TTL consumed at which a refresh is triggered (default: 80).
    pub refresh_threshold_percent: u32,
    /// Hour (0-23) at which quiet hours start; `None` means quiet hours are disabled.
    pub quiet_hours_start:         Option<u32>,
    /// Hour (0-23) at which quiet hours end.
    pub quiet_hours_end:           Option<u32>,
}

impl RefreshConfig {
    /// Creates the default configuration: enabled, checked every 24 hours,
    /// refresh at 80% of TTL consumed, no quiet hours.
    #[must_use]
    pub fn new() -> Self {
        Self {
            enabled:                   true,
            check_interval_hours:      24,
            refresh_threshold_percent: 80,
            quiet_hours_start:         None,
            quiet_hours_end:           None,
        }
    }

    /// Returns the configuration with automatic refresh enabled or disabled.
    #[must_use]
    pub fn with_enabled(self, enabled: bool) -> Self {
        Self { enabled, ..self }
    }

    /// Returns the configuration with the check interval set to `hours`.
    ///
    /// An interval of 0 is raised to 1 hour.
    #[must_use]
    pub fn with_check_interval(self, hours: u32) -> Self {
        Self {
            check_interval_hours: hours.max(1),
            ..self
        }
    }

    /// Returns the configuration with the refresh threshold set to `percent`.
    ///
    /// Values above 99 are capped at 99.
    #[must_use]
    pub fn with_refresh_threshold(self, percent: u32) -> Self {
        Self {
            refresh_threshold_percent: percent.min(99),
            ..self
        }
    }

    /// Returns the configuration with quiet hours set (e.g. `2, 4` for 2am-4am).
    ///
    /// Both hours must be in `0..24`; if either is out of range the call leaves
    /// the existing quiet-hours settings untouched.
    #[must_use]
    pub fn with_quiet_hours(self, start_hour: u32, end_hour: u32) -> Self {
        if start_hour >= 24 || end_hour >= 24 {
            return self;
        }
        Self {
            quiet_hours_start: Some(start_hour),
            quiet_hours_end: Some(end_hour),
            ..self
        }
    }
}

impl Default for RefreshConfig {
    /// Equivalent to [`RefreshConfig::new`].
    fn default() -> Self {
        Self::new()
    }
}

103/// Refresh trigger state and history
104#[derive(Debug, Clone)]
105pub struct RefreshTrigger {
106    /// Refresh configuration
107    config:                   Arc<RefreshConfig>,
108    /// Last refresh check time
109    last_check:               Arc<std::sync::Mutex<Option<DateTime<Utc>>>>,
110    /// Last refresh completion time
111    last_refresh:             Arc<std::sync::Mutex<Option<DateTime<Utc>>>>,
112    /// Last refresh duration in milliseconds
113    last_refresh_duration_ms: Arc<AtomicU64>,
114    /// Total refreshes performed
115    total_refreshes:          Arc<AtomicU64>,
116    /// Failed refreshes count
117    failed_refreshes:         Arc<AtomicU64>,
118    /// Refresh pending flag
119    refresh_pending:          Arc<AtomicBool>,
120}
121
122impl RefreshTrigger {
123    /// Create new refresh trigger
124    pub fn new(config: RefreshConfig) -> Self {
125        Self {
126            config:                   Arc::new(config),
127            last_check:               Arc::new(std::sync::Mutex::new(None)),
128            last_refresh:             Arc::new(std::sync::Mutex::new(None)),
129            last_refresh_duration_ms: Arc::new(AtomicU64::new(0)),
130            total_refreshes:          Arc::new(AtomicU64::new(0)),
131            failed_refreshes:         Arc::new(AtomicU64::new(0)),
132            refresh_pending:          Arc::new(AtomicBool::new(false)),
133        }
134    }
135
136    /// Check if refresh should be triggered
137    pub fn should_trigger(&self, ttl_consumed_percent: u32) -> bool {
138        if !self.config.enabled {
139            return false;
140        }
141
142        // Check if refresh pending (already triggered for this version)
143        if self.refresh_pending.load(Ordering::Relaxed) {
144            return false;
145        }
146
147        // Check if TTL threshold reached
148        if ttl_consumed_percent < self.config.refresh_threshold_percent {
149            return false;
150        }
151
152        // Check quiet hours if configured
153        if let (Some(start), Some(end)) =
154            (self.config.quiet_hours_start, self.config.quiet_hours_end)
155        {
156            let now = Utc::now();
157            let hour = now.hour() as u32;
158
159            if start < end {
160                // Normal case: 2am-4am
161                if hour >= start && hour < end {
162                    return false;
163                }
164            } else {
165                // Wrap case: 22pm-2am
166                if hour >= start || hour < end {
167                    return false;
168                }
169            }
170        }
171
172        true
173    }
174
175    /// Mark refresh as pending
176    pub fn mark_pending(&self) {
177        self.refresh_pending.store(true, Ordering::Relaxed);
178    }
179
180    /// Clear pending flag after refresh completes
181    pub fn clear_pending(&self) {
182        self.refresh_pending.store(false, Ordering::Relaxed);
183    }
184
185    /// Record successful refresh
186    pub fn record_success(&self, duration_ms: u64) {
187        self.total_refreshes.fetch_add(1, Ordering::Relaxed);
188        self.last_refresh_duration_ms.store(duration_ms, Ordering::Relaxed);
189        if let Ok(mut last) = self.last_refresh.lock() {
190            *last = Some(Utc::now());
191        }
192    }
193
194    /// Record failed refresh
195    pub fn record_failure(&self) {
196        self.failed_refreshes.fetch_add(1, Ordering::Relaxed);
197    }
198
199    /// Record check attempt
200    pub fn record_check(&self) {
201        if let Ok(mut last) = self.last_check.lock() {
202            *last = Some(Utc::now());
203        }
204    }
205
206    /// Get last check time
207    pub fn last_check_time(&self) -> Option<DateTime<Utc>> {
208        if let Ok(last) = self.last_check.lock() {
209            *last
210        } else {
211            None
212        }
213    }
214
215    /// Get last refresh time
216    pub fn last_refresh_time(&self) -> Option<DateTime<Utc>> {
217        if let Ok(last) = self.last_refresh.lock() {
218            *last
219        } else {
220            None
221        }
222    }
223
224    /// Get total refreshes count
225    pub fn total_refreshes(&self) -> u64 {
226        self.total_refreshes.load(Ordering::Relaxed)
227    }
228
229    /// Get failed refreshes count
230    pub fn failed_refreshes(&self) -> u64 {
231        self.failed_refreshes.load(Ordering::Relaxed)
232    }
233
234    /// Get success rate percentage
235    pub fn success_rate_percent(&self) -> u32 {
236        let total = self.total_refreshes();
237        if total == 0 {
238            100
239        } else {
240            let failed = self.failed_refreshes();
241            let successful = total - failed;
242            ((successful as f64 / total as f64) * 100.0) as u32
243        }
244    }
245
246    /// Check if refresh is pending
247    pub fn is_pending(&self) -> bool {
248        self.refresh_pending.load(Ordering::Relaxed)
249    }
250
251    /// Check if refresh enabled
252    pub fn is_enabled(&self) -> bool {
253        self.config.enabled
254    }
255}
256
257impl Default for RefreshTrigger {
258    fn default() -> Self {
259        Self::new(RefreshConfig::default())
260    }
261}
262
263/// Refresh job state and coordination
264#[derive(Debug, Clone)]
265pub struct RefreshJob {
266    /// Job status
267    status:             Arc<std::sync::Mutex<RefreshJobStatus>>,
268    /// Job start time
269    start_time:         Arc<std::sync::Mutex<Option<Instant>>>,
270    /// Job last error message
271    last_error:         Arc<std::sync::Mutex<Option<String>>>,
272    /// Job is shutting down
273    shutdown_requested: Arc<AtomicBool>,
274}
275
276impl RefreshJob {
277    /// Create new refresh job
278    pub fn new() -> Self {
279        Self {
280            status:             Arc::new(std::sync::Mutex::new(RefreshJobStatus::Idle)),
281            start_time:         Arc::new(std::sync::Mutex::new(None)),
282            last_error:         Arc::new(std::sync::Mutex::new(None)),
283            shutdown_requested: Arc::new(AtomicBool::new(false)),
284        }
285    }
286
287    /// Mark job as running
288    pub fn start(&self) -> Result<(), String> {
289        let mut status = self.status.lock().map_err(|e| format!("Failed to lock status: {}", e))?;
290
291        if *status != RefreshJobStatus::Idle {
292            return Err(format!("Job already running: {}", status));
293        }
294
295        *status = RefreshJobStatus::Running;
296        let mut start = self
297            .start_time
298            .lock()
299            .map_err(|e| format!("Failed to lock start time: {}", e))?;
300        *start = Some(Instant::now());
301        Ok(())
302    }
303
304    /// Mark job as succeeded
305    pub fn complete_success(&self) -> Result<(), String> {
306        let mut status = self.status.lock().map_err(|e| format!("Failed to lock status: {}", e))?;
307        *status = RefreshJobStatus::Success;
308        Ok(())
309    }
310
311    /// Mark job as failed with error
312    pub fn complete_failure(&self, error: impl Into<String>) -> Result<(), String> {
313        let mut status = self.status.lock().map_err(|e| format!("Failed to lock status: {}", e))?;
314        *status = RefreshJobStatus::Failed;
315
316        let mut last_error =
317            self.last_error.lock().map_err(|e| format!("Failed to lock error: {}", e))?;
318        *last_error = Some(error.into());
319
320        Ok(())
321    }
322
323    /// Request job shutdown
324    pub fn request_shutdown(&self) {
325        self.shutdown_requested.store(true, Ordering::Relaxed);
326    }
327
328    /// Check if shutdown was requested
329    pub fn should_shutdown(&self) -> bool {
330        self.shutdown_requested.load(Ordering::Relaxed)
331    }
332
333    /// Get current job status
334    pub fn status(&self) -> Result<RefreshJobStatus, String> {
335        let status = self.status.lock().map_err(|e| format!("Failed to lock status: {}", e))?;
336        Ok(*status)
337    }
338
339    /// Get job duration if running or completed
340    pub fn duration(&self) -> Result<Option<std::time::Duration>, String> {
341        let start = self
342            .start_time
343            .lock()
344            .map_err(|e| format!("Failed to lock start time: {}", e))?;
345
346        Ok(start.map(|s| s.elapsed()))
347    }
348
349    /// Get last error message
350    pub fn last_error(&self) -> Result<Option<String>, String> {
351        let error = self.last_error.lock().map_err(|e| format!("Failed to lock error: {}", e))?;
352        Ok(error.clone())
353    }
354}
355
356impl Default for RefreshJob {
357    fn default() -> Self {
358        Self::new()
359    }
360}
361
362/// Refresh manager combining trigger and job coordination
363#[derive(Debug, Clone)]
364pub struct RefreshManager {
365    /// Refresh trigger
366    trigger: Arc<RefreshTrigger>,
367    /// Refresh job
368    job:     Arc<RefreshJob>,
369}
370
371impl RefreshManager {
372    /// Create new refresh manager
373    pub fn new(config: RefreshConfig) -> Self {
374        Self {
375            trigger: Arc::new(RefreshTrigger::new(config)),
376            job:     Arc::new(RefreshJob::new()),
377        }
378    }
379
380    /// Check if refresh should trigger and mark pending
381    pub fn check_and_trigger(&self, ttl_consumed_percent: u32) -> bool {
382        self.trigger.record_check();
383        if self.trigger.should_trigger(ttl_consumed_percent) {
384            self.trigger.mark_pending();
385            true
386        } else {
387            false
388        }
389    }
390
391    /// Start refresh job
392    pub fn start_job(&self) -> Result<(), String> {
393        self.job.start()
394    }
395
396    /// Complete refresh job successfully
397    pub fn complete_job_success(&self) -> Result<(), String> {
398        self.trigger.clear_pending();
399        self.job.complete_success()
400    }
401
402    /// Complete refresh job with failure
403    pub fn complete_job_failure(&self, error: impl Into<String>) -> Result<(), String> {
404        // Don't clear pending - allow retry
405        self.job.complete_failure(error)
406    }
407
408    /// Get refresh trigger
409    pub fn trigger(&self) -> Arc<RefreshTrigger> {
410        Arc::clone(&self.trigger)
411    }
412
413    /// Get refresh job
414    pub fn job(&self) -> Arc<RefreshJob> {
415        Arc::clone(&self.job)
416    }
417
418    /// Check if refresh is needed and pending
419    pub fn refresh_pending(&self) -> bool {
420        self.trigger.is_pending()
421    }
422
423    /// Check if automatic refresh enabled
424    pub fn is_enabled(&self) -> bool {
425        self.trigger.is_enabled()
426    }
427
428    /// Manually trigger refresh (bypass TTL check)
429    pub fn trigger_manual(&self) -> Result<(), String> {
430        if !self.trigger.is_pending() {
431            self.trigger.mark_pending();
432            Ok(())
433        } else {
434            Err("Refresh already pending".to_string())
435        }
436    }
437
438    /// Request job shutdown
439    pub fn request_shutdown(&self) {
440        self.job.request_shutdown();
441    }
442
443    // ========== REFACTOR ENHANCEMENTS ==========
444
445    /// Get time since last check
446    pub fn time_since_last_check(&self) -> Option<std::time::Duration> {
447        self.trigger
448            .last_check_time()
449            .map(|last| (Utc::now() - last).to_std().unwrap_or_default())
450    }
451
452    /// Get time since last refresh
453    pub fn time_since_last_refresh(&self) -> Option<std::time::Duration> {
454        self.trigger
455            .last_refresh_time()
456            .map(|last| (Utc::now() - last).to_std().unwrap_or_default())
457    }
458
459    /// Check if job is currently running
460    pub fn job_running(&self) -> bool {
461        self.job.status().map(|s| s == RefreshJobStatus::Running).unwrap_or(false)
462    }
463
464    /// Get job success rate percentage
465    pub fn job_success_rate_percent(&self) -> u32 {
466        self.trigger.success_rate_percent()
467    }
468
469    /// Get health status of refresh system
470    pub fn health_status(&self) -> RefreshHealthStatus {
471        let job_status = self.job.status().unwrap_or(RefreshJobStatus::Failed);
472
473        if !self.is_enabled() {
474            RefreshHealthStatus::Disabled
475        } else if self.job_running() {
476            RefreshHealthStatus::Running
477        } else if self.refresh_pending() {
478            RefreshHealthStatus::Pending
479        } else if job_status == RefreshJobStatus::Failed && self.trigger.failed_refreshes() > 2 {
480            RefreshHealthStatus::Degraded
481        } else {
482            RefreshHealthStatus::Healthy
483        }
484    }
485
486    /// Check if should retry refresh (has pending but not max retries)
487    pub fn should_retry_refresh(&self) -> bool {
488        self.refresh_pending() && self.trigger.failed_refreshes() < 5
489    }
490
491    /// Reset refresh state for retry
492    pub fn reset_for_retry(&self) {
493        // Clear pending so next check can trigger again
494        self.trigger.clear_pending();
495    }
496}
497
/// Health summary of the refresh system, as reported by
/// `RefreshManager::health_status`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefreshHealthStatus {
    /// Automatic refresh is disabled.
    Disabled,
    /// A refresh job is running.
    Running,
    /// A refresh has been triggered and is waiting to execute.
    Pending,
    /// Refresh is operating but has accumulated failures.
    Degraded,
    /// No problems detected.
    Healthy,
}

513impl Default for RefreshManager {
514    fn default() -> Self {
515        Self::new(RefreshConfig::default())
516    }
517}
518
/// Unit tests for the refresh configuration, trigger, job and manager.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_refresh_config_default() {
        let config = RefreshConfig::new();
        assert!(config.enabled);
        assert_eq!(config.check_interval_hours, 24);
        assert_eq!(config.refresh_threshold_percent, 80);
    }

    #[test]
    fn test_refresh_config_builder() {
        let config = RefreshConfig::new()
            .with_enabled(false)
            .with_check_interval(12)
            .with_refresh_threshold(75);
        assert!(!config.enabled);
        assert_eq!(config.check_interval_hours, 12);
        assert_eq!(config.refresh_threshold_percent, 75);
    }

    #[test]
    fn test_refresh_config_clamps_values() {
        let config = RefreshConfig::new().with_check_interval(0).with_refresh_threshold(150);
        assert_eq!(config.check_interval_hours, 1); // Raised to the minimum
        assert_eq!(config.refresh_threshold_percent, 99); // Capped at the maximum
    }

    #[test]
    fn test_refresh_config_quiet_hours_validation() {
        let config = RefreshConfig::new().with_quiet_hours(22, 2);
        assert_eq!(config.quiet_hours_start, Some(22));
        assert_eq!(config.quiet_hours_end, Some(2));

        // Out-of-range hours leave the configuration untouched
        let config = RefreshConfig::new().with_quiet_hours(24, 2);
        assert_eq!(config.quiet_hours_start, None);
        assert_eq!(config.quiet_hours_end, None);
    }

    #[test]
    fn test_refresh_trigger_creation() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        assert!(trigger.is_enabled());
        assert!(!trigger.is_pending());
        assert_eq!(trigger.total_refreshes(), 0);
    }

    #[test]
    fn test_refresh_trigger_should_trigger() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        assert!(!trigger.should_trigger(75)); // Below threshold
        assert!(trigger.should_trigger(80)); // At threshold
        assert!(trigger.should_trigger(85)); // Above threshold
    }

    #[test]
    fn test_refresh_trigger_disabled() {
        let config = RefreshConfig::new().with_enabled(false);
        let trigger = RefreshTrigger::new(config);
        assert!(!trigger.should_trigger(85)); // Even above threshold
    }

    #[test]
    fn test_refresh_trigger_mark_pending() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        trigger.mark_pending();
        assert!(trigger.is_pending());

        trigger.clear_pending();
        assert!(!trigger.is_pending());
    }

    #[test]
    fn test_refresh_trigger_single_trigger() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        assert!(trigger.should_trigger(85));
        trigger.mark_pending();
        assert!(!trigger.should_trigger(85)); // Won't trigger again
    }

    #[test]
    fn test_refresh_trigger_record_success() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        trigger.record_success(100);
        assert_eq!(trigger.total_refreshes(), 1);
        assert_eq!(trigger.failed_refreshes(), 0);
        assert_eq!(trigger.success_rate_percent(), 100);
    }

    #[test]
    fn test_refresh_trigger_record_failure() {
        let trigger = RefreshTrigger::new(RefreshConfig::default());
        trigger.record_success(100);
        trigger.record_success(100);
        trigger.record_failure();
        assert_eq!(trigger.total_refreshes(), 2);
        assert_eq!(trigger.failed_refreshes(), 1);
        assert_eq!(trigger.success_rate_percent(), 50);
    }

    #[test]
    fn test_refresh_job_creation() {
        let job = RefreshJob::new();
        assert_eq!(job.status().unwrap(), RefreshJobStatus::Idle);
        assert!(!job.should_shutdown());
    }

    #[test]
    fn test_refresh_job_lifecycle() {
        let job = RefreshJob::new();
        job.start().unwrap();
        assert_eq!(job.status().unwrap(), RefreshJobStatus::Running);

        job.complete_success().unwrap();
        assert_eq!(job.status().unwrap(), RefreshJobStatus::Success);
    }

    #[test]
    fn test_refresh_job_rejects_start_while_running() {
        let job = RefreshJob::new();
        job.start().unwrap();
        assert!(job.start().is_err());
        assert_eq!(job.status().unwrap(), RefreshJobStatus::Running);
    }

    #[test]
    fn test_refresh_job_failure() {
        let job = RefreshJob::new();
        job.start().unwrap();
        job.complete_failure("Vault unreachable").unwrap();
        assert_eq!(job.status().unwrap(), RefreshJobStatus::Failed);
        assert!(job.last_error().unwrap().is_some());
    }

    #[test]
    fn test_refresh_job_shutdown() {
        let job = RefreshJob::new();
        assert!(!job.should_shutdown());
        job.request_shutdown();
        assert!(job.should_shutdown());
    }

    #[test]
    fn test_refresh_manager_creation() {
        let manager = RefreshManager::new(RefreshConfig::default());
        assert!(manager.is_enabled());
        assert!(!manager.refresh_pending());
    }

    #[test]
    fn test_refresh_manager_check_and_trigger() {
        let manager = RefreshManager::new(RefreshConfig::default());
        assert!(!manager.check_and_trigger(75));
        assert!(manager.check_and_trigger(80));
        assert!(manager.refresh_pending());
    }

    #[test]
    fn test_refresh_manager_job_lifecycle() {
        let manager = RefreshManager::new(RefreshConfig::default());
        assert!(manager.check_and_trigger(85));
        manager.start_job().unwrap();
        assert_eq!(manager.job().status().unwrap(), RefreshJobStatus::Running);

        manager.complete_job_success().unwrap();
        assert_eq!(manager.job().status().unwrap(), RefreshJobStatus::Success);
        assert!(!manager.refresh_pending());
    }

    #[test]
    fn test_refresh_manager_manual_trigger() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.trigger_manual().unwrap();
        assert!(manager.refresh_pending());
    }

    #[test]
    fn test_refresh_manager_manual_trigger_rejects_when_pending() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.trigger_manual().unwrap();
        assert!(manager.trigger_manual().is_err());
        assert!(manager.refresh_pending());
    }

    #[test]
    fn test_refresh_manager_job_running() {
        let manager = RefreshManager::new(RefreshConfig::default());
        assert!(!manager.job_running());
        manager.start_job().unwrap();
        assert!(manager.job_running());
    }

    #[test]
    fn test_refresh_manager_health_status_disabled() {
        let config = RefreshConfig::default().with_enabled(false);
        let manager = RefreshManager::new(config);
        assert_eq!(manager.health_status(), RefreshHealthStatus::Disabled);
    }

    #[test]
    fn test_refresh_manager_health_status_healthy() {
        let manager = RefreshManager::new(RefreshConfig::default());
        assert_eq!(manager.health_status(), RefreshHealthStatus::Healthy);
    }

    #[test]
    fn test_refresh_manager_health_status_pending() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.check_and_trigger(85);
        assert_eq!(manager.health_status(), RefreshHealthStatus::Pending);
    }

    #[test]
    fn test_refresh_manager_health_status_running() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.check_and_trigger(85);
        manager.start_job().unwrap();
        assert_eq!(manager.health_status(), RefreshHealthStatus::Running);
    }

    #[test]
    fn test_refresh_manager_should_retry() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.check_and_trigger(85);
        assert!(manager.should_retry_refresh());

        // Simulate max failures
        for _ in 0..5 {
            manager.trigger().record_failure();
        }
        assert!(!manager.should_retry_refresh());
    }

    #[test]
    fn test_refresh_manager_reset_for_retry() {
        let manager = RefreshManager::new(RefreshConfig::default());
        manager.check_and_trigger(85);
        assert!(manager.refresh_pending());

        manager.reset_for_retry();
        assert!(!manager.refresh_pending());
    }
}
729}