leptos_sync_core/reliability/monitoring/alerts.rs

//! Alert management and notification system

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Alert rule definition
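///
/// # Example
///
/// A minimal builder-style sketch (marked `ignore`; it assumes the module is
/// exposed at `leptos_sync_core::reliability::monitoring::alerts`):
///
/// ```ignore
/// use leptos_sync_core::reliability::monitoring::alerts::{
///     AlertCondition, AlertRule, AlertSeverity, ComparisonOperator,
/// };
///
/// let rule = AlertRule::new(
///     "error_rate_high".to_string(),
///     "High error rate".to_string(),
///     "error_rate".to_string(),
///     AlertCondition::new(ComparisonOperator::GreaterThan, 0.05, 60),
///     AlertSeverity::Medium,
/// )
/// .with_description("Fires when the error rate exceeds 5%".to_string())
/// .with_cooldown(600);
///
/// assert!(rule.enabled);
/// ```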
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertRule {
    /// Unique identifier for the rule
    pub id: String,
    /// Human-readable name for the rule
    pub name: String,
    /// Description of what this rule monitors
    pub description: String,
    /// Metric name to monitor
    pub metric_name: String,
    /// Condition that triggers the alert
    pub condition: AlertCondition,
    /// Severity level of the alert
    pub severity: AlertSeverity,
    /// Whether the rule is enabled
    pub enabled: bool,
    /// Cooldown period between alerts (in seconds)
    pub cooldown_seconds: u64,
}

impl AlertRule {
    /// Create a new alert rule
    pub fn new(
        id: String,
        name: String,
        metric_name: String,
        condition: AlertCondition,
        severity: AlertSeverity,
    ) -> Self {
        Self {
            id,
            name,
            description: String::new(),
            metric_name,
            condition,
            severity,
            enabled: true,
            cooldown_seconds: 300, // 5 minutes default
        }
    }

    /// Set the description
    pub fn with_description(mut self, description: String) -> Self {
        self.description = description;
        self
    }

    /// Set the cooldown period
    pub fn with_cooldown(mut self, cooldown_seconds: u64) -> Self {
        self.cooldown_seconds = cooldown_seconds;
        self
    }

    /// Enable or disable the rule
    pub fn set_enabled(mut self, enabled: bool) -> Self {
        self.enabled = enabled;
        self
    }
}

/// Alert condition for triggering alerts
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertCondition {
    /// Comparison operator
    pub operator: ComparisonOperator,
    /// Threshold value
    pub threshold: f64,
    /// Duration the condition must be true before triggering (in seconds)
    pub duration_seconds: u64,
}

impl AlertCondition {
    /// Create a new alert condition
    pub fn new(operator: ComparisonOperator, threshold: f64, duration_seconds: u64) -> Self {
        Self {
            operator,
            threshold,
            duration_seconds,
        }
    }

    /// Check if a value satisfies this condition
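    ///
    /// A small sketch of the comparison semantics:
    ///
    /// ```ignore
    /// let cond = AlertCondition::new(ComparisonOperator::GreaterThanOrEqual, 0.95, 0);
    /// assert!(cond.is_satisfied(0.95));
    /// assert!(!cond.is_satisfied(0.90));
    /// ```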
    pub fn is_satisfied(&self, value: f64) -> bool {
        match self.operator {
            ComparisonOperator::GreaterThan => value > self.threshold,
            ComparisonOperator::GreaterThanOrEqual => value >= self.threshold,
            ComparisonOperator::LessThan => value < self.threshold,
            ComparisonOperator::LessThanOrEqual => value <= self.threshold,
            ComparisonOperator::Equal => (value - self.threshold).abs() < f64::EPSILON,
            ComparisonOperator::NotEqual => (value - self.threshold).abs() >= f64::EPSILON,
        }
    }
}

/// Comparison operators for alert conditions
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComparisonOperator {
    /// Greater than
    GreaterThan,
    /// Greater than or equal
    GreaterThanOrEqual,
    /// Less than
    LessThan,
    /// Less than or equal
    LessThanOrEqual,
    /// Equal to
    Equal,
    /// Not equal to
    NotEqual,
}

/// Alert severity levels
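///
/// Variants are declared from least to most severe, so the derived ordering
/// allows direct comparison (a small sketch):
///
/// ```ignore
/// assert!(AlertSeverity::Critical > AlertSeverity::High);
/// assert!(AlertSeverity::Low < AlertSeverity::Medium);
/// ```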
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum AlertSeverity {
    /// Low severity - informational
    Low,
    /// Medium severity - warning
    Medium,
    /// High severity - error
    High,
    /// Critical severity - system failure
    Critical,
}

/// An active alert
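///
/// A minimal lifecycle sketch:
///
/// ```ignore
/// let mut alert = Alert::new(
///     "alert_1".to_string(),
///     "rule_1".to_string(),
///     AlertSeverity::Medium,
///     "Latency above threshold".to_string(),
/// );
/// assert!(alert.is_active);
///
/// alert.resolve();
/// assert!(!alert.is_active);
/// println!("alert was active for {:?}", alert.duration());
/// ```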
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    /// Unique identifier for the alert
    pub id: String,
    /// Rule that triggered this alert
    pub rule_id: String,
    /// Alert severity
    pub severity: AlertSeverity,
    /// Alert message
    pub message: String,
    /// Timestamp when the alert was triggered
    pub triggered_at: u64,
    /// Timestamp when the alert was resolved (if resolved)
    pub resolved_at: Option<u64>,
    /// Whether the alert is currently active
    pub is_active: bool,
    /// Additional context data
    pub context: HashMap<String, String>,
}

impl Alert {
    /// Create a new alert
    pub fn new(
        id: String,
        rule_id: String,
        severity: AlertSeverity,
        message: String,
    ) -> Self {
        Self {
            id,
            rule_id,
            severity,
            message,
            triggered_at: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            resolved_at: None,
            is_active: true,
            context: HashMap::new(),
        }
    }

    /// Add context data to the alert
    pub fn with_context(mut self, context: HashMap<String, String>) -> Self {
        self.context = context;
        self
    }

    /// Resolve the alert
    pub fn resolve(&mut self) {
        self.is_active = false;
        self.resolved_at = Some(
            SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        );
    }

    /// Get the duration the alert has been active
    pub fn duration(&self) -> Duration {
        let end_time = self.resolved_at.unwrap_or_else(|| {
            SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs()
        });
        // Saturate to avoid an underflow panic if the system clock moved backwards
        Duration::from_secs(end_time.saturating_sub(self.triggered_at))
    }
}

/// Alert manager for handling alert rules and notifications
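///
/// # Example
///
/// A minimal end-to-end sketch (marked `ignore`; it assumes the module is
/// exposed at `leptos_sync_core::reliability::monitoring::alerts`):
///
/// ```ignore
/// let mut manager = AlertManager::new();
/// manager.add_rule(AlertRule::new(
///     "cpu_high".to_string(),
///     "High CPU Usage".to_string(),
///     "cpu_usage".to_string(),
///     AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60),
///     AlertSeverity::High,
/// ));
///
/// // A sample above the threshold triggers a new alert.
/// let new_alerts = manager.check_metric("cpu_usage", 92.5);
/// assert_eq!(new_alerts.len(), 1);
/// assert_eq!(manager.get_active_alerts().len(), 1);
///
/// // A sample back under the threshold resolves it on the next check.
/// manager.check_metric("cpu_usage", 40.0);
/// assert!(manager.get_active_alerts().is_empty());
/// ```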
#[derive(Debug, Clone)]
pub struct AlertManager {
    /// Alert rules
    rules: HashMap<String, AlertRule>,
    /// Active alerts
    active_alerts: HashMap<String, Alert>,
    /// Alert history
    alert_history: Vec<Alert>,
    /// Maximum number of alerts to keep in history
    max_history_size: usize,
    /// Last trigger times for cooldown tracking
    last_trigger_times: HashMap<String, u64>,
}

impl AlertManager {
    /// Create a new alert manager
    pub fn new() -> Self {
        Self {
            rules: HashMap::new(),
            active_alerts: HashMap::new(),
            alert_history: Vec::new(),
            max_history_size: 1000,
            last_trigger_times: HashMap::new(),
        }
    }

    /// Create an alert manager with configuration
    pub fn with_config(config: AlertConfig) -> Self {
        Self {
            rules: HashMap::new(),
            active_alerts: HashMap::new(),
            alert_history: Vec::new(),
            max_history_size: config.max_history_size,
            last_trigger_times: HashMap::new(),
        }
    }

    /// Add an alert rule
    pub fn add_rule(&mut self, rule: AlertRule) {
        self.rules.insert(rule.id.clone(), rule);
    }

    /// Remove an alert rule
    pub fn remove_rule(&mut self, rule_id: &str) {
        self.rules.remove(rule_id);
        // Also remove any active alerts for this rule
        self.active_alerts.retain(|_, alert| alert.rule_id != rule_id);
    }

    /// Update an alert rule
    pub fn update_rule(&mut self, rule: AlertRule) {
        self.rules.insert(rule.id.clone(), rule);
    }

    /// Get all alert rules
    pub fn get_rules(&self) -> Vec<&AlertRule> {
        self.rules.values().collect()
    }

    /// Get a specific alert rule
    pub fn get_rule(&self, rule_id: &str) -> Option<&AlertRule> {
        self.rules.get(rule_id)
    }

    /// Evaluate a metric sample against all matching rules, returning any newly
    /// triggered alerts and resolving active alerts whose conditions no longer hold
    pub fn check_metric(&mut self, metric_name: &str, value: f64) -> Vec<Alert> {
        let mut new_alerts = Vec::new();
        let current_time = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        // Find rules that match this metric
        let rule_ids: Vec<String> = self.rules.keys().cloned().collect();
        let mut rules_to_resolve = Vec::new();

        for rule_id in rule_ids {
            if let Some(rule) = self.rules.get(&rule_id) {
                if !rule.enabled || rule.metric_name != metric_name {
                    continue;
                }

                if rule.condition.is_satisfied(value) {
                    // Skip if there's already an active alert for this rule
                    let has_active_alert = self.active_alerts.values()
                        .any(|alert| alert.rule_id == rule.id && alert.is_active);
                    if has_active_alert {
                        continue;
                    }

                    // Respect the cooldown between new alerts for this rule. The
                    // cooldown only gates the creation of new alerts; it must not
                    // stop existing alerts from being resolved below.
                    if let Some(last_trigger) = self.last_trigger_times.get(&rule.id) {
                        if current_time.saturating_sub(*last_trigger) < rule.cooldown_seconds {
                            continue;
                        }
                    }

                    // Create new alert
                    let alert_id = format!("{}_{}", rule.id, current_time);
                    let message = format!(
                        "Alert triggered: {} (value: {}, threshold: {})",
                        rule.name, value, rule.condition.threshold
                    );

                    let mut alert = Alert::new(
                        alert_id,
                        rule.id.clone(),
                        rule.severity.clone(),
                        message,
                    );

                    // Add context
                    let mut context = HashMap::new();
                    context.insert("metric_name".to_string(), metric_name.to_string());
                    context.insert("value".to_string(), value.to_string());
                    context.insert("threshold".to_string(), rule.condition.threshold.to_string());
                    alert.context = context;

                    self.active_alerts.insert(alert.id.clone(), alert.clone());
                    self.alert_history.push(alert.clone());
                    self.last_trigger_times.insert(rule.id.clone(), current_time);
                    new_alerts.push(alert);
                } else {
                    // Condition not satisfied, mark the rule's alerts for resolution
                    rules_to_resolve.push(rule.id.clone());
                }
            }
        }

        // Resolve alerts for rules whose conditions no longer hold
        for rule_id in rules_to_resolve {
            self.resolve_alerts_for_rule(&rule_id);
        }

        // Clean up old history
        self.cleanup_history();

        new_alerts
    }

    /// Get all active alerts
    pub fn get_active_alerts(&self) -> Vec<&Alert> {
        self.active_alerts.values().filter(|alert| alert.is_active).collect()
    }

    /// Get all active alerts with the given severity
    pub fn get_alerts_by_severity(&self, severity: &AlertSeverity) -> Vec<&Alert> {
        self.active_alerts
            .values()
            .filter(|alert| alert.is_active && &alert.severity == severity)
            .collect()
    }

    /// Resolve an alert by ID
    pub fn resolve_alert(&mut self, alert_id: &str) -> bool {
        if let Some(alert) = self.active_alerts.get_mut(alert_id) {
            alert.resolve();
            true
        } else {
            false
        }
    }

    /// Resolve all alerts for a specific rule
    fn resolve_alerts_for_rule(&mut self, rule_id: &str) {
        for alert in self.active_alerts.values_mut() {
            if alert.rule_id == rule_id && alert.is_active {
                alert.resolve();
            }
        }
    }

    /// Get alert history
    pub fn get_alert_history(&self, limit: Option<usize>) -> Vec<&Alert> {
        let limit = limit.unwrap_or(self.alert_history.len());
        self.alert_history
            .iter()
            .rev()
            .take(limit)
            .collect()
    }

    /// Clear alert history
    pub fn clear_history(&mut self) {
        self.alert_history.clear();
    }

    /// Clean up old history entries
    fn cleanup_history(&mut self) {
        if self.alert_history.len() > self.max_history_size {
            let excess = self.alert_history.len() - self.max_history_size;
            self.alert_history.drain(0..excess);
        }
    }

    /// Get alert statistics
    pub fn get_stats(&self) -> AlertStats {
        let active_count = self.active_alerts.values().filter(|a| a.is_active).count();
        let critical_count = self.get_alerts_by_severity(&AlertSeverity::Critical).len();
        let high_count = self.get_alerts_by_severity(&AlertSeverity::High).len();
        let medium_count = self.get_alerts_by_severity(&AlertSeverity::Medium).len();
        let low_count = self.get_alerts_by_severity(&AlertSeverity::Low).len();

        AlertStats {
            total_rules: self.rules.len(),
            active_alerts: active_count,
            critical_alerts: critical_count,
            high_alerts: high_count,
            medium_alerts: medium_count,
            low_alerts: low_count,
            total_history: self.alert_history.len(),
        }
    }
}

impl Default for AlertManager {
    fn default() -> Self {
        Self::new()
    }
}

/// Alert statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertStats {
    /// Total number of alert rules
    pub total_rules: usize,
    /// Number of active alerts
    pub active_alerts: usize,
    /// Number of critical alerts
    pub critical_alerts: usize,
    /// Number of high severity alerts
    pub high_alerts: usize,
    /// Number of medium severity alerts
    pub medium_alerts: usize,
    /// Number of low severity alerts
    pub low_alerts: usize,
    /// Total number of alerts in history
    pub total_history: usize,
}

/// Configuration for alert management
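///
/// A small sketch of constructing a manager with a custom history cap:
///
/// ```ignore
/// let config = AlertConfig { max_history_size: 250 };
/// let manager = AlertManager::with_config(config);
/// ```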
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertConfig {
    /// Maximum number of alerts to keep in history
    pub max_history_size: usize,
}

impl Default for AlertConfig {
    fn default() -> Self {
        Self {
            max_history_size: 1000,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_alert_rule_creation() {
        let condition = AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60);
        let rule = AlertRule::new(
            "cpu_high".to_string(),
            "High CPU Usage".to_string(),
            "cpu_usage".to_string(),
            condition,
            AlertSeverity::High,
        );

        assert_eq!(rule.id, "cpu_high");
        assert_eq!(rule.metric_name, "cpu_usage");
        assert!(rule.enabled);
    }

    #[test]
    fn test_alert_condition() {
        let condition = AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60);

        assert!(condition.is_satisfied(85.0));
        assert!(!condition.is_satisfied(75.0));
        assert!(!condition.is_satisfied(80.0));
    }

    #[test]
    fn test_alert_creation() {
        let alert = Alert::new(
            "alert_1".to_string(),
            "rule_1".to_string(),
            AlertSeverity::High,
            "Test alert".to_string(),
        );

        assert_eq!(alert.id, "alert_1");
        assert!(alert.is_active);
        assert!(alert.resolved_at.is_none());
    }

    #[test]
    fn test_alert_resolution() {
        let mut alert = Alert::new(
            "alert_1".to_string(),
            "rule_1".to_string(),
            AlertSeverity::High,
            "Test alert".to_string(),
        );

        assert!(alert.is_active);
        alert.resolve();
        assert!(!alert.is_active);
        assert!(alert.resolved_at.is_some());
    }

    #[test]
    fn test_alert_manager() {
        let mut manager = AlertManager::new();

        // Add a rule
        let condition = AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60);
        let rule = AlertRule::new(
            "cpu_high".to_string(),
            "High CPU Usage".to_string(),
            "cpu_usage".to_string(),
            condition,
            AlertSeverity::High,
        );
        manager.add_rule(rule);

        // Check metric that should trigger alert
        let alerts = manager.check_metric("cpu_usage", 85.0);
        assert_eq!(alerts.len(), 1);
        assert_eq!(alerts[0].severity, AlertSeverity::High);

        // Check that we have an active alert
        let active_alerts = manager.get_active_alerts();
        assert_eq!(active_alerts.len(), 1);

        // Check metric that should not trigger alert
        let alerts = manager.check_metric("cpu_usage", 75.0);
        assert_eq!(alerts.len(), 0);

        // Check that the alert was resolved
        let active_alerts = manager.get_active_alerts();
        assert_eq!(active_alerts.len(), 0);
    }

    #[test]
    fn test_alert_cooldown() {
        let mut manager = AlertManager::new();

        let condition = AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60);
        let rule = AlertRule::new(
            "cpu_high".to_string(),
            "High CPU Usage".to_string(),
            "cpu_usage".to_string(),
            condition,
            AlertSeverity::High,
        ).with_cooldown(300); // 5 minutes
        manager.add_rule(rule);

        // First trigger should create an alert
        let alerts = manager.check_metric("cpu_usage", 85.0);
        assert_eq!(alerts.len(), 1);

        // A second trigger while the first alert is still active (and within
        // the cooldown window) should not create another alert
        let alerts = manager.check_metric("cpu_usage", 90.0);
        assert_eq!(alerts.len(), 0);
    }
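
    // Supplementary sketches exercising severity ordering, stats counters,
    // and the history cap implied by `AlertConfig::max_history_size`.
    #[test]
    fn test_severity_ordering() {
        assert!(AlertSeverity::Critical > AlertSeverity::High);
        assert!(AlertSeverity::High > AlertSeverity::Medium);
        assert!(AlertSeverity::Medium > AlertSeverity::Low);
    }

    #[test]
    fn test_alert_stats_and_history_cap() {
        // Keep only the two most recent alerts in history
        let mut manager = AlertManager::with_config(AlertConfig { max_history_size: 2 });

        let condition = AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60);
        let rule = AlertRule::new(
            "cpu_high".to_string(),
            "High CPU Usage".to_string(),
            "cpu_usage".to_string(),
            condition,
            AlertSeverity::High,
        );
        manager.add_rule(rule);

        let alerts = manager.check_metric("cpu_usage", 95.0);
        assert_eq!(alerts.len(), 1);

        let stats = manager.get_stats();
        assert_eq!(stats.total_rules, 1);
        assert_eq!(stats.active_alerts, 1);
        assert_eq!(stats.high_alerts, 1);
        assert!(stats.total_history <= 2);
    }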
}