pulseengine_mcp_logging/alerting.rs

//! Alerting and notification system for MCP servers
//!
//! This module provides:
//! - Configurable alert rules and thresholds
//! - Multiple notification channels (email, webhook, Slack, etc.)
//! - Alert de-duplication and escalation
//! - Alert history and acknowledgment
//! - Integration with metrics system
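//!
//! # Example
//!
//! A minimal usage sketch (not compiled as a doctest; assumes a Tokio runtime
//! and that these types are exposed at `pulseengine_mcp_logging::alerting`):
//!
//! ```ignore
//! use pulseengine_mcp_logging::alerting::{AlertConfig, AlertManager};
//!
//! #[tokio::main]
//! async fn main() {
//!     // Build a manager from the built-in defaults (console channel,
//!     // error-rate and response-time rules) and start its background loops.
//!     let manager = AlertManager::new(AlertConfig::default());
//!     manager.start().await;
//!
//!     // Later: inspect whatever is currently firing.
//!     let active = manager.get_active_alerts().await;
//!     println!("{} active alert(s)", active.len());
//! }
//! ```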

use crate::metrics::MetricsSnapshot;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{RwLock, mpsc};
use tracing::{error, info, warn};
use uuid::Uuid;

/// Alert severity levels
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum AlertSeverity {
    Critical,
    High,
    Medium,
    Low,
    Info,
}

/// Alert states
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum AlertState {
    Active,
    Acknowledged,
    Resolved,
    Suppressed,
}

/// Alert rule configuration
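///
/// # Example
///
/// A hedged sketch of the JSON shape a rule deserializes from: `metric` and
/// `operator` use snake_case variant names and `severity` is lowercase.
///
/// ```ignore
/// let rule: AlertRule = serde_json::from_value(serde_json::json!({
///     "id": "high_error_rate",
///     "name": "High Error Rate",
///     "description": "Error rate exceeds 5%",
///     "metric": "error_rate",
///     "operator": "greater_than",
///     "threshold": 0.05,
///     "duration_secs": 300,
///     "severity": "high",
///     "enabled": true,
///     "channels": ["console"],
///     "labels": {},
///     "suppress_duration_secs": 3600
/// }))
/// .expect("valid rule JSON");
/// ```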
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertRule {
    /// Unique rule ID
    pub id: String,

    /// Human-readable name
    pub name: String,

    /// Description of what triggers this alert
    pub description: String,

    /// Metric to monitor
    pub metric: MetricType,

    /// Comparison operator
    pub operator: ComparisonOperator,

    /// Threshold value
    pub threshold: f64,

    /// Duration the condition must persist before alerting
    pub duration_secs: u64,

    /// Alert severity
    pub severity: AlertSeverity,

    /// Enable/disable this rule
    pub enabled: bool,

    /// Notification channels to use
    pub channels: Vec<String>,

    /// Custom labels for this alert
    pub labels: HashMap<String, String>,

    /// Suppress similar alerts for this duration
    pub suppress_duration_secs: u64,
}

/// Types of metrics that can be monitored
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MetricType {
    ErrorRate,
    ResponseTime,
    RequestCount,
    MemoryUsage,
    CpuUsage,
    DiskUsage,
    ActiveConnections,
    HealthCheckFailures,
    Custom(String),
}

/// Comparison operators for thresholds
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ComparisonOperator {
    GreaterThan,
    GreaterThanOrEqual,
    LessThan,
    LessThanOrEqual,
    Equal,
    NotEqual,
}

/// Alert instance
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    /// Unique alert ID
    pub id: Uuid,

    /// Rule that triggered this alert
    pub rule_id: String,

    /// Alert message
    pub message: String,

    /// Alert severity
    pub severity: AlertSeverity,

    /// Current state
    pub state: AlertState,

    /// When the alert was first triggered
    pub triggered_at: DateTime<Utc>,

    /// When the alert was last updated
    pub updated_at: DateTime<Utc>,

    /// When the alert was acknowledged (if applicable)
    pub acknowledged_at: Option<DateTime<Utc>>,

    /// Who acknowledged the alert
    pub acknowledged_by: Option<String>,

    /// When the alert was resolved (if applicable)
    pub resolved_at: Option<DateTime<Utc>>,

    /// Current metric value that triggered the alert
    pub current_value: f64,

    /// Threshold that was exceeded
    pub threshold: f64,

    /// Labels associated with this alert
    pub labels: HashMap<String, String>,

    /// Number of times this alert has been triggered
    pub trigger_count: u64,

    /// Last notification sent timestamp
    pub last_notification_at: Option<DateTime<Utc>>,
}

/// Notification channel types
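///
/// # Example
///
/// A hedged sketch: with `#[serde(tag = "type")]`, each channel is configured
/// as an object whose `type` field names the variant (no case renaming is
/// applied to this enum).
///
/// ```ignore
/// let channel: NotificationChannel = serde_json::from_value(serde_json::json!({
///     "type": "Slack",
///     "webhook_url": "https://hooks.slack.com/services/T000/B000/XXXX",
///     "channel": "#alerts",
///     "username": "mcp-alerts",
///     "icon_emoji": null
/// }))
/// .expect("valid channel JSON");
/// ```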
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum NotificationChannel {
    Email {
        smtp_server: String,
        smtp_port: u16,
        username: String,
        password: String,
        from_address: String,
        to_addresses: Vec<String>,
        use_tls: bool,
    },
    Webhook {
        url: String,
        method: String,
        headers: HashMap<String, String>,
        template: String,
        timeout_secs: u64,
    },
    Slack {
        webhook_url: String,
        channel: String,
        username: Option<String>,
        icon_emoji: Option<String>,
    },
    PagerDuty {
        integration_key: String,
        service_name: String,
    },
    Console {
        use_colors: bool,
    },
}

/// Alert manager configuration
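///
/// # Example
///
/// A minimal sketch: start from the defaults and adjust a few knobs
/// programmatically instead of deserializing a full config file.
///
/// ```ignore
/// let mut config = AlertConfig::default();
/// config.evaluation_interval_secs = 15;
/// config.renotification_interval_secs = 1800;
/// config.default_channels = vec!["console".to_string()];
/// ```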
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertConfig {
    /// Enable alerting system
    pub enabled: bool,

    /// Alert rules
    pub rules: Vec<AlertRule>,

    /// Notification channels
    pub channels: HashMap<String, NotificationChannel>,

    /// Default notification channels
    pub default_channels: Vec<String>,

    /// Alert evaluation interval in seconds
    pub evaluation_interval_secs: u64,

    /// Maximum number of active alerts to keep
    pub max_active_alerts: usize,

    /// Maximum number of resolved alerts to keep in history
    pub max_alert_history: usize,

    /// Enable alert de-duplication
    pub deduplication_enabled: bool,

    /// Re-notification interval for unacknowledged alerts
    pub renotification_interval_secs: u64,
}

/// Alert manager
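///
/// # Example
///
/// A hedged sketch of the alert lifecycle (assumes a Tokio runtime and an
/// enclosing function that returns `Result<(), AlertError>`):
///
/// ```ignore
/// let manager = AlertManager::new(AlertConfig::default());
/// manager.start().await;
///
/// let active = manager.get_active_alerts().await;
/// if let Some(alert) = active.first() {
///     // Mark the alert as acknowledged by an operator...
///     manager
///         .acknowledge_alert(alert.id, "oncall@example.com".to_string())
///         .await?;
///     // ...and later move it into the history as resolved.
///     manager.resolve_alert(alert.id).await?;
/// }
/// ```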
pub struct AlertManager {
    config: AlertConfig,
    active_alerts: Arc<RwLock<HashMap<Uuid, Alert>>>,
    alert_history: Arc<RwLock<HashMap<Uuid, Alert>>>,
    rule_states: Arc<RwLock<HashMap<String, RuleState>>>,
    suppressed_alerts: Arc<RwLock<HashSet<String>>>,
    notification_tx: mpsc::Sender<NotificationRequest>,
    notification_rx: Arc<RwLock<mpsc::Receiver<NotificationRequest>>>,
}

/// Internal rule state tracking
#[derive(Debug, Clone)]
struct RuleState {
    condition_start: Option<DateTime<Utc>>,
    last_evaluation: DateTime<Utc>,
    consecutive_failures: u32,
}

/// Notification request
#[derive(Debug, Clone)]
struct NotificationRequest {
    alert: Alert,
    channels: Vec<String>,
    #[allow(dead_code)]
    is_resolved: bool,
}

impl AlertManager {
    /// Create a new alert manager
    pub fn new(config: AlertConfig) -> Self {
        let (notification_tx, notification_rx) = mpsc::channel(1000);

        Self {
            config,
            active_alerts: Arc::new(RwLock::new(HashMap::new())),
            alert_history: Arc::new(RwLock::new(HashMap::new())),
            rule_states: Arc::new(RwLock::new(HashMap::new())),
            suppressed_alerts: Arc::new(RwLock::new(HashSet::new())),
            notification_tx,
            notification_rx: Arc::new(RwLock::new(notification_rx)),
        }
    }

    /// Start the alert manager
    pub async fn start(&self) {
        if !self.config.enabled {
            info!("Alert manager is disabled");
            return;
        }

        info!("Starting alert manager");

        // Start evaluation loop
        let active_alerts = self.active_alerts.clone();
        let alert_history = self.alert_history.clone();
        let rule_states = self.rule_states.clone();
        let suppressed_alerts = self.suppressed_alerts.clone();
        let notification_tx = self.notification_tx.clone();
        let config = self.config.clone();

        tokio::spawn(async move {
            Self::evaluation_loop(
                active_alerts,
                alert_history,
                rule_states,
                suppressed_alerts,
                notification_tx,
                config,
            )
            .await;
        });

        // Start notification handler
        let notification_rx = self.notification_rx.clone();
        let config = self.config.clone();

        tokio::spawn(async move {
            Self::notification_loop(notification_rx, config).await;
        });

        // Start cleanup tasks
        self.start_cleanup_tasks().await;
    }

    /// Main evaluation loop
    async fn evaluation_loop(
        active_alerts: Arc<RwLock<HashMap<Uuid, Alert>>>,
        alert_history: Arc<RwLock<HashMap<Uuid, Alert>>>,
        rule_states: Arc<RwLock<HashMap<String, RuleState>>>,
        suppressed_alerts: Arc<RwLock<HashSet<String>>>,
        notification_tx: mpsc::Sender<NotificationRequest>,
        config: AlertConfig,
    ) {
        let mut interval =
            tokio::time::interval(Duration::from_secs(config.evaluation_interval_secs));

        loop {
            interval.tick().await;

            // Get current metrics
            let metrics = crate::metrics::get_metrics().get_metrics_snapshot().await;

            // Evaluate each rule
            for rule in &config.rules {
                if !rule.enabled {
                    continue;
                }

                if let Err(e) = Self::evaluate_rule(
                    rule,
                    &metrics,
                    &active_alerts,
                    &alert_history,
                    &rule_states,
                    &suppressed_alerts,
                    &notification_tx,
                    &config,
                )
                .await
                {
                    error!("Error evaluating rule {}: {}", rule.id, e);
                }
            }

            // Check for resolved alerts
            Self::check_resolved_alerts(&active_alerts, &alert_history, &notification_tx, &config)
                .await;

            // Send re-notifications for unacknowledged alerts
            Self::send_renotifications(&active_alerts, &notification_tx, &config).await;
        }
    }

    /// Evaluate a single rule
    #[allow(clippy::too_many_arguments)]
    async fn evaluate_rule(
        rule: &AlertRule,
        metrics: &MetricsSnapshot,
        active_alerts: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        alert_history: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        rule_states: &Arc<RwLock<HashMap<String, RuleState>>>,
        suppressed_alerts: &Arc<RwLock<HashSet<String>>>,
        notification_tx: &mpsc::Sender<NotificationRequest>,
        config: &AlertConfig,
    ) -> Result<(), AlertError> {
        let current_value = Self::extract_metric_value(rule, metrics);
        let condition_met = Self::evaluate_condition(rule, current_value);

        let mut states = rule_states.write().await;
        let rule_state = states.entry(rule.id.clone()).or_insert_with(|| RuleState {
            condition_start: None,
            last_evaluation: Utc::now(),
            consecutive_failures: 0,
        });

        rule_state.last_evaluation = Utc::now();

        if condition_met {
            rule_state.consecutive_failures += 1;

            if rule_state.condition_start.is_none() {
                rule_state.condition_start = Some(Utc::now());
            }

            // Check if condition has persisted long enough
            if let Some(start_time) = rule_state.condition_start {
                let duration = Utc::now().signed_duration_since(start_time);
                if duration.num_seconds() >= rule.duration_secs as i64 {
                    // Trigger alert
                    Self::trigger_alert(
                        rule,
                        current_value,
                        active_alerts,
                        alert_history,
                        suppressed_alerts,
                        notification_tx,
                        config,
                    )
                    .await?;
                }
            }
        } else {
            rule_state.consecutive_failures = 0;
            rule_state.condition_start = None;
        }

        Ok(())
    }

    /// Extract metric value from snapshot
    fn extract_metric_value(rule: &AlertRule, metrics: &MetricsSnapshot) -> f64 {
        match &rule.metric {
            MetricType::ErrorRate => metrics.error_metrics.error_rate_5min,
            MetricType::ResponseTime => metrics.request_metrics.avg_response_time_ms,
            MetricType::RequestCount => metrics.request_metrics.total_requests as f64,
            MetricType::MemoryUsage => metrics.health_metrics.memory_usage_mb.unwrap_or(0.0),
            MetricType::CpuUsage => metrics.health_metrics.cpu_usage_percent.unwrap_or(0.0),
            MetricType::DiskUsage => metrics.health_metrics.disk_usage_percent.unwrap_or(0.0),
            MetricType::ActiveConnections => {
                metrics.health_metrics.connection_pool_active.unwrap_or(0) as f64
            }
            MetricType::HealthCheckFailures => {
                if metrics.health_metrics.last_health_check_success {
                    0.0
                } else {
                    1.0
                }
            }
            MetricType::Custom(_) => 0.0, // TODO: Support custom metrics
        }
    }

    /// Evaluate condition against threshold
    fn evaluate_condition(rule: &AlertRule, current_value: f64) -> bool {
        match rule.operator {
            ComparisonOperator::GreaterThan => current_value > rule.threshold,
            ComparisonOperator::GreaterThanOrEqual => current_value >= rule.threshold,
            ComparisonOperator::LessThan => current_value < rule.threshold,
            ComparisonOperator::LessThanOrEqual => current_value <= rule.threshold,
            ComparisonOperator::Equal => (current_value - rule.threshold).abs() < f64::EPSILON,
            ComparisonOperator::NotEqual => (current_value - rule.threshold).abs() >= f64::EPSILON,
        }
    }

    /// Trigger an alert
    async fn trigger_alert(
        rule: &AlertRule,
        current_value: f64,
        active_alerts: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        _alert_history: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        suppressed_alerts: &Arc<RwLock<HashSet<String>>>,
        notification_tx: &mpsc::Sender<NotificationRequest>,
        config: &AlertConfig,
    ) -> Result<(), AlertError> {
        // Check if this alert is suppressed
        let suppression_key = format!("{}:{}", rule.id, rule.threshold);
        {
            let suppressed = suppressed_alerts.read().await;
            if suppressed.contains(&suppression_key) {
                return Ok(());
            }
        }

        // Create alert
        let alert = Alert {
            id: Uuid::new_v4(),
            rule_id: rule.id.clone(),
            message: Self::format_alert_message(rule, current_value),
            severity: rule.severity.clone(),
            state: AlertState::Active,
            triggered_at: Utc::now(),
            updated_at: Utc::now(),
            acknowledged_at: None,
            acknowledged_by: None,
            resolved_at: None,
            current_value,
            threshold: rule.threshold,
            labels: rule.labels.clone(),
            trigger_count: 1,
            last_notification_at: None,
        };

        // Add to active alerts
        let mut active = active_alerts.write().await;

        // Check capacity: evict the alert that was triggered earliest
        // (HashMap iteration order is arbitrary, so pick the oldest explicitly)
        if active.len() >= config.max_active_alerts {
            warn!("Active alerts at capacity, removing oldest");
            let oldest_id = active
                .values()
                .min_by_key(|a| a.triggered_at)
                .map(|a| a.id);
            if let Some(oldest_id) = oldest_id {
                active.remove(&oldest_id);
            }
        }

        active.insert(alert.id, alert.clone());

        // Send notification
        let channels = if rule.channels.is_empty() {
            config.default_channels.clone()
        } else {
            rule.channels.clone()
        };

        let alert_id = alert.id;
        let notification = NotificationRequest {
            alert,
            channels,
            is_resolved: false,
        };

        if let Err(e) = notification_tx.send(notification).await {
            error!("Failed to send notification: {}", e);
        }

        // Add to suppression list
        if rule.suppress_duration_secs > 0 {
            let mut suppressed = suppressed_alerts.write().await;
            suppressed.insert(suppression_key.clone());

            // Remove from suppression after duration
            let suppressed_clone = suppressed_alerts.clone();
            let suppress_duration = rule.suppress_duration_secs;
            tokio::spawn(async move {
                tokio::time::sleep(Duration::from_secs(suppress_duration)).await;
                let mut suppressed = suppressed_clone.write().await;
                suppressed.remove(&suppression_key);
            });
        }

        info!("Alert triggered: {} ({})", rule.name, alert_id);

        Ok(())
    }

    /// Format alert message
    fn format_alert_message(rule: &AlertRule, current_value: f64) -> String {
        format!(
            "{}: {} is {} {} (current: {:.2})",
            rule.name, rule.metric, rule.operator, rule.threshold, current_value
        )
    }

    /// Check for resolved alerts
    async fn check_resolved_alerts(
        _active_alerts: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        _alert_history: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        _notification_tx: &mpsc::Sender<NotificationRequest>,
        _config: &AlertConfig,
    ) {
        // TODO: Implement resolution logic based on metrics
        // For now, this is a placeholder
    }

    /// Send re-notifications for unacknowledged alerts
    async fn send_renotifications(
        active_alerts: &Arc<RwLock<HashMap<Uuid, Alert>>>,
        notification_tx: &mpsc::Sender<NotificationRequest>,
        config: &AlertConfig,
    ) {
        let renotify_threshold =
            Utc::now() - chrono::Duration::seconds(config.renotification_interval_secs as i64);

        let active = active_alerts.read().await;
        for alert in active.values() {
            if alert.state == AlertState::Active {
                let should_renotify = if let Some(last_notif) = alert.last_notification_at {
                    last_notif < renotify_threshold
                } else {
                    alert.triggered_at < renotify_threshold
                };

                if should_renotify {
                    let notification = NotificationRequest {
                        alert: alert.clone(),
                        channels: config.default_channels.clone(),
                        is_resolved: false,
                    };

                    if let Err(e) = notification_tx.send(notification).await {
                        error!("Failed to send re-notification: {}", e);
                    }
                    // TODO: Update `last_notification_at` after sending so the
                    // same alert is not re-notified on every evaluation cycle
                }
            }
        }
    }

    /// Notification processing loop
    async fn notification_loop(
        notification_rx: Arc<RwLock<mpsc::Receiver<NotificationRequest>>>,
        config: AlertConfig,
    ) {
        let mut rx = notification_rx.write().await;

        while let Some(notification) = rx.recv().await {
            for channel_id in &notification.channels {
                if let Some(channel) = config.channels.get(channel_id) {
                    if let Err(e) = Self::send_notification(channel, &notification).await {
                        error!("Failed to send notification to {}: {}", channel_id, e);
                    }
                }
            }
        }
    }

    /// Send notification to a specific channel
    async fn send_notification(
        channel: &NotificationChannel,
        notification: &NotificationRequest,
    ) -> Result<(), AlertError> {
        match channel {
            NotificationChannel::Console { use_colors } => {
                let message = if *use_colors {
                    format!("\x1b[31m[ALERT]\x1b[0m {}", notification.alert.message)
                } else {
                    format!("[ALERT] {}", notification.alert.message)
                };
                println!("{message}");
            }
            NotificationChannel::Webhook { url, .. } => {
                info!("Sending webhook notification to {}", url);
                // TODO: Implement webhook sending
            }
            NotificationChannel::Email { .. } => {
                info!("Sending email notification");
                // TODO: Implement email sending
            }
            NotificationChannel::Slack { webhook_url, .. } => {
                info!("Sending Slack notification to {}", webhook_url);
                // TODO: Implement Slack notification
            }
            NotificationChannel::PagerDuty { .. } => {
                info!("Sending PagerDuty notification");
                // TODO: Implement PagerDuty notification
            }
        }

        Ok(())
    }

    /// Start cleanup tasks
    async fn start_cleanup_tasks(&self) {
        let alert_history = self.alert_history.clone();
        let config = self.config.clone();

        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(3600)); // Cleanup every hour

            loop {
                interval.tick().await;

                let mut history = alert_history.write().await;
                if history.len() > config.max_alert_history {
                    // Remove oldest alerts
                    let mut alerts: Vec<_> = history.values().cloned().collect();
                    alerts.sort_by(|a, b| a.triggered_at.cmp(&b.triggered_at));

                    let to_remove = alerts.len() - config.max_alert_history;
                    for alert in alerts.iter().take(to_remove) {
                        history.remove(&alert.id);
                    }
                }
            }
        });
    }

    /// Get active alerts
    pub async fn get_active_alerts(&self) -> Vec<Alert> {
        let active = self.active_alerts.read().await;
        active.values().cloned().collect()
    }

    /// Get alert history
    pub async fn get_alert_history(&self) -> Vec<Alert> {
        let history = self.alert_history.read().await;
        history.values().cloned().collect()
    }

    /// Acknowledge an alert
    pub async fn acknowledge_alert(
        &self,
        alert_id: Uuid,
        acknowledged_by: String,
    ) -> Result<(), AlertError> {
        let mut active = self.active_alerts.write().await;

        if let Some(alert) = active.get_mut(&alert_id) {
            alert.state = AlertState::Acknowledged;
            alert.acknowledged_at = Some(Utc::now());
            alert.acknowledged_by = Some(acknowledged_by);
            alert.updated_at = Utc::now();

            info!("Alert {} acknowledged", alert_id);
            Ok(())
        } else {
            Err(AlertError::AlertNotFound(alert_id))
        }
    }

    /// Resolve an alert
    pub async fn resolve_alert(&self, alert_id: Uuid) -> Result<(), AlertError> {
        let mut active = self.active_alerts.write().await;

        if let Some(mut alert) = active.remove(&alert_id) {
            alert.state = AlertState::Resolved;
            alert.resolved_at = Some(Utc::now());
            alert.updated_at = Utc::now();

            // Move to history
            let mut history = self.alert_history.write().await;
            history.insert(alert_id, alert.clone());

            // Send resolved notification
            let notification = NotificationRequest {
                alert,
                channels: self.config.default_channels.clone(),
                is_resolved: true,
            };

            if let Err(e) = self.notification_tx.send(notification).await {
                error!("Failed to send resolved notification: {}", e);
            }

            info!("Alert {} resolved", alert_id);
            Ok(())
        } else {
            Err(AlertError::AlertNotFound(alert_id))
        }
    }
}

impl Default for AlertConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            rules: vec![
                AlertRule {
                    id: "high_error_rate".to_string(),
                    name: "High Error Rate".to_string(),
                    description: "Error rate exceeds 5%".to_string(),
                    metric: MetricType::ErrorRate,
                    operator: ComparisonOperator::GreaterThan,
                    threshold: 0.05,
                    duration_secs: 300,
                    severity: AlertSeverity::High,
                    enabled: true,
                    channels: vec![],
                    labels: HashMap::new(),
                    suppress_duration_secs: 3600,
                },
                AlertRule {
                    id: "high_response_time".to_string(),
                    name: "High Response Time".to_string(),
                    description: "Average response time exceeds 5 seconds".to_string(),
                    metric: MetricType::ResponseTime,
                    operator: ComparisonOperator::GreaterThan,
                    threshold: 5000.0,
                    duration_secs: 180,
                    severity: AlertSeverity::Medium,
                    enabled: true,
                    channels: vec![],
                    labels: HashMap::new(),
                    suppress_duration_secs: 1800,
                },
            ],
            channels: {
                let mut channels = HashMap::new();
                channels.insert(
                    "console".to_string(),
                    NotificationChannel::Console { use_colors: true },
                );
                channels
            },
            default_channels: vec!["console".to_string()],
            evaluation_interval_secs: 30,
            max_active_alerts: 1000,
            max_alert_history: 10000,
            deduplication_enabled: true,
            renotification_interval_secs: 3600,
        }
    }
}

/// Display implementations for better formatting
impl std::fmt::Display for MetricType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MetricType::ErrorRate => write!(f, "error_rate"),
            MetricType::ResponseTime => write!(f, "response_time"),
            MetricType::RequestCount => write!(f, "request_count"),
            MetricType::MemoryUsage => write!(f, "memory_usage"),
            MetricType::CpuUsage => write!(f, "cpu_usage"),
            MetricType::DiskUsage => write!(f, "disk_usage"),
            MetricType::ActiveConnections => write!(f, "active_connections"),
            MetricType::HealthCheckFailures => write!(f, "health_check_failures"),
            MetricType::Custom(name) => write!(f, "custom_{name}"),
        }
    }
}

impl std::fmt::Display for ComparisonOperator {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ComparisonOperator::GreaterThan => write!(f, ">"),
            ComparisonOperator::GreaterThanOrEqual => write!(f, ">="),
            ComparisonOperator::LessThan => write!(f, "<"),
            ComparisonOperator::LessThanOrEqual => write!(f, "<="),
            ComparisonOperator::Equal => write!(f, "=="),
            ComparisonOperator::NotEqual => write!(f, "!="),
        }
    }
}

/// Alert system errors
#[derive(Debug, thiserror::Error)]
pub enum AlertError {
    #[error("Alert not found: {0}")]
    AlertNotFound(Uuid),

    #[error("Rule not found: {0}")]
    RuleNotFound(String),

    #[error("Channel not found: {0}")]
    ChannelNotFound(String),

    #[error("Notification failed: {0}")]
    NotificationFailed(String),

    #[error("Configuration error: {0}")]
    Config(String),

    #[error("Serialization error: {0}")]
    Serialization(#[from] serde_json::Error),
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_alert_rule_creation() {
        let rule = AlertRule {
            id: "test_rule".to_string(),
            name: "Test Rule".to_string(),
            description: "Test description".to_string(),
            metric: MetricType::ErrorRate,
            operator: ComparisonOperator::GreaterThan,
            threshold: 0.1,
            duration_secs: 300,
            severity: AlertSeverity::High,
            enabled: true,
            channels: vec!["console".to_string()],
            labels: HashMap::new(),
            suppress_duration_secs: 3600,
        };

        assert_eq!(rule.id, "test_rule");
        assert_eq!(rule.severity, AlertSeverity::High);
        assert!(rule.enabled);
    }

    #[test]
    fn test_condition_evaluation() {
        let rule = AlertRule {
            id: "test".to_string(),
            name: "Test".to_string(),
            description: "Test".to_string(),
            metric: MetricType::ErrorRate,
            operator: ComparisonOperator::GreaterThan,
            threshold: 0.05,
            duration_secs: 300,
            severity: AlertSeverity::High,
            enabled: true,
            channels: vec![],
            labels: HashMap::new(),
            suppress_duration_secs: 3600,
        };

        assert!(AlertManager::evaluate_condition(&rule, 0.1));
        assert!(!AlertManager::evaluate_condition(&rule, 0.01));
    }

    #[tokio::test]
    async fn test_alert_manager_creation() {
        let config = AlertConfig::default();
        let manager = AlertManager::new(config);

        let alerts = manager.get_active_alerts().await;
        assert!(alerts.is_empty());
    }
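
    // Additional checks (a sketch added alongside the existing tests): the
    // message formatter relies on the Display impls above, and the severity
    // enum serializes lowercase as configured via `rename_all`.
    #[test]
    fn test_alert_message_formatting() {
        let rule = AlertRule {
            id: "test".to_string(),
            name: "Test".to_string(),
            description: "Test".to_string(),
            metric: MetricType::ErrorRate,
            operator: ComparisonOperator::GreaterThan,
            threshold: 0.05,
            duration_secs: 300,
            severity: AlertSeverity::High,
            enabled: true,
            channels: vec![],
            labels: HashMap::new(),
            suppress_duration_secs: 3600,
        };

        let message = AlertManager::format_alert_message(&rule, 0.1);
        assert_eq!(message, "Test: error_rate is > 0.05 (current: 0.10)");
    }

    #[test]
    fn test_severity_serialization_is_lowercase() {
        let json = serde_json::to_string(&AlertSeverity::Critical).expect("serializable");
        assert_eq!(json, "\"critical\"");
    }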
}