// aster/agents/monitor/alerts.rs
//! Alert Manager
//!
//! Manages alerts for agent issues including timeout,
//! cost threshold, error rate, and latency violations.
//!
//! This module provides:
//! - Alert creation for various threshold violations
//! - Alert severity levels (low, medium, high, critical)
//! - Alert lifecycle management (acknowledge, clear)

11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::time::Duration;
15
/// Alert severity levels
///
/// Variants are declared in ascending order of severity, so the derived
/// `PartialOrd`/`Ord` give `Low < Medium < High < Critical`.
/// Serialized in lowercase (e.g. `"critical"`), matching `Display`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "lowercase")]
pub enum AlertSeverity {
    /// Low severity - informational
    Low,
    /// Medium severity - warning (the default)
    #[default]
    Medium,
    /// High severity - requires attention
    High,
    /// Critical severity - immediate action required
    Critical,
}
32
33impl std::fmt::Display for AlertSeverity {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        match self {
36            AlertSeverity::Low => write!(f, "low"),
37            AlertSeverity::Medium => write!(f, "medium"),
38            AlertSeverity::High => write!(f, "high"),
39            AlertSeverity::Critical => write!(f, "critical"),
40        }
41    }
42}
43
/// Alert types
///
/// Serialized in snake_case; `Custom` carries a caller-supplied name for
/// alert categories not covered by the built-in variants.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AlertType {
    /// Agent execution timeout
    Timeout,
    /// Cost threshold exceeded
    CostThreshold,
    /// Error rate threshold exceeded
    ErrorRate,
    /// Latency threshold exceeded
    Latency,
    /// Resource limit exceeded
    ResourceLimit,
    /// Custom alert type
    Custom(String),
}
61
62impl std::fmt::Display for AlertType {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        match self {
65            AlertType::Timeout => write!(f, "timeout"),
66            AlertType::CostThreshold => write!(f, "cost_threshold"),
67            AlertType::ErrorRate => write!(f, "error_rate"),
68            AlertType::Latency => write!(f, "latency"),
69            AlertType::ResourceLimit => write!(f, "resource_limit"),
70            AlertType::Custom(name) => write!(f, "custom:{}", name),
71        }
72    }
73}
74
/// An alert representing an issue with agent execution
///
/// Identity is the UUID `id` (see the `PartialEq`/`Hash` impls below).
/// Serialized in camelCase for JS-facing consumers.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Alert {
    /// Unique alert identifier (UUID v4, assigned by `Alert::new`)
    pub id: String,
    /// Type of alert
    pub alert_type: AlertType,
    /// Severity level
    pub severity: AlertSeverity,
    /// Agent ID that triggered the alert
    pub agent_id: String,
    /// Human-readable message
    pub message: String,
    /// Timestamp when alert was created
    pub timestamp: DateTime<Utc>,
    /// Whether the alert has been acknowledged (starts false)
    pub acknowledged: bool,
    /// Additional metadata; None until the first `with_metadata` call
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
96
97impl Alert {
98    /// Create a new alert
99    pub fn new(
100        alert_type: AlertType,
101        severity: AlertSeverity,
102        agent_id: impl Into<String>,
103        message: impl Into<String>,
104    ) -> Self {
105        Self {
106            id: uuid::Uuid::new_v4().to_string(),
107            alert_type,
108            severity,
109            agent_id: agent_id.into(),
110            message: message.into(),
111            timestamp: Utc::now(),
112            acknowledged: false,
113            metadata: None,
114        }
115    }
116
117    /// Create a timeout alert
118    pub fn timeout(agent_id: impl Into<String>, duration: Duration, timeout: Duration) -> Self {
119        let agent_id = agent_id.into();
120        let severity = if duration > timeout * 2 {
121            AlertSeverity::Critical
122        } else {
123            AlertSeverity::High
124        };
125
126        let mut alert = Self::new(
127            AlertType::Timeout,
128            severity,
129            agent_id.clone(),
130            format!(
131                "Agent {} exceeded timeout: {:?} > {:?}",
132                agent_id, duration, timeout
133            ),
134        );
135
136        let mut metadata = HashMap::new();
137        metadata.insert(
138            "duration_ms".to_string(),
139            serde_json::json!(duration.as_millis()),
140        );
141        metadata.insert(
142            "timeout_ms".to_string(),
143            serde_json::json!(timeout.as_millis()),
144        );
145        alert.metadata = Some(metadata);
146
147        alert
148    }
149
150    /// Create a cost threshold alert
151    pub fn cost_threshold(agent_id: impl Into<String>, cost: f64, threshold: f64) -> Self {
152        let agent_id = agent_id.into();
153        let ratio = cost / threshold;
154        let severity = if ratio >= 2.0 {
155            AlertSeverity::Critical
156        } else if ratio >= 1.5 {
157            AlertSeverity::High
158        } else {
159            AlertSeverity::Medium
160        };
161
162        let mut alert = Self::new(
163            AlertType::CostThreshold,
164            severity,
165            agent_id.clone(),
166            format!(
167                "Agent {} exceeded cost threshold: ${:.4} > ${:.4}",
168                agent_id, cost, threshold
169            ),
170        );
171
172        let mut metadata = HashMap::new();
173        metadata.insert("cost".to_string(), serde_json::json!(cost));
174        metadata.insert("threshold".to_string(), serde_json::json!(threshold));
175        metadata.insert("ratio".to_string(), serde_json::json!(ratio));
176        alert.metadata = Some(metadata);
177
178        alert
179    }
180
181    /// Create an error rate alert
182    pub fn error_rate(agent_id: impl Into<String>, error_rate: f32, threshold: f32) -> Self {
183        let agent_id = agent_id.into();
184        let severity = if error_rate >= 0.75 {
185            AlertSeverity::Critical
186        } else if error_rate >= 0.5 {
187            AlertSeverity::High
188        } else if error_rate >= threshold {
189            AlertSeverity::Medium
190        } else {
191            AlertSeverity::Low
192        };
193
194        let mut alert = Self::new(
195            AlertType::ErrorRate,
196            severity,
197            agent_id.clone(),
198            format!(
199                "Agent {} exceeded error rate threshold: {:.1}% > {:.1}%",
200                agent_id,
201                error_rate * 100.0,
202                threshold * 100.0
203            ),
204        );
205
206        let mut metadata = HashMap::new();
207        metadata.insert("error_rate".to_string(), serde_json::json!(error_rate));
208        metadata.insert("threshold".to_string(), serde_json::json!(threshold));
209        alert.metadata = Some(metadata);
210
211        alert
212    }
213
214    /// Create a latency alert
215    pub fn latency(agent_id: impl Into<String>, latency: Duration, threshold: Duration) -> Self {
216        let agent_id = agent_id.into();
217        let ratio = latency.as_millis() as f64 / threshold.as_millis() as f64;
218        let severity = if ratio >= 3.0 {
219            AlertSeverity::Critical
220        } else if ratio >= 2.0 {
221            AlertSeverity::High
222        } else {
223            AlertSeverity::Medium
224        };
225
226        let mut alert = Self::new(
227            AlertType::Latency,
228            severity,
229            agent_id.clone(),
230            format!(
231                "Agent {} exceeded latency threshold: {:?} > {:?}",
232                agent_id, latency, threshold
233            ),
234        );
235
236        let mut metadata = HashMap::new();
237        metadata.insert(
238            "latency_ms".to_string(),
239            serde_json::json!(latency.as_millis()),
240        );
241        metadata.insert(
242            "threshold_ms".to_string(),
243            serde_json::json!(threshold.as_millis()),
244        );
245        alert.metadata = Some(metadata);
246
247        alert
248    }
249
250    /// Add metadata to the alert
251    pub fn with_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
252        let metadata = self.metadata.get_or_insert_with(HashMap::new);
253        metadata.insert(key.into(), value);
254        self
255    }
256
257    /// Acknowledge the alert
258    pub fn acknowledge(&mut self) {
259        self.acknowledged = true;
260    }
261
262    /// Check if the alert is active (not acknowledged)
263    pub fn is_active(&self) -> bool {
264        !self.acknowledged
265    }
266}
267
// Alert identity is its UUID alone: two alerts compare equal iff their
// `id` fields match, regardless of other fields. Hash is keyed on `id` as
// well, preserving the `a == b => hash(a) == hash(b)` invariant required
// by hashed collections.
impl PartialEq for Alert {
    fn eq(&self, other: &Self) -> bool {
        self.id == other.id
    }
}

impl Eq for Alert {}

impl std::hash::Hash for Alert {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.id.hash(state);
    }
}
281
/// Agent execution status for metrics
///
/// Serialized in snake_case. New `AgentMetrics` start in `Running`
/// (the default variant).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AgentExecutionStatus {
    /// Agent is currently running (the default)
    #[default]
    Running,
    /// Agent completed successfully
    Completed,
    /// Agent failed with an error
    Failed,
    /// Agent was cancelled
    Cancelled,
    /// Agent timed out
    TimedOut,
}
298
/// Token usage tracking
///
/// `total` is maintained as `input + output` by [`TokenUsage::new`];
/// the `Default` value is all zeros, which keeps that invariant.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TokenUsage {
    /// Input tokens used
    pub input: usize,
    /// Output tokens used
    pub output: usize,
    /// Total tokens used (input + output)
    pub total: usize,
}
310
311impl TokenUsage {
312    /// Create new token usage
313    pub fn new(input: usize, output: usize) -> Self {
314        Self {
315            input,
316            output,
317            total: input + output,
318        }
319    }
320}
321
/// Error record for tracking agent errors
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ErrorRecord {
    /// Error message
    pub message: String,
    /// Error timestamp (set at construction)
    pub timestamp: DateTime<Utc>,
    /// Error phase (e.g. "tool_call", "api_call"); None if unknown
    pub phase: Option<String>,
    /// Stack trace if available
    pub stack_trace: Option<String>,
}
335
336impl ErrorRecord {
337    /// Create a new error record
338    pub fn new(message: impl Into<String>) -> Self {
339        Self {
340            message: message.into(),
341            timestamp: Utc::now(),
342            phase: None,
343            stack_trace: None,
344        }
345    }
346
347    /// Set the phase
348    pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
349        self.phase = Some(phase.into());
350        self
351    }
352}
353
/// Agent metrics for monitoring
///
/// Snapshot of a single agent run: timing, status, token/cost usage,
/// API/tool call counters, and recorded errors.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AgentMetrics {
    /// Agent ID
    pub agent_id: String,
    /// Agent type
    pub agent_type: String,
    /// Start time (set at construction)
    pub start_time: DateTime<Utc>,
    /// End time (None while the run is in progress)
    pub end_time: Option<DateTime<Utc>>,
    /// Duration (None while the run is in progress)
    pub duration: Option<Duration>,
    /// Execution status (starts as `Running`)
    pub status: AgentExecutionStatus,
    /// Token usage
    pub tokens_used: TokenUsage,
    /// Number of API calls made
    pub api_calls: usize,
    /// Number of successful API calls (used to derive `error_rate`)
    pub api_calls_successful: usize,
    /// Number of tool calls
    pub tool_calls_count: usize,
    /// Total cost in dollars (compared against the cost threshold)
    pub cost: f64,
    /// Errors encountered during the run
    pub errors: Vec<ErrorRecord>,
    /// Configured timeout; None disables timeout checks
    pub timeout: Option<Duration>,
}
385
386impl AgentMetrics {
387    /// Create new agent metrics
388    pub fn new(agent_id: impl Into<String>, agent_type: impl Into<String>) -> Self {
389        Self {
390            agent_id: agent_id.into(),
391            agent_type: agent_type.into(),
392            start_time: Utc::now(),
393            end_time: None,
394            duration: None,
395            status: AgentExecutionStatus::Running,
396            tokens_used: TokenUsage::default(),
397            api_calls: 0,
398            api_calls_successful: 0,
399            tool_calls_count: 0,
400            cost: 0.0,
401            errors: Vec::new(),
402            timeout: None,
403        }
404    }
405
406    /// Set the timeout
407    pub fn with_timeout(mut self, timeout: Duration) -> Self {
408        self.timeout = Some(timeout);
409        self
410    }
411
412    /// Calculate error rate
413    pub fn error_rate(&self) -> f32 {
414        if self.api_calls == 0 {
415            0.0
416        } else {
417            (self.api_calls - self.api_calls_successful) as f32 / self.api_calls as f32
418        }
419    }
420
421    /// Check if the agent has timed out
422    pub fn is_timed_out(&self) -> bool {
423        if let Some(timeout) = self.timeout {
424            if let Some(duration) = self.duration {
425                return duration > timeout;
426            }
427            // Check if currently running and exceeded timeout
428            let elapsed = Utc::now().signed_duration_since(self.start_time);
429            if let Ok(elapsed_std) = elapsed.to_std() {
430                return elapsed_std > timeout;
431            }
432        }
433        false
434    }
435}
436
/// Alert thresholds configuration
///
/// Each `None` disables the corresponding check in `AlertManager`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertThresholds {
    /// Cost threshold for alerts (dollars)
    pub cost_threshold: Option<f64>,
    /// Error rate threshold (0.0 - 1.0)
    pub error_rate_threshold: Option<f32>,
    /// Latency threshold
    ///
    /// NOTE(review): nothing in `AlertManager` currently reads this field
    /// (check_all has no latency check) — confirm whether one is intended.
    pub latency_threshold: Option<Duration>,
}
448
449impl Default for AlertThresholds {
450    fn default() -> Self {
451        Self {
452            cost_threshold: Some(1.0),                        // $1.00 default
453            error_rate_threshold: Some(0.1),                  // 10% error rate
454            latency_threshold: Some(Duration::from_secs(30)), // 30 seconds
455        }
456    }
457}
458
/// Alert Manager for managing agent alerts
///
/// Stores alerts keyed by their UUID and raises new ones when metrics
/// violate the configured thresholds.
#[derive(Debug)]
pub struct AlertManager {
    /// All alerts indexed by ID
    alerts: HashMap<String, Alert>,
    /// Alert thresholds consulted by `check_all`
    thresholds: AlertThresholds,
}
467
468impl Default for AlertManager {
469    fn default() -> Self {
470        Self::new()
471    }
472}
473
474impl AlertManager {
475    /// Create a new AlertManager
476    pub fn new() -> Self {
477        Self {
478            alerts: HashMap::new(),
479            thresholds: AlertThresholds::default(),
480        }
481    }
482
483    /// Create an AlertManager with custom thresholds
484    pub fn with_thresholds(thresholds: AlertThresholds) -> Self {
485        Self {
486            alerts: HashMap::new(),
487            thresholds,
488        }
489    }
490
491    /// Add an alert
492    pub fn add_alert(&mut self, alert: Alert) -> String {
493        let id = alert.id.clone();
494        self.alerts.insert(id.clone(), alert);
495        id
496    }
497
498    /// Check for timeout and create alert if exceeded
499    pub fn check_timeout(&mut self, metrics: &AgentMetrics) -> Option<Alert> {
500        if let Some(timeout) = metrics.timeout {
501            let duration = metrics.duration.unwrap_or_else(|| {
502                let elapsed = Utc::now().signed_duration_since(metrics.start_time);
503                elapsed.to_std().unwrap_or(Duration::ZERO)
504            });
505
506            if duration > timeout {
507                let alert = Alert::timeout(&metrics.agent_id, duration, timeout);
508                let id = alert.id.clone();
509                self.alerts.insert(id, alert.clone());
510                return Some(alert);
511            }
512        }
513        None
514    }
515
516    /// Check for cost threshold and create alert if exceeded
517    pub fn check_cost(&mut self, metrics: &AgentMetrics, threshold: f64) -> Option<Alert> {
518        if metrics.cost > threshold {
519            let alert = Alert::cost_threshold(&metrics.agent_id, metrics.cost, threshold);
520            let id = alert.id.clone();
521            self.alerts.insert(id, alert.clone());
522            return Some(alert);
523        }
524        None
525    }
526
527    /// Check for error rate threshold and create alert if exceeded
528    pub fn check_errors(&mut self, metrics: &AgentMetrics, threshold: f32) -> Option<Alert> {
529        let error_rate = metrics.error_rate();
530        if error_rate > threshold {
531            let alert = Alert::error_rate(&metrics.agent_id, error_rate, threshold);
532            let id = alert.id.clone();
533            self.alerts.insert(id, alert.clone());
534            return Some(alert);
535        }
536        None
537    }
538
539    /// Check all thresholds and create alerts as needed
540    pub fn check_all(&mut self, metrics: &AgentMetrics) -> Vec<Alert> {
541        let mut alerts = Vec::new();
542
543        if let Some(alert) = self.check_timeout(metrics) {
544            alerts.push(alert);
545        }
546
547        if let Some(threshold) = self.thresholds.cost_threshold {
548            if let Some(alert) = self.check_cost(metrics, threshold) {
549                alerts.push(alert);
550            }
551        }
552
553        if let Some(threshold) = self.thresholds.error_rate_threshold {
554            if let Some(alert) = self.check_errors(metrics, threshold) {
555                alerts.push(alert);
556            }
557        }
558
559        alerts
560    }
561
562    /// Get an alert by ID
563    pub fn get_alert(&self, alert_id: &str) -> Option<&Alert> {
564        self.alerts.get(alert_id)
565    }
566
567    /// Get a mutable reference to an alert by ID
568    pub fn get_alert_mut(&mut self, alert_id: &str) -> Option<&mut Alert> {
569        self.alerts.get_mut(alert_id)
570    }
571
572    /// Get all active (unacknowledged) alerts
573    pub fn get_active_alerts(&self) -> Vec<&Alert> {
574        self.alerts.values().filter(|a| a.is_active()).collect()
575    }
576
577    /// Get all alerts
578    pub fn get_all_alerts(&self) -> Vec<&Alert> {
579        self.alerts.values().collect()
580    }
581
582    /// Get alerts by agent ID
583    pub fn get_alerts_by_agent(&self, agent_id: &str) -> Vec<&Alert> {
584        self.alerts
585            .values()
586            .filter(|a| a.agent_id == agent_id)
587            .collect()
588    }
589
590    /// Get alerts by severity
591    pub fn get_alerts_by_severity(&self, severity: AlertSeverity) -> Vec<&Alert> {
592        self.alerts
593            .values()
594            .filter(|a| a.severity == severity)
595            .collect()
596    }
597
598    /// Get alerts by type
599    pub fn get_alerts_by_type(&self, alert_type: &AlertType) -> Vec<&Alert> {
600        self.alerts
601            .values()
602            .filter(|a| &a.alert_type == alert_type)
603            .collect()
604    }
605
606    /// Acknowledge an alert by ID
607    pub fn acknowledge(&mut self, alert_id: &str) -> bool {
608        if let Some(alert) = self.alerts.get_mut(alert_id) {
609            alert.acknowledge();
610            true
611        } else {
612            false
613        }
614    }
615
616    /// Acknowledge all alerts
617    pub fn acknowledge_all(&mut self) {
618        for alert in self.alerts.values_mut() {
619            alert.acknowledge();
620        }
621    }
622
623    /// Clear all acknowledged alerts
624    pub fn clear_acknowledged(&mut self) -> usize {
625        let before = self.alerts.len();
626        self.alerts.retain(|_, alert| !alert.acknowledged);
627        before - self.alerts.len()
628    }
629
630    /// Clear all alerts
631    pub fn clear_all(&mut self) {
632        self.alerts.clear();
633    }
634
635    /// Get the number of alerts
636    pub fn alert_count(&self) -> usize {
637        self.alerts.len()
638    }
639
640    /// Get the number of active alerts
641    pub fn active_alert_count(&self) -> usize {
642        self.alerts.values().filter(|a| a.is_active()).count()
643    }
644
645    /// Update thresholds
646    pub fn set_thresholds(&mut self, thresholds: AlertThresholds) {
647        self.thresholds = thresholds;
648    }
649
650    /// Get current thresholds
651    pub fn thresholds(&self) -> &AlertThresholds {
652        &self.thresholds
653    }
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    // --- Alert construction and severity mapping ---

    #[test]
    fn test_alert_severity_ordering() {
        assert!(AlertSeverity::Low < AlertSeverity::Medium);
        assert!(AlertSeverity::Medium < AlertSeverity::High);
        assert!(AlertSeverity::High < AlertSeverity::Critical);
    }

    #[test]
    fn test_alert_creation() {
        let alert = Alert::new(
            AlertType::Timeout,
            AlertSeverity::High,
            "agent-1",
            "Test alert",
        );

        assert!(!alert.id.is_empty());
        assert_eq!(alert.alert_type, AlertType::Timeout);
        assert_eq!(alert.severity, AlertSeverity::High);
        assert_eq!(alert.agent_id, "agent-1");
        assert_eq!(alert.message, "Test alert");
        assert!(!alert.acknowledged);
        assert!(alert.is_active());
    }

    #[test]
    fn test_timeout_alert() {
        let alert = Alert::timeout("agent-1", Duration::from_secs(70), Duration::from_secs(30));

        assert_eq!(alert.alert_type, AlertType::Timeout);
        assert_eq!(alert.severity, AlertSeverity::Critical); // 70s > 30s * 2
        assert!(alert.message.contains("agent-1"));
        assert!(alert.metadata.is_some());
    }

    #[test]
    fn test_cost_threshold_alert() {
        let alert = Alert::cost_threshold("agent-1", 2.5, 1.0);

        assert_eq!(alert.alert_type, AlertType::CostThreshold);
        assert_eq!(alert.severity, AlertSeverity::Critical); // 2.5 >= 2.0 * 1.0
        assert!(alert.message.contains("$2.5"));
    }

    #[test]
    fn test_error_rate_alert() {
        let alert = Alert::error_rate("agent-1", 0.6, 0.1);

        assert_eq!(alert.alert_type, AlertType::ErrorRate);
        assert_eq!(alert.severity, AlertSeverity::High); // 0.6 >= 0.5
        assert!(alert.message.contains("60.0%"));
    }

    #[test]
    fn test_alert_acknowledge() {
        let mut alert = Alert::new(AlertType::Timeout, AlertSeverity::High, "agent-1", "Test");

        assert!(alert.is_active());
        alert.acknowledge();
        assert!(!alert.is_active());
        assert!(alert.acknowledged);
    }

    // --- AlertManager threshold checks and lifecycle ---

    #[test]
    fn test_alert_manager_basic() {
        let mut manager = AlertManager::new();

        let alert = Alert::new(AlertType::Timeout, AlertSeverity::High, "agent-1", "Test");
        let id = manager.add_alert(alert);

        assert_eq!(manager.alert_count(), 1);
        assert!(manager.get_alert(&id).is_some());
    }

    #[test]
    fn test_alert_manager_check_timeout() {
        let mut manager = AlertManager::new();

        let mut metrics = AgentMetrics::new("agent-1", "test");
        metrics.timeout = Some(Duration::from_secs(10));
        metrics.duration = Some(Duration::from_secs(20));

        let alert = manager.check_timeout(&metrics);
        assert!(alert.is_some());
        assert_eq!(manager.alert_count(), 1);
    }

    #[test]
    fn test_alert_manager_check_cost() {
        let mut manager = AlertManager::new();

        let mut metrics = AgentMetrics::new("agent-1", "test");
        metrics.cost = 2.0;

        let alert = manager.check_cost(&metrics, 1.0);
        assert!(alert.is_some());
        assert_eq!(manager.alert_count(), 1);
    }

    #[test]
    fn test_alert_manager_check_errors() {
        let mut manager = AlertManager::new();

        let mut metrics = AgentMetrics::new("agent-1", "test");
        metrics.api_calls = 10;
        metrics.api_calls_successful = 5; // 50% error rate
        let alert = manager.check_errors(&metrics, 0.1);
        assert!(alert.is_some());
        assert_eq!(manager.alert_count(), 1);
    }

    #[test]
    fn test_alert_manager_acknowledge() {
        let mut manager = AlertManager::new();

        let alert = Alert::new(AlertType::Timeout, AlertSeverity::High, "agent-1", "Test");
        let id = manager.add_alert(alert);

        assert_eq!(manager.active_alert_count(), 1);
        assert!(manager.acknowledge(&id));
        assert_eq!(manager.active_alert_count(), 0);
    }

    #[test]
    fn test_alert_manager_clear_acknowledged() {
        let mut manager = AlertManager::new();

        let alert1 = Alert::new(AlertType::Timeout, AlertSeverity::High, "agent-1", "Test 1");
        let alert2 = Alert::new(
            AlertType::CostThreshold,
            AlertSeverity::Medium,
            "agent-2",
            "Test 2",
        );

        let id1 = manager.add_alert(alert1);
        manager.add_alert(alert2);

        manager.acknowledge(&id1);
        let cleared = manager.clear_acknowledged();

        assert_eq!(cleared, 1);
        assert_eq!(manager.alert_count(), 1);
    }

    #[test]
    fn test_alert_manager_get_active_alerts() {
        let mut manager = AlertManager::new();

        let alert1 = Alert::new(AlertType::Timeout, AlertSeverity::High, "agent-1", "Test 1");
        let alert2 = Alert::new(
            AlertType::CostThreshold,
            AlertSeverity::Medium,
            "agent-2",
            "Test 2",
        );

        let id1 = manager.add_alert(alert1);
        manager.add_alert(alert2);

        manager.acknowledge(&id1);

        let active = manager.get_active_alerts();
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].agent_id, "agent-2");
    }

    #[test]
    fn test_alert_manager_get_by_severity() {
        let mut manager = AlertManager::new();

        manager.add_alert(Alert::new(
            AlertType::Timeout,
            AlertSeverity::High,
            "agent-1",
            "Test 1",
        ));
        manager.add_alert(Alert::new(
            AlertType::CostThreshold,
            AlertSeverity::High,
            "agent-2",
            "Test 2",
        ));
        manager.add_alert(Alert::new(
            AlertType::ErrorRate,
            AlertSeverity::Medium,
            "agent-3",
            "Test 3",
        ));

        let high_alerts = manager.get_alerts_by_severity(AlertSeverity::High);
        assert_eq!(high_alerts.len(), 2);

        let medium_alerts = manager.get_alerts_by_severity(AlertSeverity::Medium);
        assert_eq!(medium_alerts.len(), 1);
    }

    // --- AgentMetrics derived values ---

    #[test]
    fn test_agent_metrics_error_rate() {
        let mut metrics = AgentMetrics::new("agent-1", "test");
        metrics.api_calls = 10;
        metrics.api_calls_successful = 8;

        assert!((metrics.error_rate() - 0.2).abs() < 0.001);
    }

    #[test]
    fn test_agent_metrics_error_rate_zero_calls() {
        let metrics = AgentMetrics::new("agent-1", "test");
        assert_eq!(metrics.error_rate(), 0.0);
    }
}