ai_lib/error_handling/
monitoring.rs

1//! Error monitoring and alerting
2
3use crate::types::AiLibError;
4use crate::error_handling::ErrorContext;
5use crate::metrics::Metrics;
6use std::sync::Arc;
7use std::time::Duration;
8use serde::{Deserialize, Serialize};
9
10/// Error monitoring configuration
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ErrorThresholds {
13    /// Maximum error rate (errors per second)
14    pub error_rate_threshold: f64,
15    /// Maximum consecutive errors before alerting
16    pub consecutive_errors: u32,
17    /// Time window for error rate calculation
18    pub time_window: Duration,
19}
20
21impl Default for ErrorThresholds {
22    fn default() -> Self {
23        Self {
24            error_rate_threshold: 0.1, // 10% error rate
25            consecutive_errors: 5,
26            time_window: Duration::from_secs(60),
27        }
28    }
29}
30
31/// Error monitor for tracking and alerting
32pub struct ErrorMonitor {
33    metrics: Arc<dyn Metrics>,
34    alert_thresholds: ErrorThresholds,
35}
36
37impl ErrorMonitor {
38    /// Create a new error monitor
39    pub fn new(metrics: Arc<dyn Metrics>, alert_thresholds: ErrorThresholds) -> Self {
40        Self {
41            metrics,
42            alert_thresholds,
43        }
44    }
45
46    /// Record an error and check for alerts
47    pub async fn record_error(&self, error: &AiLibError, context: &ErrorContext) {
48        // Record error metrics
49        self.metrics.incr_counter("errors.total", 1).await;
50        self.metrics.incr_counter(&format!("errors.{}", self.error_type_name(error)), 1).await;
51        
52        // Check if we should send an alert
53        if self.should_alert(error, context).await {
54            self.send_alert(error, context).await;
55        }
56    }
57
58    /// Check if an alert should be sent
59    async fn should_alert(&self, error: &AiLibError, _context: &ErrorContext) -> bool {
60        // This is a simplified implementation
61        // In a real system, you would check error rates, consecutive errors, etc.
62        matches!(error, AiLibError::RateLimitExceeded(_) | AiLibError::ProviderError(_))
63    }
64
65    /// Send an alert (placeholder implementation)
66    async fn send_alert(&self, error: &AiLibError, context: &ErrorContext) {
67        // In a real implementation, this would send alerts via email, Slack, etc.
68        eprintln!("ALERT: Error detected - {:?} in context {:?}", error, context);
69    }
70
71    /// Get error type name for metrics
72    fn error_type_name(&self, error: &AiLibError) -> String {
73        match error {
74            AiLibError::RateLimitExceeded(_) => "rate_limit".to_string(),
75            AiLibError::NetworkError(_) => "network".to_string(),
76            AiLibError::AuthenticationError(_) => "authentication".to_string(),
77            AiLibError::ProviderError(_) => "provider".to_string(),
78            AiLibError::TimeoutError(_) => "timeout".to_string(),
79            _ => "unknown".to_string(),
80        }
81    }
82}