ai_lib/error_handling/
monitoring.rs

1//! Error monitoring and alerting
2
3use crate::types::AiLibError;
4use crate::error_handling::ErrorContext;
5use crate::metrics::Metrics;
6use std::sync::Arc;
7use std::time::Duration;
8use serde::{Deserialize, Serialize};
9
10/// Error monitoring configuration
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct ErrorThresholds {
13    /// Maximum error rate (errors per second)
14    pub error_rate_threshold: f64,
15    /// Maximum consecutive errors before alerting
16    pub consecutive_errors: u32,
17    /// Time window for error rate calculation
18    pub time_window: Duration,
19}
20
21impl Default for ErrorThresholds {
22    fn default() -> Self {
23        Self {
24            error_rate_threshold: 0.1, // 10% error rate
25            consecutive_errors: 5,
26            time_window: Duration::from_secs(60),
27        }
28    }
29}
30
31/// Error monitor for tracking and alerting
32pub struct ErrorMonitor {
33    metrics: Arc<dyn Metrics>,
34    #[allow(dead_code)] // Reserved for future alerting functionality
35    alert_thresholds: ErrorThresholds,
36}
37
38impl ErrorMonitor {
39    /// Create a new error monitor
40    pub fn new(metrics: Arc<dyn Metrics>, alert_thresholds: ErrorThresholds) -> Self {
41        Self {
42            metrics,
43            alert_thresholds,
44        }
45    }
46
47    /// Record an error and check for alerts
48    pub async fn record_error(&self, error: &AiLibError, context: &ErrorContext) {
49        // Record error metrics
50        self.metrics.incr_counter("errors.total", 1).await;
51        self.metrics.incr_counter(&format!("errors.{}", self.error_type_name(error)), 1).await;
52        
53        // Check if we should send an alert
54        if self.should_alert(error, context).await {
55            self.send_alert(error, context).await;
56        }
57    }
58
59    /// Check if an alert should be sent
60    async fn should_alert(&self, error: &AiLibError, _context: &ErrorContext) -> bool {
61        // This is a simplified implementation
62        // In a real system, you would check error rates, consecutive errors, etc.
63        matches!(error, AiLibError::RateLimitExceeded(_) | AiLibError::ProviderError(_))
64    }
65
66    /// Send an alert (placeholder implementation)
67    async fn send_alert(&self, error: &AiLibError, context: &ErrorContext) {
68        // In a real implementation, this would send alerts via email, Slack, etc.
69        eprintln!("ALERT: Error detected - {:?} in context {:?}", error, context);
70    }
71
72    /// Get error type name for metrics
73    fn error_type_name(&self, error: &AiLibError) -> String {
74        match error {
75            AiLibError::RateLimitExceeded(_) => "rate_limit".to_string(),
76            AiLibError::NetworkError(_) => "network".to_string(),
77            AiLibError::AuthenticationError(_) => "authentication".to_string(),
78            AiLibError::ProviderError(_) => "provider".to_string(),
79            AiLibError::TimeoutError(_) => "timeout".to_string(),
80            _ => "unknown".to_string(),
81        }
82    }
83}