Skip to main content

aura_effects/system/
monitoring.rs

1//! Simplified monitoring handler that delegates to external services.
2
3use async_trait::async_trait;
4use aura_core::effects::{SystemEffects, SystemError};
5use aura_core::hash;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use tracing::{error, info, warn};
9
10use super::types::{ComponentId, LogLevel};
11
12/// Stateless monitoring handler.
13#[derive(Debug, Clone)]
14pub struct MonitoringSystemHandler {
15    config: MonitoringConfig,
16}
17
18/// Health status levels
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
20pub enum HealthStatus {
21    /// Component is operating normally
22    Healthy,
23    /// Component is operating with reduced performance or minor issues
24    Degraded,
25    /// Component has encountered significant problems
26    Unhealthy,
27    /// Component has encountered severe problems requiring immediate attention
28    Critical,
29}
30
31/// Alert severity levels
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33pub enum AlertSeverity {
34    /// Informational alert, no action required
35    Info,
36    /// Warning alert, may require attention or investigation
37    Warning,
38    /// Critical alert, requires immediate attention
39    Critical,
40}
41
42/// Health check result
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct HealthCheckResult {
45    /// Name of the component being checked
46    pub component: ComponentId,
47    /// Current health status of the component
48    pub status: HealthStatus,
49    /// Status message or diagnostic information
50    pub message: String,
51}
52
53/// Alert notification
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct Alert {
56    /// Unique alert identifier (deterministic counter-based for Layer 3 handlers)
57    pub id: u64,
58    /// Component that triggered the alert
59    pub component: ComponentId,
60    /// Alert severity level
61    pub severity: AlertSeverity,
62    /// Alert title/summary
63    pub title: String,
64    /// Detailed alert message
65    pub message: String,
66    /// Whether the alert has been resolved
67    pub resolved: bool,
68    /// Additional metadata for the alert
69    pub metadata: HashMap<String, String>,
70}
71
/// Settings accepted by the monitoring handler.
#[derive(Clone, Debug)]
pub struct MonitoringConfig {
    /// Upper bound on the number of alerts to keep in the buffer.
    pub max_alerts: u32,
}

impl Default for MonitoringConfig {
    /// Returns the stock configuration, which allows 256 alerts.
    fn default() -> Self {
        let max_alerts = 256;
        Self { max_alerts }
    }
}
84
/// Counters describing monitoring activity.
///
/// `Default` yields a value with every counter set to zero.
#[derive(Clone, Debug, Default)]
pub struct MonitoringStats {
    /// How many health checks have been performed in total.
    pub total_health_checks: u64,
    /// How many of those health checks failed.
    pub failed_health_checks: u64,
    /// How many alerts have been generated in total.
    pub total_alerts: u64,
    /// How many alerts are currently active (unresolved).
    pub active_alerts: u64,
}
97
98impl MonitoringSystemHandler {
99    /// Create a new monitoring system handler
100    pub fn new(config: MonitoringConfig) -> Self {
101        Self { config }
102    }
103
104    /// Create a monitoring system handler with default configuration
105    pub fn with_defaults() -> Self {
106        Self::new(MonitoringConfig::default())
107    }
108
109    /// Manually trigger a health check for a specific component
110    pub async fn check_component_health(
111        &self,
112        component: ComponentId,
113    ) -> Result<HealthCheckResult, SystemError> {
114        Ok(HealthCheckResult {
115            component,
116            status: HealthStatus::Healthy,
117            message: "ok".to_string(),
118        })
119    }
120
121    /// Send a custom alert
122    pub async fn send_alert(
123        &self,
124        component: ComponentId,
125        severity: AlertSeverity,
126        title: &str,
127        message: &str,
128        metadata: HashMap<String, String>,
129    ) -> Result<(), SystemError> {
130        let mut material = Vec::new();
131        material.extend_from_slice(component.as_str().as_bytes());
132        material.push(severity as u8);
133        material.extend_from_slice(title.as_bytes());
134        material.extend_from_slice(message.as_bytes());
135        let digest = hash::hash(&material);
136        let mut id_bytes = [0u8; 8];
137        id_bytes.copy_from_slice(&digest[..8]);
138        let component_id = component.clone();
139        let alert = Alert {
140            id: u64::from_le_bytes(id_bytes),
141            component: component_id,
142            severity,
143            title: title.to_string(),
144            message: message.to_string(),
145            resolved: false,
146            metadata,
147        };
148        warn!(
149            alert_id = alert.id,
150            component = %component,
151            ?severity,
152            title,
153            message,
154            "Monitoring alert emitted"
155        );
156        Ok(())
157    }
158
159    /// Get the most recent alerts up to the specified count
160    pub async fn get_recent_alerts(&self, count: usize) -> Vec<Alert> {
161        let _ = count;
162        Vec::new()
163    }
164
165    /// Resolve an alert by its ID
166    pub async fn resolve_alert(&self, alert_id: u64) -> Result<(), SystemError> {
167        let _ = alert_id;
168        Ok(())
169    }
170
171    /// Get monitoring statistics (stateless - delegates to external service)
172    pub async fn get_statistics(&self) -> MonitoringStats {
173        MonitoringStats::default()
174    }
175}
176
177impl Default for MonitoringSystemHandler {
178    fn default() -> Self {
179        Self::with_defaults()
180    }
181}
182
183#[async_trait]
184impl SystemEffects for MonitoringSystemHandler {
185    async fn log(&self, level: &str, component: &str, message: &str) -> Result<(), SystemError> {
186        let parsed_level = LogLevel::try_from(level).unwrap_or(LogLevel::Info);
187        let component_id = ComponentId::from(component);
188        match parsed_level {
189            LogLevel::Error => error!("{}: {}", component_id, message),
190            LogLevel::Warn => warn!("{}: {}", component_id, message),
191            LogLevel::Info | LogLevel::Debug => info!("{}: {}", component_id, message),
192        }
193        Ok(())
194    }
195
196    async fn log_with_context(
197        &self,
198        level: &str,
199        component: &str,
200        message: &str,
201        context: HashMap<String, String>,
202    ) -> Result<(), SystemError> {
203        let context_str = context
204            .iter()
205            .map(|(k, v)| format!("{k}={v}"))
206            .collect::<Vec<_>>()
207            .join(", ");
208        let full_message = format!("{message} [{context_str}]");
209        self.log(level, component, &full_message).await
210    }
211
212    async fn get_system_info(&self) -> Result<HashMap<String, String>, SystemError> {
213        let mut info = HashMap::new();
214        info.insert("component".to_string(), ComponentId::Monitoring.to_string());
215        info.insert("max_alerts".to_string(), self.config.max_alerts.to_string());
216        info.insert("status".to_string(), "operational".to_string());
217        info.insert("total_health_checks".to_string(), "0".to_string());
218        info.insert("failed_health_checks".to_string(), "0".to_string());
219        info.insert("active_alerts".to_string(), "0".to_string());
220        Ok(info)
221    }
222
223    async fn set_config(&self, key: &str, value: &str) -> Result<(), SystemError> {
224        match key {
225            "max_alerts" => {
226                let parsed =
227                    value
228                        .parse::<usize>()
229                        .map_err(|_| SystemError::InvalidConfiguration {
230                            key: key.to_string(),
231                            value: value.to_string(),
232                        })?;
233                let _ = parsed;
234                Ok(())
235            }
236            _ => Err(SystemError::InvalidConfiguration {
237                key: key.to_string(),
238                value: value.to_string(),
239            }),
240        }
241    }
242
243    async fn get_config(&self, key: &str) -> Result<String, SystemError> {
244        match key {
245            "max_alerts" => Ok(self.config.max_alerts.to_string()),
246            _ => Err(SystemError::InvalidConfiguration {
247                key: key.to_string(),
248                value: "unknown".to_string(),
249            }),
250        }
251    }
252
253    async fn health_check(&self) -> Result<bool, SystemError> {
254        Ok(true)
255    }
256
257    async fn get_metrics(&self) -> Result<HashMap<String, f64>, SystemError> {
258        let mut metrics = HashMap::new();
259        metrics.insert("active_alerts".to_string(), 0.0);
260        metrics.insert(
261            "max_alerts_configured".to_string(),
262            self.config.max_alerts as f64,
263        );
264        metrics.insert("total_health_checks".to_string(), 0.0);
265        metrics.insert("failed_health_checks".to_string(), 0.0);
266        metrics.insert("total_alerts".to_string(), 0.0);
267        Ok(metrics)
268    }
269
270    async fn restart_component(&self, component: &str) -> Result<(), SystemError> {
271        tracing::warn!(
272            component = component,
273            "Restart requested via monitoring handler"
274        );
275        Err(SystemError::OperationFailed {
276            message: "restart_component not supported in monitoring handler".to_string(),
277        })
278    }
279
280    async fn shutdown(&self) -> Result<(), SystemError> {
281        Ok(())
282    }
283}
284
#[cfg(test)]
#[allow(clippy::expect_used)] // Test code: expect() is acceptable for test assertions
mod tests {
    use super::*;

    /// A default handler exposes the default `max_alerts` through `get_config`.
    #[tokio::test]
    async fn test_monitoring_handler_creation() {
        let handler = MonitoringSystemHandler::default();
        let max_alerts = handler
            .get_config("max_alerts")
            .await
            .expect("max_alerts is a known key");
        assert_eq!(max_alerts, "256");
    }

    /// Alerts can be sent and the stubbed alert/health queries behave as documented.
    #[tokio::test]
    async fn test_alert_operations() {
        let handler = MonitoringSystemHandler::default();

        handler
            .send_alert(
                ComponentId::Custom("component".to_string()),
                AlertSeverity::Warning,
                "title",
                "body",
                HashMap::new(),
            )
            .await
            .expect("alert ok");

        // Nothing is retained by the handler, so no alerts come back.
        assert!(handler.get_recent_alerts(10).await.is_empty());
        handler.resolve_alert(1).await.expect("resolve ok");

        // Test health check
        let health = handler
            .check_component_health(ComponentId::Custom("test_component".to_string()))
            .await
            .expect("health check ok");
        assert_eq!(
            health.component,
            ComponentId::Custom("test_component".to_string())
        );
        assert_eq!(health.status, HealthStatus::Healthy);
        assert_eq!(health.message, "ok");
    }

    /// The `SystemEffects` surface reports the expected info, config and metrics.
    #[tokio::test]
    async fn test_system_effects() {
        let handler = MonitoringSystemHandler::default();

        // Test system info
        let info = handler.get_system_info().await.expect("system info ok");
        assert_eq!(info.get("component"), Some(&"monitoring".to_string()));
        assert_eq!(info.get("total_health_checks"), Some(&"0".to_string()));

        // Test config operations
        let config_value = handler
            .get_config("max_alerts")
            .await
            .expect("max_alerts is a known key");
        assert_eq!(config_value, "256");

        let metrics = handler.get_metrics().await.expect("metrics ok");
        assert_eq!(metrics.get("active_alerts"), Some(&0.0)); // Should be 0 since no alerts generated
        assert_eq!(metrics.get("max_alerts_configured"), Some(&256.0));

        assert!(handler.health_check().await.expect("health ok"));
    }

    /// Invalid configuration requests and unsupported operations are rejected.
    #[tokio::test]
    async fn test_error_paths() {
        let handler = MonitoringSystemHandler::default();

        handler
            .set_config("max_alerts", "512")
            .await
            .expect("well-formed max_alerts is accepted");
        assert!(handler
            .set_config("max_alerts", "not-a-number")
            .await
            .is_err());
        assert!(handler.set_config("unknown_key", "1").await.is_err());
        assert!(handler.get_config("unknown_key").await.is_err());
        assert!(handler.restart_component("anything").await.is_err());
    }
}
337
338        let metrics = handler.get_metrics().await.unwrap();
339        assert_eq!(metrics.get("active_alerts"), Some(&0.0)); // Should be 0 since no alerts generated
340    }
341}