// leptos_sync_core/reliability/monitoring/mod.rs

1//! Monitoring System
2//!
3//! This module provides comprehensive monitoring capabilities including:
4//! - Metrics collection and aggregation
5//! - Alert management and notification
6//! - Health reporting and status monitoring
7//! - Performance tracking and analysis
8
9pub mod metrics;
10pub mod alerts;
11pub mod health;
12pub mod config;
13
14// Re-export main types for convenience
15pub use metrics::{
16    Metric, TimeRange, AggregationType, AggregatedMetric, MetricsCollector, MetricsConfig,
17};
18pub use alerts::{
19    AlertRule, AlertCondition, ComparisonOperator, AlertSeverity, Alert, AlertManager,
20    AlertStats, AlertConfig,
21};
22pub use health::{
23    HealthCheck, HealthStatus, SystemStatus, HealthCheckResult, HealthReporter, HealthStats,
24    HealthConfig,
25};
26pub use config::{
27    MonitorConfig, MonitoringStats, PerformanceConfig, ResourceConfig, ExtendedMonitorConfig,
28};
29
30use std::sync::Arc;
31use tokio::sync::RwLock;
32
/// Reliability monitoring system.
///
/// Bundles a metrics collector, an alert manager and a health reporter behind a
/// single facade and keeps running counters about monitoring activity.
///
/// The monitor has an explicit lifecycle: it is created uninitialized, and most
/// of its methods are no-ops (or return empty/zeroed values) until
/// `initialize` has succeeded. `shutdown` returns it to the uninitialized state.
///
/// Cloning: the derived `Clone` clones the `Arc` in `stats`, so a clone shares the
/// same statistics counters as the original. How the three component fields behave
/// when cloned depends on their own `Clone` implementations, which are defined in
/// the `metrics`, `alerts` and `health` submodules.
#[derive(Debug, Clone)]
pub struct ReliabilityMonitor {
    /// Collector that stores recorded metrics and serves metric queries.
    pub metrics_collector: MetricsCollector,
    /// Manager that holds alert rules and evaluates recorded metrics against them.
    pub alert_manager: AlertManager,
    /// Reporter that holds the registered health checks and runs them.
    pub health_reporter: HealthReporter,
    /// Monitoring counters, behind an async read/write lock and shared between clones.
    pub stats: Arc<RwLock<MonitoringStats>>,
    /// Lifecycle flag: `true` between a successful `initialize` and the next `shutdown`.
    pub initialized: bool,
}
47
48impl ReliabilityMonitor {
49    /// Create a new reliability monitor
50    pub fn new() -> Self {
51        Self {
52            metrics_collector: MetricsCollector::new(),
53            alert_manager: AlertManager::new(),
54            health_reporter: HealthReporter::new(),
55            stats: Arc::new(RwLock::new(MonitoringStats::new())),
56            initialized: false,
57        }
58    }
59    
60    /// Create a new reliability monitor with configuration
61    pub fn with_config(config: MonitorConfig) -> Self {
62        Self {
63            metrics_collector: MetricsCollector::with_config(config.metrics_config),
64            alert_manager: AlertManager::with_config(config.alert_config),
65            health_reporter: HealthReporter::with_config(config.health_config),
66            stats: Arc::new(RwLock::new(MonitoringStats::new())),
67            initialized: false,
68        }
69    }
70
71    /// Initialize the monitoring system
72    pub async fn initialize(&mut self) -> Result<(), String> {
73        if self.initialized {
74            return Err("Monitoring system is already initialized".to_string());
75        }
76
77        // Initialize components
78        self.metrics_collector = MetricsCollector::new();
79        self.alert_manager = AlertManager::new();
80        self.health_reporter = HealthReporter::new();
81
82        self.initialized = true;
83        Ok(())
84    }
85
86    /// Record a metric
87    pub fn record_metric(&mut self, metric: Metric) {
88        if !self.initialized {
89            return;
90        }
91
92        self.metrics_collector.record(metric.clone());
93        
94        // Check if this metric should trigger any alerts
95        let alerts = self.alert_manager.check_metric(&metric.name, metric.value);
96        
97        // Update statistics
98        if let Ok(mut stats) = self.stats.try_write() {
99            stats.increment_metrics_collected();
100            if !alerts.is_empty() {
101                stats.increment_alerts_triggered();
102            }
103        }
104    }
105
106    /// Add an alert rule
107    pub fn add_alert_rule(&mut self, rule: AlertRule) {
108        if !self.initialized {
109            return;
110        }
111        self.alert_manager.add_rule(rule);
112    }
113
114    /// Add a health check
115    pub fn add_health_check(&mut self, health_check: HealthCheck) {
116        if !self.initialized {
117            return;
118        }
119        self.health_reporter.add_health_check(health_check);
120    }
121
122    /// Perform all health checks
123    pub fn perform_health_checks(&mut self) -> Vec<HealthCheckResult> {
124        if !self.initialized {
125            return Vec::new();
126        }
127
128        let results = self.health_reporter.perform_all_health_checks();
129        
130        // Update statistics
131        if let Ok(mut stats) = self.stats.try_write() {
132            stats.increment_health_checks();
133        }
134
135        results
136    }
137
138    /// Get the current system status
139    pub fn get_system_status(&mut self) -> SystemStatus {
140        if !self.initialized {
141            return SystemStatus::new();
142        }
143        self.health_reporter.get_current_system_status()
144    }
145
146    /// Get monitoring statistics
147    pub async fn get_stats(&self) -> Result<MonitoringStats, String> {
148        if !self.initialized {
149            return Err("Monitoring system is not initialized".to_string());
150        }
151
152        let stats = self.stats.read().await;
153        Ok(stats.clone())
154    }
155
156    /// Get alert statistics
157    pub fn get_alert_stats(&self) -> AlertStats {
158        if !self.initialized {
159            return AlertStats {
160                total_rules: 0,
161                active_alerts: 0,
162                critical_alerts: 0,
163                high_alerts: 0,
164                medium_alerts: 0,
165                low_alerts: 0,
166                total_history: 0,
167            };
168        }
169        self.alert_manager.get_stats()
170    }
171
172    /// Get health statistics
173    pub fn get_health_stats(&self) -> HealthStats {
174        if !self.initialized {
175            return HealthStats {
176                total_checks: 0,
177                healthy_checks: 0,
178                degraded_checks: 0,
179                unhealthy_checks: 0,
180                unknown_checks: 0,
181                overall_status: HealthStatus::Unknown,
182                uptime_seconds: 0,
183            };
184        }
185        self.health_reporter.get_health_stats()
186    }
187
188    /// Get metrics for a specific name and time range
189    pub fn get_metrics(&self, name: &str, time_range: &TimeRange) -> Vec<&Metric> {
190        if !self.initialized {
191            return Vec::new();
192        }
193        self.metrics_collector.get_metrics(name, time_range)
194    }
195
196    /// Get aggregated metrics
197    pub fn get_aggregated_metrics(
198        &self,
199        name: &str,
200        time_range: &TimeRange,
201        aggregation_type: AggregationType,
202    ) -> Option<AggregatedMetric> {
203        if !self.initialized {
204            return None;
205        }
206        self.metrics_collector.aggregate_metrics(name, time_range, aggregation_type)
207    }
208
209    /// Get all active alerts
210    pub fn get_active_alerts(&self) -> Vec<&Alert> {
211        if !self.initialized {
212            return Vec::new();
213        }
214        self.alert_manager.get_active_alerts()
215    }
216
217    /// Resolve an alert
218    pub fn resolve_alert(&mut self, alert_id: &str) -> bool {
219        if !self.initialized {
220            return false;
221        }
222        self.alert_manager.resolve_alert(alert_id)
223    }
224
225    /// Get all metric names
226    pub fn get_metric_names(&self) -> Vec<String> {
227        if !self.initialized {
228            return Vec::new();
229        }
230        self.metrics_collector.get_metric_names()
231    }
232
233    /// Clear all metrics
234    pub fn clear_metrics(&mut self) {
235        if !self.initialized {
236            return;
237        }
238        self.metrics_collector.clear();
239    }
240
241    /// Clear alert history
242    pub fn clear_alert_history(&mut self) {
243        if !self.initialized {
244            return;
245        }
246        self.alert_manager.clear_history();
247    }
248
249    /// Shutdown the monitoring system
250    pub async fn shutdown(&mut self) -> Result<(), String> {
251        if !self.initialized {
252            return Err("Monitoring system is not initialized".to_string());
253        }
254
255        // Perform final health checks
256        self.perform_health_checks();
257
258        // Clear all data
259        self.clear_metrics();
260        self.clear_alert_history();
261
262        self.initialized = false;
263        Ok(())
264    }
265}
266
267impl Default for ReliabilityMonitor {
268    fn default() -> Self {
269        Self::new()
270    }
271}
272
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// Builds a default monitor and initializes it, panicking if that fails.
    async fn initialized_monitor() -> ReliabilityMonitor {
        let mut monitor = ReliabilityMonitor::new();
        monitor.initialize().await.unwrap();
        monitor
    }

    /// A freshly constructed monitor starts out uninitialized.
    #[test]
    fn test_reliability_monitor_creation() {
        assert!(!ReliabilityMonitor::new().initialized);
    }

    /// Construction from a config also leaves the monitor uninitialized.
    #[test]
    fn test_reliability_monitor_with_config() {
        let monitor = ReliabilityMonitor::with_config(MonitorConfig::new());
        assert!(!monitor.initialized);
    }

    /// `initialize` succeeds exactly once.
    #[tokio::test]
    async fn test_reliability_monitor_initialization() {
        let mut monitor = ReliabilityMonitor::new();
        assert!(!monitor.initialized);

        // First call flips the flag.
        let first = monitor.initialize().await;
        assert!(first.is_ok());
        assert!(monitor.initialized);

        // Second call is rejected.
        let second = monitor.initialize().await;
        assert!(second.is_err());
    }

    /// Recording is ignored before initialization and effective afterwards.
    #[tokio::test]
    async fn test_reliability_monitor_operations() {
        let mut monitor = ReliabilityMonitor::new();

        // Before initialization the metric must be dropped.
        monitor.record_metric(Metric::new("test_metric".to_string(), 42.0));
        // The time range is built at query time on purpose (not hoisted/reused).
        let before_init = monitor.get_metrics("test_metric", &TimeRange::last_seconds(3600));
        assert!(before_init.is_empty());

        assert!(monitor.initialize().await.is_ok());

        // After initialization the same call must be stored.
        monitor.record_metric(Metric::new("test_metric".to_string(), 42.0));
        let after_init = monitor.get_metrics("test_metric", &TimeRange::last_seconds(3600));
        assert_eq!(after_init.len(), 1);
        assert_eq!(after_init[0].value, 42.0);
    }

    /// A metric that violates a rule produces an active alert of the rule's severity.
    #[tokio::test]
    async fn test_reliability_monitor_alert_integration() {
        let mut monitor = initialized_monitor().await;

        // Rule: cpu_usage greater than 80.
        monitor.add_alert_rule(AlertRule::new(
            "cpu_high".to_string(),
            "High CPU Usage".to_string(),
            "cpu_usage".to_string(),
            AlertCondition::new(ComparisonOperator::GreaterThan, 80.0, 60),
            AlertSeverity::High,
        ));

        // 85 exceeds the threshold.
        monitor.record_metric(Metric::new("cpu_usage".to_string(), 85.0));

        let active = monitor.get_active_alerts();
        assert_eq!(active.len(), 1);
        assert_eq!(active[0].severity, AlertSeverity::High);
    }

    /// A registered health check is executed and reported under its id.
    #[tokio::test]
    async fn test_reliability_monitor_health_integration() {
        let mut monitor = initialized_monitor().await;

        monitor.add_health_check(HealthCheck::new(
            "database".to_string(),
            "Database Health".to_string(),
            "Checks database connectivity".to_string(),
            "check_database".to_string(),
        ));

        let outcomes = monitor.perform_health_checks();
        assert_eq!(outcomes.len(), 1);
        assert_eq!(outcomes[0].check_id, "database");

        // NOTE(review): this expects a non-zero uptime immediately after start-up.
        // Confirm how `uptime_seconds` is computed in the health module; if it is
        // whole elapsed seconds, this assertion may be timing-sensitive.
        let system_status = monitor.get_system_status();
        assert!(system_status.uptime_seconds > 0);
    }

    /// Statistics and metric names reflect what has been recorded.
    #[tokio::test]
    async fn test_reliability_monitor_statistics() {
        let mut monitor = initialized_monitor().await;

        monitor.record_metric(Metric::new("metric1".to_string(), 10.0));
        monitor.record_metric(Metric::new("metric2".to_string(), 20.0));

        // No rules or health checks were registered.
        let alert_stats = monitor.get_alert_stats();
        let health_stats = monitor.get_health_stats();
        assert_eq!(alert_stats.total_rules, 0);
        assert_eq!(health_stats.total_checks, 0);

        // Both metric names must be known.
        let names = monitor.get_metric_names();
        assert_eq!(names.len(), 2);
        assert!(names.contains(&"metric1".to_string()));
        assert!(names.contains(&"metric2".to_string()));
    }

    /// `shutdown` succeeds once and is rejected on an uninitialized monitor.
    #[tokio::test]
    async fn test_reliability_monitor_shutdown() {
        let mut monitor = initialized_monitor().await;
        assert!(monitor.initialized);

        let first = monitor.shutdown().await;
        assert!(first.is_ok());
        assert!(!monitor.initialized);

        let second = monitor.shutdown().await;
        assert!(second.is_err());
    }
}