// ant_quic/monitoring/mod.rs

//! Production Monitoring and Diagnostics
//!
//! This module provides comprehensive observability for NAT traversal operations
//! in production environments, including metrics collection, alerting, tracing,
//! and diagnostic capabilities.
6
7use std::{
8    collections::HashMap,
9    sync::Arc,
10    time::{Duration, SystemTime},
11};
12
13use serde::{Deserialize, Serialize};
14use tokio::sync::RwLock;
15use tracing::{info, error};
16
// Import from NAT traversal API for configuration types
18use crate::nat_traversal_api::NatTraversalConfig;
19
20pub mod metrics;
21pub mod alerting;
22pub mod distributed_tracing;
23pub mod diagnostics;
24pub mod health;
25pub mod export;
26pub mod dashboards;
27pub mod structured_logging;
28pub mod error_recovery;
29
// Selective imports to avoid conflicts
31pub use metrics::{ProductionMetricsCollector, MetricsConfig};
32pub use alerting::{ProductionAlertManager, AlertingConfig};
33pub use distributed_tracing::{DistributedTraceCollector, TracingConfig};
34pub use diagnostics::{DiagnosticEngine, DiagnosticsConfig};
35pub use health::{HealthMonitor, HealthConfig};
36pub use export::{ExportManager, ExportConfig as MonitoringExportConfig};
37pub use dashboards::{DashboardManager, DashboardConfig};
38
39/// Central monitoring system for NAT traversal operations
40pub struct MonitoringSystem {
41    /// Metrics collection subsystem
42    metrics_collector: Arc<ProductionMetricsCollector>,
43    /// Alerting subsystem
44    alert_manager: Arc<ProductionAlertManager>,
45    /// Distributed tracing subsystem
46    trace_collector: Arc<DistributedTraceCollector>,
47    /// Health monitoring subsystem
48    health_monitor: Arc<HealthMonitor>,
49    /// Diagnostic engine
50    diagnostic_engine: Arc<DiagnosticEngine>,
51    /// Export manager
52    export_manager: Arc<ExportManager>,
53    /// Dashboard manager
54    dashboard_manager: Arc<DashboardManager>,
55    /// System configuration
56    config: MonitoringConfig,
57    /// System state
58    state: Arc<RwLock<MonitoringState>>,
59}
60
61impl MonitoringSystem {
62    /// Create new monitoring system
63    pub async fn new(config: MonitoringConfig) -> Result<Self, MonitoringError> {
64        info!("Initializing production monitoring system");
65        
66        // Initialize subsystems
67        let metrics_collector = Arc::new(
68            ProductionMetricsCollector::new(config.metrics.clone()).await?
69        );
70        
71        let alert_manager = Arc::new(
72            ProductionAlertManager::new(config.alerting.clone()).await?
73        );
74        
75        let trace_collector = Arc::new(
76            DistributedTraceCollector::new(config.tracing.clone()).await?
77        );
78        
79        let health_monitor = Arc::new(
80            HealthMonitor::new(config.health.clone()).await?
81        );
82        
83        let diagnostic_engine = Arc::new(
84            DiagnosticEngine::new(config.diagnostics.clone()).await?
85        );
86        
87        let export_manager = Arc::new(
88            ExportManager::new(config.export.clone()).await?
89        );
90        
91        let dashboard_manager = Arc::new(
92            DashboardManager::new(config.dashboards.clone()).await?
93        );
94        
95        let state = Arc::new(RwLock::new(MonitoringState::new()));
96        
97        Ok(Self {
98            metrics_collector,
99            alert_manager,
100            trace_collector,
101            health_monitor,
102            diagnostic_engine,
103            export_manager,
104            dashboard_manager,
105            config,
106            state,
107        })
108    }
109    
110    /// Start monitoring system
111    pub async fn start(&self) -> Result<(), MonitoringError> {
112        info!("Starting production monitoring system");
113        
114        // Update state
115        {
116            let mut state = self.state.write().await;
117            state.start_time = Some(SystemTime::now());
118            state.status = MonitoringStatus::Starting;
119        }
120        
121        // Start subsystems in dependency order
122        self.metrics_collector.start().await?;
123        self.trace_collector.start().await?;
124        self.health_monitor.start().await?;
125        self.diagnostic_engine.start().await?;
126        self.alert_manager.start().await?;
127        self.export_manager.start().await?;
128        self.dashboard_manager.start().await?;
129        
130        // Update state to running
131        {
132            let mut state = self.state.write().await;
133            state.status = MonitoringStatus::Running;
134        }
135        
136        info!("Production monitoring system started successfully");
137        Ok(())
138    }
139    
140    /// Stop monitoring system
141    pub async fn stop(&self) -> Result<(), MonitoringError> {
142        info!("Stopping production monitoring system");
143        
144        // Update state
145        {
146            let mut state = self.state.write().await;
147            state.status = MonitoringStatus::Stopping;
148        }
149        
150        // Stop subsystems in reverse order
151        self.dashboard_manager.stop().await?;
152        self.export_manager.stop().await?;
153        self.alert_manager.stop().await?;
154        self.diagnostic_engine.stop().await?;
155        self.health_monitor.stop().await?;
156        self.trace_collector.stop().await?;
157        self.metrics_collector.stop().await?;
158        
159        // Update state to stopped
160        {
161            let mut state = self.state.write().await;
162            state.status = MonitoringStatus::Stopped;
163            state.stop_time = Some(SystemTime::now());
164        }
165        
166        info!("Production monitoring system stopped");
167        Ok(())
168    }
169    
170    /// Record NAT traversal attempt
171    pub async fn record_nat_attempt(&self, attempt: NatTraversalAttempt) -> Result<(), MonitoringError> {
172        // Record metrics
173        self.metrics_collector.record_nat_attempt(&attempt).await?;
174        
175        // Start trace if configured
176        if self.config.tracing.enabled {
177            self.trace_collector.start_nat_trace(&attempt).await?;
178        }
179        
180        // Check for alerts
181        self.alert_manager.evaluate_nat_attempt(&attempt).await?;
182        
183        Ok(())
184    }
185    
186    /// Record NAT traversal result
187    pub async fn record_nat_result(&self, result: NatTraversalResult) -> Result<(), MonitoringError> {
188        // Record metrics
189        self.metrics_collector.record_nat_result(&result).await?;
190        
191        // Complete trace
192        if self.config.tracing.enabled {
193            self.trace_collector.complete_nat_trace(&result).await?;
194        }
195        
196        // Update health status
197        self.health_monitor.update_nat_health(&result).await?;
198        
199        // Check for alerts
200        self.alert_manager.evaluate_nat_result(&result).await?;
201        
202        // Trigger diagnostics if needed
203        if !result.success {
204            self.diagnostic_engine.analyze_failure(&result).await?;
205        }
206        
207        Ok(())
208    }
209    
210    /// Get system status
211    pub async fn get_status(&self) -> MonitoringSystemStatus {
212        let state = self.state.read().await;
213        let uptime = state.start_time.map(|start| start.elapsed().unwrap_or_default());
214        
215        MonitoringSystemStatus {
216            status: state.status.clone(),
217            uptime,
218            subsystems: SubsystemStatus {
219                metrics: self.metrics_collector.get_status().await,
220                alerting: self.alert_manager.get_status().await,
221                tracing: self.trace_collector.get_status().await,
222                health: self.health_monitor.get_status().await,
223                diagnostics: self.diagnostic_engine.get_status().await,
224                export: self.export_manager.get_status().await,
225                dashboards: self.dashboard_manager.get_status().await,
226            },
227            metrics_summary: self.get_metrics_summary().await,
228        }
229    }
230    
231    /// Get metrics summary
232    async fn get_metrics_summary(&self) -> MetricsSummary {
233        self.metrics_collector.get_summary().await
234    }
235    
236    /// Trigger manual diagnostic
237    pub async fn trigger_diagnostic(&self, diagnostic_type: DiagnosticType) -> Result<DiagnosticReport, MonitoringError> {
238        self.diagnostic_engine.run_diagnostic(diagnostic_type).await
239    }
240    
241    /// Get health check
242    pub async fn health_check(&self) -> HealthCheckResult {
243        self.health_monitor.comprehensive_health_check().await
244    }
245}
246
247/// Health check result
248#[derive(Debug, Serialize, Deserialize)]
249pub struct HealthCheckResult {
250    /// Overall health status
251    pub status: HealthStatus,
252    /// Individual component health
253    pub components: HashMap<String, ComponentHealth>,
254    /// Health check timestamp
255    pub timestamp: SystemTime,
256    /// Overall score (0-100)
257    pub score: u8,
258}
259
260/// Health status enumeration
261#[derive(Debug, Clone, Serialize, Deserialize)]
262pub enum HealthStatus {
263    Healthy,
264    Degraded,
265    Unhealthy,
266    Unknown,
267}
268
269/// Component health information
270#[derive(Debug, Serialize, Deserialize)]
271pub struct ComponentHealth {
272    /// Component status
273    pub status: HealthStatus,
274    /// Status message
275    pub message: String,
276    /// Response time
277    pub response_time_ms: u64,
278    /// Error count
279    pub error_count: u64,
280}
281
282/// Diagnostic type enumeration
283#[derive(Debug, Clone, Serialize, Deserialize)]
284pub enum DiagnosticType {
285    NatTraversalFailure,
286    ConnectionPerformance,
287    NetworkConnectivity,
288    SystemHealth,
289    SecurityAudit,
290}
291
292/// Diagnostic report
293#[derive(Debug, Serialize, Deserialize)]
294pub struct DiagnosticReport {
295    /// Report identifier
296    pub id: String,
297    /// Diagnostic type
298    pub diagnostic_type: DiagnosticType,
299    /// Report timestamp
300    pub timestamp: SystemTime,
301    /// Overall severity
302    pub severity: DiagnosticSeverity,
303    /// Findings
304    pub findings: Vec<DiagnosticFinding>,
305    /// Recommended actions
306    pub recommendations: Vec<DiagnosticRecommendation>,
307    /// Report metadata
308    pub metadata: HashMap<String, String>,
309}
310
311/// Diagnostic severity levels
312#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
313pub enum DiagnosticSeverity {
314    Info,
315    Warning,
316    Error,
317    Critical,
318}
319
320/// Individual diagnostic finding
321#[derive(Debug, Serialize, Deserialize)]
322pub struct DiagnosticFinding {
323    /// Finding identifier
324    pub id: String,
325    /// Finding title
326    pub title: String,
327    /// Finding description
328    pub description: String,
329    /// Severity level
330    pub severity: DiagnosticSeverity,
331    /// Evidence
332    pub evidence: Vec<String>,
333    /// Confidence score (0-100)
334    pub confidence: u8,
335}
336
337/// Diagnostic recommendation
338#[derive(Debug, Serialize, Deserialize)]
339pub struct DiagnosticRecommendation {
340    /// Recommendation identifier
341    pub id: String,
342    /// Action title
343    pub title: String,
344    /// Action description
345    pub description: String,
346    /// Priority level
347    pub priority: RecommendationPriority,
348    /// Implementation steps
349    pub steps: Vec<String>,
350    /// Expected impact
351    pub impact: String,
352}
353
354/// Recommendation priority levels
355#[derive(Debug, Clone, Serialize, Deserialize)]
356pub enum RecommendationPriority {
357    Low,
358    Medium,
359    High,
360    Critical,
361}
362
363/// Monitoring system configuration
364#[derive(Debug, Clone, Serialize, Deserialize)]
365pub struct MonitoringConfig {
366    /// Metrics collection configuration
367    pub metrics: MetricsConfig,
368    /// Alerting configuration
369    pub alerting: AlertingConfig,
370    /// Tracing configuration
371    pub tracing: TracingConfig,
372    /// Health monitoring configuration
373    pub health: HealthConfig,
374    /// Diagnostics configuration
375    pub diagnostics: DiagnosticsConfig,
376    /// Export configuration
377    pub export: MonitoringExportConfig,
378    /// Dashboard configuration
379    pub dashboards: DashboardConfig,
380    /// Global settings
381    pub global: GlobalConfig,
382}
383
384impl Default for MonitoringConfig {
385    fn default() -> Self {
386        Self {
387            metrics: MetricsConfig::default(),
388            alerting: AlertingConfig::default(),
389            tracing: TracingConfig::default(),
390            health: HealthConfig::default(),
391            diagnostics: DiagnosticsConfig::default(),
392            export: MonitoringExportConfig::default(),
393            dashboards: DashboardConfig::default(),
394            global: GlobalConfig::default(),
395        }
396    }
397}
398
399/// Global monitoring configuration
400#[derive(Debug, Clone, Serialize, Deserialize)]
401pub struct GlobalConfig {
402    /// Service name for identification
403    pub service_name: String,
404    /// Service version
405    pub service_version: String,
406    /// Environment (dev, staging, prod)
407    pub environment: String,
408    /// Region identifier
409    pub region: String,
410    /// Instance identifier
411    pub instance_id: String,
412    /// Maximum overhead budget (percentage)
413    pub max_overhead_percent: f32,
414    /// Security settings
415    pub security: SecurityConfig,
416}
417
418impl Default for GlobalConfig {
419    fn default() -> Self {
420        Self {
421            service_name: "ant-quic".to_string(),
422            service_version: "0.2.1".to_string(),
423            environment: "dev".to_string(),
424            region: "local".to_string(),
425            instance_id: "default".to_string(),
426            max_overhead_percent: 1.0,
427            security: SecurityConfig::default(),
428        }
429    }
430}
431
432/// Security configuration for monitoring
433#[derive(Debug, Clone, Serialize, Deserialize)]
434pub struct SecurityConfig {
435    /// Enable data anonymization
436    pub anonymize_data: bool,
437    /// Enable audit logging
438    pub audit_logging: bool,
439    /// Access control settings
440    pub access_control: AccessControlConfig,
441    /// Encryption settings
442    pub encryption: EncryptionConfig,
443}
444
445impl Default for SecurityConfig {
446    fn default() -> Self {
447        Self {
448            anonymize_data: true,
449            audit_logging: true,
450            access_control: AccessControlConfig::default(),
451            encryption: EncryptionConfig::default(),
452        }
453    }
454}
455
456/// Access control configuration
457#[derive(Debug, Clone, Serialize, Deserialize)]
458pub struct AccessControlConfig {
459    /// Enable authentication
460    pub enabled: bool,
461    /// Authentication provider
462    pub provider: AuthProvider,
463    /// Required permissions
464    pub required_permissions: Vec<String>,
465}
466
467impl Default for AccessControlConfig {
468    fn default() -> Self {
469        Self {
470            enabled: true,
471            provider: AuthProvider::OAuth2,
472            required_permissions: vec!["monitoring:read".to_string()],
473        }
474    }
475}
476
477/// Authentication providers
478#[derive(Debug, Clone, Serialize, Deserialize)]
479pub enum AuthProvider {
480    OAuth2,
481    OIDC,
482    ApiKey,
483    Certificate,
484}
485
486/// Encryption configuration
487#[derive(Debug, Clone, Serialize, Deserialize)]
488pub struct EncryptionConfig {
489    /// Enable TLS for metrics export
490    pub tls_enabled: bool,
491    /// Encrypt data at rest
492    pub at_rest_encryption: bool,
493    /// Encryption key rotation period
494    pub key_rotation_period: Duration,
495}
496
497impl Default for EncryptionConfig {
498    fn default() -> Self {
499        Self {
500            tls_enabled: true,
501            at_rest_encryption: true,
502            key_rotation_period: Duration::from_secs(86400 * 30), // 30 days
503        }
504    }
505}
506
507/// NAT traversal attempt information
508#[derive(Debug, Clone, Serialize, Deserialize)]
509pub struct NatTraversalAttempt {
510    /// Unique attempt identifier
511    pub attempt_id: String,
512    /// Timestamp of attempt start
513    pub timestamp: SystemTime,
514    /// Client endpoint information
515    pub client_info: EndpointInfo,
516    /// Server endpoint information
517    pub server_info: EndpointInfo,
518    /// NAT traversal configuration
519    pub nat_config: NatTraversalConfig,
520    /// Bootstrap nodes involved
521    pub bootstrap_nodes: Vec<String>,
522    /// Network conditions
523    pub network_conditions: NetworkConditions,
524}
525
526/// NAT traversal result information
527#[derive(Debug, Clone, Serialize, Deserialize)]
528pub struct NatTraversalResult {
529    /// Attempt identifier
530    pub attempt_id: String,
531    /// Whether traversal was successful
532    pub success: bool,
533    /// Duration of traversal attempt
534    pub duration: Duration,
535    /// Final connection information
536    pub connection_info: Option<ConnectionInfo>,
537    /// Error information if failed
538    pub error_info: Option<ErrorInfo>,
539    /// Performance metrics
540    pub performance_metrics: PerformanceMetrics,
541    /// Candidate information
542    pub candidates_used: Vec<CandidateInfo>,
543}
544
545/// Endpoint information
546#[derive(Debug, Clone, Serialize, Deserialize)]
547pub struct EndpointInfo {
548    /// Endpoint identifier
549    pub id: String,
550    /// Endpoint role
551    pub role: EndpointRole,
552    /// Network address (anonymized)
553    pub address_hash: String,
554    /// NAT type detected
555    pub nat_type: Option<NatType>,
556    /// Geographic region
557    pub region: Option<String>,
558}
559
560/// Connection information
561#[derive(Debug, Clone, Serialize, Deserialize)]
562pub struct ConnectionInfo {
563    /// Established connection path
564    pub path: ConnectionPath,
565    /// Connection quality metrics
566    pub quality: ConnectionQuality,
567    /// Protocol information
568    pub protocol_info: ProtocolInfo,
569}
570
571/// Error information
572#[derive(Debug, Clone, Serialize, Deserialize)]
573pub struct ErrorInfo {
574    /// Error code
575    pub error_code: String,
576    /// Error category
577    pub error_category: ErrorCategory,
578    /// Human-readable error message
579    pub error_message: String,
580    /// Detailed error context
581    pub error_context: HashMap<String, String>,
582    /// Recovery suggestions
583    pub recovery_suggestions: Vec<String>,
584}
585
586/// Performance metrics
587#[derive(Debug, Clone, Serialize, Deserialize)]
588pub struct PerformanceMetrics {
589    /// Total connection establishment time
590    pub connection_time_ms: u64,
591    /// Time to first candidate
592    pub first_candidate_time_ms: u64,
593    /// Time to successful connection
594    pub success_time_ms: Option<u64>,
595    /// Number of candidates tried
596    pub candidates_tried: u32,
597    /// Number of round trips required
598    pub round_trips: u32,
599    /// Bytes transferred during setup
600    pub setup_bytes: u64,
601}
602
603/// Network conditions during attempt
604#[derive(Debug, Clone, Serialize, Deserialize)]
605pub struct NetworkConditions {
606    /// Estimated round-trip time
607    pub rtt_ms: Option<u32>,
608    /// Packet loss rate
609    pub packet_loss_rate: Option<f32>,
610    /// Available bandwidth
611    pub bandwidth_mbps: Option<u32>,
612    /// Network congestion indicator
613    pub congestion_level: CongestionLevel,
614}
615
616/// Connection path information
617#[derive(Debug, Clone, Serialize, Deserialize)]
618pub struct ConnectionPath {
619    /// Path type (direct, relayed, etc.)
620    pub path_type: PathType,
621    /// Intermediate hops
622    pub hops: Vec<HopInfo>,
623    /// Path quality score
624    pub quality_score: f32,
625}
626
627/// Connection quality metrics
628#[derive(Debug, Clone, Serialize, Deserialize)]
629pub struct ConnectionQuality {
630    /// Latency in milliseconds
631    pub latency_ms: u32,
632    /// Jitter in milliseconds
633    pub jitter_ms: u32,
634    /// Throughput in Mbps
635    pub throughput_mbps: f32,
636    /// Stability score (0-1)
637    pub stability_score: f32,
638}
639
640/// Protocol information
641#[derive(Debug, Clone, Serialize, Deserialize)]
642pub struct ProtocolInfo {
643    /// QUIC version used
644    pub quic_version: String,
645    /// Encryption cipher
646    pub cipher: String,
647    /// Extensions used
648    pub extensions: Vec<String>,
649}
650
651/// Candidate information
652#[derive(Debug, Clone, Serialize, Deserialize)]
653pub struct CandidateInfo {
654    /// Candidate type
655    pub candidate_type: CandidateType,
656    /// Priority assigned
657    pub priority: u32,
658    /// Whether candidate was successful
659    pub success: bool,
660    /// Time to test candidate
661    pub test_time_ms: u64,
662}
663
664/// Hop information for connection path
665#[derive(Debug, Clone, Serialize, Deserialize)]
666pub struct HopInfo {
667    /// Hop identifier (anonymized)
668    pub hop_id: String,
669    /// Hop type
670    pub hop_type: HopType,
671    /// Latency to this hop
672    pub latency_ms: u32,
673}
674
675/// Endpoint roles
676#[derive(Debug, Clone, Serialize, Deserialize)]
677pub enum EndpointRole {
678    Client,
679    Server,
680    Bootstrap,
681    Relay,
682}
683
684/// NAT types for monitoring
685#[derive(Debug, Clone, Serialize, Deserialize)]
686pub enum NatType {
687    FullCone,
688    RestrictedCone,
689    PortRestrictedCone,
690    Symmetric,
691    CarrierGrade,
692    DoubleNat,
693    None,
694}
695
696/// Error categories
697#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
698pub enum ErrorCategory {
699    NetworkConnectivity,
700    NatTraversal,
701    Authentication,
702    Protocol,
703    Timeout,
704    ResourceExhaustion,
705    Configuration,
706    Unknown,
707}
708
709/// Congestion levels
710#[derive(Debug, Clone, Serialize, Deserialize)]
711pub enum CongestionLevel {
712    Low,
713    Medium,
714    High,
715    Critical,
716}
717
718/// Path types
719#[derive(Debug, Clone, Serialize, Deserialize)]
720pub enum PathType {
721    Direct,
722    NatTraversed,
723    Relayed,
724    TurnRelayed,
725}
726
727/// Candidate types
728#[derive(Debug, Clone, Serialize, Deserialize)]
729pub enum CandidateType {
730    Host,
731    ServerReflexive,
732    PeerReflexive,
733    Relay,
734}
735
736/// Hop types
737#[derive(Debug, Clone, Serialize, Deserialize)]
738pub enum HopType {
739    Router,
740    Nat,
741    Firewall,
742    Proxy,
743    Relay,
744}
745
746/// Monitoring system state
747#[derive(Debug)]
748struct MonitoringState {
749    /// Current status
750    status: MonitoringStatus,
751    /// Start time
752    start_time: Option<SystemTime>,
753    /// Stop time
754    stop_time: Option<SystemTime>,
755    /// Error count
756    error_count: u64,
757    /// Last error
758    last_error: Option<String>,
759}
760
761impl MonitoringState {
762    fn new() -> Self {
763        Self {
764            status: MonitoringStatus::Stopped,
765            start_time: None,
766            stop_time: None,
767            error_count: 0,
768            last_error: None,
769        }
770    }
771}
772
773/// Monitoring system status
774#[derive(Debug, Clone, Serialize, Deserialize)]
775pub enum MonitoringStatus {
776    Stopped,
777    Starting,
778    Running,
779    Stopping,
780    Error,
781}
782
783/// Overall system status
784#[derive(Debug, Serialize, Deserialize)]
785pub struct MonitoringSystemStatus {
786    /// Current status
787    pub status: MonitoringStatus,
788    /// System uptime
789    pub uptime: Option<Duration>,
790    /// Subsystem statuses
791    pub subsystems: SubsystemStatus,
792    /// Metrics summary
793    pub metrics_summary: MetricsSummary,
794}
795
796/// Subsystem statuses
797#[derive(Debug, Serialize, Deserialize)]
798pub struct SubsystemStatus {
799    /// Metrics subsystem status
800    pub metrics: String,
801    /// Alerting subsystem status  
802    pub alerting: String,
803    /// Tracing subsystem status
804    pub tracing: String,
805    /// Health subsystem status
806    pub health: String,
807    /// Diagnostics subsystem status
808    pub diagnostics: String,
809    /// Export subsystem status
810    pub export: String,
811    /// Dashboard subsystem status
812    pub dashboards: String,
813}
814
815/// Metrics summary
816#[derive(Debug, Serialize, Deserialize)]
817pub struct MetricsSummary {
818    /// Total NAT attempts in last hour
819    pub nat_attempts_last_hour: u64,
820    /// Success rate in last hour
821    pub success_rate_last_hour: f32,
822    /// Average connection time in last hour
823    pub avg_connection_time_ms: u64,
824    /// Active connections
825    pub active_connections: u64,
826    /// Error rate in last hour
827    pub error_rate_last_hour: f32,
828}
829
830/// Monitoring errors
831#[derive(Debug, Clone, thiserror::Error)]
832pub enum MonitoringError {
833    #[error("Metrics collection error: {0}")]
834    MetricsError(String),
835    
836    #[error("Alerting error: {0}")]
837    AlertingError(String),
838    
839    #[error("Tracing error: {0}")]
840    TracingError(String),
841    
842    #[error("Health monitoring error: {0}")]
843    HealthError(String),
844    
845    #[error("Diagnostics error: {0}")]
846    DiagnosticsError(String),
847    
848    #[error("Export error: {0}")]
849    ExportError(String),
850    
851    #[error("Configuration error: {0}")]
852    ConfigError(String),
853    
854    #[error("System error: {0}")]
855    SystemError(String),
856}
857
#[cfg(test)]
mod tests {
    use super::*;

    /// A system built from the default configuration and never started
    /// reports the `Stopped` status.
    #[tokio::test]
    async fn test_monitoring_system_creation() {
        let monitoring = MonitoringSystem::new(MonitoringConfig::default())
            .await
            .unwrap();

        let MonitoringSystemStatus { status, .. } = monitoring.get_status().await;
        assert!(matches!(status, MonitoringStatus::Stopped));
    }

    /// The default configuration serializes to JSON and parses back.
    #[test]
    fn test_config_serialization() {
        let json = serde_json::to_string_pretty(&MonitoringConfig::default()).unwrap();
        let _deserialized: MonitoringConfig = serde_json::from_str(&json).unwrap();
    }
}