spawn_access_control/
monitoring.rs1use crate::ml_metrics::ModelMetrics;
2use crate::model_explainer::SecurityImpactAnalysis;
3use chrono::{DateTime, Utc, Duration};
4use serde::Serialize;
5use std::collections::VecDeque;
6use tokio::sync::RwLock;
7use std::sync::Arc;
8
9#[derive(Debug, Serialize)]
10pub struct ModelHealth {
11 pub current_status: HealthStatus,
12 pub performance_trend: PerformanceTrend,
13 pub alerts: Vec<HealthAlert>,
14 pub last_update: DateTime<Utc>,
15}
16
17#[derive(Debug, Serialize, PartialEq)]
18pub enum HealthStatus {
19 Healthy,
20 Degraded,
21 Critical,
22 Unknown,
23}
24
25#[derive(Debug, Serialize)]
26pub enum PerformanceTrend {
27 Improving,
28 Stable,
29 Degrading,
30}
31
32#[derive(Debug, Serialize, Clone)]
33pub struct HealthAlert {
34 pub severity: AlertSeverity,
35 pub message: String,
36 pub timestamp: DateTime<Utc>,
37 pub metric_name: String,
38 pub threshold: f64,
39 pub current_value: f64,
40}
41
42#[derive(Debug, Clone, Serialize)]
43pub enum AlertSeverity {
44 Critical,
45 Warning,
46 Info,
47}
48
49pub struct ModelMonitor {
50 metrics_history: Arc<RwLock<VecDeque<ModelMetrics>>>,
51 security_history: Arc<RwLock<VecDeque<SecurityImpactAnalysis>>>,
52 config: MonitoringConfig,
53 alerts: Arc<RwLock<Vec<HealthAlert>>>,
54}
55
56#[derive(Clone)]
57pub struct MonitoringConfig {
58 pub metrics_window_size: usize,
59 pub performance_threshold: f64,
60 pub security_threshold: f64,
61 pub alert_cooldown: Duration,
62}
63
64impl ModelMonitor {
65 pub fn new(config: MonitoringConfig) -> Self {
66 Self {
67 metrics_history: Arc::new(RwLock::new(VecDeque::new())),
68 security_history: Arc::new(RwLock::new(VecDeque::new())),
69 config,
70 alerts: Arc::new(RwLock::new(Vec::new())),
71 }
72 }
73
74 pub async fn update_metrics(&self, metrics: ModelMetrics) {
75 let mut history = self.metrics_history.write().await;
76 if history.len() >= self.config.metrics_window_size {
77 history.pop_front();
78 }
79 history.push_back(metrics.clone());
80
81 self.check_performance_alerts(&metrics).await;
82 }
83
84 pub async fn update_security_analysis(&self, analysis: SecurityImpactAnalysis) {
85 let mut history = self.security_history.write().await;
86 if history.len() >= self.config.metrics_window_size {
87 history.pop_front();
88 }
89 history.push_back(analysis.clone());
90
91 self.check_security_alerts(&analysis).await;
92 }
93
94 pub async fn get_model_health(&self) -> ModelHealth {
95 let metrics = self.metrics_history.read().await;
96 let security = self.security_history.read().await;
97 let alerts = self.alerts.read().await;
98
99 let status = self.calculate_health_status(&metrics, &security).await;
100 let trend = self.calculate_performance_trend(&metrics).await;
101
102 ModelHealth {
103 current_status: status,
104 performance_trend: trend,
105 alerts: alerts.clone(),
106 last_update: Utc::now(),
107 }
108 }
109
110 async fn check_performance_alerts(&self, metrics: &ModelMetrics) {
111 let mut alerts = self.alerts.write().await;
112
113 if metrics.f1_score < self.config.performance_threshold {
115 alerts.push(HealthAlert {
116 severity: AlertSeverity::Warning,
117 message: format!("Low F1 score: {:.2}", metrics.f1_score),
118 timestamp: Utc::now(),
119 metric_name: "f1_score".to_string(),
120 threshold: self.config.performance_threshold,
121 current_value: metrics.f1_score,
122 });
123 }
124
125 let pr_diff = (metrics.precision - metrics.recall).abs();
127 if pr_diff > 0.2 {
128 alerts.push(HealthAlert {
129 severity: AlertSeverity::Warning,
130 message: "Significant precision-recall imbalance detected".to_string(),
131 timestamp: Utc::now(),
132 metric_name: "pr_balance".to_string(),
133 threshold: 0.2,
134 current_value: pr_diff,
135 });
136 }
137 }
138
139 async fn check_security_alerts(&self, analysis: &SecurityImpactAnalysis) {
140 let mut alerts = self.alerts.write().await;
141
142 if analysis.false_positive_impact > self.config.security_threshold {
144 alerts.push(HealthAlert {
145 severity: AlertSeverity::Critical,
146 message: "High false positive impact detected".to_string(),
147 timestamp: Utc::now(),
148 metric_name: "false_positive_impact".to_string(),
149 threshold: self.config.security_threshold,
150 current_value: analysis.false_positive_impact,
151 });
152 }
153
154 for factor in &analysis.risk_factors {
156 if factor.impact_score > 0.8 {
157 alerts.push(HealthAlert {
158 severity: AlertSeverity::Critical,
159 message: format!("Critical risk factor: {}", factor.name),
160 timestamp: Utc::now(),
161 metric_name: "risk_factor".to_string(),
162 threshold: 0.8,
163 current_value: factor.impact_score,
164 });
165 }
166 }
167 }
168
169 async fn calculate_health_status(
170 &self,
171 metrics: &VecDeque<ModelMetrics>,
172 security: &VecDeque<SecurityImpactAnalysis>
173 ) -> HealthStatus {
174 if metrics.is_empty() || security.is_empty() {
175 return HealthStatus::Unknown;
176 }
177
178 let latest_metrics = metrics.back().unwrap();
179 let latest_security = security.back().unwrap();
180
181 if latest_metrics.f1_score < 0.6 || latest_security.false_positive_impact > 0.4 {
182 HealthStatus::Critical
183 } else if latest_metrics.f1_score < 0.8 || latest_security.false_positive_impact > 0.2 {
184 HealthStatus::Degraded
185 } else {
186 HealthStatus::Healthy
187 }
188 }
189
190 async fn calculate_performance_trend(&self, metrics: &VecDeque<ModelMetrics>) -> PerformanceTrend {
191 if metrics.len() < 2 {
192 return PerformanceTrend::Stable;
193 }
194
195 let recent_scores: Vec<f64> = metrics.iter()
196 .rev()
197 .take(5)
198 .map(|m| m.f1_score)
199 .collect();
200
201 let trend = recent_scores.windows(2)
202 .map(|w| w[0] - w[1])
203 .sum::<f64>();
204
205 match trend {
206 t if t > 0.05 => PerformanceTrend::Improving,
207 t if t < -0.05 => PerformanceTrend::Degrading,
208 _ => PerformanceTrend::Stable,
209 }
210 }
211}