actr_runtime/
monitoring.rs

//! Monitoring and alerting.
2
3use crate::error::{RuntimeError, RuntimeResult};
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use uuid::Uuid;
8
/// Severity level attached to an alert.
///
/// Variants are declared in ascending order with explicit discriminants
/// (1 through 4), so the derived `PartialOrd`/`Ord` rank `Info` lowest and
/// `Critical` highest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum AlertSeverity {
    /// Informational; the lowest severity.
    Info = 1,
    /// Warning.
    Warning = 2,
    /// Error.
    Error = 3,
    /// Critical; the highest severity.
    Critical = 4,
}
21
22impl AlertSeverity {
23    /// Getseverity description
24    pub fn description(&self) -> &'static str {
25        match self {
26            AlertSeverity::Info => "info",
27            AlertSeverity::Warning => "Warning",
28            AlertSeverity::Error => "Error",
29            AlertSeverity::Critical => "critical",
30        }
31    }
32}
33
/// A single alert record: identity, descriptive text, severity, lifecycle
/// flags, and optional metric context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    /// Unique identifier of the alert.
    pub id: Uuid,

    /// Short title of the alert.
    pub title: String,

    /// Longer description of the alert.
    pub description: String,

    /// Severity of the alert.
    pub severity: AlertSeverity,

    /// Name of the component or subsystem that raised the alert.
    pub source: String,

    /// Time at which the alert was created (UTC).
    pub timestamp: DateTime<Utc>,

    /// Whether the alert has been acknowledged.
    pub acknowledged: bool,

    /// Whether the alert has been resolved.
    pub resolved: bool,

    /// Free-form key/value labels attached to the alert.
    pub labels: HashMap<String, String>,

    /// Metric value associated with the alert, if any.
    pub metric_value: Option<f64>,

    /// Threshold associated with the alert, if any.
    pub threshold: Option<f64>,
}
70
71impl Alert {
72    /// Createnew alert
73    pub fn new(
74        title: String,
75        description: String,
76        severity: AlertSeverity,
77        source: String,
78    ) -> Self {
79        Self {
80            id: Uuid::new_v4(),
81            title,
82            description,
83            severity,
84            source,
85            timestamp: Utc::now(),
86            acknowledged: false,
87            resolved: false,
88            labels: HashMap::new(),
89            metric_value: None,
90            threshold: None,
91        }
92    }
93
94    /// add tags
95    pub fn with_label(mut self, key: String, value: String) -> Self {
96        self.labels.insert(key, value);
97        self
98    }
99
100    /// Setmetric valueandthreshold
101    pub fn with_metric(mut self, value: f64, threshold: f64) -> Self {
102        self.metric_value = Some(value);
103        self.threshold = Some(threshold);
104        self
105    }
106
107    /// acknowledge alert
108    pub fn acknowledge(&mut self) {
109        self.acknowledged = true;
110    }
111
112    /// resolve alert
113    pub fn resolve(&mut self) {
114        self.resolved = true;
115    }
116}
117
/// Alerting configuration: a master switch plus a warning and a critical
/// threshold for each monitored quantity.
#[derive(Debug, Clone)]
pub struct AlertConfig {
    /// Whether alerting is enabled.
    pub enabled: bool,

    /// CPU usage at or above which a warning alert is raised.
    /// `BasicMonitor` compares it against usage expressed as a fraction
    /// (it multiplies usage by 100 to print a percentage).
    pub cpu_warning_threshold: f64,
    /// CPU usage at or above which a critical alert is raised; same unit as
    /// `cpu_warning_threshold`.
    pub cpu_critical_threshold: f64,

    /// Memory usage warning threshold.
    /// NOTE(review): presumably a fraction like the CPU thresholds (default
    /// is 0.8) — confirm against the code that consumes it.
    pub memory_warning_threshold: f64,
    /// Memory usage critical threshold; same unit as
    /// `memory_warning_threshold`.
    pub memory_critical_threshold: f64,

    /// Error rate warning threshold.
    /// NOTE(review): presumably a fraction of requests (default is 0.05) —
    /// confirm against the code that consumes it.
    pub error_rate_warning_threshold: f64,
    /// Error rate critical threshold; same unit as
    /// `error_rate_warning_threshold`.
    pub error_rate_critical_threshold: f64,

    /// Response time warning threshold, in milliseconds.
    pub response_time_warning_threshold_ms: f64,
    /// Response time critical threshold, in milliseconds.
    pub response_time_critical_threshold_ms: f64,
}
140
141impl Default for AlertConfig {
142    fn default() -> Self {
143        Self {
144            enabled: true,
145            cpu_warning_threshold: 0.8,
146            cpu_critical_threshold: 0.95,
147            memory_warning_threshold: 0.8,
148            memory_critical_threshold: 0.95,
149            error_rate_warning_threshold: 0.05,
150            error_rate_critical_threshold: 0.1,
151            response_time_warning_threshold_ms: 1000.0,
152            response_time_critical_threshold_ms: 5000.0,
153        }
154    }
155}
156
/// Monitoring configuration: master switch, sampling interval, metric
/// retention, and the nested alerting configuration.
#[derive(Debug, Clone)]
pub struct MonitoringConfig {
    /// Whether monitoring is enabled. When `false`,
    /// `BasicMonitor::record_metric` accepts metrics without storing them.
    pub enabled: bool,

    /// Monitoring interval, in seconds.
    pub monitoring_interval_seconds: u64,

    /// How long recorded metrics are retained, in seconds.
    pub metrics_retention_seconds: u64,

    /// Alerting configuration.
    pub alert_config: AlertConfig,
}
172
173impl Default for MonitoringConfig {
174    fn default() -> Self {
175        Self {
176            enabled: true,
177            monitoring_interval_seconds: 30,
178            metrics_retention_seconds: 7 * 24 * 3600, // 7 days
179            alert_config: AlertConfig::default(),
180        }
181    }
182}
183
/// A single recorded metric sample.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metric {
    /// Metric name; `BasicMonitor` looks samples up by exact name match.
    pub name: String,

    /// Sampled value.
    pub value: f64,

    /// Time the sample refers to (UTC). `BasicMonitor` uses it for retention
    /// and time-window filtering.
    pub timestamp: DateTime<Utc>,

    /// Free-form key/value labels attached to the sample.
    pub labels: HashMap<String, String>,

    /// Unit of `value`, if known.
    pub unit: Option<String>,
}
202
/// Interface of a monitor: it records metrics, evaluates alert conditions,
/// and tracks the lifecycle of the alerts it raised.
///
/// Implementors must be `Send + Sync`.
pub trait Monitor: Send + Sync {
    /// Records one metric sample.
    fn record_metric(&mut self, metric: Metric) -> RuntimeResult<()>;

    /// Returns the recorded samples named `name` that fall within the last
    /// `duration_seconds` seconds.
    fn get_metrics(&self, name: &str, duration_seconds: u64) -> RuntimeResult<Vec<Metric>>;

    /// Evaluates the alert conditions and returns the alerts raised by this
    /// call.
    fn check_alerts(&mut self) -> RuntimeResult<Vec<Alert>>;

    /// Returns the currently active alerts (in `BasicMonitor`: every alert
    /// not yet resolved, acknowledged or not).
    fn get_active_alerts(&self) -> Vec<&Alert>;

    /// Marks the alert identified by `alert_id` as acknowledged.
    fn acknowledge_alert(&mut self, alert_id: Uuid) -> RuntimeResult<()>;

    /// Marks the alert identified by `alert_id` as resolved.
    fn resolve_alert(&mut self, alert_id: Uuid) -> RuntimeResult<()>;
}
223
/// Basic monitor that keeps its metrics and alerts in in-memory vectors.
pub struct BasicMonitor {
    /// Monitoring and alerting configuration.
    config: MonitoringConfig,
    /// Recorded metric samples, in insertion order.
    metrics: Vec<Metric>,
    /// Every alert raised so far, resolved or not.
    alerts: Vec<Alert>,
}
230
231impl BasicMonitor {
232    /// Create newmonitor
233    pub fn new(config: MonitoringConfig) -> Self {
234        Self {
235            config,
236            metrics: Vec::new(),
237            alerts: Vec::new(),
238        }
239    }
240
241    /// Check CPU usage ratealert
242    fn check_cpu_alerts(&mut self, cpu_usage: f64) -> RuntimeResult<Option<Alert>> {
243        if !self.config.alert_config.enabled {
244            return Ok(None);
245        }
246
247        if cpu_usage >= self.config.alert_config.cpu_critical_threshold {
248            let alert = Alert::new(
249                "CPU usage ratecritical".to_string(),
250                format!("CPU usage ratereachedto {:.1}%", cpu_usage * 100.0),
251                AlertSeverity::Critical,
252                "system".to_string(),
253            )
254            .with_metric(cpu_usage, self.config.alert_config.cpu_critical_threshold);
255
256            Ok(Some(alert))
257        } else if cpu_usage >= self.config.alert_config.cpu_warning_threshold {
258            let alert = Alert::new(
259                "CPU usage rateWarning".to_string(),
260                format!("CPU usage ratereachedto {:.1}%", cpu_usage * 100.0),
261                AlertSeverity::Warning,
262                "system".to_string(),
263            )
264            .with_metric(cpu_usage, self.config.alert_config.cpu_warning_threshold);
265
266            Ok(Some(alert))
267        } else {
268            Ok(None)
269        }
270    }
271}
272
273impl Monitor for BasicMonitor {
274    fn record_metric(&mut self, metric: Metric) -> RuntimeResult<()> {
275        if !self.config.enabled {
276            return Ok(());
277        }
278
279        self.metrics.push(metric);
280
281        // Clean up expired metrics
282        let cutoff =
283            Utc::now() - chrono::Duration::seconds(self.config.metrics_retention_seconds as i64);
284        self.metrics.retain(|m| m.timestamp > cutoff);
285
286        Ok(())
287    }
288
289    fn get_metrics(&self, name: &str, duration_seconds: u64) -> RuntimeResult<Vec<Metric>> {
290        let cutoff = Utc::now() - chrono::Duration::seconds(duration_seconds as i64);
291
292        let metrics: Vec<Metric> = self
293            .metrics
294            .iter()
295            .filter(|m| m.name == name && m.timestamp > cutoff)
296            .cloned()
297            .collect();
298
299        Ok(metrics)
300    }
301
302    fn check_alerts(&mut self) -> RuntimeResult<Vec<Alert>> {
303        if !self.config.alert_config.enabled {
304            return Ok(Vec::new());
305        }
306
307        let mut new_alerts = Vec::new();
308
309        // Check CPU usage rate
310        if let Ok(cpu_metrics) = self.get_metrics("cpu_usage", 300) {
311            if let Some(latest) = cpu_metrics.last() {
312                if let Some(alert) = self.check_cpu_alerts(latest.value)? {
313                    new_alerts.push(alert);
314                }
315            }
316        }
317
318        // Add new alerttolist
319        for alert in &new_alerts {
320            self.alerts.push(alert.clone());
321        }
322
323        Ok(new_alerts)
324    }
325
326    fn get_active_alerts(&self) -> Vec<&Alert> {
327        self.alerts.iter().filter(|alert| !alert.resolved).collect()
328    }
329
330    fn acknowledge_alert(&mut self, alert_id: Uuid) -> RuntimeResult<()> {
331        if let Some(alert) = self.alerts.iter_mut().find(|a| a.id == alert_id) {
332            alert.acknowledge();
333            Ok(())
334        } else {
335            Err(RuntimeError::Other(anyhow::anyhow!("Alert not found")))
336        }
337    }
338
339    fn resolve_alert(&mut self, alert_id: Uuid) -> RuntimeResult<()> {
340        if let Some(alert) = self.alerts.iter_mut().find(|a| a.id == alert_id) {
341            alert.resolve();
342            Ok(())
343        } else {
344            Err(RuntimeError::Other(anyhow::anyhow!("Alert not found")))
345        }
346    }
347}