use crate::analytics::MetricsBucket;
use chrono::{DateTime, Utc};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{debug, error, info, warn};

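/// Severity level assigned to an alert.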
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AlertSeverity {
    Info,
    Warning,
    Critical,
}

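/// The condition that triggered an alert, carrying the observed value and the
/// threshold it exceeded. Internally tagged with `"type"` for serialization.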
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AlertType {
    HighEventRate {
        events_per_minute: usize,
        threshold: usize,
    },
    HighLatency {
        avg_latency_ms: f64,
        threshold_ms: u64,
    },
    HighFaultRate {
        faults_per_minute: usize,
        threshold: usize,
    },
    RateLimitViolations {
        violations_per_minute: usize,
        threshold: usize,
    },
    EndpointStress {
        endpoint: String,
        events_per_minute: usize,
        threshold: usize,
    },
    HighImpact { severity_score: f64, threshold: f64 },
    Custom {
        message: String,
        metadata: HashMap<String, String>,
    },
}

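/// A fired alert instance, unresolved until [`Alert::resolve`] is called.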
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    pub id: String,
    pub timestamp: DateTime<Utc>,
    pub severity: AlertSeverity,
    pub alert_type: AlertType,
    pub message: String,
    pub resolved: bool,
    pub resolved_at: Option<DateTime<Utc>>,
}

impl Alert {
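    /// Creates a new unresolved alert with a random UUID and the current timestamp.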
    pub fn new(severity: AlertSeverity, alert_type: AlertType, message: impl Into<String>) -> Self {
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            timestamp: Utc::now(),
            severity,
            alert_type,
            message: message.into(),
            resolved: false,
            resolved_at: None,
        }
    }

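    /// Marks the alert as resolved and records the resolution time.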
    pub fn resolve(&mut self) {
        self.resolved = true;
        self.resolved_at = Some(Utc::now());
    }
}

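/// A named, toggleable rule that fires an [`Alert`] when metrics exceed its
/// threshold.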
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertRule {
    pub id: String,
    pub name: String,
    pub enabled: bool,
    pub severity: AlertSeverity,
    pub rule_type: AlertRuleType,
}

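/// The threshold condition an [`AlertRule`] checks. Each variant pairs a
/// threshold with a window length in minutes.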
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AlertRuleType {
    EventRateThreshold {
        threshold: usize,
        window_minutes: i64,
    },
    LatencyThreshold {
        threshold_ms: u64,
        window_minutes: i64,
    },
    FaultRateThreshold {
        threshold: usize,
        window_minutes: i64,
    },
    RateLimitThreshold {
        threshold: usize,
        window_minutes: i64,
    },
    EndpointThreshold {
        endpoint: String,
        threshold: usize,
        window_minutes: i64,
    },
    ImpactThreshold { threshold: f64, window_minutes: i64 },
}

impl AlertRule {
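    /// Creates a rule that starts out enabled.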
    pub fn new(
        id: impl Into<String>,
        name: impl Into<String>,
        severity: AlertSeverity,
        rule_type: AlertRuleType,
    ) -> Self {
        Self {
            id: id.into(),
            name: name.into(),
            enabled: true,
            severity,
            rule_type,
        }
    }

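    /// Evaluates this rule against a window of metrics buckets, returning an
    /// alert if the threshold is exceeded. Per-minute rates are computed as
    /// the average over the buckets, which are assumed to have one-minute
    /// granularity; disabled rules, empty input, and `ImpactThreshold` rules
    /// never fire here.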
    pub fn evaluate(&self, metrics: &[MetricsBucket]) -> Option<Alert> {
        if !self.enabled || metrics.is_empty() {
            return None;
        }

        match &self.rule_type {
            AlertRuleType::EventRateThreshold { threshold, .. } => {
                let total_events: usize = metrics.iter().map(|m| m.total_events).sum();
                let events_per_minute = total_events / metrics.len().max(1);

                if events_per_minute > *threshold {
                    Some(Alert::new(
                        self.severity,
                        AlertType::HighEventRate {
                            events_per_minute,
                            threshold: *threshold,
                        },
                        format!(
                            "High chaos event rate detected: {} events/min (threshold: {})",
                            events_per_minute, threshold
                        ),
                    ))
                } else {
                    None
                }
            }
            AlertRuleType::LatencyThreshold { threshold_ms, .. } => {
                let avg_latency: f64 =
                    metrics.iter().map(|m| m.avg_latency_ms).sum::<f64>() / metrics.len() as f64;

                if avg_latency > *threshold_ms as f64 {
                    Some(Alert::new(
                        self.severity,
                        AlertType::HighLatency {
                            avg_latency_ms: avg_latency,
                            threshold_ms: *threshold_ms,
                        },
                        format!(
                            "High latency detected: {:.0}ms (threshold: {}ms)",
                            avg_latency, threshold_ms
                        ),
                    ))
                } else {
                    None
                }
            }
            AlertRuleType::FaultRateThreshold { threshold, .. } => {
                let total_faults: usize = metrics.iter().map(|m| m.total_faults).sum();
                let faults_per_minute = total_faults / metrics.len().max(1);

                if faults_per_minute > *threshold {
                    Some(Alert::new(
                        self.severity,
                        AlertType::HighFaultRate {
                            faults_per_minute,
                            threshold: *threshold,
                        },
                        format!(
                            "High fault injection rate detected: {} faults/min (threshold: {})",
                            faults_per_minute, threshold
                        ),
                    ))
                } else {
                    None
                }
            }
            AlertRuleType::RateLimitThreshold { threshold, .. } => {
                let total_violations: usize = metrics.iter().map(|m| m.rate_limit_violations).sum();
                let violations_per_minute = total_violations / metrics.len().max(1);

                if violations_per_minute > *threshold {
                    Some(Alert::new(
                        self.severity,
                        AlertType::RateLimitViolations {
                            violations_per_minute,
                            threshold: *threshold,
                        },
                        format!(
                            "High rate limit violations: {} violations/min (threshold: {})",
                            violations_per_minute, threshold
                        ),
                    ))
                } else {
                    None
                }
            }
            AlertRuleType::EndpointThreshold {
                endpoint,
                threshold,
                ..
            } => {
                let endpoint_events: usize = metrics
                    .iter()
                    .map(|m| m.affected_endpoints.get(endpoint).copied().unwrap_or(0))
                    .sum();
                let events_per_minute = endpoint_events / metrics.len().max(1);

                if events_per_minute > *threshold {
                    Some(Alert::new(
                        self.severity,
                        AlertType::EndpointStress {
                            endpoint: endpoint.clone(),
                            events_per_minute,
                            threshold: *threshold,
                        },
                        format!(
                            "Endpoint '{}' under chaos stress: {} events/min (threshold: {})",
                            endpoint, events_per_minute, threshold
                        ),
                    ))
                } else {
                    None
                }
            }
            AlertRuleType::ImpactThreshold { .. } => {
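                // Impact-based rules are not evaluated against raw metrics
                // buckets, so they never fire from this path.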
                None
            }
        }
    }
}

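/// Receives alerts as they fire. Implementations must be `Send + Sync`
/// because [`AlertManager`] shares its handlers across threads.
///
/// A minimal custom handler might look like this (a sketch;
/// `FileAlertHandler` is a hypothetical example, not part of this module):
///
/// ```ignore
/// struct FileAlertHandler { path: std::path::PathBuf }
///
/// impl AlertHandler for FileAlertHandler {
///     fn handle(&self, alert: &Alert) {
///         // Append one line per alert; ignore I/O errors for brevity.
///         if let Ok(mut f) = std::fs::OpenOptions::new().append(true).create(true).open(&self.path) {
///             use std::io::Write;
///             let _ = writeln!(f, "{} [{:?}] {}", alert.timestamp, alert.severity, alert.message);
///         }
///     }
/// }
/// ```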
pub trait AlertHandler: Send + Sync {
    fn handle(&self, alert: &Alert);
}

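/// Logs alerts through `tracing` at a level matching their severity.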
pub struct ConsoleAlertHandler;

impl AlertHandler for ConsoleAlertHandler {
    fn handle(&self, alert: &Alert) {
        match alert.severity {
            AlertSeverity::Info => info!("[ALERT] {}: {}", alert.id, alert.message),
            AlertSeverity::Warning => warn!("[ALERT] {}: {}", alert.id, alert.message),
            AlertSeverity::Critical => error!("[ALERT] {}: {}", alert.id, alert.message),
        }
    }
}

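/// Central coordinator for alerting: holds the rule set, the currently active
/// alerts, a bounded history, and the handlers notified when alerts fire.
///
/// A typical flow (a sketch; `buckets` stands in for a `&[MetricsBucket]`
/// gathered elsewhere):
///
/// ```ignore
/// let manager = AlertManager::new();
/// manager.add_rule(AlertRule::new(
///     "high_rate",
///     "High Event Rate",
///     AlertSeverity::Warning,
///     AlertRuleType::EventRateThreshold { threshold: 100, window_minutes: 5 },
/// ));
/// manager.evaluate_rules(&buckets); // fires alerts through registered handlers
/// let active = manager.get_active_alerts();
/// ```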
pub struct AlertManager {
    rules: Arc<RwLock<HashMap<String, AlertRule>>>,
    active_alerts: Arc<RwLock<HashMap<String, Alert>>>,
    alert_history: Arc<RwLock<Vec<Alert>>>,
    handlers: Arc<RwLock<Vec<Box<dyn AlertHandler>>>>,
    max_history: usize,
}

impl AlertManager {
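    /// Creates a manager with a [`ConsoleAlertHandler`] pre-registered and the
    /// alert history capped at 1000 entries.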
    pub fn new() -> Self {
        Self {
            rules: Arc::new(RwLock::new(HashMap::new())),
            active_alerts: Arc::new(RwLock::new(HashMap::new())),
            alert_history: Arc::new(RwLock::new(Vec::new())),
            handlers: Arc::new(RwLock::new(vec![Box::new(ConsoleAlertHandler)])),
            max_history: 1000,
        }
    }

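    /// Registers a rule, replacing any existing rule with the same id.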
    pub fn add_rule(&self, rule: AlertRule) {
        let id = rule.id.clone();
        let mut rules = self.rules.write();
        rules.insert(id.clone(), rule);
        info!("Added alert rule: {}", id);
    }

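    /// Removes a rule by id, returning it if it existed.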
    pub fn remove_rule(&self, id: &str) -> Option<AlertRule> {
        let mut rules = self.rules.write();
        let removed = rules.remove(id);
        if removed.is_some() {
            info!("Removed alert rule: {}", id);
        }
        removed
    }

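    /// Enables or disables a rule, returning an error if the id is unknown.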
    pub fn set_rule_enabled(&self, id: &str, enabled: bool) -> Result<(), String> {
        let mut rules = self.rules.write();
        if let Some(rule) = rules.get_mut(id) {
            rule.enabled = enabled;
            info!("Alert rule '{}' {}", id, if enabled { "enabled" } else { "disabled" });
            Ok(())
        } else {
            Err(format!("Rule '{}' not found", id))
        }
    }

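    /// Returns a snapshot of all configured rules.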
    pub fn get_rules(&self) -> Vec<AlertRule> {
        let rules = self.rules.read();
        rules.values().cloned().collect()
    }

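    /// Evaluates every rule against the given metrics and fires any resulting
    /// alerts. Note there is no deduplication: a rule that remains in
    /// violation fires a fresh alert on each call.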
    pub fn evaluate_rules(&self, metrics: &[MetricsBucket]) {
        let rules = self.rules.read();

        for rule in rules.values() {
            if let Some(alert) = rule.evaluate(metrics) {
                self.fire_alert(alert);
            }
        }
    }

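    /// Records an alert as active, appends it to the bounded history, and
    /// notifies all registered handlers.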
    pub fn fire_alert(&self, alert: Alert) {
        debug!("Firing alert: {} - {}", alert.id, alert.message);

        {
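            // Track the alert as active.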
            let mut active = self.active_alerts.write();
            active.insert(alert.id.clone(), alert.clone());
        }

        {
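            // Append to the bounded history.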
            let mut history = self.alert_history.write();
            history.push(alert.clone());

            if history.len() > self.max_history {
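                // Drop the oldest entries to stay within max_history.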
                let excess = history.len() - self.max_history;
                history.drain(0..excess);
            }
        }

        let handlers = self.handlers.read();
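        // Notify every registered handler.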
        for handler in handlers.iter() {
            handler.handle(&alert);
        }
    }

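    /// Resolves an active alert by id, updating its history entry as well.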
    pub fn resolve_alert(&self, alert_id: &str) -> Result<(), String> {
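        // Take the alert out of the active set, if present.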
        let mut alert = {
            let mut active = self.active_alerts.write();
            active.remove(alert_id)
        };

        if let Some(ref mut alert_ref) = alert {
            alert_ref.resolve();

            let mut history = self.alert_history.write();
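            // Mirror the resolution into the stored history entry.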
            if let Some(historical_alert) = history.iter_mut().find(|a| a.id == alert_id) {
                *historical_alert = alert_ref.clone();
            }

            info!("Resolved alert: {}", alert_id);
            Ok(())
        } else {
            Err(format!("Alert '{}' not found", alert_id))
        }
    }

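    /// Returns all currently active (unresolved) alerts.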
    pub fn get_active_alerts(&self) -> Vec<Alert> {
        let active = self.active_alerts.read();
        active.values().cloned().collect()
    }

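    /// Returns the alert history in chronological order, limited to the most
    /// recent `limit` entries when a limit is given.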
    pub fn get_alert_history(&self, limit: Option<usize>) -> Vec<Alert> {
        let history = self.alert_history.read();
        match limit {
            // Keep the most recent `limit` alerts; truncating from the front
            // would instead keep the oldest.
            Some(limit) => history[history.len().saturating_sub(limit)..].to_vec(),
            None => history.clone(),
        }
    }

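    /// Registers an additional handler to be notified of future alerts.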
    pub fn add_handler(&self, handler: Box<dyn AlertHandler>) {
        let mut handlers = self.handlers.write();
        handlers.push(handler);
    }

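    /// Clears both the active alerts and the alert history.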
    pub fn clear_alerts(&self) {
        let mut active = self.active_alerts.write();
        let mut history = self.alert_history.write();
        active.clear();
        history.clear();
        info!("Cleared all alerts");
    }
}

impl Default for AlertManager {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::analytics::MetricsBucket;

    #[test]
    fn test_alert_creation() {
        let alert = Alert::new(
            AlertSeverity::Warning,
            AlertType::HighEventRate {
                events_per_minute: 100,
                threshold: 50,
            },
            "Test alert",
        );

        assert_eq!(alert.severity, AlertSeverity::Warning);
        assert!(!alert.resolved);
    }

    #[test]
    fn test_alert_resolve() {
        let mut alert = Alert::new(
            AlertSeverity::Info,
            AlertType::Custom {
                message: "test".to_string(),
                metadata: HashMap::new(),
            },
            "Test",
        );

        alert.resolve();
        assert!(alert.resolved);
        assert!(alert.resolved_at.is_some());
    }

    #[test]
    fn test_alert_rule_evaluation() {
        let rule = AlertRule::new(
            "test_rule",
            "Test Rule",
            AlertSeverity::Warning,
            AlertRuleType::EventRateThreshold {
                threshold: 50,
                window_minutes: 1,
            },
        );

        let mut bucket = MetricsBucket::new(Utc::now(), crate::analytics::TimeBucket::Minute);
        bucket.total_events = 100;

        let alert = rule.evaluate(&[bucket]);
        assert!(alert.is_some());

        let alert = alert.unwrap();
        assert_eq!(alert.severity, AlertSeverity::Warning);
    }

    #[test]
    fn test_alert_manager() {
        let manager = AlertManager::new();

        let rule = AlertRule::new(
            "test_rule",
            "Test Rule",
            AlertSeverity::Info,
            AlertRuleType::EventRateThreshold {
                threshold: 10,
                window_minutes: 1,
            },
        );

        manager.add_rule(rule);

        let rules = manager.get_rules();
        assert_eq!(rules.len(), 1);

        manager.remove_rule("test_rule");
        let rules = manager.get_rules();
        assert_eq!(rules.len(), 0);
    }
}