Skip to main content

optirs_bench/regression_tester/
alerts.rs

1// Alert system for performance regression notifications
2//
3// This module provides a comprehensive alerting system that can notify stakeholders
4// through multiple channels (email, Slack, GitHub issues) when performance
5// regressions are detected.
6
7use crate::error::Result;
8use crate::regression_tester::config::{Alert, AlertConfig, AlertSeverity, AlertStatus};
9use crate::regression_tester::types::RegressionResult;
10use scirs2_core::numeric::Float;
11use std::collections::VecDeque;
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13
/// Alert system for regression notifications
///
/// Manages alert generation, notification delivery, cooldown periods,
/// and integration with external services like email, Slack, and GitHub.
#[derive(Debug)]
pub struct AlertSystem {
    /// Alert configuration: enabled channels, severity threshold, cooldown
    config: AlertConfig,
    /// Alert history for tracking and cooldown management
    /// (bounded to the 100 most recent alerts by `send_alert`)
    alert_history: VecDeque<Alert>,
}
25
26impl AlertSystem {
27    /// Create a new alert system with default configuration
28    pub fn new() -> Self {
29        Self {
30            config: AlertConfig::default(),
31            alert_history: VecDeque::new(),
32        }
33    }
34
35    /// Create a new alert system with custom configuration
36    pub fn with_config(config: AlertConfig) -> Self {
37        Self {
38            config,
39            alert_history: VecDeque::new(),
40        }
41    }
42
43    /// Get the current alert configuration
44    pub fn config(&self) -> &AlertConfig {
45        &self.config
46    }
47
48    /// Update the alert configuration
49    pub fn update_config(&mut self, config: AlertConfig) {
50        self.config = config;
51    }
52
53    /// Get alert history
54    pub fn alert_history(&self) -> &VecDeque<Alert> {
55        &self.alert_history
56    }
57
58    /// Send an alert for a regression
59    pub fn send_alert<A: Float>(&mut self, regression: &RegressionResult<A>) -> Result<()> {
60        if regression.severity < self.config.severity_threshold {
61            return Ok(()); // Below threshold
62        }
63
64        let alert = Alert::new(
65            format!(
66                "alert_{}",
67                SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs()
68            ),
69            self.map_severity(regression.severity),
70            format!(
71                "Performance regression detected in {}: {:.2}% degradation",
72                regression.test_id, regression.performance_change_percent
73            ),
74            regression.test_id.clone(),
75        );
76
77        self.alert_history.push_back(alert.clone());
78
79        // Maintain alert history size
80        if self.alert_history.len() > 100 {
81            self.alert_history.pop_front();
82        }
83
84        // Send actual alerts through configured channels
85        self.send_alert_notifications(&alert)?;
86
87        Ok(())
88    }
89
90    /// Map numeric severity to AlertSeverity enum
91    fn map_severity(&self, severity: f64) -> AlertSeverity {
92        match severity {
93            s if s >= 0.8 => AlertSeverity::Critical,
94            s if s >= 0.6 => AlertSeverity::High,
95            s if s >= 0.3 => AlertSeverity::Medium,
96            _ => AlertSeverity::Low,
97        }
98    }
99
100    /// Send alert notifications through configured channels
101    fn send_alert_notifications(&self, alert: &Alert) -> Result<()> {
102        // Check if alerts are enabled and severity meets threshold
103        if !self.config.enable_alerts || self.severity_below_threshold(alert) {
104            return Ok(());
105        }
106
107        // Check cooldown period
108        if self.is_in_cooldown_period(alert)? {
109            return Ok(());
110        }
111
112        let mut notification_results = Vec::new();
113
114        // Send email notifications
115        if self.config.enable_email {
116            match self.send_email_notification(alert) {
117                Ok(()) => notification_results.push("Email sent successfully".to_string()),
118                Err(e) => notification_results.push(format!("Email failed: {}", e)),
119            }
120        }
121
122        // Send Slack notifications
123        if self.config.enable_slack {
124            match self.send_slack_notification(alert) {
125                Ok(()) => {
126                    notification_results.push("Slack notification sent successfully".to_string())
127                }
128                Err(e) => notification_results.push(format!("Slack notification failed: {}", e)),
129            }
130        }
131
132        // Create GitHub issues
133        if self.config.enable_github_issues {
134            match self.create_github_issue(alert) {
135                Ok(()) => {
136                    notification_results.push("GitHub issue created successfully".to_string())
137                }
138                Err(e) => notification_results.push(format!("GitHub issue creation failed: {}", e)),
139            }
140        }
141
142        // Log notification results
143        for result in notification_results {
144            eprintln!("Alert notification: {}", result);
145        }
146
147        Ok(())
148    }
149
150    /// Check if alert severity is below configured threshold
151    fn severity_below_threshold(&self, alert: &Alert) -> bool {
152        let alert_severity_value = match alert.severity {
153            AlertSeverity::Critical => 1.0,
154            AlertSeverity::High => 0.75,
155            AlertSeverity::Medium => 0.5,
156            AlertSeverity::Low => 0.25,
157        };
158        alert_severity_value < self.config.severity_threshold
159    }
160
161    /// Check if we're in cooldown period for similar alerts
162    fn is_in_cooldown_period(&self, alert: &Alert) -> Result<bool> {
163        let cooldown_duration = Duration::from_secs(self.config.cooldown_minutes * 60);
164        let current_time = SystemTime::now();
165
166        // Check for similar recent alerts
167        for recent_alert in self.alert_history.iter().rev().take(10) {
168            if recent_alert.regression_id == alert.regression_id {
169                let recent_time = UNIX_EPOCH + Duration::from_secs(recent_alert.timestamp);
170                if current_time.duration_since(recent_time)? < cooldown_duration {
171                    return Ok(true);
172                }
173            }
174        }
175
176        Ok(false)
177    }
178
179    /// Send email notification
180    fn send_email_notification(&self, alert: &Alert) -> Result<()> {
181        // In a real implementation, this would use an email service like:
182        // - SMTP with lettre crate
183        // - AWS SES
184        // - SendGrid
185        // - Mailgun
186
187        let email_body = self.format_email_body(alert);
188        let subject = format!("Performance Regression Alert: {}", alert.regression_id);
189
190        // Placeholder implementation - would integrate with actual email service
191        eprintln!("EMAIL ALERT:");
192        eprintln!("To: performance-team@company.com");
193        eprintln!("Subject: {}", subject);
194        eprintln!("Body:\n{}", email_body);
195        eprintln!("---");
196
197        // INTEGRATION STUB (v1.0.0): Email service integration planned for v1.1.0+
198        //
199        // For v1.0.0, alerts are printed to stderr for logging/monitoring integration.
200        // Production systems should redirect stderr to their alerting infrastructure.
201        //
202        // PLANNED (v1.1.0+): Direct email integration with lettre crate:
203        // let email = Message::builder()
204        //     .from("alerts@company.com".parse()?)
205        //     .to("performance-team@company.com".parse()?)
206        //     .subject(&subject)
207        //     .body(email_body)?;
208        // let mailer = SmtpTransport::relay("smtp.company.com")?.build();
209        // mailer.send(&email)?;
210
211        Ok(())
212    }
213
214    /// Send Slack notification
215    fn send_slack_notification(&self, alert: &Alert) -> Result<()> {
216        // In a real implementation, this would use:
217        // - Slack webhook URL
218        // - reqwest crate for HTTP requests
219        // - JSON payload formatting
220
221        let slack_message = self.format_slack_message(alert);
222
223        // Placeholder implementation - would make HTTP POST to Slack webhook
224        eprintln!("SLACK ALERT:");
225        eprintln!("Channel: #performance-alerts");
226        eprintln!("Message: {}", slack_message);
227        eprintln!("---");
228
229        // INTEGRATION STUB (v1.0.0): Slack API integration planned for v1.1.0+
230        //
231        // For v1.0.0, alerts are printed to stderr for logging/monitoring integration.
232        // Production systems should redirect stderr to their alerting infrastructure.
233        //
234        // PLANNED (v1.1.0+): Direct Slack webhook integration:
235        // let webhook_url = std::env::var("SLACK_WEBHOOK_URL")?;
236        // let payload = json!({
237        //     "text": slack_message,
238        //     "channel": "#performance-alerts",
239        //     "username": "Performance Bot"
240        // });
241        // let client = reqwest::Client::new();
242        // client.post(&webhook_url).json(&payload).send()?;
243
244        Ok(())
245    }
246
247    /// Create GitHub issue
248    fn create_github_issue(&self, alert: &Alert) -> Result<()> {
249        // In a real implementation, this would use:
250        // - GitHub API with octocrab crate
251        // - Personal access token
252        // - Repository configuration
253
254        let issue_title = format!("Performance regression in {}", alert.regression_id);
255        let issue_body = self.format_github_issue_body(alert);
256
257        // Placeholder implementation - would create actual GitHub issue
258        eprintln!("GITHUB ISSUE:");
259        eprintln!("Repository: company/performance-monitoring");
260        eprintln!("Title: {}", issue_title);
261        eprintln!("Body:\n{}", issue_body);
262        eprintln!("Labels: performance, regression, automated");
263        eprintln!("---");
264
265        // INTEGRATION STUB (v1.0.0): GitHub API integration planned for v1.1.0+
266        //
267        // For v1.0.0, issue information is printed to stderr for manual issue creation
268        // or integration with existing issue tracking systems.
269        //
270        // PLANNED (v1.1.0+): Direct GitHub API integration with octocrab:
271        // let token = std::env::var("GITHUB_TOKEN")?;
272        // let octocrab = octocrab::Octocrab::builder().personal_token(token).build()?;
273        // octocrab.issues("company", "performance-monitoring")
274        //     .create(&issue_title)
275        //     .body(&issue_body)
276        //     .labels(vec!["performance", "regression", "automated"])
277        //     .send().await?;
278
279        Ok(())
280    }
281
282    /// Format email body for alert
283    fn format_email_body(&self, alert: &Alert) -> String {
284        format!(
285            "Performance Regression Alert\n\
286            =============================\n\n\
287            Alert ID: {}\n\
288            Timestamp: {}\n\
289            Severity: {:?}\n\
290            Test: {}\n\n\
291            Details:\n\
292            {}\n\n\
293            Please investigate this performance regression immediately.\n\
294            \n\
295            View full details at: https://performance-dashboard.company.com/alerts/{}\n\
296            \n\
297            Best regards,\n\
298            Performance Monitoring System",
299            alert.id, alert.timestamp, alert.severity, alert.regression_id, alert.message, alert.id
300        )
301    }
302
303    /// Format Slack message for alert
304    fn format_slack_message(&self, alert: &Alert) -> String {
305        let severity_emoji = match alert.severity {
306            AlertSeverity::Critical => "🚨",
307            AlertSeverity::High => "⚠️",
308            AlertSeverity::Medium => "🟡",
309            AlertSeverity::Low => "🔵",
310        };
311
312        format!(
313            "{} *Performance Regression Alert*\n\
314            *Test:* {}\n\
315            *Severity:* {:?}\n\
316            *Details:* {}\n\
317            *Time:* <t:{}:F>\n\
318            <https://performance-dashboard.company.com/alerts/{}|View Details>",
319            severity_emoji,
320            alert.regression_id,
321            alert.severity,
322            alert.message,
323            alert.timestamp,
324            alert.id
325        )
326    }
327
328    /// Format GitHub issue body for alert
329    fn format_github_issue_body(&self, alert: &Alert) -> String {
330        format!(
331            "## Performance Regression Detected\n\n\
332            **Alert ID:** {}\n\
333            **Timestamp:** {}\n\
334            **Severity:** {:?}\n\
335            **Test:** {}\n\n\
336            ### Description\n\
337            {}\n\n\
338            ### Investigation Steps\n\
339            - [ ] Review recent code changes that might affect performance\n\
340            - [ ] Check system resource utilization during test execution\n\
341            - [ ] Run additional test iterations to confirm regression\n\
342            - [ ] Analyze profiling data for performance bottlenecks\n\
343            - [ ] Compare with baseline performance metrics\n\n\
344            ### Links\n\
345            - [Performance Dashboard](https://performance-dashboard.company.com/alerts/{})\n\
346            - [Test Results](https://ci.company.com/tests/{})\n\n\
347            ---\n\
348            *This issue was automatically created by the performance monitoring system.*",
349            alert.id,
350            alert.timestamp,
351            alert.severity,
352            alert.regression_id,
353            alert.message,
354            alert.id,
355            alert.regression_id
356        )
357    }
358
359    /// Get active alerts (not acknowledged or resolved)
360    pub fn get_active_alerts(&self) -> Vec<&Alert> {
361        self.alert_history
362            .iter()
363            .filter(|alert| alert.is_active())
364            .collect()
365    }
366
367    /// Get recent alerts within the specified duration
368    pub fn get_recent_alerts(&self, duration: Duration) -> Vec<&Alert> {
369        let cutoff_time = SystemTime::now()
370            .duration_since(UNIX_EPOCH)
371            .unwrap_or_default()
372            .as_secs()
373            .saturating_sub(duration.as_secs());
374
375        self.alert_history
376            .iter()
377            .filter(|alert| alert.timestamp >= cutoff_time)
378            .collect()
379    }
380
381    /// Acknowledge an alert by ID
382    pub fn acknowledge_alert(&mut self, alert_id: &str) -> Result<()> {
383        for alert in &mut self.alert_history {
384            if alert.id == alert_id {
385                alert.acknowledge();
386                return Ok(());
387            }
388        }
389        Err(crate::error::OptimError::InvalidParameter(format!(
390            "Alert with ID {} not found",
391            alert_id
392        )))
393    }
394
395    /// Resolve an alert by ID
396    pub fn resolve_alert(&mut self, alert_id: &str) -> Result<()> {
397        for alert in &mut self.alert_history {
398            if alert.id == alert_id {
399                alert.resolve();
400                return Ok(());
401            }
402        }
403        Err(crate::error::OptimError::InvalidParameter(format!(
404            "Alert with ID {} not found",
405            alert_id
406        )))
407    }
408
409    /// Clear old alerts from history
410    pub fn cleanup_old_alerts(&mut self, max_age: Duration) -> usize {
411        let now = SystemTime::now()
412            .duration_since(UNIX_EPOCH)
413            .unwrap_or_default()
414            .as_secs();
415        let cutoff_time = now.saturating_sub(max_age.as_secs());
416
417        // Debug output
418        println!(
419            "Cleanup: now = {}, max_age = {} secs, cutoff_time = {}",
420            now,
421            max_age.as_secs(),
422            cutoff_time
423        );
424
425        let original_len = self.alert_history.len();
426        self.alert_history.retain(|alert| {
427            let keep = alert.timestamp >= cutoff_time;
428            println!(
429                "Alert timestamp {}: {} >= {} = {}",
430                alert.timestamp, alert.timestamp, cutoff_time, keep
431            );
432            keep
433        });
434        let removed = original_len - self.alert_history.len();
435        println!("Removed {} alerts", removed);
436        removed
437    }
438
439    /// Get statistics about alerts
440    pub fn get_alert_statistics(&self) -> AlertStatistics {
441        let total_alerts = self.alert_history.len();
442        let active_alerts = self.get_active_alerts().len();
443
444        let severity_counts =
445            self.alert_history
446                .iter()
447                .fold(SeverityCounts::default(), |mut counts, alert| {
448                    match alert.severity {
449                        AlertSeverity::Critical => counts.critical += 1,
450                        AlertSeverity::High => counts.high += 1,
451                        AlertSeverity::Medium => counts.medium += 1,
452                        AlertSeverity::Low => counts.low += 1,
453                    }
454                    counts
455                });
456
457        AlertStatistics {
458            total_alerts,
459            active_alerts,
460            severity_counts,
461        }
462    }
463}
464
465impl Default for AlertSystem {
466    fn default() -> Self {
467        Self::new()
468    }
469}
470
/// Alert statistics
///
/// Aggregate counts computed by [`AlertSystem::get_alert_statistics`]
/// over the current alert history.
#[derive(Debug, Clone)]
pub struct AlertStatistics {
    /// Total number of alerts in history
    pub total_alerts: usize,
    /// Number of active (neither acknowledged nor resolved) alerts
    pub active_alerts: usize,
    /// Count of alerts by severity level
    pub severity_counts: SeverityCounts,
}
481
/// Count of alerts by severity level
///
/// All counters start at zero via `Default` and are incremented while
/// folding over the alert history.
#[derive(Debug, Clone, Default)]
pub struct SeverityCounts {
    /// Number of critical alerts
    pub critical: usize,
    /// Number of high severity alerts
    pub high: usize,
    /// Number of medium severity alerts
    pub medium: usize,
    /// Number of low severity alerts
    pub low: usize,
}
494
#[cfg(test)]
mod tests {
    use super::*;
    use crate::regression_tester::types::{
        ChangePointAnalysis, OutlierAnalysis, RegressionAnalysis, StatisticalTestResult,
        TrendAnalysis, TrendDirection,
    };

    /// Build a minimal `RegressionResult` fixture with the given severity and
    /// test ID; all analysis sub-structures are left empty so only the fields
    /// the alert system reads (severity, test_id, performance change) matter.
    fn create_test_regression(severity: f64, test_id: &str) -> RegressionResult<f64> {
        RegressionResult {
            test_id: test_id.to_string(),
            regression_detected: true,
            severity,
            confidence: 0.95,
            performance_change_percent: 15.0,
            memory_change_percent: 5.0,
            affected_metrics: vec!["timing".to_string()],
            statistical_tests: vec![],
            analysis: RegressionAnalysis {
                trend_analysis: TrendAnalysis {
                    direction: TrendDirection::Degrading,
                    magnitude: 15.0,
                    significance: 0.95,
                    start_point: None,
                },
                change_point_analysis: ChangePointAnalysis {
                    change_points: vec![],
                    magnitudes: vec![],
                    confidences: vec![],
                },
                outlier_analysis: OutlierAnalysis {
                    outlier_indices: vec![],
                    outlier_scores: vec![],
                    outlier_types: vec![],
                },
                root_cause_hints: vec![],
            },
            recommendations: vec![],
        }
    }

    #[test]
    fn test_alert_system_creation() {
        let alert_system = AlertSystem::new();
        // NOTE(review): assumes AlertConfig::default() enables alerts — defined elsewhere
        assert!(alert_system.config().enable_alerts);
        assert_eq!(alert_system.alert_history().len(), 0);
    }

    #[test]
    fn test_send_alert_above_threshold() {
        let mut alert_system = AlertSystem::new();
        let regression = create_test_regression(0.8, "test_high_severity");

        let result = alert_system.send_alert(&regression);
        assert!(result.is_ok());
        assert_eq!(alert_system.alert_history().len(), 1);

        // 0.8 maps to Critical (the >= 0.8 branch of map_severity)
        let alert = &alert_system.alert_history()[0];
        assert!(matches!(alert.severity, AlertSeverity::Critical));
        assert_eq!(alert.regression_id, "test_high_severity");
    }

    #[test]
    fn test_send_alert_below_threshold() {
        let mut alert_system = AlertSystem::new();
        let regression = create_test_regression(0.01, "test_low_severity"); // Below default threshold of 0.05

        // Sub-threshold regressions are dropped without recording an alert
        let result = alert_system.send_alert(&regression);
        assert!(result.is_ok());
        assert_eq!(alert_system.alert_history().len(), 0); // Below threshold
    }

    #[test]
    fn test_severity_mapping() {
        let alert_system = AlertSystem::new();

        // One probe per band: >=0.8, >=0.6, >=0.3, and the fallthrough
        assert!(matches!(
            alert_system.map_severity(0.9),
            AlertSeverity::Critical
        ));
        assert!(matches!(
            alert_system.map_severity(0.7),
            AlertSeverity::High
        ));
        assert!(matches!(
            alert_system.map_severity(0.4),
            AlertSeverity::Medium
        ));
        assert!(matches!(alert_system.map_severity(0.1), AlertSeverity::Low));
    }

    #[test]
    fn test_alert_history_limit() {
        let mut alert_system = AlertSystem::new();

        // Add more than 100 alerts
        for i in 0..105 {
            let regression = create_test_regression(0.8, &format!("test_{}", i));
            let _ = alert_system.send_alert(&regression);
        }

        // Should maintain limit of 100 (oldest alerts evicted front-first)
        assert_eq!(alert_system.alert_history().len(), 100);
    }

    #[test]
    fn test_custom_config() {
        let custom_config = AlertConfig {
            enable_alerts: true,
            enable_email: true,
            enable_slack: true,
            enable_github_issues: false,
            severity_threshold: 0.8,
            cooldown_minutes: 30,
        };

        // with_config must preserve every field of the supplied config
        let alert_system = AlertSystem::with_config(custom_config.clone());
        assert_eq!(alert_system.config().severity_threshold, 0.8);
        assert_eq!(alert_system.config().cooldown_minutes, 30);
        assert!(alert_system.config().enable_email);
        assert!(alert_system.config().enable_slack);
        assert!(!alert_system.config().enable_github_issues);
    }

    #[test]
    fn test_alert_statistics() {
        let mut alert_system = AlertSystem::new();

        // Add alerts with different severities (one per severity band)
        let _ = alert_system.send_alert(&create_test_regression(0.9, "critical"));
        let _ = alert_system.send_alert(&create_test_regression(0.7, "high"));
        let _ = alert_system.send_alert(&create_test_regression(0.4, "medium"));
        let _ = alert_system.send_alert(&create_test_regression(0.2, "low"));

        let stats = alert_system.get_alert_statistics();
        assert_eq!(stats.total_alerts, 4);
        assert_eq!(stats.active_alerts, 4);
        assert_eq!(stats.severity_counts.critical, 1);
        assert_eq!(stats.severity_counts.high, 1);
        assert_eq!(stats.severity_counts.medium, 1);
        assert_eq!(stats.severity_counts.low, 1);
    }

    #[test]
    fn test_alert_acknowledgment() {
        let mut alert_system = AlertSystem::new();
        let regression = create_test_regression(0.8, "test_ack");

        let _ = alert_system.send_alert(&regression);
        let alert_id = alert_system.alert_history()[0].id.clone();

        let result = alert_system.acknowledge_alert(&alert_id);
        assert!(result.is_ok());

        // Acknowledged alerts are no longer considered active
        let alert = &alert_system.alert_history()[0];
        assert!(!alert.is_active());
        assert!(matches!(alert.status, AlertStatus::Acknowledged));
    }

    #[test]
    fn test_cleanup_old_alerts() {
        let mut alert_system = AlertSystem::new();

        // Add some alerts
        for i in 0..5 {
            let regression = create_test_regression(0.8, &format!("test_{}", i));
            let _ = alert_system.send_alert(&regression);
        }

        assert_eq!(alert_system.alert_history().len(), 5);

        // Wait longer than the cleanup duration to ensure alerts are "old".
        // Timestamps have second granularity, so a real sleep (not a mock)
        // is needed here for the cutoff comparison to flip.
        std::thread::sleep(Duration::from_secs(2));
        let removed = alert_system.cleanup_old_alerts(Duration::from_secs(1));
        assert_eq!(removed, 5);
        assert_eq!(alert_system.alert_history().len(), 0);
    }
}