// ipfrs_semantic/regression.rs

1//! Performance regression detection and tracking
2//!
3//! This module provides tools for detecting performance regressions in semantic search
4//! systems by comparing current performance against historical baselines.
5//!
6//! # Features
7//!
8//! - **Baseline Management**: Track performance baselines over time
9//! - **Regression Detection**: Automatically detect performance degradation
10//! - **Trend Analysis**: Identify performance trends
11//! - **Alerting**: Flag significant regressions for investigation
12//!
13//! # Example
14//!
15//! ```rust
16//! use ipfrs_semantic::regression::{RegressionDetector, PerformanceMetrics};
17//! use std::time::Duration;
18//!
19//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
20//! let mut detector = RegressionDetector::new();
21//!
22//! // Record baseline metrics
23//! let baseline = PerformanceMetrics {
24//!     avg_query_latency: Duration::from_micros(500),
25//!     p99_latency: Duration::from_millis(2),
26//!     throughput_qps: 5000.0,
27//!     memory_mb: 512.0,
28//!     index_size: 100000,
29//! };
30//! detector.set_baseline(baseline)?;
31//!
32//! // Test current performance
33//! let current = PerformanceMetrics {
34//!     avg_query_latency: Duration::from_micros(750), // 50% slower!
35//!     p99_latency: Duration::from_millis(3),
36//!     throughput_qps: 4000.0,
37//!     memory_mb: 520.0,
38//!     index_size: 100000,
39//! };
40//!
41//! let report = detector.check_regression(&current)?;
42//! if report.has_regression {
43//!     println!("⚠️  Regression detected!");
44//!     for issue in &report.issues {
45//!         println!("  - {}: {:.1}% change", issue.metric, issue.percent_change);
46//!     }
47//! }
48//! # Ok(())
49//! # }
50//! ```
51
52use ipfrs_core::Result;
53use serde::{Deserialize, Serialize};
54use std::collections::HashMap;
55use std::time::Duration;
56
/// Performance metrics captured for a single test run.
///
/// Serializable (via serde) so baselines can be persisted and reloaded
/// across benchmark sessions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Average (mean) query latency for the run.
    pub avg_query_latency: Duration,
    /// 99th-percentile query latency for the run.
    pub p99_latency: Duration,
    /// Throughput in queries per second.
    pub throughput_qps: f64,
    /// Memory usage in megabytes.
    pub memory_mb: f64,
    /// Index size (number of entries) at the time of the run.
    pub index_size: usize,
}
71
/// A single regression detected for one metric.
#[derive(Debug, Clone)]
pub struct RegressionIssue {
    /// Name of the metric that regressed (e.g. "avg_query_latency").
    pub metric: String,
    /// Baseline value, in the metric's native unit
    /// (microseconds for latency metrics, QPS, or MB).
    pub baseline: f64,
    /// Current value, in the same unit as `baseline`.
    pub current: f64,
    /// Percent change (negative = improvement, positive = regression)
    pub percent_change: f64,
    /// Severity (0.0 to 1.0, where 1.0 is most severe)
    pub severity: f64,
}
86
/// Result of a regression check against the baseline.
#[derive(Debug, Clone)]
pub struct RegressionReport {
    /// Whether any regressions were detected (true iff `issues` is non-empty).
    pub has_regression: bool,
    /// List of detected issues, one per regressed metric.
    pub issues: Vec<RegressionIssue>,
    /// Overall regression score (0.0 to 1.0): mean severity across issues,
    /// or 0.0 when no issues were found.
    pub regression_score: f64,
}
97
/// Configuration for regression detection.
///
/// All thresholds are expressed as fractions, not percents
/// (e.g. `0.2` means a 20% change).
#[derive(Debug, Clone)]
pub struct RegressionConfig {
    /// Threshold for latency regression (e.g., 0.2 = 20% slower).
    /// Applied to both average and P99 latency.
    pub latency_threshold: f64,
    /// Threshold for throughput regression (e.g., 0.15 = 15% lower QPS)
    pub throughput_threshold: f64,
    /// Threshold for memory regression (e.g., 0.25 = 25% more memory)
    pub memory_threshold: f64,
}
108
impl Default for RegressionConfig {
    /// Default thresholds: 15% latency, 10% throughput, 20% memory.
    fn default() -> Self {
        Self {
            latency_threshold: 0.15,    // 15% slower
            throughput_threshold: 0.10, // 10% slower
            memory_threshold: 0.20,     // 20% more memory
        }
    }
}
118
/// Performance regression detector.
///
/// Compares current [`PerformanceMetrics`] against a stored baseline and
/// keeps a bounded, timestamped history of past runs for trend analysis.
pub struct RegressionDetector {
    /// Thresholds used to classify metric changes as regressions.
    config: RegressionConfig,
    /// Baseline metrics; `None` until
    /// [`RegressionDetector::set_baseline`] is called.
    baseline: Option<PerformanceMetrics>,
    /// Timestamped history of recorded metrics (capped at 100 entries;
    /// oldest entries are evicted first).
    history: Vec<(std::time::SystemTime, PerformanceMetrics)>,
}
128
129impl RegressionDetector {
130    /// Create a new regression detector with default config
131    pub fn new() -> Self {
132        Self {
133            config: RegressionConfig::default(),
134            baseline: None,
135            history: Vec::new(),
136        }
137    }
138
139    /// Create a regression detector with custom config
140    pub fn with_config(config: RegressionConfig) -> Self {
141        Self {
142            config,
143            baseline: None,
144            history: Vec::new(),
145        }
146    }
147
148    /// Set the baseline metrics
149    pub fn set_baseline(&mut self, metrics: PerformanceMetrics) -> Result<()> {
150        self.baseline = Some(metrics);
151        Ok(())
152    }
153
154    /// Record metrics in history
155    pub fn record_metrics(&mut self, metrics: PerformanceMetrics) {
156        let now = std::time::SystemTime::now();
157        self.history.push((now, metrics));
158
159        // Keep only last 100 entries
160        if self.history.len() > 100 {
161            self.history.remove(0);
162        }
163    }
164
165    /// Check for regressions against baseline
166    pub fn check_regression(&self, current: &PerformanceMetrics) -> Result<RegressionReport> {
167        let baseline = self
168            .baseline
169            .as_ref()
170            .ok_or_else(|| ipfrs_core::Error::InvalidInput("No baseline set".into()))?;
171
172        let mut issues = Vec::new();
173
174        // Check latency regression
175        let latency_change = self.calculate_change(
176            baseline.avg_query_latency.as_micros() as f64,
177            current.avg_query_latency.as_micros() as f64,
178        );
179        if latency_change > self.config.latency_threshold {
180            issues.push(RegressionIssue {
181                metric: "avg_query_latency".to_string(),
182                baseline: baseline.avg_query_latency.as_micros() as f64,
183                current: current.avg_query_latency.as_micros() as f64,
184                percent_change: latency_change * 100.0,
185                severity: (latency_change / self.config.latency_threshold).min(1.0),
186            });
187        }
188
189        // Check P99 latency regression
190        let p99_change = self.calculate_change(
191            baseline.p99_latency.as_micros() as f64,
192            current.p99_latency.as_micros() as f64,
193        );
194        if p99_change > self.config.latency_threshold {
195            issues.push(RegressionIssue {
196                metric: "p99_latency".to_string(),
197                baseline: baseline.p99_latency.as_micros() as f64,
198                current: current.p99_latency.as_micros() as f64,
199                percent_change: p99_change * 100.0,
200                severity: (p99_change / self.config.latency_threshold).min(1.0),
201            });
202        }
203
204        // Check throughput regression (negative change means worse)
205        let throughput_change =
206            self.calculate_change(baseline.throughput_qps, current.throughput_qps);
207        if throughput_change < -self.config.throughput_threshold {
208            issues.push(RegressionIssue {
209                metric: "throughput_qps".to_string(),
210                baseline: baseline.throughput_qps,
211                current: current.throughput_qps,
212                percent_change: throughput_change * 100.0,
213                severity: (-throughput_change / self.config.throughput_threshold).min(1.0),
214            });
215        }
216
217        // Check memory regression
218        let memory_change = self.calculate_change(baseline.memory_mb, current.memory_mb);
219        if memory_change > self.config.memory_threshold {
220            issues.push(RegressionIssue {
221                metric: "memory_mb".to_string(),
222                baseline: baseline.memory_mb,
223                current: current.memory_mb,
224                percent_change: memory_change * 100.0,
225                severity: (memory_change / self.config.memory_threshold).min(1.0),
226            });
227        }
228
229        // Calculate overall regression score
230        let regression_score = if issues.is_empty() {
231            0.0
232        } else {
233            issues.iter().map(|i| i.severity).sum::<f64>() / issues.len() as f64
234        };
235
236        Ok(RegressionReport {
237            has_regression: !issues.is_empty(),
238            issues,
239            regression_score,
240        })
241    }
242
243    /// Calculate percent change (positive = increase, negative = decrease)
244    fn calculate_change(&self, baseline: f64, current: f64) -> f64 {
245        if baseline == 0.0 {
246            return 0.0;
247        }
248        (current - baseline) / baseline
249    }
250
251    /// Get historical trend for a metric
252    pub fn get_trend(&self, metric_name: &str) -> Vec<(std::time::SystemTime, f64)> {
253        self.history
254            .iter()
255            .map(|(time, metrics)| {
256                let value = match metric_name {
257                    "avg_query_latency" => metrics.avg_query_latency.as_micros() as f64,
258                    "p99_latency" => metrics.p99_latency.as_micros() as f64,
259                    "throughput_qps" => metrics.throughput_qps,
260                    "memory_mb" => metrics.memory_mb,
261                    _ => 0.0,
262                };
263                (*time, value)
264            })
265            .collect()
266    }
267
268    /// Generate a summary report of all historical metrics
269    pub fn summary(&self) -> HashMap<String, MetricSummary> {
270        let mut summaries = HashMap::new();
271
272        if self.history.is_empty() {
273            return summaries;
274        }
275
276        // Collect metrics for each type
277        let mut latencies = Vec::new();
278        let mut p99_latencies = Vec::new();
279        let mut throughputs = Vec::new();
280        let mut memories = Vec::new();
281
282        for (_, metrics) in &self.history {
283            latencies.push(metrics.avg_query_latency.as_micros() as f64);
284            p99_latencies.push(metrics.p99_latency.as_micros() as f64);
285            throughputs.push(metrics.throughput_qps);
286            memories.push(metrics.memory_mb);
287        }
288
289        summaries.insert(
290            "avg_query_latency".to_string(),
291            Self::compute_summary(&latencies),
292        );
293        summaries.insert(
294            "p99_latency".to_string(),
295            Self::compute_summary(&p99_latencies),
296        );
297        summaries.insert(
298            "throughput_qps".to_string(),
299            Self::compute_summary(&throughputs),
300        );
301        summaries.insert("memory_mb".to_string(), Self::compute_summary(&memories));
302
303        summaries
304    }
305
306    /// Compute statistical summary for a metric
307    fn compute_summary(values: &[f64]) -> MetricSummary {
308        if values.is_empty() {
309            return MetricSummary::default();
310        }
311
312        let mut sorted = values.to_vec();
313        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
314
315        let min = sorted[0];
316        let max = sorted[sorted.len() - 1];
317        let mean = sorted.iter().sum::<f64>() / sorted.len() as f64;
318        let median = sorted[sorted.len() / 2];
319
320        MetricSummary {
321            min,
322            max,
323            mean,
324            median,
325            count: values.len(),
326        }
327    }
328}
329
impl Default for RegressionDetector {
    /// Equivalent to [`RegressionDetector::new`]: default config, no
    /// baseline, empty history.
    fn default() -> Self {
        Self::new()
    }
}
335
/// Statistical summary of one metric across recorded history.
#[derive(Debug, Clone, Default)]
pub struct MetricSummary {
    /// Smallest observed value.
    pub min: f64,
    /// Largest observed value.
    pub max: f64,
    /// Arithmetic mean of all observed values.
    pub mean: f64,
    /// Median value (upper-middle element for even-length samples).
    pub median: f64,
    /// Number of samples the summary was computed from.
    pub count: usize,
}
345
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: 500µs avg latency, 2ms p99, 5000 QPS, 512 MB,
    /// 100k-entry index. Tests derive variations via struct update syntax.
    fn sample_metrics() -> PerformanceMetrics {
        PerformanceMetrics {
            avg_query_latency: Duration::from_micros(500),
            p99_latency: Duration::from_millis(2),
            throughput_qps: 5000.0,
            memory_mb: 512.0,
            index_size: 100000,
        }
    }

    #[test]
    fn test_regression_detector_creation() {
        let detector = RegressionDetector::new();
        assert!(detector.baseline.is_none());
        assert_eq!(detector.history.len(), 0);
    }

    #[test]
    fn test_set_baseline() {
        let mut detector = RegressionDetector::new();
        detector.set_baseline(sample_metrics()).unwrap();
        assert!(detector.baseline.is_some());
    }

    #[test]
    fn test_no_regression() {
        let mut detector = RegressionDetector::new();
        detector.set_baseline(sample_metrics()).unwrap();

        // Metrics identical to the baseline must not be flagged.
        let report = detector.check_regression(&sample_metrics()).unwrap();
        assert!(!report.has_regression);
        assert_eq!(report.issues.len(), 0);
    }

    #[test]
    fn test_latency_regression() {
        let mut detector = RegressionDetector::new();
        detector.set_baseline(sample_metrics()).unwrap();

        // 50% slower average latency exceeds the 15% default threshold.
        let current = PerformanceMetrics {
            avg_query_latency: Duration::from_micros(750),
            ..sample_metrics()
        };

        let report = detector.check_regression(&current).unwrap();
        assert!(report.has_regression);
        assert!(report
            .issues
            .iter()
            .any(|i| i.metric == "avg_query_latency"));
    }

    #[test]
    fn test_throughput_regression() {
        let mut detector = RegressionDetector::new();
        detector.set_baseline(sample_metrics()).unwrap();

        // 20% lower throughput exceeds the 10% default threshold.
        let current = PerformanceMetrics {
            throughput_qps: 4000.0,
            ..sample_metrics()
        };

        let report = detector.check_regression(&current).unwrap();
        assert!(report.has_regression);
        assert!(report.issues.iter().any(|i| i.metric == "throughput_qps"));
    }

    #[test]
    fn test_memory_regression() {
        let mut detector = RegressionDetector::new();
        detector.set_baseline(sample_metrics()).unwrap();

        // 30% more memory exceeds the 20% default threshold.
        let current = PerformanceMetrics {
            memory_mb: 665.6, // +30%
            ..sample_metrics()
        };

        let report = detector.check_regression(&current).unwrap();
        assert!(report.has_regression);
        assert!(report.issues.iter().any(|i| i.metric == "memory_mb"));
    }

    #[test]
    fn test_record_metrics() {
        let mut detector = RegressionDetector::new();
        detector.record_metrics(sample_metrics());
        detector.record_metrics(sample_metrics());
        assert_eq!(detector.history.len(), 2);
    }

    #[test]
    fn test_summary() {
        let mut detector = RegressionDetector::new();

        // Ten runs with slightly increasing average latency.
        for i in 0..10 {
            detector.record_metrics(PerformanceMetrics {
                avg_query_latency: Duration::from_micros(500 + i * 10),
                ..sample_metrics()
            });
        }

        let summary = detector.summary();
        assert!(summary.contains_key("avg_query_latency"));
        assert_eq!(summary["avg_query_latency"].count, 10);
    }

    #[test]
    fn test_custom_thresholds() {
        let config = RegressionConfig {
            latency_threshold: 0.50, // 50% threshold
            throughput_threshold: 0.30,
            memory_threshold: 0.40,
        };

        let mut detector = RegressionDetector::with_config(config);
        detector.set_baseline(sample_metrics()).unwrap();

        // 30% slower latency stays under the custom 50% threshold.
        let current = PerformanceMetrics {
            avg_query_latency: Duration::from_micros(650),
            ..sample_metrics()
        };

        let report = detector.check_regression(&current).unwrap();
        assert!(!report.has_regression);
    }
}