// trustformers_core/performance/continuous.rs
1//! Continuous benchmarking infrastructure for performance tracking
2
3use crate::performance::benchmark::{BenchmarkResult, BenchmarkSuite};
4use anyhow::Result;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8
/// A detected performance regression between a baseline run and the current run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
    /// Name of the benchmark that regressed.
    pub benchmark_name: String,
    /// Metric that regressed (e.g. "avg_latency", "throughput", "memory").
    pub metric_name: String,
    /// Baseline (previous run) value of the metric.
    pub previous_value: f64,
    /// Current value of the metric.
    pub current_value: f64,
    /// Size of the regression as a percentage of the baseline (positive = worse).
    pub regression_percent: f64,
    /// Whether the regression is considered statistically significant.
    pub is_significant: bool,
    /// Confidence level attached to the significance verdict.
    pub confidence: f64,
}
27
/// Configuration for continuous benchmarking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContinuousBenchmarkConfig {
    /// Directory where run results and the history file are stored.
    pub results_dir: PathBuf,
    /// Git commit SHA of the code under test, when known.
    pub commit_sha: Option<String>,
    /// Git branch name, used to select a matching baseline run.
    pub branch: Option<String>,
    /// Build configuration label (e.g. "debug" or "release").
    pub build_config: String,
    /// Change (in percent of baseline) above which a regression is reported.
    pub regression_threshold: f64,
    /// Number of benchmark iterations collected per run for statistical significance.
    pub num_runs: usize,
    /// Target confidence level for regression detection (e.g. 0.95).
    pub confidence_level: f64,
}
46
47impl Default for ContinuousBenchmarkConfig {
48    fn default() -> Self {
49        Self {
50            results_dir: PathBuf::from("benchmark_results"),
51            commit_sha: None,
52            branch: None,
53            build_config: "release".to_string(),
54            regression_threshold: 5.0, // 5% regression threshold
55            num_runs: 5,
56            confidence_level: 0.95,
57        }
58    }
59}
60
/// Continuous benchmark runner: executes suites, persists results, and
/// detects regressions against stored history.
pub struct ContinuousBenchmark {
    /// Runner configuration (paths, thresholds, git metadata).
    config: ContinuousBenchmarkConfig,
    /// Accumulated results and metadata from previous runs.
    history: BenchmarkHistory,
}
66
67impl ContinuousBenchmark {
68    /// Create new continuous benchmark runner
69    pub fn new(config: ContinuousBenchmarkConfig) -> Result<Self> {
70        // Create results directory if it doesn't exist
71        std::fs::create_dir_all(&config.results_dir)?;
72
73        // Load history
74        let history = BenchmarkHistory::load(&config.results_dir)?;
75
76        Ok(Self { config, history })
77    }
78
79    /// Run benchmarks and check for regressions
80    pub fn run_and_check(
81        &mut self,
82        suite: &mut BenchmarkSuite,
83    ) -> Result<Vec<PerformanceRegression>> {
84        // Run benchmarks multiple times for statistical significance
85        let mut all_results = Vec::new();
86
87        for run in 0..self.config.num_runs {
88            println!(
89                "Running benchmark iteration {}/{}",
90                run + 1,
91                self.config.num_runs
92            );
93            // Note: In real implementation, you'd re-run the benchmarks here
94            // For now, we'll use the existing results
95            all_results.extend(suite.results().to_vec());
96        }
97
98        // Save results
99        let run_id = self.generate_run_id();
100        self.save_results(&run_id, &all_results)?;
101
102        // Check for regressions
103        let regressions = self.check_regressions(&all_results)?;
104
105        // Update history
106        self.history.add_run(run_id, all_results);
107        self.history.save(&self.config.results_dir)?;
108
109        Ok(regressions)
110    }
111
112    /// Check for performance regressions
113    fn check_regressions(
114        &self,
115        current_results: &[BenchmarkResult],
116    ) -> Result<Vec<PerformanceRegression>> {
117        let mut regressions = Vec::new();
118
119        // Get baseline results (previous run on same branch/config)
120        let baseline = self.history.get_baseline(&self.config.branch, &self.config.build_config);
121
122        if let Some(baseline_results) = baseline {
123            for current in current_results {
124                if let Some(baseline) = baseline_results.iter().find(|b| b.name == current.name) {
125                    // Check latency regression
126                    let latency_regression = self.check_metric_regression(
127                        &current.name,
128                        "avg_latency",
129                        baseline.avg_latency_ms,
130                        current.avg_latency_ms,
131                        true, // Higher is worse for latency
132                    );
133
134                    if let Some(reg) = latency_regression {
135                        regressions.push(reg);
136                    }
137
138                    // Check throughput regression
139                    let throughput_regression = self.check_metric_regression(
140                        &current.name,
141                        "throughput",
142                        baseline.throughput_tokens_per_sec,
143                        current.throughput_tokens_per_sec,
144                        false, // Lower is worse for throughput
145                    );
146
147                    if let Some(reg) = throughput_regression {
148                        regressions.push(reg);
149                    }
150
151                    // Check memory regression
152                    if let (Some(baseline_mem), Some(current_mem)) =
153                        (baseline.memory_bytes, current.memory_bytes)
154                    {
155                        let memory_regression = self.check_metric_regression(
156                            &current.name,
157                            "memory",
158                            baseline_mem as f64,
159                            current_mem as f64,
160                            true, // Higher is worse for memory
161                        );
162
163                        if let Some(reg) = memory_regression {
164                            regressions.push(reg);
165                        }
166                    }
167                }
168            }
169        }
170
171        Ok(regressions)
172    }
173
174    /// Check regression for a specific metric
175    fn check_metric_regression(
176        &self,
177        benchmark_name: &str,
178        metric_name: &str,
179        baseline_value: f64,
180        current_value: f64,
181        higher_is_worse: bool,
182    ) -> Option<PerformanceRegression> {
183        let change_percent = if higher_is_worse {
184            (current_value - baseline_value) / baseline_value * 100.0
185        } else {
186            (baseline_value - current_value) / baseline_value * 100.0
187        };
188
189        if change_percent > self.config.regression_threshold {
190            // Simple statistical test - in real implementation, use proper statistics
191            let is_significant = change_percent > self.config.regression_threshold * 2.0;
192
193            Some(PerformanceRegression {
194                benchmark_name: benchmark_name.to_string(),
195                metric_name: metric_name.to_string(),
196                previous_value: baseline_value,
197                current_value,
198                regression_percent: change_percent,
199                is_significant,
200                confidence: if is_significant { 0.95 } else { 0.5 },
201            })
202        } else {
203            None
204        }
205    }
206
207    /// Generate run ID
208    fn generate_run_id(&self) -> String {
209        let timestamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
210        let commit = self.config.commit_sha.as_ref().map(|s| &s[..8]).unwrap_or("unknown");
211        format!("{}_{}", timestamp, commit)
212    }
213
214    /// Save benchmark results
215    fn save_results(&self, run_id: &str, results: &[BenchmarkResult]) -> Result<()> {
216        let file_path = self.config.results_dir.join(format!("{}.json", run_id));
217        let json = serde_json::to_string_pretty(results)?;
218        std::fs::write(file_path, json)?;
219        Ok(())
220    }
221
222    /// Generate performance report
223    pub fn generate_report(&self) -> Result<PerformanceReport> {
224        let trends = self.history.calculate_trends()?;
225        let summary = self.history.generate_summary()?;
226
227        Ok(PerformanceReport {
228            trends,
229            summary,
230            latest_regressions: Vec::new(),
231        })
232    }
233}
234
/// Persistent record of past benchmark runs, serialized to `history.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct BenchmarkHistory {
    /// Results per run, keyed by run ID.
    runs: HashMap<String, Vec<BenchmarkResult>>,
    /// Metadata per run, keyed by run ID (parallel to `runs`).
    metadata: HashMap<String, RunMetadata>,
}
241
/// Metadata describing a single recorded benchmark run.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RunMetadata {
    /// Unique run identifier (timestamp + abbreviated commit SHA).
    run_id: String,
    /// UTC time the run was recorded.
    timestamp: chrono::DateTime<chrono::Utc>,
    /// Git commit SHA, when known.
    commit_sha: Option<String>,
    /// Git branch name, when known.
    branch: Option<String>,
    /// Build configuration label (e.g. "release").
    build_config: String,
}
250
251impl BenchmarkHistory {
252    /// Load history from directory
253    fn load(dir: &Path) -> Result<Self> {
254        let history_file = dir.join("history.json");
255
256        if history_file.exists() {
257            let json = std::fs::read_to_string(history_file)?;
258            Ok(serde_json::from_str(&json)?)
259        } else {
260            Ok(Self {
261                runs: HashMap::new(),
262                metadata: HashMap::new(),
263            })
264        }
265    }
266
267    /// Save history to directory
268    fn save(&self, dir: &Path) -> Result<()> {
269        let history_file = dir.join("history.json");
270        let json = serde_json::to_string_pretty(self)?;
271        std::fs::write(history_file, json)?;
272        Ok(())
273    }
274
275    /// Add a benchmark run
276    fn add_run(&mut self, run_id: String, results: Vec<BenchmarkResult>) {
277        let metadata = RunMetadata {
278            run_id: run_id.clone(),
279            timestamp: chrono::Utc::now(),
280            commit_sha: None, // Would be set from config
281            branch: None,     // Would be set from config
282            build_config: "release".to_string(),
283        };
284
285        self.runs.insert(run_id.clone(), results);
286        self.metadata.insert(run_id, metadata);
287    }
288
289    /// Get baseline results for comparison
290    fn get_baseline(
291        &self,
292        branch: &Option<String>,
293        build_config: &str,
294    ) -> Option<&Vec<BenchmarkResult>> {
295        // Find the most recent run with matching branch and build config
296        let mut matching_runs: Vec<_> = self
297            .metadata
298            .iter()
299            .filter(|(_, meta)| {
300                meta.branch.as_ref() == branch.as_ref() && meta.build_config == build_config
301            })
302            .collect();
303
304        matching_runs.sort_by_key(|(_, meta)| meta.timestamp);
305
306        matching_runs.last().and_then(|(run_id, _)| self.runs.get(*run_id))
307    }
308
309    /// Calculate performance trends
310    fn calculate_trends(&self) -> Result<HashMap<String, PerformanceTrend>> {
311        let mut trends = HashMap::new();
312
313        // Group runs by benchmark name
314        let mut by_benchmark: HashMap<String, Vec<(&String, &BenchmarkResult)>> = HashMap::new();
315
316        for (run_id, results) in &self.runs {
317            for result in results {
318                by_benchmark.entry(result.name.clone()).or_default().push((run_id, result));
319            }
320        }
321
322        // Calculate trends for each benchmark
323        for (benchmark_name, mut runs) in by_benchmark {
324            // Sort by timestamp
325            runs.sort_by_key(|(run_id, _)| {
326                self.metadata.get(*run_id).map(|m| m.timestamp).unwrap_or_default()
327            });
328
329            if runs.len() >= 2 {
330                let latencies: Vec<f64> = runs.iter().map(|(_, r)| r.avg_latency_ms).collect();
331                let throughputs: Vec<f64> =
332                    runs.iter().map(|(_, r)| r.throughput_tokens_per_sec).collect();
333
334                trends.insert(
335                    benchmark_name,
336                    PerformanceTrend {
337                        latency_trend: calculate_trend(&latencies),
338                        throughput_trend: calculate_trend(&throughputs),
339                        sample_count: runs.len(),
340                    },
341                );
342            }
343        }
344
345        Ok(trends)
346    }
347
348    /// Generate summary statistics
349    fn generate_summary(&self) -> Result<PerformanceSummary> {
350        let total_runs = self.runs.len();
351        let total_benchmarks = self
352            .runs
353            .values()
354            .flat_map(|results| results.iter().map(|r| &r.name))
355            .collect::<std::collections::HashSet<_>>()
356            .len();
357
358        let latest_run = self.metadata.values().max_by_key(|m| m.timestamp).map(|m| m.timestamp);
359
360        Ok(PerformanceSummary {
361            total_runs,
362            total_benchmarks,
363            latest_run,
364            earliest_run: self.metadata.values().min_by_key(|m| m.timestamp).map(|m| m.timestamp),
365        })
366    }
367}
368
/// Linear trend of a benchmark's metrics across recorded runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
    /// Latency slope per run (positive = latency increasing, i.e. worse).
    pub latency_trend: f64,
    /// Throughput slope per run (negative = throughput decreasing, i.e. worse).
    pub throughput_trend: f64,
    /// Number of data points the trend was fitted over.
    pub sample_count: usize,
}
379
/// Full performance report assembled from the benchmark history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
    /// Performance trends keyed by benchmark name.
    pub trends: HashMap<String, PerformanceTrend>,
    /// Summary statistics over the whole history.
    pub summary: PerformanceSummary,
    /// Regressions from the latest run (may be empty).
    pub latest_regressions: Vec<PerformanceRegression>,
}
390
/// Summary statistics over the recorded benchmark history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSummary {
    /// Total number of recorded runs.
    pub total_runs: usize,
    /// Number of distinct benchmark names seen across all runs.
    pub total_benchmarks: usize,
    /// Timestamp of the most recent run, if any runs exist.
    pub latest_run: Option<chrono::DateTime<chrono::Utc>>,
    /// Timestamp of the oldest run, if any runs exist.
    pub earliest_run: Option<chrono::DateTime<chrono::Utc>>,
}
399
/// Least-squares slope of `values` against their indices (0, 1, 2, …).
///
/// Returns 0.0 for fewer than two data points or when the slope is
/// undefined.
fn calculate_trend(values: &[f64]) -> f64 {
    let n = values.len();
    if n < 2 {
        return 0.0;
    }

    let count = n as f64;
    // With x = 0..n, the mean of x is simply (n - 1) / 2.
    let x_mean = (count - 1.0) / 2.0;
    let y_mean = values.iter().sum::<f64>() / count;

    // Accumulate covariance numerator and variance denominator in one pass.
    let (num, den) = values
        .iter()
        .enumerate()
        .fold((0.0, 0.0), |(num, den), (i, &y)| {
            let dx = i as f64 - x_mean;
            (num + dx * (y - y_mean), den + dx * dx)
        });

    if den > 0.0 {
        num / den
    } else {
        0.0
    }
}
425
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regression_detection() {
        // Use a unique temp dir so the test does not pollute the working
        // directory with a real `benchmark_results/` folder.
        let results_dir = std::env::temp_dir()
            .join(format!("continuous_bench_test_{}", std::process::id()));
        let config = ContinuousBenchmarkConfig {
            results_dir: results_dir.clone(),
            ..Default::default()
        };
        let benchmark = ContinuousBenchmark::new(config).expect("operation failed in test");

        let regression = benchmark.check_metric_regression(
            "test_benchmark",
            "latency",
            100.0, // baseline
            110.0, // current (10% worse)
            true,  // higher is worse
        );

        assert!(regression.is_some());
        let reg = regression.expect("operation failed in test");
        // Compare with a tolerance: the percentage is computed in floating
        // point, so exact equality is fragile.
        assert!((reg.regression_percent - 10.0).abs() < 1e-9);

        // Best-effort cleanup of the temp directory.
        let _ = std::fs::remove_dir_all(results_dir);
    }

    #[test]
    fn test_trend_calculation() {
        let increasing = vec![100.0, 102.0, 104.0, 106.0, 108.0];
        // Positive slope: latency is getting worse over time.
        assert!(calculate_trend(&increasing) > 0.0);

        let decreasing = vec![100.0, 98.0, 96.0, 94.0, 92.0];
        // Negative slope: latency is improving over time.
        assert!(calculate_trend(&decreasing) < 0.0);
    }
}