Skip to main content

oxirs_arq/
query_regression_testing.rs

//! # Query Regression Testing Framework
//!
//! This module provides a comprehensive framework for detecting performance
//! regressions in SPARQL query execution. It supports:
//!
//! - **Golden Query Sets**: Reference queries with expected performance baselines
//! - **Statistical Regression Detection**: Using significance tests and confidence intervals
//! - **Execution Recording**: Historical execution metrics with rolling windows
//! - **Automated Regression Reports**: Detailed analysis of performance changes
//! - **CI/CD Integration**: Hooks for continuous integration pipelines
//!
//! ## Quick Start
//!
//! ```rust,ignore
//! use oxirs_arq::query_regression_testing::{
//!     RegressionTestSuite, GoldenQuery, RegressionConfig, ExecutionResult,
//! };
//!
//! // Create a regression test suite
//! let config = RegressionConfig::default();
//! let mut suite = RegressionTestSuite::new("sparql_benchmarks", config);
//!
//! // Add golden queries with baselines
//! suite.add_golden_query(GoldenQuery::new(
//!     "simple_select",
//!     "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100",
//!     10.0,  // baseline_ms
//! ));
//!
//! // Record execution results
//! suite.record_execution("simple_select", ExecutionResult::success(8.5));
//!
//! // Check for regressions
//! let report = suite.analyze_regressions();
//! ```
36
37use std::collections::{HashMap, VecDeque};
38use std::fmt;
39use std::time::SystemTime;
40
/// Tunable parameters that drive regression detection.
///
/// Ready-made presets are available through `Default`, `strict()` and
/// `lenient()`; every field is public so callers can also build a custom
/// configuration directly.
#[derive(Debug, Clone)]
pub struct RegressionConfig {
    /// Current/baseline time ratio above which a query is considered regressed
    /// (for example `1.2` means "more than 20% slower").
    pub regression_threshold: f64,
    /// Current/baseline time ratio below which a query is considered improved
    /// (for example `0.8` means "more than 20% faster").
    pub improvement_threshold: f64,
    /// Fewest recent samples required before a verdict is attempted.
    pub min_samples: usize,
    /// How many of the most recent executions form the rolling window.
    pub rolling_window_size: usize,
    /// Confidence level for statistical tests, in the range 0.0-1.0.
    pub confidence_level: f64,
    /// Upper bound on the number of history entries kept per query.
    pub max_history_entries: usize,
    /// Flag for enabling detailed logging.
    pub verbose: bool,
    /// Number of standard deviations from the mean beyond which a sample is
    /// treated as an outlier.
    pub outlier_std_devs: f64,
    /// Minimum execution time (ms) to consider; intended to filter out noise.
    pub min_execution_time_ms: f64,
}
63
64impl Default for RegressionConfig {
65    fn default() -> Self {
66        Self {
67            regression_threshold: 1.2,
68            improvement_threshold: 0.8,
69            min_samples: 5,
70            rolling_window_size: 20,
71            confidence_level: 0.95,
72            max_history_entries: 1000,
73            verbose: false,
74            outlier_std_devs: 3.0,
75            min_execution_time_ms: 0.1,
76        }
77    }
78}
79
80impl RegressionConfig {
81    /// Create a strict configuration for CI/CD
82    pub fn strict() -> Self {
83        Self {
84            regression_threshold: 1.1, // 10% regression triggers alert
85            improvement_threshold: 0.9,
86            min_samples: 10,
87            rolling_window_size: 30,
88            confidence_level: 0.99,
89            max_history_entries: 2000,
90            verbose: true,
91            outlier_std_devs: 2.5,
92            min_execution_time_ms: 0.1,
93        }
94    }
95
96    /// Create a lenient configuration for development
97    pub fn lenient() -> Self {
98        Self {
99            regression_threshold: 1.5, // 50% regression triggers alert
100            improvement_threshold: 0.5,
101            min_samples: 3,
102            rolling_window_size: 10,
103            confidence_level: 0.90,
104            max_history_entries: 500,
105            verbose: false,
106            outlier_std_devs: 4.0,
107            min_execution_time_ms: 0.05,
108        }
109    }
110}
111
/// A reference ("golden") query together with the performance baseline it is
/// measured against.
#[derive(Debug, Clone)]
pub struct GoldenQuery {
    /// Identifier of the query; used as the lookup key by the test suite.
    pub id: String,
    /// SPARQL text of the query.
    pub query: String,
    /// Free-form description of what the query is meant to exercise.
    pub description: String,
    /// Baseline execution time, in milliseconds.
    pub baseline_ms: f64,
    /// Number of results the query is expected to return, when known.
    pub expected_result_count: Option<usize>,
    /// Labels used to categorize the query.
    pub tags: Vec<String>,
    /// Priority from 1 (highest) to 5 (lowest).
    pub priority: u8,
    /// Whether the query is currently active.
    pub active: bool,
    /// When the query was created.
    pub created_at: SystemTime,
    /// When the query was last updated.
    pub updated_at: SystemTime,
}
136
137impl GoldenQuery {
138    /// Create a new golden query with basic parameters
139    pub fn new(id: impl Into<String>, query: impl Into<String>, baseline_ms: f64) -> Self {
140        let now = SystemTime::now();
141        Self {
142            id: id.into(),
143            query: query.into(),
144            description: String::new(),
145            baseline_ms,
146            expected_result_count: None,
147            tags: Vec::new(),
148            priority: 3,
149            active: true,
150            created_at: now,
151            updated_at: now,
152        }
153    }
154
155    /// Set the description
156    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
157        self.description = desc.into();
158        self
159    }
160
161    /// Set expected result count
162    pub fn with_expected_count(mut self, count: usize) -> Self {
163        self.expected_result_count = Some(count);
164        self
165    }
166
167    /// Add a tag
168    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
169        self.tags.push(tag.into());
170        self
171    }
172
173    /// Set priority
174    pub fn with_priority(mut self, priority: u8) -> Self {
175        self.priority = priority.clamp(1, 5);
176        self
177    }
178
179    /// Update the baseline
180    pub fn update_baseline(&mut self, new_baseline_ms: f64) {
181        self.baseline_ms = new_baseline_ms;
182        self.updated_at = SystemTime::now();
183    }
184}
185
/// Outcome of running a query once.
#[derive(Debug, Clone)]
pub struct ExecutionResult {
    /// How long the execution took, in milliseconds.
    pub execution_time_ms: f64,
    /// `true` when the execution succeeded.
    pub success: bool,
    /// Number of results returned, when recorded.
    pub result_count: Option<usize>,
    /// Memory used, in bytes, when recorded.
    pub memory_bytes: Option<usize>,
    /// Error message for a failed execution.
    pub error: Option<String>,
    /// When the execution took place.
    pub timestamp: SystemTime,
    /// Arbitrary extra key/value metadata.
    pub metadata: HashMap<String, String>,
}
204
205impl ExecutionResult {
206    /// Create a successful execution result
207    pub fn success(execution_time_ms: f64) -> Self {
208        Self {
209            execution_time_ms,
210            success: true,
211            result_count: None,
212            memory_bytes: None,
213            error: None,
214            timestamp: SystemTime::now(),
215            metadata: HashMap::new(),
216        }
217    }
218
219    /// Create a failed execution result
220    pub fn failure(error: impl Into<String>) -> Self {
221        Self {
222            execution_time_ms: 0.0,
223            success: false,
224            result_count: None,
225            memory_bytes: None,
226            error: Some(error.into()),
227            timestamp: SystemTime::now(),
228            metadata: HashMap::new(),
229        }
230    }
231
232    /// Set result count
233    pub fn with_result_count(mut self, count: usize) -> Self {
234        self.result_count = Some(count);
235        self
236    }
237
238    /// Set memory usage
239    pub fn with_memory(mut self, bytes: usize) -> Self {
240        self.memory_bytes = Some(bytes);
241        self
242    }
243
244    /// Add metadata
245    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
246        self.metadata.insert(key.into(), value.into());
247        self
248    }
249}
250
/// Verdict reached for a query (or for a whole report).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegressionStatus {
    /// No meaningful change in performance.
    Stable,
    /// The query got faster.
    Improved,
    /// The query got slower.
    Regressed,
    /// Too little data to reach a verdict.
    InsufficientData,
    /// The query is failing.
    Failing,
}
265
266impl fmt::Display for RegressionStatus {
267    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
268        match self {
269            Self::Stable => write!(f, "STABLE"),
270            Self::Improved => write!(f, "IMPROVED"),
271            Self::Regressed => write!(f, "REGRESSED"),
272            Self::InsufficientData => write!(f, "INSUFFICIENT_DATA"),
273            Self::Failing => write!(f, "FAILING"),
274        }
275    }
276}
277
/// Summary statistics over a set of execution results.
///
/// All timing fields are in milliseconds; `Default` yields all zeros.
#[derive(Debug, Clone, Default)]
pub struct ExecutionStatistics {
    /// Total number of executions.
    pub count: usize,
    /// How many executions succeeded.
    pub success_count: usize,
    /// How many executions failed.
    pub failure_count: usize,
    /// Fastest execution time.
    pub min_ms: f64,
    /// Slowest execution time.
    pub max_ms: f64,
    /// Arithmetic mean of the execution times.
    pub mean_ms: f64,
    /// Median execution time.
    pub median_ms: f64,
    /// Standard deviation of the execution times.
    pub std_dev_ms: f64,
    /// 95th-percentile execution time.
    pub p95_ms: f64,
    /// 99th-percentile execution time.
    pub p99_ms: f64,
    /// Coefficient of variation.
    pub cv: f64,
}
304
305impl ExecutionStatistics {
306    /// Calculate statistics from execution results
307    pub fn from_results(results: &[ExecutionResult]) -> Self {
308        if results.is_empty() {
309            return Self::default();
310        }
311
312        let successes: Vec<f64> = results
313            .iter()
314            .filter(|r| r.success)
315            .map(|r| r.execution_time_ms)
316            .collect();
317
318        let success_count = successes.len();
319        let failure_count = results.len() - success_count;
320
321        if successes.is_empty() {
322            return Self {
323                count: results.len(),
324                success_count: 0,
325                failure_count,
326                ..Default::default()
327            };
328        }
329
330        let min_ms = successes.iter().cloned().fold(f64::INFINITY, f64::min);
331        let max_ms = successes.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
332        let mean_ms = successes.iter().sum::<f64>() / successes.len() as f64;
333
334        let variance = if successes.len() > 1 {
335            successes.iter().map(|x| (x - mean_ms).powi(2)).sum::<f64>()
336                / (successes.len() - 1) as f64
337        } else {
338            0.0
339        };
340        let std_dev_ms = variance.sqrt();
341
342        let mut sorted = successes.clone();
343        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
344
345        let median_ms = if sorted.len() % 2 == 0 {
346            (sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
347        } else {
348            sorted[sorted.len() / 2]
349        };
350
351        let p95_idx = ((sorted.len() as f64 * 0.95) as usize).min(sorted.len() - 1);
352        let p99_idx = ((sorted.len() as f64 * 0.99) as usize).min(sorted.len() - 1);
353
354        let cv = if mean_ms > 0.0 {
355            std_dev_ms / mean_ms
356        } else {
357            0.0
358        };
359
360        Self {
361            count: results.len(),
362            success_count,
363            failure_count,
364            min_ms,
365            max_ms,
366            mean_ms,
367            median_ms,
368            std_dev_ms,
369            p95_ms: sorted[p95_idx],
370            p99_ms: sorted[p99_idx],
371            cv,
372        }
373    }
374}
375
376/// Detailed regression analysis for a single query
377#[derive(Debug, Clone)]
378pub struct QueryRegressionAnalysis {
379    /// Query ID
380    pub query_id: String,
381    /// Current regression status
382    pub status: RegressionStatus,
383    /// Baseline execution time
384    pub baseline_ms: f64,
385    /// Current mean execution time
386    pub current_mean_ms: f64,
387    /// Ratio of current to baseline (>1 = slower)
388    pub ratio: f64,
389    /// Percentage change from baseline
390    pub change_percent: f64,
391    /// Statistical significance (p-value)
392    pub p_value: f64,
393    /// Whether the change is statistically significant
394    pub is_significant: bool,
395    /// Confidence interval lower bound
396    pub ci_lower: f64,
397    /// Confidence interval upper bound
398    pub ci_upper: f64,
399    /// Recent execution statistics
400    pub recent_stats: ExecutionStatistics,
401    /// Historical execution statistics
402    pub historical_stats: ExecutionStatistics,
403    /// Trend direction (-1 = improving, 0 = stable, 1 = degrading)
404    pub trend: i8,
405    /// Detailed message
406    pub message: String,
407}
408
409impl QueryRegressionAnalysis {
410    /// Check if this query needs attention
411    pub fn needs_attention(&self) -> bool {
412        matches!(
413            self.status,
414            RegressionStatus::Regressed | RegressionStatus::Failing
415        )
416    }
417}
418
419/// Overall regression report for the test suite
420#[derive(Debug, Clone)]
421pub struct RegressionReport {
422    /// Suite name
423    pub suite_name: String,
424    /// Report generation timestamp
425    pub generated_at: SystemTime,
426    /// Overall status
427    pub overall_status: RegressionStatus,
428    /// Individual query analyses
429    pub analyses: Vec<QueryRegressionAnalysis>,
430    /// Summary statistics
431    pub summary: ReportSummary,
432    /// Configuration used
433    pub config: RegressionConfig,
434}
435
436impl RegressionReport {
437    /// Get all regressed queries
438    pub fn regressed_queries(&self) -> Vec<&QueryRegressionAnalysis> {
439        self.analyses
440            .iter()
441            .filter(|a| a.status == RegressionStatus::Regressed)
442            .collect()
443    }
444
445    /// Get all improved queries
446    pub fn improved_queries(&self) -> Vec<&QueryRegressionAnalysis> {
447        self.analyses
448            .iter()
449            .filter(|a| a.status == RegressionStatus::Improved)
450            .collect()
451    }
452
453    /// Get all failing queries
454    pub fn failing_queries(&self) -> Vec<&QueryRegressionAnalysis> {
455        self.analyses
456            .iter()
457            .filter(|a| a.status == RegressionStatus::Failing)
458            .collect()
459    }
460
461    /// Check if the report indicates any issues
462    pub fn has_issues(&self) -> bool {
463        self.summary.regressed_count > 0 || self.summary.failing_count > 0
464    }
465
466    /// Generate a human-readable summary
467    pub fn summary_text(&self) -> String {
468        let mut text = format!("Regression Report: {}\n", self.suite_name);
469        text.push_str(&format!("Generated: {:?}\n\n", self.generated_at));
470        text.push_str(&format!("Overall Status: {}\n\n", self.overall_status));
471        text.push_str(&format!(
472            "Summary:\n  Total: {}\n  Stable: {}\n  Improved: {}\n  Regressed: {}\n  Failing: {}\n  Insufficient Data: {}\n",
473            self.summary.total_count,
474            self.summary.stable_count,
475            self.summary.improved_count,
476            self.summary.regressed_count,
477            self.summary.failing_count,
478            self.summary.insufficient_data_count
479        ));
480
481        if !self.regressed_queries().is_empty() {
482            text.push_str("\nRegressed Queries:\n");
483            for analysis in self.regressed_queries() {
484                text.push_str(&format!(
485                    "  - {}: {:.1}% slower ({:.2}ms -> {:.2}ms)\n",
486                    analysis.query_id,
487                    analysis.change_percent,
488                    analysis.baseline_ms,
489                    analysis.current_mean_ms
490                ));
491            }
492        }
493
494        if !self.improved_queries().is_empty() {
495            text.push_str("\nImproved Queries:\n");
496            for analysis in self.improved_queries() {
497                text.push_str(&format!(
498                    "  - {}: {:.1}% faster ({:.2}ms -> {:.2}ms)\n",
499                    analysis.query_id,
500                    -analysis.change_percent,
501                    analysis.baseline_ms,
502                    analysis.current_mean_ms
503                ));
504            }
505        }
506
507        text
508    }
509}
510
/// Aggregate figures for a regression report.
///
/// `Default` yields all zeros.
#[derive(Debug, Clone, Default)]
pub struct ReportSummary {
    /// How many queries were analyzed in total.
    pub total_count: usize,
    /// How many queries were stable.
    pub stable_count: usize,
    /// How many queries improved.
    pub improved_count: usize,
    /// How many queries regressed.
    pub regressed_count: usize,
    /// How many queries are failing.
    pub failing_count: usize,
    /// How many queries lacked enough data for a verdict.
    pub insufficient_data_count: usize,
    /// Average regression percentage across the regressed queries.
    pub avg_regression_percent: f64,
    /// Average improvement percentage across the improved queries.
    pub avg_improvement_percent: f64,
    /// Worst regression percentage.
    pub worst_regression_percent: f64,
    /// Best improvement percentage.
    pub best_improvement_percent: f64,
}
535
536/// Execution history for a query
537#[derive(Debug, Clone)]
538struct QueryHistory {
539    /// Execution results in chronological order
540    results: VecDeque<ExecutionResult>,
541    /// Maximum entries
542    max_entries: usize,
543}
544
545impl QueryHistory {
546    fn new(max_entries: usize) -> Self {
547        Self {
548            results: VecDeque::new(),
549            max_entries,
550        }
551    }
552
553    fn add(&mut self, result: ExecutionResult) {
554        self.results.push_back(result);
555        while self.results.len() > self.max_entries {
556            self.results.pop_front();
557        }
558    }
559
560    fn recent(&self, count: usize) -> Vec<&ExecutionResult> {
561        self.results.iter().rev().take(count).collect()
562    }
563
564    fn all(&self) -> Vec<&ExecutionResult> {
565        self.results.iter().collect()
566    }
567}
568
569/// Main regression test suite
570#[derive(Debug)]
571pub struct RegressionTestSuite {
572    /// Suite name
573    name: String,
574    /// Configuration
575    config: RegressionConfig,
576    /// Golden queries
577    golden_queries: HashMap<String, GoldenQuery>,
578    /// Execution history per query
579    history: HashMap<String, QueryHistory>,
580    /// Statistics
581    stats: SuiteStatistics,
582}
583
/// Running counters kept by a test suite.
///
/// `Default` yields zeroed counters and no last-analysis timestamp.
#[derive(Debug, Clone, Default)]
pub struct SuiteStatistics {
    /// Number of execution results recorded so far.
    pub total_executions: usize,
    /// Number of analyses performed so far.
    pub total_analyses: usize,
    /// Number of regressions detected so far.
    pub regressions_detected: usize,
    /// Number of improvements detected so far.
    pub improvements_detected: usize,
    /// When the most recent analysis ran, if any.
    pub last_analysis: Option<SystemTime>,
}
598
599impl RegressionTestSuite {
600    /// Create a new regression test suite
601    pub fn new(name: impl Into<String>, config: RegressionConfig) -> Self {
602        Self {
603            name: name.into(),
604            config,
605            golden_queries: HashMap::new(),
606            history: HashMap::new(),
607            stats: SuiteStatistics::default(),
608        }
609    }
610
611    /// Create with default configuration
612    pub fn with_defaults(name: impl Into<String>) -> Self {
613        Self::new(name, RegressionConfig::default())
614    }
615
616    /// Add a golden query
617    pub fn add_golden_query(&mut self, query: GoldenQuery) {
618        let id = query.id.clone();
619        self.golden_queries.insert(id.clone(), query);
620        self.history
621            .entry(id)
622            .or_insert_with(|| QueryHistory::new(self.config.max_history_entries));
623    }
624
625    /// Remove a golden query
626    pub fn remove_golden_query(&mut self, id: &str) -> Option<GoldenQuery> {
627        self.history.remove(id);
628        self.golden_queries.remove(id)
629    }
630
631    /// Get a golden query by ID
632    pub fn get_golden_query(&self, id: &str) -> Option<&GoldenQuery> {
633        self.golden_queries.get(id)
634    }
635
636    /// Get all golden queries
637    pub fn golden_queries(&self) -> impl Iterator<Item = &GoldenQuery> {
638        self.golden_queries.values()
639    }
640
641    /// Record an execution result
642    pub fn record_execution(&mut self, query_id: &str, result: ExecutionResult) -> bool {
643        if let Some(history) = self.history.get_mut(query_id) {
644            history.add(result);
645            self.stats.total_executions += 1;
646            true
647        } else if self.golden_queries.contains_key(query_id) {
648            let mut history = QueryHistory::new(self.config.max_history_entries);
649            history.add(result);
650            self.history.insert(query_id.to_string(), history);
651            self.stats.total_executions += 1;
652            true
653        } else {
654            false
655        }
656    }
657
658    /// Record multiple execution results for a query
659    pub fn record_executions(&mut self, query_id: &str, results: Vec<ExecutionResult>) -> usize {
660        let mut recorded = 0;
661        for result in results {
662            if self.record_execution(query_id, result) {
663                recorded += 1;
664            }
665        }
666        recorded
667    }
668
669    /// Analyze a single query for regression
670    pub fn analyze_query(&self, query_id: &str) -> Option<QueryRegressionAnalysis> {
671        let query = self.golden_queries.get(query_id)?;
672        let history = self.history.get(query_id)?;
673
674        let all_results: Vec<ExecutionResult> = history.all().into_iter().cloned().collect();
675        let recent_results: Vec<ExecutionResult> = history
676            .recent(self.config.rolling_window_size)
677            .into_iter()
678            .cloned()
679            .collect();
680
681        // Check for insufficient data
682        if recent_results.len() < self.config.min_samples {
683            return Some(QueryRegressionAnalysis {
684                query_id: query_id.to_string(),
685                status: RegressionStatus::InsufficientData,
686                baseline_ms: query.baseline_ms,
687                current_mean_ms: 0.0,
688                ratio: 1.0,
689                change_percent: 0.0,
690                p_value: 1.0,
691                is_significant: false,
692                ci_lower: 0.0,
693                ci_upper: 0.0,
694                recent_stats: ExecutionStatistics::default(),
695                historical_stats: ExecutionStatistics::default(),
696                trend: 0,
697                message: format!(
698                    "Insufficient data: {} samples (need {})",
699                    recent_results.len(),
700                    self.config.min_samples
701                ),
702            });
703        }
704
705        // Check for failing queries
706        let recent_failures = recent_results.iter().filter(|r| !r.success).count();
707        let failure_rate = recent_failures as f64 / recent_results.len() as f64;
708        if failure_rate > 0.5 {
709            return Some(QueryRegressionAnalysis {
710                query_id: query_id.to_string(),
711                status: RegressionStatus::Failing,
712                baseline_ms: query.baseline_ms,
713                current_mean_ms: 0.0,
714                ratio: f64::INFINITY,
715                change_percent: f64::INFINITY,
716                p_value: 0.0,
717                is_significant: true,
718                ci_lower: 0.0,
719                ci_upper: 0.0,
720                recent_stats: ExecutionStatistics::from_results(&recent_results),
721                historical_stats: ExecutionStatistics::from_results(&all_results),
722                trend: 1,
723                message: format!("Query failing: {:.1}% failure rate", failure_rate * 100.0),
724            });
725        }
726
727        // Calculate statistics
728        let recent_stats = ExecutionStatistics::from_results(&recent_results);
729        let historical_stats = ExecutionStatistics::from_results(&all_results);
730
731        // Filter out outliers for analysis
732        let filtered_times: Vec<f64> = recent_results
733            .iter()
734            .filter(|r| r.success)
735            .map(|r| r.execution_time_ms)
736            .filter(|&t| {
737                if recent_stats.std_dev_ms > 0.0 {
738                    (t - recent_stats.mean_ms).abs()
739                        <= self.config.outlier_std_devs * recent_stats.std_dev_ms
740                } else {
741                    true
742                }
743            })
744            .collect();
745
746        if filtered_times.is_empty() {
747            return Some(QueryRegressionAnalysis {
748                query_id: query_id.to_string(),
749                status: RegressionStatus::InsufficientData,
750                baseline_ms: query.baseline_ms,
751                current_mean_ms: 0.0,
752                ratio: 1.0,
753                change_percent: 0.0,
754                p_value: 1.0,
755                is_significant: false,
756                ci_lower: 0.0,
757                ci_upper: 0.0,
758                recent_stats,
759                historical_stats,
760                trend: 0,
761                message: "All samples filtered as outliers".to_string(),
762            });
763        }
764
765        let current_mean: f64 = filtered_times.iter().sum::<f64>() / filtered_times.len() as f64;
766        let ratio = current_mean / query.baseline_ms;
767        let change_percent = (ratio - 1.0) * 100.0;
768
769        // Calculate confidence interval using t-distribution approximation
770        let (ci_lower, ci_upper, p_value) =
771            self.calculate_statistics(&filtered_times, query.baseline_ms);
772        let is_significant = p_value < (1.0 - self.config.confidence_level);
773
774        // Determine trend from historical data
775        let trend = self.calculate_trend(&all_results);
776
777        // Determine status
778        let status = if ratio > self.config.regression_threshold && is_significant {
779            RegressionStatus::Regressed
780        } else if ratio < self.config.improvement_threshold && is_significant {
781            RegressionStatus::Improved
782        } else {
783            RegressionStatus::Stable
784        };
785
786        let message = match status {
787            RegressionStatus::Regressed => format!(
788                "Performance regressed by {:.1}% (baseline: {:.2}ms, current: {:.2}ms)",
789                change_percent, query.baseline_ms, current_mean
790            ),
791            RegressionStatus::Improved => format!(
792                "Performance improved by {:.1}% (baseline: {:.2}ms, current: {:.2}ms)",
793                -change_percent, query.baseline_ms, current_mean
794            ),
795            RegressionStatus::Stable => format!(
796                "Performance stable ({:.1}% change, baseline: {:.2}ms, current: {:.2}ms)",
797                change_percent, query.baseline_ms, current_mean
798            ),
799            _ => String::new(),
800        };
801
802        Some(QueryRegressionAnalysis {
803            query_id: query_id.to_string(),
804            status,
805            baseline_ms: query.baseline_ms,
806            current_mean_ms: current_mean,
807            ratio,
808            change_percent,
809            p_value,
810            is_significant,
811            ci_lower,
812            ci_upper,
813            recent_stats,
814            historical_stats,
815            trend,
816            message,
817        })
818    }
819
820    /// Analyze all queries for regressions
821    pub fn analyze_regressions(&mut self) -> RegressionReport {
822        self.stats.total_analyses += 1;
823        self.stats.last_analysis = Some(SystemTime::now());
824
825        let mut analyses = Vec::new();
826        for query_id in self.golden_queries.keys() {
827            if let Some(analysis) = self.analyze_query(query_id) {
828                if analysis.status == RegressionStatus::Regressed {
829                    self.stats.regressions_detected += 1;
830                } else if analysis.status == RegressionStatus::Improved {
831                    self.stats.improvements_detected += 1;
832                }
833                analyses.push(analysis);
834            }
835        }
836
837        // Sort by status priority and change magnitude
838        analyses.sort_by(|a, b| {
839            let status_order = |s: &RegressionStatus| match s {
840                RegressionStatus::Failing => 0,
841                RegressionStatus::Regressed => 1,
842                RegressionStatus::Improved => 2,
843                RegressionStatus::Stable => 3,
844                RegressionStatus::InsufficientData => 4,
845            };
846            let a_order = status_order(&a.status);
847            let b_order = status_order(&b.status);
848            if a_order != b_order {
849                a_order.cmp(&b_order)
850            } else {
851                b.change_percent
852                    .abs()
853                    .partial_cmp(&a.change_percent.abs())
854                    .unwrap_or(std::cmp::Ordering::Equal)
855            }
856        });
857
858        let summary = self.calculate_summary(&analyses);
859        let overall_status = if summary.regressed_count > 0 || summary.failing_count > 0 {
860            RegressionStatus::Regressed
861        } else if summary.improved_count > 0 {
862            RegressionStatus::Improved
863        } else if summary.insufficient_data_count == summary.total_count {
864            RegressionStatus::InsufficientData
865        } else {
866            RegressionStatus::Stable
867        };
868
869        RegressionReport {
870            suite_name: self.name.clone(),
871            generated_at: SystemTime::now(),
872            overall_status,
873            analyses,
874            summary,
875            config: self.config.clone(),
876        }
877    }
878
879    /// Update baseline for a query based on recent performance
880    pub fn update_baseline(&mut self, query_id: &str) -> Option<f64> {
881        let history = self.history.get(query_id)?;
882        let recent: Vec<f64> = history
883            .recent(self.config.rolling_window_size)
884            .into_iter()
885            .filter(|r| r.success)
886            .map(|r| r.execution_time_ms)
887            .collect();
888
889        if recent.len() >= self.config.min_samples {
890            let new_baseline = recent.iter().sum::<f64>() / recent.len() as f64;
891            if let Some(query) = self.golden_queries.get_mut(query_id) {
892                query.update_baseline(new_baseline);
893                return Some(new_baseline);
894            }
895        }
896        None
897    }
898
899    /// Get suite statistics
900    pub fn statistics(&self) -> &SuiteStatistics {
901        &self.stats
902    }
903
904    /// Get configuration
905    pub fn config(&self) -> &RegressionConfig {
906        &self.config
907    }
908
909    /// Update configuration
910    pub fn set_config(&mut self, config: RegressionConfig) {
911        self.config = config;
912    }
913
914    /// Clear all execution history
915    pub fn clear_history(&mut self) {
916        for history in self.history.values_mut() {
917            history.results.clear();
918        }
919    }
920
921    /// Export suite data for persistence
922    pub fn export(&self) -> SuiteExport {
923        SuiteExport {
924            name: self.name.clone(),
925            config: self.config.clone(),
926            golden_queries: self.golden_queries.values().cloned().collect(),
927            stats: self.stats.clone(),
928        }
929    }
930
931    /// Import suite data
932    pub fn import(data: SuiteExport) -> Self {
933        let mut suite = Self::new(data.name, data.config);
934        for query in data.golden_queries {
935            suite.add_golden_query(query);
936        }
937        suite.stats = data.stats;
938        suite
939    }
940
941    // Private helper methods
942
943    fn calculate_statistics(&self, samples: &[f64], baseline: f64) -> (f64, f64, f64) {
944        if samples.is_empty() {
945            return (0.0, 0.0, 1.0);
946        }
947
948        let n = samples.len() as f64;
949        let mean = samples.iter().sum::<f64>() / n;
950
951        if samples.len() < 2 {
952            return (mean, mean, 0.5);
953        }
954
955        let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
956        let std_error = (variance / n).sqrt();
957
958        // T-critical value for 95% CI (approximation)
959        let t_critical = if n > 30.0 { 1.96 } else { 2.0 + 1.0 / n.sqrt() };
960
961        let ci_lower = mean - t_critical * std_error;
962        let ci_upper = mean + t_critical * std_error;
963
964        // Calculate p-value using t-test against baseline
965        let t_stat = (mean - baseline) / std_error;
966        let p_value = self.approximate_p_value(t_stat.abs(), (n - 1.0) as usize);
967
968        (ci_lower, ci_upper, p_value)
969    }
970
971    fn approximate_p_value(&self, t_stat: f64, df: usize) -> f64 {
972        // Simple approximation of two-tailed p-value
973        // For more accuracy, use a proper statistical library
974        let df = df as f64;
975        // Note: x would be used for incomplete beta function approximation
976        let _x = df / (df + t_stat * t_stat);
977
978        // Beta function approximation for incomplete beta
979        // This is a simplified version
980        if t_stat < 0.5 {
981            1.0
982        } else if t_stat > 5.0 {
983            0.0001
984        } else {
985            // Linear interpolation approximation
986            let p = 2.0 * (1.0 - 0.5 * (1.0 + (t_stat / (1.0 + t_stat / df.sqrt())).tanh()));
987            p.clamp(0.0001, 1.0)
988        }
989    }
990
991    fn calculate_trend(&self, results: &[ExecutionResult]) -> i8 {
992        let times: Vec<f64> = results
993            .iter()
994            .filter(|r| r.success)
995            .map(|r| r.execution_time_ms)
996            .collect();
997
998        if times.len() < 3 {
999            return 0;
1000        }
1001
1002        // Simple linear trend using first and last thirds
1003        let third = times.len() / 3;
1004        let first_third_avg: f64 = times[..third].iter().sum::<f64>() / third as f64;
1005        let last_third_avg: f64 = times[times.len() - third..].iter().sum::<f64>() / third as f64;
1006
1007        let ratio = last_third_avg / first_third_avg;
1008        if ratio > 1.1 {
1009            1 // Degrading
1010        } else if ratio < 0.9 {
1011            -1 // Improving
1012        } else {
1013            0 // Stable
1014        }
1015    }
1016
1017    fn calculate_summary(&self, analyses: &[QueryRegressionAnalysis]) -> ReportSummary {
1018        let mut summary = ReportSummary {
1019            total_count: analyses.len(),
1020            ..Default::default()
1021        };
1022
1023        let mut regression_percents = Vec::new();
1024        let mut improvement_percents = Vec::new();
1025
1026        for analysis in analyses {
1027            match analysis.status {
1028                RegressionStatus::Stable => summary.stable_count += 1,
1029                RegressionStatus::Improved => {
1030                    summary.improved_count += 1;
1031                    improvement_percents.push(-analysis.change_percent);
1032                }
1033                RegressionStatus::Regressed => {
1034                    summary.regressed_count += 1;
1035                    regression_percents.push(analysis.change_percent);
1036                }
1037                RegressionStatus::InsufficientData => summary.insufficient_data_count += 1,
1038                RegressionStatus::Failing => summary.failing_count += 1,
1039            }
1040        }
1041
1042        if !regression_percents.is_empty() {
1043            summary.avg_regression_percent =
1044                regression_percents.iter().sum::<f64>() / regression_percents.len() as f64;
1045            summary.worst_regression_percent = regression_percents
1046                .iter()
1047                .cloned()
1048                .fold(f64::NEG_INFINITY, f64::max);
1049        }
1050
1051        if !improvement_percents.is_empty() {
1052            summary.avg_improvement_percent =
1053                improvement_percents.iter().sum::<f64>() / improvement_percents.len() as f64;
1054            summary.best_improvement_percent = improvement_percents
1055                .iter()
1056                .cloned()
1057                .fold(f64::NEG_INFINITY, f64::max);
1058        }
1059
1060        summary
1061    }
1062}
1063
/// Export format for suite persistence
///
/// Carries the suite name, configuration, golden queries and statistics.
/// There is no field for recorded execution history, so history is not part
/// of an export.
#[derive(Debug, Clone)]
pub struct SuiteExport {
    /// Suite name
    pub name: String,
    /// Configuration the suite was using when it was exported
    pub config: RegressionConfig,
    /// Golden queries registered on the suite
    pub golden_queries: Vec<GoldenQuery>,
    /// Suite statistics at export time
    pub stats: SuiteStatistics,
}
1076
/// Builder for creating regression test suites
///
/// Collects a suite name, an optional configuration and a list of golden
/// queries, which `build` turns into a `RegressionTestSuite`.
#[derive(Debug, Default)]
pub struct RegressionTestSuiteBuilder {
    // Name passed to `RegressionTestSuite::new` by `build`
    name: String,
    // Configuration override; `build` falls back to `RegressionConfig::default()` when `None`
    config: Option<RegressionConfig>,
    // Golden queries registered on the suite by `build`, in insertion order
    golden_queries: Vec<GoldenQuery>,
}
1084
1085impl RegressionTestSuiteBuilder {
1086    /// Create a new builder
1087    pub fn new(name: impl Into<String>) -> Self {
1088        Self {
1089            name: name.into(),
1090            config: None,
1091            golden_queries: Vec::new(),
1092        }
1093    }
1094
1095    /// Set configuration
1096    pub fn with_config(mut self, config: RegressionConfig) -> Self {
1097        self.config = Some(config);
1098        self
1099    }
1100
1101    /// Add a golden query
1102    pub fn add_query(mut self, query: GoldenQuery) -> Self {
1103        self.golden_queries.push(query);
1104        self
1105    }
1106
1107    /// Add multiple golden queries
1108    pub fn add_queries(mut self, queries: Vec<GoldenQuery>) -> Self {
1109        self.golden_queries.extend(queries);
1110        self
1111    }
1112
1113    /// Build the suite
1114    pub fn build(self) -> RegressionTestSuite {
1115        let mut suite = RegressionTestSuite::new(self.name, self.config.unwrap_or_default());
1116        for query in self.golden_queries {
1117            suite.add_golden_query(query);
1118        }
1119        suite
1120    }
1121}
1122
/// Comparison result between two regression reports
///
/// Each list holds the ids of the queries whose status moved in the described
/// direction between the older and the newer report.
#[derive(Debug, Clone)]
pub struct ReportComparison {
    /// Queries that newly regressed
    pub new_regressions: Vec<String>,
    /// Queries that were fixed (no longer regressed)
    pub fixed_regressions: Vec<String>,
    /// Queries that newly improved
    pub new_improvements: Vec<String>,
    /// Queries that went from improved to stable or failing
    /// (improved -> regressed is reported under `new_regressions` instead)
    pub lost_improvements: Vec<String>,
    /// Overall status change as `(old, new)`; `None` when the overall status is unchanged
    pub status_change: Option<(RegressionStatus, RegressionStatus)>,
}
1137
1138impl ReportComparison {
1139    /// Compare two reports
1140    pub fn compare(old: &RegressionReport, new: &RegressionReport) -> Self {
1141        let old_status: HashMap<&str, RegressionStatus> = old
1142            .analyses
1143            .iter()
1144            .map(|a| (a.query_id.as_str(), a.status))
1145            .collect();
1146
1147        let new_status: HashMap<&str, RegressionStatus> = new
1148            .analyses
1149            .iter()
1150            .map(|a| (a.query_id.as_str(), a.status))
1151            .collect();
1152
1153        let mut comparison = Self {
1154            new_regressions: Vec::new(),
1155            fixed_regressions: Vec::new(),
1156            new_improvements: Vec::new(),
1157            lost_improvements: Vec::new(),
1158            status_change: if old.overall_status != new.overall_status {
1159                Some((old.overall_status, new.overall_status))
1160            } else {
1161                None
1162            },
1163        };
1164
1165        for (query_id, &new_stat) in &new_status {
1166            let old_stat = old_status.get(query_id).copied();
1167            match (old_stat, new_stat) {
1168                (Some(RegressionStatus::Stable), RegressionStatus::Regressed)
1169                | (Some(RegressionStatus::Improved), RegressionStatus::Regressed)
1170                | (None, RegressionStatus::Regressed) => {
1171                    comparison.new_regressions.push(query_id.to_string());
1172                }
1173                (Some(RegressionStatus::Regressed), RegressionStatus::Stable)
1174                | (Some(RegressionStatus::Regressed), RegressionStatus::Improved) => {
1175                    comparison.fixed_regressions.push(query_id.to_string());
1176                }
1177                (Some(RegressionStatus::Stable), RegressionStatus::Improved)
1178                | (None, RegressionStatus::Improved) => {
1179                    comparison.new_improvements.push(query_id.to_string());
1180                }
1181                (Some(RegressionStatus::Improved), RegressionStatus::Stable)
1182                | (Some(RegressionStatus::Improved), RegressionStatus::Failing) => {
1183                    comparison.lost_improvements.push(query_id.to_string());
1184                }
1185                _ => {}
1186            }
1187        }
1188
1189        comparison
1190    }
1191
1192    /// Check if there are any changes
1193    pub fn has_changes(&self) -> bool {
1194        !self.new_regressions.is_empty()
1195            || !self.fixed_regressions.is_empty()
1196            || !self.new_improvements.is_empty()
1197            || !self.lost_improvements.is_empty()
1198    }
1199}
1200
// Unit tests for the regression testing framework: golden queries, execution
// results, suite bookkeeping, status classification, reports and comparisons.
#[cfg(test)]
mod tests {
    use super::*;

    // The `with_*` builder methods on `GoldenQuery` must populate the
    // corresponding fields.
    #[test]
    fn test_golden_query_creation() {
        let query = GoldenQuery::new("test_query", "SELECT * WHERE { ?s ?p ?o }", 10.0)
            .with_description("Test query")
            .with_tag("basic")
            .with_expected_count(100)
            .with_priority(1);

        assert_eq!(query.id, "test_query");
        assert_eq!(query.baseline_ms, 10.0);
        assert_eq!(query.description, "Test query");
        assert_eq!(query.tags, vec!["basic"]);
        assert_eq!(query.expected_result_count, Some(100));
        assert_eq!(query.priority, 1);
    }

    // Successful results carry timing and optional metadata; failures carry
    // the error message and are flagged as unsuccessful.
    #[test]
    fn test_execution_result() {
        let success = ExecutionResult::success(5.5)
            .with_result_count(50)
            .with_memory(1024)
            .with_metadata("version", "1.0");

        assert!(success.success);
        assert_eq!(success.execution_time_ms, 5.5);
        assert_eq!(success.result_count, Some(50));
        assert_eq!(success.memory_bytes, Some(1024));
        assert_eq!(success.metadata.get("version"), Some(&"1.0".to_string()));

        let failure = ExecutionResult::failure("Query timeout");
        assert!(!failure.success);
        assert_eq!(failure.error, Some("Query timeout".to_string()));
    }

    // Five successful runs of 9..=13 ms: counts, min/max, mean and median are
    // all exactly representable, so exact float comparison is safe here.
    #[test]
    fn test_execution_statistics() {
        let results: Vec<ExecutionResult> = vec![
            ExecutionResult::success(10.0),
            ExecutionResult::success(12.0),
            ExecutionResult::success(11.0),
            ExecutionResult::success(9.0),
            ExecutionResult::success(13.0),
        ];

        let stats = ExecutionStatistics::from_results(&results);
        assert_eq!(stats.count, 5);
        assert_eq!(stats.success_count, 5);
        assert_eq!(stats.failure_count, 0);
        assert_eq!(stats.min_ms, 9.0);
        assert_eq!(stats.max_ms, 13.0);
        assert_eq!(stats.mean_ms, 11.0);
        assert_eq!(stats.median_ms, 11.0);
    }

    // Registered golden queries can be counted and looked up by id; unknown
    // ids yield `None`.
    #[test]
    fn test_suite_creation() {
        let config = RegressionConfig::default();
        let mut suite = RegressionTestSuite::new("test_suite", config);

        suite.add_golden_query(GoldenQuery::new("q1", "SELECT ?s WHERE { ?s ?p ?o }", 10.0));
        suite.add_golden_query(GoldenQuery::new("q2", "SELECT ?p WHERE { ?s ?p ?o }", 15.0));

        assert_eq!(suite.golden_queries().count(), 2);
        assert!(suite.get_golden_query("q1").is_some());
        assert!(suite.get_golden_query("q3").is_none());
    }

    // Recording is accepted only for registered queries, and only accepted
    // recordings are counted in the suite statistics.
    #[test]
    fn test_record_execution() {
        let mut suite = RegressionTestSuite::with_defaults("test");
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        assert!(suite.record_execution("q1", ExecutionResult::success(9.5)));
        assert!(suite.record_execution("q1", ExecutionResult::success(10.5)));
        assert!(!suite.record_execution("nonexistent", ExecutionResult::success(5.0)));

        assert_eq!(suite.statistics().total_executions, 2);
    }

    // Runs at 15 ms against a 10 ms baseline exceed the 1.2 threshold and
    // must be classified as regressed.
    #[test]
    fn test_regression_detection() {
        let config = RegressionConfig {
            min_samples: 3,
            regression_threshold: 1.2,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        // Add results that show regression (50% slower)
        for _ in 0..5 {
            suite.record_execution("q1", ExecutionResult::success(15.0));
        }

        let analysis = suite.analyze_query("q1").unwrap();
        assert_eq!(analysis.status, RegressionStatus::Regressed);
        assert!(analysis.ratio > 1.2);
    }

    // Runs at 5 ms against a 10 ms baseline fall below the 0.8 threshold and
    // must be classified as improved.
    #[test]
    fn test_improvement_detection() {
        let config = RegressionConfig {
            min_samples: 3,
            improvement_threshold: 0.8,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        // Add results that show improvement (50% faster)
        for _ in 0..5 {
            suite.record_execution("q1", ExecutionResult::success(5.0));
        }

        let analysis = suite.analyze_query("q1").unwrap();
        assert_eq!(analysis.status, RegressionStatus::Improved);
        assert!(analysis.ratio < 0.8);
    }

    // Fewer recorded samples than `min_samples` must not produce a verdict.
    #[test]
    fn test_insufficient_data() {
        let config = RegressionConfig {
            min_samples: 10,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        // Only add 3 samples (less than min_samples)
        for _ in 0..3 {
            suite.record_execution("q1", ExecutionResult::success(10.0));
        }

        let analysis = suite.analyze_query("q1").unwrap();
        assert_eq!(analysis.status, RegressionStatus::InsufficientData);
    }

    // Four failures and a single success must be reported as a failing query.
    #[test]
    fn test_failing_query_detection() {
        let config = RegressionConfig {
            min_samples: 3,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        // Add mostly failures
        for _ in 0..4 {
            suite.record_execution("q1", ExecutionResult::failure("Timeout"));
        }
        suite.record_execution("q1", ExecutionResult::success(10.0));

        let analysis = suite.analyze_query("q1").unwrap();
        assert_eq!(analysis.status, RegressionStatus::Failing);
    }

    // A report over one stable, one regressed and one improved query must
    // cover all three and count at least one regression and one improvement.
    #[test]
    fn test_regression_report() {
        let config = RegressionConfig {
            min_samples: 3,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);

        suite.add_golden_query(GoldenQuery::new(
            "stable",
            "SELECT ?s WHERE { ?s ?p ?o }",
            10.0,
        ));
        suite.add_golden_query(GoldenQuery::new(
            "regressed",
            "SELECT ?p WHERE { ?s ?p ?o }",
            10.0,
        ));
        suite.add_golden_query(GoldenQuery::new(
            "improved",
            "SELECT ?o WHERE { ?s ?p ?o }",
            10.0,
        ));

        // Stable query
        for _ in 0..5 {
            suite.record_execution("stable", ExecutionResult::success(10.0));
        }

        // Regressed query
        for _ in 0..5 {
            suite.record_execution("regressed", ExecutionResult::success(15.0));
        }

        // Improved query
        for _ in 0..5 {
            suite.record_execution("improved", ExecutionResult::success(5.0));
        }

        let report = suite.analyze_regressions();
        assert_eq!(report.summary.total_count, 3);
        assert!(report.summary.regressed_count >= 1);
        assert!(report.summary.improved_count >= 1);
    }

    // `update_baseline` must return the mean of the recent runs and store it
    // on the golden query.
    #[test]
    fn test_update_baseline() {
        let config = RegressionConfig {
            min_samples: 3,
            rolling_window_size: 5,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        // Add consistent results around 8ms
        for _ in 0..5 {
            suite.record_execution("q1", ExecutionResult::success(8.0));
        }

        let new_baseline = suite.update_baseline("q1").unwrap();
        assert!((new_baseline - 8.0).abs() < 0.1);
        assert_eq!(
            suite.get_golden_query("q1").unwrap().baseline_ms,
            new_baseline
        );
    }

    // Comparing a report where both queries are stable with one where q1
    // regressed and q2 improved must flag both transitions.
    #[test]
    fn test_report_comparison() {
        let config = RegressionConfig {
            min_samples: 3,
            ..Default::default()
        };

        // Create first suite and report
        let mut suite1 = RegressionTestSuite::new("test", config.clone());
        suite1.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
        suite1.add_golden_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        for _ in 0..5 {
            suite1.record_execution("q1", ExecutionResult::success(10.0)); // Stable
            suite1.record_execution("q2", ExecutionResult::success(10.0)); // Stable
        }
        let report1 = suite1.analyze_regressions();

        // Create second suite with changes
        let mut suite2 = RegressionTestSuite::new("test", config);
        suite2.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
        suite2.add_golden_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        for _ in 0..5 {
            suite2.record_execution("q1", ExecutionResult::success(15.0)); // Regressed
            suite2.record_execution("q2", ExecutionResult::success(5.0)); // Improved
        }
        let report2 = suite2.analyze_regressions();

        let comparison = ReportComparison::compare(&report1, &report2);
        assert!(comparison.has_changes());
        assert!(comparison.new_regressions.contains(&"q1".to_string()));
        assert!(comparison.new_improvements.contains(&"q2".to_string()));
    }

    // The builder must register every added query and apply the supplied
    // configuration.
    #[test]
    fn test_suite_builder() {
        let suite = RegressionTestSuiteBuilder::new("builder_test")
            .with_config(RegressionConfig::strict())
            .add_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0))
            .add_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 20.0))
            .build();

        assert_eq!(suite.golden_queries().count(), 2);
        assert_eq!(suite.config().regression_threshold, 1.1); // Strict config
    }

    // Golden queries must survive an export/import round trip.
    #[test]
    fn test_suite_export_import() {
        let mut suite = RegressionTestSuite::with_defaults("export_test");
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
        suite.record_execution("q1", ExecutionResult::success(9.0));

        let export = suite.export();
        let imported = RegressionTestSuite::import(export);

        assert_eq!(imported.golden_queries().count(), 1);
        assert!(imported.get_golden_query("q1").is_some());
    }

    // `Display` for `RegressionStatus` must produce the upper-case labels.
    #[test]
    fn test_regression_status_display() {
        assert_eq!(format!("{}", RegressionStatus::Stable), "STABLE");
        assert_eq!(format!("{}", RegressionStatus::Regressed), "REGRESSED");
        assert_eq!(format!("{}", RegressionStatus::Improved), "IMPROVED");
        assert_eq!(format!("{}", RegressionStatus::Failing), "FAILING");
        assert_eq!(
            format!("{}", RegressionStatus::InsufficientData),
            "INSUFFICIENT_DATA"
        );
    }

    // The textual summary must mention the suite name and contain a
    // "Total:" entry.
    #[test]
    fn test_report_summary_text() {
        let config = RegressionConfig {
            min_samples: 3,
            ..Default::default()
        };
        let mut suite = RegressionTestSuite::new("summary_test", config);
        suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));

        for _ in 0..5 {
            suite.record_execution("q1", ExecutionResult::success(15.0));
        }

        let report = suite.analyze_regressions();
        let summary = report.summary_text();

        assert!(summary.contains("summary_test"));
        assert!(summary.contains("Total:"));
    }

    // The `strict` and `lenient` presets must expose the expected thresholds
    // and confidence levels.
    #[test]
    fn test_config_presets() {
        let strict = RegressionConfig::strict();
        assert_eq!(strict.regression_threshold, 1.1);
        assert_eq!(strict.confidence_level, 0.99);

        let lenient = RegressionConfig::lenient();
        assert_eq!(lenient.regression_threshold, 1.5);
        assert_eq!(lenient.confidence_level, 0.90);
    }
}