1use std::collections::{HashMap, VecDeque};
38use std::fmt;
39use std::time::SystemTime;
40
/// Tunable parameters that drive performance-regression detection.
#[derive(Debug, Clone)]
pub struct RegressionConfig {
    /// A query is flagged as regressed when `current mean / baseline` exceeds
    /// this ratio and the difference is statistically significant.
    pub regression_threshold: f64,
    /// A query is flagged as improved when `current mean / baseline` falls
    /// below this ratio and the difference is statistically significant.
    pub improvement_threshold: f64,
    /// Minimum number of recent results required before a query is analysed.
    pub min_samples: usize,
    /// How many of the most recent results form the "recent" window.
    pub rolling_window_size: usize,
    /// Confidence level; a result is significant when `p < 1 - confidence_level`.
    pub confidence_level: f64,
    /// Maximum number of results retained in each per-query history.
    pub max_history_entries: usize,
    /// Verbosity flag. NOTE(review): not read by the code visible in this file section.
    pub verbose: bool,
    /// Recent samples further than this many standard deviations from the
    /// recent mean are discarded as outliers before comparison.
    pub outlier_std_devs: f64,
    /// Minimum execution time in milliseconds.
    /// NOTE(review): not read by the code visible in this file section.
    pub min_execution_time_ms: f64,
}

impl Default for RegressionConfig {
    /// Balanced preset: 20% regression / 20% improvement thresholds at 95% confidence.
    fn default() -> Self {
        Self {
            // Detection thresholds.
            regression_threshold: 1.2,
            improvement_threshold: 0.8,
            confidence_level: 0.95,
            outlier_std_devs: 3.0,
            // Sampling and retention.
            min_samples: 5,
            rolling_window_size: 20,
            max_history_entries: 1000,
            min_execution_time_ms: 0.1,
            verbose: false,
        }
    }
}

impl RegressionConfig {
    /// Tighter preset: narrower thresholds, more samples, 99% confidence, verbose.
    pub fn strict() -> Self {
        Self {
            regression_threshold: 1.1,
            improvement_threshold: 0.9,
            min_samples: 10,
            rolling_window_size: 30,
            confidence_level: 0.99,
            max_history_entries: 2000,
            verbose: true,
            outlier_std_devs: 2.5,
            // `min_execution_time_ms` keeps the default value (0.1).
            ..Self::default()
        }
    }

    /// Looser preset: wider thresholds, fewer samples, 90% confidence.
    pub fn lenient() -> Self {
        Self {
            regression_threshold: 1.5,
            improvement_threshold: 0.5,
            min_samples: 3,
            rolling_window_size: 10,
            confidence_level: 0.90,
            max_history_entries: 500,
            outlier_std_devs: 4.0,
            min_execution_time_ms: 0.05,
            // `verbose` keeps the default value (false).
            ..Self::default()
        }
    }
}
111
/// A reference query together with the baseline timing it is compared against.
#[derive(Debug, Clone)]
pub struct GoldenQuery {
    /// Identifier used as the key for this query inside a suite.
    pub id: String,
    /// The query text.
    pub query: String,
    /// Free-form description (empty by default).
    pub description: String,
    /// Baseline execution time in milliseconds.
    pub baseline_ms: f64,
    /// Expected number of results, when known.
    pub expected_result_count: Option<usize>,
    /// Arbitrary labels attached to the query.
    pub tags: Vec<String>,
    /// Priority; `with_priority` keeps it within `1..=5`, the default is 3.
    pub priority: u8,
    /// Whether the query is active (defaults to `true`).
    pub active: bool,
    /// When this definition was created.
    pub created_at: SystemTime,
    /// When the baseline was last changed via `update_baseline`.
    pub updated_at: SystemTime,
}

impl GoldenQuery {
    /// Creates an active query with priority 3, no tags, no description and
    /// identical creation/update timestamps.
    pub fn new(id: impl Into<String>, query: impl Into<String>, baseline_ms: f64) -> Self {
        let stamp = SystemTime::now();
        Self {
            id: id.into(),
            query: query.into(),
            baseline_ms,
            description: String::new(),
            tags: Vec::new(),
            expected_result_count: None,
            priority: 3,
            active: true,
            created_at: stamp,
            updated_at: stamp,
        }
    }

    /// Replaces the description.
    pub fn with_description(self, desc: impl Into<String>) -> Self {
        Self {
            description: desc.into(),
            ..self
        }
    }

    /// Sets the expected result count.
    pub fn with_expected_count(self, count: usize) -> Self {
        Self {
            expected_result_count: Some(count),
            ..self
        }
    }

    /// Appends one tag.
    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
        let label: String = tag.into();
        self.tags.push(label);
        self
    }

    /// Sets the priority, limiting it to the range `1..=5`.
    pub fn with_priority(self, priority: u8) -> Self {
        Self {
            priority: priority.max(1).min(5),
            ..self
        }
    }

    /// Stores a new baseline and refreshes `updated_at`.
    pub fn update_baseline(&mut self, new_baseline_ms: f64) {
        self.updated_at = SystemTime::now();
        self.baseline_ms = new_baseline_ms;
    }
}
185
/// Outcome of a single execution of a query.
#[derive(Debug, Clone)]
pub struct ExecutionResult {
    /// Wall time of the run in milliseconds (`0.0` for results built by `failure`).
    pub execution_time_ms: f64,
    /// Whether the run succeeded.
    pub success: bool,
    /// Number of results returned, when recorded.
    pub result_count: Option<usize>,
    /// Memory used in bytes, when recorded.
    pub memory_bytes: Option<usize>,
    /// Error message for failed runs.
    pub error: Option<String>,
    /// When this result object was created.
    pub timestamp: SystemTime,
    /// Arbitrary key/value annotations.
    pub metadata: HashMap<String, String>,
}

impl ExecutionResult {
    /// Builds a successful result with the given timing and no optional data.
    pub fn success(execution_time_ms: f64) -> Self {
        Self {
            success: true,
            execution_time_ms,
            error: None,
            result_count: None,
            memory_bytes: None,
            metadata: HashMap::new(),
            timestamp: SystemTime::now(),
        }
    }

    /// Builds a failed result carrying `error`; the timing is recorded as `0.0`.
    pub fn failure(error: impl Into<String>) -> Self {
        Self {
            success: false,
            error: Some(error.into()),
            // Everything else matches a zero-duration result with no extras.
            ..Self::success(0.0)
        }
    }

    /// Records how many results the run produced.
    pub fn with_result_count(self, count: usize) -> Self {
        Self {
            result_count: Some(count),
            ..self
        }
    }

    /// Records the memory used by the run, in bytes.
    pub fn with_memory(self, bytes: usize) -> Self {
        Self {
            memory_bytes: Some(bytes),
            ..self
        }
    }

    /// Adds (or overwrites) one metadata entry.
    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        let (k, v): (String, String) = (key.into(), value.into());
        self.metadata.insert(k, v);
        self
    }
}
250
/// Verdict of a regression analysis for a query or for a whole report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegressionStatus {
    /// No significant change relative to the baseline.
    Stable,
    /// Significantly faster than the baseline.
    Improved,
    /// Significantly slower than the baseline.
    Regressed,
    /// Not enough usable samples to decide.
    InsufficientData,
    /// Recent executions are mostly failing.
    Failing,
}

impl fmt::Display for RegressionStatus {
    /// Writes the upper-case label of the status.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Stable => "STABLE",
            Self::Improved => "IMPROVED",
            Self::Regressed => "REGRESSED",
            Self::InsufficientData => "INSUFFICIENT_DATA",
            Self::Failing => "FAILING",
        };
        f.write_str(label)
    }
}
277
278#[derive(Debug, Clone, Default)]
280pub struct ExecutionStatistics {
281 pub count: usize,
283 pub success_count: usize,
285 pub failure_count: usize,
287 pub min_ms: f64,
289 pub max_ms: f64,
291 pub mean_ms: f64,
293 pub median_ms: f64,
295 pub std_dev_ms: f64,
297 pub p95_ms: f64,
299 pub p99_ms: f64,
301 pub cv: f64,
303}
304
305impl ExecutionStatistics {
306 pub fn from_results(results: &[ExecutionResult]) -> Self {
308 if results.is_empty() {
309 return Self::default();
310 }
311
312 let successes: Vec<f64> = results
313 .iter()
314 .filter(|r| r.success)
315 .map(|r| r.execution_time_ms)
316 .collect();
317
318 let success_count = successes.len();
319 let failure_count = results.len() - success_count;
320
321 if successes.is_empty() {
322 return Self {
323 count: results.len(),
324 success_count: 0,
325 failure_count,
326 ..Default::default()
327 };
328 }
329
330 let min_ms = successes.iter().cloned().fold(f64::INFINITY, f64::min);
331 let max_ms = successes.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
332 let mean_ms = successes.iter().sum::<f64>() / successes.len() as f64;
333
334 let variance = if successes.len() > 1 {
335 successes.iter().map(|x| (x - mean_ms).powi(2)).sum::<f64>()
336 / (successes.len() - 1) as f64
337 } else {
338 0.0
339 };
340 let std_dev_ms = variance.sqrt();
341
342 let mut sorted = successes.clone();
343 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
344
345 let median_ms = if sorted.len() % 2 == 0 {
346 (sorted[sorted.len() / 2 - 1] + sorted[sorted.len() / 2]) / 2.0
347 } else {
348 sorted[sorted.len() / 2]
349 };
350
351 let p95_idx = ((sorted.len() as f64 * 0.95) as usize).min(sorted.len() - 1);
352 let p99_idx = ((sorted.len() as f64 * 0.99) as usize).min(sorted.len() - 1);
353
354 let cv = if mean_ms > 0.0 {
355 std_dev_ms / mean_ms
356 } else {
357 0.0
358 };
359
360 Self {
361 count: results.len(),
362 success_count,
363 failure_count,
364 min_ms,
365 max_ms,
366 mean_ms,
367 median_ms,
368 std_dev_ms,
369 p95_ms: sorted[p95_idx],
370 p99_ms: sorted[p99_idx],
371 cv,
372 }
373 }
374}
375
376#[derive(Debug, Clone)]
378pub struct QueryRegressionAnalysis {
379 pub query_id: String,
381 pub status: RegressionStatus,
383 pub baseline_ms: f64,
385 pub current_mean_ms: f64,
387 pub ratio: f64,
389 pub change_percent: f64,
391 pub p_value: f64,
393 pub is_significant: bool,
395 pub ci_lower: f64,
397 pub ci_upper: f64,
399 pub recent_stats: ExecutionStatistics,
401 pub historical_stats: ExecutionStatistics,
403 pub trend: i8,
405 pub message: String,
407}
408
409impl QueryRegressionAnalysis {
410 pub fn needs_attention(&self) -> bool {
412 matches!(
413 self.status,
414 RegressionStatus::Regressed | RegressionStatus::Failing
415 )
416 }
417}
418
419#[derive(Debug, Clone)]
421pub struct RegressionReport {
422 pub suite_name: String,
424 pub generated_at: SystemTime,
426 pub overall_status: RegressionStatus,
428 pub analyses: Vec<QueryRegressionAnalysis>,
430 pub summary: ReportSummary,
432 pub config: RegressionConfig,
434}
435
436impl RegressionReport {
437 pub fn regressed_queries(&self) -> Vec<&QueryRegressionAnalysis> {
439 self.analyses
440 .iter()
441 .filter(|a| a.status == RegressionStatus::Regressed)
442 .collect()
443 }
444
445 pub fn improved_queries(&self) -> Vec<&QueryRegressionAnalysis> {
447 self.analyses
448 .iter()
449 .filter(|a| a.status == RegressionStatus::Improved)
450 .collect()
451 }
452
453 pub fn failing_queries(&self) -> Vec<&QueryRegressionAnalysis> {
455 self.analyses
456 .iter()
457 .filter(|a| a.status == RegressionStatus::Failing)
458 .collect()
459 }
460
461 pub fn has_issues(&self) -> bool {
463 self.summary.regressed_count > 0 || self.summary.failing_count > 0
464 }
465
466 pub fn summary_text(&self) -> String {
468 let mut text = format!("Regression Report: {}\n", self.suite_name);
469 text.push_str(&format!("Generated: {:?}\n\n", self.generated_at));
470 text.push_str(&format!("Overall Status: {}\n\n", self.overall_status));
471 text.push_str(&format!(
472 "Summary:\n Total: {}\n Stable: {}\n Improved: {}\n Regressed: {}\n Failing: {}\n Insufficient Data: {}\n",
473 self.summary.total_count,
474 self.summary.stable_count,
475 self.summary.improved_count,
476 self.summary.regressed_count,
477 self.summary.failing_count,
478 self.summary.insufficient_data_count
479 ));
480
481 if !self.regressed_queries().is_empty() {
482 text.push_str("\nRegressed Queries:\n");
483 for analysis in self.regressed_queries() {
484 text.push_str(&format!(
485 " - {}: {:.1}% slower ({:.2}ms -> {:.2}ms)\n",
486 analysis.query_id,
487 analysis.change_percent,
488 analysis.baseline_ms,
489 analysis.current_mean_ms
490 ));
491 }
492 }
493
494 if !self.improved_queries().is_empty() {
495 text.push_str("\nImproved Queries:\n");
496 for analysis in self.improved_queries() {
497 text.push_str(&format!(
498 " - {}: {:.1}% faster ({:.2}ms -> {:.2}ms)\n",
499 analysis.query_id,
500 -analysis.change_percent,
501 analysis.baseline_ms,
502 analysis.current_mean_ms
503 ));
504 }
505 }
506
507 text
508 }
509}
510
/// Aggregate counts and percentages for a regression report.
#[derive(Debug, Clone, Default)]
pub struct ReportSummary {
    /// Number of analyses in the report.
    pub total_count: usize,
    /// Number of analyses with status `Stable`.
    pub stable_count: usize,
    /// Number of analyses with status `Improved`.
    pub improved_count: usize,
    /// Number of analyses with status `Regressed`.
    pub regressed_count: usize,
    /// Number of analyses with status `Failing`.
    pub failing_count: usize,
    /// Number of analyses with status `InsufficientData`.
    pub insufficient_data_count: usize,
    /// Mean `change_percent` over regressed analyses (`0.0` when there are none).
    pub avg_regression_percent: f64,
    /// Mean of `-change_percent` over improved analyses (`0.0` when there are none).
    pub avg_improvement_percent: f64,
    /// Largest `change_percent` among regressed analyses.
    pub worst_regression_percent: f64,
    /// Largest `-change_percent` among improved analyses.
    pub best_improvement_percent: f64,
}
535
536#[derive(Debug, Clone)]
538struct QueryHistory {
539 results: VecDeque<ExecutionResult>,
541 max_entries: usize,
543}
544
545impl QueryHistory {
546 fn new(max_entries: usize) -> Self {
547 Self {
548 results: VecDeque::new(),
549 max_entries,
550 }
551 }
552
553 fn add(&mut self, result: ExecutionResult) {
554 self.results.push_back(result);
555 while self.results.len() > self.max_entries {
556 self.results.pop_front();
557 }
558 }
559
560 fn recent(&self, count: usize) -> Vec<&ExecutionResult> {
561 self.results.iter().rev().take(count).collect()
562 }
563
564 fn all(&self) -> Vec<&ExecutionResult> {
565 self.results.iter().collect()
566 }
567}
568
/// A named collection of golden queries together with their recorded
/// execution histories and running counters.
#[derive(Debug)]
pub struct RegressionTestSuite {
    /// Suite name; copied into reports and exports.
    name: String,
    /// Thresholds and limits used for analysis and history retention.
    config: RegressionConfig,
    /// Registered golden queries, keyed by query id.
    golden_queries: HashMap<String, GoldenQuery>,
    /// Recorded execution results, keyed by query id.
    history: HashMap<String, QueryHistory>,
    /// Running counters for executions and analyses.
    stats: SuiteStatistics,
}
583
/// Running counters maintained by a `RegressionTestSuite`.
#[derive(Debug, Clone, Default)]
pub struct SuiteStatistics {
    /// Number of execution results accepted by the suite.
    pub total_executions: usize,
    /// Number of times a full regression analysis has been run.
    pub total_analyses: usize,
    /// Cumulative count of `Regressed` verdicts across all full analyses.
    pub regressions_detected: usize,
    /// Cumulative count of `Improved` verdicts across all full analyses.
    pub improvements_detected: usize,
    /// Start time of the most recent full analysis, if any.
    pub last_analysis: Option<SystemTime>,
}
598
599impl RegressionTestSuite {
600 pub fn new(name: impl Into<String>, config: RegressionConfig) -> Self {
602 Self {
603 name: name.into(),
604 config,
605 golden_queries: HashMap::new(),
606 history: HashMap::new(),
607 stats: SuiteStatistics::default(),
608 }
609 }
610
611 pub fn with_defaults(name: impl Into<String>) -> Self {
613 Self::new(name, RegressionConfig::default())
614 }
615
616 pub fn add_golden_query(&mut self, query: GoldenQuery) {
618 let id = query.id.clone();
619 self.golden_queries.insert(id.clone(), query);
620 self.history
621 .entry(id)
622 .or_insert_with(|| QueryHistory::new(self.config.max_history_entries));
623 }
624
625 pub fn remove_golden_query(&mut self, id: &str) -> Option<GoldenQuery> {
627 self.history.remove(id);
628 self.golden_queries.remove(id)
629 }
630
    /// Returns the golden query registered under `id`, if any.
    pub fn get_golden_query(&self, id: &str) -> Option<&GoldenQuery> {
        self.golden_queries.get(id)
    }
635
    /// Iterates over all registered golden queries (in the map's iteration order).
    pub fn golden_queries(&self) -> impl Iterator<Item = &GoldenQuery> {
        self.golden_queries.values()
    }
640
641 pub fn record_execution(&mut self, query_id: &str, result: ExecutionResult) -> bool {
643 if let Some(history) = self.history.get_mut(query_id) {
644 history.add(result);
645 self.stats.total_executions += 1;
646 true
647 } else if self.golden_queries.contains_key(query_id) {
648 let mut history = QueryHistory::new(self.config.max_history_entries);
649 history.add(result);
650 self.history.insert(query_id.to_string(), history);
651 self.stats.total_executions += 1;
652 true
653 } else {
654 false
655 }
656 }
657
658 pub fn record_executions(&mut self, query_id: &str, results: Vec<ExecutionResult>) -> usize {
660 let mut recorded = 0;
661 for result in results {
662 if self.record_execution(query_id, result) {
663 recorded += 1;
664 }
665 }
666 recorded
667 }
668
669 pub fn analyze_query(&self, query_id: &str) -> Option<QueryRegressionAnalysis> {
671 let query = self.golden_queries.get(query_id)?;
672 let history = self.history.get(query_id)?;
673
674 let all_results: Vec<ExecutionResult> = history.all().into_iter().cloned().collect();
675 let recent_results: Vec<ExecutionResult> = history
676 .recent(self.config.rolling_window_size)
677 .into_iter()
678 .cloned()
679 .collect();
680
681 if recent_results.len() < self.config.min_samples {
683 return Some(QueryRegressionAnalysis {
684 query_id: query_id.to_string(),
685 status: RegressionStatus::InsufficientData,
686 baseline_ms: query.baseline_ms,
687 current_mean_ms: 0.0,
688 ratio: 1.0,
689 change_percent: 0.0,
690 p_value: 1.0,
691 is_significant: false,
692 ci_lower: 0.0,
693 ci_upper: 0.0,
694 recent_stats: ExecutionStatistics::default(),
695 historical_stats: ExecutionStatistics::default(),
696 trend: 0,
697 message: format!(
698 "Insufficient data: {} samples (need {})",
699 recent_results.len(),
700 self.config.min_samples
701 ),
702 });
703 }
704
705 let recent_failures = recent_results.iter().filter(|r| !r.success).count();
707 let failure_rate = recent_failures as f64 / recent_results.len() as f64;
708 if failure_rate > 0.5 {
709 return Some(QueryRegressionAnalysis {
710 query_id: query_id.to_string(),
711 status: RegressionStatus::Failing,
712 baseline_ms: query.baseline_ms,
713 current_mean_ms: 0.0,
714 ratio: f64::INFINITY,
715 change_percent: f64::INFINITY,
716 p_value: 0.0,
717 is_significant: true,
718 ci_lower: 0.0,
719 ci_upper: 0.0,
720 recent_stats: ExecutionStatistics::from_results(&recent_results),
721 historical_stats: ExecutionStatistics::from_results(&all_results),
722 trend: 1,
723 message: format!("Query failing: {:.1}% failure rate", failure_rate * 100.0),
724 });
725 }
726
727 let recent_stats = ExecutionStatistics::from_results(&recent_results);
729 let historical_stats = ExecutionStatistics::from_results(&all_results);
730
731 let filtered_times: Vec<f64> = recent_results
733 .iter()
734 .filter(|r| r.success)
735 .map(|r| r.execution_time_ms)
736 .filter(|&t| {
737 if recent_stats.std_dev_ms > 0.0 {
738 (t - recent_stats.mean_ms).abs()
739 <= self.config.outlier_std_devs * recent_stats.std_dev_ms
740 } else {
741 true
742 }
743 })
744 .collect();
745
746 if filtered_times.is_empty() {
747 return Some(QueryRegressionAnalysis {
748 query_id: query_id.to_string(),
749 status: RegressionStatus::InsufficientData,
750 baseline_ms: query.baseline_ms,
751 current_mean_ms: 0.0,
752 ratio: 1.0,
753 change_percent: 0.0,
754 p_value: 1.0,
755 is_significant: false,
756 ci_lower: 0.0,
757 ci_upper: 0.0,
758 recent_stats,
759 historical_stats,
760 trend: 0,
761 message: "All samples filtered as outliers".to_string(),
762 });
763 }
764
765 let current_mean: f64 = filtered_times.iter().sum::<f64>() / filtered_times.len() as f64;
766 let ratio = current_mean / query.baseline_ms;
767 let change_percent = (ratio - 1.0) * 100.0;
768
769 let (ci_lower, ci_upper, p_value) =
771 self.calculate_statistics(&filtered_times, query.baseline_ms);
772 let is_significant = p_value < (1.0 - self.config.confidence_level);
773
774 let trend = self.calculate_trend(&all_results);
776
777 let status = if ratio > self.config.regression_threshold && is_significant {
779 RegressionStatus::Regressed
780 } else if ratio < self.config.improvement_threshold && is_significant {
781 RegressionStatus::Improved
782 } else {
783 RegressionStatus::Stable
784 };
785
786 let message = match status {
787 RegressionStatus::Regressed => format!(
788 "Performance regressed by {:.1}% (baseline: {:.2}ms, current: {:.2}ms)",
789 change_percent, query.baseline_ms, current_mean
790 ),
791 RegressionStatus::Improved => format!(
792 "Performance improved by {:.1}% (baseline: {:.2}ms, current: {:.2}ms)",
793 -change_percent, query.baseline_ms, current_mean
794 ),
795 RegressionStatus::Stable => format!(
796 "Performance stable ({:.1}% change, baseline: {:.2}ms, current: {:.2}ms)",
797 change_percent, query.baseline_ms, current_mean
798 ),
799 _ => String::new(),
800 };
801
802 Some(QueryRegressionAnalysis {
803 query_id: query_id.to_string(),
804 status,
805 baseline_ms: query.baseline_ms,
806 current_mean_ms: current_mean,
807 ratio,
808 change_percent,
809 p_value,
810 is_significant,
811 ci_lower,
812 ci_upper,
813 recent_stats,
814 historical_stats,
815 trend,
816 message,
817 })
818 }
819
820 pub fn analyze_regressions(&mut self) -> RegressionReport {
822 self.stats.total_analyses += 1;
823 self.stats.last_analysis = Some(SystemTime::now());
824
825 let mut analyses = Vec::new();
826 for query_id in self.golden_queries.keys() {
827 if let Some(analysis) = self.analyze_query(query_id) {
828 if analysis.status == RegressionStatus::Regressed {
829 self.stats.regressions_detected += 1;
830 } else if analysis.status == RegressionStatus::Improved {
831 self.stats.improvements_detected += 1;
832 }
833 analyses.push(analysis);
834 }
835 }
836
837 analyses.sort_by(|a, b| {
839 let status_order = |s: &RegressionStatus| match s {
840 RegressionStatus::Failing => 0,
841 RegressionStatus::Regressed => 1,
842 RegressionStatus::Improved => 2,
843 RegressionStatus::Stable => 3,
844 RegressionStatus::InsufficientData => 4,
845 };
846 let a_order = status_order(&a.status);
847 let b_order = status_order(&b.status);
848 if a_order != b_order {
849 a_order.cmp(&b_order)
850 } else {
851 b.change_percent
852 .abs()
853 .partial_cmp(&a.change_percent.abs())
854 .unwrap_or(std::cmp::Ordering::Equal)
855 }
856 });
857
858 let summary = self.calculate_summary(&analyses);
859 let overall_status = if summary.regressed_count > 0 || summary.failing_count > 0 {
860 RegressionStatus::Regressed
861 } else if summary.improved_count > 0 {
862 RegressionStatus::Improved
863 } else if summary.insufficient_data_count == summary.total_count {
864 RegressionStatus::InsufficientData
865 } else {
866 RegressionStatus::Stable
867 };
868
869 RegressionReport {
870 suite_name: self.name.clone(),
871 generated_at: SystemTime::now(),
872 overall_status,
873 analyses,
874 summary,
875 config: self.config.clone(),
876 }
877 }
878
879 pub fn update_baseline(&mut self, query_id: &str) -> Option<f64> {
881 let history = self.history.get(query_id)?;
882 let recent: Vec<f64> = history
883 .recent(self.config.rolling_window_size)
884 .into_iter()
885 .filter(|r| r.success)
886 .map(|r| r.execution_time_ms)
887 .collect();
888
889 if recent.len() >= self.config.min_samples {
890 let new_baseline = recent.iter().sum::<f64>() / recent.len() as f64;
891 if let Some(query) = self.golden_queries.get_mut(query_id) {
892 query.update_baseline(new_baseline);
893 return Some(new_baseline);
894 }
895 }
896 None
897 }
898
    /// Returns the suite's running counters.
    pub fn statistics(&self) -> &SuiteStatistics {
        &self.stats
    }
903
    /// Returns the configuration currently in effect.
    pub fn config(&self) -> &RegressionConfig {
        &self.config
    }
908
909 pub fn set_config(&mut self, config: RegressionConfig) {
911 self.config = config;
912 }
913
914 pub fn clear_history(&mut self) {
916 for history in self.history.values_mut() {
917 history.results.clear();
918 }
919 }
920
921 pub fn export(&self) -> SuiteExport {
923 SuiteExport {
924 name: self.name.clone(),
925 config: self.config.clone(),
926 golden_queries: self.golden_queries.values().cloned().collect(),
927 stats: self.stats.clone(),
928 }
929 }
930
931 pub fn import(data: SuiteExport) -> Self {
933 let mut suite = Self::new(data.name, data.config);
934 for query in data.golden_queries {
935 suite.add_golden_query(query);
936 }
937 suite.stats = data.stats;
938 suite
939 }
940
941 fn calculate_statistics(&self, samples: &[f64], baseline: f64) -> (f64, f64, f64) {
944 if samples.is_empty() {
945 return (0.0, 0.0, 1.0);
946 }
947
948 let n = samples.len() as f64;
949 let mean = samples.iter().sum::<f64>() / n;
950
951 if samples.len() < 2 {
952 return (mean, mean, 0.5);
953 }
954
955 let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0);
956 let std_error = (variance / n).sqrt();
957
958 let t_critical = if n > 30.0 { 1.96 } else { 2.0 + 1.0 / n.sqrt() };
960
961 let ci_lower = mean - t_critical * std_error;
962 let ci_upper = mean + t_critical * std_error;
963
964 let t_stat = (mean - baseline) / std_error;
966 let p_value = self.approximate_p_value(t_stat.abs(), (n - 1.0) as usize);
967
968 (ci_lower, ci_upper, p_value)
969 }
970
971 fn approximate_p_value(&self, t_stat: f64, df: usize) -> f64 {
972 let df = df as f64;
975 let _x = df / (df + t_stat * t_stat);
977
978 if t_stat < 0.5 {
981 1.0
982 } else if t_stat > 5.0 {
983 0.0001
984 } else {
985 let p = 2.0 * (1.0 - 0.5 * (1.0 + (t_stat / (1.0 + t_stat / df.sqrt())).tanh()));
987 p.clamp(0.0001, 1.0)
988 }
989 }
990
991 fn calculate_trend(&self, results: &[ExecutionResult]) -> i8 {
992 let times: Vec<f64> = results
993 .iter()
994 .filter(|r| r.success)
995 .map(|r| r.execution_time_ms)
996 .collect();
997
998 if times.len() < 3 {
999 return 0;
1000 }
1001
1002 let third = times.len() / 3;
1004 let first_third_avg: f64 = times[..third].iter().sum::<f64>() / third as f64;
1005 let last_third_avg: f64 = times[times.len() - third..].iter().sum::<f64>() / third as f64;
1006
1007 let ratio = last_third_avg / first_third_avg;
1008 if ratio > 1.1 {
1009 1 } else if ratio < 0.9 {
1011 -1 } else {
1013 0 }
1015 }
1016
1017 fn calculate_summary(&self, analyses: &[QueryRegressionAnalysis]) -> ReportSummary {
1018 let mut summary = ReportSummary {
1019 total_count: analyses.len(),
1020 ..Default::default()
1021 };
1022
1023 let mut regression_percents = Vec::new();
1024 let mut improvement_percents = Vec::new();
1025
1026 for analysis in analyses {
1027 match analysis.status {
1028 RegressionStatus::Stable => summary.stable_count += 1,
1029 RegressionStatus::Improved => {
1030 summary.improved_count += 1;
1031 improvement_percents.push(-analysis.change_percent);
1032 }
1033 RegressionStatus::Regressed => {
1034 summary.regressed_count += 1;
1035 regression_percents.push(analysis.change_percent);
1036 }
1037 RegressionStatus::InsufficientData => summary.insufficient_data_count += 1,
1038 RegressionStatus::Failing => summary.failing_count += 1,
1039 }
1040 }
1041
1042 if !regression_percents.is_empty() {
1043 summary.avg_regression_percent =
1044 regression_percents.iter().sum::<f64>() / regression_percents.len() as f64;
1045 summary.worst_regression_percent = regression_percents
1046 .iter()
1047 .cloned()
1048 .fold(f64::NEG_INFINITY, f64::max);
1049 }
1050
1051 if !improvement_percents.is_empty() {
1052 summary.avg_improvement_percent =
1053 improvement_percents.iter().sum::<f64>() / improvement_percents.len() as f64;
1054 summary.best_improvement_percent = improvement_percents
1055 .iter()
1056 .cloned()
1057 .fold(f64::NEG_INFINITY, f64::max);
1058 }
1059
1060 summary
1061 }
1062}
1063
/// Portable snapshot of a suite's definition and counters.
/// Execution histories are not included.
#[derive(Debug, Clone)]
pub struct SuiteExport {
    /// Suite name.
    pub name: String,
    /// Configuration in effect when the snapshot was taken.
    pub config: RegressionConfig,
    /// Golden queries registered in the suite.
    pub golden_queries: Vec<GoldenQuery>,
    /// Running counters at the time of the snapshot.
    pub stats: SuiteStatistics,
}
1076
1077#[derive(Debug, Default)]
1079pub struct RegressionTestSuiteBuilder {
1080 name: String,
1081 config: Option<RegressionConfig>,
1082 golden_queries: Vec<GoldenQuery>,
1083}
1084
1085impl RegressionTestSuiteBuilder {
1086 pub fn new(name: impl Into<String>) -> Self {
1088 Self {
1089 name: name.into(),
1090 config: None,
1091 golden_queries: Vec::new(),
1092 }
1093 }
1094
1095 pub fn with_config(mut self, config: RegressionConfig) -> Self {
1097 self.config = Some(config);
1098 self
1099 }
1100
1101 pub fn add_query(mut self, query: GoldenQuery) -> Self {
1103 self.golden_queries.push(query);
1104 self
1105 }
1106
1107 pub fn add_queries(mut self, queries: Vec<GoldenQuery>) -> Self {
1109 self.golden_queries.extend(queries);
1110 self
1111 }
1112
1113 pub fn build(self) -> RegressionTestSuite {
1115 let mut suite = RegressionTestSuite::new(self.name, self.config.unwrap_or_default());
1116 for query in self.golden_queries {
1117 suite.add_golden_query(query);
1118 }
1119 suite
1120 }
1121}
1122
1123#[derive(Debug, Clone)]
1125pub struct ReportComparison {
1126 pub new_regressions: Vec<String>,
1128 pub fixed_regressions: Vec<String>,
1130 pub new_improvements: Vec<String>,
1132 pub lost_improvements: Vec<String>,
1134 pub status_change: Option<(RegressionStatus, RegressionStatus)>,
1136}
1137
1138impl ReportComparison {
1139 pub fn compare(old: &RegressionReport, new: &RegressionReport) -> Self {
1141 let old_status: HashMap<&str, RegressionStatus> = old
1142 .analyses
1143 .iter()
1144 .map(|a| (a.query_id.as_str(), a.status))
1145 .collect();
1146
1147 let new_status: HashMap<&str, RegressionStatus> = new
1148 .analyses
1149 .iter()
1150 .map(|a| (a.query_id.as_str(), a.status))
1151 .collect();
1152
1153 let mut comparison = Self {
1154 new_regressions: Vec::new(),
1155 fixed_regressions: Vec::new(),
1156 new_improvements: Vec::new(),
1157 lost_improvements: Vec::new(),
1158 status_change: if old.overall_status != new.overall_status {
1159 Some((old.overall_status, new.overall_status))
1160 } else {
1161 None
1162 },
1163 };
1164
1165 for (query_id, &new_stat) in &new_status {
1166 let old_stat = old_status.get(query_id).copied();
1167 match (old_stat, new_stat) {
1168 (Some(RegressionStatus::Stable), RegressionStatus::Regressed)
1169 | (Some(RegressionStatus::Improved), RegressionStatus::Regressed)
1170 | (None, RegressionStatus::Regressed) => {
1171 comparison.new_regressions.push(query_id.to_string());
1172 }
1173 (Some(RegressionStatus::Regressed), RegressionStatus::Stable)
1174 | (Some(RegressionStatus::Regressed), RegressionStatus::Improved) => {
1175 comparison.fixed_regressions.push(query_id.to_string());
1176 }
1177 (Some(RegressionStatus::Stable), RegressionStatus::Improved)
1178 | (None, RegressionStatus::Improved) => {
1179 comparison.new_improvements.push(query_id.to_string());
1180 }
1181 (Some(RegressionStatus::Improved), RegressionStatus::Stable)
1182 | (Some(RegressionStatus::Improved), RegressionStatus::Failing) => {
1183 comparison.lost_improvements.push(query_id.to_string());
1184 }
1185 _ => {}
1186 }
1187 }
1188
1189 comparison
1190 }
1191
1192 pub fn has_changes(&self) -> bool {
1194 !self.new_regressions.is_empty()
1195 || !self.fixed_regressions.is_empty()
1196 || !self.new_improvements.is_empty()
1197 || !self.lost_improvements.is_empty()
1198 }
1199}
1200
1201#[cfg(test)]
1202mod tests {
1203 use super::*;
1204
1205 #[test]
1206 fn test_golden_query_creation() {
1207 let query = GoldenQuery::new("test_query", "SELECT * WHERE { ?s ?p ?o }", 10.0)
1208 .with_description("Test query")
1209 .with_tag("basic")
1210 .with_expected_count(100)
1211 .with_priority(1);
1212
1213 assert_eq!(query.id, "test_query");
1214 assert_eq!(query.baseline_ms, 10.0);
1215 assert_eq!(query.description, "Test query");
1216 assert_eq!(query.tags, vec!["basic"]);
1217 assert_eq!(query.expected_result_count, Some(100));
1218 assert_eq!(query.priority, 1);
1219 }
1220
1221 #[test]
1222 fn test_execution_result() {
1223 let success = ExecutionResult::success(5.5)
1224 .with_result_count(50)
1225 .with_memory(1024)
1226 .with_metadata("version", "1.0");
1227
1228 assert!(success.success);
1229 assert_eq!(success.execution_time_ms, 5.5);
1230 assert_eq!(success.result_count, Some(50));
1231 assert_eq!(success.memory_bytes, Some(1024));
1232 assert_eq!(success.metadata.get("version"), Some(&"1.0".to_string()));
1233
1234 let failure = ExecutionResult::failure("Query timeout");
1235 assert!(!failure.success);
1236 assert_eq!(failure.error, Some("Query timeout".to_string()));
1237 }
1238
1239 #[test]
1240 fn test_execution_statistics() {
1241 let results: Vec<ExecutionResult> = vec![
1242 ExecutionResult::success(10.0),
1243 ExecutionResult::success(12.0),
1244 ExecutionResult::success(11.0),
1245 ExecutionResult::success(9.0),
1246 ExecutionResult::success(13.0),
1247 ];
1248
1249 let stats = ExecutionStatistics::from_results(&results);
1250 assert_eq!(stats.count, 5);
1251 assert_eq!(stats.success_count, 5);
1252 assert_eq!(stats.failure_count, 0);
1253 assert_eq!(stats.min_ms, 9.0);
1254 assert_eq!(stats.max_ms, 13.0);
1255 assert_eq!(stats.mean_ms, 11.0);
1256 assert_eq!(stats.median_ms, 11.0);
1257 }
1258
1259 #[test]
1260 fn test_suite_creation() {
1261 let config = RegressionConfig::default();
1262 let mut suite = RegressionTestSuite::new("test_suite", config);
1263
1264 suite.add_golden_query(GoldenQuery::new("q1", "SELECT ?s WHERE { ?s ?p ?o }", 10.0));
1265 suite.add_golden_query(GoldenQuery::new("q2", "SELECT ?p WHERE { ?s ?p ?o }", 15.0));
1266
1267 assert_eq!(suite.golden_queries().count(), 2);
1268 assert!(suite.get_golden_query("q1").is_some());
1269 assert!(suite.get_golden_query("q3").is_none());
1270 }
1271
1272 #[test]
1273 fn test_record_execution() {
1274 let mut suite = RegressionTestSuite::with_defaults("test");
1275 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1276
1277 assert!(suite.record_execution("q1", ExecutionResult::success(9.5)));
1278 assert!(suite.record_execution("q1", ExecutionResult::success(10.5)));
1279 assert!(!suite.record_execution("nonexistent", ExecutionResult::success(5.0)));
1280
1281 assert_eq!(suite.statistics().total_executions, 2);
1282 }
1283
1284 #[test]
1285 fn test_regression_detection() {
1286 let config = RegressionConfig {
1287 min_samples: 3,
1288 regression_threshold: 1.2,
1289 ..Default::default()
1290 };
1291 let mut suite = RegressionTestSuite::new("test", config);
1292 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1293
1294 for _ in 0..5 {
1296 suite.record_execution("q1", ExecutionResult::success(15.0));
1297 }
1298
1299 let analysis = suite.analyze_query("q1").unwrap();
1300 assert_eq!(analysis.status, RegressionStatus::Regressed);
1301 assert!(analysis.ratio > 1.2);
1302 }
1303
1304 #[test]
1305 fn test_improvement_detection() {
1306 let config = RegressionConfig {
1307 min_samples: 3,
1308 improvement_threshold: 0.8,
1309 ..Default::default()
1310 };
1311 let mut suite = RegressionTestSuite::new("test", config);
1312 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1313
1314 for _ in 0..5 {
1316 suite.record_execution("q1", ExecutionResult::success(5.0));
1317 }
1318
1319 let analysis = suite.analyze_query("q1").unwrap();
1320 assert_eq!(analysis.status, RegressionStatus::Improved);
1321 assert!(analysis.ratio < 0.8);
1322 }
1323
1324 #[test]
1325 fn test_insufficient_data() {
1326 let config = RegressionConfig {
1327 min_samples: 10,
1328 ..Default::default()
1329 };
1330 let mut suite = RegressionTestSuite::new("test", config);
1331 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1332
1333 for _ in 0..3 {
1335 suite.record_execution("q1", ExecutionResult::success(10.0));
1336 }
1337
1338 let analysis = suite.analyze_query("q1").unwrap();
1339 assert_eq!(analysis.status, RegressionStatus::InsufficientData);
1340 }
1341
1342 #[test]
1343 fn test_failing_query_detection() {
1344 let config = RegressionConfig {
1345 min_samples: 3,
1346 ..Default::default()
1347 };
1348 let mut suite = RegressionTestSuite::new("test", config);
1349 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1350
1351 for _ in 0..4 {
1353 suite.record_execution("q1", ExecutionResult::failure("Timeout"));
1354 }
1355 suite.record_execution("q1", ExecutionResult::success(10.0));
1356
1357 let analysis = suite.analyze_query("q1").unwrap();
1358 assert_eq!(analysis.status, RegressionStatus::Failing);
1359 }
1360
1361 #[test]
1362 fn test_regression_report() {
1363 let config = RegressionConfig {
1364 min_samples: 3,
1365 ..Default::default()
1366 };
1367 let mut suite = RegressionTestSuite::new("test", config);
1368
1369 suite.add_golden_query(GoldenQuery::new(
1370 "stable",
1371 "SELECT ?s WHERE { ?s ?p ?o }",
1372 10.0,
1373 ));
1374 suite.add_golden_query(GoldenQuery::new(
1375 "regressed",
1376 "SELECT ?p WHERE { ?s ?p ?o }",
1377 10.0,
1378 ));
1379 suite.add_golden_query(GoldenQuery::new(
1380 "improved",
1381 "SELECT ?o WHERE { ?s ?p ?o }",
1382 10.0,
1383 ));
1384
1385 for _ in 0..5 {
1387 suite.record_execution("stable", ExecutionResult::success(10.0));
1388 }
1389
1390 for _ in 0..5 {
1392 suite.record_execution("regressed", ExecutionResult::success(15.0));
1393 }
1394
1395 for _ in 0..5 {
1397 suite.record_execution("improved", ExecutionResult::success(5.0));
1398 }
1399
1400 let report = suite.analyze_regressions();
1401 assert_eq!(report.summary.total_count, 3);
1402 assert!(report.summary.regressed_count >= 1);
1403 assert!(report.summary.improved_count >= 1);
1404 }
1405
1406 #[test]
1407 fn test_update_baseline() {
1408 let config = RegressionConfig {
1409 min_samples: 3,
1410 rolling_window_size: 5,
1411 ..Default::default()
1412 };
1413 let mut suite = RegressionTestSuite::new("test", config);
1414 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1415
1416 for _ in 0..5 {
1418 suite.record_execution("q1", ExecutionResult::success(8.0));
1419 }
1420
1421 let new_baseline = suite.update_baseline("q1").unwrap();
1422 assert!((new_baseline - 8.0).abs() < 0.1);
1423 assert_eq!(
1424 suite.get_golden_query("q1").unwrap().baseline_ms,
1425 new_baseline
1426 );
1427 }
1428
1429 #[test]
1430 fn test_report_comparison() {
1431 let config = RegressionConfig {
1432 min_samples: 3,
1433 ..Default::default()
1434 };
1435
1436 let mut suite1 = RegressionTestSuite::new("test", config.clone());
1438 suite1.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1439 suite1.add_golden_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1440
1441 for _ in 0..5 {
1442 suite1.record_execution("q1", ExecutionResult::success(10.0)); suite1.record_execution("q2", ExecutionResult::success(10.0)); }
1445 let report1 = suite1.analyze_regressions();
1446
1447 let mut suite2 = RegressionTestSuite::new("test", config);
1449 suite2.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1450 suite2.add_golden_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1451
1452 for _ in 0..5 {
1453 suite2.record_execution("q1", ExecutionResult::success(15.0)); suite2.record_execution("q2", ExecutionResult::success(5.0)); }
1456 let report2 = suite2.analyze_regressions();
1457
1458 let comparison = ReportComparison::compare(&report1, &report2);
1459 assert!(comparison.has_changes());
1460 assert!(comparison.new_regressions.contains(&"q1".to_string()));
1461 assert!(comparison.new_improvements.contains(&"q2".to_string()));
1462 }
1463
1464 #[test]
1465 fn test_suite_builder() {
1466 let suite = RegressionTestSuiteBuilder::new("builder_test")
1467 .with_config(RegressionConfig::strict())
1468 .add_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0))
1469 .add_query(GoldenQuery::new("q2", "SELECT * WHERE { ?s ?p ?o }", 20.0))
1470 .build();
1471
1472 assert_eq!(suite.golden_queries().count(), 2);
1473 assert_eq!(suite.config().regression_threshold, 1.1); }
1475
1476 #[test]
1477 fn test_suite_export_import() {
1478 let mut suite = RegressionTestSuite::with_defaults("export_test");
1479 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1480 suite.record_execution("q1", ExecutionResult::success(9.0));
1481
1482 let export = suite.export();
1483 let imported = RegressionTestSuite::import(export);
1484
1485 assert_eq!(imported.golden_queries().count(), 1);
1486 assert!(imported.get_golden_query("q1").is_some());
1487 }
1488
1489 #[test]
1490 fn test_regression_status_display() {
1491 assert_eq!(format!("{}", RegressionStatus::Stable), "STABLE");
1492 assert_eq!(format!("{}", RegressionStatus::Regressed), "REGRESSED");
1493 assert_eq!(format!("{}", RegressionStatus::Improved), "IMPROVED");
1494 assert_eq!(format!("{}", RegressionStatus::Failing), "FAILING");
1495 assert_eq!(
1496 format!("{}", RegressionStatus::InsufficientData),
1497 "INSUFFICIENT_DATA"
1498 );
1499 }
1500
1501 #[test]
1502 fn test_report_summary_text() {
1503 let config = RegressionConfig {
1504 min_samples: 3,
1505 ..Default::default()
1506 };
1507 let mut suite = RegressionTestSuite::new("summary_test", config);
1508 suite.add_golden_query(GoldenQuery::new("q1", "SELECT * WHERE { ?s ?p ?o }", 10.0));
1509
1510 for _ in 0..5 {
1511 suite.record_execution("q1", ExecutionResult::success(15.0));
1512 }
1513
1514 let report = suite.analyze_regressions();
1515 let summary = report.summary_text();
1516
1517 assert!(summary.contains("summary_test"));
1518 assert!(summary.contains("Total:"));
1519 }
1520
1521 #[test]
1522 fn test_config_presets() {
1523 let strict = RegressionConfig::strict();
1524 assert_eq!(strict.regression_threshold, 1.1);
1525 assert_eq!(strict.confidence_level, 0.99);
1526
1527 let lenient = RegressionConfig::lenient();
1528 assert_eq!(lenient.regression_threshold, 1.5);
1529 assert_eq!(lenient.confidence_level, 0.90);
1530 }
1531}