// mockforge_reporting/comparison.rs

1//! Comparison reports for orchestration executions
2
3use crate::pdf::ExecutionReport;
4use crate::{ReportingError, Result};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
/// Comparison report produced by `ComparisonReportGenerator::compare`,
/// summarizing how one or more runs differ from a baseline run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonReport {
    /// Summary of the baseline execution every other run is compared against.
    pub baseline_run: ExecutionSummary,
    /// Summaries of the runs compared against the baseline.
    pub comparison_runs: Vec<ExecutionSummary>,
    /// Per-metric differences accumulated across all comparison runs.
    pub metric_differences: Vec<MetricDifference>,
    /// Significant metric changes in the "worse" direction.
    pub regressions: Vec<Regression>,
    /// Significant metric changes in the "better" direction.
    pub improvements: Vec<Improvement>,
    /// Overall verdict, summary text, and confidence derived from the counts above.
    pub overall_assessment: ComparisonAssessment,
}
18
/// Execution summary for comparison
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionSummary {
    /// Name of the orchestration that was executed.
    pub orchestration_name: String,
    /// Run identifier (derived from the start-time Unix timestamp).
    pub run_id: String,
    /// Start time of the execution.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Final status string (e.g. "Completed").
    pub status: String,
    /// Wall-clock duration of the run in seconds.
    pub duration_seconds: u64,
    /// Flat metric-name → value map used for difference calculation.
    pub metrics_snapshot: HashMap<String, f64>,
}
29
/// Difference in a metric between the baseline and one comparison run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricDifference {
    /// Name of the metric (key in the `metrics_snapshot` maps).
    pub metric_name: String,
    /// Metric value in the baseline run.
    pub baseline_value: f64,
    /// Metric value in the comparison run.
    pub comparison_value: f64,
    /// `comparison_value - baseline_value`.
    pub absolute_difference: f64,
    /// Relative change in percent; 100.0 when the baseline was zero and the
    /// comparison value is non-zero.
    pub percentage_difference: f64,
    /// Whether the metric increased, decreased, or stayed exactly the same.
    pub direction: ChangeDirection,
    /// Significance bucket derived from the absolute percentage difference.
    pub significance: SignificanceLevel,
}
41
/// Direction of change of a metric relative to the baseline.
/// Serialized in lowercase (e.g. `"nochange"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ChangeDirection {
    /// Comparison value is greater than the baseline value.
    Increase,
    /// Comparison value is less than the baseline value.
    Decrease,
    /// Values are exactly equal.
    NoChange,
}
50
/// Statistical significance level
///
/// Buckets are assigned from the absolute percentage change (see
/// `determine_significance`): < 5% not significant, < 15% low,
/// < 30% medium, otherwise high. Serialized in lowercase.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SignificanceLevel {
    NotSignificant,
    Low,
    Medium,
    High,
}
60
/// Performance regression: a significant metric change in the "bad" direction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Regression {
    /// Name of the regressed metric.
    pub metric_name: String,
    /// Metric value in the baseline run.
    pub baseline_value: f64,
    /// Metric value in the regressed comparison run.
    pub regressed_value: f64,
    /// Percentage change attributed to the regression.
    pub impact_percentage: f64,
    /// Severity label (one of "Low", "Medium", "High", "Critical").
    pub severity: String,
    /// Human-readable description of the change.
    pub description: String,
}
71
/// Performance improvement: a significant metric change in the "good" direction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Improvement {
    /// Name of the improved metric.
    pub metric_name: String,
    /// Metric value in the baseline run.
    pub baseline_value: f64,
    /// Metric value in the improved comparison run.
    pub improved_value: f64,
    /// Absolute percentage change attributed to the improvement.
    pub improvement_percentage: f64,
    /// Human-readable description of the change.
    pub description: String,
}
81
/// Overall comparison assessment summarizing the verdict across all runs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonAssessment {
    /// Overall verdict across all comparison runs.
    pub verdict: ComparisonVerdict,
    /// Human-readable summary sentence for the verdict.
    pub summary: String,
    /// Total number of detected regressions.
    pub regressions_count: usize,
    /// Total number of detected improvements.
    pub improvements_count: usize,
    /// Heuristic confidence in the verdict (0.5, 0.7, or 0.9 depending on
    /// how many significant changes were observed).
    pub confidence: f64,
}
91
/// Comparison verdict (serialized in lowercase, e.g. `"better"`).
///
/// Assignment rules live in `generate_assessment`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ComparisonVerdict {
    /// More improvements than regressions (and no critical regressions).
    Better,
    /// Any critical regression, or more regressions than improvements.
    Worse,
    /// No significant changes in either direction.
    Similar,
    /// Equal, non-zero numbers of regressions and improvements.
    Mixed,
}
101
/// Comparison report generator
///
/// Holds an optional baseline [`ExecutionReport`]; set one via
/// `set_baseline` before calling `compare`.
pub struct ComparisonReportGenerator {
    /// Baseline report to diff against; `None` until `set_baseline` is called.
    baseline: Option<ExecutionReport>,
}
106
107impl ComparisonReportGenerator {
108    /// Create a new comparison generator
109    pub fn new() -> Self {
110        Self { baseline: None }
111    }
112
113    /// Set baseline report
114    pub fn set_baseline(&mut self, report: ExecutionReport) {
115        self.baseline = Some(report);
116    }
117
118    /// Compare against baseline
119    pub fn compare(&self, comparison_reports: Vec<ExecutionReport>) -> Result<ComparisonReport> {
120        let baseline = self
121            .baseline
122            .as_ref()
123            .ok_or_else(|| ReportingError::Analysis("No baseline set".to_string()))?;
124
125        let baseline_summary = self.extract_summary(baseline);
126        let comparison_summaries: Vec<_> =
127            comparison_reports.iter().map(|r| self.extract_summary(r)).collect();
128
129        // Calculate metric differences for each comparison
130        let mut all_differences = Vec::new();
131        let mut all_regressions = Vec::new();
132        let mut all_improvements = Vec::new();
133
134        for comp_summary in &comparison_summaries {
135            let differences = self.calculate_differences(&baseline_summary, comp_summary);
136            let (regressions, improvements) =
137                self.identify_regressions_and_improvements(&differences);
138
139            all_differences.extend(differences);
140            all_regressions.extend(regressions);
141            all_improvements.extend(improvements);
142        }
143
144        // Overall assessment
145        let assessment = self.generate_assessment(&all_regressions, &all_improvements);
146
147        Ok(ComparisonReport {
148            baseline_run: baseline_summary,
149            comparison_runs: comparison_summaries,
150            metric_differences: all_differences,
151            regressions: all_regressions,
152            improvements: all_improvements,
153            overall_assessment: assessment,
154        })
155    }
156
157    /// Extract execution summary
158    fn extract_summary(&self, report: &ExecutionReport) -> ExecutionSummary {
159        let mut metrics_snapshot = HashMap::new();
160
161        metrics_snapshot.insert("error_rate".to_string(), report.metrics.error_rate);
162        metrics_snapshot.insert("avg_latency_ms".to_string(), report.metrics.avg_latency_ms);
163        metrics_snapshot.insert("p95_latency_ms".to_string(), report.metrics.p95_latency_ms);
164        metrics_snapshot.insert("p99_latency_ms".to_string(), report.metrics.p99_latency_ms);
165        metrics_snapshot.insert("total_requests".to_string(), report.metrics.total_requests as f64);
166        metrics_snapshot
167            .insert("failed_requests".to_string(), report.metrics.failed_requests as f64);
168        metrics_snapshot
169            .insert("successful_requests".to_string(), report.metrics.successful_requests as f64);
170        metrics_snapshot.insert("duration_seconds".to_string(), report.duration_seconds as f64);
171        metrics_snapshot.insert("failed_steps".to_string(), report.failed_steps as f64);
172
173        ExecutionSummary {
174            orchestration_name: report.orchestration_name.clone(),
175            run_id: format!("{}", report.start_time.timestamp()),
176            timestamp: report.start_time,
177            status: report.status.clone(),
178            duration_seconds: report.duration_seconds,
179            metrics_snapshot,
180        }
181    }
182
183    /// Calculate differences between baseline and comparison
184    fn calculate_differences(
185        &self,
186        baseline: &ExecutionSummary,
187        comparison: &ExecutionSummary,
188    ) -> Vec<MetricDifference> {
189        let mut differences = Vec::new();
190
191        for (metric_name, baseline_value) in &baseline.metrics_snapshot {
192            if let Some(&comparison_value) = comparison.metrics_snapshot.get(metric_name) {
193                let absolute_difference = comparison_value - baseline_value;
194                let percentage_difference = if *baseline_value != 0.0 {
195                    (absolute_difference / baseline_value) * 100.0
196                } else if comparison_value != 0.0 {
197                    100.0 // Changed from 0 to non-zero
198                } else {
199                    0.0
200                };
201
202                let direction = if absolute_difference > 0.0 {
203                    ChangeDirection::Increase
204                } else if absolute_difference < 0.0 {
205                    ChangeDirection::Decrease
206                } else {
207                    ChangeDirection::NoChange
208                };
209
210                let significance = self.determine_significance(percentage_difference);
211
212                differences.push(MetricDifference {
213                    metric_name: metric_name.clone(),
214                    baseline_value: *baseline_value,
215                    comparison_value,
216                    absolute_difference,
217                    percentage_difference,
218                    direction,
219                    significance,
220                });
221            }
222        }
223
224        differences
225    }
226
227    /// Determine statistical significance
228    fn determine_significance(&self, percentage_diff: f64) -> SignificanceLevel {
229        let abs_diff = percentage_diff.abs();
230
231        if abs_diff < 5.0 {
232            SignificanceLevel::NotSignificant
233        } else if abs_diff < 15.0 {
234            SignificanceLevel::Low
235        } else if abs_diff < 30.0 {
236            SignificanceLevel::Medium
237        } else {
238            SignificanceLevel::High
239        }
240    }
241
242    /// Identify regressions and improvements
243    fn identify_regressions_and_improvements(
244        &self,
245        differences: &[MetricDifference],
246    ) -> (Vec<Regression>, Vec<Improvement>) {
247        let mut regressions = Vec::new();
248        let mut improvements = Vec::new();
249
250        for diff in differences {
251            // Metrics where increase is bad
252            let increase_is_bad = matches!(
253                diff.metric_name.as_str(),
254                "error_rate"
255                    | "avg_latency_ms"
256                    | "p95_latency_ms"
257                    | "p99_latency_ms"
258                    | "failed_requests"
259                    | "duration_seconds"
260                    | "failed_steps"
261            );
262
263            let is_significant = diff.significance != SignificanceLevel::NotSignificant;
264
265            if !is_significant {
266                continue;
267            }
268
269            match diff.direction {
270                ChangeDirection::Increase if increase_is_bad => {
271                    let severity = match diff.significance {
272                        SignificanceLevel::High => "Critical",
273                        SignificanceLevel::Medium => "High",
274                        SignificanceLevel::Low => "Medium",
275                        _ => "Low",
276                    };
277
278                    regressions.push(Regression {
279                        metric_name: diff.metric_name.clone(),
280                        baseline_value: diff.baseline_value,
281                        regressed_value: diff.comparison_value,
282                        impact_percentage: diff.percentage_difference,
283                        severity: severity.to_string(),
284                        description: format!(
285                            "{} increased by {:.1}% (from {:.2} to {:.2})",
286                            diff.metric_name,
287                            diff.percentage_difference,
288                            diff.baseline_value,
289                            diff.comparison_value
290                        ),
291                    });
292                }
293                ChangeDirection::Decrease if !increase_is_bad => {
294                    improvements.push(Improvement {
295                        metric_name: diff.metric_name.clone(),
296                        baseline_value: diff.baseline_value,
297                        improved_value: diff.comparison_value,
298                        improvement_percentage: diff.percentage_difference.abs(),
299                        description: format!(
300                            "{} decreased by {:.1}% (from {:.2} to {:.2})",
301                            diff.metric_name,
302                            diff.percentage_difference.abs(),
303                            diff.baseline_value,
304                            diff.comparison_value
305                        ),
306                    });
307                }
308                ChangeDirection::Increase if !increase_is_bad => {
309                    improvements.push(Improvement {
310                        metric_name: diff.metric_name.clone(),
311                        baseline_value: diff.baseline_value,
312                        improved_value: diff.comparison_value,
313                        improvement_percentage: diff.percentage_difference,
314                        description: format!(
315                            "{} increased by {:.1}% (from {:.2} to {:.2})",
316                            diff.metric_name,
317                            diff.percentage_difference,
318                            diff.baseline_value,
319                            diff.comparison_value
320                        ),
321                    });
322                }
323                ChangeDirection::Decrease if increase_is_bad => {
324                    improvements.push(Improvement {
325                        metric_name: diff.metric_name.clone(),
326                        baseline_value: diff.baseline_value,
327                        improved_value: diff.comparison_value,
328                        improvement_percentage: diff.percentage_difference.abs(),
329                        description: format!(
330                            "{} decreased by {:.1}% (from {:.2} to {:.2})",
331                            diff.metric_name,
332                            diff.percentage_difference.abs(),
333                            diff.baseline_value,
334                            diff.comparison_value
335                        ),
336                    });
337                }
338                _ => {}
339            }
340        }
341
342        (regressions, improvements)
343    }
344
345    /// Generate overall assessment
346    fn generate_assessment(
347        &self,
348        regressions: &[Regression],
349        improvements: &[Improvement],
350    ) -> ComparisonAssessment {
351        let regressions_count = regressions.len();
352        let improvements_count = improvements.len();
353
354        let critical_regressions = regressions.iter().filter(|r| r.severity == "Critical").count();
355
356        let verdict = if critical_regressions > 0 || regressions_count > improvements_count {
357            ComparisonVerdict::Worse
358        } else if improvements_count > regressions_count {
359            ComparisonVerdict::Better
360        } else if regressions_count > 0 && improvements_count > 0 {
361            ComparisonVerdict::Mixed
362        } else {
363            ComparisonVerdict::Similar
364        };
365
366        let summary = match verdict {
367            ComparisonVerdict::Better => {
368                format!(
369                    "Performance has improved with {} improvements and {} regressions detected.",
370                    improvements_count, regressions_count
371                )
372            }
373            ComparisonVerdict::Worse => {
374                format!(
375                    "Performance has degraded with {} regressions ({} critical) and {} improvements.",
376                    regressions_count, critical_regressions, improvements_count
377                )
378            }
379            ComparisonVerdict::Mixed => {
380                format!(
381                    "Mixed results with {} improvements and {} regressions.",
382                    improvements_count, regressions_count
383                )
384            }
385            ComparisonVerdict::Similar => {
386                "Performance is similar to baseline with no significant changes.".to_string()
387            }
388        };
389
390        let confidence = if regressions_count + improvements_count > 5 {
391            0.9
392        } else if regressions_count + improvements_count > 2 {
393            0.7
394        } else {
395            0.5
396        };
397
398        ComparisonAssessment {
399            verdict,
400            summary,
401            regressions_count,
402            improvements_count,
403            confidence,
404        }
405    }
406}
407
408impl Default for ComparisonReportGenerator {
409    fn default() -> Self {
410        Self::new()
411    }
412}
413
/// Unit tests for the comparison report generator and its data types.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pdf::ReportMetrics;
    use chrono::Utc;

    /// Build a healthy 5-step run (2% error rate, 100 ms avg latency) used as
    /// the baseline in most tests below.
    fn create_baseline_report() -> ExecutionReport {
        ExecutionReport {
            orchestration_name: "test".to_string(),
            start_time: Utc::now(),
            end_time: Utc::now(),
            duration_seconds: 100,
            status: "Completed".to_string(),
            total_steps: 5,
            completed_steps: 5,
            failed_steps: 0,
            metrics: ReportMetrics {
                total_requests: 1000,
                successful_requests: 980,
                failed_requests: 20,
                avg_latency_ms: 100.0,
                p95_latency_ms: 200.0,
                p99_latency_ms: 300.0,
                error_rate: 0.02,
            },
            failures: vec![],
            recommendations: vec![],
        }
    }

    // A run with uniformly better metrics should yield a Better verdict.
    #[test]
    fn test_comparison_report_generator() {
        let mut generator = ComparisonReportGenerator::new();
        let baseline = create_baseline_report();
        generator.set_baseline(baseline.clone());

        let comparison = ExecutionReport {
            metrics: ReportMetrics {
                total_requests: 1000,
                successful_requests: 990,
                failed_requests: 10,
                avg_latency_ms: 90.0,
                p95_latency_ms: 180.0,
                p99_latency_ms: 280.0,
                error_rate: 0.01,
            },
            ..baseline
        };

        let report = generator.compare(vec![comparison]).unwrap();

        assert!(!report.metric_differences.is_empty());
        assert_eq!(report.overall_assessment.verdict, ComparisonVerdict::Better);
    }

    #[test]
    fn test_comparison_generator_new() {
        let generator = ComparisonReportGenerator::new();
        assert!(generator.baseline.is_none());
    }

    // Default must behave identically to new().
    #[test]
    fn test_comparison_generator_default() {
        let generator = ComparisonReportGenerator::default();
        assert!(generator.baseline.is_none());
    }

    // compare() without a baseline must fail, even with no comparison runs.
    #[test]
    fn test_comparison_no_baseline_error() {
        let generator = ComparisonReportGenerator::new();
        let result = generator.compare(vec![]);
        assert!(result.is_err());
    }

    #[test]
    fn test_comparison_worse_verdict() {
        let mut generator = ComparisonReportGenerator::new();
        let baseline = create_baseline_report();
        generator.set_baseline(baseline.clone());

        // Create comparison with worse metrics
        let comparison = ExecutionReport {
            metrics: ReportMetrics {
                total_requests: 1000,
                successful_requests: 900,
                failed_requests: 100,  // Much more failures
                avg_latency_ms: 200.0, // Higher latency
                p95_latency_ms: 400.0,
                p99_latency_ms: 600.0,
                error_rate: 0.10, // Higher error rate
            },
            ..baseline
        };

        let report = generator.compare(vec![comparison]).unwrap();
        assert_eq!(report.overall_assessment.verdict, ComparisonVerdict::Worse);
        assert!(report.regressions.len() > 0);
    }

    // Sub-5% metric changes fall below the significance threshold.
    #[test]
    fn test_comparison_similar_verdict() {
        let mut generator = ComparisonReportGenerator::new();
        let baseline = create_baseline_report();
        generator.set_baseline(baseline.clone());

        // Create comparison with nearly identical metrics
        let comparison = ExecutionReport {
            metrics: ReportMetrics {
                total_requests: 1000,
                successful_requests: 980,
                failed_requests: 20,
                avg_latency_ms: 101.0, // Almost the same
                p95_latency_ms: 201.0,
                p99_latency_ms: 301.0,
                error_rate: 0.0201,
            },
            ..baseline
        };

        let report = generator.compare(vec![comparison]).unwrap();
        assert_eq!(report.overall_assessment.verdict, ComparisonVerdict::Similar);
    }

    // Pin the serde lowercase representation of ChangeDirection variants.
    #[test]
    fn test_change_direction_enum() {
        // Test serialization
        let increase = ChangeDirection::Increase;
        let json = serde_json::to_string(&increase).unwrap();
        assert_eq!(json, "\"increase\"");

        let decrease = ChangeDirection::Decrease;
        let json = serde_json::to_string(&decrease).unwrap();
        assert_eq!(json, "\"decrease\"");

        let no_change = ChangeDirection::NoChange;
        let json = serde_json::to_string(&no_change).unwrap();
        assert_eq!(json, "\"nochange\"");
    }

    // Pin the serde lowercase representation of SignificanceLevel variants.
    #[test]
    fn test_significance_level_enum() {
        let not_sig = SignificanceLevel::NotSignificant;
        let json = serde_json::to_string(&not_sig).unwrap();
        assert_eq!(json, "\"notsignificant\"");

        let high = SignificanceLevel::High;
        let json = serde_json::to_string(&high).unwrap();
        assert_eq!(json, "\"high\"");
    }

    // Pin the serde lowercase representation of ComparisonVerdict variants.
    #[test]
    fn test_comparison_verdict_enum() {
        let better = ComparisonVerdict::Better;
        let json = serde_json::to_string(&better).unwrap();
        assert_eq!(json, "\"better\"");

        let worse = ComparisonVerdict::Worse;
        let json = serde_json::to_string(&worse).unwrap();
        assert_eq!(json, "\"worse\"");

        let similar = ComparisonVerdict::Similar;
        let json = serde_json::to_string(&similar).unwrap();
        assert_eq!(json, "\"similar\"");

        let mixed = ComparisonVerdict::Mixed;
        let json = serde_json::to_string(&mixed).unwrap();
        assert_eq!(json, "\"mixed\"");
    }

    #[test]
    fn test_execution_summary_clone() {
        let summary = ExecutionSummary {
            orchestration_name: "test".to_string(),
            run_id: "123".to_string(),
            timestamp: Utc::now(),
            status: "Completed".to_string(),
            duration_seconds: 100,
            metrics_snapshot: HashMap::new(),
        };

        let cloned = summary.clone();
        assert_eq!(summary.orchestration_name, cloned.orchestration_name);
        assert_eq!(summary.run_id, cloned.run_id);
    }

    #[test]
    fn test_metric_difference_clone() {
        let diff = MetricDifference {
            metric_name: "error_rate".to_string(),
            baseline_value: 0.02,
            comparison_value: 0.01,
            absolute_difference: -0.01,
            percentage_difference: -50.0,
            direction: ChangeDirection::Decrease,
            significance: SignificanceLevel::High,
        };

        let cloned = diff.clone();
        assert_eq!(diff.metric_name, cloned.metric_name);
        assert_eq!(diff.baseline_value, cloned.baseline_value);
    }

    #[test]
    fn test_regression_clone() {
        let regression = Regression {
            metric_name: "latency".to_string(),
            baseline_value: 100.0,
            regressed_value: 200.0,
            impact_percentage: 100.0,
            severity: "High".to_string(),
            description: "Latency doubled".to_string(),
        };

        let cloned = regression.clone();
        assert_eq!(regression.metric_name, cloned.metric_name);
        assert_eq!(regression.severity, cloned.severity);
    }

    #[test]
    fn test_improvement_clone() {
        let improvement = Improvement {
            metric_name: "error_rate".to_string(),
            baseline_value: 0.10,
            improved_value: 0.02,
            improvement_percentage: 80.0,
            description: "Error rate improved".to_string(),
        };

        let cloned = improvement.clone();
        assert_eq!(improvement.metric_name, cloned.metric_name);
        assert_eq!(improvement.improvement_percentage, cloned.improvement_percentage);
    }

    #[test]
    fn test_comparison_assessment_clone() {
        let assessment = ComparisonAssessment {
            verdict: ComparisonVerdict::Better,
            summary: "Performance improved".to_string(),
            regressions_count: 0,
            improvements_count: 5,
            confidence: 0.9,
        };

        let cloned = assessment.clone();
        assert_eq!(assessment.verdict, cloned.verdict);
        assert_eq!(assessment.confidence, cloned.confidence);
    }

    // Comparing a baseline against itself should still serialize cleanly.
    #[test]
    fn test_comparison_report_serialize() {
        let mut generator = ComparisonReportGenerator::new();
        let baseline = create_baseline_report();
        generator.set_baseline(baseline.clone());

        let report = generator.compare(vec![baseline.clone()]).unwrap();
        let json = serde_json::to_string(&report).unwrap();
        assert!(json.contains("baseline_run"));
        assert!(json.contains("comparison_runs"));
    }

    // Each comparison run produces its own summary entry in the report.
    #[test]
    fn test_multiple_comparisons() {
        let mut generator = ComparisonReportGenerator::new();
        let baseline = create_baseline_report();
        generator.set_baseline(baseline.clone());

        let comparison1 = ExecutionReport {
            metrics: ReportMetrics {
                avg_latency_ms: 90.0,
                ..baseline.metrics.clone()
            },
            ..baseline.clone()
        };

        let comparison2 = ExecutionReport {
            metrics: ReportMetrics {
                avg_latency_ms: 110.0,
                ..baseline.metrics.clone()
            },
            ..baseline.clone()
        };

        let report = generator.compare(vec![comparison1, comparison2]).unwrap();
        assert_eq!(report.comparison_runs.len(), 2);
    }
}