Skip to main content

perfgate_domain/
lib.rs

1//! Domain logic for perfgate.
2//!
3//! Pure, I/O-free business logic: statistics computation, budget policy evaluation,
4//! host mismatch detection, and regression analysis. All data comes in via
5//! function arguments; no filesystem, network, or process access.
6//!
7//! Part of the [perfgate](https://github.com/EffortlessMetrics/perfgate) workspace.
8
9mod blame;
10mod paired;
11
12pub use blame::{
13    BinaryBlame, DependencyChange, DependencyChangeType, compare_lockfiles, parse_lockfile,
14};
15pub use paired::{PairedComparison, compare_paired_stats, compute_paired_cv, compute_paired_stats};
16
17pub use perfgate_host_detect::detect_host_mismatch;
18
19pub use perfgate_budget::{
20    BudgetError, BudgetResult, aggregate_verdict, calculate_regression, determine_status,
21    evaluate_budget, evaluate_budgets, reason_token,
22};
23
24pub use perfgate_significance::{compute_significance, mean_and_variance};
25pub use perfgate_stats::trend::{
26    DriftClass, TrendAnalysis, TrendConfig, analyze_trend, classify_drift, compute_headroom_pct,
27    linear_regression, predict_breach_run, spark_chart,
28};
29pub use perfgate_stats::{median_f64_sorted, median_u64_sorted, summarize_f64, summarize_u64};
30
31use perfgate_types::{
32    Budget, CHECK_ID_BUDGET, CompareReceipt, Delta, FINDING_CODE_METRIC_FAIL,
33    FINDING_CODE_METRIC_WARN, Metric, MetricStatistic, MetricStatus, RunReceipt, Stats, Verdict,
34    VerdictCounts, VerdictStatus,
35};
36use std::collections::BTreeMap;
37
38pub use perfgate_error::StatsError;
39
40#[derive(Debug, thiserror::Error)]
41pub enum DomainError {
42    #[error("no samples to summarize")]
43    NoSamples,
44
45    #[error(transparent)]
46    Stats(#[from] StatsError),
47
48    #[error("significance alpha must be between 0.0 and 1.0, got {0}")]
49    InvalidAlpha(f64),
50}
51
#[cfg(test)]
mod advanced_analytics_tests {
    use super::*;
    use perfgate_types::{BenchMeta, Direction, HostInfo, RunMeta, RunReceipt, Sample, ToolInfo};

    /// Builds a successful, measured (non-warmup) sample that records wall time only.
    fn wall_only_sample(wall_ms: u64) -> Sample {
        Sample {
            wall_ms,
            exit_code: 0,
            warmup: false,
            timed_out: false,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb: None,
            io_read_bytes: None,
            io_write_bytes: None,
            network_packets: None,
            energy_uj: None,
            binary_bytes: None,
            stdout: None,
            stderr: None,
        }
    }

    /// Builds a run receipt for `name` with one measured sample per entry in `walls`.
    fn make_run_receipt_with_walls(name: &str, walls: &[u64]) -> RunReceipt {
        let samples: Vec<Sample> = walls.iter().copied().map(wall_only_sample).collect();
        let stats = compute_stats(&samples, None).expect("compute stats");

        let tool = ToolInfo {
            name: "perfgate".to_string(),
            version: "test".to_string(),
        };
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let run = RunMeta {
            id: format!("run-{}", name),
            started_at: "2024-01-01T00:00:00Z".to_string(),
            ended_at: "2024-01-01T00:00:01Z".to_string(),
            host,
        };
        let bench = BenchMeta {
            name: name.to_string(),
            cwd: None,
            command: vec!["echo".to_string(), "ok".to_string()],
            repeat: walls.len() as u32,
            warmup: 0,
            work_units: None,
            timeout_ms: None,
        };

        RunReceipt {
            schema: perfgate_types::RUN_SCHEMA_V1.to_string(),
            tool,
            run,
            bench,
            samples,
            stats,
        }
    }

    /// Budget map with a single lower-is-better wall-time budget; the warn
    /// threshold sits at 90% of the fail threshold.
    fn wall_budget(threshold: f64) -> BTreeMap<Metric, Budget> {
        let budget = Budget {
            noise_threshold: None,
            noise_policy: perfgate_types::NoisePolicy::Ignore,
            threshold,
            warn_threshold: threshold * 0.9,
            direction: Direction::Lower,
        };
        BTreeMap::from([(Metric::WallMs, budget)])
    }

    #[test]
    fn compare_runs_uses_p95_when_requested() {
        // Identical medians; only the tail sample differs between the runs.
        let baseline =
            make_run_receipt_with_walls("bench", &[100, 100, 100, 100, 100, 100, 100, 200]);
        let current =
            make_run_receipt_with_walls("bench", &[100, 100, 100, 100, 100, 100, 100, 300]);

        let budgets = wall_budget(0.20);
        let stats = BTreeMap::from([(Metric::WallMs, MetricStatistic::P95)]);

        let comparison =
            compare_runs(&baseline, &current, &budgets, &stats, None).expect("compare runs");

        let delta = comparison.deltas.get(&Metric::WallMs).expect("wall delta");
        assert_eq!(delta.statistic, MetricStatistic::P95);
        assert!(delta.current > delta.baseline);
        assert_eq!(delta.status, MetricStatus::Fail);
    }

    #[test]
    fn compare_runs_can_require_significance() {
        let baseline =
            make_run_receipt_with_walls("bench", &[50, 60, 70, 80, 90, 100, 110, 120, 130, 140]);
        let current =
            make_run_receipt_with_walls("bench", &[56, 66, 76, 86, 96, 106, 116, 126, 136, 146]);
        let budgets = wall_budget(0.05);
        let stats = BTreeMap::new();

        let policy = |require_significance: bool| SignificancePolicy {
            alpha: 0.05,
            min_samples: 8,
            require_significance,
        };

        // Advisory mode: significance is reported but the budget verdict stands.
        let advisory = compare_runs(&baseline, &current, &budgets, &stats, Some(policy(false)))
            .expect("compare advisory");
        let advisory_delta = advisory.deltas.get(&Metric::WallMs).expect("wall delta");
        assert_eq!(advisory_delta.status, MetricStatus::Fail);
        assert!(
            advisory_delta
                .significance
                .as_ref()
                .is_some_and(|s| !s.significant)
        );

        // Enforced mode: a non-significant regression is downgraded to a pass.
        let enforced = compare_runs(&baseline, &current, &budgets, &stats, Some(policy(true)))
            .expect("compare enforced");
        let enforced_delta = enforced.deltas.get(&Metric::WallMs).expect("wall delta");
        assert_eq!(enforced_delta.status, MetricStatus::Pass);
    }
}
195
196/// Compute perfgate stats from samples.
197///
198/// Warmup samples (`sample.warmup == true`) are excluded.
199///
200/// # Examples
201///
202/// ```
203/// use perfgate_domain::compute_stats;
204/// use perfgate_types::Sample;
205///
206/// let samples = vec![
207///     Sample {
208///         wall_ms: 100, exit_code: 0, warmup: false, timed_out: false,
209///         cpu_ms: None, page_faults: None, ctx_switches: None,
210///         max_rss_kb: None, io_read_bytes: None, io_write_bytes: None,
211///         network_packets: None, energy_uj: None, binary_bytes: None, stdout: None, stderr: None,
212///     },
213///     Sample {
214///         wall_ms: 120, exit_code: 0, warmup: false, timed_out: false,
215///         cpu_ms: None, page_faults: None, ctx_switches: None,
216///         max_rss_kb: None, io_read_bytes: None, io_write_bytes: None,
217///         network_packets: None, energy_uj: None, binary_bytes: None, stdout: None, stderr: None,
218///     },
219/// ];
220///
221/// let stats = compute_stats(&samples, None).unwrap();
222/// assert_eq!(stats.wall_ms.min, 100);
223/// assert_eq!(stats.wall_ms.max, 120);
224/// ```
225pub fn compute_stats(
226    samples: &[perfgate_types::Sample],
227    work_units: Option<u64>,
228) -> Result<Stats, DomainError> {
229    let measured: Vec<&perfgate_types::Sample> = samples.iter().filter(|s| !s.warmup).collect();
230    if measured.is_empty() {
231        return Err(DomainError::NoSamples);
232    }
233
234    let wall: Vec<u64> = measured.iter().map(|s| s.wall_ms).collect();
235    let wall_ms = summarize_u64(&wall)?;
236
237    let cpu_vals: Vec<u64> = measured.iter().filter_map(|s| s.cpu_ms).collect();
238    let cpu_ms = if cpu_vals.is_empty() {
239        None
240    } else {
241        Some(summarize_u64(&cpu_vals)?)
242    };
243
244    let page_fault_vals: Vec<u64> = measured.iter().filter_map(|s| s.page_faults).collect();
245    let page_faults = if page_fault_vals.is_empty() {
246        None
247    } else {
248        Some(summarize_u64(&page_fault_vals)?)
249    };
250
251    let ctx_switch_vals: Vec<u64> = measured.iter().filter_map(|s| s.ctx_switches).collect();
252    let ctx_switches = if ctx_switch_vals.is_empty() {
253        None
254    } else {
255        Some(summarize_u64(&ctx_switch_vals)?)
256    };
257
258    let rss_vals: Vec<u64> = measured.iter().filter_map(|s| s.max_rss_kb).collect();
259    let max_rss_kb = if rss_vals.is_empty() {
260        None
261    } else {
262        Some(summarize_u64(&rss_vals)?)
263    };
264
265    let io_read_vals: Vec<u64> = measured.iter().filter_map(|s| s.io_read_bytes).collect();
266    let io_read_bytes = if io_read_vals.is_empty() {
267        None
268    } else {
269        Some(summarize_u64(&io_read_vals)?)
270    };
271
272    let io_write_vals: Vec<u64> = measured.iter().filter_map(|s| s.io_write_bytes).collect();
273    let io_write_bytes = if io_write_vals.is_empty() {
274        None
275    } else {
276        Some(summarize_u64(&io_write_vals)?)
277    };
278
279    let network_vals: Vec<u64> = measured.iter().filter_map(|s| s.network_packets).collect();
280    let network_packets = if network_vals.is_empty() {
281        None
282    } else {
283        Some(summarize_u64(&network_vals)?)
284    };
285
286    let energy_vals: Vec<u64> = measured.iter().filter_map(|s| s.energy_uj).collect();
287    let energy_uj = if energy_vals.is_empty() {
288        None
289    } else {
290        Some(summarize_u64(&energy_vals)?)
291    };
292
293    let binary_vals: Vec<u64> = measured.iter().filter_map(|s| s.binary_bytes).collect();
294    let binary_bytes = if binary_vals.is_empty() {
295        None
296    } else {
297        Some(summarize_u64(&binary_vals)?)
298    };
299
300    let throughput_per_s = match work_units {
301        Some(work) => {
302            let thr: Vec<f64> = measured
303                .iter()
304                .map(|s| {
305                    let secs = (s.wall_ms as f64) / 1000.0;
306                    if secs <= 0.0 {
307                        0.0
308                    } else {
309                        (work as f64) / secs
310                    }
311                })
312                .collect();
313            Some(summarize_f64(&thr)?)
314        }
315        None => None,
316    };
317
318    Ok(Stats {
319        wall_ms,
320        cpu_ms,
321        page_faults,
322        ctx_switches,
323        max_rss_kb,
324        io_read_bytes,
325        io_write_bytes,
326        network_packets,
327        energy_uj,
328        binary_bytes,
329        throughput_per_s,
330    })
331}
332
333#[derive(Debug, Clone, PartialEq)]
334pub struct Comparison {
335    pub deltas: BTreeMap<Metric, Delta>,
336    pub verdict: Verdict,
337}
338
/// Settings for the optional significance analysis performed by [`compare_runs`].
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct SignificancePolicy {
    /// Significance level forwarded to the significance test.
    pub alpha: f64,
    /// Minimum sample count forwarded to the significance test.
    pub min_samples: usize,
    /// When `true`, a warn/fail result that is not statistically significant
    /// is reported as a pass instead.
    pub require_significance: bool,
}
345
346impl SignificancePolicy {
347    pub fn new(
348        alpha: f64,
349        min_samples: usize,
350        require_significance: bool,
351    ) -> Result<Self, DomainError> {
352        if !(0.0..=1.0).contains(&alpha) {
353            return Err(DomainError::InvalidAlpha(alpha));
354        }
355        Ok(Self {
356            alpha,
357            min_samples,
358            require_significance,
359        })
360    }
361}
362
363fn aggregate_verdict_from_counts(counts: VerdictCounts, reasons: Vec<String>) -> Verdict {
364    let status = if counts.fail > 0 {
365        VerdictStatus::Fail
366    } else if counts.warn > 0 {
367        VerdictStatus::Warn
368    } else if counts.pass > 0 {
369        VerdictStatus::Pass
370    } else {
371        VerdictStatus::Skip
372    };
373
374    Verdict {
375        status,
376        counts,
377        reasons,
378    }
379}
380
381/// Compare stats under the provided budgets.
382///
383/// Metrics without both baseline+current values are skipped (and therefore do not affect verdict).
384///
385/// # Examples
386///
387/// ```
388/// use perfgate_domain::compare_stats;
389/// use perfgate_types::*;
390/// use std::collections::BTreeMap;
391///
392/// let baseline = Stats {
393///     wall_ms: U64Summary::new(100, 90, 110 ),
394///     cpu_ms: None, page_faults: None, ctx_switches: None,
395///     max_rss_kb: None,
396///     io_read_bytes: None, io_write_bytes: None, network_packets: None,
397///     energy_uj: None,
398///     binary_bytes: None, throughput_per_s: None,
399/// };
400/// let current = Stats {
401///     wall_ms: U64Summary::new(105, 95, 115 ),
402///     cpu_ms: None, page_faults: None, ctx_switches: None,
403///     max_rss_kb: None,
404///     io_read_bytes: None, io_write_bytes: None, network_packets: None,
405///     energy_uj: None,
406///     binary_bytes: None, throughput_per_s: None,
407/// };
408///
409/// let mut budgets = BTreeMap::new();
410/// budgets.insert(Metric::WallMs, Budget {
411///     noise_threshold: None,
412///     noise_policy: perfgate_types::NoisePolicy::Ignore,
413///     threshold: 0.20, warn_threshold: 0.10, direction: Direction::Lower,
414/// });
415///
416/// let cmp = compare_stats(&baseline, &current, &budgets).unwrap();
417/// assert_eq!(cmp.verdict.status, VerdictStatus::Pass);
418/// ```
419pub fn compare_stats(
420    baseline: &Stats,
421    current: &Stats,
422    budgets: &BTreeMap<Metric, Budget>,
423) -> Result<Comparison, DomainError> {
424    let mut deltas: BTreeMap<Metric, Delta> = BTreeMap::new();
425    let mut reasons: Vec<String> = Vec::new();
426
427    let mut counts = VerdictCounts {
428        pass: 0,
429        warn: 0,
430        fail: 0,
431        skip: 0,
432    };
433
434    for (metric, budget) in budgets {
435        let b = metric_value(baseline, *metric);
436        let c = metric_value(current, *metric);
437        let current_cv = metric_cv(current, *metric);
438
439        let (Some(bv), Some(cv)) = (b, c) else {
440            continue;
441        };
442
443        if bv <= 0.0 {
444            deltas.insert(
445                *metric,
446                Delta {
447                    baseline: bv,
448                    current: cv,
449                    ratio: 1.0,
450                    pct: 0.0,
451                    regression: 0.0,
452                    status: MetricStatus::Skip,
453                    significance: None,
454                    cv: current_cv,
455                    noise_threshold: budget.noise_threshold,
456                    statistic: MetricStatistic::Median,
457                },
458            );
459            counts.skip += 1;
460            continue;
461        }
462
463        let result = evaluate_budget(bv, cv, budget, current_cv)
464            .expect("evaluate_budget is infallible for bv > 0");
465
466        match result.status {
467            MetricStatus::Pass => counts.pass += 1,
468            MetricStatus::Warn => {
469                counts.warn += 1;
470                reasons.push(reason_token(*metric, MetricStatus::Warn));
471            }
472            MetricStatus::Fail => {
473                counts.fail += 1;
474                reasons.push(reason_token(*metric, MetricStatus::Fail));
475            }
476            MetricStatus::Skip => {
477                counts.skip += 1;
478                reasons.push(reason_token(*metric, MetricStatus::Skip));
479            }
480        }
481
482        deltas.insert(
483            *metric,
484            Delta {
485                baseline: result.baseline,
486                current: result.current,
487                ratio: result.ratio,
488                pct: result.pct,
489                regression: result.regression,
490                cv: result.cv,
491                noise_threshold: result.noise_threshold,
492                statistic: MetricStatistic::Median,
493                significance: None,
494                status: result.status,
495            },
496        );
497    }
498
499    let verdict = aggregate_verdict_from_counts(counts, reasons);
500
501    Ok(Comparison { deltas, verdict })
502}
503
504/// Compare full run receipts under the provided budgets.
505///
506/// This variant supports:
507/// - Per-metric statistic selection (`median` or `p95`)
508/// - Optional significance analysis with Welch's t-test
509pub fn compare_runs(
510    baseline: &RunReceipt,
511    current: &RunReceipt,
512    budgets: &BTreeMap<Metric, Budget>,
513    metric_statistics: &BTreeMap<Metric, MetricStatistic>,
514    significance_policy: Option<SignificancePolicy>,
515) -> Result<Comparison, DomainError> {
516    let mut deltas: BTreeMap<Metric, Delta> = BTreeMap::new();
517    let mut reasons: Vec<String> = Vec::new();
518
519    let mut counts = VerdictCounts {
520        pass: 0,
521        warn: 0,
522        fail: 0,
523        skip: 0,
524    };
525
526    for (metric, budget) in budgets {
527        let statistic = metric_statistics
528            .get(metric)
529            .copied()
530            .unwrap_or(MetricStatistic::Median);
531
532        let b = metric_value_from_run(baseline, *metric, statistic);
533        let c = metric_value_from_run(current, *metric, statistic);
534        let current_cv = metric_cv(&current.stats, *metric);
535
536        let (Some(bv), Some(cv)) = (b, c) else {
537            continue;
538        };
539
540        if bv <= 0.0 {
541            deltas.insert(
542                *metric,
543                Delta {
544                    baseline: bv,
545                    current: cv,
546                    ratio: 1.0,
547                    pct: 0.0,
548                    regression: 0.0,
549                    status: MetricStatus::Skip,
550                    significance: None,
551                    cv: current_cv,
552                    noise_threshold: budget.noise_threshold,
553                    statistic,
554                },
555            );
556            counts.skip += 1;
557            continue;
558        }
559
560        let result = evaluate_budget(bv, cv, budget, current_cv)
561            .expect("evaluate_budget is infallible for bv > 0");
562
563        let mut status = result.status;
564
565        let significance = significance_policy.and_then(|policy| {
566            let baseline_series = metric_series_from_run(baseline, *metric);
567            let current_series = metric_series_from_run(current, *metric);
568            compute_significance(
569                &baseline_series,
570                &current_series,
571                policy.alpha,
572                policy.min_samples,
573            )
574        });
575
576        if let Some(policy) = significance_policy
577            && policy.require_significance
578            && matches!(status, MetricStatus::Warn | MetricStatus::Fail)
579        {
580            let is_significant = significance
581                .as_ref()
582                .map(|sig| sig.significant)
583                .unwrap_or(false);
584            if !is_significant {
585                status = MetricStatus::Pass;
586            }
587        }
588
589        match status {
590            MetricStatus::Pass => counts.pass += 1,
591            MetricStatus::Warn => {
592                counts.warn += 1;
593                reasons.push(reason_token(*metric, MetricStatus::Warn));
594            }
595            MetricStatus::Fail => {
596                counts.fail += 1;
597                reasons.push(reason_token(*metric, MetricStatus::Fail));
598            }
599            MetricStatus::Skip => {
600                counts.skip += 1;
601                reasons.push(reason_token(*metric, MetricStatus::Skip));
602            }
603        }
604
605        deltas.insert(
606            *metric,
607            Delta {
608                baseline: result.baseline,
609                current: result.current,
610                ratio: result.ratio,
611                pct: result.pct,
612                regression: result.regression,
613                cv: result.cv,
614                noise_threshold: result.noise_threshold,
615                statistic,
616                significance,
617                status,
618            },
619        );
620    }
621
622    let verdict = aggregate_verdict_from_counts(counts, reasons);
623
624    Ok(Comparison { deltas, verdict })
625}
626
627// ============================================================================
628// Report Derivation
629// ============================================================================
630
/// Metric details attached to a single report finding.
#[derive(Debug, Clone, PartialEq)]
pub struct FindingData {
    /// Name of the metric (e.g., "wall_ms", "max_rss_kb", "throughput_per_s").
    pub metric_name: String,
    /// Name of the benchmark the finding belongs to.
    pub bench_name: String,
    /// Metric value observed in the baseline.
    pub baseline: f64,
    /// Metric value observed in the current run.
    pub current: f64,
    /// Regression expressed as a fraction (e.g., 0.15 means a 15% regression).
    pub regression_pct: f64,
    /// The budget threshold that was exceeded (fail) or approached (warn).
    pub threshold: f64,
}
647
648/// A single finding in a report.
649#[derive(Debug, Clone, PartialEq)]
650pub struct Finding {
651    /// Finding code: "metric_warn" or "metric_fail".
652    pub code: String,
653    /// Check identifier: always "perf.budget".
654    pub check_id: String,
655    /// Finding data containing metric details.
656    pub data: FindingData,
657}
658
659/// Report derived from a CompareReceipt.
660#[derive(Debug, Clone, PartialEq)]
661pub struct Report {
662    /// The overall verdict status, matching the compare verdict.
663    pub verdict: VerdictStatus,
664    /// Findings for metrics that have Warn or Fail status.
665    /// Ordered deterministically by metric name, then bench name.
666    pub findings: Vec<Finding>,
667}
668
669/// Derives a report from a CompareReceipt.
670///
671/// Creates findings for each metric delta with status Warn or Fail.
672/// Findings are ordered deterministically by metric name (then bench name if
673/// multiple benches were compared, though currently CompareReceipt is per-bench).
674///
675/// # Invariants
676///
677/// - Number of findings equals count of warn + fail status deltas
678/// - Report verdict matches compare verdict
679/// - Findings are ordered deterministically (by metric name)
680///
681/// # Examples
682///
683/// ```
684/// use perfgate_domain::derive_report;
685/// use perfgate_types::*;
686/// use std::collections::BTreeMap;
687///
688/// let receipt = CompareReceipt {
689///     schema: COMPARE_SCHEMA_V1.to_string(),
690///     tool: ToolInfo { name: "perfgate".into(), version: "0.1.0".into() },
691///     bench: BenchMeta {
692///         name: "my-bench".into(), cwd: None,
693///         command: vec!["echo".into()], repeat: 5, warmup: 0,
694///         work_units: None, timeout_ms: None,
695///     },
696///     baseline_ref: CompareRef { path: None, run_id: None },
697///     current_ref: CompareRef { path: None, run_id: None },
698///     budgets: BTreeMap::new(),
699///     deltas: BTreeMap::new(),
700///     verdict: Verdict {
701///         status: VerdictStatus::Pass,
702///         counts: VerdictCounts { pass: 0, warn: 0, fail: 0, skip: 0 },
703///         reasons: vec![],
704///     },
705/// };
706///
707/// let report = derive_report(&receipt);
708/// assert_eq!(report.verdict, VerdictStatus::Pass);
709/// assert!(report.findings.is_empty());
710/// ```
711pub fn derive_report(receipt: &CompareReceipt) -> Report {
712    let mut findings = Vec::new();
713
714    // Iterate over deltas in deterministic order (BTreeMap is sorted by key)
715    for (metric, delta) in &receipt.deltas {
716        match delta.status {
717            MetricStatus::Pass | MetricStatus::Skip => continue,
718            MetricStatus::Warn | MetricStatus::Fail => {
719                let code = match delta.status {
720                    MetricStatus::Warn => FINDING_CODE_METRIC_WARN.to_string(),
721                    MetricStatus::Fail => FINDING_CODE_METRIC_FAIL.to_string(),
722                    _ => unreachable!(),
723                };
724
725                // Get the threshold from budgets if available
726                let threshold = receipt
727                    .budgets
728                    .get(metric)
729                    .map(|b| b.threshold)
730                    .unwrap_or(0.0);
731
732                findings.push(Finding {
733                    code,
734                    check_id: CHECK_ID_BUDGET.to_string(),
735                    data: FindingData {
736                        metric_name: metric_to_string(*metric),
737                        bench_name: receipt.bench.name.clone(),
738                        baseline: delta.baseline,
739                        current: delta.current,
740                        regression_pct: delta.regression,
741                        threshold,
742                    },
743                });
744            }
745        }
746    }
747
748    // Findings are already sorted by metric name since we iterate over BTreeMap
749    // For multi-bench scenarios (future), we would also sort by bench_name
750    // Currently sorting is: metric name (from BTreeMap order)
751
752    Report {
753        verdict: receipt.verdict.status,
754        findings,
755    }
756}
757
758fn metric_cv(stats: &Stats, metric: Metric) -> Option<f64> {
759    match metric {
760        Metric::BinaryBytes => stats.binary_bytes.as_ref().and_then(|s| s.cv()),
761        Metric::CpuMs => stats.cpu_ms.as_ref().and_then(|s| s.cv()),
762        Metric::CtxSwitches => stats.ctx_switches.as_ref().and_then(|s| s.cv()),
763        Metric::EnergyUj => stats.energy_uj.as_ref().and_then(|s| s.cv()),
764        Metric::IoReadBytes => stats.io_read_bytes.as_ref().and_then(|s| s.cv()),
765        Metric::IoWriteBytes => stats.io_write_bytes.as_ref().and_then(|s| s.cv()),
766        Metric::MaxRssKb => stats.max_rss_kb.as_ref().and_then(|s| s.cv()),
767        Metric::NetworkPackets => stats.network_packets.as_ref().and_then(|s| s.cv()),
768        Metric::PageFaults => stats.page_faults.as_ref().and_then(|s| s.cv()),
769        Metric::ThroughputPerS => stats.throughput_per_s.as_ref().and_then(|s| s.cv()),
770        Metric::WallMs => stats.wall_ms.cv(),
771    }
772}
773
774/// Converts a Metric enum to its string representation.
775fn metric_to_string(metric: Metric) -> String {
776    metric.as_str().to_string()
777}
778
779pub fn metric_value(stats: &Stats, metric: Metric) -> Option<f64> {
780    match metric {
781        Metric::BinaryBytes => stats.binary_bytes.as_ref().map(|s| s.median as f64),
782        Metric::CpuMs => stats.cpu_ms.as_ref().map(|s| s.median as f64),
783        Metric::CtxSwitches => stats.ctx_switches.as_ref().map(|s| s.median as f64),
784        Metric::EnergyUj => stats.energy_uj.as_ref().map(|s| s.median as f64),
785        Metric::IoReadBytes => stats.io_read_bytes.as_ref().map(|s| s.median as f64),
786        Metric::IoWriteBytes => stats.io_write_bytes.as_ref().map(|s| s.median as f64),
787        Metric::MaxRssKb => stats.max_rss_kb.as_ref().map(|s| s.median as f64),
788        Metric::NetworkPackets => stats.network_packets.as_ref().map(|s| s.median as f64),
789        Metric::PageFaults => stats.page_faults.as_ref().map(|s| s.median as f64),
790        Metric::ThroughputPerS => stats.throughput_per_s.as_ref().map(|s| s.median),
791        Metric::WallMs => Some(stats.wall_ms.median as f64),
792    }
793}
794
795fn metric_value_from_run(
796    run: &RunReceipt,
797    metric: Metric,
798    statistic: MetricStatistic,
799) -> Option<f64> {
800    match statistic {
801        MetricStatistic::Median => metric_value(&run.stats, metric),
802        MetricStatistic::P95 => {
803            let values = metric_series_from_run(run, metric);
804            if values.is_empty() {
805                metric_value(&run.stats, metric)
806            } else {
807                percentile(values, 0.95)
808            }
809        }
810    }
811}
812
813fn metric_series_from_run(run: &RunReceipt, metric: Metric) -> Vec<f64> {
814    let measured = run.samples.iter().filter(|s| !s.warmup);
815
816    match metric {
817        Metric::BinaryBytes => measured
818            .filter_map(|s| s.binary_bytes.map(|v| v as f64))
819            .collect(),
820        Metric::CpuMs => measured
821            .filter_map(|s| s.cpu_ms.map(|v| v as f64))
822            .collect(),
823        Metric::CtxSwitches => measured
824            .filter_map(|s| s.ctx_switches.map(|v| v as f64))
825            .collect(),
826        Metric::EnergyUj => measured
827            .filter_map(|s| s.energy_uj.map(|v| v as f64))
828            .collect(),
829        Metric::IoReadBytes => measured
830            .filter_map(|s| s.io_read_bytes.map(|v| v as f64))
831            .collect(),
832        Metric::IoWriteBytes => measured
833            .filter_map(|s| s.io_write_bytes.map(|v| v as f64))
834            .collect(),
835        Metric::MaxRssKb => measured
836            .filter_map(|s| s.max_rss_kb.map(|v| v as f64))
837            .collect(),
838        Metric::NetworkPackets => measured
839            .filter_map(|s| s.network_packets.map(|v| v as f64))
840            .collect(),
841        Metric::PageFaults => measured
842            .filter_map(|s| s.page_faults.map(|v| v as f64))
843            .collect(),
844        Metric::ThroughputPerS => {
845            let Some(work) = run.bench.work_units else {
846                return Vec::new();
847            };
848            measured
849                .map(|s| {
850                    let secs = (s.wall_ms as f64) / 1000.0;
851                    if secs <= 0.0 {
852                        0.0
853                    } else {
854                        (work as f64) / secs
855                    }
856                })
857                .collect()
858        }
859        Metric::WallMs => measured.map(|s| s.wall_ms as f64).collect(),
860    }
861}
862
/// Computes the `q`-quantile of `values` using linear interpolation between
/// the two closest ranks.
///
/// `q` is clamped to `0.0..=1.0`, so `0.0` yields the minimum and `1.0` the
/// maximum. Returns `None` for an empty input and the sole element for a
/// single-element input.
///
/// Values are ordered with [`f64::total_cmp`], which is a genuine total order:
/// NaN inputs are placed deterministically (positive NaN after every number)
/// instead of leaving the sort order unspecified.
fn percentile(mut values: Vec<f64>, q: f64) -> Option<f64> {
    if values.is_empty() {
        return None;
    }

    // A comparator built on `partial_cmp` is not a total order once a NaN is
    // present, which the slice sorts are allowed to answer with an arbitrary
    // order or a panic. `total_cmp` avoids that; an unstable sort is fine
    // because elements that compare equal under it are bit-identical.
    values.sort_unstable_by(f64::total_cmp);

    if values.len() == 1 {
        return Some(values[0]);
    }

    // Fractional rank into the sorted values; the interpolation weight is its
    // fractional part.
    let rank = q.clamp(0.0, 1.0) * (values.len() as f64 - 1.0);
    let lower = rank.floor() as usize;
    let upper = rank.ceil() as usize;

    if lower == upper {
        return Some(values[lower]);
    }

    let weight = rank - lower as f64;
    Some(values[lower] + (values[upper] - values[lower]) * weight)
}
885
886#[cfg(test)]
887mod tests {
888    use super::*;
889    use perfgate_types::{
890        Budget, Direction, F64Summary, Metric, MetricStatus, Sample, Stats, U64Summary,
891        VerdictStatus,
892    };
893    use proptest::prelude::*;
894
895    #[test]
896    fn summarize_u64_median_even_rounds_down() {
897        let s = summarize_u64(&[10, 20]).unwrap();
898        assert_eq!(s.median, 15);
899    }
900
901    // =========================================================================
902    // Property-Based Tests
903    // =========================================================================
904
905    /// **Validates: Requirements 3.1, 3.2, 3.3**
906    ///
907    /// Property 1: Statistics Computation Correctness
908    ///
909    /// For any non-empty list of u64 values, the computed summary SHALL have:
910    /// - `median` equal to the middle value (or average of two middle values for even-length lists)
911    /// - `min` equal to the smallest value
912    /// - `max` equal to the largest value
913    mod property_tests {
914        use super::*;
915
916        /// Helper function to compute the expected median for a sorted slice.
917        /// For even-length lists, computes the average of the two middle values,
918        /// matching the implementation's rounding behavior.
919        fn expected_median(sorted: &[u64]) -> u64 {
920            let n = sorted.len();
921            let mid = n / 2;
922            if n % 2 == 1 {
923                sorted[mid]
924            } else {
925                // Match the implementation's rounding: avoid overflow by splitting
926                let a = sorted[mid - 1];
927                let b = sorted[mid];
928                (a / 2) + (b / 2) + ((a % 2 + b % 2) / 2)
929            }
930        }
931
932        proptest! {
933            /// **Validates: Requirements 3.1, 3.2, 3.3**
934            ///
935            /// Property 1: Statistics Computation Correctness
936            ///
937            /// For any non-empty list of u64 values:
938            /// - min equals the smallest value
939            /// - max equals the largest value
940            /// - median equals the middle value (or average of two middle for even-length)
941            #[test]
942            fn prop_summarize_u64_correctness(values in prop::collection::vec(any::<u64>(), 1..100)) {
943                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
944
945                // Sort the values to compute expected results
946                let mut sorted = values.clone();
947                sorted.sort_unstable();
948
949                // Property: min is the smallest value
950                let expected_min = *sorted.first().unwrap();
951                prop_assert_eq!(
952                    summary.min, expected_min,
953                    "min should be the smallest value"
954                );
955
956                // Property: max is the largest value
957                let expected_max = *sorted.last().unwrap();
958                prop_assert_eq!(
959                    summary.max, expected_max,
960                    "max should be the largest value"
961                );
962
963                // Property: median is correct
964                let expected_med = expected_median(&sorted);
965                prop_assert_eq!(
966                    summary.median, expected_med,
967                    "median should be the middle value (or average for even-length)"
968                );
969            }
970
971            /// **Validates: Requirements 3.1, 3.2, 3.3**
972            ///
973            /// Property: min <= median <= max always holds
974            #[test]
975            fn prop_summarize_u64_ordering(values in prop::collection::vec(any::<u64>(), 1..100)) {
976                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
977
978                prop_assert!(
979                    summary.min <= summary.median,
980                    "min ({}) should be <= median ({})",
981                    summary.min, summary.median
982                );
983                prop_assert!(
984                    summary.median <= summary.max,
985                    "median ({}) should be <= max ({})",
986                    summary.median, summary.max
987                );
988            }
989
990            /// **Validates: Requirements 3.1, 3.2, 3.3**
991            ///
992            /// Property: For single-element vectors, min == median == max
993            #[test]
994            fn prop_summarize_u64_single_element(value: u64) {
995                let summary = summarize_u64(&[value]).expect("single element should succeed");
996
997                prop_assert_eq!(summary.min, value, "min should equal the single value");
998                prop_assert_eq!(summary.max, value, "max should equal the single value");
999                prop_assert_eq!(summary.median, value, "median should equal the single value");
1000            }
1001        }
1002
1003        // =====================================================================
1004        // Property 2: Statistics Ordering Invariant for f64
1005        // **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1006        // =====================================================================
1007
1008        /// Strategy to generate finite f64 values (no NaN, no infinity).
1009        /// This ensures we test the ordering invariant with valid numeric values.
1010        fn finite_f64_strategy() -> impl Strategy<Value = f64> {
1011            // Generate finite f64 values in a reasonable range
1012            prop::num::f64::NORMAL.prop_filter("must be finite", |v| v.is_finite())
1013        }
1014
1015        proptest! {
1016            #![proptest_config(ProptestConfig {
1017                cases: 100,
1018                ..ProptestConfig::default()
1019            })]
1020
1021            /// **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1022            /// **Validates: Requirements 4.6**
1023            ///
1024            /// Property 2: Statistics Ordering Invariant
1025            ///
1026            /// For any non-empty list of finite f64 values, the computed summary SHALL satisfy:
1027            /// min <= median <= max
1028            #[test]
1029            fn prop_summarize_f64_ordering(
1030                values in prop::collection::vec(finite_f64_strategy(), 1..100)
1031            ) {
1032                let summary = summarize_f64(&values).expect("non-empty vec should succeed");
1033
1034                prop_assert!(
1035                    summary.min <= summary.median,
1036                    "min ({}) should be <= median ({})",
1037                    summary.min, summary.median
1038                );
1039                prop_assert!(
1040                    summary.median <= summary.max,
1041                    "median ({}) should be <= max ({})",
1042                    summary.median, summary.max
1043                );
1044            }
1045
1046            /// **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1047            /// **Validates: Requirements 4.6**
1048            ///
1049            /// Property 2: Statistics Ordering Invariant (single element)
1050            ///
1051            /// For single-element vectors, min == median == max
1052            #[test]
1053            fn prop_summarize_f64_single_element(value in finite_f64_strategy()) {
1054                let summary = summarize_f64(&[value]).expect("single element should succeed");
1055
1056                prop_assert!(
1057                    (summary.min - value).abs() < f64::EPSILON,
1058                    "min ({}) should equal the single value ({})",
1059                    summary.min, value
1060                );
1061                prop_assert!(
1062                    (summary.max - value).abs() < f64::EPSILON,
1063                    "max ({}) should equal the single value ({})",
1064                    summary.max, value
1065                );
1066                prop_assert!(
1067                    (summary.median - value).abs() < f64::EPSILON,
1068                    "median ({}) should equal the single value ({})",
1069                    summary.median, value
1070                );
1071            }
1072
1073            /// **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1074            /// **Validates: Requirements 4.6**
1075            ///
1076            /// Property 2: Statistics Ordering Invariant (correctness)
1077            ///
1078            /// For any non-empty list of finite f64 values:
1079            /// - min equals the smallest value
1080            /// - max equals the largest value
1081            /// - median equals the middle value (or average of two middle for even-length)
1082            #[test]
1083            fn prop_summarize_f64_correctness(
1084                values in prop::collection::vec(finite_f64_strategy(), 1..100)
1085            ) {
1086                let summary = summarize_f64(&values).expect("non-empty vec should succeed");
1087
1088                // Sort the values to compute expected results
1089                let mut sorted = values.clone();
1090                sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1091
1092                // Property: min is the smallest value
1093                let expected_min = *sorted.first().unwrap();
1094                prop_assert!(
1095                    (summary.min - expected_min).abs() < f64::EPSILON,
1096                    "min ({}) should be the smallest value ({})",
1097                    summary.min, expected_min
1098                );
1099
1100                // Property: max is the largest value
1101                let expected_max = *sorted.last().unwrap();
1102                prop_assert!(
1103                    (summary.max - expected_max).abs() < f64::EPSILON,
1104                    "max ({}) should be the largest value ({})",
1105                    summary.max, expected_max
1106                );
1107
1108                // Property: median is correct
1109                let n = sorted.len();
1110                let mid = n / 2;
1111                let expected_median = if n % 2 == 1 {
1112                    sorted[mid]
1113                } else {
1114                    (sorted[mid - 1] + sorted[mid]) / 2.0
1115                };
1116                prop_assert!(
1117                    (summary.median - expected_median).abs() < f64::EPSILON * 10.0,
1118                    "median ({}) should be the middle value ({})",
1119                    summary.median, expected_median
1120                );
1121            }
1122
1123            /// **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1124            /// **Validates: Requirements 4.6**
1125            ///
1126            /// Property 2: Statistics Ordering Invariant (with infinity)
1127            ///
1128            /// The summarize_f64 function handles infinity values by sorting them
1129            /// appropriately (negative infinity is smallest, positive infinity is largest).
1130            /// The ordering invariant min <= median <= max should still hold.
1131            #[test]
1132            fn prop_summarize_f64_with_infinity(
1133                finite_values in prop::collection::vec(finite_f64_strategy(), 1..50),
1134                include_pos_inf in any::<bool>(),
1135                include_neg_inf in any::<bool>(),
1136            ) {
1137                let mut values = finite_values;
1138
1139                // Optionally add positive infinity
1140                if include_pos_inf {
1141                    values.push(f64::INFINITY);
1142                }
1143
1144                // Optionally add negative infinity
1145                if include_neg_inf {
1146                    values.push(f64::NEG_INFINITY);
1147                }
1148
1149                let summary = summarize_f64(&values).expect("non-empty vec should succeed");
1150
1151                // The ordering invariant should still hold
1152                prop_assert!(
1153                    summary.min <= summary.median,
1154                    "min ({}) should be <= median ({}) even with infinity values",
1155                    summary.min, summary.median
1156                );
1157                prop_assert!(
1158                    summary.median <= summary.max,
1159                    "median ({}) should be <= max ({}) even with infinity values",
1160                    summary.median, summary.max
1161                );
1162
1163                // If we included negative infinity, min should be negative infinity
1164                if include_neg_inf {
1165                    prop_assert!(
1166                        summary.min == f64::NEG_INFINITY,
1167                        "min should be NEG_INFINITY when included, got {}",
1168                        summary.min
1169                    );
1170                }
1171
1172                // If we included positive infinity, max should be positive infinity
1173                if include_pos_inf {
1174                    prop_assert!(
1175                        summary.max == f64::INFINITY,
1176                        "max should be INFINITY when included, got {}",
1177                        summary.max
1178                    );
1179                }
1180            }
1181
1182            /// **Feature: comprehensive-test-coverage, Property 2: Statistics Ordering Invariant**
1183            /// **Validates: Requirements 4.6**
1184            ///
1185            /// Property 2: Statistics Ordering Invariant (NaN handling)
1186            ///
1187            /// The summarize_f64 function uses partial_cmp with Ordering::Equal fallback
1188            /// for NaN values. This test verifies the function doesn't panic with NaN
1189            /// and the ordering invariant holds for the non-NaN interpretation.
1190            #[test]
1191            fn prop_summarize_f64_with_nan_no_panic(
1192                finite_values in prop::collection::vec(finite_f64_strategy(), 1..50),
1193                nan_count in 0usize..3,
1194            ) {
1195                let mut values = finite_values;
1196
1197                // Add some NaN values
1198                for _ in 0..nan_count {
1199                    values.push(f64::NAN);
1200                }
1201
1202                // The function should not panic
1203                let result = summarize_f64(&values);
1204                prop_assert!(result.is_ok(), "summarize_f64 should not panic with NaN values");
1205
1206                let summary = result.unwrap();
1207
1208                // Due to NaN comparison behavior, we can only verify the function completes
1209                // and returns some result. The ordering may not hold strictly with NaN
1210                // because NaN comparisons are undefined, but the function should not panic.
1211                // We verify that if all values are finite (no NaN in result), ordering holds.
1212                if summary.min.is_finite() && summary.median.is_finite() && summary.max.is_finite() {
1213                    prop_assert!(
1214                        summary.min <= summary.median,
1215                        "min ({}) should be <= median ({}) for finite results",
1216                        summary.min, summary.median
1217                    );
1218                    prop_assert!(
1219                        summary.median <= summary.max,
1220                        "median ({}) should be <= max ({}) for finite results",
1221                        summary.median, summary.max
1222                    );
1223                }
1224            }
1225        }
1226
1227        // =====================================================================
1228        // Property 3: Median Algorithm Correctness
1229        // **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1230        // =====================================================================
1231
1232        /// Strategy to generate large u64 values near u64::MAX for overflow testing.
1233        /// This generates values in the upper 10% of the u64 range.
1234        fn large_u64_strategy() -> impl Strategy<Value = u64> {
1235            // Generate values in the range [u64::MAX - u64::MAX/10, u64::MAX]
1236            // This ensures we test values near the overflow boundary
1237            let min_val = u64::MAX - (u64::MAX / 10);
1238            min_val..=u64::MAX
1239        }
1240
1241        /// Reference implementation of median for u64 that uses u128 to avoid overflow.
1242        /// This serves as the oracle for testing the overflow-safe implementation.
1243        fn reference_median_u64(sorted: &[u64]) -> u64 {
1244            debug_assert!(!sorted.is_empty());
1245            let n = sorted.len();
1246            let mid = n / 2;
1247            if n % 2 == 1 {
1248                sorted[mid]
1249            } else {
1250                // Use u128 to compute the average without overflow, then truncate
1251                let a = sorted[mid - 1] as u128;
1252                let b = sorted[mid] as u128;
1253                ((a + b) / 2) as u64
1254            }
1255        }
1256
1257        proptest! {
1258            #![proptest_config(ProptestConfig {
1259                cases: 100,
1260                ..ProptestConfig::default()
1261            })]
1262
1263            /// **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1264            /// **Validates: Requirements 8.5**
1265            ///
1266            /// Property 3: Median Algorithm Correctness (Overflow Handling)
1267            ///
1268            /// For any non-empty list of large u64 values near u64::MAX, the median
1269            /// algorithm SHALL compute the correct result without overflow.
1270            /// The implementation uses the formula:
1271            /// (a/2) + (b/2) + ((a%2 + b%2)/2) to avoid overflow.
1272            #[test]
1273            fn prop_median_u64_overflow_handling(
1274                values in prop::collection::vec(large_u64_strategy(), 2..50)
1275            ) {
1276                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
1277
1278                // Sort values to compute expected median using reference implementation
1279                let mut sorted = values.clone();
1280                sorted.sort_unstable();
1281
1282                let expected_median = reference_median_u64(&sorted);
1283
1284                prop_assert_eq!(
1285                    summary.median, expected_median,
1286                    "median should match reference implementation for large values near u64::MAX"
1287                );
1288
1289                // Also verify the ordering invariant holds
1290                prop_assert!(
1291                    summary.min <= summary.median,
1292                    "min ({}) should be <= median ({}) for large values",
1293                    summary.min, summary.median
1294                );
1295                prop_assert!(
1296                    summary.median <= summary.max,
1297                    "median ({}) should be <= max ({}) for large values",
1298                    summary.median, summary.max
1299                );
1300            }
1301
1302            /// **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1303            /// **Validates: Requirements 8.5**
1304            ///
1305            /// Property 3: Median Algorithm Correctness (Even Length - Average with Rounding Down)
1306            ///
1307            /// For any even-length sorted list of u64 values, the median SHALL equal
1308            /// the average of the two middle elements, rounded down (floor division).
1309            #[test]
1310            fn prop_median_u64_even_length_rounding(
1311                // Generate pairs of values to ensure even length
1312                pairs in prop::collection::vec((any::<u64>(), any::<u64>()), 1..50)
1313            ) {
1314                // Flatten pairs into a single vector (guaranteed even length)
1315                let values: Vec<u64> = pairs.into_iter().flat_map(|(a, b)| vec![a, b]).collect();
1316                prop_assert!(values.len().is_multiple_of(2), "length should be even");
1317
1318                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
1319
1320                // Sort values to compute expected median
1321                let mut sorted = values.clone();
1322                sorted.sort_unstable();
1323
1324                let n = sorted.len();
1325                let mid = n / 2;
1326                let a = sorted[mid - 1];
1327                let b = sorted[mid];
1328
1329                // Expected median using reference implementation (u128 to avoid overflow)
1330                let expected_median = reference_median_u64(&sorted);
1331
1332                prop_assert_eq!(
1333                    summary.median, expected_median,
1334                    "median for even-length list should be floor((a + b) / 2) where a={}, b={}",
1335                    a, b
1336                );
1337
1338                // Verify rounding down behavior: median should be <= true average
1339                // (true average computed with u128 to avoid overflow)
1340                let true_avg_x2 = (a as u128) + (b as u128);
1341                let median_x2 = (summary.median as u128) * 2;
1342                prop_assert!(
1343                    median_x2 <= true_avg_x2,
1344                    "median*2 ({}) should be <= (a+b) ({}) due to floor rounding",
1345                    median_x2, true_avg_x2
1346                );
1347            }
1348
1349            /// **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1350            /// **Validates: Requirements 8.5**
1351            ///
1352            /// Property 3: Median Algorithm Correctness (Odd Length - Exact Middle)
1353            ///
1354            /// For any odd-length sorted list of u64 values, the median SHALL equal
1355            /// exactly the middle element (no averaging or rounding).
1356            #[test]
1357            fn prop_median_u64_odd_length_exact_middle(
1358                // Generate odd-length vectors by generating n values and adding one more
1359                base_values in prop::collection::vec(any::<u64>(), 1..50),
1360                extra_value: u64,
1361            ) {
1362                // Ensure odd length by conditionally adding an extra value
1363                let mut values = base_values;
1364                if values.len() % 2 == 0 {
1365                    values.push(extra_value);
1366                }
1367                prop_assert!(values.len() % 2 == 1, "length should be odd");
1368
1369                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
1370
1371                // Sort values to find the exact middle element
1372                let mut sorted = values.clone();
1373                sorted.sort_unstable();
1374
1375                let n = sorted.len();
1376                let mid = n / 2;
1377                let expected_median = sorted[mid];
1378
1379                prop_assert_eq!(
1380                    summary.median, expected_median,
1381                    "median for odd-length list should be exactly the middle element at index {}",
1382                    mid
1383                );
1384            }
1385
1386            /// **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1387            /// **Validates: Requirements 8.5**
1388            ///
1389            /// Property 3: Median Algorithm Correctness (Extreme Values)
1390            ///
1391            /// Test with u64::MAX values to ensure no overflow occurs.
1392            /// When both middle values are u64::MAX, the median should be u64::MAX.
1393            #[test]
1394            fn prop_median_u64_max_values(
1395                count in 2usize..20,
1396            ) {
1397                // Create a vector of all u64::MAX values
1398                let values: Vec<u64> = vec![u64::MAX; count];
1399
1400                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
1401
1402                // All values are u64::MAX, so median should be u64::MAX
1403                prop_assert_eq!(
1404                    summary.median, u64::MAX,
1405                    "median of all u64::MAX values should be u64::MAX"
1406                );
1407                prop_assert_eq!(
1408                    summary.min, u64::MAX,
1409                    "min of all u64::MAX values should be u64::MAX"
1410                );
1411                prop_assert_eq!(
1412                    summary.max, u64::MAX,
1413                    "max of all u64::MAX values should be u64::MAX"
1414                );
1415            }
1416
1417            /// **Feature: comprehensive-test-coverage, Property 3: Median Algorithm Correctness**
1418            /// **Validates: Requirements 8.5**
1419            ///
1420            /// Property 3: Median Algorithm Correctness (Mixed Large Values)
1421            ///
1422            /// Test with a mix of u64::MAX and u64::MAX-1 to verify correct averaging
1423            /// at the overflow boundary.
1424            #[test]
1425            fn prop_median_u64_adjacent_max_values(
1426                max_count in 1usize..10,
1427                max_minus_one_count in 1usize..10,
1428            ) {
1429                // Create a vector with u64::MAX and u64::MAX-1 values
1430                let mut values: Vec<u64> = Vec::new();
1431                for _ in 0..max_count {
1432                    values.push(u64::MAX);
1433                }
1434                for _ in 0..max_minus_one_count {
1435                    values.push(u64::MAX - 1);
1436                }
1437
1438                let summary = summarize_u64(&values).expect("non-empty vec should succeed");
1439
1440                // Sort to compute expected median
1441                let mut sorted = values.clone();
1442                sorted.sort_unstable();
1443
1444                let expected_median = reference_median_u64(&sorted);
1445
1446                prop_assert_eq!(
1447                    summary.median, expected_median,
1448                    "median should match reference for mix of u64::MAX and u64::MAX-1"
1449                );
1450
1451                // Verify ordering invariant
1452                prop_assert!(
1453                    summary.min <= summary.median && summary.median <= summary.max,
1454                    "ordering invariant should hold: {} <= {} <= {}",
1455                    summary.min, summary.median, summary.max
1456                );
1457            }
1458        }
1459
1460        // =====================================================================
1461        // Property 2: Warmup Sample Exclusion
1462        // =====================================================================
1463
1464        /// Helper to generate a non-warmup sample with arbitrary wall_ms
1465        fn non_warmup_sample(wall_ms: u64) -> Sample {
1466            Sample {
1467                wall_ms,
1468                exit_code: 0,
1469                warmup: false,
1470                timed_out: false,
1471                cpu_ms: None,
1472                page_faults: None,
1473                ctx_switches: None,
1474                max_rss_kb: None,
1475                io_read_bytes: None,
1476                io_write_bytes: None,
1477                network_packets: None,
1478                energy_uj: None,
1479                binary_bytes: None,
1480                stdout: None,
1481                stderr: None,
1482            }
1483        }
1484
1485        /// Helper to generate a warmup sample with arbitrary wall_ms
1486        fn warmup_sample(wall_ms: u64) -> Sample {
1487            Sample {
1488                wall_ms,
1489                exit_code: 0,
1490                warmup: true,
1491                timed_out: false,
1492                cpu_ms: None,
1493                page_faults: None,
1494                ctx_switches: None,
1495                max_rss_kb: None,
1496                io_read_bytes: None,
1497                io_write_bytes: None,
1498                network_packets: None,
1499                energy_uj: None,
1500                binary_bytes: None,
1501                stdout: None,
1502                stderr: None,
1503            }
1504        }
1505
1506        proptest! {
1507            /// **Validates: Requirements 3.4**
1508            ///
1509            /// Property 2: Warmup Sample Exclusion
1510            ///
1511            /// For any list of samples containing both warmup and non-warmup samples,
1512            /// the computed statistics SHALL only reflect non-warmup samples.
1513            /// Adding or modifying warmup samples SHALL NOT change the computed statistics.
1514            #[test]
1515            fn prop_warmup_samples_excluded_from_stats(
1516                // Generate 1-20 non-warmup sample wall_ms values
1517                non_warmup_wall_ms in prop::collection::vec(1u64..10000, 1..20),
1518                // Generate 0-10 warmup sample wall_ms values (can be any values)
1519                warmup_wall_ms in prop::collection::vec(any::<u64>(), 0..10),
1520            ) {
1521                // Create non-warmup samples
1522                let non_warmup_samples: Vec<Sample> = non_warmup_wall_ms
1523                    .iter()
1524                    .map(|&ms| non_warmup_sample(ms))
1525                    .collect();
1526
1527                // Create warmup samples
1528                let warmup_samples: Vec<Sample> = warmup_wall_ms
1529                    .iter()
1530                    .map(|&ms| warmup_sample(ms))
1531                    .collect();
1532
1533                // Compute stats with only non-warmup samples
1534                let stats_without_warmup = compute_stats(&non_warmup_samples, None)
1535                    .expect("non-empty non-warmup samples should succeed");
1536
1537                // Combine non-warmup and warmup samples
1538                let mut combined_samples = non_warmup_samples.clone();
1539                combined_samples.extend(warmup_samples.clone());
1540
1541                // Compute stats with combined samples (warmup + non-warmup)
1542                let stats_with_warmup = compute_stats(&combined_samples, None)
1543                    .expect("combined samples with non-warmup should succeed");
1544
1545                // Property: Statistics should be identical regardless of warmup samples
1546                prop_assert_eq!(
1547                    stats_without_warmup.wall_ms, stats_with_warmup.wall_ms,
1548                    "wall_ms stats should be identical with or without warmup samples"
1549                );
1550                prop_assert_eq!(
1551                    stats_without_warmup.max_rss_kb, stats_with_warmup.max_rss_kb,
1552                    "max_rss_kb stats should be identical with or without warmup samples"
1553                );
1554                prop_assert_eq!(
1555                    stats_without_warmup.throughput_per_s, stats_with_warmup.throughput_per_s,
1556                    "throughput_per_s stats should be identical with or without warmup samples"
1557                );
1558            }
1559
1560            /// **Validates: Requirements 3.4**
1561            ///
1562            /// Property 2: Warmup Sample Exclusion (modification variant)
1563            ///
1564            /// Modifying warmup sample values SHALL NOT change the computed statistics.
1565            #[test]
1566            fn prop_modifying_warmup_samples_does_not_affect_stats(
1567                // Generate 1-10 non-warmup sample wall_ms values
1568                non_warmup_wall_ms in prop::collection::vec(1u64..10000, 1..10),
1569                // Generate 1-5 warmup sample wall_ms values (original)
1570                warmup_wall_ms_original in prop::collection::vec(any::<u64>(), 1..5),
1571                // Generate 1-5 warmup sample wall_ms values (modified - different values)
1572                warmup_wall_ms_modified in prop::collection::vec(any::<u64>(), 1..5),
1573            ) {
1574                // Create non-warmup samples
1575                let non_warmup_samples: Vec<Sample> = non_warmup_wall_ms
1576                    .iter()
1577                    .map(|&ms| non_warmup_sample(ms))
1578                    .collect();
1579
1580                // Create original warmup samples
1581                let warmup_samples_original: Vec<Sample> = warmup_wall_ms_original
1582                    .iter()
1583                    .map(|&ms| warmup_sample(ms))
1584                    .collect();
1585
1586                // Create modified warmup samples (different values)
1587                let warmup_samples_modified: Vec<Sample> = warmup_wall_ms_modified
1588                    .iter()
1589                    .map(|&ms| warmup_sample(ms))
1590                    .collect();
1591
1592                // Combine with original warmup samples
1593                let mut samples_with_original_warmup = non_warmup_samples.clone();
1594                samples_with_original_warmup.extend(warmup_samples_original);
1595
1596                // Combine with modified warmup samples
1597                let mut samples_with_modified_warmup = non_warmup_samples.clone();
1598                samples_with_modified_warmup.extend(warmup_samples_modified);
1599
1600                // Compute stats with original warmup samples
1601                let stats_original = compute_stats(&samples_with_original_warmup, None)
1602                    .expect("samples with original warmup should succeed");
1603
1604                // Compute stats with modified warmup samples
1605                let stats_modified = compute_stats(&samples_with_modified_warmup, None)
1606                    .expect("samples with modified warmup should succeed");
1607
1608                // Property: Statistics should be identical regardless of warmup sample values
1609                prop_assert_eq!(
1610                    stats_original.wall_ms, stats_modified.wall_ms,
1611                    "wall_ms stats should be identical regardless of warmup sample values"
1612                );
1613            }
1614
1615            /// **Validates: Requirements 3.4**
1616            ///
1617            /// Property 2: Warmup Sample Exclusion (only warmup samples error)
1618            ///
1619            /// If all samples are warmup samples, compute_stats SHALL return an error.
1620            #[test]
1621            fn prop_only_warmup_samples_returns_error(
1622                warmup_wall_ms in prop::collection::vec(any::<u64>(), 1..10),
1623            ) {
1624                // Create only warmup samples
1625                let warmup_only_samples: Vec<Sample> = warmup_wall_ms
1626                    .iter()
1627                    .map(|&ms| warmup_sample(ms))
1628                    .collect();
1629
1630                // Compute stats should fail with NoSamples error
1631                let result = compute_stats(&warmup_only_samples, None);
1632
1633                prop_assert!(
1634                    result.is_err(),
1635                    "compute_stats should return error when all samples are warmup"
1636                );
1637
1638                // Verify it's specifically a NoSamples error
1639                match result {
1640                    Err(DomainError::NoSamples) => { /* expected */ }
1641                    Err(other) => prop_assert!(false, "expected NoSamples error, got: {:?}", other),
1642                    Ok(_) => prop_assert!(false, "expected error, got Ok"),
1643                }
1644            }
1645        }
1646
1647        // =====================================================================
1648        // Property 4: Metric Status Determination
1649        // =====================================================================
1650
1651        /// Helper to compute expected regression value based on direction.
1652        ///
1653        /// For Direction::Lower: regression = max(0, (current - baseline) / baseline)
1654        /// For Direction::Higher: regression = max(0, (baseline - current) / baseline)
1655        fn compute_regression(baseline: f64, current: f64, direction: Direction) -> f64 {
1656            let pct = (current - baseline) / baseline;
1657            match direction {
1658                Direction::Lower => pct.max(0.0),
1659                Direction::Higher => (-pct).max(0.0),
1660            }
1661        }
1662
1663        /// Helper to compute expected status based on regression and thresholds.
1664        fn expected_status(regression: f64, threshold: f64, warn_threshold: f64) -> MetricStatus {
1665            if regression > threshold {
1666                MetricStatus::Fail
1667            } else if regression >= warn_threshold {
1668                MetricStatus::Warn
1669            } else {
1670                MetricStatus::Pass
1671            }
1672        }
1673
1674        /// Strategy to generate valid threshold pairs where warn_threshold <= threshold.
1675        fn threshold_pair_strategy() -> impl Strategy<Value = (f64, f64)> {
1676            // Generate threshold in range (0.0, 1.0] and warn_factor in range [0.0, 1.0]
1677            (0.01f64..1.0, 0.0f64..=1.0).prop_map(|(threshold, warn_factor)| {
1678                let warn_threshold = threshold * warn_factor;
1679                (threshold, warn_threshold)
1680            })
1681        }
1682
1683        /// Strategy to generate a valid baseline value (must be > 0).
1684        fn baseline_strategy() -> impl Strategy<Value = f64> {
1685            // Use positive values, avoiding very small values that could cause precision issues
1686            1.0f64..10000.0
1687        }
1688
1689        /// Strategy to generate a current value (can be any positive value).
1690        fn current_strategy() -> impl Strategy<Value = f64> {
1691            // Use positive values
1692            0.1f64..20000.0
1693        }
1694
1695        proptest! {
1696            /// **Validates: Requirements 5.1, 5.2, 5.3**
1697            ///
1698            /// Property 4: Metric Status Determination
1699            ///
1700            /// For any baseline value, current value, threshold, warn_threshold, and direction:
1701            /// - If regression > threshold, status SHALL be Fail
1702            /// - If warn_threshold <= regression <= threshold, status SHALL be Warn
1703            /// - If regression < warn_threshold, status SHALL be Pass
1704            #[test]
1705            fn prop_metric_status_determination_lower_is_better(
1706                baseline in baseline_strategy(),
1707                current in current_strategy(),
1708                (threshold, warn_threshold) in threshold_pair_strategy(),
1709            ) {
1710                let direction = Direction::Lower;
1711
1712                // Create stats for baseline and current
1713                let baseline_stats = Stats {
1714                    wall_ms: U64Summary::new(baseline as u64, baseline as u64, baseline as u64),
1715                    cpu_ms: None,
1716                    page_faults: None,
1717                    ctx_switches: None,
1718                    max_rss_kb: None,
1719                    io_read_bytes: None,
1720                    io_write_bytes: None,
1721                    network_packets: None,
1722                    energy_uj: None,
1723                    binary_bytes: None,
1724                    throughput_per_s: None,
1725                };
1726
1727                let current_stats = Stats {
1728                    wall_ms: U64Summary::new(current as u64, current as u64, current as u64),
1729                    cpu_ms: None,
1730                    page_faults: None,
1731                    ctx_switches: None,
1732                    max_rss_kb: None,
1733                    io_read_bytes: None,
1734                    io_write_bytes: None,
1735                    network_packets: None,
1736                    energy_uj: None,
1737                    binary_bytes: None,
1738                    throughput_per_s: None,
1739                };
1740
1741                // Create budget with the generated thresholds
1742                let mut budgets = BTreeMap::new();
1743                budgets.insert(
1744                    Metric::WallMs,
1745                    Budget {
1746                        noise_threshold: None,
1747                        noise_policy: perfgate_types::NoisePolicy::Ignore,
1748                        threshold,
1749                        warn_threshold,
1750                        direction,
1751                    },
1752                );
1753
1754                // Compare stats
1755                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
1756                    .expect("compare_stats should succeed with valid inputs");
1757
1758                // Get the delta for WallMs
1759                let delta = comparison.deltas.get(&Metric::WallMs)
1760                    .expect("WallMs delta should exist");
1761
1762                // Verify the status matches expected
1763                // Note: We use the actual median values (as u64) for comparison,
1764                // so we need to recompute expected based on actual values used
1765                let actual_baseline = baseline_stats.wall_ms.median as f64;
1766                let actual_current = current_stats.wall_ms.median as f64;
1767                let actual_regression = compute_regression(actual_baseline, actual_current, direction);
1768                let actual_expected = expected_status(actual_regression, threshold, warn_threshold);
1769
1770                prop_assert_eq!(
1771                    delta.status, actual_expected,
1772                    "Status mismatch for Direction::Lower: baseline={}, current={}, regression={}, threshold={}, warn_threshold={}",
1773                    actual_baseline, actual_current, actual_regression, threshold, warn_threshold
1774                );
1775            }
1776
1777            /// **Validates: Requirements 5.1, 5.2, 5.3**
1778            ///
1779            /// Property 4: Metric Status Determination (Higher is Better)
1780            ///
1781            /// For Direction::Higher (e.g., throughput), regression is computed as
1782            /// max(0, (baseline - current) / baseline), meaning a decrease in value
1783            /// is a regression.
1784            #[test]
1785            fn prop_metric_status_determination_higher_is_better(
1786                baseline in baseline_strategy(),
1787                current in current_strategy(),
1788                (threshold, warn_threshold) in threshold_pair_strategy(),
1789            ) {
1790                let direction = Direction::Higher;
1791
1792                // Create stats for baseline and current using throughput
1793                let baseline_stats = Stats {
1794                    wall_ms: U64Summary::new(1000, 1000, 1000),
1795                    cpu_ms: None,
1796                    page_faults: None,
1797                    ctx_switches: None,
1798                    max_rss_kb: None,
1799                    io_read_bytes: None,
1800                    io_write_bytes: None,
1801                    network_packets: None,
1802                    energy_uj: None,
1803                    binary_bytes: None,
1804                    throughput_per_s: Some(F64Summary::new(baseline, baseline, baseline)),                };
1805
1806                let current_stats = Stats {
1807                    wall_ms: U64Summary::new(1000, 1000, 1000),
1808                    cpu_ms: None,
1809                    page_faults: None,
1810                    ctx_switches: None,
1811                    max_rss_kb: None,
1812                    io_read_bytes: None,
1813                    io_write_bytes: None,
1814                    network_packets: None,
1815                    energy_uj: None,
1816                    binary_bytes: None,
1817                    throughput_per_s: Some(F64Summary::new(current, current, current)),                };
1818
1819                // Create budget with the generated thresholds
1820                let mut budgets = BTreeMap::new();
1821                budgets.insert(
1822                    Metric::ThroughputPerS,
1823                    Budget {
1824                        noise_threshold: None,
1825                        noise_policy: perfgate_types::NoisePolicy::Ignore,
1826                        threshold,
1827                        warn_threshold,
1828                        direction,
1829                    },
1830                );
1831
1832                // Compare stats
1833                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
1834                    .expect("compare_stats should succeed with valid inputs");
1835
1836                // Get the delta for ThroughputPerS
1837                let delta = comparison.deltas.get(&Metric::ThroughputPerS)
1838                    .expect("ThroughputPerS delta should exist");
1839
1840                // Compute expected regression and status
1841                let regression = compute_regression(baseline, current, direction);
1842                let expected = expected_status(regression, threshold, warn_threshold);
1843
1844                prop_assert_eq!(
1845                    delta.status, expected,
1846                    "Status mismatch for Direction::Higher: baseline={}, current={}, regression={}, threshold={}, warn_threshold={}",
1847                    baseline, current, regression, threshold, warn_threshold
1848                );
1849            }
1850
1851            /// **Validates: Requirements 5.1, 5.2, 5.3**
1852            ///
1853            /// Property 4: Metric Status Determination (Regression is non-negative)
1854            ///
1855            /// The regression value SHALL always be >= 0, regardless of whether
1856            /// performance improved or degraded.
1857            #[test]
1858            fn prop_regression_is_non_negative(
1859                baseline in baseline_strategy(),
1860                current in current_strategy(),
1861                (threshold, warn_threshold) in threshold_pair_strategy(),
1862                direction_lower in any::<bool>(),
1863            ) {
1864                let direction = if direction_lower { Direction::Lower } else { Direction::Higher };
1865
1866                // Create appropriate stats based on direction
1867                let (baseline_stats, current_stats, metric, budgets) = if direction_lower {
1868                    let bs = Stats {
1869                        wall_ms: U64Summary::new(baseline as u64, baseline as u64, baseline as u64),
1870                        cpu_ms: None,
1871                        page_faults: None,
1872                        ctx_switches: None,
1873                        max_rss_kb: None,
1874                        io_read_bytes: None,
1875                        io_write_bytes: None,
1876                        network_packets: None,
1877                        energy_uj: None,
1878                        binary_bytes: None,
1879                        throughput_per_s: None,
1880                        };                    let cs = Stats {
1881                        wall_ms: U64Summary::new(current as u64, current as u64, current as u64),
1882                        cpu_ms: None,
1883                        page_faults: None,
1884                        ctx_switches: None,
1885                        max_rss_kb: None,
1886                        io_read_bytes: None,
1887                        io_write_bytes: None,
1888                        network_packets: None,
1889                        energy_uj: None,
1890                        binary_bytes: None,
1891                        throughput_per_s: None,
1892                        };                    let mut b = BTreeMap::new();
1893                    b.insert(Metric::WallMs, Budget {
1894                        noise_threshold: None,
1895                        noise_policy: perfgate_types::NoisePolicy::Ignore,  threshold, warn_threshold, direction });
1896                    (bs, cs, Metric::WallMs, b)
1897                } else {
1898                    let bs = Stats {
1899                        wall_ms: U64Summary::new(1000, 1000, 1000),
1900                        cpu_ms: None,
1901                        page_faults: None,
1902                        ctx_switches: None,
1903                        max_rss_kb: None,
1904                        io_read_bytes: None,
1905                        io_write_bytes: None,
1906                        network_packets: None,
1907                        energy_uj: None,
1908                        binary_bytes: None,
1909                        throughput_per_s: Some(F64Summary::new(baseline, baseline, baseline)),                    };
1910                    let cs = Stats {
1911                        wall_ms: U64Summary::new(1000, 1000, 1000),
1912                        cpu_ms: None,
1913                        page_faults: None,
1914                        ctx_switches: None,
1915                        max_rss_kb: None,
1916                        io_read_bytes: None,
1917                        io_write_bytes: None,
1918                        network_packets: None,
1919                        energy_uj: None,
1920                        binary_bytes: None,
1921                        throughput_per_s: Some(F64Summary::new(current, current, current)),                    };
1922                    let mut b = BTreeMap::new();
1923                    b.insert(Metric::ThroughputPerS, Budget {
1924                        noise_threshold: None,
1925                        noise_policy: perfgate_types::NoisePolicy::Ignore,  threshold, warn_threshold, direction });
1926                    (bs, cs, Metric::ThroughputPerS, b)
1927                };
1928
1929                // Compare stats
1930                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
1931                    .expect("compare_stats should succeed with valid inputs");
1932
1933                // Get the delta
1934                let delta = comparison.deltas.get(&metric)
1935                    .expect("delta should exist");
1936
1937                // Property: regression is always >= 0
1938                prop_assert!(
1939                    delta.regression >= 0.0,
1940                    "Regression should be non-negative, got: {} for baseline={}, current={}, direction={:?}",
1941                    delta.regression, baseline, current, direction
1942                );
1943            }
1944
1945            /// **Validates: Requirements 5.1, 5.2, 5.3**
1946            ///
1947            /// Property 4: Metric Status Determination (Status boundaries)
1948            ///
1949            /// Verify the exact boundary conditions:
1950            /// - regression == threshold should be Warn (not Fail)
1951            /// - regression == warn_threshold should be Warn (not Pass)
1952            #[test]
1953            fn prop_status_boundary_conditions(
1954                baseline in 100.0f64..1000.0,
1955                (threshold, warn_threshold) in threshold_pair_strategy(),
1956            ) {
1957                let baseline_stats = Stats {
1958                    wall_ms: U64Summary::new(1000, 1000, 1000),
1959                    cpu_ms: None,
1960                    page_faults: None,
1961                    ctx_switches: None,
1962                    max_rss_kb: None,
1963                    io_read_bytes: None,
1964                    io_write_bytes: None,
1965                    network_packets: None,
1966                    energy_uj: None,
1967                    binary_bytes: None,
1968                    throughput_per_s: Some(F64Summary::new(baseline, baseline, baseline)),                };
1969
1970                // For Direction::Higher, regression = max(0, (baseline - current) / baseline)
1971                // To get regression = threshold, we need: (baseline - current) / baseline = threshold
1972                // So: current = baseline * (1 - threshold)
1973                let current_at_threshold_higher = baseline * (1.0 - threshold);
1974
1975                // Only test if current would be positive
1976                if current_at_threshold_higher > 0.0 {
1977                    let current_stats = Stats {
1978                        wall_ms: U64Summary::new(1000, 1000, 1000),
1979                        cpu_ms: None,
1980                        page_faults: None,
1981                        ctx_switches: None,
1982                        max_rss_kb: None,
1983                        io_read_bytes: None,
1984                        io_write_bytes: None,
1985                        network_packets: None,
1986                        energy_uj: None,
1987                        binary_bytes: None,
1988                        throughput_per_s: Some(F64Summary::new(current_at_threshold_higher, current_at_threshold_higher, current_at_threshold_higher)),
1989                    };
1990
1991                    let mut budgets = BTreeMap::new();
1992                    budgets.insert(
1993                        Metric::ThroughputPerS,
1994                        Budget {
1995                        noise_threshold: None,
1996                        noise_policy: perfgate_types::NoisePolicy::Ignore,
1997                            threshold,
1998                            warn_threshold,
1999                            direction: Direction::Higher,
2000                        },
2001                    );
2002
2003                    let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2004                        .expect("compare_stats should succeed");
2005
2006                    let delta = comparison.deltas.get(&Metric::ThroughputPerS)
2007                        .expect("delta should exist");
2008
2009                    // At exactly threshold, status should be Warn (not Fail)
2010                    // because the condition is regression > threshold for Fail
2011                    prop_assert!(
2012                        delta.status != MetricStatus::Fail || delta.regression > threshold,
2013                        "At regression={} (threshold={}), status should not be Fail unless regression > threshold",
2014                        delta.regression, threshold
2015                    );
2016                }
2017            }
2018        }
2019
2020        // =====================================================================
2021        // Property 5: Verdict Aggregation
2022        // =====================================================================
2023
2024        /// Strategy to generate a random MetricStatus.
2025        fn metric_status_strategy() -> impl Strategy<Value = MetricStatus> {
2026            prop_oneof![
2027                Just(MetricStatus::Pass),
2028                Just(MetricStatus::Warn),
2029                Just(MetricStatus::Fail),
2030                Just(MetricStatus::Skip),
2031            ]
2032        }
2033
2034        /// Compute the expected verdict status from a set of metric statuses.
2035        ///
2036        /// - If any metric has Fail status, verdict SHALL be Fail
2037        /// - Else if any metric has Warn status, verdict SHALL be Warn
2038        /// - Else verdict SHALL be Pass
2039        fn expected_verdict_status(statuses: &[MetricStatus]) -> VerdictStatus {
2040            if statuses.contains(&MetricStatus::Fail) {
2041                VerdictStatus::Fail
2042            } else if statuses.contains(&MetricStatus::Warn) {
2043                VerdictStatus::Warn
2044            } else if statuses.contains(&MetricStatus::Pass) {
2045                VerdictStatus::Pass
2046            } else {
2047                VerdictStatus::Skip
2048            }
2049        }
2050
2051        /// Helper to create Stats with a specific wall_ms median value and optional CV.
2052        fn make_stats_with_wall_ms_and_cv(median: u64, cv: Option<f64>) -> Stats {
2053            let (mean, stddev) = if let Some(cv_val) = cv {
2054                let mean = median as f64;
2055                let stddev = mean * cv_val;
2056                (Some(mean), Some(stddev))
2057            } else {
2058                (None, None)
2059            };
2060
2061            Stats {
2062                wall_ms: U64Summary {
2063                    median,
2064                    min: median,
2065                    max: median,
2066                    mean,
2067                    stddev,
2068                },
2069                cpu_ms: None,
2070                page_faults: None,
2071                ctx_switches: None,
2072                max_rss_kb: Some(U64Summary {
2073                    median,
2074                    min: median,
2075                    max: median,
2076                    mean,
2077                    stddev,
2078                }),
2079                io_read_bytes: None,
2080                io_write_bytes: None,
2081                network_packets: None,
2082                energy_uj: None,
2083                binary_bytes: None,
2084                throughput_per_s: Some(F64Summary {
2085                    median: median as f64,
2086                    min: median as f64,
2087                    max: median as f64,
2088                    mean,
2089                    stddev,
2090                }),
2091            }
2092        }
2093
2094        fn make_stats_with_wall_ms(median: u64) -> Stats {
2095            make_stats_with_wall_ms_and_cv(median, None)
2096        }
2097
2098        /// Helper to compute the current value needed to achieve a specific status.
2099        ///
2100        /// Given a baseline, threshold, warn_threshold, and desired status,
2101        /// returns a current value that will produce that status.
2102        fn current_for_status(
2103            baseline: u64,
2104            threshold: f64,
2105            warn_threshold: f64,
2106            status: MetricStatus,
2107        ) -> u64 {
2108            let baseline_f = baseline as f64;
2109            match status {
2110                // For Pass: regression < warn_threshold
2111                // regression = (current - baseline) / baseline
2112                // So current = baseline * (1 + regression)
2113                // Use regression = 0 (no change) for Pass
2114                MetricStatus::Pass => baseline,
2115
2116                // For Warn: warn_threshold <= regression <= threshold
2117                // Use midpoint between warn_threshold and threshold
2118                MetricStatus::Warn => {
2119                    let regression = (warn_threshold + threshold) / 2.0;
2120                    (baseline_f * (1.0 + regression)).ceil() as u64
2121                }
2122
2123                // For Fail: regression > threshold
2124                // Use threshold + 0.1 to ensure we exceed it
2125                MetricStatus::Fail => {
2126                    let regression = threshold + 0.1;
2127                    (baseline_f * (1.0 + regression)).ceil() as u64
2128                }
2129
2130                // For Skip: return baseline (same as Pass for this helper)
2131                MetricStatus::Skip => baseline,
2132            }
2133        }
2134
2135        proptest! {
2136            /// **Validates: Requirements 5.4, 5.5, 5.6**
2137            ///
2138            /// Property 5: Verdict Aggregation
2139            ///
2140            /// For any set of metric statuses:
2141            /// - If any metric has Fail status, verdict SHALL be Fail
2142            /// - Else if any metric has Warn status, verdict SHALL be Warn
2143            /// - Else verdict SHALL be Pass
2144            #[test]
2145            fn prop_verdict_aggregation_single_metric(
2146                status in metric_status_strategy(),
2147            ) {
2148                // Use fixed baseline and thresholds
2149                let baseline = 1000u64;
2150                let threshold = 0.20;
2151                let warn_threshold = 0.10;
2152
2153                let baseline_stats = make_stats_with_wall_ms(baseline);
2154                let current_value = current_for_status(baseline, threshold, warn_threshold, status);
2155                let current_cv = if status == MetricStatus::Skip { Some(0.5) } else { None };
2156                let current_stats = make_stats_with_wall_ms_and_cv(current_value, current_cv);
2157
2158                let mut budget = Budget {
2159                    noise_threshold: None,
2160                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2161                    threshold,
2162                    warn_threshold,
2163                    direction: Direction::Lower,
2164                };
2165
2166                if status == MetricStatus::Skip {
2167                    budget.noise_threshold = Some(0.1);
2168                    budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2169                }
2170
2171                let mut budgets = BTreeMap::new();
2172                budgets.insert(Metric::WallMs, budget);
2173
2174                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2175                    .expect("compare_stats should succeed");
2176
2177                // Verify the verdict matches the expected aggregation
2178                let expected = expected_verdict_status(&[status]);
2179                prop_assert_eq!(
2180                    comparison.verdict.status, expected,
2181                    "Verdict should be {:?} when single metric status is {:?}",
2182                    expected, status
2183                );
2184            }
2185
2186            /// **Validates: Requirements 5.4, 5.5, 5.6**
2187            ///
2188            /// Property 5: Verdict Aggregation (Multiple Metrics)
2189            ///
2190            /// Test with multiple metrics to verify aggregation across all metrics.
2191            #[test]
2192            fn prop_verdict_aggregation_multiple_metrics(
2193                wall_ms_status in metric_status_strategy(),
2194                max_rss_status in metric_status_strategy(),
2195            ) {
2196                // Use fixed baseline and thresholds
2197                let baseline = 1000u64;
2198                let threshold = 0.20;
2199                let warn_threshold = 0.10;
2200
2201                // Create baseline stats with both wall_ms and max_rss_kb
2202                let baseline_stats = Stats {
2203                    wall_ms: U64Summary::new(baseline, baseline, baseline),
2204                    cpu_ms: None,
2205                    page_faults: None,
2206                    ctx_switches: None,
2207                    max_rss_kb: Some(U64Summary::new(baseline, baseline, baseline)),
2208                    io_read_bytes: None,
2209                    io_write_bytes: None,
2210                    network_packets: None,
2211                    energy_uj: None,
2212                    binary_bytes: None,
2213                    throughput_per_s: None,                };
2214
2215                // Compute current values to achieve desired statuses
2216                let wall_ms_current = current_for_status(baseline, threshold, warn_threshold, wall_ms_status);
2217                let max_rss_current = current_for_status(baseline, threshold, warn_threshold, max_rss_status);
2218
2219                let wall_cv = if wall_ms_status == MetricStatus::Skip { Some(0.5) } else { None };
2220                let rss_cv = if max_rss_status == MetricStatus::Skip { Some(0.5) } else { None };
2221
2222                let current_stats = Stats {
2223                    wall_ms: U64Summary {
2224                        median: wall_ms_current,
2225                        min: wall_ms_current,
2226                        max: wall_ms_current,
2227                        mean: wall_cv.map(|_cv| wall_ms_current as f64),
2228                        stddev: wall_cv.map(|cv| (wall_ms_current as f64) * cv),
2229                    },
2230                    cpu_ms: None,
2231                    page_faults: None,
2232                    ctx_switches: None,
2233                    max_rss_kb: Some(U64Summary {
2234                        median: max_rss_current,
2235                        min: max_rss_current,
2236                        max: max_rss_current,
2237                        mean: rss_cv.map(|_cv| max_rss_current as f64),
2238                        stddev: rss_cv.map(|cv| (max_rss_current as f64) * cv),
2239                    }),
2240                    io_read_bytes: None,
2241                    io_write_bytes: None,
2242                    network_packets: None,
2243                    energy_uj: None,
2244                    binary_bytes: None,
2245                    throughput_per_s: None,
2246                };
2247
2248                let mut wall_budget = Budget {
2249                    noise_threshold: None,
2250                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2251                    threshold,
2252                    warn_threshold,
2253                    direction: Direction::Lower,
2254                };
2255                if wall_ms_status == MetricStatus::Skip {
2256                    wall_budget.noise_threshold = Some(0.1);
2257                    wall_budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2258                }
2259
2260                let mut rss_budget = Budget {
2261                    noise_threshold: None,
2262                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2263                    threshold,
2264                    warn_threshold,
2265                    direction: Direction::Lower,
2266                };
2267                if max_rss_status == MetricStatus::Skip {
2268                    rss_budget.noise_threshold = Some(0.1);
2269                    rss_budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2270                }
2271
2272                let mut budgets = BTreeMap::new();
2273                budgets.insert(Metric::WallMs, wall_budget);
2274                budgets.insert(Metric::MaxRssKb, rss_budget);
2275
2276                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2277                    .expect("compare_stats should succeed");
2278
2279                // Verify the verdict matches the expected aggregation
2280                let expected = expected_verdict_status(&[wall_ms_status, max_rss_status]);
2281                prop_assert_eq!(
2282                    comparison.verdict.status, expected,
2283                    "Verdict should be {:?} when metric statuses are [{:?}, {:?}]",
2284                    expected, wall_ms_status, max_rss_status
2285                );
2286            }
2287
2288            /// **Validates: Requirements 5.4, 5.5, 5.6**
2289            ///
2290            /// Property 5: Verdict Aggregation (Three Metrics)
2291            ///
2292            /// Test with all three metric types to verify comprehensive aggregation.
2293            #[test]
2294            fn prop_verdict_aggregation_three_metrics(
2295                wall_ms_status in metric_status_strategy(),
2296                max_rss_status in metric_status_strategy(),
2297                throughput_status in metric_status_strategy(),
2298            ) {
2299                // Use fixed baseline and thresholds
2300                let baseline = 1000u64;
2301                let baseline_throughput = 100.0f64;
2302                let threshold = 0.20;
2303                let warn_threshold = 0.10;
2304
2305                // Create baseline stats with all three metrics
2306                let baseline_stats = Stats {
2307                    wall_ms: U64Summary::new(baseline, baseline, baseline),
2308                    cpu_ms: None,
2309                    page_faults: None,
2310                    ctx_switches: None,
2311                    max_rss_kb: Some(U64Summary::new(baseline, baseline, baseline)),
2312                    io_read_bytes: None,
2313                    io_write_bytes: None,
2314                    network_packets: None,
2315                    energy_uj: None,
2316                    binary_bytes: None,
2317                    throughput_per_s: Some(F64Summary::new(baseline_throughput, baseline_throughput, baseline_throughput)),                };
2318
2319                // Compute current values to achieve desired statuses
2320                let wall_ms_current = current_for_status(baseline, threshold, warn_threshold, wall_ms_status);
2321                let max_rss_current = current_for_status(baseline, threshold, warn_threshold, max_rss_status);
2322
2323                let wall_cv = if wall_ms_status == MetricStatus::Skip { Some(0.5) } else { None };
2324                let rss_cv = if max_rss_status == MetricStatus::Skip { Some(0.5) } else { None };
2325                let throughput_cv = if throughput_status == MetricStatus::Skip { Some(0.5) } else { None };
2326
2327                // For throughput (higher is better), we need to invert the logic
2328                // Pass: current >= baseline (no regression)
2329                // Warn: current = baseline * (1 - midpoint of warn/threshold)
2330                // Fail: current = baseline * (1 - (threshold + 0.1))
2331                let throughput_current = match throughput_status {
2332                    MetricStatus::Pass => baseline_throughput,
2333                    MetricStatus::Warn => {
2334                        let regression = (warn_threshold + threshold) / 2.0;
2335                        baseline_throughput * (1.0 - regression)
2336                    }
2337                    MetricStatus::Fail => {
2338                        let regression = threshold + 0.1;
2339                        baseline_throughput * (1.0 - regression)
2340                    }
2341                    MetricStatus::Skip => baseline_throughput,
2342                };
2343
2344                let current_stats = Stats {
2345                    wall_ms: U64Summary {
2346                        median: wall_ms_current,
2347                        min: wall_ms_current,
2348                        max: wall_ms_current,
2349                        mean: wall_cv.map(|_cv| wall_ms_current as f64),
2350                        stddev: wall_cv.map(|cv| (wall_ms_current as f64) * cv),
2351                    },
2352                    cpu_ms: None,
2353                    page_faults: None,
2354                    ctx_switches: None,
2355                    max_rss_kb: Some(U64Summary {
2356                        median: max_rss_current,
2357                        min: max_rss_current,
2358                        max: max_rss_current,
2359                        mean: rss_cv.map(|_cv| max_rss_current as f64),
2360                        stddev: rss_cv.map(|cv| (max_rss_current as f64) * cv),
2361                    }),
2362                    io_read_bytes: None,
2363                    io_write_bytes: None,
2364                    network_packets: None,
2365                    energy_uj: None,
2366                    binary_bytes: None,
2367                    throughput_per_s: Some(F64Summary {
2368                        median: throughput_current,
2369                        min: throughput_current,
2370                        max: throughput_current,
2371                        mean: throughput_cv.map(|_cv| throughput_current),
2372                        stddev: throughput_cv.map(|cv| (throughput_current) * cv),
2373                    }),
2374                };
2375
2376                let mut wall_budget = Budget {
2377                    noise_threshold: None,
2378                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2379                    threshold,
2380                    warn_threshold,
2381                    direction: Direction::Lower,
2382                };
2383                if wall_ms_status == MetricStatus::Skip {
2384                    wall_budget.noise_threshold = Some(0.1);
2385                    wall_budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2386                }
2387
2388                let mut rss_budget = Budget {
2389                    noise_threshold: None,
2390                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2391                    threshold,
2392                    warn_threshold,
2393                    direction: Direction::Lower,
2394                };
2395                if max_rss_status == MetricStatus::Skip {
2396                    rss_budget.noise_threshold = Some(0.1);
2397                    rss_budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2398                }
2399
2400                let mut throughput_budget = Budget {
2401                    noise_threshold: None,
2402                    noise_policy: perfgate_types::NoisePolicy::Ignore,
2403                    threshold,
2404                    warn_threshold,
2405                    direction: Direction::Higher,
2406                };
2407                if throughput_status == MetricStatus::Skip {
2408                    throughput_budget.noise_threshold = Some(0.1);
2409                    throughput_budget.noise_policy = perfgate_types::NoisePolicy::Skip;
2410                }
2411
2412                let mut budgets = BTreeMap::new();
2413                budgets.insert(Metric::WallMs, wall_budget);
2414                budgets.insert(Metric::MaxRssKb, rss_budget);
2415                budgets.insert(Metric::ThroughputPerS, throughput_budget);
2416
2417                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2418                    .expect("compare_stats should succeed");
2419
2420                // Verify the verdict matches the expected aggregation
2421                let expected = expected_verdict_status(&[wall_ms_status, max_rss_status, throughput_status]);
2422                prop_assert_eq!(
2423                    comparison.verdict.status, expected,
2424                    "Verdict should be {:?} when metric statuses are [{:?}, {:?}, {:?}]",
2425                    expected, wall_ms_status, max_rss_status, throughput_status
2426                );
2427            }
2428
2429            /// **Validates: Requirements 5.4, 5.5, 5.6**
2430            ///
2431            /// Property 5: Verdict Aggregation (Fail dominates)
2432            ///
2433            /// If any metric has Fail status, the verdict SHALL be Fail,
2434            /// regardless of other metric statuses.
2435            #[test]
2436            fn prop_verdict_fail_dominates(
2437                other_status in metric_status_strategy(),
2438            ) {
2439                // Use fixed baseline and thresholds
2440                let baseline = 1000u64;
2441                let threshold = 0.20;
2442                let warn_threshold = 0.10;
2443
2444                // Create baseline stats with both wall_ms and max_rss_kb
2445                let baseline_stats = Stats {
2446                    wall_ms: U64Summary::new(baseline, baseline, baseline),
2447                    cpu_ms: None,
2448                    page_faults: None,
2449                    ctx_switches: None,
2450                    max_rss_kb: Some(U64Summary::new(baseline, baseline, baseline)),
2451                    io_read_bytes: None,
2452                    io_write_bytes: None,
2453                    network_packets: None,
2454                    energy_uj: None,
2455                    binary_bytes: None,
2456                    throughput_per_s: None,                };
2457
2458                // wall_ms will be Fail, max_rss will be the random status
2459                let wall_ms_current = current_for_status(baseline, threshold, warn_threshold, MetricStatus::Fail);
2460                let max_rss_current = current_for_status(baseline, threshold, warn_threshold, other_status);
2461
2462                let current_stats = Stats {
2463                    wall_ms: U64Summary::new(wall_ms_current, wall_ms_current, wall_ms_current),
2464                    cpu_ms: None,
2465                    page_faults: None,
2466                    ctx_switches: None,
2467                    max_rss_kb: Some(U64Summary::new(max_rss_current, max_rss_current, max_rss_current)),
2468                    io_read_bytes: None,
2469                    io_write_bytes: None,
2470                    network_packets: None,
2471                    energy_uj: None,
2472                    binary_bytes: None,
2473                    throughput_per_s: None,
2474                };
2475
2476                let mut budgets = BTreeMap::new();
2477                budgets.insert(
2478                    Metric::WallMs,
2479                    Budget {
2480                        noise_threshold: None,
2481                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2482                        threshold,
2483                        warn_threshold,
2484                        direction: Direction::Lower,
2485                    },
2486                );
2487                budgets.insert(
2488                    Metric::MaxRssKb,
2489                    Budget {
2490                        noise_threshold: None,
2491                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2492                        threshold,
2493                        warn_threshold,
2494                        direction: Direction::Lower,
2495                    },
2496                );
2497
2498                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2499                    .expect("compare_stats should succeed");
2500
2501                // Verdict should always be Fail when any metric is Fail
2502                prop_assert_eq!(
2503                    comparison.verdict.status, VerdictStatus::Fail,
2504                    "Verdict should be Fail when any metric has Fail status, regardless of other_status={:?}",
2505                    other_status
2506                );
2507            }
2508
2509            /// **Validates: Requirements 5.4, 5.5, 5.6**
2510            ///
2511            /// Property 5: Verdict Aggregation (Warn without Fail)
2512            ///
2513            /// If no metric has Fail status but at least one has Warn status,
2514            /// the verdict SHALL be Warn.
2515            #[test]
2516            fn prop_verdict_warn_without_fail(
2517                // Generate only Pass or Warn statuses (no Fail)
2518                other_status in prop_oneof![Just(MetricStatus::Pass), Just(MetricStatus::Warn)],
2519            ) {
2520                // Use fixed baseline and thresholds
2521                let baseline = 1000u64;
2522                let threshold = 0.20;
2523                let warn_threshold = 0.10;
2524
2525                // Create baseline stats with both wall_ms and max_rss_kb
2526                let baseline_stats = Stats {
2527                    wall_ms: U64Summary::new(baseline, baseline, baseline),
2528                    cpu_ms: None,
2529                    page_faults: None,
2530                    ctx_switches: None,
2531                    max_rss_kb: Some(U64Summary::new(baseline, baseline, baseline)),
2532                    io_read_bytes: None,
2533                    io_write_bytes: None,
2534                    network_packets: None,
2535                    energy_uj: None,
2536                    binary_bytes: None,
2537                    throughput_per_s: None,                };
2538
2539                // wall_ms will be Warn, max_rss will be Pass or Warn
2540                let wall_ms_current = current_for_status(baseline, threshold, warn_threshold, MetricStatus::Warn);
2541                let max_rss_current = current_for_status(baseline, threshold, warn_threshold, other_status);
2542
2543                let current_stats = Stats {
2544                    wall_ms: U64Summary::new(wall_ms_current, wall_ms_current, wall_ms_current),
2545                    cpu_ms: None,
2546                    page_faults: None,
2547                    ctx_switches: None,
2548                    max_rss_kb: Some(U64Summary::new(max_rss_current, max_rss_current, max_rss_current)),
2549                    io_read_bytes: None,
2550                    io_write_bytes: None,
2551                    network_packets: None,
2552                    energy_uj: None,
2553                    binary_bytes: None,
2554                    throughput_per_s: None,
2555                };
2556
2557                let mut budgets = BTreeMap::new();
2558                budgets.insert(
2559                    Metric::WallMs,
2560                    Budget {
2561                        noise_threshold: None,
2562                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2563                        threshold,
2564                        warn_threshold,
2565                        direction: Direction::Lower,
2566                    },
2567                );
2568                budgets.insert(
2569                    Metric::MaxRssKb,
2570                    Budget {
2571                        noise_threshold: None,
2572                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2573                        threshold,
2574                        warn_threshold,
2575                        direction: Direction::Lower,
2576                    },
2577                );
2578
2579                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2580                    .expect("compare_stats should succeed");
2581
2582                // Verdict should be Warn when at least one metric is Warn and none are Fail
2583                prop_assert_eq!(
2584                    comparison.verdict.status, VerdictStatus::Warn,
2585                    "Verdict should be Warn when at least one metric is Warn and none are Fail, other_status={:?}",
2586                    other_status
2587                );
2588            }
2589
2590            /// **Validates: Requirements 5.4, 5.5, 5.6**
2591            ///
2592            /// Property 5: Verdict Aggregation (All Pass)
2593            ///
2594            /// If all metrics have Pass status, the verdict SHALL be Pass.
2595            #[test]
2596            fn prop_verdict_all_pass(
2597                // Generate 1-3 metrics, all with Pass status
2598                num_metrics in 1usize..=3,
2599            ) {
2600                // Use fixed baseline and thresholds
2601                let baseline = 1000u64;
2602                let baseline_throughput = 100.0f64;
2603                let threshold = 0.20;
2604                let warn_threshold = 0.10;
2605
2606                // All metrics will be Pass (current == baseline, no regression)
2607                let baseline_stats = Stats {
2608                    wall_ms: U64Summary::new(baseline, baseline, baseline),
2609                    cpu_ms: None,
2610                    page_faults: None,
2611                    ctx_switches: None,
2612                    max_rss_kb: if num_metrics >= 2 {
2613                        Some(U64Summary::new(baseline, baseline, baseline))
2614                    } else {
2615                        None
2616                    },
2617                    io_read_bytes: None,
2618                    io_write_bytes: None,
2619                    network_packets: None,
2620                    energy_uj: None,
2621                    binary_bytes: None,
2622                    throughput_per_s: if num_metrics >= 3 {
2623                        Some(F64Summary::new(baseline_throughput, baseline_throughput, baseline_throughput))
2624                    } else {
2625                        None
2626                    },
2627                };
2628
2629                // Current stats are same as baseline (Pass status)
2630                let current_stats = baseline_stats.clone();
2631
2632                let mut budgets = BTreeMap::new();
2633                budgets.insert(
2634                    Metric::WallMs,
2635                    Budget {
2636                        noise_threshold: None,
2637                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2638                        threshold,
2639                        warn_threshold,
2640                        direction: Direction::Lower,
2641                    },
2642                );
2643                if num_metrics >= 2 {
2644                    budgets.insert(
2645                        Metric::MaxRssKb,
2646                        Budget {
2647                            noise_threshold: None,
2648                            noise_policy: perfgate_types::NoisePolicy::Ignore,
2649                            threshold,
2650                            warn_threshold,
2651                            direction: Direction::Lower,
2652                        },                    );
2653                }
2654                if num_metrics >= 3 {
2655                    budgets.insert(
2656                        Metric::ThroughputPerS,
2657                        Budget {
2658                        noise_threshold: None,
2659                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2660                            threshold,
2661                            warn_threshold,
2662                            direction: Direction::Higher,
2663                        },
2664                    );
2665                }
2666
2667                let comparison = compare_stats(&baseline_stats, &current_stats, &budgets)
2668                    .expect("compare_stats should succeed");
2669
2670                // Verdict should be Pass when all metrics are Pass
2671                prop_assert_eq!(
2672                    comparison.verdict.status, VerdictStatus::Pass,
2673                    "Verdict should be Pass when all {} metrics have Pass status",
2674                    num_metrics
2675                );
2676
2677                // Also verify the counts are correct
2678                prop_assert_eq!(
2679                    comparison.verdict.counts.pass, num_metrics as u32,
2680                    "Pass count should equal number of metrics"
2681                );
2682                prop_assert_eq!(
2683                    comparison.verdict.counts.warn, 0,
2684                    "Warn count should be 0"
2685                );
2686                prop_assert_eq!(
2687                    comparison.verdict.counts.fail, 0,
2688                    "Fail count should be 0"
2689                );
2690            }
2691        }
2692
2693        // =====================================================================
2694        // Property: compare_stats determinism
2695        // =====================================================================
2696
2697        proptest! {
2698            #[test]
2699            fn prop_compare_stats_determinism(
2700                baseline_wall in 1u64..10000,
2701                current_wall in 1u64..10000,
2702                (threshold, warn_threshold) in threshold_pair_strategy(),
2703            ) {
2704            let baseline = Stats {
2705                wall_ms: U64Summary::new(baseline_wall, baseline_wall, baseline_wall),
2706                cpu_ms: None,
2707                page_faults: None,
2708                ctx_switches: None,
2709                max_rss_kb: None,
2710                io_read_bytes: None,
2711                io_write_bytes: None,
2712                network_packets: None,
2713                energy_uj: None,
2714                binary_bytes: None,
2715                throughput_per_s: None,
2716            };
2717            let current = Stats {
2718                wall_ms: U64Summary::new(current_wall, current_wall, current_wall),
2719                cpu_ms: None,
2720                page_faults: None,
2721                ctx_switches: None,
2722                max_rss_kb: None,
2723                io_read_bytes: None,
2724                io_write_bytes: None,
2725                network_packets: None,
2726                energy_uj: None,
2727                binary_bytes: None,
2728                throughput_per_s: None,
2729            };
2730                let mut budgets = BTreeMap::new();
2731                budgets.insert(Metric::WallMs, Budget {
2732                        noise_threshold: None,
2733                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2734                    threshold, warn_threshold, direction: Direction::Lower,
2735                });
2736
2737                let r1 = compare_stats(&baseline, &current, &budgets).unwrap();
2738                let r2 = compare_stats(&baseline, &current, &budgets).unwrap();
2739                prop_assert_eq!(r1, r2, "compare_stats must be deterministic");
2740            }
2741        }
2742
2743        // =====================================================================
2744        // Property: compare_runs determinism
2745        // =====================================================================
2746
2747        fn make_run_receipt_for_prop(name: &str, wall: u64) -> RunReceipt {
2748            use perfgate_types::{BenchMeta, HostInfo, RUN_SCHEMA_V1, RunMeta, ToolInfo};
2749            let sample = non_warmup_sample(wall);
2750            let stats = compute_stats(std::slice::from_ref(&sample), None).unwrap();
2751            RunReceipt {
2752                schema: RUN_SCHEMA_V1.to_string(),
2753                tool: ToolInfo {
2754                    name: "perfgate".into(),
2755                    version: "test".into(),
2756                },
2757                run: RunMeta {
2758                    id: format!("run-{name}"),
2759                    started_at: "2024-01-01T00:00:00Z".into(),
2760                    ended_at: "2024-01-01T00:00:01Z".into(),
2761                    host: HostInfo {
2762                        os: "linux".into(),
2763                        arch: "x86_64".into(),
2764                        cpu_count: None,
2765                        memory_bytes: None,
2766                        hostname_hash: None,
2767                    },
2768                },
2769                bench: BenchMeta {
2770                    name: name.into(),
2771                    cwd: None,
2772                    command: vec!["echo".into()],
2773                    repeat: 1,
2774                    warmup: 0,
2775                    work_units: None,
2776                    timeout_ms: None,
2777                },
2778                samples: vec![sample],
2779                stats,
2780            }
2781        }
2782
2783        proptest! {
2784            #[test]
2785            fn prop_compare_runs_determinism(
2786                baseline_wall in 1u64..10000,
2787                current_wall in 1u64..10000,
2788                (threshold, warn_threshold) in threshold_pair_strategy(),
2789            ) {
2790                let baseline = make_run_receipt_for_prop("base", baseline_wall);
2791                let current = make_run_receipt_for_prop("cur", current_wall);
2792                let mut budgets = BTreeMap::new();
2793                budgets.insert(Metric::WallMs, Budget {
2794                        noise_threshold: None,
2795                        noise_policy: perfgate_types::NoisePolicy::Ignore,
2796                    threshold, warn_threshold, direction: Direction::Lower,
2797                });
2798                let stats_map = BTreeMap::new();
2799
2800                let c1 = compare_runs(&baseline, &current, &budgets, &stats_map, None).unwrap();
2801                let c2 = compare_runs(&baseline, &current, &budgets, &stats_map, None).unwrap();
2802                prop_assert_eq!(c1, c2, "compare_runs must be deterministic");
2803            }
2804        }
2805
2806        // =====================================================================
2807        // Property: derive_report consistency
2808        // =====================================================================
2809
2810        fn make_compare_receipt(
2811            deltas: BTreeMap<Metric, Delta>,
2812            budgets: BTreeMap<Metric, Budget>,
2813            verdict: Verdict,
2814        ) -> CompareReceipt {
2815            use perfgate_types::{BenchMeta, COMPARE_SCHEMA_V1, CompareRef, ToolInfo};
2816            CompareReceipt {
2817                schema: COMPARE_SCHEMA_V1.to_string(),
2818                tool: ToolInfo {
2819                    name: "perfgate".into(),
2820                    version: "test".into(),
2821                },
2822                bench: BenchMeta {
2823                    name: "bench".into(),
2824                    cwd: None,
2825                    command: vec!["echo".into()],
2826                    repeat: 1,
2827                    warmup: 0,
2828                    work_units: None,
2829                    timeout_ms: None,
2830                },
2831                baseline_ref: CompareRef {
2832                    path: None,
2833                    run_id: None,
2834                },
2835                current_ref: CompareRef {
2836                    path: None,
2837                    run_id: None,
2838                },
2839                budgets,
2840                deltas,
2841                verdict,
2842            }
2843        }
2844
        /// Strategy that yields one of the four `MetricStatus` variants:
        /// `Pass`, `Warn`, `Fail` or `Skip`.
        fn arb_metric_status() -> impl Strategy<Value = MetricStatus> {
            prop_oneof![
                Just(MetricStatus::Pass),
                Just(MetricStatus::Warn),
                Just(MetricStatus::Fail),
                Just(MetricStatus::Skip),
            ]
        }
2853
2854        fn arb_delta(status: MetricStatus) -> Delta {
2855            Delta {
2856                baseline: 100.0,
2857                current: 110.0,
2858                ratio: 1.1,
2859                pct: 0.1,
2860                regression: 0.1,
2861                cv: None,
2862                noise_threshold: None,
2863                statistic: MetricStatistic::Median,
2864                significance: None,
2865                status,
2866            }
2867        }
2868
2869        proptest! {
2870            #[test]
2871            fn prop_derive_report_finding_count(
2872                wall_status in arb_metric_status(),
2873                rss_status in arb_metric_status(),
2874                cpu_status in arb_metric_status(),
2875            ) {
2876                let statuses = vec![
2877                    (Metric::WallMs, wall_status),
2878                    (Metric::MaxRssKb, rss_status),
2879                    (Metric::CpuMs, cpu_status),
2880                ];
2881                let mut deltas = BTreeMap::new();
2882                let budgets = BTreeMap::new();
2883                let mut warn_count = 0u32;
2884                let mut fail_count = 0u32;
2885                let mut pass_count = 0u32;
2886                for (m, s) in &statuses {
2887                    deltas.insert(*m, arb_delta(*s));
2888                    match s {
2889                        MetricStatus::Pass => pass_count += 1,
2890                        MetricStatus::Warn => warn_count += 1,
2891                        MetricStatus::Fail => fail_count += 1,
2892                        MetricStatus::Skip => {}
2893                    }
2894                }
2895                let verdict = Verdict {
2896                    status: if fail_count > 0 { VerdictStatus::Fail }
2897                            else if warn_count > 0 { VerdictStatus::Warn }
2898                            else { VerdictStatus::Pass },
2899                    counts: VerdictCounts { pass: pass_count, warn: warn_count, fail: fail_count, skip: 0 },
2900                    reasons: vec![],
2901                };
2902                let receipt = make_compare_receipt(deltas, budgets, verdict.clone());
2903                let report = derive_report(&receipt);
2904
2905                prop_assert_eq!(
2906                    report.findings.len() as u32,
2907                    warn_count + fail_count,
2908                    "finding_count must equal warn_count + fail_count"
2909                );
2910            }
2911
2912            #[test]
2913            fn prop_derive_report_verdict_consistency(
2914                wall_status in arb_metric_status(),
2915                rss_status in arb_metric_status(),
2916            ) {
2917                let statuses = vec![
2918                    (Metric::WallMs, wall_status),
2919                    (Metric::MaxRssKb, rss_status),
2920                ];
2921                let mut deltas = BTreeMap::new();
2922                let budgets = BTreeMap::new();
2923                let mut warn_count = 0u32;
2924                let mut fail_count = 0u32;
2925                let mut pass_count = 0u32;
2926                for (m, s) in &statuses {
2927                    deltas.insert(*m, arb_delta(*s));
2928                    match s {
2929                        MetricStatus::Pass => pass_count += 1,
2930                        MetricStatus::Warn => warn_count += 1,
2931                        MetricStatus::Fail => fail_count += 1,
2932                        MetricStatus::Skip => {}
2933                    }
2934                }
2935                let expected_status = if fail_count > 0 { VerdictStatus::Fail }
2936                    else if warn_count > 0 { VerdictStatus::Warn }
2937                    else { VerdictStatus::Pass };
2938                let verdict = Verdict {
2939                    status: expected_status,
2940                    counts: VerdictCounts { pass: pass_count, warn: warn_count, fail: fail_count, skip: 0 },
2941                    reasons: vec![],
2942                };
2943                let receipt = make_compare_receipt(deltas, budgets, verdict);
2944                let report = derive_report(&receipt);
2945
2946                prop_assert_eq!(
2947                    report.verdict, expected_status,
2948                    "report verdict must match worst finding status"
2949                );
2950            }
2951        }
2952
2953        // =====================================================================
2954        // Property: summarize_u64/f64 sample_count & median bounds
2955        // =====================================================================
2956
2957        proptest! {
2958            #[test]
2959            fn prop_summarize_u64_median_between_min_max(
2960                values in prop::collection::vec(any::<u64>(), 1..100)
2961            ) {
2962                let s = summarize_u64(&values).unwrap();
2963                prop_assert!(s.min <= s.median, "min <= median");
2964                prop_assert!(s.median <= s.max, "median <= max");
2965            }
2966
2967            #[test]
2968            fn prop_summarize_f64_median_between_min_max(
2969                values in prop::collection::vec(finite_f64_strategy(), 1..100)
2970            ) {
2971                let s = summarize_f64(&values).unwrap();
2972                prop_assert!(s.min <= s.median, "min <= median");
2973                prop_assert!(s.median <= s.max, "median <= max");
2974            }
2975
2976            #[test]
2977            fn prop_compute_stats_sample_count(
2978                walls in prop::collection::vec(1u64..10000, 1..50)
2979            ) {
2980                let samples: Vec<Sample> = walls.iter().map(|&w| non_warmup_sample(w)).collect();
2981                let stats = compute_stats(&samples, None).unwrap();
2982                let expected = summarize_u64(&walls).unwrap();
2983                prop_assert_eq!(stats.wall_ms, expected, "stats.wall_ms must match direct summarize");
2984            }
2985        }
2986
2987        // =====================================================================
2988        // Property: mean is within [min, max] and variance >= 0
2989        // =====================================================================
2990
2991        proptest! {
2992            #[test]
2993            fn prop_mean_within_min_max_and_variance_non_negative(
2994                values in prop::collection::vec(1.0f64..10000.0, 1..100)
2995            ) {
2996                let (mean, var) = mean_and_variance(&values).expect("finite values");
2997                let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
2998                let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
2999
3000                prop_assert!(
3001                    mean >= min && mean <= max,
3002                    "mean ({mean}) must be within [{min}, {max}]"
3003                );
3004                prop_assert!(var >= 0.0, "variance ({var}) must be >= 0");
3005            }
3006        }
3007
3008        // =====================================================================
3009        // Property: comparison symmetry – regression flips to improvement
3010        // =====================================================================
3011
3012        proptest! {
3013            /// If baseline=A, current=B shows a regression (regression > 0),
3014            /// then swapping (baseline=B, current=A) must show no regression
3015            /// (regression == 0) for the same direction.
3016            #[test]
3017            fn prop_comparison_symmetry(
3018                a in 1u64..10000,
3019                b in 1u64..10000,
3020            ) {
3021                let budget = Budget::new(1.0, 0.5, Direction::Lower);
3022
3023                let mk = |median: u64| Stats {
3024                    wall_ms: U64Summary::new(median, median, median),
3025                    cpu_ms: None,
3026                    page_faults: None,
3027                    ctx_switches: None,
3028                    max_rss_kb: None,
3029                    io_read_bytes: None,
3030                    io_write_bytes: None,
3031                    network_packets: None,
3032                    energy_uj: None,
3033                    binary_bytes: None,
3034                    throughput_per_s: None,
3035                };
3036
3037                let mut budgets = BTreeMap::new();
3038                budgets.insert(Metric::WallMs, budget);
3039
3040                let fwd = compare_stats(&mk(a), &mk(b), &budgets).unwrap();
3041                let rev = compare_stats(&mk(b), &mk(a), &budgets).unwrap();
3042
3043                let fwd_reg = fwd.deltas[&Metric::WallMs].regression;
3044                let rev_reg = rev.deltas[&Metric::WallMs].regression;
3045
3046                // If one direction regresses, the other must not
3047                if fwd_reg > 0.0 {
3048                    prop_assert!(
3049                        rev_reg == 0.0,
3050                        "fwd regression={fwd_reg} but rev regression={rev_reg} (expected 0)"
3051                    );
3052                }
3053                if rev_reg > 0.0 {
3054                    prop_assert!(
3055                        fwd_reg == 0.0,
3056                        "rev regression={rev_reg} but fwd regression={fwd_reg} (expected 0)"
3057                    );
3058                }
3059            }
3060        }
3061
3062        // =====================================================================
3063        // Property: budget threshold consistency
3064        // =====================================================================
3065
3066        proptest! {
3067            /// threshold=0.0 means any positive regression must fail.
3068            #[test]
3069            fn prop_budget_zero_threshold_fails_any_regression(
3070                baseline in 1u64..10000,
3071                delta in 1u64..5000,
3072            ) {
3073                let current = baseline + delta; // strictly worse (Direction::Lower)
3074                let budget = Budget::new(0.0, 0.0, Direction::Lower);
3075                let mk = |v: u64| Stats {
3076                    wall_ms: U64Summary::new(v, v, v),
3077                    cpu_ms: None,
3078                    page_faults: None,
3079                    ctx_switches: None,
3080                    max_rss_kb: None,
3081                    io_read_bytes: None,
3082                    io_write_bytes: None,
3083                    network_packets: None,
3084                    energy_uj: None,
3085                    binary_bytes: None,
3086                    throughput_per_s: None,
3087                };
3088                let mut budgets = BTreeMap::new();
3089                budgets.insert(Metric::WallMs, budget);
3090
3091                let cmp = compare_stats(&mk(baseline), &mk(current), &budgets).unwrap();
3092                let status = cmp.deltas[&Metric::WallMs].status;
3093                prop_assert_eq!(
3094                    status,
3095                    MetricStatus::Fail,
3096                    "threshold=0 must fail any regression (baseline={}, current={})",
3097                    baseline, current
3098                );
3099            }
3100
3101            /// threshold=1.0 allows up to 100% regression;
3102            /// a regression <= 100% must not fail.
3103            #[test]
3104            fn prop_budget_full_threshold_allows_up_to_100pct(
3105                baseline in 1u64..10000,
3106                factor in 0u64..=100, // 0..100% regression
3107            ) {
3108                // current = baseline * (1 + factor/100)
3109                let current = baseline + (baseline * factor) / 100;
3110                let budget = Budget::new(1.0, 0.5, Direction::Lower);
3111                let mk = |v: u64| Stats {
3112                    wall_ms: U64Summary::new(v, v, v),
3113                    cpu_ms: None,
3114                    page_faults: None,
3115                    ctx_switches: None,
3116                    max_rss_kb: None,
3117                    io_read_bytes: None,
3118                    io_write_bytes: None,
3119                    network_packets: None,
3120                    energy_uj: None,
3121                    binary_bytes: None,
3122                    throughput_per_s: None,
3123                };
3124                let mut budgets = BTreeMap::new();
3125                budgets.insert(Metric::WallMs, budget);
3126
3127                let cmp = compare_stats(&mk(baseline), &mk(current), &budgets).unwrap();
3128                let status = cmp.deltas[&Metric::WallMs].status;
3129                prop_assert!(
3130                    status != MetricStatus::Fail,
3131                    "threshold=1.0 must not fail <=100% regression \
3132                     (baseline={baseline}, current={current}, factor={factor}%)"
3133                );
3134            }
3135        }
3136    }
3137
3138    #[test]
3139    fn compute_stats_excludes_warmup() {
3140        let samples = vec![
3141            Sample {
3142                wall_ms: 100,
3143                exit_code: 0,
3144                warmup: true,
3145                timed_out: false,
3146                cpu_ms: None,
3147                page_faults: None,
3148                ctx_switches: None,
3149                max_rss_kb: None,
3150                io_read_bytes: None,
3151                io_write_bytes: None,
3152                network_packets: None,
3153                energy_uj: None,
3154                binary_bytes: None,
3155                stdout: None,
3156                stderr: None,
3157            },
3158            Sample {
3159                wall_ms: 200,
3160                exit_code: 0,
3161                warmup: false,
3162                timed_out: false,
3163                cpu_ms: None,
3164                page_faults: None,
3165                ctx_switches: None,
3166                max_rss_kb: None,
3167                io_read_bytes: None,
3168                io_write_bytes: None,
3169                network_packets: None,
3170                energy_uj: None,
3171                binary_bytes: None,
3172                stdout: None,
3173                stderr: None,
3174            },
3175        ];
3176
3177        let stats = compute_stats(&samples, None).unwrap();
3178        let expected = U64Summary {
3179            median: 200,
3180            min: 200,
3181            max: 200,
3182            mean: Some(200.0),
3183            stddev: Some(0.0),
3184        };
3185        assert_eq!(stats.wall_ms, expected);
3186    }
3187
3188    // =========================================================================
3189    // CPU Time (cpu_ms) Tests
3190    // =========================================================================
3191
3192    /// Test that compute_stats correctly computes cpu_ms summary from samples.
3193    #[test]
3194    fn compute_stats_computes_cpu_ms_summary() {
3195        let samples = vec![
3196            Sample {
3197                wall_ms: 100,
3198                exit_code: 0,
3199                warmup: false,
3200                timed_out: false,
3201                cpu_ms: Some(50),
3202                page_faults: None,
3203                ctx_switches: None,
3204                max_rss_kb: None,
3205                io_read_bytes: None,
3206                io_write_bytes: None,
3207                network_packets: None,
3208                energy_uj: None,
3209                binary_bytes: None,
3210                stdout: None,
3211                stderr: None,
3212            },
3213            Sample {
3214                wall_ms: 110,
3215                exit_code: 0,
3216                warmup: false,
3217                timed_out: false,
3218                cpu_ms: Some(60),
3219                page_faults: None,
3220                ctx_switches: None,
3221                max_rss_kb: None,
3222                io_read_bytes: None,
3223                io_write_bytes: None,
3224                network_packets: None,
3225                energy_uj: None,
3226                binary_bytes: None,
3227                stdout: None,
3228                stderr: None,
3229            },
3230            Sample {
3231                wall_ms: 105,
3232                exit_code: 0,
3233                warmup: false,
3234                timed_out: false,
3235                cpu_ms: Some(55),
3236                page_faults: None,
3237                ctx_switches: None,
3238                max_rss_kb: None,
3239                io_read_bytes: None,
3240                io_write_bytes: None,
3241                network_packets: None,
3242                energy_uj: None,
3243                binary_bytes: None,
3244                stdout: None,
3245                stderr: None,
3246            },
3247        ];
3248
3249        let stats = compute_stats(&samples, None).unwrap();
3250
3251        // cpu_ms should be present with correct statistics
3252        assert!(stats.cpu_ms.is_some(), "cpu_ms stats should be present");
3253        let cpu_stats = stats.cpu_ms.unwrap();
3254        assert_eq!(cpu_stats.min, 50, "cpu_ms min should be 50");
3255        assert_eq!(cpu_stats.max, 60, "cpu_ms max should be 60");
3256        assert_eq!(cpu_stats.median, 55, "cpu_ms median should be 55");
3257    }
3258
3259    /// Test that compute_stats returns None for cpu_ms when samples don't have it.
3260    #[test]
3261    fn compute_stats_cpu_ms_none_when_samples_missing_cpu() {
3262        let samples = vec![
3263            Sample {
3264                wall_ms: 100,
3265                exit_code: 0,
3266                warmup: false,
3267                timed_out: false,
3268                cpu_ms: None,
3269                page_faults: None,
3270                ctx_switches: None,
3271                max_rss_kb: Some(1024),
3272                io_read_bytes: None,
3273                io_write_bytes: None,
3274                network_packets: None,
3275                energy_uj: None,
3276                binary_bytes: None,
3277                stdout: None,
3278                stderr: None,
3279            },
3280            Sample {
3281                wall_ms: 110,
3282                exit_code: 0,
3283                warmup: false,
3284                timed_out: false,
3285                cpu_ms: None,
3286                page_faults: None,
3287                ctx_switches: None,
3288                max_rss_kb: Some(1028),
3289                io_read_bytes: None,
3290                io_write_bytes: None,
3291                network_packets: None,
3292                energy_uj: None,
3293                binary_bytes: None,
3294                stdout: None,
3295                stderr: None,
3296            },
3297        ];
3298
3299        let stats = compute_stats(&samples, None).unwrap();
3300
3301        // cpu_ms should be None since samples don't have cpu_ms
3302        assert!(
3303            stats.cpu_ms.is_none(),
3304            "cpu_ms stats should be None when samples lack cpu_ms"
3305        );
3306        // max_rss_kb should still be computed
3307        assert!(
3308            stats.max_rss_kb.is_some(),
3309            "max_rss_kb should still be present"
3310        );
3311    }
3312
3313    /// Test that compute_stats excludes warmup samples from cpu_ms calculation.
3314    #[test]
3315    fn compute_stats_cpu_ms_excludes_warmup() {
3316        let samples = vec![
3317            Sample {
3318                wall_ms: 100,
3319                exit_code: 0,
3320                warmup: true, // warmup - should be excluded
3321                timed_out: false,
3322                cpu_ms: Some(1000), // High value that would skew results
3323                page_faults: None,
3324                ctx_switches: None,
3325                max_rss_kb: None,
3326                io_read_bytes: None,
3327                io_write_bytes: None,
3328                network_packets: None,
3329                energy_uj: None,
3330                binary_bytes: None,
3331                stdout: None,
3332                stderr: None,
3333            },
3334            Sample {
3335                wall_ms: 100,
3336                exit_code: 0,
3337                warmup: false,
3338                timed_out: false,
3339                cpu_ms: Some(50),
3340                page_faults: None,
3341                ctx_switches: None,
3342                max_rss_kb: None,
3343                io_read_bytes: None,
3344                io_write_bytes: None,
3345                network_packets: None,
3346                energy_uj: None,
3347                binary_bytes: None,
3348                stdout: None,
3349                stderr: None,
3350            },
3351            Sample {
3352                wall_ms: 100,
3353                exit_code: 0,
3354                warmup: false,
3355                timed_out: false,
3356                cpu_ms: Some(60),
3357                page_faults: None,
3358                ctx_switches: None,
3359                max_rss_kb: None,
3360                io_read_bytes: None,
3361                io_write_bytes: None,
3362                network_packets: None,
3363                energy_uj: None,
3364                binary_bytes: None,
3365                stdout: None,
3366                stderr: None,
3367            },
3368        ];
3369
3370        let stats = compute_stats(&samples, None).unwrap();
3371
3372        let cpu_stats = stats.cpu_ms.expect("cpu_ms should be present");
3373        // Warmup sample with cpu_ms=1000 should be excluded
3374        assert_eq!(
3375            cpu_stats.min, 50,
3376            "cpu_ms min should be 50 (excluding warmup)"
3377        );
3378        assert_eq!(
3379            cpu_stats.max, 60,
3380            "cpu_ms max should be 60 (excluding warmup)"
3381        );
3382    }
3383
3384    /// Test that compare_stats correctly compares cpu_ms values.
3385    #[test]
3386    fn compare_stats_cpu_ms_regression_detection() {
3387        let baseline = Stats {
3388            wall_ms: U64Summary::new(100, 100, 100),
3389            cpu_ms: Some(U64Summary::new(50, 50, 50)),
3390            page_faults: None,
3391            ctx_switches: None,
3392            max_rss_kb: None,
3393            io_read_bytes: None,
3394            io_write_bytes: None,
3395            network_packets: None,
3396            energy_uj: None,
3397            binary_bytes: None,
3398            throughput_per_s: None,
3399        };
3400        // Current has 100% increase in cpu_ms (50 -> 100)
3401        let current = Stats {
3402            wall_ms: U64Summary::new(100, 100, 100),
3403            cpu_ms: Some(U64Summary::new(100, 100, 100)),
3404            page_faults: None,
3405            ctx_switches: None,
3406            max_rss_kb: None,
3407            io_read_bytes: None,
3408            io_write_bytes: None,
3409            network_packets: None,
3410            energy_uj: None,
3411            binary_bytes: None,
3412            throughput_per_s: None,
3413        };
3414        let mut budgets = BTreeMap::new();
3415        budgets.insert(Metric::CpuMs, Budget::new(0.20, 0.10, Direction::Lower));
3416
3417        let comparison = compare_stats(&baseline, &current, &budgets).unwrap();
3418
3419        // Should have cpu_ms delta
3420        let cpu_delta = comparison
3421            .deltas
3422            .get(&Metric::CpuMs)
3423            .expect("cpu_ms delta should exist");
3424
3425        // 100% regression should fail the 20% threshold
3426        assert_eq!(
3427            cpu_delta.status,
3428            MetricStatus::Fail,
3429            "100% cpu_ms regression should fail 20% threshold"
3430        );
3431        assert!(
3432            (cpu_delta.regression - 1.0).abs() < 0.001,
3433            "regression should be ~1.0 (100%)"
3434        );
3435        assert_eq!(cpu_delta.baseline, 50.0, "baseline should be 50");
3436        assert_eq!(cpu_delta.current, 100.0, "current should be 100");
3437    }
3438
3439    /// Test that compare_stats passes when cpu_ms improvement (decrease).
3440    #[test]
3441    fn compare_stats_cpu_ms_improvement_passes() {
3442        let baseline = Stats {
3443            wall_ms: U64Summary::new(100, 100, 100),
3444            cpu_ms: Some(U64Summary::new(100, 100, 100)),
3445            page_faults: None,
3446            ctx_switches: None,
3447            max_rss_kb: None,
3448            io_read_bytes: None,
3449            io_write_bytes: None,
3450            network_packets: None,
3451            energy_uj: None,
3452            binary_bytes: None,
3453            throughput_per_s: None,
3454        };
3455        // Current has 50% decrease in cpu_ms (100 -> 50) - improvement!
3456        let current = Stats {
3457            wall_ms: U64Summary::new(100, 100, 100),
3458            cpu_ms: Some(U64Summary::new(50, 50, 50)),
3459            page_faults: None,
3460            ctx_switches: None,
3461            max_rss_kb: None,
3462            io_read_bytes: None,
3463            io_write_bytes: None,
3464            network_packets: None,
3465            energy_uj: None,
3466            binary_bytes: None,
3467            throughput_per_s: None,
3468        };
3469        let mut budgets = BTreeMap::new();
3470        budgets.insert(Metric::CpuMs, Budget::new(0.20, 0.10, Direction::Lower));
3471
3472        let comparison = compare_stats(&baseline, &current, &budgets).unwrap();
3473
3474        let cpu_delta = comparison
3475            .deltas
3476            .get(&Metric::CpuMs)
3477            .expect("cpu_ms delta should exist");
3478
3479        // Improvement should pass (regression = 0 since current < baseline for Lower direction)
3480        assert_eq!(
3481            cpu_delta.status,
3482            MetricStatus::Pass,
3483            "cpu_ms improvement should pass"
3484        );
3485        assert_eq!(
3486            cpu_delta.regression, 0.0,
3487            "regression should be 0 for improvement"
3488        );
3489    }
3490
3491    /// Test that compare_stats skips cpu_ms when only baseline has it.
3492    #[test]
3493    fn compare_stats_skips_cpu_ms_when_only_baseline_has_it() {
3494        let baseline = Stats {
3495            wall_ms: U64Summary::new(100, 100, 100),
3496            cpu_ms: Some(U64Summary::new(50, 50, 50)),
3497            page_faults: None,
3498            ctx_switches: None,
3499            max_rss_kb: None,
3500            io_read_bytes: None,
3501            io_write_bytes: None,
3502            network_packets: None,
3503            energy_uj: None,
3504            binary_bytes: None,
3505            throughput_per_s: None,
3506        };
3507        let current = Stats {
3508            wall_ms: U64Summary::new(100, 100, 100),
3509            cpu_ms: None, // No cpu_ms in current
3510            page_faults: None,
3511            ctx_switches: None,
3512            max_rss_kb: None,
3513            io_read_bytes: None,
3514            io_write_bytes: None,
3515            network_packets: None,
3516            energy_uj: None,
3517            binary_bytes: None,
3518            throughput_per_s: None,
3519        };
3520        let mut budgets = BTreeMap::new();
3521        budgets.insert(Metric::CpuMs, Budget::new(0.20, 0.10, Direction::Lower));
3522
3523        let comparison = compare_stats(&baseline, &current, &budgets).unwrap();
3524
3525        // cpu_ms delta should NOT exist (skipped because current lacks it)
3526        assert!(
3527            !comparison.deltas.contains_key(&Metric::CpuMs),
3528            "cpu_ms delta should be skipped when current lacks cpu_ms"
3529        );
3530    }
3531
3532    /// Test that compare_stats skips cpu_ms when only current has it.
3533    #[test]
3534    fn compare_stats_skips_cpu_ms_when_only_current_has_it() {
3535        let baseline = Stats {
3536            wall_ms: U64Summary::new(100, 100, 100),
3537            cpu_ms: None, // No cpu_ms in baseline
3538            page_faults: None,
3539            ctx_switches: None,
3540            max_rss_kb: None,
3541            io_read_bytes: None,
3542            io_write_bytes: None,
3543            network_packets: None,
3544            energy_uj: None,
3545            binary_bytes: None,
3546            throughput_per_s: None,
3547        };
3548        let current = Stats {
3549            wall_ms: U64Summary::new(100, 100, 100),
3550            cpu_ms: Some(U64Summary::new(50, 50, 50)),
3551            page_faults: None,
3552            ctx_switches: None,
3553            max_rss_kb: None,
3554            io_read_bytes: None,
3555            io_write_bytes: None,
3556            network_packets: None,
3557            energy_uj: None,
3558            binary_bytes: None,
3559            throughput_per_s: None,
3560        };
3561        let mut budgets = BTreeMap::new();
3562        budgets.insert(Metric::CpuMs, Budget::new(0.20, 0.10, Direction::Lower));
3563
3564        let comparison = compare_stats(&baseline, &current, &budgets).unwrap();
3565
3566        // cpu_ms delta should NOT exist (skipped because baseline lacks it)
3567        assert!(
3568            !comparison.deltas.contains_key(&Metric::CpuMs),
3569            "cpu_ms delta should be skipped when baseline lacks cpu_ms"
3570        );
3571    }
3572
3573    /// Test that compare_stats warns on cpu_ms when within warn threshold.
3574    #[test]
3575    fn compare_stats_cpu_ms_warns_within_threshold() {
3576        let baseline = Stats {
3577            wall_ms: U64Summary::new(100, 100, 100),
3578            cpu_ms: Some(U64Summary::new(100, 100, 100)),
3579            page_faults: None,
3580            ctx_switches: None,
3581            max_rss_kb: None,
3582            io_read_bytes: None,
3583            io_write_bytes: None,
3584            network_packets: None,
3585            energy_uj: None,
3586            binary_bytes: None,
3587            throughput_per_s: None,
3588        };
3589        // Current has 15% increase in cpu_ms (100 -> 115)
3590        let current = Stats {
3591            wall_ms: U64Summary::new(100, 100, 100),
3592            cpu_ms: Some(U64Summary::new(115, 115, 115)),
3593            page_faults: None,
3594            ctx_switches: None,
3595            max_rss_kb: None,
3596            io_read_bytes: None,
3597            io_write_bytes: None,
3598            network_packets: None,
3599            energy_uj: None,
3600            binary_bytes: None,
3601            throughput_per_s: None,
3602        };
3603        let mut budgets = BTreeMap::new();
3604        budgets.insert(Metric::CpuMs, Budget::new(0.20, 0.10, Direction::Lower));
3605
3606        let comparison = compare_stats(&baseline, &current, &budgets).unwrap();
3607
3608        let cpu_delta = comparison
3609            .deltas
3610            .get(&Metric::CpuMs)
3611            .expect("cpu_ms delta should exist");
3612
3613        // 15% regression should warn (between 10% and 20%)
3614        assert_eq!(
3615            cpu_delta.status,
3616            MetricStatus::Warn,
3617            "15% cpu_ms regression should warn (10% < 15% < 20%)"
3618        );
3619    }
3620
3621    #[test]
3622    fn compare_lower_is_worse_regression_is_positive_pct() {
3623        let baseline = Stats {
3624            wall_ms: U64Summary::new(1000, 1000, 1000),
3625            cpu_ms: None,
3626            page_faults: None,
3627            ctx_switches: None,
3628            max_rss_kb: None,
3629            io_read_bytes: None,
3630            io_write_bytes: None,
3631            network_packets: None,
3632            energy_uj: None,
3633            binary_bytes: None,
3634            throughput_per_s: None,
3635        };
3636        let current = Stats {
3637            wall_ms: U64Summary::new(1100, 1100, 1100),
3638            cpu_ms: None,
3639            page_faults: None,
3640            ctx_switches: None,
3641            max_rss_kb: None,
3642            io_read_bytes: None,
3643            io_write_bytes: None,
3644            network_packets: None,
3645            energy_uj: None,
3646            binary_bytes: None,
3647            throughput_per_s: None,
3648        };
3649        let mut budgets = BTreeMap::new();
3650        budgets.insert(Metric::WallMs, Budget::new(0.20, 0.18, Direction::Lower));
3651
3652        let c = compare_stats(&baseline, &current, &budgets).unwrap();
3653        let d = c.deltas.get(&Metric::WallMs).unwrap();
3654        assert!(d.pct > 0.0);
3655        assert_eq!(d.status, MetricStatus::Pass);
3656    }
3657
3658    #[test]
3659    fn compare_higher_is_better_regression_is_negative_pct() {
3660        let baseline = Stats {
3661            wall_ms: U64Summary::new(1000, 1000, 1000),
3662            cpu_ms: None,
3663            page_faults: None,
3664            ctx_switches: None,
3665            max_rss_kb: None,
3666            io_read_bytes: None,
3667            io_write_bytes: None,
3668            network_packets: None,
3669            energy_uj: None,
3670            binary_bytes: None,
3671            throughput_per_s: Some(F64Summary::new(110.0, 110.0, 110.0)),
3672        };
3673        let current = Stats {
3674            wall_ms: U64Summary::new(1000, 1000, 1000),
3675            cpu_ms: None,
3676            page_faults: None,
3677            ctx_switches: None,
3678            max_rss_kb: None,
3679            io_read_bytes: None,
3680            io_write_bytes: None,
3681            network_packets: None,
3682            energy_uj: None,
3683            binary_bytes: None,
3684            throughput_per_s: Some(F64Summary::new(100.0, 100.0, 100.0)),
3685        };
3686        let mut budgets = BTreeMap::new();
3687        budgets.insert(
3688            Metric::ThroughputPerS,
3689            Budget::new(0.15, 0.135, Direction::Higher),
3690        );
3691
3692        let c = compare_stats(&baseline, &current, &budgets).unwrap();
3693        let d = c.deltas.get(&Metric::ThroughputPerS).unwrap();
3694        assert!(d.pct < 0.0);
3695        assert_eq!(d.status, MetricStatus::Pass);
3696    }
3697
3698    // =========================================================================
3699    // Unit Tests for Domain Error Conditions
3700    // **Validates: Requirements 11.1, 11.2**
3701    // =========================================================================
3702
3703    mod error_condition_tests {
3704        use super::*;
3705
        // ---------------------------------------------------------------------
        // NoSamples Tests (StatsError::NoSamples and DomainError::NoSamples)
        // ---------------------------------------------------------------------
3709
3710        /// Test that summarize_u64 returns DomainError::NoSamples for empty input.
3711        /// **Validates: Requirements 11.1**
3712        #[test]
3713        fn summarize_u64_empty_input_returns_no_samples_error() {
3714            let result = summarize_u64(&[]);
3715
3716            assert!(
3717                result.is_err(),
3718                "summarize_u64 should return error for empty input"
3719            );
3720            match result {
3721                Err(StatsError::NoSamples) => { /* expected */ }
3722                Ok(_) => panic!("expected error, got Ok"),
3723            }
3724        }
3725
3726        /// Test that summarize_f64 returns DomainError::NoSamples for empty input.
3727        /// **Validates: Requirements 11.1**
3728        #[test]
3729        fn summarize_f64_empty_input_returns_no_samples_error() {
3730            let result = summarize_f64(&[]);
3731
3732            assert!(
3733                result.is_err(),
3734                "summarize_f64 should return error for empty input"
3735            );
3736            match result {
3737                Err(StatsError::NoSamples) => { /* expected */ }
3738                Ok(_) => panic!("expected error, got Ok"),
3739            }
3740        }
3741
3742        /// Test that compute_stats returns DomainError::NoSamples for empty samples.
3743        /// **Validates: Requirements 11.1**
3744        #[test]
3745        fn compute_stats_empty_samples_returns_no_samples_error() {
3746            let samples: Vec<Sample> = vec![];
3747            let result = compute_stats(&samples, None);
3748
3749            assert!(
3750                result.is_err(),
3751                "compute_stats should return error for empty samples"
3752            );
3753            match result {
3754                Err(DomainError::NoSamples) => { /* expected */ }
3755                Err(other) => panic!("expected NoSamples error, got: {:?}", other),
3756                Ok(_) => panic!("expected error, got Ok"),
3757            }
3758        }
3759
3760        /// Test that compute_stats returns DomainError::NoSamples when all samples are warmup.
3761        /// **Validates: Requirements 11.1**
3762        #[test]
3763        fn compute_stats_all_warmup_samples_returns_no_samples_error() {
3764            // Create samples where all are marked as warmup
3765            let samples = vec![
3766                Sample {
3767                    wall_ms: 100,
3768                    exit_code: 0,
3769                    warmup: true,
3770                    timed_out: false,
3771                    cpu_ms: None,
3772                    page_faults: None,
3773                    ctx_switches: None,
3774                    max_rss_kb: Some(1024),
3775                    io_read_bytes: None,
3776                    io_write_bytes: None,
3777                    network_packets: None,
3778                    energy_uj: None,
3779                    binary_bytes: None,
3780                    stdout: None,
3781                    stderr: None,
3782                },
3783                Sample {
3784                    wall_ms: 200,
3785                    exit_code: 0,
3786                    warmup: true,
3787                    timed_out: false,
3788                    cpu_ms: None,
3789                    page_faults: None,
3790                    ctx_switches: None,
3791                    max_rss_kb: Some(2048),
3792                    io_read_bytes: None,
3793                    io_write_bytes: None,
3794                    network_packets: None,
3795                    energy_uj: None,
3796                    binary_bytes: None,
3797                    stdout: None,
3798                    stderr: None,
3799                },
3800                Sample {
3801                    wall_ms: 150,
3802                    exit_code: 0,
3803                    warmup: true,
3804                    timed_out: false,
3805                    cpu_ms: None,
3806                    page_faults: None,
3807                    ctx_switches: None,
3808                    max_rss_kb: Some(1536),
3809                    io_read_bytes: None,
3810                    io_write_bytes: None,
3811                    network_packets: None,
3812                    energy_uj: None,
3813                    binary_bytes: None,
3814                    stdout: None,
3815                    stderr: None,
3816                },
3817            ];
3818
3819            let result = compute_stats(&samples, None);
3820
3821            assert!(
3822                result.is_err(),
3823                "compute_stats should return error when all samples are warmup"
3824            );
3825            match result {
3826                Err(DomainError::NoSamples) => { /* expected */ }
3827                Err(other) => panic!("expected NoSamples error, got: {:?}", other),
3828                Ok(_) => panic!("expected error, got Ok"),
3829            }
3830        }
3831
3832        /// Test that compute_stats with work_units also returns NoSamples for all-warmup samples.
3833        /// **Validates: Requirements 11.1**
3834        #[test]
3835        fn compute_stats_all_warmup_with_work_units_returns_no_samples_error() {
3836            let samples = vec![Sample {
3837                wall_ms: 100,
3838                exit_code: 0,
3839                warmup: true,
3840                timed_out: false,
3841                cpu_ms: None,
3842                page_faults: None,
3843                ctx_switches: None,
3844                max_rss_kb: None,
3845                io_read_bytes: None,
3846                io_write_bytes: None,
3847                network_packets: None,
3848                energy_uj: None,
3849                binary_bytes: None,
3850                stdout: None,
3851                stderr: None,
3852            }];
3853
3854            // Even with work_units specified, should still fail
3855            let result = compute_stats(&samples, Some(1000));
3856
3857            assert!(
3858                result.is_err(),
3859                "compute_stats should return error when all samples are warmup, even with work_units"
3860            );
3861            match result {
3862                Err(DomainError::NoSamples) => { /* expected */ }
3863                Err(other) => panic!("expected NoSamples error, got: {:?}", other),
3864                Ok(_) => panic!("expected error, got Ok"),
3865            }
3866        }
3867
        // ---------------------------------------------------------------------
        // Zero / Negative Baseline Tests (expected status: MetricStatus::Skip)
        // ---------------------------------------------------------------------
3871
3872        /// Test that compare_stats returns Skip when baseline value is 0.
3873        /// **Validates: Requirements 11.2**
3874        #[test]
3875        fn compare_stats_zero_baseline_returns_skip() {
3876            // Create baseline stats with wall_ms median of 0
3877            let baseline = Stats {
3878                wall_ms: U64Summary::new(0, 0, 0),
3879                cpu_ms: None,
3880                page_faults: None,
3881                ctx_switches: None,
3882                max_rss_kb: None,
3883                io_read_bytes: None,
3884                io_write_bytes: None,
3885                network_packets: None,
3886                energy_uj: None,
3887                binary_bytes: None,
3888                throughput_per_s: None,
3889            };
3890
3891            let current = Stats {
3892                wall_ms: U64Summary::new(100, 100, 100),
3893                cpu_ms: None,
3894                page_faults: None,
3895                ctx_switches: None,
3896                max_rss_kb: None,
3897                io_read_bytes: None,
3898                io_write_bytes: None,
3899                network_packets: None,
3900                energy_uj: None,
3901                binary_bytes: None,
3902                throughput_per_s: None,
3903            };
3904
3905            let mut budgets = BTreeMap::new();
3906            budgets.insert(Metric::WallMs, Budget::new(0.20, 0.10, Direction::Lower));
3907
3908            let result = compare_stats(&baseline, &current, &budgets).unwrap();
3909
3910            assert_eq!(
3911                result.deltas.get(&Metric::WallMs).unwrap().status,
3912                MetricStatus::Skip,
3913                "compare_stats should return Skip status when baseline value is 0"
3914            );
3915        }
3916
3917        /// Test that compare_stats returns Skip for zero throughput baseline.
3918        /// **Validates: Requirements 11.2**
3919        #[test]
3920        fn compare_stats_zero_throughput_baseline_returns_skip() {
3921            let baseline = Stats {
3922                wall_ms: U64Summary::new(1000, 1000, 1000),
3923                cpu_ms: None,
3924                page_faults: None,
3925                ctx_switches: None,
3926                max_rss_kb: None,
3927                io_read_bytes: None,
3928                io_write_bytes: None,
3929                network_packets: None,
3930                energy_uj: None,
3931                binary_bytes: None,
3932                throughput_per_s: Some(F64Summary::new(0.0, 0.0, 0.0)),
3933            };
3934
3935            let current = Stats {
3936                wall_ms: U64Summary::new(1000, 1000, 1000),
3937                cpu_ms: None,
3938                page_faults: None,
3939                ctx_switches: None,
3940                max_rss_kb: None,
3941                io_read_bytes: None,
3942                io_write_bytes: None,
3943                network_packets: None,
3944                energy_uj: None,
3945                binary_bytes: None,
3946                throughput_per_s: Some(F64Summary::new(100.0, 100.0, 100.0)),
3947            };
3948
3949            let mut budgets = BTreeMap::new();
3950            budgets.insert(
3951                Metric::ThroughputPerS,
3952                Budget::new(0.20, 0.10, Direction::Higher),
3953            );
3954
3955            let result = compare_stats(&baseline, &current, &budgets).unwrap();
3956
3957            assert_eq!(
3958                result.deltas.get(&Metric::ThroughputPerS).unwrap().status,
3959                MetricStatus::Skip,
3960                "compare_stats should return Skip status when throughput baseline is 0"
3961            );
3962        }
3963
3964        /// Test that compare_stats returns Skip for zero max_rss_kb baseline.
3965        /// **Validates: Requirements 11.2**
3966        #[test]
3967        fn compare_stats_zero_max_rss_baseline_returns_skip() {
3968            let baseline = Stats {
3969                wall_ms: U64Summary::new(1000, 1000, 1000),
3970                cpu_ms: None,
3971                page_faults: None,
3972                ctx_switches: None,
3973                max_rss_kb: Some(U64Summary::new(0, 0, 0)),
3974                io_read_bytes: None,
3975                io_write_bytes: None,
3976                network_packets: None,
3977                energy_uj: None,
3978                binary_bytes: None,
3979                throughput_per_s: None,
3980            };
3981
3982            let current = Stats {
3983                wall_ms: U64Summary::new(1000, 1000, 1000),
3984                cpu_ms: None,
3985                page_faults: None,
3986                ctx_switches: None,
3987                max_rss_kb: Some(U64Summary::new(1024, 1024, 1024)),
3988                io_read_bytes: None,
3989                io_write_bytes: None,
3990                network_packets: None,
3991                energy_uj: None,
3992                binary_bytes: None,
3993                throughput_per_s: None,
3994            };
3995
3996            let mut budgets = BTreeMap::new();
3997            budgets.insert(Metric::MaxRssKb, Budget::new(0.20, 0.10, Direction::Lower));
3998
3999            let result = compare_stats(&baseline, &current, &budgets).unwrap();
4000
4001            assert_eq!(
4002                result.deltas.get(&Metric::MaxRssKb).unwrap().status,
4003                MetricStatus::Skip,
4004                "compare_stats should return Skip status when max_rss_kb baseline is 0"
4005            );
4006        }
4007
4008        /// Test that compare_stats returns Skip for negative throughput baseline.
4009        /// Note: While negative throughput is unusual, the check is for <= 0.
4010        /// **Validates: Requirements 11.2**
4011        #[test]
4012        fn compare_stats_negative_throughput_baseline_returns_skip() {
4013            let baseline = Stats {
4014                wall_ms: U64Summary::new(1000, 1000, 1000),
4015                cpu_ms: None,
4016                page_faults: None,
4017                ctx_switches: None,
4018                max_rss_kb: None,
4019                io_read_bytes: None,
4020                io_write_bytes: None,
4021                network_packets: None,
4022                energy_uj: None,
4023                binary_bytes: None,
4024                throughput_per_s: Some(F64Summary::new(-10.0, -10.0, -10.0)),
4025            };
4026
4027            let current = Stats {
4028                wall_ms: U64Summary::new(1000, 1000, 1000),
4029                cpu_ms: None,
4030                page_faults: None,
4031                ctx_switches: None,
4032                max_rss_kb: None,
4033                io_read_bytes: None,
4034                io_write_bytes: None,
4035                network_packets: None,
4036                energy_uj: None,
4037                binary_bytes: None,
4038                throughput_per_s: Some(F64Summary::new(100.0, 100.0, 100.0)),
4039            };
4040
4041            let mut budgets = BTreeMap::new();
4042            budgets.insert(
4043                Metric::ThroughputPerS,
4044                Budget::new(0.20, 0.10, Direction::Higher),
4045            );
4046
4047            let result = compare_stats(&baseline, &current, &budgets).unwrap();
4048
4049            assert_eq!(
4050                result.deltas.get(&Metric::ThroughputPerS).unwrap().status,
4051                MetricStatus::Skip,
4052                "compare_stats should return Skip status when throughput baseline is negative"
4053            );
4054        }
4055
4056        /// Test that DomainError::NoSamples has the expected error message.
4057        /// **Validates: Requirements 11.1**
4058        #[test]
4059        fn no_samples_error_has_descriptive_message() {
4060            let error = DomainError::NoSamples;
4061            let message = format!("{}", error);
4062            assert_eq!(message, "no samples to summarize");
4063        }
4064
4065        /// Test that DomainError::InvalidAlpha has the expected error message.
4066        #[test]
4067        fn invalid_alpha_error_has_descriptive_message() {
4068            let error = DomainError::InvalidAlpha(1.5);
4069            let message = format!("{}", error);
4070            assert_eq!(
4071                message,
4072                "significance alpha must be between 0.0 and 1.0, got 1.5"
4073            );
4074        }
4075
4076        /// Test that SignificancePolicy::new accepts valid alpha values.
4077        #[test]
4078        fn significance_policy_new_accepts_valid_alpha() {
4079            for alpha in [0.0, 0.05, 0.5, 1.0] {
4080                let policy = SignificancePolicy::new(alpha, 8, false);
4081                assert!(policy.is_ok(), "alpha={alpha} should be valid");
4082                let p = policy.unwrap();
4083                assert!((p.alpha - alpha).abs() < f64::EPSILON);
4084            }
4085        }
4086
4087        /// Test that SignificancePolicy::new rejects out-of-range alpha values.
4088        #[test]
4089        fn significance_policy_new_rejects_invalid_alpha() {
4090            for alpha in [-0.1, 1.1, 2.0] {
4091                let result = SignificancePolicy::new(alpha, 8, false);
4092                match result {
4093                    Err(DomainError::InvalidAlpha(v)) => {
4094                        assert!((v - alpha).abs() < f64::EPSILON);
4095                    }
4096                    other => panic!("expected InvalidAlpha for alpha={alpha}, got: {other:?}"),
4097                }
4098            }
4099        }
4100    }
4101
4102    // =========================================================================
4103    // derive_report Tests
4104    // =========================================================================
4105
4106    mod derive_report_tests {
4107        use super::*;
4108        use perfgate_types::{
4109            BenchMeta, Budget, COMPARE_SCHEMA_V1, CompareReceipt, CompareRef, Delta, Direction,
4110            Metric, MetricStatus, ToolInfo, Verdict, VerdictCounts, VerdictStatus,
4111        };
4112
4113        /// Helper to create a minimal CompareReceipt for testing.
4114        fn make_receipt(
4115            deltas: BTreeMap<Metric, Delta>,
4116            budgets: BTreeMap<Metric, Budget>,
4117            verdict_status: VerdictStatus,
4118            counts: VerdictCounts,
4119        ) -> CompareReceipt {
4120            CompareReceipt {
4121                schema: COMPARE_SCHEMA_V1.to_string(),
4122                tool: ToolInfo {
4123                    name: "perfgate".to_string(),
4124                    version: "0.1.0".to_string(),
4125                },
4126                bench: BenchMeta {
4127                    name: "test_bench".to_string(),
4128                    cwd: None,
4129                    command: vec!["echo".to_string(), "hello".to_string()],
4130                    repeat: 5,
4131                    warmup: 1,
4132                    work_units: None,
4133                    timeout_ms: None,
4134                },
4135                baseline_ref: CompareRef {
4136                    path: Some("baseline.json".to_string()),
4137                    run_id: None,
4138                },
4139                current_ref: CompareRef {
4140                    path: Some("current.json".to_string()),
4141                    run_id: None,
4142                },
4143                budgets,
4144                deltas,
4145                verdict: Verdict {
4146                    status: verdict_status,
4147                    counts,
4148                    reasons: vec![],
4149                },
4150            }
4151        }
4152
4153        /// Helper to create a Delta with given values.
4154        fn make_delta(baseline: f64, current: f64, status: MetricStatus) -> Delta {
4155            let ratio = current / baseline;
4156            let pct = (current - baseline) / baseline;
4157            let regression = pct.max(0.0);
4158            Delta {
4159                baseline,
4160                current,
4161                ratio,
4162                pct,
4163                regression,
4164                cv: None,
4165                noise_threshold: None,
4166                statistic: MetricStatistic::Median,
4167                significance: None,
4168                status,
4169            }
4170        }
4171
4172        /// Helper to create a Budget with given threshold.
4173        fn make_budget(threshold: f64) -> Budget {
4174            Budget::new(threshold, threshold * 0.9, Direction::Lower)
4175        }
4176
4177        /// Test: Empty deltas produces no findings.
4178        #[test]
4179        fn test_empty_deltas_no_findings() {
4180            let receipt = make_receipt(
4181                BTreeMap::new(),
4182                BTreeMap::new(),
4183                VerdictStatus::Pass,
4184                VerdictCounts {
4185                    pass: 1,
4186                    warn: 0,
4187                    fail: 0,
4188                    skip: 0,
4189                },
4190            );
4191
4192            let report = derive_report(&receipt);
4193
4194            assert!(report.findings.is_empty());
4195            assert_eq!(report.verdict, VerdictStatus::Pass);
4196        }
4197
4198        /// Test: All pass status deltas produce no findings.
4199        #[test]
4200        fn test_all_pass_no_findings() {
4201            let mut deltas = BTreeMap::new();
4202            deltas.insert(Metric::WallMs, make_delta(100.0, 105.0, MetricStatus::Pass));
4203            deltas.insert(
4204                Metric::MaxRssKb,
4205                make_delta(1000.0, 1050.0, MetricStatus::Pass),
4206            );
4207
4208            let mut budgets = BTreeMap::new();
4209            budgets.insert(Metric::WallMs, make_budget(0.2));
4210            budgets.insert(Metric::MaxRssKb, make_budget(0.2));
4211
4212            let receipt = make_receipt(
4213                deltas,
4214                budgets,
4215                VerdictStatus::Pass,
4216                VerdictCounts {
4217                    pass: 1,
4218                    warn: 0,
4219                    fail: 0,
4220                    skip: 0,
4221                },
4222            );
4223
4224            let report = derive_report(&receipt);
4225
4226            assert!(report.findings.is_empty());
4227            assert_eq!(report.verdict, VerdictStatus::Pass);
4228        }
4229
4230        /// Test: Mix of pass/warn/fail produces correct finding count and codes.
4231        #[test]
4232        fn test_mixed_status_correct_findings() {
4233            let mut deltas = BTreeMap::new();
4234            deltas.insert(Metric::WallMs, make_delta(100.0, 105.0, MetricStatus::Pass));
4235            deltas.insert(
4236                Metric::MaxRssKb,
4237                make_delta(1000.0, 1150.0, MetricStatus::Warn),
4238            );
4239            deltas.insert(
4240                Metric::ThroughputPerS,
4241                make_delta(500.0, 350.0, MetricStatus::Fail),
4242            );
4243
4244            let mut budgets = BTreeMap::new();
4245            budgets.insert(Metric::WallMs, make_budget(0.2));
4246            budgets.insert(Metric::MaxRssKb, make_budget(0.2));
4247            budgets.insert(Metric::ThroughputPerS, make_budget(0.2));
4248
4249            let receipt = make_receipt(
4250                deltas,
4251                budgets,
4252                VerdictStatus::Fail,
4253                VerdictCounts {
4254                    pass: 1,
4255                    warn: 0,
4256                    fail: 0,
4257                    skip: 0,
4258                },
4259            );
4260
4261            let report = derive_report(&receipt);
4262
4263            // Should have 2 findings (1 warn + 1 fail, not the pass)
4264            assert_eq!(report.findings.len(), 2);
4265
4266            // Verify finding codes
4267            let codes: Vec<&str> = report.findings.iter().map(|f| f.code.as_str()).collect();
4268            assert!(codes.contains(&"metric_warn"));
4269            assert!(codes.contains(&"metric_fail"));
4270
4271            // Verify all findings have check_id = "perf.budget"
4272            for finding in &report.findings {
4273                assert_eq!(finding.check_id, "perf.budget");
4274            }
4275
4276            // Verify verdict matches
4277            assert_eq!(report.verdict, VerdictStatus::Fail);
4278        }
4279
4280        /// Test: Finding count equals warn + fail count.
4281        #[test]
4282        fn test_finding_count_equals_warn_plus_fail() {
4283            let mut deltas = BTreeMap::new();
4284            deltas.insert(Metric::WallMs, make_delta(100.0, 125.0, MetricStatus::Warn));
4285            deltas.insert(
4286                Metric::MaxRssKb,
4287                make_delta(1000.0, 1300.0, MetricStatus::Fail),
4288            );
4289            deltas.insert(
4290                Metric::ThroughputPerS,
4291                make_delta(500.0, 300.0, MetricStatus::Fail),
4292            );
4293
4294            let mut budgets = BTreeMap::new();
4295            budgets.insert(Metric::WallMs, make_budget(0.2));
4296            budgets.insert(Metric::MaxRssKb, make_budget(0.2));
4297            budgets.insert(Metric::ThroughputPerS, make_budget(0.2));
4298
4299            let receipt = make_receipt(
4300                deltas,
4301                budgets,
4302                VerdictStatus::Fail,
4303                VerdictCounts {
4304                    pass: 0,
4305                    warn: 1,
4306                    fail: 2,
4307                    skip: 0,
4308                },
4309            );
4310
4311            let report = derive_report(&receipt);
4312
4313            // Invariant: finding count = warn + fail
4314            let expected_count = receipt.verdict.counts.warn + receipt.verdict.counts.fail;
4315            assert_eq!(report.findings.len(), expected_count as usize);
4316        }
4317
4318        /// Test: Report verdict matches compare verdict.
4319        #[test]
4320        fn test_verdict_matches() {
4321            // Test with Warn verdict
4322            let mut deltas_warn = BTreeMap::new();
4323            deltas_warn.insert(Metric::WallMs, make_delta(100.0, 115.0, MetricStatus::Warn));
4324
4325            let mut budgets = BTreeMap::new();
4326            budgets.insert(Metric::WallMs, make_budget(0.2));
4327
4328            let receipt_warn = make_receipt(
4329                deltas_warn,
4330                budgets.clone(),
4331                VerdictStatus::Warn,
4332                VerdictCounts {
4333                    pass: 1,
4334                    warn: 0,
4335                    fail: 0,
4336                    skip: 0,
4337                },
4338            );
4339
4340            let report_warn = derive_report(&receipt_warn);
4341            assert_eq!(report_warn.verdict, VerdictStatus::Warn);
4342
4343            // Test with Fail verdict
4344            let mut deltas_fail = BTreeMap::new();
4345            deltas_fail.insert(Metric::WallMs, make_delta(100.0, 130.0, MetricStatus::Fail));
4346
4347            let receipt_fail = make_receipt(
4348                deltas_fail,
4349                budgets,
4350                VerdictStatus::Fail,
4351                VerdictCounts {
4352                    pass: 1,
4353                    warn: 0,
4354                    fail: 0,
4355                    skip: 0,
4356                },
4357            );
4358
4359            let report_fail = derive_report(&receipt_fail);
4360            assert_eq!(report_fail.verdict, VerdictStatus::Fail);
4361        }
4362
4363        /// Test: Findings are ordered deterministically by metric name.
4364        #[test]
4365        fn test_deterministic_ordering() {
4366            // Insert in reverse order to verify ordering is by metric name
4367            let mut deltas = BTreeMap::new();
4368            deltas.insert(
4369                Metric::ThroughputPerS,
4370                make_delta(500.0, 300.0, MetricStatus::Fail),
4371            );
4372            deltas.insert(Metric::WallMs, make_delta(100.0, 130.0, MetricStatus::Fail));
4373            deltas.insert(
4374                Metric::MaxRssKb,
4375                make_delta(1000.0, 1300.0, MetricStatus::Warn),
4376            );
4377
4378            let mut budgets = BTreeMap::new();
4379            budgets.insert(Metric::WallMs, make_budget(0.2));
4380            budgets.insert(Metric::MaxRssKb, make_budget(0.2));
4381            budgets.insert(Metric::ThroughputPerS, make_budget(0.2));
4382
4383            let receipt = make_receipt(
4384                deltas,
4385                budgets,
4386                VerdictStatus::Fail,
4387                VerdictCounts {
4388                    pass: 1,
4389                    warn: 0,
4390                    fail: 0,
4391                    skip: 0,
4392                },
4393            );
4394
4395            let report = derive_report(&receipt);
4396
4397            // BTreeMap orders by Metric enum order (WallMs < MaxRssKb < ThroughputPerS based on derive order)
4398            // Verify the ordering is deterministic by checking metric names
4399            let metric_names: Vec<&str> = report
4400                .findings
4401                .iter()
4402                .map(|f| f.data.metric_name.as_str())
4403                .collect();
4404
4405            // Run twice to ensure deterministic
4406            let report2 = derive_report(&receipt);
4407            let metric_names2: Vec<&str> = report2
4408                .findings
4409                .iter()
4410                .map(|f| f.data.metric_name.as_str())
4411                .collect();
4412
4413            assert_eq!(metric_names, metric_names2);
4414        }
4415
4416        /// Test: Finding data contains correct values.
4417        #[test]
4418        fn test_finding_data_values() {
4419            let mut deltas = BTreeMap::new();
4420            deltas.insert(Metric::WallMs, make_delta(100.0, 125.0, MetricStatus::Fail));
4421
4422            let mut budgets = BTreeMap::new();
4423            budgets.insert(Metric::WallMs, make_budget(0.2));
4424
4425            let mut receipt = make_receipt(
4426                deltas,
4427                budgets,
4428                VerdictStatus::Fail,
4429                VerdictCounts {
4430                    pass: 1,
4431                    warn: 0,
4432                    fail: 0,
4433                    skip: 0,
4434                },
4435            );
4436            receipt.bench.name = "my_benchmark".to_string();
4437
4438            let report = derive_report(&receipt);
4439
4440            assert_eq!(report.findings.len(), 1);
4441            let finding = &report.findings[0];
4442
4443            assert_eq!(finding.code, "metric_fail");
4444            assert_eq!(finding.check_id, "perf.budget");
4445            assert_eq!(finding.data.metric_name, "wall_ms");
4446            assert_eq!(finding.data.bench_name, "my_benchmark");
4447            assert!((finding.data.baseline - 100.0).abs() < f64::EPSILON);
4448            assert!((finding.data.current - 125.0).abs() < f64::EPSILON);
4449            assert!((finding.data.regression_pct - 0.25).abs() < f64::EPSILON);
4450            assert!((finding.data.threshold - 0.2).abs() < f64::EPSILON);
4451        }
4452
4453        /// Test: Warn finding has correct code.
4454        #[test]
4455        fn test_warn_finding_code() {
4456            let mut deltas = BTreeMap::new();
4457            deltas.insert(Metric::WallMs, make_delta(100.0, 115.0, MetricStatus::Warn));
4458
4459            let mut budgets = BTreeMap::new();
4460            budgets.insert(Metric::WallMs, make_budget(0.2));
4461
4462            let receipt = make_receipt(
4463                deltas,
4464                budgets,
4465                VerdictStatus::Warn,
4466                VerdictCounts {
4467                    pass: 1,
4468                    warn: 0,
4469                    fail: 0,
4470                    skip: 0,
4471                },
4472            );
4473
4474            let report = derive_report(&receipt);
4475
4476            assert_eq!(report.findings.len(), 1);
4477            assert_eq!(report.findings[0].code, "metric_warn");
4478        }
4479
4480        /// Test: metric_to_string helper function.
4481        #[test]
4482        fn test_metric_to_string() {
4483            assert_eq!(metric_to_string(Metric::WallMs), "wall_ms");
4484            assert_eq!(metric_to_string(Metric::MaxRssKb), "max_rss_kb");
4485            assert_eq!(metric_to_string(Metric::ThroughputPerS), "throughput_per_s");
4486        }
4487    }
4488}