// perfgate_app/lib.rs

1//! Application layer for perfgate.
2//!
3//! The app layer coordinates adapters and domain logic.
4//! It does not parse CLI flags and it does not do filesystem I/O.
5
6pub mod baseline_resolve;
7mod check;
8pub mod comparison_logic;
9mod paired;
10mod promote;
11mod report;
12mod sensor_report;
13
14pub use check::{CheckOutcome, CheckRequest, CheckUseCase};
15pub use paired::{PairedRunOutcome, PairedRunRequest, PairedRunUseCase};
16pub use promote::{PromoteRequest, PromoteResult, PromoteUseCase};
17pub use report::{ReportRequest, ReportResult, ReportUseCase};
18pub use sensor_report::{
19    BenchOutcome, SensorCheckOptions, SensorReportBuilder, classify_error,
20    default_engine_capability, run_sensor_check, sensor_fingerprint,
21};
22
23// Re-export rendering functions from perfgate-render for backward compatibility
24pub use perfgate_render::{
25    direction_str, format_metric, format_metric_with_statistic, format_pct, format_value,
26    github_annotations, markdown_template_context, metric_status_icon, metric_status_str,
27    parse_reason_token, render_markdown, render_markdown_template, render_reason_line,
28};
29
30// Re-export export functionality from perfgate-export for backward compatibility
31pub use perfgate_export::{CompareExportRow, ExportFormat, ExportUseCase, RunExportRow};
32
33use anyhow::Context;
34use perfgate_adapters::{CommandSpec, HostProbe, HostProbeOptions, ProcessRunner, RunResult};
35use perfgate_domain::{
36    Comparison, SignificancePolicy, compare_runs, compute_stats, detect_host_mismatch,
37};
38use perfgate_types::{
39    BenchMeta, Budget, CompareReceipt, CompareRef, HostMismatchInfo, HostMismatchPolicy, Metric,
40    MetricStatistic, RunMeta, RunReceipt, Sample, ToolInfo,
41};
42use std::collections::BTreeMap;
43use std::path::PathBuf;
44use std::time::Duration;
45
/// Abstraction over wall-clock time so use cases can be driven by an
/// injectable, testable clock instead of reading the system time directly.
pub trait Clock: Send + Sync {
    /// Returns the current time formatted as an RFC 3339 string.
    fn now_rfc3339(&self) -> String;
}
49
/// Default [`Clock`] implementation backed by the real system clock (UTC).
#[derive(Debug, Default, Clone)]
pub struct SystemClock;
52
53impl Clock for SystemClock {
54    fn now_rfc3339(&self) -> String {
55        use time::format_description::well_known::Rfc3339;
56        time::OffsetDateTime::now_utc()
57            .format(&Rfc3339)
58            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string())
59    }
60}
61
/// Inputs for a single benchmark run: one command, executed
/// `warmup + repeat` times.
#[derive(Debug, Clone, Default)]
pub struct RunBenchRequest {
    /// Benchmark name recorded in the receipt.
    pub name: String,
    /// Working directory for the benchmarked command, if any.
    pub cwd: Option<PathBuf>,
    /// Command argv (program followed by its arguments).
    pub command: Vec<String>,
    /// Number of measured iterations.
    pub repeat: u32,
    /// Number of warmup iterations executed before the measured ones.
    pub warmup: u32,
    /// Optional work-unit count, forwarded to stats computation
    /// (used for throughput — see `compute_stats`).
    pub work_units: Option<u64>,
    /// Per-iteration timeout, if any.
    pub timeout: Option<Duration>,
    /// Extra environment variables passed to the command.
    pub env: Vec<(String, String)>,
    /// Cap (in bytes) on captured stdout/stderr.
    pub output_cap_bytes: usize,

    /// If true, do not treat nonzero exit codes as a tool error.
    /// The receipt will still record exit codes.
    pub allow_nonzero: bool,

    /// If true, include a hashed hostname in the host fingerprint.
    /// This is opt-in for privacy reasons.
    pub include_hostname_hash: bool,
}
82
/// Result of executing a [`RunBenchRequest`].
#[derive(Debug, Clone)]
pub struct RunBenchOutcome {
    /// Full run receipt: metadata, raw samples, and derived stats.
    pub receipt: RunReceipt,

    /// True if any measured (non-warmup) sample timed out or returned nonzero.
    pub failed: bool,

    /// Human-readable reasons (for CI logs).
    pub reasons: Vec<String>,
}
93
/// Use case that executes a benchmark command repeatedly and produces a
/// [`RunReceipt`].
///
/// Process execution, host probing, and the clock are injected so this
/// layer performs no direct I/O itself.
pub struct RunBenchUseCase<R: ProcessRunner, H: HostProbe, C: Clock> {
    runner: R,
    host_probe: H,
    clock: C,
    tool: ToolInfo,
}
100
101impl<R: ProcessRunner, H: HostProbe, C: Clock> RunBenchUseCase<R, H, C> {
102    pub fn new(runner: R, host_probe: H, clock: C, tool: ToolInfo) -> Self {
103        Self {
104            runner,
105            host_probe,
106            clock,
107            tool,
108        }
109    }
110
111    pub fn execute(&self, req: RunBenchRequest) -> anyhow::Result<RunBenchOutcome> {
112        let run_id = uuid::Uuid::new_v4().to_string();
113        let started_at = self.clock.now_rfc3339();
114
115        let host_options = HostProbeOptions {
116            include_hostname_hash: req.include_hostname_hash,
117        };
118        let host = self.host_probe.probe(&host_options);
119
120        let bench = BenchMeta {
121            name: req.name.clone(),
122            cwd: req.cwd.as_ref().map(|p| p.to_string_lossy().to_string()),
123            command: req.command.clone(),
124            repeat: req.repeat,
125            warmup: req.warmup,
126            work_units: req.work_units,
127            timeout_ms: req.timeout.map(|d| d.as_millis() as u64),
128        };
129
130        let mut samples: Vec<Sample> = Vec::new();
131        let mut reasons: Vec<String> = Vec::new();
132
133        let total = req.warmup + req.repeat;
134
135        for i in 0..total {
136            let is_warmup = i < req.warmup;
137
138            let spec = CommandSpec {
139                argv: req.command.clone(),
140                cwd: req.cwd.clone(),
141                env: req.env.clone(),
142                timeout: req.timeout,
143                output_cap_bytes: req.output_cap_bytes,
144            };
145
146            let run = self.runner.run(&spec).with_context(|| {
147                format!(
148                    "failed to run command (iteration {}): {:?}",
149                    i + 1,
150                    spec.argv
151                )
152            })?;
153
154            let s = sample_from_run(run, is_warmup);
155            if !is_warmup {
156                if s.timed_out {
157                    reasons.push(format!("iteration {} timed out", i + 1));
158                }
159                if s.exit_code != 0 {
160                    reasons.push(format!("iteration {} exit code {}", i + 1, s.exit_code));
161                }
162            }
163
164            samples.push(s);
165        }
166
167        let stats = compute_stats(&samples, req.work_units)?;
168
169        let ended_at = self.clock.now_rfc3339();
170
171        let receipt = RunReceipt {
172            schema: perfgate_types::RUN_SCHEMA_V1.to_string(),
173            tool: self.tool.clone(),
174            run: RunMeta {
175                id: run_id,
176                started_at,
177                ended_at,
178                host,
179            },
180            bench,
181            samples,
182            stats,
183        };
184
185        let failed = !reasons.is_empty();
186
187        if failed && !req.allow_nonzero {
188            // It's still a successful run from a *tooling* perspective, but callers may want a hard failure.
189            // We return the receipt either way; the CLI decides exit codes.
190        }
191
192        Ok(RunBenchOutcome {
193            receipt,
194            failed,
195            reasons,
196        })
197    }
198}
199
200fn sample_from_run(run: RunResult, warmup: bool) -> Sample {
201    Sample {
202        wall_ms: run.wall_ms,
203        exit_code: run.exit_code,
204        warmup,
205        timed_out: run.timed_out,
206        cpu_ms: run.cpu_ms,
207        page_faults: run.page_faults,
208        ctx_switches: run.ctx_switches,
209        max_rss_kb: run.max_rss_kb,
210        binary_bytes: run.binary_bytes,
211        stdout: if run.stdout.is_empty() {
212            None
213        } else {
214            Some(String::from_utf8_lossy(&run.stdout).to_string())
215        },
216        stderr: if run.stderr.is_empty() {
217            None
218        } else {
219            Some(String::from_utf8_lossy(&run.stderr).to_string())
220        },
221    }
222}
223
224#[derive(Debug, Clone)]
225pub struct CompareRequest {
226    pub baseline: RunReceipt,
227    pub current: RunReceipt,
228    pub budgets: BTreeMap<Metric, Budget>,
229    pub metric_statistics: BTreeMap<Metric, MetricStatistic>,
230    pub significance: Option<SignificancePolicy>,
231    pub baseline_ref: CompareRef,
232    pub current_ref: CompareRef,
233    pub tool: ToolInfo,
234    /// Policy for handling host mismatches.
235    #[allow(dead_code)]
236    pub host_mismatch_policy: HostMismatchPolicy,
237}
238
/// Result from CompareUseCase including host mismatch information.
#[derive(Debug, Clone)]
pub struct CompareResult {
    /// The comparison receipt (budgets, deltas, verdict).
    pub receipt: CompareReceipt,
    /// Host mismatch info if detected (only populated when policy is not Ignore).
    pub host_mismatch: Option<HostMismatchInfo>,
}
246
247pub struct CompareUseCase;
248
249impl CompareUseCase {
250    pub fn execute(req: CompareRequest) -> anyhow::Result<CompareResult> {
251        // Check for host mismatch
252        let host_mismatch = if req.host_mismatch_policy != HostMismatchPolicy::Ignore {
253            detect_host_mismatch(&req.baseline.run.host, &req.current.run.host)
254        } else {
255            None
256        };
257
258        // If policy is Error and there's a mismatch, fail immediately
259        if req.host_mismatch_policy == HostMismatchPolicy::Error
260            && let Some(mismatch) = &host_mismatch
261        {
262            anyhow::bail!(
263                "host mismatch detected (--host-mismatch=error): {}",
264                mismatch.reasons.join("; ")
265            );
266        }
267
268        let Comparison { deltas, verdict } = compare_runs(
269            &req.baseline,
270            &req.current,
271            &req.budgets,
272            &req.metric_statistics,
273            req.significance,
274        )?;
275
276        let receipt = CompareReceipt {
277            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
278            tool: req.tool,
279            bench: req.current.bench,
280            baseline_ref: req.baseline_ref,
281            current_ref: req.current_ref,
282            budgets: req.budgets,
283            deltas,
284            verdict,
285        };
286
287        Ok(CompareResult {
288            receipt,
289            host_mismatch,
290        })
291    }
292}
293
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_types::{
        Delta, Direction, HostInfo, MetricStatistic, MetricStatus, RUN_SCHEMA_V1, RunMeta,
        RunReceipt, Stats, U64Summary, Verdict, VerdictCounts, VerdictStatus,
    };
    use std::collections::BTreeMap;

    // Fixture: a compare receipt with a single wall_ms delta (+15%) whose
    // metric status is chosen by the caller; the verdict is always Warn with
    // the reason token "wall_ms_warn".
    fn make_compare_receipt(status: MetricStatus) -> CompareReceipt {
        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.1,
                direction: Direction::Lower,
            },
        );

        let mut deltas = BTreeMap::new();
        deltas.insert(
            Metric::WallMs,
            Delta {
                baseline: 100.0,
                current: 115.0,
                ratio: 1.15,
                pct: 0.15,
                regression: 0.15,
                statistic: MetricStatistic::Median,
                significance: None,
                status,
            },
        );

        CompareReceipt {
            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".into(),
                version: "0.1.0".into(),
            },
            bench: BenchMeta {
                name: "bench".into(),
                cwd: None,
                command: vec!["true".into()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            budgets,
            deltas,
            verdict: Verdict {
                status: VerdictStatus::Warn,
                counts: VerdictCounts {
                    pass: 0,
                    warn: 1,
                    fail: 0,
                },
                reasons: vec!["wall_ms_warn".to_string()],
            },
        }
    }

    // Fixture: a minimal run receipt with the given host info, no samples,
    // and a flat wall_ms summary (median == min == max).
    fn make_run_receipt_with_host(host: HostInfo, wall_ms: u64) -> RunReceipt {
        RunReceipt {
            schema: RUN_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            run: RunMeta {
                id: "run-id".to_string(),
                started_at: "2024-01-01T00:00:00Z".to_string(),
                ended_at: "2024-01-01T00:00:01Z".to_string(),
                host,
            },
            bench: BenchMeta {
                name: "bench".to_string(),
                cwd: None,
                command: vec!["true".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            samples: Vec::new(),
            stats: Stats {
                wall_ms: U64Summary {
                    median: wall_ms,
                    min: wall_ms,
                    max: wall_ms,
                },
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                binary_bytes: None,
                throughput_per_s: None,
            },
        }
    }

    // render_markdown output includes a table header and the metric name.
    #[test]
    fn markdown_renders_table() {
        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.18,
                direction: Direction::Lower,
            },
        );

        let mut deltas = BTreeMap::new();
        deltas.insert(
            Metric::WallMs,
            Delta {
                baseline: 1000.0,
                current: 1100.0,
                ratio: 1.1,
                pct: 0.1,
                regression: 0.1,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Pass,
            },
        );

        let compare = CompareReceipt {
            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".into(),
                version: "0.1.0".into(),
            },
            bench: BenchMeta {
                name: "demo".into(),
                cwd: None,
                command: vec!["true".into()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            budgets,
            deltas,
            verdict: Verdict {
                status: VerdictStatus::Pass,
                counts: VerdictCounts {
                    pass: 1,
                    warn: 0,
                    fail: 0,
                },
                reasons: vec![],
            },
        };

        let md = render_markdown(&compare);
        assert!(md.contains("| metric | baseline"));
        assert!(md.contains("wall_ms"));
    }

    // Handlebars-style template rendering exposes bench fields and one row
    // per delta.
    #[test]
    fn markdown_template_renders_context_rows() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let template = "{{header}}\nbench={{bench.name}}\n{{#each rows}}metric={{metric}} status={{status}}\n{{/each}}";

        let rendered = render_markdown_template(&compare, template).expect("render template");
        assert!(rendered.contains("bench=bench"));
        assert!(rendered.contains("metric=wall_ms"));
        assert!(rendered.contains("status=warn"));
    }

    // Unknown template fields are an error, not silently rendered empty.
    #[test]
    fn markdown_template_strict_mode_rejects_unknown_fields() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let err = render_markdown_template(&compare, "{{does_not_exist}}").unwrap_err();
        assert!(
            err.to_string().contains("render markdown template"),
            "unexpected error: {}",
            err
        );
    }

    // Reason tokens parse only for known metrics with warn/fail statuses.
    #[test]
    fn parse_reason_token_handles_valid_and_invalid() {
        let parsed = parse_reason_token("wall_ms_warn");
        assert!(parsed.is_some());
        let (metric, status) = parsed.unwrap();
        assert_eq!(metric, Metric::WallMs);
        assert_eq!(status, MetricStatus::Warn);

        // "pass" is not a reason status, and unknown metrics are rejected.
        assert!(parse_reason_token("wall_ms_pass").is_none());
        assert!(parse_reason_token("unknown_warn").is_none());
    }

    // A parsed reason token renders with its thresholds and delta percent.
    #[test]
    fn render_reason_line_formats_thresholds() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let line = render_reason_line(&compare, "wall_ms_warn");
        assert!(line.contains("warn >="));
        assert!(line.contains("fail >"));
        assert!(line.contains("+15.00%"));
    }

    // Without a matching budget, the raw token is emitted as a plain bullet.
    #[test]
    fn render_reason_line_falls_back_when_missing_budget() {
        let mut compare = make_compare_receipt(MetricStatus::Warn);
        compare.budgets.clear();
        let line = render_reason_line(&compare, "wall_ms_warn");
        assert_eq!(line, "- wall_ms_warn\n");
    }

    // Value formatting: throughput gets decimals, wall_ms is integral;
    // percentages carry an explicit sign except for zero.
    #[test]
    fn format_value_and_pct_render_expected_strings() {
        assert_eq!(format_value(Metric::ThroughputPerS, 1.23456), "1.235");
        assert_eq!(format_value(Metric::WallMs, 123.0), "123");
        assert_eq!(format_pct(0.1), "+10.00%");
        assert_eq!(format_pct(-0.1), "-10.00%");
        assert_eq!(format_pct(0.0), "0.00%");
    }

    // GitHub annotations are emitted only for Warn/Fail deltas, never Pass.
    #[test]
    fn github_annotations_only_warn_and_fail() {
        let mut compare = make_compare_receipt(MetricStatus::Warn);
        compare.deltas.insert(
            Metric::MaxRssKb,
            Delta {
                baseline: 100.0,
                current: 150.0,
                ratio: 1.5,
                pct: 0.5,
                regression: 0.5,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Fail,
            },
        );
        compare.deltas.insert(
            Metric::ThroughputPerS,
            Delta {
                baseline: 100.0,
                current: 90.0,
                ratio: 0.9,
                pct: -0.1,
                regression: 0.0,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Pass,
            },
        );

        let lines = github_annotations(&compare);
        assert_eq!(lines.len(), 2);
        assert!(lines.iter().any(|l| l.starts_with("::warning::")));
        assert!(lines.iter().any(|l| l.starts_with("::error::")));
        assert!(lines.iter().all(|l| !l.contains("throughput_per_s")));
    }

    // Non-empty stdout becomes Some(...); empty stderr stays None.
    #[test]
    fn sample_from_run_sets_optional_stdout_stderr() {
        let run = RunResult {
            wall_ms: 10,
            exit_code: 0,
            timed_out: false,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb: None,
            binary_bytes: None,
            stdout: b"ok".to_vec(),
            stderr: Vec::new(),
        };

        let sample = sample_from_run(run, false);
        assert_eq!(sample.stdout.as_deref(), Some("ok"));
        assert!(sample.stderr.is_none());
    }

    // Host mismatch policy matrix: Error fails on mismatched OS, succeeds
    // on identical hosts; Ignore never reports a mismatch.
    #[test]
    fn compare_use_case_host_mismatch_policies() {
        let baseline = make_run_receipt_with_host(
            HostInfo {
                os: "linux".to_string(),
                arch: "x86_64".to_string(),
                cpu_count: None,
                memory_bytes: None,
                hostname_hash: None,
            },
            100,
        );
        let current = make_run_receipt_with_host(
            HostInfo {
                os: "windows".to_string(),
                arch: "x86_64".to_string(),
                cpu_count: None,
                memory_bytes: None,
                hostname_hash: None,
            },
            100,
        );

        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.1,
                direction: Direction::Lower,
            },
        );

        // Error policy + mismatched hosts -> hard error.
        let err = CompareUseCase::execute(CompareRequest {
            baseline: baseline.clone(),
            current: current.clone(),
            budgets: budgets.clone(),
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Error,
        })
        .unwrap_err();
        assert!(err.to_string().contains("host mismatch"));

        // Error policy + identical hosts -> no error, no mismatch reported.
        let matching = CompareUseCase::execute(CompareRequest {
            baseline: baseline.clone(),
            current: baseline.clone(),
            budgets: budgets.clone(),
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Error,
        })
        .expect("matching hosts should not error");
        assert!(matching.host_mismatch.is_none());

        // Ignore policy + mismatched hosts -> success with detection skipped.
        let ignore = CompareUseCase::execute(CompareRequest {
            baseline,
            current,
            budgets,
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Ignore,
        })
        .expect("ignore mismatch should succeed");

        assert!(ignore.host_mismatch.is_none());
    }
}
694
695#[cfg(test)]
696mod property_tests {
697    use super::*;
698    use perfgate_types::{
699        Delta, Direction, MetricStatistic, MetricStatus, Verdict, VerdictCounts, VerdictStatus,
700    };
701    use proptest::prelude::*;
702
703    // --- Strategies for generating CompareReceipt ---
704
705    // Strategy for generating valid non-empty strings (for names, IDs, etc.)
706    fn non_empty_string() -> impl Strategy<Value = String> {
707        "[a-zA-Z0-9_-]{1,20}".prop_map(|s| s)
708    }
709
710    // Strategy for ToolInfo
711    fn tool_info_strategy() -> impl Strategy<Value = ToolInfo> {
712        (non_empty_string(), non_empty_string())
713            .prop_map(|(name, version)| ToolInfo { name, version })
714    }
715
716    // Strategy for BenchMeta
717    fn bench_meta_strategy() -> impl Strategy<Value = BenchMeta> {
718        (
719            non_empty_string(),
720            proptest::option::of(non_empty_string()),
721            proptest::collection::vec(non_empty_string(), 1..5),
722            1u32..100,
723            0u32..10,
724            proptest::option::of(1u64..10000),
725            proptest::option::of(100u64..60000),
726        )
727            .prop_map(
728                |(name, cwd, command, repeat, warmup, work_units, timeout_ms)| BenchMeta {
729                    name,
730                    cwd,
731                    command,
732                    repeat,
733                    warmup,
734                    work_units,
735                    timeout_ms,
736                },
737            )
738    }
739
740    // Strategy for CompareRef
741    fn compare_ref_strategy() -> impl Strategy<Value = CompareRef> {
742        (
743            proptest::option::of(non_empty_string()),
744            proptest::option::of(non_empty_string()),
745        )
746            .prop_map(|(path, run_id)| CompareRef { path, run_id })
747    }
748
749    // Strategy for Direction
750    fn direction_strategy() -> impl Strategy<Value = Direction> {
751        prop_oneof![Just(Direction::Lower), Just(Direction::Higher),]
752    }
753
754    // Strategy for Budget - using finite positive floats for thresholds
755    fn budget_strategy() -> impl Strategy<Value = Budget> {
756        (0.01f64..1.0, 0.01f64..1.0, direction_strategy()).prop_map(
757            |(threshold, warn_factor, direction)| {
758                // warn_threshold should be <= threshold
759                let warn_threshold = threshold * warn_factor;
760                Budget {
761                    threshold,
762                    warn_threshold,
763                    direction,
764                }
765            },
766        )
767    }
768
769    // Strategy for MetricStatus
770    fn metric_status_strategy() -> impl Strategy<Value = MetricStatus> {
771        prop_oneof![
772            Just(MetricStatus::Pass),
773            Just(MetricStatus::Warn),
774            Just(MetricStatus::Fail),
775        ]
776    }
777
778    // Strategy for Delta - using finite positive floats
779    fn delta_strategy() -> impl Strategy<Value = Delta> {
780        (
781            0.1f64..10000.0, // baseline (positive, non-zero)
782            0.1f64..10000.0, // current (positive, non-zero)
783            metric_status_strategy(),
784        )
785            .prop_map(|(baseline, current, status)| {
786                let ratio = current / baseline;
787                let pct = (current - baseline) / baseline;
788                let regression = if pct > 0.0 { pct } else { 0.0 };
789                Delta {
790                    baseline,
791                    current,
792                    ratio,
793                    pct,
794                    regression,
795                    statistic: MetricStatistic::Median,
796                    significance: None,
797                    status,
798                }
799            })
800    }
801
802    // Strategy for VerdictStatus
803    fn verdict_status_strategy() -> impl Strategy<Value = VerdictStatus> {
804        prop_oneof![
805            Just(VerdictStatus::Pass),
806            Just(VerdictStatus::Warn),
807            Just(VerdictStatus::Fail),
808        ]
809    }
810
811    // Strategy for VerdictCounts
812    fn verdict_counts_strategy() -> impl Strategy<Value = VerdictCounts> {
813        (0u32..10, 0u32..10, 0u32..10).prop_map(|(pass, warn, fail)| VerdictCounts {
814            pass,
815            warn,
816            fail,
817        })
818    }
819
820    // Strategy for Verdict with reasons
821    fn verdict_strategy() -> impl Strategy<Value = Verdict> {
822        (
823            verdict_status_strategy(),
824            verdict_counts_strategy(),
825            proptest::collection::vec("[a-zA-Z0-9 ]{1,50}", 0..5),
826        )
827            .prop_map(|(status, counts, reasons)| Verdict {
828                status,
829                counts,
830                reasons,
831            })
832    }
833
834    // Strategy for Metric
835    fn metric_strategy() -> impl Strategy<Value = Metric> {
836        prop_oneof![
837            Just(Metric::BinaryBytes),
838            Just(Metric::CpuMs),
839            Just(Metric::CtxSwitches),
840            Just(Metric::WallMs),
841            Just(Metric::MaxRssKb),
842            Just(Metric::PageFaults),
843            Just(Metric::ThroughputPerS),
844        ]
845    }
846
847    // Strategy for BTreeMap<Metric, Budget>
848    fn budgets_map_strategy() -> impl Strategy<Value = BTreeMap<Metric, Budget>> {
849        proptest::collection::btree_map(metric_strategy(), budget_strategy(), 0..8)
850    }
851
852    // Strategy for BTreeMap<Metric, Delta>
853    fn deltas_map_strategy() -> impl Strategy<Value = BTreeMap<Metric, Delta>> {
854        proptest::collection::btree_map(metric_strategy(), delta_strategy(), 0..8)
855    }
856
857    // Strategy for CompareReceipt
858    fn compare_receipt_strategy() -> impl Strategy<Value = CompareReceipt> {
859        (
860            tool_info_strategy(),
861            bench_meta_strategy(),
862            compare_ref_strategy(),
863            compare_ref_strategy(),
864            budgets_map_strategy(),
865            deltas_map_strategy(),
866            verdict_strategy(),
867        )
868            .prop_map(
869                |(tool, bench, baseline_ref, current_ref, budgets, deltas, verdict)| {
870                    CompareReceipt {
871                        schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
872                        tool,
873                        bench,
874                        baseline_ref,
875                        current_ref,
876                        budgets,
877                        deltas,
878                        verdict,
879                    }
880                },
881            )
882    }
883
884    // **Property 6: Markdown Rendering Completeness**
885    //
886    // For any valid CompareReceipt, the rendered Markdown SHALL contain:
887    // - A header with the correct verdict emoji (✅ for Pass, ⚠️ for Warn, ❌ for Fail)
888    // - The benchmark name
889    // - A table row for each metric in deltas
890    // - All verdict reasons (if any)
891    //
892    // **Validates: Requirements 7.2, 7.3, 7.4, 7.5**
893    proptest! {
894        #![proptest_config(ProptestConfig::with_cases(100))]
895
896        #[test]
897        fn markdown_rendering_completeness(receipt in compare_receipt_strategy()) {
898            let md = render_markdown(&receipt);
899
900            // Verify header contains correct verdict emoji (Requirement 7.2)
901            let expected_emoji = match receipt.verdict.status {
902                VerdictStatus::Pass => "✅",
903                VerdictStatus::Warn => "⚠️",
904                VerdictStatus::Fail => "❌",
905            };
906            prop_assert!(
907                md.contains(expected_emoji),
908                "Markdown should contain verdict emoji '{}' for status {:?}. Got:\n{}",
909                expected_emoji,
910                receipt.verdict.status,
911                md
912            );
913
914            // Verify header contains "perfgate" and verdict status word
915            let expected_status_word = match receipt.verdict.status {
916                VerdictStatus::Pass => "pass",
917                VerdictStatus::Warn => "warn",
918                VerdictStatus::Fail => "fail",
919            };
920            prop_assert!(
921                md.contains(expected_status_word),
922                "Markdown should contain status word '{}'. Got:\n{}",
923                expected_status_word,
924                md
925            );
926
927            // Verify benchmark name is present (Requirement 7.3)
928            prop_assert!(
929                md.contains(&receipt.bench.name),
930                "Markdown should contain benchmark name '{}'. Got:\n{}",
931                receipt.bench.name,
932                md
933            );
934
935            // Verify table header is present (Requirement 7.4)
936            prop_assert!(
937                md.contains("| metric |"),
938                "Markdown should contain table header. Got:\n{}",
939                md
940            );
941
942            // Verify a table row exists for each metric in deltas (Requirement 7.4)
943            for metric in receipt.deltas.keys() {
944                let metric_name = metric.as_str();
945                prop_assert!(
946                    md.contains(metric_name),
947                    "Markdown should contain metric '{}'. Got:\n{}",
948                    metric_name,
949                    md
950                );
951            }
952
953            // Verify all verdict reasons are present (Requirement 7.5)
954            for reason in &receipt.verdict.reasons {
955                prop_assert!(
956                    md.contains(reason),
957                    "Markdown should contain verdict reason '{}'. Got:\n{}",
958                    reason,
959                    md
960                );
961            }
962
963            // If there are reasons, verify the Notes section exists
964            if !receipt.verdict.reasons.is_empty() {
965                prop_assert!(
966                    md.contains("**Notes:**"),
967                    "Markdown should contain Notes section when there are reasons. Got:\n{}",
968                    md
969                );
970            }
971        }
972    }
973
974    // **Property 7: GitHub Annotation Generation**
975    //
976    // For any valid CompareReceipt:
977    // - Metrics with Fail status SHALL produce exactly one `::error::` annotation
978    // - Metrics with Warn status SHALL produce exactly one `::warning::` annotation
979    // - Metrics with Pass status SHALL produce no annotations
980    // - Each annotation SHALL contain the bench name, metric name, and delta percentage
981    //
982    // **Validates: Requirements 8.2, 8.3, 8.4, 8.5**
983    proptest! {
984        #![proptest_config(ProptestConfig::with_cases(100))]
985
986        #[test]
987        fn github_annotation_generation(receipt in compare_receipt_strategy()) {
988            let annotations = github_annotations(&receipt);
989
990            // Count expected annotations by status
991            let expected_fail_count = receipt.deltas.values()
992                .filter(|d| d.status == MetricStatus::Fail)
993                .count();
994            let expected_warn_count = receipt.deltas.values()
995                .filter(|d| d.status == MetricStatus::Warn)
996                .count();
997            let expected_pass_count = receipt.deltas.values()
998                .filter(|d| d.status == MetricStatus::Pass)
999                .count();
1000
1001            // Count actual annotations by type
1002            let actual_error_count = annotations.iter()
1003                .filter(|a| a.starts_with("::error::"))
1004                .count();
1005            let actual_warning_count = annotations.iter()
1006                .filter(|a| a.starts_with("::warning::"))
1007                .count();
1008
1009            // Requirement 8.2: Fail status produces exactly one ::error:: annotation
1010            prop_assert_eq!(
1011                actual_error_count,
1012                expected_fail_count,
1013                "Expected {} ::error:: annotations for {} Fail metrics, got {}. Annotations: {:?}",
1014                expected_fail_count,
1015                expected_fail_count,
1016                actual_error_count,
1017                annotations
1018            );
1019
1020            // Requirement 8.3: Warn status produces exactly one ::warning:: annotation
1021            prop_assert_eq!(
1022                actual_warning_count,
1023                expected_warn_count,
1024                "Expected {} ::warning:: annotations for {} Warn metrics, got {}. Annotations: {:?}",
1025                expected_warn_count,
1026                expected_warn_count,
1027                actual_warning_count,
1028                annotations
1029            );
1030
1031            // Requirement 8.4: Pass status produces no annotations
1032            // Total annotations should equal fail + warn count (no pass annotations)
1033            let total_annotations = annotations.len();
1034            let expected_total = expected_fail_count + expected_warn_count;
1035            prop_assert_eq!(
1036                total_annotations,
1037                expected_total,
1038                "Expected {} total annotations (fail: {}, warn: {}, pass: {} should produce none), got {}. Annotations: {:?}",
1039                expected_total,
1040                expected_fail_count,
1041                expected_warn_count,
1042                expected_pass_count,
1043                total_annotations,
1044                annotations
1045            );
1046
1047            // Requirement 8.5: Each annotation contains bench name, metric name, and delta percentage
1048            for (metric, delta) in &receipt.deltas {
1049                if delta.status == MetricStatus::Pass {
1050                    continue; // Pass metrics don't produce annotations
1051                }
1052
1053                let metric_name = metric.as_str();
1054
1055                // Find the annotation for this metric
1056                let matching_annotation = annotations.iter().find(|a| a.contains(metric_name));
1057
1058                prop_assert!(
1059                    matching_annotation.is_some(),
1060                    "Expected annotation for metric '{}' with status {:?}. Annotations: {:?}",
1061                    metric_name,
1062                    delta.status,
1063                    annotations
1064                );
1065
1066                let annotation = matching_annotation.unwrap();
1067
1068                // Verify annotation contains bench name
1069                prop_assert!(
1070                    annotation.contains(&receipt.bench.name),
1071                    "Annotation should contain bench name '{}'. Got: {}",
1072                    receipt.bench.name,
1073                    annotation
1074                );
1075
1076                // Verify annotation contains metric name
1077                prop_assert!(
1078                    annotation.contains(metric_name),
1079                    "Annotation should contain metric name '{}'. Got: {}",
1080                    metric_name,
1081                    annotation
1082                );
1083
1084                // Verify annotation contains delta percentage (formatted as +X.XX% or -X.XX%)
1085                // The format_pct function produces strings like "+10.00%" or "-5.50%"
1086                let pct_str = format_pct(delta.pct);
1087                prop_assert!(
1088                    annotation.contains(&pct_str),
1089                    "Annotation should contain delta percentage '{}'. Got: {}",
1090                    pct_str,
1091                    annotation
1092                );
1093
1094                // Verify correct annotation type based on status
1095                match delta.status {
1096                    MetricStatus::Fail => {
1097                        prop_assert!(
1098                            annotation.starts_with("::error::"),
1099                            "Fail metric should produce ::error:: annotation. Got: {}",
1100                            annotation
1101                        );
1102                    }
1103                    MetricStatus::Warn => {
1104                        prop_assert!(
1105                            annotation.starts_with("::warning::"),
1106                            "Warn metric should produce ::warning:: annotation. Got: {}",
1107                            annotation
1108                        );
1109                    }
1110                    MetricStatus::Pass => unreachable!(),
1111                }
1112            }
1113        }
1114    }
1115}