// perfgate_app/lib.rs

//! Application layer for perfgate.
//!
//! The app layer coordinates adapters and domain logic.
//! It does not parse CLI flags and it does not do filesystem I/O.
5
6mod aggregate;
7pub mod baseline_resolve;
8pub mod bisect;
9pub mod blame;
10mod check;
11pub mod comparison_logic;
12mod explain;
13mod paired;
14mod promote;
15mod report;
16mod sensor_report;
17
18pub use aggregate::{AggregateOutcome, AggregateRequest, AggregateUseCase};
19pub use bisect::{BisectRequest, BisectUseCase};
20pub use blame::{BlameOutcome, BlameRequest, BlameUseCase};
21pub use check::{CheckOutcome, CheckRequest, CheckUseCase};
22pub use explain::{ExplainOutcome, ExplainRequest, ExplainUseCase};
23pub use paired::{PairedRunOutcome, PairedRunRequest, PairedRunUseCase};
24pub use promote::{PromoteRequest, PromoteResult, PromoteUseCase};
25pub use report::{ReportRequest, ReportResult, ReportUseCase};
26pub use sensor_report::{
27    BenchOutcome, SensorCheckOptions, SensorReportBuilder, classify_error,
28    default_engine_capability, run_sensor_check, sensor_fingerprint,
29};
30
31// Re-export rendering functions from perfgate-render for backward compatibility
32pub use perfgate_render::{
33    direction_str, format_metric, format_metric_with_statistic, format_pct, format_value,
34    github_annotations, markdown_template_context, metric_status_icon, metric_status_str,
35    parse_reason_token, render_markdown, render_markdown_template, render_reason_line,
36};
37
38// Re-export export functionality from perfgate-export for backward compatibility
39pub use perfgate_export::{CompareExportRow, ExportFormat, ExportUseCase, RunExportRow};
40
41use perfgate_adapters::{CommandSpec, HostProbe, HostProbeOptions, ProcessRunner, RunResult};
42use perfgate_domain::{
43    Comparison, SignificancePolicy, compare_runs, compute_stats, detect_host_mismatch,
44};
45use perfgate_types::{
46    BenchMeta, Budget, CompareReceipt, CompareRef, HostMismatchInfo, HostMismatchPolicy, Metric,
47    MetricStatistic, RunMeta, RunReceipt, Sample, ToolInfo,
48};
49use std::collections::BTreeMap;
50use std::path::PathBuf;
51use std::time::Duration;
52
/// Abstraction over wall-clock time so use cases can be driven by a fixed
/// clock in tests instead of the real system time.
pub trait Clock: Send + Sync {
    /// Returns the current moment formatted as an RFC 3339 timestamp string.
    fn now_rfc3339(&self) -> String;
}
56
57#[derive(Debug, Default, Clone)]
58pub struct SystemClock;
59
60impl Clock for SystemClock {
61    fn now_rfc3339(&self) -> String {
62        use time::format_description::well_known::Rfc3339;
63        time::OffsetDateTime::now_utc()
64            .format(&Rfc3339)
65            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string())
66    }
67}
68
/// Input for [`RunBenchUseCase::execute`]: everything needed to run one
/// benchmark command a fixed number of times.
#[derive(Debug, Clone, Default)]
pub struct RunBenchRequest {
    /// Benchmark name recorded in the receipt.
    pub name: String,
    /// Working directory for the command, if any.
    pub cwd: Option<PathBuf>,
    /// Command argv (program plus arguments).
    pub command: Vec<String>,
    /// Number of measured iterations.
    pub repeat: u32,
    /// Number of warmup iterations run before measurement.
    pub warmup: u32,
    /// Optional work-unit count used to derive throughput statistics.
    pub work_units: Option<u64>,
    /// Per-iteration timeout, if any.
    pub timeout: Option<Duration>,
    /// Extra environment variables for the command.
    pub env: Vec<(String, String)>,
    /// Cap on captured stdout/stderr bytes per iteration.
    pub output_cap_bytes: usize,

    /// If true, do not treat nonzero exit codes as a tool error.
    /// The receipt will still record exit codes.
    pub allow_nonzero: bool,

    /// If true, include a hashed hostname in the host fingerprint.
    /// This is opt-in for privacy reasons.
    pub include_hostname_hash: bool,
}
89
/// Result of running a benchmark: the full receipt plus a failure summary.
#[derive(Debug, Clone)]
pub struct RunBenchOutcome {
    /// Receipt covering all iterations (warmup and measured).
    pub receipt: RunReceipt,

    /// True if any measured (non-warmup) sample timed out or returned nonzero.
    pub failed: bool,

    /// Human-readable reasons (for CI logs).
    pub reasons: Vec<String>,
}
100
/// Use case that executes a benchmark command repeatedly and assembles a
/// [`RunReceipt`]. Collaborators are injected so the logic stays testable.
pub struct RunBenchUseCase<R: ProcessRunner, H: HostProbe, C: Clock> {
    // Runs the benchmark command for each iteration.
    runner: R,
    // Fingerprints the host machine for the receipt.
    host_probe: H,
    // Supplies start/end timestamps.
    clock: C,
    // Tool name/version recorded in the receipt.
    tool: ToolInfo,
}
107
108impl<R: ProcessRunner, H: HostProbe, C: Clock> RunBenchUseCase<R, H, C> {
109    pub fn new(runner: R, host_probe: H, clock: C, tool: ToolInfo) -> Self {
110        Self {
111            runner,
112            host_probe,
113            clock,
114            tool,
115        }
116    }
117
118    pub fn execute(&self, req: RunBenchRequest) -> anyhow::Result<RunBenchOutcome> {
119        let run_id = uuid::Uuid::new_v4().to_string();
120        let started_at = self.clock.now_rfc3339();
121
122        let host_options = HostProbeOptions {
123            include_hostname_hash: req.include_hostname_hash,
124        };
125        let host = self.host_probe.probe(&host_options);
126
127        let bench = BenchMeta {
128            name: req.name.clone(),
129            cwd: req.cwd.as_ref().map(|p| p.to_string_lossy().to_string()),
130            command: req.command.clone(),
131            repeat: req.repeat,
132            warmup: req.warmup,
133            work_units: req.work_units,
134            timeout_ms: req.timeout.map(|d| d.as_millis() as u64),
135        };
136
137        let mut samples: Vec<Sample> = Vec::new();
138        let mut reasons: Vec<String> = Vec::new();
139
140        let total = req.warmup + req.repeat;
141
142        for i in 0..total {
143            let is_warmup = i < req.warmup;
144
145            let spec = CommandSpec {
146                name: req.name.clone(),
147                argv: req.command.clone(),
148                cwd: req.cwd.clone(),
149                env: req.env.clone(),
150                timeout: req.timeout,
151                output_cap_bytes: req.output_cap_bytes,
152            };
153
154            let run = self.runner.run(&spec).map_err(|e| match e {
155                perfgate_adapters::AdapterError::RunCommand { command, reason } => {
156                    anyhow::anyhow!("failed to run iteration {}: {}: {}", i + 1, command, reason)
157                }
158                _ => anyhow::anyhow!("failed to run iteration {}: {}", i + 1, e),
159            })?;
160
161            let s = sample_from_run(run, is_warmup);
162            if !is_warmup {
163                if s.timed_out {
164                    reasons.push(format!("iteration {} timed out", i + 1));
165                }
166                if s.exit_code != 0 {
167                    reasons.push(format!("iteration {} exit code {}", i + 1, s.exit_code));
168                }
169            }
170
171            samples.push(s);
172        }
173
174        let stats = compute_stats(&samples, req.work_units)?;
175
176        let ended_at = self.clock.now_rfc3339();
177
178        let receipt = RunReceipt {
179            schema: perfgate_types::RUN_SCHEMA_V1.to_string(),
180            tool: self.tool.clone(),
181            run: RunMeta {
182                id: run_id,
183                started_at,
184                ended_at,
185                host,
186            },
187            bench,
188            samples,
189            stats,
190        };
191
192        let failed = !reasons.is_empty();
193
194        if failed && !req.allow_nonzero {
195            // It's still a successful run from a *tooling* perspective, but callers may want a hard failure.
196            // We return the receipt either way; the CLI decides exit codes.
197        }
198
199        Ok(RunBenchOutcome {
200            receipt,
201            failed,
202            reasons,
203        })
204    }
205}
206
207fn sample_from_run(run: RunResult, warmup: bool) -> Sample {
208    Sample {
209        wall_ms: run.wall_ms,
210        exit_code: run.exit_code,
211        warmup,
212        timed_out: run.timed_out,
213        cpu_ms: run.cpu_ms,
214        page_faults: run.page_faults,
215        ctx_switches: run.ctx_switches,
216        max_rss_kb: run.max_rss_kb,
217        io_read_bytes: run.io_read_bytes,
218        io_write_bytes: run.io_write_bytes,
219        network_packets: run.network_packets,
220        energy_uj: run.energy_uj,
221        binary_bytes: run.binary_bytes,
222        stdout: if run.stdout.is_empty() {
223            None
224        } else {
225            Some(String::from_utf8_lossy(&run.stdout).to_string())
226        },
227        stderr: if run.stderr.is_empty() {
228            None
229        } else {
230            Some(String::from_utf8_lossy(&run.stderr).to_string())
231        },
232    }
233}
234
235#[derive(Debug, Clone)]
236pub struct CompareRequest {
237    pub baseline: RunReceipt,
238    pub current: RunReceipt,
239    pub budgets: BTreeMap<Metric, Budget>,
240    pub metric_statistics: BTreeMap<Metric, MetricStatistic>,
241    pub significance: Option<SignificancePolicy>,
242    pub baseline_ref: CompareRef,
243    pub current_ref: CompareRef,
244    pub tool: ToolInfo,
245    /// Policy for handling host mismatches.
246    #[allow(dead_code)]
247    pub host_mismatch_policy: HostMismatchPolicy,
248}
249
/// Result from CompareUseCase including host mismatch information.
#[derive(Debug, Clone)]
pub struct CompareResult {
    /// The fully-built compare receipt (schema, deltas, verdict, ...).
    pub receipt: CompareReceipt,
    /// Host mismatch info if detected (only populated when policy is not Ignore).
    pub host_mismatch: Option<HostMismatchInfo>,
}
257
258pub struct CompareUseCase;
259
260impl CompareUseCase {
261    pub fn execute(req: CompareRequest) -> anyhow::Result<CompareResult> {
262        // Check for host mismatch
263        let host_mismatch = if req.host_mismatch_policy != HostMismatchPolicy::Ignore {
264            detect_host_mismatch(&req.baseline.run.host, &req.current.run.host)
265        } else {
266            None
267        };
268
269        // If policy is Error and there's a mismatch, fail immediately
270        if req.host_mismatch_policy == HostMismatchPolicy::Error
271            && let Some(mismatch) = &host_mismatch
272        {
273            anyhow::bail!(
274                "host mismatch detected (--host-mismatch=error): {}",
275                mismatch.reasons.join("; ")
276            );
277        }
278
279        let Comparison { deltas, verdict } = compare_runs(
280            &req.baseline,
281            &req.current,
282            &req.budgets,
283            &req.metric_statistics,
284            req.significance,
285        )?;
286
287        let receipt = CompareReceipt {
288            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
289            tool: req.tool,
290            bench: req.current.bench,
291            baseline_ref: req.baseline_ref,
292            current_ref: req.current_ref,
293            budgets: req.budgets,
294            deltas,
295            verdict,
296        };
297
298        Ok(CompareResult {
299            receipt,
300            host_mismatch,
301        })
302    }
303}
304
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_types::{
        Delta, Direction, HostInfo, MetricStatistic, MetricStatus, RUN_SCHEMA_V1, RunMeta,
        RunReceipt, Stats, U64Summary, Verdict, VerdictCounts, VerdictStatus,
    };
    use std::collections::BTreeMap;

    // Builds a CompareReceipt with a single wall_ms delta (100 -> 115, +15%)
    // whose metric status — and matching verdict counts — are set from `status`.
    fn make_compare_receipt(status: MetricStatus) -> CompareReceipt {
        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.1,
                noise_threshold: None,
                noise_policy: perfgate_types::NoisePolicy::Ignore,
                direction: Direction::Lower,
            },
        );

        let mut deltas = BTreeMap::new();
        deltas.insert(
            Metric::WallMs,
            Delta {
                baseline: 100.0,
                current: 115.0,
                ratio: 1.15,
                pct: 0.15,
                regression: 0.15,
                cv: None,
                noise_threshold: None,
                statistic: MetricStatistic::Median,
                significance: None,
                status,
            },
        );

        CompareReceipt {
            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".into(),
                version: "0.1.0".into(),
            },
            bench: BenchMeta {
                name: "bench".into(),
                cwd: None,
                command: vec!["true".into()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            budgets,
            deltas,
            verdict: Verdict {
                status: VerdictStatus::Warn,
                counts: VerdictCounts {
                    // Exactly one metric, so exactly one count is 1.
                    pass: if status == MetricStatus::Pass { 1 } else { 0 },
                    warn: if status == MetricStatus::Warn { 1 } else { 0 },
                    fail: if status == MetricStatus::Fail { 1 } else { 0 },
                    skip: if status == MetricStatus::Skip { 1 } else { 0 },
                },
                reasons: vec!["wall_ms_warn".to_string()],
            },
        }
    }

    // Builds a minimal RunReceipt with the given host fingerprint and a flat
    // wall_ms summary (min == median == max == wall_ms), no samples.
    fn make_run_receipt_with_host(host: HostInfo, wall_ms: u64) -> RunReceipt {
        RunReceipt {
            schema: RUN_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            run: RunMeta {
                id: "run-id".to_string(),
                started_at: "2024-01-01T00:00:00Z".to_string(),
                ended_at: "2024-01-01T00:00:01Z".to_string(),
                host,
            },
            bench: BenchMeta {
                name: "bench".to_string(),
                cwd: None,
                command: vec!["true".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            samples: Vec::new(),
            stats: Stats {
                wall_ms: U64Summary::new(wall_ms, wall_ms, wall_ms),
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                throughput_per_s: None,
            },
        }
    }

    // The default Markdown renderer emits a header plus a metric table.
    #[test]
    fn markdown_renders_table() {
        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.18,
                noise_threshold: None,
                noise_policy: perfgate_types::NoisePolicy::Ignore,
                direction: Direction::Lower,
            },
        );

        let mut deltas = BTreeMap::new();
        deltas.insert(
            Metric::WallMs,
            Delta {
                baseline: 1000.0,
                current: 1100.0,
                ratio: 1.1,
                pct: 0.1,
                regression: 0.1,
                cv: None,
                noise_threshold: None,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Pass,
            },
        );

        let compare = CompareReceipt {
            schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".into(),
                version: "0.1.0".into(),
            },
            bench: BenchMeta {
                name: "demo".into(),
                cwd: None,
                command: vec!["true".into()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            budgets,
            deltas,
            verdict: Verdict {
                status: VerdictStatus::Pass,
                counts: VerdictCounts {
                    pass: 1,
                    warn: 0,
                    fail: 0,
                    skip: 0,
                },
                reasons: vec![],
            },
        };

        let md = render_markdown(&compare);
        assert!(md.contains("| metric | baseline"));
        assert!(md.contains("wall_ms"));
    }

    // Custom templates can access bench metadata and iterate delta rows.
    #[test]
    fn markdown_template_renders_context_rows() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let template = "{{header}}\nbench={{bench.name}}\n{{#each rows}}metric={{metric}} status={{status}}\n{{/each}}";

        let rendered = render_markdown_template(&compare, template).expect("render template");
        assert!(rendered.contains("bench=bench"));
        assert!(rendered.contains("metric=wall_ms"));
        assert!(rendered.contains("status=warn"));
    }

    // Unknown template fields must error rather than render empty.
    #[test]
    fn markdown_template_strict_mode_rejects_unknown_fields() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let err = render_markdown_template(&compare, "{{does_not_exist}}").unwrap_err();
        assert!(
            err.to_string().contains("render markdown template"),
            "unexpected error: {}",
            err
        );
    }

    // Reason tokens are "<metric>_<status>"; pass-status and unknown-metric
    // tokens are rejected.
    #[test]
    fn parse_reason_token_handles_valid_and_invalid() {
        let parsed = parse_reason_token("wall_ms_warn");
        assert!(parsed.is_some());
        let (metric, status) = parsed.unwrap();
        assert_eq!(metric, Metric::WallMs);
        assert_eq!(status, MetricStatus::Warn);

        assert!(parse_reason_token("wall_ms_pass").is_none());
        assert!(parse_reason_token("unknown_warn").is_none());
    }

    // A reason line for a budgeted metric spells out both thresholds and the
    // observed regression percentage.
    #[test]
    fn render_reason_line_formats_thresholds() {
        let compare = make_compare_receipt(MetricStatus::Warn);
        let line = render_reason_line(&compare, "wall_ms_warn");
        assert!(line.contains("warn >="));
        assert!(line.contains("fail >"));
        assert!(line.contains("+15.00%"));
    }

    // Without a budget for the metric, the raw token is echoed back.
    #[test]
    fn render_reason_line_falls_back_when_missing_budget() {
        let mut compare = make_compare_receipt(MetricStatus::Warn);
        compare.budgets.clear();
        let line = render_reason_line(&compare, "wall_ms_warn");
        assert_eq!(line, "- wall_ms_warn\n");
    }

    // Value formatting: throughput keeps 3 decimals, wall_ms is integral;
    // percentages carry an explicit sign except for zero.
    #[test]
    fn format_value_and_pct_render_expected_strings() {
        assert_eq!(format_value(Metric::ThroughputPerS, 1.23456), "1.235");
        assert_eq!(format_value(Metric::WallMs, 123.0), "123");
        assert_eq!(format_pct(0.1), "+10.00%");
        assert_eq!(format_pct(-0.1), "-10.00%");
        assert_eq!(format_pct(0.0), "0.00%");
    }

    // GitHub annotations are emitted only for Warn (::warning::) and
    // Fail (::error::) deltas; passing metrics produce no lines.
    #[test]
    fn github_annotations_only_warn_and_fail() {
        let mut compare = make_compare_receipt(MetricStatus::Warn);
        compare.deltas.insert(
            Metric::MaxRssKb,
            Delta {
                baseline: 100.0,
                current: 150.0,
                ratio: 1.5,
                pct: 0.5,
                regression: 0.5,
                cv: None,
                noise_threshold: None,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Fail,
            },
        );
        compare.deltas.insert(
            Metric::ThroughputPerS,
            Delta {
                baseline: 100.0,
                current: 90.0,
                ratio: 0.9,
                pct: -0.1,
                regression: 0.0,
                cv: None,
                noise_threshold: None,
                statistic: MetricStatistic::Median,
                significance: None,
                status: MetricStatus::Pass,
            },
        );

        let lines = github_annotations(&compare);
        assert_eq!(lines.len(), 2);
        assert!(lines.iter().any(|l| l.starts_with("::warning::")));
        assert!(lines.iter().any(|l| l.starts_with("::error::")));
        assert!(lines.iter().all(|l| !l.contains("throughput_per_s")));
    }

    // Non-empty stdout is captured as Some(text); empty stderr stays None.
    #[test]
    fn sample_from_run_sets_optional_stdout_stderr() {
        let run = RunResult {
            wall_ms: 100,
            exit_code: 0,
            timed_out: false,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb: None,
            io_read_bytes: None,
            io_write_bytes: None,
            network_packets: None,
            energy_uj: None,
            binary_bytes: None,
            stdout: b"ok".to_vec(),
            stderr: vec![],
        };

        let sample = sample_from_run(run, false);
        assert_eq!(sample.stdout.as_deref(), Some("ok"));
        assert!(sample.stderr.is_none());
    }

    // Exercises all three host-mismatch paths: Error policy with a mismatch
    // (fails), Error policy with matching hosts (succeeds, no mismatch info),
    // and Ignore policy with a mismatch (succeeds, mismatch not reported).
    #[test]
    fn compare_use_case_host_mismatch_policies() {
        let baseline = make_run_receipt_with_host(
            HostInfo {
                os: "linux".to_string(),
                arch: "x86_64".to_string(),
                cpu_count: None,
                memory_bytes: None,
                hostname_hash: None,
            },
            100,
        );
        let current = make_run_receipt_with_host(
            HostInfo {
                os: "windows".to_string(),
                arch: "x86_64".to_string(),
                cpu_count: None,
                memory_bytes: None,
                hostname_hash: None,
            },
            100,
        );

        let mut budgets = BTreeMap::new();
        budgets.insert(
            Metric::WallMs,
            Budget {
                threshold: 0.2,
                warn_threshold: 0.1,
                noise_threshold: None,
                noise_policy: perfgate_types::NoisePolicy::Ignore,
                direction: Direction::Lower,
            },
        );

        let err = CompareUseCase::execute(CompareRequest {
            baseline: baseline.clone(),
            current: current.clone(),
            budgets: budgets.clone(),
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Error,
        })
        .unwrap_err();
        assert!(err.to_string().contains("host mismatch"));

        let matching = CompareUseCase::execute(CompareRequest {
            baseline: baseline.clone(),
            current: baseline.clone(),
            budgets: budgets.clone(),
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Error,
        })
        .expect("matching hosts should not error");
        assert!(matching.host_mismatch.is_none());

        let ignore = CompareUseCase::execute(CompareRequest {
            baseline,
            current,
            budgets,
            metric_statistics: BTreeMap::new(),
            significance: None,
            baseline_ref: CompareRef {
                path: None,
                run_id: None,
            },
            current_ref: CompareRef {
                path: None,
                run_id: None,
            },
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
            host_mismatch_policy: HostMismatchPolicy::Ignore,
        })
        .expect("ignore mismatch should succeed");

        assert!(ignore.host_mismatch.is_none());
    }
}
725
726#[cfg(test)]
727mod property_tests {
728    use super::*;
729    use perfgate_types::{
730        Delta, Direction, MetricStatistic, MetricStatus, Verdict, VerdictCounts, VerdictStatus,
731    };
732    use proptest::prelude::*;
733
734    // --- Strategies for generating CompareReceipt ---
735
736    // Strategy for generating valid non-empty strings (for names, IDs, etc.)
737    fn non_empty_string() -> impl Strategy<Value = String> {
738        "[a-zA-Z0-9_-]{1,20}".prop_map(|s| s)
739    }
740
741    // Strategy for ToolInfo
742    fn tool_info_strategy() -> impl Strategy<Value = ToolInfo> {
743        (non_empty_string(), non_empty_string())
744            .prop_map(|(name, version)| ToolInfo { name, version })
745    }
746
747    // Strategy for BenchMeta
748    fn bench_meta_strategy() -> impl Strategy<Value = BenchMeta> {
749        (
750            non_empty_string(),
751            proptest::option::of(non_empty_string()),
752            proptest::collection::vec(non_empty_string(), 1..5),
753            1u32..100,
754            0u32..10,
755            proptest::option::of(1u64..10000),
756            proptest::option::of(100u64..60000),
757        )
758            .prop_map(
759                |(name, cwd, command, repeat, warmup, work_units, timeout_ms)| BenchMeta {
760                    name,
761                    cwd,
762                    command,
763                    repeat,
764                    warmup,
765                    work_units,
766                    timeout_ms,
767                },
768            )
769    }
770
771    // Strategy for CompareRef
772    fn compare_ref_strategy() -> impl Strategy<Value = CompareRef> {
773        (
774            proptest::option::of(non_empty_string()),
775            proptest::option::of(non_empty_string()),
776        )
777            .prop_map(|(path, run_id)| CompareRef { path, run_id })
778    }
779
780    // Strategy for Direction
781    fn direction_strategy() -> impl Strategy<Value = Direction> {
782        prop_oneof![Just(Direction::Lower), Just(Direction::Higher),]
783    }
784
785    // Strategy for Budget - using finite positive floats for thresholds
786    fn budget_strategy() -> impl Strategy<Value = Budget> {
787        (0.01f64..1.0, 0.01f64..1.0, direction_strategy()).prop_map(
788            |(threshold, warn_factor, direction)| {
789                // warn_threshold should be <= threshold
790                let warn_threshold = threshold * warn_factor;
791                Budget {
792                    noise_threshold: None,
793                    noise_policy: perfgate_types::NoisePolicy::Ignore,
794                    threshold,
795                    warn_threshold,
796                    direction,
797                }
798            },
799        )
800    }
801
802    // Strategy for MetricStatus
803    fn metric_status_strategy() -> impl Strategy<Value = MetricStatus> {
804        prop_oneof![
805            Just(MetricStatus::Pass),
806            Just(MetricStatus::Warn),
807            Just(MetricStatus::Fail),
808            Just(MetricStatus::Skip),
809        ]
810    }
811
812    // Strategy for Delta - using finite positive floats
813    fn delta_strategy() -> impl Strategy<Value = Delta> {
814        (
815            0.1f64..10000.0, // baseline (positive, non-zero)
816            0.1f64..10000.0, // current (positive, non-zero)
817            metric_status_strategy(),
818        )
819            .prop_map(|(baseline, current, status)| {
820                let ratio = current / baseline;
821                let pct = (current - baseline) / baseline;
822                let regression = if pct > 0.0 { pct } else { 0.0 };
823                Delta {
824                    baseline,
825                    current,
826                    ratio,
827                    pct,
828                    regression,
829                    cv: None,
830                    noise_threshold: None,
831                    statistic: MetricStatistic::Median,
832                    significance: None,
833                    status,
834                }
835            })
836    }
837
838    // Strategy for VerdictStatus
839    fn verdict_status_strategy() -> impl Strategy<Value = VerdictStatus> {
840        prop_oneof![
841            Just(VerdictStatus::Pass),
842            Just(VerdictStatus::Warn),
843            Just(VerdictStatus::Fail),
844            Just(VerdictStatus::Skip),
845        ]
846    }
847
848    // Strategy for VerdictCounts
849    fn verdict_counts_strategy() -> impl Strategy<Value = VerdictCounts> {
850        (0u32..10, 0u32..10, 0u32..10, 0u32..10).prop_map(|(pass, warn, fail, skip)| {
851            VerdictCounts {
852                pass,
853                warn,
854                fail,
855                skip,
856            }
857        })
858    }
859
860    // Strategy for Verdict with reasons
861    fn verdict_strategy() -> impl Strategy<Value = Verdict> {
862        (
863            verdict_status_strategy(),
864            verdict_counts_strategy(),
865            proptest::collection::vec("[a-zA-Z0-9 ]{1,50}", 0..5),
866        )
867            .prop_map(|(status, counts, reasons)| Verdict {
868                status,
869                counts,
870                reasons,
871            })
872    }
873
874    // Strategy for Metric
875    fn metric_strategy() -> impl Strategy<Value = Metric> {
876        prop_oneof![
877            Just(Metric::BinaryBytes),
878            Just(Metric::CpuMs),
879            Just(Metric::CtxSwitches),
880            Just(Metric::IoReadBytes),
881            Just(Metric::IoWriteBytes),
882            Just(Metric::WallMs),
883            Just(Metric::MaxRssKb),
884            Just(Metric::NetworkPackets),
885            Just(Metric::PageFaults),
886            Just(Metric::ThroughputPerS),
887        ]
888    }
889
890    // Strategy for BTreeMap<Metric, Budget>
891    fn budgets_map_strategy() -> impl Strategy<Value = BTreeMap<Metric, Budget>> {
892        proptest::collection::btree_map(metric_strategy(), budget_strategy(), 0..8)
893    }
894
895    // Strategy for BTreeMap<Metric, Delta>
896    fn deltas_map_strategy() -> impl Strategy<Value = BTreeMap<Metric, Delta>> {
897        proptest::collection::btree_map(metric_strategy(), delta_strategy(), 0..8)
898    }
899
900    // Strategy for CompareReceipt
901    fn compare_receipt_strategy() -> impl Strategy<Value = CompareReceipt> {
902        (
903            tool_info_strategy(),
904            bench_meta_strategy(),
905            compare_ref_strategy(),
906            compare_ref_strategy(),
907            budgets_map_strategy(),
908            deltas_map_strategy(),
909            verdict_strategy(),
910        )
911            .prop_map(
912                |(tool, bench, baseline_ref, current_ref, budgets, deltas, verdict)| {
913                    CompareReceipt {
914                        schema: perfgate_types::COMPARE_SCHEMA_V1.to_string(),
915                        tool,
916                        bench,
917                        baseline_ref,
918                        current_ref,
919                        budgets,
920                        deltas,
921                        verdict,
922                    }
923                },
924            )
925    }
926
927    // **Property 6: Markdown Rendering Completeness**
928    //
929    // For any valid CompareReceipt, the rendered Markdown SHALL contain:
930    // - A header with the correct verdict emoji (✅ for Pass, ⚠️ for Warn, ❌ for Fail)
931    // - The benchmark name
932    // - A table row for each metric in deltas
933    // - All verdict reasons (if any)
934    //
935    // **Validates: Requirements 7.2, 7.3, 7.4, 7.5**
936    proptest! {
937        #![proptest_config(ProptestConfig::with_cases(100))]
938
939        #[test]
940        fn markdown_rendering_completeness(receipt in compare_receipt_strategy()) {
941            let md = render_markdown(&receipt);
942
943            // Verify header contains correct verdict emoji (Requirement 7.2)
944            let expected_emoji = match receipt.verdict.status {
945                VerdictStatus::Pass => "✅",
946                VerdictStatus::Warn => "⚠️",
947                VerdictStatus::Fail => "❌",
948                VerdictStatus::Skip => "⏭️",
949            };
950            prop_assert!(
951                md.contains(expected_emoji),
952                "Markdown should contain verdict emoji '{}' for status {:?}. Got:\n{}",
953                expected_emoji,
954                receipt.verdict.status,
955                md
956            );
957
958            // Verify header contains "perfgate" and verdict status word
959            let expected_status_word = match receipt.verdict.status {
960                VerdictStatus::Pass => "pass",
961                VerdictStatus::Warn => "warn",
962                VerdictStatus::Fail => "fail",
963                VerdictStatus::Skip => "skip",
964            };
965            prop_assert!(
966                md.contains(expected_status_word),
967                "Markdown should contain status word '{}'. Got:\n{}",
968                expected_status_word,
969                md
970            );
971
972            // Verify benchmark name is present (Requirement 7.3)
973            prop_assert!(
974                md.contains(&receipt.bench.name),
975                "Markdown should contain benchmark name '{}'. Got:\n{}",
976                receipt.bench.name,
977                md
978            );
979
980            // Verify table header is present (Requirement 7.4)
981            prop_assert!(
982                md.contains("| metric |"),
983                "Markdown should contain table header. Got:\n{}",
984                md
985            );
986
987            // Verify a table row exists for each metric in deltas (Requirement 7.4)
988            for metric in receipt.deltas.keys() {
989                let metric_name = metric.as_str();
990                prop_assert!(
991                    md.contains(metric_name),
992                    "Markdown should contain metric '{}'. Got:\n{}",
993                    metric_name,
994                    md
995                );
996            }
997
998            // Verify all verdict reasons are present (Requirement 7.5)
999            for reason in &receipt.verdict.reasons {
1000                prop_assert!(
1001                    md.contains(reason),
1002                    "Markdown should contain verdict reason '{}'. Got:\n{}",
1003                    reason,
1004                    md
1005                );
1006            }
1007
1008            // If there are reasons, verify the Notes section exists
1009            if !receipt.verdict.reasons.is_empty() {
1010                prop_assert!(
1011                    md.contains("**Notes:**"),
1012                    "Markdown should contain Notes section when there are reasons. Got:\n{}",
1013                    md
1014                );
1015            }
1016        }
1017    }
1018
    // **Property 7: GitHub Annotation Generation**
    //
    // For any valid CompareReceipt:
    // - Metrics with Fail status SHALL produce exactly one `::error::` annotation
    // - Metrics with Warn status SHALL produce exactly one `::warning::` annotation
    // - Metrics with Pass status SHALL produce no annotations
    // - Each annotation SHALL contain the bench name, metric name, and delta percentage
    //
    // **Validates: Requirements 8.2, 8.3, 8.4, 8.5**
    proptest! {
        #![proptest_config(ProptestConfig::with_cases(100))]

        #[test]
        fn github_annotation_generation(receipt in compare_receipt_strategy()) {
            let annotations = github_annotations(&receipt);

            // Count expected annotations by status
            let expected_fail_count = receipt.deltas.values()
                .filter(|d| d.status == MetricStatus::Fail)
                .count();
            let expected_warn_count = receipt.deltas.values()
                .filter(|d| d.status == MetricStatus::Warn)
                .count();
            let expected_pass_count = receipt.deltas.values()
                .filter(|d| d.status == MetricStatus::Pass)
                .count();

            // Count actual annotations by type (GitHub workflow-command prefixes)
            let actual_error_count = annotations.iter()
                .filter(|a| a.starts_with("::error::"))
                .count();
            let actual_warning_count = annotations.iter()
                .filter(|a| a.starts_with("::warning::"))
                .count();

            // Requirement 8.2: Fail status produces exactly one ::error:: annotation
            prop_assert_eq!(
                actual_error_count,
                expected_fail_count,
                "Expected {} ::error:: annotations for {} Fail metrics, got {}. Annotations: {:?}",
                expected_fail_count,
                expected_fail_count,
                actual_error_count,
                annotations
            );

            // Requirement 8.3: Warn status produces exactly one ::warning:: annotation
            prop_assert_eq!(
                actual_warning_count,
                expected_warn_count,
                "Expected {} ::warning:: annotations for {} Warn metrics, got {}. Annotations: {:?}",
                expected_warn_count,
                expected_warn_count,
                actual_warning_count,
                annotations
            );

            // Requirement 8.4: Pass status produces no annotations
            // Total annotations should equal fail + warn count (no pass annotations)
            // NOTE(review): this also implicitly asserts that Skip metrics emit no
            // annotations — confirm that is github_annotations' intended contract.
            let total_annotations = annotations.len();
            let expected_total = expected_fail_count + expected_warn_count;
            prop_assert_eq!(
                total_annotations,
                expected_total,
                "Expected {} total annotations (fail: {}, warn: {}, pass: {} should produce none), got {}. Annotations: {:?}",
                expected_total,
                expected_fail_count,
                expected_warn_count,
                expected_pass_count,
                total_annotations,
                annotations
            );

            // Requirement 8.5: Each annotation contains bench name, metric name, and delta percentage
            for (metric, delta) in &receipt.deltas {
                if delta.status == MetricStatus::Pass || delta.status == MetricStatus::Skip {
                    continue; // Pass and Skip metrics don't produce annotations
                }

                let metric_name = metric.as_str();

                // Find the annotation for this metric.
                // NOTE(review): substring match — assumes no metric name is a
                // substring of another annotation's text; confirm metric names
                // cannot overlap (e.g. one name contained in another).
                let matching_annotation = annotations.iter().find(|a| a.contains(metric_name));

                prop_assert!(
                    matching_annotation.is_some(),
                    "Expected annotation for metric '{}' with status {:?}. Annotations: {:?}",
                    metric_name,
                    delta.status,
                    annotations
                );

                let annotation = matching_annotation.unwrap();

                // Verify annotation contains bench name
                prop_assert!(
                    annotation.contains(&receipt.bench.name),
                    "Annotation should contain bench name '{}'. Got: {}",
                    receipt.bench.name,
                    annotation
                );

                // Verify annotation contains metric name
                prop_assert!(
                    annotation.contains(metric_name),
                    "Annotation should contain metric name '{}'. Got: {}",
                    metric_name,
                    annotation
                );

                // Verify annotation contains delta percentage (formatted as +X.XX% or -X.XX%)
                // The format_pct function produces strings like "+10.00%" or "-5.50%"
                let pct_str = format_pct(delta.pct);
                prop_assert!(
                    annotation.contains(&pct_str),
                    "Annotation should contain delta percentage '{}'. Got: {}",
                    pct_str,
                    annotation
                );

                // Verify correct annotation type based on status
                match delta.status {
                    MetricStatus::Fail => {
                        prop_assert!(
                            annotation.starts_with("::error::"),
                            "Fail metric should produce ::error:: annotation. Got: {}",
                            annotation
                        );
                    }
                    MetricStatus::Warn => {
                        prop_assert!(
                            annotation.starts_with("::warning::"),
                            "Warn metric should produce ::warning:: annotation. Got: {}",
                            annotation
                        );
                    }
                    // Filtered out by the `continue` above, so unreachable here.
                    MetricStatus::Pass | MetricStatus::Skip => unreachable!(),
                }
            }
        }
    }
1160}