1use perfgate_adapters::{CommandSpec, HostProbe, HostProbeOptions, ProcessRunner};
4use perfgate_domain::{compute_paired_cv, compute_paired_stats};
5use perfgate_types::{
6 NoiseDiagnostics, NoiseLevel, PAIRED_SCHEMA_V1, PairedBenchMeta, PairedRunReceipt,
7 PairedSample, PairedSampleHalf, RunMeta, SignificancePolicy, ToolInfo,
8};
9use std::path::PathBuf;
10use std::time::Duration;
11
12use crate::Clock;
13
/// Configuration for one paired benchmark run that interleaves a baseline
/// command with a current command and compares the two.
#[derive(Debug, Clone)]
pub struct PairedRunRequest {
    /// Benchmark name; also used as the prefix for per-half command labels
    /// (`{name}-baseline`, `{name}-current`).
    pub name: String,
    /// Working directory applied to both commands, if any.
    pub cwd: Option<PathBuf>,
    /// Argv for the baseline half of each pair.
    pub baseline_command: Vec<String>,
    /// Argv for the current half of each pair.
    pub current_command: Vec<String>,
    /// Number of measured pairs to collect (before any significance retries).
    pub repeat: u32,
    /// Number of warmup pairs; they are recorded in the receipt but never
    /// contribute failure reasons.
    pub warmup: u32,
    /// Optional work-unit count forwarded to statistics computation.
    pub work_units: Option<u64>,
    /// Per-command timeout applied to both halves.
    pub timeout: Option<Duration>,
    /// Environment variables set for both commands.
    pub env: Vec<(String, String)>,
    /// Cap on captured stdout/stderr bytes per command.
    pub output_cap_bytes: usize,
    /// When true, non-zero exit codes do not produce failure reasons.
    pub allow_nonzero: bool,
    /// Forwarded to the host probe; controls hostname-hash collection.
    pub include_hostname_hash: bool,
    /// Significance level forwarded to the `SignificancePolicy`.
    pub significance_alpha: Option<f64>,
    /// Minimum sample count forwarded to the `SignificancePolicy`.
    pub significance_min_samples: Option<u32>,
    /// When true, keep collecting extra pairs until the wall-time difference
    /// is statistically significant (or retries are exhausted).
    pub require_significance: bool,
    /// Maximum number of retry rounds for the significance loop.
    pub max_retries: u32,
    /// Regression threshold in PERCENT; a significant wall-time regression
    /// above it adds a failure reason.
    pub fail_on_regression: Option<f64>,
    /// Coefficient-of-variation ceiling; above it the retry loop aborts early
    /// because the benchmark is too noisy for retries to help.
    pub cv_threshold: Option<f64>,
}
38
/// Result of a paired run: the full receipt plus a pass/fail verdict.
#[derive(Debug, Clone)]
pub struct PairedRunOutcome {
    /// Complete run receipt (metadata, samples, stats, noise diagnostics).
    pub receipt: PairedRunReceipt,
    /// True whenever at least one failure reason was recorded.
    pub failed: bool,
    /// Human-readable failure reasons (timeouts, exit codes, regressions,
    /// early termination due to noise).
    pub reasons: Vec<String>,
}
45
/// Use case that executes paired benchmark runs through injected adapters:
/// a process runner, a host probe, and a clock (all swappable for tests).
pub struct PairedRunUseCase<R: ProcessRunner, H: HostProbe, C: Clock> {
    // Executes the baseline/current commands.
    runner: R,
    // Supplies host metadata for the receipt.
    host_probe: H,
    // Supplies RFC 3339 timestamps for run start/end.
    clock: C,
    // Tool identity embedded in every receipt.
    tool: ToolInfo,
}
52
53impl<R: ProcessRunner, H: HostProbe, C: Clock> PairedRunUseCase<R, H, C> {
54 pub fn new(runner: R, host_probe: H, clock: C, tool: ToolInfo) -> Self {
55 Self {
56 runner,
57 host_probe,
58 clock,
59 tool,
60 }
61 }
62
63 pub fn execute(&self, req: PairedRunRequest) -> anyhow::Result<PairedRunOutcome> {
64 let run_id = uuid::Uuid::new_v4().to_string();
65 let started_at = self.clock.now_rfc3339();
66 let host = self.host_probe.probe(&HostProbeOptions {
67 include_hostname_hash: req.include_hostname_hash,
68 });
69
70 let mut bench = PairedBenchMeta {
71 name: req.name.clone(),
72 cwd: req.cwd.as_ref().map(|p| p.to_string_lossy().to_string()),
73 baseline_command: req.baseline_command.clone(),
74 current_command: req.current_command.clone(),
75 repeat: req.repeat,
76 warmup: req.warmup,
77 work_units: req.work_units,
78 timeout_ms: req.timeout.map(|d| d.as_millis() as u64),
79 };
80
81 let mut samples = Vec::new();
82 let mut reasons = Vec::new();
83
84 for i in 0..req.warmup {
86 self.run_pair(i, true, &req, &mut samples, &mut reasons)?;
87 }
88
89 let mut pairs_collected = 0;
91 for _ in 0..req.repeat {
92 self.run_pair(
93 req.warmup + pairs_collected,
94 false,
95 &req,
96 &mut samples,
97 &mut reasons,
98 )?;
99 pairs_collected += 1;
100 }
101
102 let significance_policy = SignificancePolicy {
103 alpha: req.significance_alpha,
104 min_samples: req.significance_min_samples,
105 };
106
107 let mut retries_done: u32 = 0;
109 let mut early_termination = false;
110 loop {
111 let stats = compute_paired_stats(&samples, req.work_units, Some(&significance_policy))?;
112 let significance_reached = stats
113 .wall_diff_ms
114 .significance
115 .as_ref()
116 .map(|s| s.significant)
117 .unwrap_or(true);
118
119 if !req.require_significance || significance_reached || retries_done >= req.max_retries
120 {
121 break;
122 }
123
124 if let Some(cv_thresh) = req.cv_threshold {
126 let cv = compute_paired_cv(&samples);
127 if cv > cv_thresh {
128 early_termination = true;
129 reasons.push(format!(
130 "early termination: CV {:.3} exceeds threshold {:.3}, benchmark too noisy for retries",
131 cv, cv_thresh
132 ));
133 break;
134 }
135 }
136
137 let extra_pairs = ((retries_done as f64 + 1.0) * 1.5).ceil() as u32;
140 retries_done += 1;
141
142 for _ in 0..extra_pairs {
143 self.run_pair(
144 req.warmup + pairs_collected,
145 false,
146 &req,
147 &mut samples,
148 &mut reasons,
149 )?;
150 pairs_collected += 1;
151 }
152 }
153
154 bench.repeat = pairs_collected;
156
157 let stats = compute_paired_stats(&samples, req.work_units, Some(&significance_policy))?;
158 let ended_at = self.clock.now_rfc3339();
159
160 let noise_diagnostics = if req.max_retries > 0 {
162 let cv = compute_paired_cv(&samples);
163 Some(NoiseDiagnostics {
164 cv,
165 noise_level: NoiseLevel::from_cv(cv),
166 retries_used: retries_done,
167 early_termination,
168 })
169 } else {
170 None
171 };
172
173 let receipt = PairedRunReceipt {
174 schema: PAIRED_SCHEMA_V1.to_string(),
175 tool: self.tool.clone(),
176 run: RunMeta {
177 id: run_id,
178 started_at,
179 ended_at,
180 host,
181 },
182 bench,
183 samples,
184 stats,
185 noise_diagnostics,
186 };
187
188 if let Some(threshold_pct) = req.fail_on_regression {
189 let comparison = perfgate_domain::compare_paired_stats(&receipt.stats);
190 let threshold_fraction = threshold_pct / 100.0;
191 if comparison.pct_change > threshold_fraction && comparison.is_significant {
192 reasons.push(format!(
193 "wall time regression ({:.2}%) exceeded threshold ({:.2}%)",
194 comparison.pct_change * 100.0,
195 threshold_pct
196 ));
197 }
198 }
199
200 let failed = !reasons.is_empty();
201 Ok(PairedRunOutcome {
202 receipt,
203 failed,
204 reasons,
205 })
206 }
207
208 fn run_pair(
209 &self,
210 pair_index: u32,
211 is_warmup: bool,
212 req: &PairedRunRequest,
213 samples: &mut Vec<PairedSample>,
214 reasons: &mut Vec<String>,
215 ) -> anyhow::Result<()> {
216 let baseline_spec = CommandSpec {
217 name: format!("{}-baseline", req.name),
218 argv: req.baseline_command.clone(),
219 cwd: req.cwd.clone(),
220 env: req.env.clone(),
221 timeout: req.timeout,
222 output_cap_bytes: req.output_cap_bytes,
223 };
224 let baseline_run = self.runner.run(&baseline_spec).map_err(|e| match e {
225 perfgate_adapters::AdapterError::RunCommand { command, reason } => {
226 anyhow::anyhow!(
227 "failed to run baseline pair {}: {}: {}",
228 pair_index + 1,
229 command,
230 reason
231 )
232 }
233 _ => anyhow::anyhow!("failed to run baseline pair {}: {}", pair_index + 1, e),
234 })?;
235
236 let current_spec = CommandSpec {
237 name: format!("{}-current", req.name),
238 argv: req.current_command.clone(),
239 cwd: req.cwd.clone(),
240 env: req.env.clone(),
241 timeout: req.timeout,
242 output_cap_bytes: req.output_cap_bytes,
243 };
244 let current_run = self.runner.run(¤t_spec).map_err(|e| match e {
245 perfgate_adapters::AdapterError::RunCommand { command, reason } => {
246 anyhow::anyhow!(
247 "failed to run current pair {}: {}: {}",
248 pair_index + 1,
249 command,
250 reason
251 )
252 }
253 _ => anyhow::anyhow!("failed to run current pair {}: {}", pair_index + 1, e),
254 })?;
255
256 let baseline = sample_half(&baseline_run);
257 let current = sample_half(¤t_run);
258
259 let wall_diff_ms = current.wall_ms as i64 - baseline.wall_ms as i64;
260 let rss_diff_kb = match (baseline.max_rss_kb, current.max_rss_kb) {
261 (Some(b), Some(c)) => Some(c as i64 - b as i64),
262 _ => None,
263 };
264
265 if !is_warmup {
266 if baseline.timed_out {
267 reasons.push(format!("pair {} baseline timed out", pair_index + 1));
268 }
269 if baseline.exit_code != 0 && !req.allow_nonzero {
270 reasons.push(format!(
271 "pair {} baseline exit {}",
272 pair_index + 1,
273 baseline.exit_code
274 ));
275 }
276 if current.timed_out {
277 reasons.push(format!("pair {} current timed out", pair_index + 1));
278 }
279 if current.exit_code != 0 && !req.allow_nonzero {
280 reasons.push(format!(
281 "pair {} current exit {}",
282 pair_index + 1,
283 current.exit_code
284 ));
285 }
286 }
287
288 samples.push(PairedSample {
289 pair_index,
290 warmup: is_warmup,
291 baseline,
292 current,
293 wall_diff_ms,
294 rss_diff_kb,
295 });
296
297 Ok(())
298 }
299}
300
301fn sample_half(run: &perfgate_adapters::RunResult) -> PairedSampleHalf {
302 PairedSampleHalf {
303 wall_ms: run.wall_ms,
304 exit_code: run.exit_code,
305 timed_out: run.timed_out,
306 max_rss_kb: run.max_rss_kb,
307 stdout: if run.stdout.is_empty() {
308 None
309 } else {
310 Some(String::from_utf8_lossy(&run.stdout).to_string())
311 },
312 stderr: if run.stderr.is_empty() {
313 None
314 } else {
315 Some(String::from_utf8_lossy(&run.stderr).to_string())
316 },
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_adapters::{AdapterError, RunResult};
    use perfgate_types::HostInfo;
    use std::sync::{Arc, Mutex};

    // Runner double: pops queued RunResults in FIFO order, erroring when the
    // queue is exhausted. Queue order must match consumption order:
    // baseline then current, pair by pair (warmup pairs first).
    #[derive(Clone)]
    struct TestRunner {
        runs: Arc<Mutex<Vec<RunResult>>>,
    }

    impl TestRunner {
        fn new(runs: Vec<RunResult>) -> Self {
            Self {
                runs: Arc::new(Mutex::new(runs)),
            }
        }
    }

    impl ProcessRunner for TestRunner {
        fn run(&self, _spec: &CommandSpec) -> Result<RunResult, AdapterError> {
            let mut runs = self.runs.lock().expect("lock runs");
            if runs.is_empty() {
                return Err(AdapterError::Other("no more queued runs".to_string()));
            }
            Ok(runs.remove(0))
        }
    }

    // Host probe double: returns a fixed HostInfo and records every
    // `include_hostname_hash` flag it was called with.
    #[derive(Clone)]
    struct TestHostProbe {
        host: HostInfo,
        seen_include_hash: Arc<Mutex<Vec<bool>>>,
    }

    impl TestHostProbe {
        fn new(host: HostInfo) -> Self {
            Self {
                host,
                seen_include_hash: Arc::new(Mutex::new(Vec::new())),
            }
        }
    }

    impl HostProbe for TestHostProbe {
        fn probe(&self, options: &HostProbeOptions) -> HostInfo {
            self.seen_include_hash
                .lock()
                .expect("lock options")
                .push(options.include_hostname_hash);
            self.host.clone()
        }
    }

    // Clock double: always returns the same fixed timestamp string.
    #[derive(Clone)]
    struct TestClock {
        now: String,
    }

    impl TestClock {
        fn new(now: &str) -> Self {
            Self {
                now: now.to_string(),
            }
        }
    }

    impl Clock for TestClock {
        fn now_rfc3339(&self) -> String {
            self.now.clone()
        }
    }

    // Convenience constructor: fills the RunResult fields not under test
    // with None.
    fn run_result(
        wall_ms: u64,
        exit_code: i32,
        timed_out: bool,
        max_rss_kb: Option<u64>,
        stdout: &[u8],
        stderr: &[u8],
    ) -> RunResult {
        RunResult {
            wall_ms,
            exit_code,
            timed_out,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb,
            io_read_bytes: None,
            io_write_bytes: None,
            network_packets: None,
            energy_uj: None,
            binary_bytes: None,
            stdout: stdout.to_vec(),
            stderr: stderr.to_vec(),
        }
    }

    // Empty output maps to None; non-empty output maps to Some(String).
    #[test]
    fn sample_half_maps_optional_output() {
        let run = run_result(10, 0, false, None, b"hello", b"");
        let sample = sample_half(&run);
        assert_eq!(sample.stdout.as_deref(), Some("hello"));
        assert!(sample.stderr.is_none());

        let run2 = run_result(10, 0, false, None, b"", b"err");
        let sample2 = sample_half(&run2);
        assert!(sample2.stdout.is_none());
        assert_eq!(sample2.stderr.as_deref(), Some("err"));
    }

    // One warmup pair (runs 1-2) plus one measured pair (runs 3-4): the
    // measured baseline times out with exit 2, so reasons are recorded for
    // the measured pair only, never for the warmup pair.
    #[test]
    fn paired_run_collects_samples_and_reasons() {
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 1, false, None, b"", b""),
            run_result(110, 2, true, Some(2000), b"out", b""),
            run_result(105, 0, false, Some(2500), b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host.clone());
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe.clone(),
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 1,
                warmup: 1,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: true,
                significance_alpha: None,
                significance_min_samples: None,
                require_significance: false,
                max_retries: 0,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(outcome.receipt.samples[0].warmup);
        assert!(!outcome.receipt.samples[1].warmup);
        assert_eq!(outcome.receipt.samples[0].pair_index, 0);
        assert_eq!(outcome.receipt.samples[1].pair_index, 1);

        // Measured pair RSS delta: 2500 - 2000.
        let measured = &outcome.receipt.samples[1];
        assert_eq!(measured.rss_diff_kb, Some(500));

        assert!(outcome.failed);
        assert!(
            outcome
                .reasons
                .iter()
                .any(|r| r.contains("baseline timed out")),
            "expected baseline timeout reason"
        );
        assert!(
            outcome.reasons.iter().any(|r| r.contains("baseline exit")),
            "expected baseline exit reason"
        );
        assert!(
            !outcome
                .reasons
                .iter()
                .any(|r| r.contains("pair 1 current exit")),
            "warmup errors should not be recorded"
        );

        // The host probe must have been called exactly once with the
        // requested hostname-hash flag.
        let seen = host_probe.seen_include_hash.lock().expect("lock seen");
        assert_eq!(seen.as_slice(), &[true]);
        assert_eq!(outcome.receipt.run.host, host);
    }

    // Two clean measured pairs (warmup=0): no reasons, not failed.
    // NOTE(review): despite the test name, warmup is 0 here — these are all
    // measured pairs; the test effectively checks the clean-run path.
    #[test]
    fn paired_run_all_warmup_no_measured_samples() {
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(95, 0, false, None, b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "warmup-only".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 2,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: None,
                significance_min_samples: None,
                require_significance: false,
                max_retries: 0,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(!outcome.failed);
        assert!(outcome.reasons.is_empty());
    }

    // Empty queue: the very first run fails and the error propagates out of
    // execute() as Err.
    #[test]
    fn paired_run_runner_error_propagates() {
        let runner = TestRunner::new(vec![]);

        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let err = usecase
            .execute(PairedRunRequest {
                name: "fail-bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: None,
                significance_min_samples: None,
                require_significance: false,
                max_retries: 0,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .unwrap_err();

        assert!(
            err.to_string().contains("no more queued runs")
                || err.to_string().contains("failed to run"),
            "expected runner error, got: {}",
            err
        );
    }

    // One measured pair: current (150ms, 800kB) minus baseline (200ms,
    // 1000kB) yields negative deltas.
    #[test]
    fn paired_run_wall_diff_computed_correctly() {
        let runs = vec![
            run_result(200, 0, false, Some(1000), b"", b""),
            run_result(150, 0, false, Some(800), b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "diff-bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: None,
                significance_min_samples: None,
                require_significance: false,
                max_retries: 0,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 1);
        let sample = &outcome.receipt.samples[0];
        assert_eq!(sample.wall_diff_ms, -50);
        assert_eq!(sample.rss_diff_kb, Some(-200));
        assert!(!outcome.failed);
    }

    // The first two pairs have inconsistent diffs (0ms then 10ms), so the
    // significance requirement forces retries; enough consistent 10ms-diff
    // pairs are queued for the retry rounds to eventually reach significance.
    #[test]
    fn paired_run_retries_until_significance() {
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "retry-bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 2,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: Some(0.05),
                significance_min_samples: Some(2),
                require_significance: true,
                max_retries: 5,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .expect("paired run should succeed");

        // Retries added samples beyond the initial repeat=2, and the recorded
        // repeat reflects the final measured-pair count.
        assert!(outcome.receipt.samples.len() > 2);
        assert_eq!(
            outcome.receipt.bench.repeat,
            outcome.receipt.samples.len() as u32
        );

        let diag = outcome
            .receipt
            .noise_diagnostics
            .expect("should have noise diagnostics");
        assert!(diag.retries_used > 0);
        assert!(!diag.early_termination);
    }

    // Highly inconsistent diffs push the CV above the 0.5 threshold, so the
    // retry loop aborts early with an "early termination" reason.
    #[test]
    fn paired_run_cv_threshold_early_termination() {
        let runs = vec![
            run_result(200, 0, false, None, b"", b""),
            run_result(150, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(160, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "noisy-bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 2,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: Some(0.05),
                significance_min_samples: Some(2),
                require_significance: true,
                max_retries: 5,
                fail_on_regression: None,
                cv_threshold: Some(0.5),
            })
            .expect("paired run should succeed");

        let diag = outcome
            .receipt
            .noise_diagnostics
            .expect("should have noise diagnostics");
        assert!(diag.early_termination);
        assert!(diag.cv > 0.5);
        assert_eq!(diag.noise_level, perfgate_types::NoiseLevel::High);

        assert!(
            outcome
                .reasons
                .iter()
                .any(|r| r.contains("early termination")),
            "expected early termination reason, got: {:?}",
            outcome.reasons
        );
    }

    // With max_retries == 0 the receipt carries no noise diagnostics at all.
    #[test]
    fn paired_run_no_retries_no_noise_diagnostics() {
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
        ];

        let runner = TestRunner::new(runs);
        let host = HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        };
        let host_probe = TestHostProbe::new(host);
        let clock = TestClock::new("2024-01-01T00:00:00Z");

        let usecase = PairedRunUseCase::new(
            runner,
            host_probe,
            clock,
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        );

        let outcome = usecase
            .execute(PairedRunRequest {
                name: "simple-bench".to_string(),
                cwd: None,
                baseline_command: vec!["true".to_string()],
                current_command: vec!["true".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout: None,
                env: vec![],
                output_cap_bytes: 1024,
                allow_nonzero: false,
                include_hostname_hash: false,
                significance_alpha: None,
                significance_min_samples: None,
                require_significance: false,
                max_retries: 0,
                fail_on_regression: None,
                cv_threshold: None,
            })
            .expect("paired run should succeed");

        assert!(
            outcome.receipt.noise_diagnostics.is_none(),
            "should not have noise diagnostics when max_retries=0"
        );
    }
}