1use perfgate_adapters::{CommandSpec, HostProbe, HostProbeOptions, ProcessRunner};
4use perfgate_domain::compute_paired_stats;
5use perfgate_types::{
6 PAIRED_SCHEMA_V1, PairedBenchMeta, PairedRunReceipt, PairedSample, PairedSampleHalf, RunMeta,
7 SignificancePolicy, ToolInfo,
8};
9use std::path::PathBuf;
10use std::time::Duration;
11
12use crate::Clock;
13
/// Input parameters for one paired (baseline vs. current) benchmark run.
///
/// Consumed by [`PairedRunUseCase::execute`].
#[derive(Debug, Clone, PartialEq)]
pub struct PairedRunRequest {
    /// Benchmark name. Recorded in the receipt and used as the prefix of the
    /// per-command names (`<name>-baseline` and `<name>-current`).
    pub name: String,
    /// Working directory handed to both commands; `None` leaves the choice to
    /// the process runner.
    pub cwd: Option<PathBuf>,
    /// Argument vector of the baseline command.
    pub baseline_command: Vec<String>,
    /// Argument vector of the current command.
    pub current_command: Vec<String>,
    /// Number of measured pairs to run before any significance retries.
    pub repeat: u32,
    /// Number of warmup pairs to run first. Warmup samples are stored with
    /// `warmup = true` and never contribute failure reasons.
    pub warmup: u32,
    /// Optional work-unit count forwarded to the statistics computation.
    pub work_units: Option<u64>,
    /// Optional timeout applied to each individual command invocation.
    pub timeout: Option<Duration>,
    /// Environment variables (key, value) passed to both commands.
    pub env: Vec<(String, String)>,
    /// Output capture limit, in bytes, forwarded to the process runner.
    pub output_cap_bytes: usize,
    /// When `false`, a non-zero exit code in a measured pair is recorded as a
    /// failure reason.
    pub allow_nonzero: bool,
    /// Forwarded to the host probe options.
    pub include_hostname_hash: bool,
    /// Significance level forwarded to the significance policy.
    pub significance_alpha: Option<f64>,
    /// Minimum sample count forwarded to the significance policy.
    pub significance_min_samples: Option<u32>,
    /// When `true`, extra measured pairs are collected (up to `max_retries`)
    /// while the statistics report a non-significant wall-time difference.
    pub require_significance: bool,
    /// Upper bound on the number of extra pairs collected for significance.
    pub max_retries: u32,
    /// Regression threshold in percent. When set, a significant wall-time
    /// change above this percentage is recorded as a failure reason.
    pub fail_on_regression: Option<f64>,
}
34
35#[derive(Debug, Clone)]
36pub struct PairedRunOutcome {
37 pub receipt: PairedRunReceipt,
38 pub failed: bool,
39 pub reasons: Vec<String>,
40}
41
42pub struct PairedRunUseCase<R: ProcessRunner, H: HostProbe, C: Clock> {
43 runner: R,
44 host_probe: H,
45 clock: C,
46 tool: ToolInfo,
47}
48
49impl<R: ProcessRunner, H: HostProbe, C: Clock> PairedRunUseCase<R, H, C> {
50 pub fn new(runner: R, host_probe: H, clock: C, tool: ToolInfo) -> Self {
51 Self {
52 runner,
53 host_probe,
54 clock,
55 tool,
56 }
57 }
58
59 pub fn execute(&self, req: PairedRunRequest) -> anyhow::Result<PairedRunOutcome> {
60 let run_id = uuid::Uuid::new_v4().to_string();
61 let started_at = self.clock.now_rfc3339();
62 let host = self.host_probe.probe(&HostProbeOptions {
63 include_hostname_hash: req.include_hostname_hash,
64 });
65
66 let mut bench = PairedBenchMeta {
67 name: req.name.clone(),
68 cwd: req.cwd.as_ref().map(|p| p.to_string_lossy().to_string()),
69 baseline_command: req.baseline_command.clone(),
70 current_command: req.current_command.clone(),
71 repeat: req.repeat,
72 warmup: req.warmup,
73 work_units: req.work_units,
74 timeout_ms: req.timeout.map(|d| d.as_millis() as u64),
75 };
76
77 let mut samples = Vec::new();
78 let mut reasons = Vec::new();
79
80 for i in 0..req.warmup {
82 self.run_pair(i, true, &req, &mut samples, &mut reasons)?;
83 }
84
85 let mut pairs_collected = 0;
87 for _ in 0..req.repeat {
88 self.run_pair(
89 req.warmup + pairs_collected,
90 false,
91 &req,
92 &mut samples,
93 &mut reasons,
94 )?;
95 pairs_collected += 1;
96 }
97
98 let significance_policy = SignificancePolicy {
99 alpha: req.significance_alpha,
100 min_samples: req.significance_min_samples,
101 };
102
103 let mut retries_done = 0;
105 loop {
106 let stats = compute_paired_stats(&samples, req.work_units, Some(&significance_policy))?;
107 let significance_reached = stats
108 .wall_diff_ms
109 .significance
110 .as_ref()
111 .map(|s| s.significant)
112 .unwrap_or(true);
113
114 if !req.require_significance || significance_reached || retries_done >= req.max_retries
115 {
116 break;
117 }
118
119 retries_done += 1;
121 self.run_pair(
122 req.warmup + pairs_collected,
123 false,
124 &req,
125 &mut samples,
126 &mut reasons,
127 )?;
128 pairs_collected += 1;
129 }
130
131 bench.repeat = pairs_collected;
133
134 let stats = compute_paired_stats(&samples, req.work_units, Some(&significance_policy))?;
135 let ended_at = self.clock.now_rfc3339();
136
137 let receipt = PairedRunReceipt {
138 schema: PAIRED_SCHEMA_V1.to_string(),
139 tool: self.tool.clone(),
140 run: RunMeta {
141 id: run_id,
142 started_at,
143 ended_at,
144 host,
145 },
146 bench,
147 samples,
148 stats,
149 };
150
151 if let Some(threshold_pct) = req.fail_on_regression {
152 let comparison = perfgate_domain::compare_paired_stats(&receipt.stats);
153 let threshold_fraction = threshold_pct / 100.0;
154 if comparison.pct_change > threshold_fraction && comparison.is_significant {
155 reasons.push(format!(
156 "wall time regression ({:.2}%) exceeded threshold ({:.2}%)",
157 comparison.pct_change * 100.0,
158 threshold_pct
159 ));
160 }
161 }
162
163 let failed = !reasons.is_empty();
164 Ok(PairedRunOutcome {
165 receipt,
166 failed,
167 reasons,
168 })
169 }
170
171 fn run_pair(
172 &self,
173 pair_index: u32,
174 is_warmup: bool,
175 req: &PairedRunRequest,
176 samples: &mut Vec<PairedSample>,
177 reasons: &mut Vec<String>,
178 ) -> anyhow::Result<()> {
179 let baseline_spec = CommandSpec {
180 name: format!("{}-baseline", req.name),
181 argv: req.baseline_command.clone(),
182 cwd: req.cwd.clone(),
183 env: req.env.clone(),
184 timeout: req.timeout,
185 output_cap_bytes: req.output_cap_bytes,
186 };
187 let baseline_run = self.runner.run(&baseline_spec).map_err(|e| match e {
188 perfgate_adapters::AdapterError::RunCommand { command, reason } => {
189 anyhow::anyhow!(
190 "failed to run baseline pair {}: {}: {}",
191 pair_index + 1,
192 command,
193 reason
194 )
195 }
196 _ => anyhow::anyhow!("failed to run baseline pair {}: {}", pair_index + 1, e),
197 })?;
198
199 let current_spec = CommandSpec {
200 name: format!("{}-current", req.name),
201 argv: req.current_command.clone(),
202 cwd: req.cwd.clone(),
203 env: req.env.clone(),
204 timeout: req.timeout,
205 output_cap_bytes: req.output_cap_bytes,
206 };
207 let current_run = self.runner.run(¤t_spec).map_err(|e| match e {
208 perfgate_adapters::AdapterError::RunCommand { command, reason } => {
209 anyhow::anyhow!(
210 "failed to run current pair {}: {}: {}",
211 pair_index + 1,
212 command,
213 reason
214 )
215 }
216 _ => anyhow::anyhow!("failed to run current pair {}: {}", pair_index + 1, e),
217 })?;
218
219 let baseline = sample_half(&baseline_run);
220 let current = sample_half(¤t_run);
221
222 let wall_diff_ms = current.wall_ms as i64 - baseline.wall_ms as i64;
223 let rss_diff_kb = match (baseline.max_rss_kb, current.max_rss_kb) {
224 (Some(b), Some(c)) => Some(c as i64 - b as i64),
225 _ => None,
226 };
227
228 if !is_warmup {
229 if baseline.timed_out {
230 reasons.push(format!("pair {} baseline timed out", pair_index + 1));
231 }
232 if baseline.exit_code != 0 && !req.allow_nonzero {
233 reasons.push(format!(
234 "pair {} baseline exit {}",
235 pair_index + 1,
236 baseline.exit_code
237 ));
238 }
239 if current.timed_out {
240 reasons.push(format!("pair {} current timed out", pair_index + 1));
241 }
242 if current.exit_code != 0 && !req.allow_nonzero {
243 reasons.push(format!(
244 "pair {} current exit {}",
245 pair_index + 1,
246 current.exit_code
247 ));
248 }
249 }
250
251 samples.push(PairedSample {
252 pair_index,
253 warmup: is_warmup,
254 baseline,
255 current,
256 wall_diff_ms,
257 rss_diff_kb,
258 });
259
260 Ok(())
261 }
262}
263
264fn sample_half(run: &perfgate_adapters::RunResult) -> PairedSampleHalf {
265 PairedSampleHalf {
266 wall_ms: run.wall_ms,
267 exit_code: run.exit_code,
268 timed_out: run.timed_out,
269 max_rss_kb: run.max_rss_kb,
270 stdout: if run.stdout.is_empty() {
271 None
272 } else {
273 Some(String::from_utf8_lossy(&run.stdout).to_string())
274 },
275 stderr: if run.stderr.is_empty() {
276 None
277 } else {
278 Some(String::from_utf8_lossy(&run.stderr).to_string())
279 },
280 }
281}
282
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_adapters::{AdapterError, RunResult};
    use perfgate_types::HostInfo;
    use std::collections::VecDeque;
    use std::sync::{Arc, Mutex};

    /// Runner fake that replays a queue of canned results, one per `run` call,
    /// in order (baseline first, then current, for each pair).
    #[derive(Clone)]
    struct TestRunner {
        runs: Arc<Mutex<VecDeque<RunResult>>>,
    }

    impl TestRunner {
        fn new(runs: Vec<RunResult>) -> Self {
            Self {
                runs: Arc::new(Mutex::new(runs.into())),
            }
        }
    }

    impl ProcessRunner for TestRunner {
        /// Pops the next queued result; errors once the queue is exhausted.
        fn run(&self, _spec: &CommandSpec) -> Result<RunResult, AdapterError> {
            self.runs
                .lock()
                .expect("lock runs")
                .pop_front()
                .ok_or_else(|| AdapterError::Other("no more queued runs".to_string()))
        }
    }

    /// Host probe fake that returns a fixed host and records the
    /// `include_hostname_hash` option of every probe call.
    #[derive(Clone)]
    struct TestHostProbe {
        host: HostInfo,
        seen_include_hash: Arc<Mutex<Vec<bool>>>,
    }

    impl TestHostProbe {
        fn new(host: HostInfo) -> Self {
            Self {
                host,
                seen_include_hash: Arc::new(Mutex::new(Vec::new())),
            }
        }
    }

    impl HostProbe for TestHostProbe {
        fn probe(&self, options: &HostProbeOptions) -> HostInfo {
            self.seen_include_hash
                .lock()
                .expect("lock options")
                .push(options.include_hostname_hash);
            self.host.clone()
        }
    }

    /// Clock fake that always reports the same timestamp.
    #[derive(Clone)]
    struct TestClock {
        now: String,
    }

    impl TestClock {
        fn new(now: &str) -> Self {
            Self {
                now: now.to_string(),
            }
        }
    }

    impl Clock for TestClock {
        fn now_rfc3339(&self) -> String {
            self.now.clone()
        }
    }

    type TestUseCase = PairedRunUseCase<TestRunner, TestHostProbe, TestClock>;

    /// Builds a `RunResult` with the given measurements; every metric the
    /// tests do not care about is left as `None`.
    fn run_result(
        wall_ms: u64,
        exit_code: i32,
        timed_out: bool,
        max_rss_kb: Option<u64>,
        stdout: &[u8],
        stderr: &[u8],
    ) -> RunResult {
        RunResult {
            wall_ms,
            exit_code,
            timed_out,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb,
            io_read_bytes: None,
            io_write_bytes: None,
            network_packets: None,
            energy_uj: None,
            binary_bytes: None,
            stdout: stdout.to_vec(),
            stderr: stderr.to_vec(),
        }
    }

    /// Host fixture shared by all tests.
    fn test_host() -> HostInfo {
        HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        }
    }

    /// Use case wired to a runner replaying `runs`, the given host probe and a
    /// fixed clock.
    fn usecase(runs: Vec<RunResult>, host_probe: TestHostProbe) -> TestUseCase {
        PairedRunUseCase::new(
            TestRunner::new(runs),
            host_probe,
            TestClock::new("2024-01-01T00:00:00Z"),
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        )
    }

    /// Baseline request: no significance requirement, no retries, no
    /// regression gate, non-zero exits disallowed. Tests override individual
    /// fields with struct-update syntax.
    fn request(name: &str, repeat: u32, warmup: u32) -> PairedRunRequest {
        PairedRunRequest {
            name: name.to_string(),
            cwd: None,
            baseline_command: vec!["true".to_string()],
            current_command: vec!["true".to_string()],
            repeat,
            warmup,
            work_units: None,
            timeout: None,
            env: vec![],
            output_cap_bytes: 1024,
            allow_nonzero: false,
            include_hostname_hash: false,
            significance_alpha: None,
            significance_min_samples: None,
            require_significance: false,
            max_retries: 0,
            fail_on_regression: None,
        }
    }

    #[test]
    fn sample_half_maps_optional_output() {
        let run = run_result(10, 0, false, None, b"hello", b"");
        let sample = sample_half(&run);
        assert_eq!(sample.stdout.as_deref(), Some("hello"));
        assert!(sample.stderr.is_none());

        let run2 = run_result(10, 0, false, None, b"", b"err");
        let sample2 = sample_half(&run2);
        assert!(sample2.stdout.is_none());
        assert_eq!(sample2.stderr.as_deref(), Some("err"));
    }

    #[test]
    fn paired_run_collects_samples_and_reasons() {
        let runs = vec![
            // Warmup pair: baseline, then current (non-zero exit must be ignored).
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 1, false, None, b"", b""),
            // Measured pair: baseline (timed out, exit 2), then current.
            run_result(110, 2, true, Some(2000), b"out", b""),
            run_result(105, 0, false, Some(2500), b"", b""),
        ];

        let host = test_host();
        let host_probe = TestHostProbe::new(host.clone());

        let outcome = usecase(runs, host_probe.clone())
            .execute(PairedRunRequest {
                include_hostname_hash: true,
                ..request("bench", 1, 1)
            })
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(outcome.receipt.samples[0].warmup);
        assert!(!outcome.receipt.samples[1].warmup);
        assert_eq!(outcome.receipt.samples[0].pair_index, 0);
        assert_eq!(outcome.receipt.samples[1].pair_index, 1);

        let measured = &outcome.receipt.samples[1];
        assert_eq!(measured.rss_diff_kb, Some(500));

        assert!(outcome.failed);
        assert!(
            outcome
                .reasons
                .iter()
                .any(|r| r.contains("baseline timed out")),
            "expected baseline timeout reason"
        );
        assert!(
            outcome.reasons.iter().any(|r| r.contains("baseline exit")),
            "expected baseline exit reason"
        );
        assert!(
            !outcome
                .reasons
                .iter()
                .any(|r| r.contains("pair 1 current exit")),
            "warmup errors should not be recorded"
        );

        let seen = host_probe.seen_include_hash.lock().expect("lock seen");
        assert_eq!(seen.as_slice(), &[true]);
        assert_eq!(outcome.receipt.run.host, host);
    }

    // NOTE(review): despite its name (and the "warmup-only" bench name), this
    // test configures `warmup: 0, repeat: 2`, so both pairs are *measured*.
    // It effectively checks that two clean measured pairs yield no failure.
    // Confirm whether a warmup-only scenario was intended.
    #[test]
    fn paired_run_all_warmup_no_measured_samples() {
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(95, 0, false, None, b"", b""),
        ];

        let outcome = usecase(runs, TestHostProbe::new(test_host()))
            .execute(request("warmup-only", 2, 0))
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(!outcome.failed);
        assert!(outcome.reasons.is_empty());
    }

    #[test]
    fn paired_run_runner_error_propagates() {
        // Empty queue: the very first command fails inside the runner.
        let err = usecase(vec![], TestHostProbe::new(test_host()))
            .execute(request("fail-bench", 1, 0))
            .unwrap_err();

        assert!(
            err.to_string().contains("no more queued runs")
                || err.to_string().contains("failed to run"),
            "expected runner error, got: {}",
            err
        );
    }

    #[test]
    fn paired_run_wall_diff_computed_correctly() {
        let runs = vec![
            // Baseline: 200 ms / 1000 kB; current: 150 ms / 800 kB.
            run_result(200, 0, false, Some(1000), b"", b""),
            run_result(150, 0, false, Some(800), b"", b""),
        ];

        let outcome = usecase(runs, TestHostProbe::new(test_host()))
            .execute(request("diff-bench", 1, 0))
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 1);
        let sample = &outcome.receipt.samples[0];
        assert_eq!(sample.wall_diff_ms, -50);
        assert_eq!(sample.rss_diff_kb, Some(-200));
        assert!(!outcome.failed);
    }

    // NOTE(review): only three pairs are queued while `max_retries` is 5. If
    // significance were still not reported after the third pair, the next
    // retry would hit the empty queue and `execute` would return an error, so
    // this test depends on the statistics for these exact samples.
    #[test]
    fn paired_run_retries_until_significance() {
        let runs = vec![
            // Pair 1: identical timings.
            run_result(100, 0, false, None, b"", b""),
            run_result(100, 0, false, None, b"", b""),
            // Pair 2: current is 10 ms slower.
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            // Pair 3 (only consumed by a retry): current is 10 ms slower.
            run_result(100, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
        ];

        let outcome = usecase(runs, TestHostProbe::new(test_host()))
            .execute(PairedRunRequest {
                significance_alpha: Some(0.05),
                significance_min_samples: Some(2),
                require_significance: true,
                max_retries: 5,
                ..request("retry-bench", 2, 0)
            })
            .expect("paired run should succeed");

        // More samples than the two requested means at least one retry ran.
        assert!(outcome.receipt.samples.len() > 2);
        // With no warmups, every sample is a measured pair counted in `repeat`.
        assert_eq!(
            outcome.receipt.bench.repeat,
            outcome.receipt.samples.len() as u32
        );
    }
}