Skip to main content

perfgate_app/
paired.rs

//! Paired benchmark execution for perfgate.
2
3use anyhow::Context;
4use perfgate_adapters::{CommandSpec, HostProbe, HostProbeOptions, ProcessRunner};
5use perfgate_domain::compute_paired_stats;
6use perfgate_types::{
7    PAIRED_SCHEMA_V1, PairedBenchMeta, PairedRunReceipt, PairedSample, PairedSampleHalf, RunMeta,
8    ToolInfo,
9};
10use std::path::PathBuf;
11use std::time::Duration;
12
13use crate::Clock;
14
/// Input describing one paired benchmark run: a baseline command and a
/// current command that are executed back to back for every pair.
#[derive(Clone, Debug)]
pub struct PairedRunRequest {
    /// Benchmark name; copied into the receipt's bench metadata.
    pub name: String,
    /// Working directory handed to both commands (`None` passes no override).
    pub cwd: Option<PathBuf>,
    /// Argument vector of the baseline command.
    pub baseline_command: Vec<String>,
    /// Argument vector of the current command.
    pub current_command: Vec<String>,
    /// Number of measured pairs executed after the warmup pairs.
    pub repeat: u32,
    /// Number of leading pairs flagged as warmup; their timeouts and nonzero
    /// exits are not reported as failure reasons.
    pub warmup: u32,
    /// Optional work-unit count; recorded in the bench metadata and forwarded
    /// to the paired statistics computation.
    pub work_units: Option<u64>,
    /// Optional timeout applied to each individual command invocation.
    pub timeout: Option<Duration>,
    /// Environment variable pairs passed to both commands.
    pub env: Vec<(String, String)>,
    /// Output cap, in bytes, forwarded to the process runner for each command.
    pub output_cap_bytes: usize,
    /// NOTE(review): not read by `PairedRunUseCase::execute` in this file;
    /// presumably interpreted by the caller when deciding how to treat
    /// nonzero exits — confirm.
    pub allow_nonzero: bool,
    /// Forwarded to the host probe to request a hostname hash.
    pub include_hostname_hash: bool,
}
30
31#[derive(Debug, Clone)]
32pub struct PairedRunOutcome {
33    pub receipt: PairedRunReceipt,
34    pub failed: bool,
35    pub reasons: Vec<String>,
36}
37
38pub struct PairedRunUseCase<R: ProcessRunner, H: HostProbe, C: Clock> {
39    runner: R,
40    host_probe: H,
41    clock: C,
42    tool: ToolInfo,
43}
44
45impl<R: ProcessRunner, H: HostProbe, C: Clock> PairedRunUseCase<R, H, C> {
46    pub fn new(runner: R, host_probe: H, clock: C, tool: ToolInfo) -> Self {
47        Self {
48            runner,
49            host_probe,
50            clock,
51            tool,
52        }
53    }
54
55    pub fn execute(&self, req: PairedRunRequest) -> anyhow::Result<PairedRunOutcome> {
56        let run_id = uuid::Uuid::new_v4().to_string();
57        let started_at = self.clock.now_rfc3339();
58        let host = self.host_probe.probe(&HostProbeOptions {
59            include_hostname_hash: req.include_hostname_hash,
60        });
61
62        let bench = PairedBenchMeta {
63            name: req.name.clone(),
64            cwd: req.cwd.as_ref().map(|p| p.to_string_lossy().to_string()),
65            baseline_command: req.baseline_command.clone(),
66            current_command: req.current_command.clone(),
67            repeat: req.repeat,
68            warmup: req.warmup,
69            work_units: req.work_units,
70            timeout_ms: req.timeout.map(|d| d.as_millis() as u64),
71        };
72
73        let mut samples = Vec::new();
74        let mut reasons = Vec::new();
75        let total = req.warmup + req.repeat;
76
77        for i in 0..total {
78            let is_warmup = i < req.warmup;
79
80            let baseline_spec = CommandSpec {
81                argv: req.baseline_command.clone(),
82                cwd: req.cwd.clone(),
83                env: req.env.clone(),
84                timeout: req.timeout,
85                output_cap_bytes: req.output_cap_bytes,
86            };
87            let baseline_run = self
88                .runner
89                .run(&baseline_spec)
90                .with_context(|| format!("failed to run baseline (pair {})", i + 1))?;
91
92            let current_spec = CommandSpec {
93                argv: req.current_command.clone(),
94                cwd: req.cwd.clone(),
95                env: req.env.clone(),
96                timeout: req.timeout,
97                output_cap_bytes: req.output_cap_bytes,
98            };
99            let current_run = self
100                .runner
101                .run(&current_spec)
102                .with_context(|| format!("failed to run current (pair {})", i + 1))?;
103
104            let baseline = sample_half(&baseline_run);
105            let current = sample_half(&current_run);
106
107            let wall_diff_ms = current.wall_ms as i64 - baseline.wall_ms as i64;
108            let rss_diff_kb = match (baseline.max_rss_kb, current.max_rss_kb) {
109                (Some(b), Some(c)) => Some(c as i64 - b as i64),
110                _ => None,
111            };
112
113            if !is_warmup {
114                if baseline.timed_out {
115                    reasons.push(format!("pair {} baseline timed out", i + 1));
116                }
117                if baseline.exit_code != 0 {
118                    reasons.push(format!(
119                        "pair {} baseline exit {}",
120                        i + 1,
121                        baseline.exit_code
122                    ));
123                }
124                if current.timed_out {
125                    reasons.push(format!("pair {} current timed out", i + 1));
126                }
127                if current.exit_code != 0 {
128                    reasons.push(format!("pair {} current exit {}", i + 1, current.exit_code));
129                }
130            }
131
132            samples.push(PairedSample {
133                pair_index: i,
134                warmup: is_warmup,
135                baseline,
136                current,
137                wall_diff_ms,
138                rss_diff_kb,
139            });
140        }
141
142        let stats = compute_paired_stats(&samples, req.work_units)?;
143        let ended_at = self.clock.now_rfc3339();
144
145        let receipt = PairedRunReceipt {
146            schema: PAIRED_SCHEMA_V1.to_string(),
147            tool: self.tool.clone(),
148            run: RunMeta {
149                id: run_id,
150                started_at,
151                ended_at,
152                host,
153            },
154            bench,
155            samples,
156            stats,
157        };
158
159        let failed = !reasons.is_empty();
160        Ok(PairedRunOutcome {
161            receipt,
162            failed,
163            reasons,
164        })
165    }
166}
167
168fn sample_half(run: &perfgate_adapters::RunResult) -> PairedSampleHalf {
169    PairedSampleHalf {
170        wall_ms: run.wall_ms,
171        exit_code: run.exit_code,
172        timed_out: run.timed_out,
173        max_rss_kb: run.max_rss_kb,
174        stdout: if run.stdout.is_empty() {
175            None
176        } else {
177            Some(String::from_utf8_lossy(&run.stdout).to_string())
178        },
179        stderr: if run.stderr.is_empty() {
180            None
181        } else {
182            Some(String::from_utf8_lossy(&run.stderr).to_string())
183        },
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_adapters::{AdapterError, RunResult};
    use perfgate_types::HostInfo;
    use std::collections::VecDeque;
    use std::sync::{Arc, Mutex};

    /// Process runner stub that replays a fixed queue of results, handing out
    /// one per `run` call and erroring once the queue is exhausted.
    #[derive(Clone)]
    struct TestRunner {
        runs: Arc<Mutex<VecDeque<RunResult>>>,
    }

    impl TestRunner {
        fn new(runs: Vec<RunResult>) -> Self {
            Self {
                runs: Arc::new(Mutex::new(runs.into())),
            }
        }
    }

    impl ProcessRunner for TestRunner {
        fn run(&self, _spec: &CommandSpec) -> Result<RunResult, AdapterError> {
            // Pop from the front so results come back in the order queued.
            self.runs
                .lock()
                .expect("lock runs")
                .pop_front()
                .ok_or_else(|| AdapterError::Other(anyhow::anyhow!("no more queued runs")))
        }
    }

    /// Host probe stub that returns a fixed host and records the
    /// `include_hostname_hash` option of every probe call.
    #[derive(Clone)]
    struct TestHostProbe {
        host: HostInfo,
        seen_include_hash: Arc<Mutex<Vec<bool>>>,
    }

    impl TestHostProbe {
        fn new(host: HostInfo) -> Self {
            Self {
                host,
                seen_include_hash: Arc::new(Mutex::new(Vec::new())),
            }
        }
    }

    impl HostProbe for TestHostProbe {
        fn probe(&self, options: &HostProbeOptions) -> HostInfo {
            self.seen_include_hash
                .lock()
                .expect("lock options")
                .push(options.include_hostname_hash);
            self.host.clone()
        }
    }

    /// Clock stub that always reports the same timestamp.
    #[derive(Clone)]
    struct TestClock {
        now: String,
    }

    impl TestClock {
        fn new(now: &str) -> Self {
            Self {
                now: now.to_string(),
            }
        }
    }

    impl Clock for TestClock {
        fn now_rfc3339(&self) -> String {
            self.now.clone()
        }
    }

    type TestUseCase = PairedRunUseCase<TestRunner, TestHostProbe, TestClock>;

    /// Builds a `RunResult` with the given measurements; metrics the paired
    /// flow does not read are left as `None`.
    fn run_result(
        wall_ms: u64,
        exit_code: i32,
        timed_out: bool,
        max_rss_kb: Option<u64>,
        stdout: &[u8],
        stderr: &[u8],
    ) -> RunResult {
        RunResult {
            wall_ms,
            exit_code,
            timed_out,
            cpu_ms: None,
            page_faults: None,
            ctx_switches: None,
            max_rss_kb,
            binary_bytes: None,
            stdout: stdout.to_vec(),
            stderr: stderr.to_vec(),
        }
    }

    /// Host description shared by all tests.
    fn linux_host() -> HostInfo {
        HostInfo {
            os: "linux".to_string(),
            arch: "x86_64".to_string(),
            cpu_count: None,
            memory_bytes: None,
            hostname_hash: None,
        }
    }

    /// Use case wired with a queued runner, the given probe and a fixed clock.
    fn usecase_with(runs: Vec<RunResult>, host_probe: TestHostProbe) -> TestUseCase {
        PairedRunUseCase::new(
            TestRunner::new(runs),
            host_probe,
            TestClock::new("2024-01-01T00:00:00Z"),
            ToolInfo {
                name: "perfgate".to_string(),
                version: "0.1.0".to_string(),
            },
        )
    }

    /// Request running `true` for both commands with the given pair counts.
    fn request(
        name: &str,
        warmup: u32,
        repeat: u32,
        include_hostname_hash: bool,
    ) -> PairedRunRequest {
        PairedRunRequest {
            name: name.to_string(),
            cwd: None,
            baseline_command: vec!["true".to_string()],
            current_command: vec!["true".to_string()],
            repeat,
            warmup,
            work_units: None,
            timeout: None,
            env: vec![],
            output_cap_bytes: 1024,
            allow_nonzero: false,
            include_hostname_hash,
        }
    }

    #[test]
    fn sample_half_maps_optional_output() {
        let run = run_result(10, 0, false, None, b"hello", b"");
        let sample = sample_half(&run);
        assert_eq!(sample.stdout.as_deref(), Some("hello"));
        assert!(sample.stderr.is_none());

        let run2 = run_result(10, 0, false, None, b"", b"err");
        let sample2 = sample_half(&run2);
        assert!(sample2.stdout.is_none());
        assert_eq!(sample2.stderr.as_deref(), Some("err"));
    }

    #[test]
    fn paired_run_collects_samples_and_reasons() {
        let runs = vec![
            // warmup baseline/current (current exits nonzero, should be ignored)
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 1, false, None, b"", b""),
            // measured baseline/current (baseline times out + nonzero)
            run_result(110, 2, true, Some(2000), b"out", b""),
            run_result(105, 0, false, Some(2500), b"", b""),
        ];

        let host = linux_host();
        let host_probe = TestHostProbe::new(host.clone());
        // The clone shares the recorded-options list with `host_probe`.
        let usecase = usecase_with(runs, host_probe.clone());

        let outcome = usecase
            .execute(request("bench", 1, 1, true))
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(outcome.receipt.samples[0].warmup);
        assert!(!outcome.receipt.samples[1].warmup);
        assert_eq!(outcome.receipt.samples[0].pair_index, 0);
        assert_eq!(outcome.receipt.samples[1].pair_index, 1);

        let measured = &outcome.receipt.samples[1];
        assert_eq!(measured.rss_diff_kb, Some(500));

        assert!(outcome.failed);
        assert!(
            outcome
                .reasons
                .iter()
                .any(|r| r.contains("baseline timed out")),
            "expected baseline timeout reason"
        );
        assert!(
            outcome.reasons.iter().any(|r| r.contains("baseline exit")),
            "expected baseline exit reason"
        );
        assert!(
            !outcome
                .reasons
                .iter()
                .any(|r| r.contains("pair 1 current exit")),
            "warmup errors should not be recorded"
        );

        let seen = host_probe.seen_include_hash.lock().expect("lock seen");
        assert_eq!(seen.as_slice(), &[true]);
        assert_eq!(outcome.receipt.run.host, host);
    }

    #[test]
    fn paired_run_without_warmup_measures_every_pair() {
        // 0 warmups, 2 measured pairs → 2 samples, none flagged warmup, no failures
        let runs = vec![
            run_result(100, 0, false, None, b"", b""),
            run_result(90, 0, false, None, b"", b""),
            run_result(110, 0, false, None, b"", b""),
            run_result(95, 0, false, None, b"", b""),
        ];

        let usecase = usecase_with(runs, TestHostProbe::new(linux_host()));

        let outcome = usecase
            .execute(request("no-warmup", 0, 2, false))
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 2);
        assert!(
            outcome.receipt.samples.iter().all(|s| !s.warmup),
            "no sample should be flagged as warmup"
        );
        assert!(!outcome.failed);
        assert!(outcome.reasons.is_empty());
    }

    #[test]
    fn paired_run_runner_error_propagates() {
        // Runner with an empty queue fails on the very first command.
        let usecase = usecase_with(vec![], TestHostProbe::new(linux_host()));

        let err = usecase
            .execute(request("fail-bench", 0, 1, false))
            .unwrap_err();

        assert!(
            err.to_string().contains("no more queued runs")
                || err.to_string().contains("failed to run"),
            "expected runner error, got: {}",
            err
        );
    }

    #[test]
    fn paired_run_wall_diff_computed_correctly() {
        let runs = vec![
            // baseline: 200ms, current: 150ms → diff = -50
            run_result(200, 0, false, Some(1000), b"", b""),
            run_result(150, 0, false, Some(800), b"", b""),
        ];

        let usecase = usecase_with(runs, TestHostProbe::new(linux_host()));

        let outcome = usecase
            .execute(request("diff-bench", 0, 1, false))
            .expect("paired run should succeed");

        assert_eq!(outcome.receipt.samples.len(), 1);
        let sample = &outcome.receipt.samples[0];
        assert_eq!(sample.wall_diff_ms, -50);
        assert_eq!(sample.rss_diff_kb, Some(-200));
        assert!(!outcome.failed);
    }
}