Skip to main content

harn_cli/commands/
test_bench.rs

1//! `harn test-bench` runner.
2//!
3//! Wraps [`crate::commands::run::execute_run`] in a
4//! [`harn_vm::testbench::TestbenchSession`] so a script runs against a
5//! pinned clock, an optional LLM/process tape, and an optional
6//! filesystem overlay — all with deny-by-default network egress.
7//!
8//! The CLI flag names map onto [`harn_vm::testbench::Testbench`] one-for-one.
9//!
10//! # Runtime modes
11//!
12//! `--runtime paused-tokio` (default): multi-threaded Tokio runtime. Tasks
13//! from concurrent Harn agents run in parallel across worker threads. The
14//! paused mock clock keeps virtual time stable, but task-interleaving order
15//! varies between runs.
16//!
17//! `--runtime des`: single-threaded `current_thread` Tokio runtime. All
18//! tasks, I/O completions, and timer callbacks share one OS thread. Combined
19//! with the paused mock clock this produces bit-exact event tapes across
20//! reruns for scripts that stay within the DES-safe primitive set (no real
21//! network, no real subprocess, no real clock). See `docs/src/dev/des-mode.md`.
22
23use std::collections::HashSet;
24use std::fs;
25use std::path::{Path, PathBuf};
26use std::process;
27use std::thread;
28
29use harn_vm::testbench::annotations::{
30    annotations_for_record, validate_against_tape, AnnotationKind, AnnotationTape,
31};
32use harn_vm::testbench::fidelity::{compare, FidelityMode, FidelityReport};
33use harn_vm::testbench::overlay_fs::{render_unified_diff, DiffEntry, DiffKind};
34use harn_vm::testbench::tape::EventTape;
35use harn_vm::testbench::{
36    ClockConfig, FilesystemConfig, LlmConfig, NetworkConfig, SubprocessConfig, TapeConfig,
37    Testbench,
38};
39
40use crate::cli::{
41    TestBenchCommand, TestBenchExportAnnotationsArgs, TestBenchFidelityArgs, TestBenchReplayArgs,
42    TestBenchRunArgs, TestBenchValidateAnnotationsArgs,
43};
44use crate::commands::run::{execute_run, CliLlmMockMode, RunOutcome, RunProfileOptions};
45use crate::CLI_RUNTIME_STACK_SIZE;
46
/// Default virtual-clock start for `--clock paused` runs when the user
/// does not pass `--start-at-ms`: 2026-01-01T00:00:00Z expressed as
/// Unix milliseconds. A fixed, RFC-3339-friendly instant keeps
/// paused-clock tapes reproducible across machines and reruns.
const DEFAULT_TESTBENCH_START_MS: i64 = 1_767_225_600_000;
52
/// Where the replay tape used by `harn test-bench fidelity` came from.
///
/// Fidelity comparison needs two tapes: the recorded one and a replay
/// one. The replay side is either produced fresh by re-running the
/// script, or loaded from an existing file on disk.
enum ReplaySource {
    /// Re-run the script under `--against` and emit a fresh tape.
    ReRun,
    /// Load an existing tape from disk at this path.
    Tape(String),
}
60
61pub(crate) async fn run(command: TestBenchCommand) {
62    let outcome = match command {
63        TestBenchCommand::Run(args) => run_args(args).await,
64        TestBenchCommand::Replay(args) => replay_args(args).await,
65        TestBenchCommand::Fidelity(args) => fidelity_args(args).await,
66        TestBenchCommand::ValidateAnnotations(args) => validate_annotations_args(args),
67        TestBenchCommand::ExportAnnotations(args) => export_annotations_args(args),
68    };
69    flush_outcome(outcome);
70}
71
72async fn run_args(args: TestBenchRunArgs) -> RunOutcome {
73    let bench = match build_testbench(&args) {
74        Ok(bench) => bench,
75        Err(message) => return error_outcome(message),
76    };
77    let llm_mode = match build_llm_mode(&args) {
78        Ok(mode) => mode,
79        Err(message) => return error_outcome(message),
80    };
81    match args.runtime.as_str() {
82        "paused-tokio" | "" => run_with_bench(args, bench, llm_mode).await,
83        "des" => run_with_des_runtime(args, bench, llm_mode).await,
84        other => error_outcome(format!(
85            "--runtime must be `paused-tokio` or `des`, got `{other}`"
86        )),
87    }
88}
89
90/// Execute the script under a standard multi-thread Tokio runtime with the
91/// testbench mocks already active on the calling async task.
92async fn run_with_bench(
93    args: TestBenchRunArgs,
94    bench: Testbench,
95    llm_mode: CliLlmMockMode,
96) -> RunOutcome {
97    let session = match bench.activate() {
98        Ok(session) => session,
99        Err(error) => return error_outcome(format!("activate testbench: {error}")),
100    };
101    let outcome = execute_run(
102        &args.file,
103        false,
104        HashSet::new(),
105        args.argv.clone(),
106        Vec::new(),
107        llm_mode,
108        None,
109        RunProfileOptions::default(),
110    )
111    .await;
112    finalize_session(outcome, session, &args)
113}
114
/// Execute the script under a **single-threaded** `current_thread` Tokio
/// runtime for maximum inter-task scheduling determinism.
///
/// Spawns a fresh OS thread so we can call `Runtime::block_on` without
/// nesting inside the caller's multi-thread runtime. The stack size is
/// matched to the main CLI thread so deep recursion in scripts works.
/// Thread-local testbench mocks (clock, overlay, process tape, recorder)
/// are installed inside the new thread so they are visible to every task
/// that runs there.
///
/// The `current_thread` scheduler cooperatively multiplexes all tasks on one
/// OS thread, eliminating the inter-thread wake-up races that cause tape
/// records to appear in different orders between runs. Combined with the
/// paused mock clock this yields bit-exact event tapes for DES-safe scripts.
async fn run_with_des_runtime(
    args: TestBenchRunArgs,
    bench: Testbench,
    llm_mode: CliLlmMockMode,
) -> RunOutcome {
    // One-shot hand-off of the RunOutcome from the DES thread back to
    // this async caller.
    let (tx, rx) = std::sync::mpsc::channel();
    thread::Builder::new()
        .name("harn-des".to_string())
        .stack_size(CLI_RUNTIME_STACK_SIZE)
        .spawn(move || {
            let rt = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .unwrap_or_else(|e| panic!("failed to build DES runtime: {e}"));
            let outcome = rt.block_on(async move {
                // Clear any mock state left behind on this thread so the
                // testbench activation below starts from a clean slate.
                harn_vm::reset_thread_local_state();
                // Activate inside the DES thread: the mocks are
                // thread-local, so activating on the caller's thread
                // would leave this runtime's tasks unmocked.
                let session = match bench.activate() {
                    Ok(s) => s,
                    Err(e) => return error_outcome(format!("activate testbench: {e}")),
                };
                let outcome = execute_run(
                    &args.file,
                    false,
                    HashSet::new(),
                    args.argv.clone(),
                    Vec::new(),
                    llm_mode,
                    None,
                    RunProfileOptions::default(),
                )
                .await;
                finalize_session(outcome, session, &args)
            });
            let _ = tx.send(outcome);
        })
        .expect("spawn DES thread");
    // Block for the result on the blocking pool so the caller's async
    // runtime is not stalled. A recv error means the DES thread died
    // before sending (i.e. it panicked).
    tokio::task::spawn_blocking(move || {
        rx.recv()
            .unwrap_or_else(|_| error_outcome("DES runtime thread panicked".to_string()))
    })
    .await
    .unwrap_or_else(|e| error_outcome(format!("DES runtime blocking task failed: {e:?}")))
}
172
173fn build_llm_mode(args: &TestBenchRunArgs) -> Result<CliLlmMockMode, String> {
174    match (&args.llm_fixture, &args.llm_record) {
175        (Some(_), Some(_)) => Err("--llm-fixture and --llm-record are mutually exclusive".into()),
176        (Some(path), None) => Ok(CliLlmMockMode::Replay {
177            fixture_path: PathBuf::from(path),
178        }),
179        (None, Some(path)) => Ok(CliLlmMockMode::Record {
180            fixture_path: PathBuf::from(path),
181        }),
182        (None, None) => Ok(CliLlmMockMode::Off),
183    }
184}
185
186fn finalize_session(
187    outcome: RunOutcome,
188    session: harn_vm::testbench::TestbenchSession,
189    args: &TestBenchRunArgs,
190) -> RunOutcome {
191    let finalize = match session.finalize() {
192        Ok(f) => f,
193        Err(error) => return append_error(outcome, format!("finalize testbench: {error}")),
194    };
195    let mut outcome = outcome;
196    if matches!(args.network.as_str(), "deny") {
197        outcome
198            .stderr
199            .push_str("[testbench] network=deny applied for the duration of the run.\n");
200    }
201    if let Some(diff_path) = args.emit_diff.as_ref() {
202        if let Err(error) = persist_overlay_diff(&finalize.fs_diff, &PathBuf::from(diff_path)) {
203            outcome.stderr.push_str(&format!(
204                "warning: failed to write fs diff to {diff_path}: {error}\n"
205            ));
206        }
207    } else if !finalize.fs_diff.is_empty() {
208        outcome
209            .stderr
210            .push_str(&render_diff_summary(&finalize.fs_diff));
211    }
212    if let Some(record_path) = args.process_record.as_ref() {
213        outcome.stderr.push_str(&format!(
214            "[testbench] recorded {} subprocess invocation(s) to {record_path}.\n",
215            finalize.recorded_subprocesses.len()
216        ));
217    }
218    if let Some(toolchain_dir) = args.process_wasi.as_ref() {
219        outcome.stderr.push_str(&format!(
220            "[testbench] subprocess invocations resolved against WASI toolchain at {toolchain_dir}.\n"
221        ));
222    }
223    if let Some(tape) = finalize.tape.as_ref() {
224        outcome.stderr.push_str(&format!(
225            "[testbench] emitted unified tape with {} record(s) to {}.\n",
226            tape.records,
227            tape.path.display(),
228        ));
229    }
230    for leak in &finalize.clock_leaks {
231        outcome.stderr.push_str(&format!(
232            "[testbench] clock leak: {} (count={})\n",
233            leak.capability_id, leak.count,
234        ));
235    }
236    outcome
237}
238
/// `harn test-bench replay`: re-run a script deterministically against
/// a previously recorded subprocess tape, with the clock forced to
/// `paused` and network egress forced to `deny`.
///
/// Delegates to [`run_args`] with a derived [`TestBenchRunArgs`], then
/// optionally surfaces a sidecar annotation tape against the emitted
/// event tape.
async fn replay_args(args: TestBenchReplayArgs) -> RunOutcome {
    // Load + pre-validate annotations before running so a malformed
    // sidecar fails fast and the run output stays focused on the script.
    let annotations_loaded = match args.annotations.as_deref() {
        None => None,
        Some(path) => match AnnotationTape::load(Path::new(path)) {
            Ok(tape) => Some((path.to_string(), tape)),
            Err(error) => {
                return error_outcome(format!("load annotations {path}: {error}"));
            }
        },
    };

    // Surfacing annotations during replay requires the emitted tape so
    // we can resolve `event_id` → record. When the caller did not ask
    // for `--emit-tape`, allocate a temp file and persist the tape
    // there for the duration of the call.
    //
    // NOTE: `tape_temp` must outlive the tape read-back below — dropping
    // the TempDir deletes the directory, which is why it is bound here
    // rather than inside the expression that derives the path.
    let tape_temp = if annotations_loaded.is_some() && args.emit_tape.is_none() {
        match tempfile::tempdir() {
            Ok(dir) => Some(dir),
            Err(error) => return error_outcome(format!("tempdir for replay tape: {error}")),
        }
    } else {
        None
    };
    let emit_tape_path = match (&args.emit_tape, tape_temp.as_ref()) {
        (Some(path), _) => Some(path.clone()),
        (None, Some(dir)) => Some(dir.path().join("run.tape").to_string_lossy().into_owned()),
        (None, None) => None,
    };

    // Replay always pins the clock, denies the network, and feeds the
    // recorded subprocess tape; LLM fixtures pass through unchanged.
    let derived = TestBenchRunArgs {
        file: args.file.clone(),
        start_at_ms: args.start_at_ms,
        clock: "paused".to_string(),
        llm_fixture: args.llm_fixture.clone(),
        llm_record: None,
        fs_overlay: args.fs_overlay.clone(),
        process_replay: Some(args.process_tape.clone()),
        process_record: None,
        process_wasi: None,
        network: "deny".to_string(),
        allow_host: Vec::new(),
        emit_diff: None,
        emit_tape: emit_tape_path.clone(),
        runtime: "paused-tokio".to_string(),
        argv: args.argv.clone(),
    };
    let mut outcome = run_args(derived).await;

    // With both a sidecar and a tape path available, validate the
    // annotations against the freshly emitted tape and surface a
    // grouped summary on stderr. Problems raise the exit code to >= 2.
    if let (Some((annotations_path, annotations)), Some(tape_path)) =
        (annotations_loaded, emit_tape_path)
    {
        match EventTape::load(Path::new(&tape_path)) {
            Ok(tape) => {
                let report = validate_against_tape(&annotations, &tape);
                outcome.stderr.push_str(&render_annotations_block(
                    &annotations_path,
                    &annotations,
                    &tape,
                ));
                if !report.is_ok() {
                    outcome.stderr.push_str(&format!(
                        "[testbench] annotations validation failed with {} problem(s); see `harn test-bench validate-annotations` for the structured report.\n",
                        report.problems.len()
                    ));
                    outcome.exit_code = outcome.exit_code.max(2);
                }
            }
            Err(error) => {
                // Tape load failure downgrades to a warning: the script
                // run itself already succeeded or failed on its own.
                outcome.stderr.push_str(&format!(
                    "warning: failed to load tape for annotation surfacing: {error}\n"
                ));
            }
        }
    }
    outcome
}
317
318/// Render a "[annotations]" stderr block grouping every annotation by
319/// the tape event it targets. Output is deterministic (sorted by `seq`)
320/// so it diffs cleanly across reruns.
321fn render_annotations_block(
322    annotations_path: &str,
323    annotations: &AnnotationTape,
324    tape: &EventTape,
325) -> String {
326    let mut out = String::new();
327    out.push_str(&format!(
328        "[annotations] loaded {} annotation(s) from {annotations_path}\n",
329        annotations.annotations.len()
330    ));
331    let mut sorted_records: Vec<_> = tape.records.iter().collect();
332    sorted_records.sort_by_key(|record| record.seq);
333    for record in sorted_records {
334        let matches = annotations_for_record(annotations, record);
335        if matches.is_empty() {
336            continue;
337        }
338        out.push_str(&format!(
339            "  event seq={} virtual_time_ms={} kind={}\n",
340            record.seq,
341            record.virtual_time_ms,
342            record.kind.label(),
343        ));
344        for annotation in matches {
345            let label = annotation.kind.as_str();
346            let evidence = annotation
347                .evidence
348                .as_deref()
349                .unwrap_or("(no evidence)")
350                .lines()
351                .next()
352                .unwrap_or("(no evidence)");
353            let id = if annotation.id.is_empty() {
354                "(no id)".to_string()
355            } else {
356                annotation.id.clone()
357            };
358            out.push_str(&format!("    [{label}] {id}: {evidence}\n"));
359        }
360    }
361    out
362}
363
/// `harn test-bench validate-annotations`: check an annotation sidecar
/// against an event tape and emit the structured JSON report, either to
/// `--report <path>` or (by default) to stdout.
///
/// Exit code 2 signals validation problems; load/serialize/write
/// failures exit 1 via [`error_outcome`].
fn validate_annotations_args(args: TestBenchValidateAnnotationsArgs) -> RunOutcome {
    let tape = match EventTape::load(Path::new(&args.tape)) {
        Ok(tape) => tape,
        Err(error) => return error_outcome(format!("load tape {}: {error}", args.tape)),
    };
    let annotations = match AnnotationTape::load(Path::new(&args.annotations)) {
        Ok(tape) => tape,
        Err(error) => {
            return error_outcome(format!("load annotations {}: {error}", args.annotations));
        }
    };
    let report = validate_against_tape(&annotations, &tape);
    let json = match serde_json::to_string_pretty(&report) {
        Ok(json) => json,
        Err(error) => return error_outcome(format!("serialize validation report: {error}")),
    };
    let mut outcome = RunOutcome::default();
    if let Some(path) = args.report.as_deref() {
        if let Err(error) = persist_text(&json, Path::new(path)) {
            return error_outcome(format!("write validation report: {error}"));
        }
        // When writing to a file, print a one-line summary on stderr
        // so the terminal still shows the headline numbers.
        outcome.stderr.push_str(&format!(
            "[testbench] annotations validation: checked={} problems={} ({})\n",
            report.annotations_checked,
            report.problems.len(),
            path,
        ));
    } else {
        outcome.stdout.push_str(&json);
        outcome.stdout.push('\n');
    }
    if !report.is_ok() {
        outcome.exit_code = 2;
    }
    outcome
}
400
/// `harn test-bench export-annotations`: filter an annotation sidecar
/// by `--kind` (repeatable; empty means "all kinds") and serialize the
/// selection as JSONL (`--format jsonl`, the default) or as friction
/// events (`--format friction`), written to `--output <path>` or to
/// stdout.
fn export_annotations_args(args: TestBenchExportAnnotationsArgs) -> RunOutcome {
    let annotations = match AnnotationTape::load(Path::new(&args.annotations)) {
        Ok(tape) => tape,
        Err(error) => {
            return error_outcome(format!("load annotations {}: {error}", args.annotations));
        }
    };

    // An empty kinds vector acts as "no filter" in the `selected`
    // predicate below.
    let kinds: Vec<AnnotationKind> = if args.kind.is_empty() {
        Vec::new()
    } else {
        let mut parsed = Vec::with_capacity(args.kind.len());
        for raw in &args.kind {
            match AnnotationKind::parse_cli(raw) {
                Ok(kind) => parsed.push(kind),
                Err(error) => return error_outcome(error),
            }
        }
        parsed
    };

    let selected: Vec<_> = annotations
        .annotations
        .iter()
        .filter(|annotation| kinds.is_empty() || kinds.contains(&annotation.kind))
        .collect();

    let body = match args.format.as_str() {
        // Empty string means "unspecified", which defaults to jsonl.
        "jsonl" | "" => {
            let mut out = String::new();
            for annotation in &selected {
                match serde_json::to_string(annotation) {
                    Ok(line) => {
                        out.push_str(&line);
                        out.push('\n');
                    }
                    Err(error) => {
                        return error_outcome(format!("serialize annotation: {error}"));
                    }
                }
            }
            out
        }
        "friction" => {
            let mut out = String::new();
            for annotation in &selected {
                // `annotation_to_friction_event` returns Option:
                // annotations with no friction mapping are silently
                // skipped rather than treated as errors.
                if let Some(event) = harn_vm::testbench::annotations::annotation_to_friction_event(
                    annotation,
                    &annotations.header,
                ) {
                    match serde_json::to_string(&event) {
                        Ok(line) => {
                            out.push_str(&line);
                            out.push('\n');
                        }
                        Err(error) => {
                            return error_outcome(format!("serialize friction event: {error}"));
                        }
                    }
                }
            }
            out
        }
        other => {
            return error_outcome(format!(
                "--format must be `jsonl` or `friction`, got `{other}`"
            ));
        }
    };

    let mut outcome = RunOutcome::default();
    if let Some(path) = args.output.as_deref() {
        if let Err(error) = persist_text(&body, Path::new(path)) {
            return error_outcome(format!("write export: {error}"));
        }
        outcome.stderr.push_str(&format!(
            "[testbench] exported {} annotation(s) to {} (format={})\n",
            selected.len(),
            path,
            args.format,
        ));
    } else {
        outcome.stdout.push_str(&body);
    }
    outcome
}
487
/// `harn test-bench fidelity`: compare a recorded event tape against a
/// replay tape and emit a divergence report (JSON to `--report <path>`
/// or stdout).
///
/// Two call shapes are accepted:
/// * `--against <recorded>`: re-run `primary` (the script) under a
///   paused clock and compare the freshly emitted tape to `<recorded>`.
/// * `primary` + `--replay <tape>`: load both tapes from disk.
///
/// NOTE(review): when both `--against` and `--replay` are given,
/// `--against` wins and `--replay` is silently ignored — confirm this
/// precedence is intended rather than a mutual-exclusion error.
async fn fidelity_args(args: TestBenchFidelityArgs) -> RunOutcome {
    let mode = match FidelityMode::parse(&args.mode) {
        Ok(mode) => mode,
        Err(error) => return error_outcome(error),
    };

    let (recorded_path, replay_source) = match (&args.against, &args.replay) {
        (Some(recorded), _) => (recorded.clone(), ReplaySource::ReRun),
        (None, Some(replay)) => (args.primary.clone(), ReplaySource::Tape(replay.clone())),
        (None, None) => {
            return error_outcome(
                "expected either two tape paths or `--against <tape> <script>`".to_string(),
            )
        }
    };

    let recorded = match EventTape::load(Path::new(&recorded_path)) {
        Ok(tape) => tape,
        Err(error) => return error_outcome(format!("load recorded tape: {error}")),
    };

    // `prelude` carries the re-run's output (if any) so it is surfaced
    // ahead of the fidelity report.
    let (replay, mut prelude) = match replay_source {
        ReplaySource::ReRun => {
            let temp = match tempfile::tempdir() {
                Ok(dir) => dir,
                Err(error) => return error_outcome(format!("create temp tape dir: {error}")),
            };
            let replay_tape_path = temp.path().join("replay.tape");
            // Prefer the recorded tape's own start time so the re-run
            // replays at the same virtual instant.
            let start_at = args
                .start_at_ms
                .or(recorded.header.started_at_unix_ms)
                .unwrap_or(DEFAULT_TESTBENCH_START_MS);
            let derived = TestBenchRunArgs {
                file: args.primary.clone(),
                start_at_ms: Some(start_at),
                clock: "paused".to_string(),
                llm_fixture: None,
                llm_record: None,
                fs_overlay: args.fs_overlay.clone(),
                process_replay: None,
                process_record: None,
                process_wasi: None,
                network: "deny".to_string(),
                allow_host: Vec::new(),
                emit_diff: None,
                emit_tape: Some(replay_tape_path.to_string_lossy().into_owned()),
                runtime: "paused-tokio".to_string(),
                argv: args.argv.clone(),
            };
            let inner = run_args(derived).await;
            // The tape is loaded fully into memory before `temp` drops
            // (deleting the directory) at the end of this arm.
            match EventTape::load(&replay_tape_path) {
                Ok(tape) => (tape, inner),
                Err(error) => return append_error(inner, format!("load replay tape: {error}")),
            }
        }
        ReplaySource::Tape(path) => match EventTape::load(Path::new(&path)) {
            Ok(tape) => (tape, RunOutcome::default()),
            Err(error) => return error_outcome(format!("load replay tape: {error}")),
        },
    };

    let report = compare(&recorded, &replay, mode);
    let json = match serde_json::to_string_pretty(&report) {
        Ok(json) => json,
        Err(error) => return append_error(prelude, format!("serialize fidelity report: {error}")),
    };
    if let Some(path) = args.report.as_ref() {
        if let Err(error) = persist_fidelity_report(&json, Path::new(path)) {
            return append_error(prelude, format!("write fidelity report: {error}"));
        }
        prelude.stderr.push_str(&format!(
            "[testbench] fidelity report written to {path} (mode={:?}, score={:.4}, divergences={})\n",
            report.mode,
            report.score,
            report.divergences.len(),
        ));
    } else {
        prelude.stdout.push_str(&json);
        prelude.stdout.push('\n');
    }
    // Any divergence makes the run fail so CI can gate on the status.
    if !report.divergences.is_empty() {
        prelude.exit_code = prelude.exit_code.max(report_exit_code(&report));
    }
    prelude
}
573
574fn report_exit_code(report: &FidelityReport) -> i32 {
575    // Exit non-zero on any divergence so CI gates can rely on the
576    // status code without parsing JSON.
577    if report.divergences.is_empty() {
578        0
579    } else {
580        2
581    }
582}
583
/// Persist a serialized fidelity report to disk. Thin alias over
/// [`persist_text`], kept for call-site readability.
fn persist_fidelity_report(json: &str, path: &Path) -> Result<(), String> {
    persist_text(json, path)
}
587
/// Write `body` to `path`, creating any missing parent directories
/// first. Errors are rendered as human-readable strings naming the
/// failing path.
fn persist_text(body: &str, path: &Path) -> Result<(), String> {
    // A relative path like `report.json` has an empty parent; skip
    // directory creation in that case.
    let parent = path.parent().filter(|dir| !dir.as_os_str().is_empty());
    if let Some(dir) = parent {
        fs::create_dir_all(dir).map_err(|error| format!("mkdir {}: {error}", dir.display()))?;
    }
    fs::write(path, body).map_err(|error| format!("write {}: {error}", path.display()))
}
597
598fn build_testbench(args: &TestBenchRunArgs) -> Result<Testbench, String> {
599    let clock = match args.clock.as_str() {
600        "paused" => ClockConfig::Paused {
601            starting_at_ms: args.start_at_ms.unwrap_or(DEFAULT_TESTBENCH_START_MS),
602        },
603        "real" => ClockConfig::Real,
604        other => return Err(format!("--clock must be `paused` or `real`, got `{other}`")),
605    };
606
607    let llm = if let Some(fixture) = &args.llm_fixture {
608        LlmConfig::Replay {
609            fixture: PathBuf::from(fixture),
610        }
611    } else if let Some(record) = &args.llm_record {
612        LlmConfig::Record {
613            fixture: PathBuf::from(record),
614        }
615    } else {
616        LlmConfig::Real
617    };
618
619    let filesystem = match &args.fs_overlay {
620        None => FilesystemConfig::Real,
621        Some(root) => FilesystemConfig::Overlay {
622            worktree: PathBuf::from(root),
623        },
624    };
625
626    let subprocess = if let Some(record) = &args.process_record {
627        SubprocessConfig::Record {
628            tape: PathBuf::from(record),
629        }
630    } else if let Some(replay) = &args.process_replay {
631        SubprocessConfig::Replay {
632            tape: PathBuf::from(replay),
633        }
634    } else if let Some(toolchain) = &args.process_wasi {
635        SubprocessConfig::WasiToolchain {
636            dir: PathBuf::from(toolchain),
637        }
638    } else {
639        SubprocessConfig::Real
640    };
641
642    let network = match args.network.as_str() {
643        "deny" => NetworkConfig::DenyByDefault {
644            allow: args.allow_host.clone(),
645        },
646        "real" => NetworkConfig::Real,
647        other => return Err(format!("--network must be `deny` or `real`, got `{other}`")),
648    };
649
650    let tape = match &args.emit_tape {
651        None => TapeConfig::Off,
652        Some(path) => TapeConfig::Emit {
653            path: PathBuf::from(path),
654            argv: args.argv.clone(),
655            script_path: Some(args.file.clone()),
656        },
657    };
658
659    Ok(Testbench {
660        clock,
661        llm,
662        filesystem,
663        subprocess,
664        network,
665        tape,
666    })
667}
668
669fn persist_overlay_diff(diff: &[DiffEntry], path: &PathBuf) -> Result<(), String> {
670    if let Some(parent) = path.parent() {
671        if !parent.as_os_str().is_empty() {
672            fs::create_dir_all(parent)
673                .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
674        }
675    }
676    let body = render_unified_diff(diff);
677    fs::write(path, body).map_err(|err| format!("write {}: {err}", path.display()))
678}
679
680fn render_diff_summary(diff: &[DiffEntry]) -> String {
681    let mut out = String::new();
682    out.push_str(&format!(
683        "[testbench] overlay fs diff: {} change(s)\n",
684        diff.len()
685    ));
686    for entry in diff {
687        let label = match &entry.kind {
688            DiffKind::Added { .. } => "added",
689            DiffKind::Modified { .. } => "modified",
690            DiffKind::Deleted => "deleted",
691        };
692        out.push_str(&format!("  {label} {}\n", entry.path.display()));
693    }
694    out
695}
696
697fn error_outcome(message: String) -> RunOutcome {
698    RunOutcome {
699        stdout: String::new(),
700        stderr: format!("error: {message}\n"),
701        exit_code: 1,
702    }
703}
704
705fn append_error(mut outcome: RunOutcome, message: String) -> RunOutcome {
706    outcome.stderr.push_str(&format!("error: {message}\n"));
707    outcome.exit_code = outcome.exit_code.max(1);
708    outcome
709}
710
711fn flush_outcome(outcome: RunOutcome) {
712    use std::io::Write;
713    let _ = std::io::stderr().write_all(outcome.stderr.as_bytes());
714    let _ = std::io::stdout().write_all(outcome.stdout.as_bytes());
715    if outcome.exit_code != 0 {
716        process::exit(outcome.exit_code);
717    }
718}