harn_cli/commands/
test_bench.rs

1//! `harn test-bench` runner.
2//!
3//! Wraps [`crate::commands::run::execute_run`] in a
4//! [`harn_vm::testbench::TestbenchSession`] so a script runs against a
5//! pinned clock, an optional LLM/process tape, and an optional
6//! filesystem overlay — all with deny-by-default network egress.
7//!
8//! The CLI flag names map onto [`harn_vm::testbench::Testbench`] one-for-one.
9
10use std::collections::HashSet;
11use std::fs;
12use std::path::PathBuf;
13use std::process;
14
15use harn_vm::testbench::overlay_fs::{render_unified_diff, DiffEntry, DiffKind};
16use harn_vm::testbench::{
17    ClockConfig, FilesystemConfig, LlmConfig, NetworkConfig, SubprocessConfig, Testbench,
18};
19
20use crate::cli::{TestBenchCommand, TestBenchReplayArgs, TestBenchRunArgs};
21use crate::commands::run::{execute_run, CliLlmMockMode, RunOutcome, RunProfileOptions};
22
/// Default starting point for `--clock paused` runs, expressed in
/// milliseconds since the Unix epoch: 2026-01-01T00:00:00Z.
///
/// Picked to be stable and RFC-3339-friendly. The instant lies *before*
/// the 2038-01-19 signed-32-bit rollover (Y2K38), so the equivalent
/// seconds value (1_767_225_600) still fits in an `i32`. It is also
/// intended to be late enough that date-of-birth style subtraction
/// doesn't underflow.
const DEFAULT_TESTBENCH_START_MS: i64 = 1_767_225_600_000;
28
29pub(crate) async fn run(command: TestBenchCommand) {
30    let outcome = match command {
31        TestBenchCommand::Run(args) => run_args(args).await,
32        TestBenchCommand::Replay(args) => replay_args(args).await,
33    };
34    flush_outcome(outcome);
35}
36
37async fn run_args(args: TestBenchRunArgs) -> RunOutcome {
38    let bench = match build_testbench(&args) {
39        Ok(bench) => bench,
40        Err(message) => return error_outcome(message),
41    };
42    let llm_mode = match (&args.llm_fixture, &args.llm_record) {
43        (Some(_), Some(_)) => {
44            return error_outcome(
45                "--llm-fixture and --llm-record are mutually exclusive".to_string(),
46            )
47        }
48        (Some(path), None) => CliLlmMockMode::Replay {
49            fixture_path: PathBuf::from(path),
50        },
51        (None, Some(path)) => CliLlmMockMode::Record {
52            fixture_path: PathBuf::from(path),
53        },
54        (None, None) => CliLlmMockMode::Off,
55    };
56    let session = match bench.activate() {
57        Ok(session) => session,
58        Err(error) => return error_outcome(format!("activate testbench: {error}")),
59    };
60
61    let outcome = execute_run(
62        &args.file,
63        false,
64        HashSet::new(),
65        args.argv.clone(),
66        Vec::new(),
67        llm_mode,
68        None,
69        RunProfileOptions::default(),
70    )
71    .await;
72
73    let finalize = match session.finalize() {
74        Ok(f) => f,
75        Err(error) => return append_error(outcome, format!("finalize testbench: {error}")),
76    };
77
78    let mut outcome = outcome;
79    if matches!(args.network.as_str(), "deny") {
80        outcome
81            .stderr
82            .push_str("[testbench] network=deny applied for the duration of the run.\n");
83    }
84    if let Some(diff_path) = args.emit_diff.as_ref() {
85        if let Err(error) = persist_overlay_diff(&finalize.fs_diff, &PathBuf::from(diff_path)) {
86            outcome.stderr.push_str(&format!(
87                "warning: failed to write fs diff to {diff_path}: {error}\n"
88            ));
89        }
90    } else if !finalize.fs_diff.is_empty() {
91        outcome
92            .stderr
93            .push_str(&render_diff_summary(&finalize.fs_diff));
94    }
95    if let Some(record_path) = args.process_record.as_ref() {
96        outcome.stderr.push_str(&format!(
97            "[testbench] recorded {} subprocess invocation(s) to {record_path}.\n",
98            finalize.recorded_subprocesses.len()
99        ));
100    }
101    outcome
102}
103
104async fn replay_args(args: TestBenchReplayArgs) -> RunOutcome {
105    let derived = TestBenchRunArgs {
106        file: args.file.clone(),
107        start_at_ms: args.start_at_ms,
108        clock: "paused".to_string(),
109        llm_fixture: args.llm_fixture.clone(),
110        llm_record: None,
111        fs_overlay: args.fs_overlay.clone(),
112        process_replay: Some(args.process_tape.clone()),
113        process_record: None,
114        network: "deny".to_string(),
115        allow_host: Vec::new(),
116        emit_diff: None,
117        argv: args.argv.clone(),
118    };
119    run_args(derived).await
120}
121
122fn build_testbench(args: &TestBenchRunArgs) -> Result<Testbench, String> {
123    let clock = match args.clock.as_str() {
124        "paused" => ClockConfig::Paused {
125            starting_at_ms: args.start_at_ms.unwrap_or(DEFAULT_TESTBENCH_START_MS),
126        },
127        "real" => ClockConfig::Real,
128        other => return Err(format!("--clock must be `paused` or `real`, got `{other}`")),
129    };
130
131    let llm = if let Some(fixture) = &args.llm_fixture {
132        LlmConfig::Replay {
133            fixture: PathBuf::from(fixture),
134        }
135    } else if let Some(record) = &args.llm_record {
136        LlmConfig::Record {
137            fixture: PathBuf::from(record),
138        }
139    } else {
140        LlmConfig::Real
141    };
142
143    let filesystem = match &args.fs_overlay {
144        None => FilesystemConfig::Real,
145        Some(root) => FilesystemConfig::Overlay {
146            worktree: PathBuf::from(root),
147        },
148    };
149
150    let subprocess = if let Some(record) = &args.process_record {
151        SubprocessConfig::Record {
152            tape: PathBuf::from(record),
153        }
154    } else if let Some(replay) = &args.process_replay {
155        SubprocessConfig::Replay {
156            tape: PathBuf::from(replay),
157        }
158    } else {
159        SubprocessConfig::Real
160    };
161
162    let network = match args.network.as_str() {
163        "deny" => NetworkConfig::DenyByDefault {
164            allow: args.allow_host.clone(),
165        },
166        "real" => NetworkConfig::Real,
167        other => return Err(format!("--network must be `deny` or `real`, got `{other}`")),
168    };
169
170    Ok(Testbench {
171        clock,
172        llm,
173        filesystem,
174        subprocess,
175        network,
176    })
177}
178
179fn persist_overlay_diff(diff: &[DiffEntry], path: &PathBuf) -> Result<(), String> {
180    if let Some(parent) = path.parent() {
181        if !parent.as_os_str().is_empty() {
182            fs::create_dir_all(parent)
183                .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
184        }
185    }
186    let body = render_unified_diff(diff);
187    fs::write(path, body).map_err(|err| format!("write {}: {err}", path.display()))
188}
189
190fn render_diff_summary(diff: &[DiffEntry]) -> String {
191    let mut out = String::new();
192    out.push_str(&format!(
193        "[testbench] overlay fs diff: {} change(s)\n",
194        diff.len()
195    ));
196    for entry in diff {
197        let label = match &entry.kind {
198            DiffKind::Added { .. } => "added",
199            DiffKind::Modified { .. } => "modified",
200            DiffKind::Deleted => "deleted",
201        };
202        out.push_str(&format!("  {label} {}\n", entry.path.display()));
203    }
204    out
205}
206
207fn error_outcome(message: String) -> RunOutcome {
208    RunOutcome {
209        stdout: String::new(),
210        stderr: format!("error: {message}\n"),
211        exit_code: 1,
212    }
213}
214
215fn append_error(mut outcome: RunOutcome, message: String) -> RunOutcome {
216    outcome.stderr.push_str(&format!("error: {message}\n"));
217    outcome.exit_code = outcome.exit_code.max(1);
218    outcome
219}
220
221fn flush_outcome(outcome: RunOutcome) {
222    use std::io::Write;
223    let _ = std::io::stderr().write_all(outcome.stderr.as_bytes());
224    let _ = std::io::stdout().write_all(outcome.stdout.as_bytes());
225    if outcome.exit_code != 0 {
226        process::exit(outcome.exit_code);
227    }
228}
harn_cli/commands/test_bench.rs

harn_cli/commands/
test_bench.rs