1pub mod annotations;
40pub mod fidelity;
41pub mod mcp_mock;
42pub mod overlay_fs;
43pub mod process_tape;
44pub mod tape;
45#[cfg(feature = "testbench-wasi")]
46pub mod wasi_process;
47
48use std::path::PathBuf;
49use std::sync::Arc;
50
51use crate::clock_mock::leak_audit::{self, ClockLeak};
52use crate::clock_mock::{install_override, ClockOverrideGuard, MockClock};
53use crate::egress::reset_egress_policy_for_host;
54
55use overlay_fs::{install_overlay, OverlayFs, OverlayFsGuard};
56use process_tape::{install_process_tape, ProcessTape, ProcessTapeGuard, ProcessTapeMode};
57use tape::{install_recorder, TapeHeader, TapeRecorder, TapeRecorderGuard};
58
/// Declarative description of a testbench: one configuration value per
/// axis (clock, LLM, filesystem, subprocesses, network, tape).
///
/// The derived `Default` selects each axis's `#[default]` variant, i.e.
/// `Real` everywhere and `TapeConfig::Off`. Nothing is installed until
/// [`Testbench::activate`] is called.
#[derive(Debug, Default, Clone)]
pub struct Testbench {
    /// Wall-clock behaviour for the session.
    pub clock: ClockConfig,
    /// LLM fixture mode. `TestbenchSession::install` only binds and drops
    /// this value; it is presumably consumed elsewhere — TODO confirm.
    pub llm: LlmConfig,
    /// Real filesystem or an overlay rooted at a worktree.
    pub filesystem: FilesystemConfig,
    /// Subprocess handling: real, recorded, replayed, or WASI toolchain.
    pub subprocess: SubprocessConfig,
    /// Network egress: untouched or deny-by-default with an allow list.
    pub network: NetworkConfig,
    /// Whether a tape is recorded and persisted at finalize time.
    pub tape: TapeConfig,
}
70
/// Clock axis of a [`Testbench`].
#[derive(Debug, Default, Clone)]
pub enum ClockConfig {
    /// Leave the clock alone: activation installs no override.
    #[default]
    Real,
    /// Install a `MockClock` created with `MockClock::at_wall_ms(starting_at_ms)`.
    ///
    /// `starting_at_ms` is also forwarded to the emitted tape header as the
    /// session start time.
    Paused { starting_at_ms: i64 },
}
82
/// LLM axis of a [`Testbench`].
///
/// NOTE(review): in this file the value is only stored on the bench and
/// then bound to an unused local during activation; the variant semantics
/// below are inferred from the names — confirm against the consumer.
#[derive(Debug, Default, Clone)]
pub enum LlmConfig {
    /// No fixture involved.
    #[default]
    Real,
    /// Presumably serves LLM responses from the fixture at `fixture`.
    Replay { fixture: PathBuf },
    /// Presumably records LLM responses into the fixture at `fixture`.
    Record { fixture: PathBuf },
}
98
/// Filesystem axis of a [`Testbench`].
#[derive(Debug, Default, Clone)]
pub enum FilesystemConfig {
    /// No overlay is installed.
    #[default]
    Real,
    /// Activation builds `OverlayFs::rooted_at(worktree)` and installs it;
    /// the overlay's `diff()` is reported by `TestbenchSession::finalize`.
    Overlay { worktree: PathBuf },
}
110
/// Subprocess axis of a [`Testbench`].
#[derive(Debug, Default, Clone)]
pub enum SubprocessConfig {
    /// No process tape is installed.
    #[default]
    Real,
    /// Install a fresh recording `ProcessTape`; it is persisted to `tape`
    /// when the session is finalized.
    Record { tape: PathBuf },
    /// Load a `ProcessTape` from `tape` at activation time and install it.
    /// Activation fails with `TestbenchError::Subprocess` if loading fails.
    Replay { tape: PathBuf },
    /// Install the WASI toolchain rooted at `dir`.
    ///
    /// Requires the `testbench-wasi` Cargo feature; without it activation
    /// returns `TestbenchError::Subprocess`. With the feature enabled,
    /// activation also fails if `dir` does not exist.
    WasiToolchain { dir: PathBuf },
}
132
/// Network axis of a [`Testbench`].
#[derive(Debug, Default, Clone)]
pub enum NetworkConfig {
    /// Egress environment variables are left untouched.
    #[default]
    Real,
    /// Deny egress by default for the lifetime of the session.
    ///
    /// Activation sets `HARN_EGRESS_DEFAULT=deny`, sets `HARN_EGRESS_ALLOW`
    /// to the comma-joined `allow` list (or removes it when the list is
    /// empty) and removes `HARN_EGRESS_DENY`. The previous values are put
    /// back when the session is dropped.
    DenyByDefault {
        /// Entries written, comma-separated, into `HARN_EGRESS_ALLOW`.
        allow: Vec<String>,
    },
}
149
/// Tape axis of a [`Testbench`].
#[derive(Debug, Default, Clone)]
pub enum TapeConfig {
    /// No recorder is installed and nothing is written at finalize time.
    #[default]
    Off,
    /// Install a `TapeRecorder` and persist its snapshot when finalizing.
    Emit {
        /// Destination the tape is persisted to.
        path: PathBuf,
        /// Argument vector passed to `TapeHeader::current`.
        argv: Vec<String>,
        /// Optional script path passed to `TapeHeader::current`.
        script_path: Option<String>,
    },
}
170
171impl Testbench {
172 pub fn builder() -> TestbenchBuilder {
174 TestbenchBuilder::default()
175 }
176
177 pub fn activate(self) -> Result<TestbenchSession, TestbenchError> {
180 TestbenchSession::install(self)
181 }
182}
183
/// Fluent builder for [`Testbench`].
///
/// Each setter overwrites one axis of the wrapped configuration, so the
/// last call for a given axis wins.
#[derive(Debug, Default, Clone)]
pub struct TestbenchBuilder {
    // Configuration accumulated so far; handed out by `build`.
    bench: Testbench,
}
189
190impl TestbenchBuilder {
191 pub fn paused_clock_at_ms(mut self, starting_at_ms: i64) -> Self {
192 self.bench.clock = ClockConfig::Paused { starting_at_ms };
193 self
194 }
195
196 pub fn replay_llm(mut self, fixture: impl Into<PathBuf>) -> Self {
197 self.bench.llm = LlmConfig::Replay {
198 fixture: fixture.into(),
199 };
200 self
201 }
202
203 pub fn record_llm(mut self, fixture: impl Into<PathBuf>) -> Self {
204 self.bench.llm = LlmConfig::Record {
205 fixture: fixture.into(),
206 };
207 self
208 }
209
210 pub fn fs_overlay(mut self, worktree: impl Into<PathBuf>) -> Self {
211 self.bench.filesystem = FilesystemConfig::Overlay {
212 worktree: worktree.into(),
213 };
214 self
215 }
216
217 pub fn record_subprocesses(mut self, tape: impl Into<PathBuf>) -> Self {
218 self.bench.subprocess = SubprocessConfig::Record { tape: tape.into() };
219 self
220 }
221
222 pub fn replay_subprocesses(mut self, tape: impl Into<PathBuf>) -> Self {
223 self.bench.subprocess = SubprocessConfig::Replay { tape: tape.into() };
224 self
225 }
226
227 pub fn wasi_toolchain(mut self, dir: impl Into<PathBuf>) -> Self {
230 self.bench.subprocess = SubprocessConfig::WasiToolchain { dir: dir.into() };
231 self
232 }
233
234 pub fn deny_network(mut self) -> Self {
235 self.bench.network = NetworkConfig::DenyByDefault { allow: Vec::new() };
236 self
237 }
238
239 pub fn allow_network(mut self, allow: impl IntoIterator<Item = String>) -> Self {
240 self.bench.network = NetworkConfig::DenyByDefault {
241 allow: allow.into_iter().collect(),
242 };
243 self
244 }
245
246 pub fn emit_tape(mut self, path: impl Into<PathBuf>) -> Self {
247 self.bench.tape = TapeConfig::Emit {
248 path: path.into(),
249 argv: Vec::new(),
250 script_path: None,
251 };
252 self
253 }
254
255 pub fn emit_tape_for(
256 mut self,
257 path: impl Into<PathBuf>,
258 script_path: Option<String>,
259 argv: Vec<String>,
260 ) -> Self {
261 self.bench.tape = TapeConfig::Emit {
262 path: path.into(),
263 argv,
264 script_path,
265 };
266 self
267 }
268
269 pub fn build(self) -> Testbench {
270 self.bench
271 }
272}
273
/// Live handle for an activated [`Testbench`].
///
/// Owns the guards returned by the various `install_*` calls plus the
/// bookkeeping `finalize` needs. Dropping the session restores any egress
/// environment variables it changed and then drops the guards. Field order
/// matters here: Rust drops fields in declaration order.
#[must_use = "the testbench tears down on drop; bind the handle to a `_session` local"]
pub struct TestbenchSession {
    /// Guard from `install_override`; `Some` only for a paused clock.
    _clock: Option<ClockOverrideGuard>,
    /// Guard from `install_process_tape`; `Some` for record/replay modes.
    _process: Option<ProcessTapeGuard>,
    /// Guard from `install_overlay`; `Some` when an overlay is configured.
    _overlay: Option<OverlayFsGuard>,
    /// Guard from `install_recorder`; `Some` when tape emission is on.
    _recorder: Option<TapeRecorderGuard>,
    /// Shared handle to the installed process tape, if any.
    process_tape: Option<Arc<ProcessTape>>,
    /// Shared handle to the installed overlay, if any.
    overlay: Option<Arc<OverlayFs>>,
    /// Shared handle to the installed tape recorder, if any.
    recorder: Option<Arc<TapeRecorder>>,
    /// Where `finalize` persists the emitted tape.
    tape_path: Option<PathBuf>,
    /// Paused-clock start time, forwarded to the tape header.
    tape_started_at_unix_ms: Option<i64>,
    /// Script path forwarded to the tape header.
    tape_script_path: Option<String>,
    /// Argument vector forwarded to the tape header.
    tape_argv: Vec<String>,
    /// `Record` only for `SubprocessConfig::Record`; `Replay` for every
    /// other subprocess configuration, including `Real`.
    subprocess_mode: ProcessTapeMode,
    /// Tape path for record/replay modes; `None` otherwise.
    subprocess_tape_path: Option<PathBuf>,
    /// Guard from `install_wasi_toolchain` (feature `testbench-wasi`).
    #[cfg(feature = "testbench-wasi")]
    _wasi_toolchain: Option<wasi_process::WasiToolchainGuard>,
    /// Egress variables captured before they were overwritten; `Some` only
    /// for `NetworkConfig::DenyByDefault`. Consumed by `Drop`.
    saved_egress_env: Option<SavedEgressEnv>,
}
298
/// Snapshot of the three egress environment variables taken before a
/// deny-by-default session overwrites them.
///
/// Each field is captured with `std::env::var(..).ok()`, so `None` means
/// the variable was unset or did not hold valid Unicode; either way it is
/// removed again on restore.
#[derive(Debug, Clone)]
struct SavedEgressEnv {
    /// Prior value of `HARN_EGRESS_DEFAULT`.
    default: Option<String>,
    /// Prior value of `HARN_EGRESS_ALLOW`.
    allow: Option<String>,
    /// Prior value of `HARN_EGRESS_DENY`.
    deny: Option<String>,
}
305
impl TestbenchSession {
    /// Installs every configured axis of `bench` and returns the handle
    /// that keeps them installed.
    ///
    /// Order of operations: reset the clock-leak audit, clock override,
    /// subprocess tape / WASI toolchain, filesystem overlay, egress
    /// environment, tape recorder.
    ///
    /// # Errors
    ///
    /// Returns `TestbenchError::Subprocess` when a replay tape cannot be
    /// loaded, when the WASI toolchain directory does not exist, or when
    /// `WasiToolchain` is requested without the `testbench-wasi` feature.
    /// All of these happen before the environment is modified; guards that
    /// were already created are dropped on the early return.
    fn install(bench: Testbench) -> Result<Self, TestbenchError> {
        // Start every session with a clean audit log (presumably discards
        // leaks recorded by an earlier, un-finalized session).
        leak_audit::reset();

        // The paused start time is kept separately so it can be written
        // into the tape header at finalize time.
        let (clock_guard, started_at_unix_ms) = match bench.clock {
            ClockConfig::Real => (None, None),
            ClockConfig::Paused { starting_at_ms } => (
                Some(install_override(MockClock::at_wall_ms(starting_at_ms))),
                Some(starting_at_ms),
            ),
        };

        // The LLM configuration is not acted on here; the binding only
        // moves the field out of `bench`.
        #[allow(clippy::no_effect_underscore_binding)]
        let _llm_config = bench.llm;

        // Filled in by the `WasiToolchain` arm below; the other arms leave
        // it as `None`.
        #[cfg(feature = "testbench-wasi")]
        let mut wasi_guard: Option<wasi_process::WasiToolchainGuard> = None;

        let (process_tape, process_guard, subprocess_mode, subprocess_tape_path) =
            match bench.subprocess {
                // No tape installed; the mode is still reported as `Replay`.
                SubprocessConfig::Real => (None, None, ProcessTapeMode::Replay, None),
                SubprocessConfig::Record { tape } => {
                    let active = Arc::new(ProcessTape::recording());
                    let guard = install_process_tape(Arc::clone(&active));
                    (
                        Some(Arc::clone(&active)),
                        Some(guard),
                        ProcessTapeMode::Record,
                        Some(tape),
                    )
                }
                SubprocessConfig::Replay { tape } => {
                    // Load eagerly so a bad tape fails activation.
                    let loaded = ProcessTape::load(&tape).map_err(TestbenchError::Subprocess)?;
                    let active = Arc::new(loaded);
                    let guard = install_process_tape(Arc::clone(&active));
                    (
                        Some(Arc::clone(&active)),
                        Some(guard),
                        ProcessTapeMode::Replay,
                        Some(tape),
                    )
                }
                #[cfg(feature = "testbench-wasi")]
                SubprocessConfig::WasiToolchain { dir } => {
                    if !dir.exists() {
                        return Err(TestbenchError::Subprocess(format!(
                            "wasi toolchain directory does not exist: {}",
                            dir.display()
                        )));
                    }
                    wasi_guard = Some(wasi_process::install_wasi_toolchain(dir));
                    (None, None, ProcessTapeMode::Replay, None)
                }
                // Without the feature the variant still exists, so reject
                // it at runtime with an explanatory error.
                #[cfg(not(feature = "testbench-wasi"))]
                SubprocessConfig::WasiToolchain { .. } => {
                    return Err(TestbenchError::Subprocess(
                        "WasiToolchain requires the `testbench-wasi` Cargo feature".to_string(),
                    ));
                }
            };

        let (overlay, overlay_guard) = match bench.filesystem {
            FilesystemConfig::Real => (None, None),
            FilesystemConfig::Overlay { worktree } => {
                let overlay = Arc::new(OverlayFs::rooted_at(worktree));
                let guard = install_overlay(Arc::clone(&overlay));
                (Some(overlay), Some(guard))
            }
        };

        let saved_egress_env = match bench.network {
            NetworkConfig::Real => None,
            NetworkConfig::DenyByDefault { allow } => {
                // Capture the current values first so `Drop` can put them
                // back. Unset and non-Unicode values both become `None`.
                let saved = SavedEgressEnv {
                    default: std::env::var("HARN_EGRESS_DEFAULT").ok(),
                    allow: std::env::var("HARN_EGRESS_ALLOW").ok(),
                    deny: std::env::var("HARN_EGRESS_DENY").ok(),
                };
                // NOTE(review): presumably clears a cached policy so the
                // variables written below are re-read — confirm in `egress`.
                reset_egress_policy_for_host();
                std::env::set_var("HARN_EGRESS_DEFAULT", "deny");
                if allow.is_empty() {
                    std::env::remove_var("HARN_EGRESS_ALLOW");
                } else {
                    std::env::set_var("HARN_EGRESS_ALLOW", allow.join(","));
                }
                std::env::remove_var("HARN_EGRESS_DENY");
                Some(saved)
            }
        };

        let (recorder, recorder_guard, tape_path, tape_argv, tape_script_path) = match bench.tape {
            TapeConfig::Off => (None, None, None, Vec::new(), None),
            TapeConfig::Emit {
                path,
                argv,
                script_path,
            } => {
                let recorder = Arc::new(TapeRecorder::new());
                let guard = install_recorder(Arc::clone(&recorder));
                (
                    Some(Arc::clone(&recorder)),
                    Some(guard),
                    Some(path),
                    argv,
                    script_path,
                )
            }
        };

        Ok(Self {
            _clock: clock_guard,
            _process: process_guard,
            _overlay: overlay_guard,
            _recorder: recorder_guard,
            process_tape,
            overlay,
            recorder,
            tape_path,
            tape_started_at_unix_ms: started_at_unix_ms,
            tape_script_path,
            tape_argv,
            subprocess_mode,
            subprocess_tape_path,
            #[cfg(feature = "testbench-wasi")]
            _wasi_toolchain: wasi_guard,
            saved_egress_env,
        })
    }

    /// Returns the subprocess tape mode chosen at activation.
    ///
    /// This is `Record` only for `SubprocessConfig::Record`; every other
    /// configuration, including `Real`, reports `Replay`.
    pub fn subprocess_mode(&self) -> ProcessTapeMode {
        self.subprocess_mode
    }

    /// Returns the subprocess tape path, present only for the record and
    /// replay configurations.
    pub fn subprocess_tape_path(&self) -> Option<&std::path::Path> {
        self.subprocess_tape_path.as_deref()
    }

    /// Returns the installed filesystem overlay, if one was configured.
    pub fn overlay(&self) -> Option<&Arc<OverlayFs>> {
        self.overlay.as_ref()
    }

    /// Returns the installed process tape, if one was configured.
    pub fn process_tape(&self) -> Option<&Arc<ProcessTape>> {
        self.process_tape.as_ref()
    }

    /// Returns the installed tape recorder, if tape emission is enabled.
    pub fn tape_recorder(&self) -> Option<&Arc<TapeRecorder>> {
        self.recorder.as_ref()
    }

    /// Consumes the session, persists whatever was recorded and returns a
    /// summary of the run.
    ///
    /// Steps, in order: collect the overlay diff; in record mode persist
    /// the subprocess tape and collect its entries; persist the emitted
    /// tape if a recorder is installed; drain the clock-leak audit. The
    /// session is dropped, and thereby torn down, when this returns.
    ///
    /// # Errors
    ///
    /// Returns `TestbenchError::Subprocess` if the subprocess tape cannot
    /// be persisted and `TestbenchError::Tape` if the emitted tape cannot
    /// be persisted. On error the later steps are skipped, so the leak
    /// audit is not drained.
    pub fn finalize(self) -> Result<TestbenchFinalize, TestbenchError> {
        // An absent overlay yields the default (empty) diff.
        let diff = self
            .overlay
            .as_ref()
            .map(|overlay| overlay.diff())
            .unwrap_or_default();
        // Only record mode writes a tape and reports entries; replay and
        // real modes report an empty list.
        let recorded = if matches!(self.subprocess_mode, ProcessTapeMode::Record) {
            if let (Some(tape), Some(path)) = (
                self.process_tape.as_ref(),
                self.subprocess_tape_path.as_ref(),
            ) {
                tape.persist(path).map_err(TestbenchError::Subprocess)?;
            }
            self.process_tape
                .as_ref()
                .map(|tape| tape.recorded())
                .unwrap_or_default()
        } else {
            Vec::new()
        };
        let mut emitted_tape = None;
        if let (Some(recorder), Some(path)) = (self.recorder.as_ref(), self.tape_path.as_ref()) {
            // The header carries the paused-clock start time (if any), the
            // script path and the argv captured at activation.
            let header = TapeHeader::current(
                self.tape_started_at_unix_ms,
                self.tape_script_path.clone(),
                self.tape_argv.clone(),
            );
            let tape = recorder.snapshot(header);
            tape.persist(path).map_err(TestbenchError::Tape)?;
            emitted_tape = Some(EmittedTape {
                path: path.clone(),
                records: tape.records.len(),
            });
        }
        // Hand the accumulated leak records to the caller.
        let clock_leaks = leak_audit::drain();
        Ok(TestbenchFinalize {
            fs_diff: diff,
            recorded_subprocesses: recorded,
            tape: emitted_tape,
            clock_leaks,
        })
    }
}
522
523impl Drop for TestbenchSession {
524 fn drop(&mut self) {
525 if let Some(saved) = self.saved_egress_env.take() {
526 restore_env("HARN_EGRESS_DEFAULT", saved.default);
527 restore_env("HARN_EGRESS_ALLOW", saved.allow);
528 restore_env("HARN_EGRESS_DENY", saved.deny);
529 reset_egress_policy_for_host();
530 }
531 }
534}
535
/// Returns the environment variable `key` to a previously captured state.
///
/// `Some(value)` sets the variable to `value`; `None` removes it.
fn restore_env(key: &str, prior: Option<String>) {
    if let Some(value) = prior {
        std::env::set_var(key, value);
    } else {
        std::env::remove_var(key);
    }
}
542
/// Summary returned by `TestbenchSession::finalize`.
#[derive(Debug, Default, Clone)]
pub struct TestbenchFinalize {
    /// Result of the overlay's `diff()`; empty when no overlay was installed.
    pub fs_diff: Vec<overlay_fs::DiffEntry>,
    /// Entries captured by the process tape; empty unless the session ran
    /// in record mode.
    pub recorded_subprocesses: Vec<process_tape::TapeEntry>,
    /// Details of the tape written to disk, if tape emission was enabled.
    pub tape: Option<EmittedTape>,
    /// Clock leaks drained from the leak audit at finalize time.
    pub clock_leaks: Vec<ClockLeak>,
}
556
/// Describes a tape that `TestbenchSession::finalize` persisted.
#[derive(Debug, Clone)]
pub struct EmittedTape {
    /// Path the tape was persisted to.
    pub path: PathBuf,
    /// Number of records in the persisted tape snapshot.
    pub records: usize,
}
563
/// Failure raised while activating or finalizing a testbench session.
///
/// Each variant carries a human-readable message from the failing layer.
#[derive(Debug)]
pub enum TestbenchError {
    /// Subprocess tape or WASI toolchain problem.
    Subprocess(String),
    /// Emitted tape could not be persisted.
    Tape(String),
}

impl std::fmt::Display for TestbenchError {
    /// Renders the message prefixed with the layer it originated from.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TestbenchError::Subprocess(msg) => {
                f.write_fmt(format_args!("testbench subprocess: {msg}"))
            }
            TestbenchError::Tape(msg) => f.write_fmt(format_args!("testbench tape: {msg}")),
        }
    }
}

impl std::error::Error for TestbenchError {}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `body` while holding `leak_audit::TEST_LOCK`, because the tests
    /// below mutate process-global state (clock override, environment,
    /// leak audit). A poisoned lock is recovered so that one failing test
    /// does not cascade into the others.
    fn serial<F: FnOnce()>(body: F) {
        let _guard = leak_audit::TEST_LOCK
            .lock()
            .unwrap_or_else(|p| p.into_inner());
        body();
    }

    #[test]
    fn paused_clock_pins_now_ms_for_session_lifetime() {
        serial(|| {
            let bench = Testbench::builder()
                .paused_clock_at_ms(1_700_000_000_000)
                .build();
            let session = bench.activate().expect("activate");
            assert_eq!(crate::clock_mock::now_ms(), 1_700_000_000_000);
            crate::clock_mock::advance(std::time::Duration::from_mins(1));
            assert_eq!(crate::clock_mock::now_ms(), 1_700_000_060_000);
            drop(session);
            // Dropping the session must remove the override again.
            assert!(!crate::clock_mock::is_mocked());
        });
    }

    #[test]
    fn deny_by_default_blocks_egress_until_drop() {
        serial(|| {
            // Remember the ambient value: the session restores it on drop,
            // so asserting "unset" would fail spuriously whenever the
            // variable is already exported in the developer's or CI
            // environment.
            let prior = std::env::var("HARN_EGRESS_DEFAULT").ok();
            let bench = Testbench::builder().deny_network().build();
            let session = bench.activate().expect("activate");
            assert_eq!(std::env::var("HARN_EGRESS_DEFAULT").as_deref(), Ok("deny"));
            drop(session);
            assert_eq!(std::env::var("HARN_EGRESS_DEFAULT").ok(), prior);
        });
    }

    #[test]
    fn finalize_surfaces_clock_leaks_for_contrived_capability() {
        serial(|| {
            let bench = Testbench::builder()
                .paused_clock_at_ms(1_700_000_000_000)
                .build();
            let session = bench.activate().expect("activate");

            // Two wall-clock reads and one instant read under an active mock.
            let _ = leak_audit::wall_now("test/contrived_leak");
            let _ = leak_audit::instant_now("test/contrived_instant");
            let _ = leak_audit::wall_now("test/contrived_leak");

            let finalize = session.finalize().expect("finalize");
            let by_id: std::collections::BTreeMap<&str, &ClockLeak> = finalize
                .clock_leaks
                .iter()
                .map(|leak| (leak.capability_id.as_str(), leak))
                .collect();
            let wall = by_id
                .get("test/contrived_leak")
                .expect("wall leak surfaced");
            assert_eq!(wall.count, 2);
            let inst = by_id
                .get("test/contrived_instant")
                .expect("instant leak surfaced");
            assert_eq!(inst.count, 1);

            // A fresh session must not inherit leaks from the previous one.
            let next_session = Testbench::builder()
                .paused_clock_at_ms(1_700_000_000_000)
                .build()
                .activate()
                .expect("activate next");
            let next = next_session.finalize().expect("finalize next");
            assert!(next.clock_leaks.is_empty());
        });
    }

    #[test]
    fn audit_quiet_when_no_mock_is_active() {
        serial(|| {
            leak_audit::reset();
            // With no mock installed, these reads must not be recorded.
            let _ = leak_audit::wall_now("test/no_mock");
            let _ = leak_audit::instant_now("test/no_mock");
            assert!(leak_audit::snapshot().is_empty());
        });
    }
}