Skip to main content

harn_vm/testbench/
overlay_fs.rs

1//! Copy-on-write filesystem overlay.
2//!
3//! Reads pass through to the real filesystem under [`OverlayFs::root`].
4//! Writes (and deletes) land in an in-memory layer keyed by absolute
5//! path, so a hermetic run can observe the underlying tree without ever
6//! mutating it. Once the run finishes, [`OverlayFs::diff`] surfaces a
7//! readable summary of every change — emit it as a unified diff, apply
8//! it back with `git apply`, or discard it.
9//!
10//! Only the surface that stdlib `fs.*` builtins exercise is intercepted:
11//! read/write text and bytes, append, exists, remove, list, create_dir.
12//! `rename`, `copy`, and `metadata` fall through to the underlying fs
13//! (they're mostly useful only for production-style checks the testbench
14//! doesn't replace).
15
16use std::cell::RefCell;
17use std::collections::BTreeMap;
18use std::path::{Component, Path, PathBuf};
19use std::sync::{Arc, Mutex};
20
21use crate::testbench::tape::{self, TapeRecordKind};
22
/// One change in the overlay's write layer relative to the underlying
/// tree.
///
/// Produced by [`OverlayFs::diff`]; directory-only overlay entries never
/// yield a `DiffEntry`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffEntry {
    /// Path of the changed file, copied from the key the overlay layer
    /// stores it under.
    pub path: PathBuf,
    /// How the overlay's view of `path` differs from the underlying tree.
    pub kind: DiffKind,
}
30
/// Classification of a single overlay change. The `content` carried by
/// `Added` and `Modified` is the overlay's version of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffKind {
    /// File created in the overlay (not in the underlying tree).
    Added { content: Vec<u8> },
    /// File present in the underlying tree, content changed in overlay.
    Modified { content: Vec<u8> },
    /// File present in the underlying tree, deleted in overlay.
    Deleted,
}
40
/// State recorded in the in-memory layer for one path. A path with no
/// entry is answered from the real filesystem.
#[derive(Debug, Clone)]
enum OverlayEntry {
    /// Full file contents as last written through the overlay.
    File(Vec<u8>),
    /// Marker that hides the path: reads report `NotFound` and `exists`
    /// reports `false`.
    Deleted,
    /// Directory recorded by `create_dir_all`.
    Directory,
}
47
/// Filesystem view that keeps writes and deletes under `root` in memory
/// and answers everything else from the real filesystem.
#[derive(Debug)]
pub struct OverlayFs {
    /// Overlay root as set by `rooted_at`: canonicalized when possible,
    /// then lexically normalized.
    root: PathBuf,
    /// Write layer, keyed by the path produced by
    /// `canonicalize_for_overlay`.
    layer: Mutex<BTreeMap<PathBuf, OverlayEntry>>,
}
53
54impl OverlayFs {
55    pub fn rooted_at(root: impl Into<PathBuf>) -> Self {
56        let root = root.into();
57        // On macOS the kernel reports `getcwd` as the canonical
58        // (`/private`-prefixed) path even when callers `set_current_dir`
59        // to the un-prefixed form. Canonicalize the overlay root so
60        // `within_root(...)` lines up with `resolve_source_relative_path`,
61        // which sees post-canonicalization paths.
62        let canonical = std::fs::canonicalize(&root).unwrap_or_else(|_| root.clone());
63        Self {
64            root: normalize_logical(&canonical),
65            layer: Mutex::new(BTreeMap::new()),
66        }
67    }
68
69    pub fn root(&self) -> &Path {
70        &self.root
71    }
72
73    fn key(&self, path: &Path) -> PathBuf {
74        canonicalize_for_overlay(path)
75    }
76
77    /// Whether `path` is inside the overlay's root. Calls outside the
78    /// root fall through to the real filesystem so testbench-unaware
79    /// helpers (the LLM provider's own caches, the runtime's session
80    /// store) keep working.
81    fn within_root(&self, path: &Path) -> bool {
82        let key = self.key(path);
83        key.starts_with(&self.root)
84    }
85
86    pub fn read(&self, path: &Path) -> std::io::Result<Vec<u8>> {
87        if !self.within_root(path) {
88            return std::fs::read(path);
89        }
90        let key = self.key(path);
91        let layer = self.layer.lock().expect("overlay layer poisoned");
92        match layer.get(&key) {
93            Some(OverlayEntry::File(bytes)) => Ok(bytes.clone()),
94            Some(OverlayEntry::Deleted) => Err(std::io::Error::new(
95                std::io::ErrorKind::NotFound,
96                format!("overlay: {} was deleted", key.display()),
97            )),
98            Some(OverlayEntry::Directory) => Err(std::io::Error::new(
99                std::io::ErrorKind::IsADirectory,
100                format!("overlay: {} is a directory", key.display()),
101            )),
102            None => std::fs::read(path),
103        }
104    }
105
106    pub fn read_to_string(&self, path: &Path) -> std::io::Result<String> {
107        let bytes = self.read(path)?;
108        String::from_utf8(bytes)
109            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string()))
110    }
111
112    pub fn write(&self, path: &Path, contents: &[u8]) -> std::io::Result<()> {
113        if !self.within_root(path) {
114            return std::fs::write(path, contents);
115        }
116        let key = self.key(path);
117        let mut layer = self.layer.lock().expect("overlay layer poisoned");
118        layer.insert(key, OverlayEntry::File(contents.to_vec()));
119        Ok(())
120    }
121
122    pub fn append(&self, path: &Path, contents: &[u8]) -> std::io::Result<()> {
123        if !self.within_root(path) {
124            return std::fs::OpenOptions::new()
125                .create(true)
126                .append(true)
127                .open(path)
128                .and_then(|mut file| std::io::Write::write_all(&mut file, contents));
129        }
130        let mut combined = match self.read(path) {
131            Ok(bytes) => bytes,
132            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Vec::new(),
133            Err(err) => return Err(err),
134        };
135        combined.extend_from_slice(contents);
136        self.write(path, &combined)
137    }
138
139    pub fn exists(&self, path: &Path) -> bool {
140        if !self.within_root(path) {
141            return path.exists();
142        }
143        let key = self.key(path);
144        let layer = self.layer.lock().expect("overlay layer poisoned");
145        match layer.get(&key) {
146            Some(OverlayEntry::File(_)) | Some(OverlayEntry::Directory) => true,
147            Some(OverlayEntry::Deleted) => false,
148            None => path.exists(),
149        }
150    }
151
152    pub fn remove_file(&self, path: &Path) -> std::io::Result<()> {
153        if !self.within_root(path) {
154            return std::fs::remove_file(path);
155        }
156        let key = self.key(path);
157        let mut layer = self.layer.lock().expect("overlay layer poisoned");
158        // Remove regardless of whether it exists in the underlying tree;
159        // when the original is absent and the overlay had no entry, no-op.
160        let underlying_present = path.exists();
161        match layer.get(&key) {
162            Some(OverlayEntry::Deleted) => Err(std::io::Error::new(
163                std::io::ErrorKind::NotFound,
164                format!("overlay: {} already deleted", key.display()),
165            )),
166            _ => {
167                if underlying_present {
168                    layer.insert(key, OverlayEntry::Deleted);
169                } else {
170                    layer.remove(&key);
171                }
172                Ok(())
173            }
174        }
175    }
176
177    pub fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
178        if !self.within_root(path) {
179            return std::fs::create_dir_all(path);
180        }
181        let key = self.key(path);
182        let mut layer = self.layer.lock().expect("overlay layer poisoned");
183        layer.insert(key, OverlayEntry::Directory);
184        Ok(())
185    }
186
187    pub fn read_dir(&self, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
188        if !self.within_root(path) {
189            let mut entries = Vec::new();
190            for entry in std::fs::read_dir(path)? {
191                let entry = entry?;
192                entries.push(OverlayDirEntry {
193                    path: entry.path(),
194                    is_dir: entry.file_type().map(|t| t.is_dir()).unwrap_or(false),
195                    is_file: entry.file_type().map(|t| t.is_file()).unwrap_or(false),
196                });
197            }
198            return Ok(entries);
199        }
200        let dir_key = self.key(path);
201        let mut entries: BTreeMap<PathBuf, OverlayDirEntry> = BTreeMap::new();
202        if path.exists() {
203            for entry in std::fs::read_dir(path)? {
204                let entry = entry?;
205                let p = entry.path();
206                entries.insert(
207                    p.clone(),
208                    OverlayDirEntry {
209                        path: p,
210                        is_dir: entry.file_type().map(|t| t.is_dir()).unwrap_or(false),
211                        is_file: entry.file_type().map(|t| t.is_file()).unwrap_or(false),
212                    },
213                );
214            }
215        }
216        let layer = self.layer.lock().expect("overlay layer poisoned");
217        for (key, entry) in layer.iter() {
218            if key.parent() != Some(dir_key.as_path()) {
219                continue;
220            }
221            match entry {
222                OverlayEntry::File(_) => {
223                    entries.insert(
224                        key.clone(),
225                        OverlayDirEntry {
226                            path: key.clone(),
227                            is_dir: false,
228                            is_file: true,
229                        },
230                    );
231                }
232                OverlayEntry::Directory => {
233                    entries.insert(
234                        key.clone(),
235                        OverlayDirEntry {
236                            path: key.clone(),
237                            is_dir: true,
238                            is_file: false,
239                        },
240                    );
241                }
242                OverlayEntry::Deleted => {
243                    entries.remove(key);
244                }
245            }
246        }
247        Ok(entries.into_values().collect())
248    }
249
250    /// Snapshot of every overlay change relative to the underlying tree.
251    pub fn diff(&self) -> Vec<DiffEntry> {
252        let layer = self.layer.lock().expect("overlay layer poisoned");
253        let mut diff = Vec::new();
254        for (path, entry) in layer.iter() {
255            match entry {
256                OverlayEntry::File(content) => {
257                    if path.exists() {
258                        let underlying = std::fs::read(path).unwrap_or_default();
259                        if &underlying != content {
260                            diff.push(DiffEntry {
261                                path: path.clone(),
262                                kind: DiffKind::Modified {
263                                    content: content.clone(),
264                                },
265                            });
266                        }
267                    } else {
268                        diff.push(DiffEntry {
269                            path: path.clone(),
270                            kind: DiffKind::Added {
271                                content: content.clone(),
272                            },
273                        });
274                    }
275                }
276                OverlayEntry::Deleted => {
277                    if path.exists() {
278                        diff.push(DiffEntry {
279                            path: path.clone(),
280                            kind: DiffKind::Deleted,
281                        });
282                    }
283                }
284                OverlayEntry::Directory => {}
285            }
286        }
287        diff
288    }
289
290    /// Render the overlay's diff in unified-style format. Convenience
291    /// wrapper around the standalone [`render_unified_diff`] that
292    /// snapshots the layer first.
293    pub fn render_unified_diff(&self) -> String {
294        render_unified_diff(&self.diff())
295    }
296}
297
298/// Render an overlay diff in unified-style format. Binary-safe but
299/// non-text bytes are escaped via `String::from_utf8_lossy`, so this
300/// is informational and not roundtrippable through `git apply` for
301/// non-utf8 files.
302pub fn render_unified_diff(diff: &[DiffEntry]) -> String {
303    let mut out = String::new();
304    for entry in diff {
305        match &entry.kind {
306            DiffKind::Added { content } => {
307                out.push_str(&format!("--- /dev/null\n+++ b/{}\n", entry.path.display()));
308                push_lines(&mut out, content, '+');
309            }
310            DiffKind::Modified { content } => {
311                let underlying = std::fs::read(&entry.path).unwrap_or_default();
312                out.push_str(&format!(
313                    "--- a/{}\n+++ b/{}\n",
314                    entry.path.display(),
315                    entry.path.display()
316                ));
317                push_lines(&mut out, &underlying, '-');
318                push_lines(&mut out, content, '+');
319            }
320            DiffKind::Deleted => {
321                let underlying = std::fs::read(&entry.path).unwrap_or_default();
322                out.push_str(&format!("--- a/{}\n+++ /dev/null\n", entry.path.display()));
323                push_lines(&mut out, &underlying, '-');
324            }
325        }
326    }
327    out
328}
329
/// One entry returned by `OverlayFs::read_dir`.
#[derive(Debug, Clone)]
pub struct OverlayDirEntry {
    /// Path of the entry: the path reported by `std::fs::read_dir` for
    /// on-disk entries, the overlay key for overlay-created ones.
    pub path: PathBuf,
    /// True for directories; false when the on-disk file type could not
    /// be determined.
    pub is_dir: bool,
    /// True for regular files; false when the on-disk file type could
    /// not be determined.
    pub is_file: bool,
}
336
/// Append `bytes` to `out` one line at a time, each line introduced by
/// `prefix` and terminated by exactly one `\n` (a missing final newline
/// is supplied). Invalid UTF-8 is decoded lossily; empty input appends
/// nothing.
fn push_lines(out: &mut String, bytes: &[u8], prefix: char) {
    let decoded = String::from_utf8_lossy(bytes);
    for chunk in decoded.split_inclusive('\n') {
        // Emit the line body without its terminator, then always close
        // the line ourselves.
        let body = chunk.strip_suffix('\n').unwrap_or(chunk);
        out.push(prefix);
        out.push_str(body);
        out.push('\n');
    }
}
347
/// Lexically normalize without resolving symlinks. Required because the
/// overlay layer is a logical map keyed by absolute path, not a real
/// filesystem; symlink chasing would be a security footgun.
///
/// Relative paths are anchored at the current directory (or left as-is
/// when it cannot be determined); `.` components are dropped and each
/// `..` removes the component before it.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = match std::env::current_dir() {
        _ if path.is_absolute() => path.to_path_buf(),
        Ok(cwd) => cwd.join(path),
        Err(_) => path.to_path_buf(),
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut normalized, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    normalized.pop();
                }
                other => normalized.push(other),
            }
            normalized
        })
}
371
372/// Make a path comparable to a canonicalized overlay root. If the file
373/// itself canonicalizes (it exists on disk), use that. Otherwise
374/// canonicalize the deepest existing ancestor and re-join the trailing
375/// non-existent components, so a not-yet-written file under a real
376/// directory still lands in the same key-space as the root.
377fn canonicalize_for_overlay(path: &Path) -> PathBuf {
378    let absolute = normalize_logical(path);
379    if let Ok(direct) = std::fs::canonicalize(&absolute) {
380        return direct;
381    }
382    let mut suffix = Vec::new();
383    let mut probe = absolute.clone();
384    loop {
385        if let Ok(canon) = std::fs::canonicalize(&probe) {
386            let mut joined = canon;
387            for component in suffix.iter().rev() {
388                joined.push(component);
389            }
390            return joined;
391        }
392        match probe.file_name().map(|n| n.to_owned()) {
393            Some(name) => {
394                suffix.push(name);
395                if !probe.pop() {
396                    break;
397                }
398            }
399            None => break,
400        }
401    }
402    absolute
403}
404
// Per-thread slot holding the currently installed overlay, if any.
// `install_overlay` fills it, dropping the returned guard restores the
// previous value, and `active_overlay` reads it.
thread_local! {
    static ACTIVE_OVERLAY: RefCell<Option<Arc<OverlayFs>>> = const { RefCell::new(None) };
}
408
409pub struct OverlayFsGuard {
410    previous: Option<Arc<OverlayFs>>,
411}
412
413impl Drop for OverlayFsGuard {
414    fn drop(&mut self) {
415        let prev = self.previous.take();
416        ACTIVE_OVERLAY.with(|slot| {
417            *slot.borrow_mut() = prev;
418        });
419    }
420}
421
422pub fn install_overlay(overlay: Arc<OverlayFs>) -> OverlayFsGuard {
423    let previous = ACTIVE_OVERLAY.with(|slot| slot.replace(Some(overlay)));
424    OverlayFsGuard { previous }
425}
426
427pub fn active_overlay() -> Option<Arc<OverlayFs>> {
428    ACTIVE_OVERLAY.with(|slot| slot.borrow().clone())
429}
430
431/// Helpers for fs builtins. Each helper falls through to `std::fs` when
432/// no overlay is active, keeping the testbench opt-in.
433///
434/// Every successful read/write/delete also pushes a [`TapeRecordKind`]
435/// into the active unified-tape recorder when one is installed, so the
436/// fidelity oracle can compare FS effects across runs even when the
437/// per-axis overlay diff is identical (the order in which writes land
438/// also matters for replay determinism).
439pub mod helpers {
440    use super::*;
441
442    fn record_file_read(path: &Path, bytes: &[u8]) {
443        // Skip the hash + path stringification when no recorder is
444        // installed — the fast path is the production path.
445        if tape::active_recorder().is_none() {
446            return;
447        }
448        let path_str = path.to_string_lossy().into_owned();
449        let len = bytes.len() as u64;
450        let hash = tape::content_hash(bytes);
451        tape::with_active_recorder(|_recorder| {
452            Some(TapeRecordKind::FileRead {
453                path: path_str,
454                content_hash: hash,
455                len_bytes: len,
456            })
457        });
458    }
459
460    fn record_file_write(path: &Path, bytes: &[u8]) {
461        if tape::active_recorder().is_none() {
462            return;
463        }
464        let path_str = path.to_string_lossy().into_owned();
465        let len = bytes.len() as u64;
466        let hash = tape::content_hash(bytes);
467        tape::with_active_recorder(|_recorder| {
468            Some(TapeRecordKind::FileWrite {
469                path: path_str,
470                content_hash: hash,
471                len_bytes: len,
472            })
473        });
474    }
475
476    fn record_file_delete(path: &Path) {
477        if tape::active_recorder().is_none() {
478            return;
479        }
480        let path_str = path.to_string_lossy().into_owned();
481        tape::with_active_recorder(|_recorder| Some(TapeRecordKind::FileDelete { path: path_str }));
482    }
483
484    pub fn read(path: &Path) -> std::io::Result<Vec<u8>> {
485        let result = match active_overlay() {
486            Some(overlay) => overlay.read(path),
487            None => std::fs::read(path),
488        };
489        if let Ok(bytes) = result.as_ref() {
490            record_file_read(path, bytes);
491        }
492        result
493    }
494
495    pub fn read_to_string(path: &Path) -> std::io::Result<String> {
496        let result = match active_overlay() {
497            Some(overlay) => overlay.read_to_string(path),
498            None => std::fs::read_to_string(path),
499        };
500        if let Ok(text) = result.as_ref() {
501            record_file_read(path, text.as_bytes());
502        }
503        result
504    }
505
506    pub fn write(path: &Path, contents: &[u8]) -> std::io::Result<()> {
507        let result = match active_overlay() {
508            Some(overlay) => overlay.write(path, contents),
509            None => std::fs::write(path, contents),
510        };
511        if result.is_ok() {
512            record_file_write(path, contents);
513        }
514        result
515    }
516
517    pub fn append(path: &Path, contents: &[u8]) -> std::io::Result<()> {
518        let result = match active_overlay() {
519            Some(overlay) => overlay.append(path, contents),
520            None => std::fs::OpenOptions::new()
521                .create(true)
522                .append(true)
523                .open(path)
524                .and_then(|mut file| std::io::Write::write_all(&mut file, contents)),
525        };
526        if result.is_ok() {
527            record_file_write(path, contents);
528        }
529        result
530    }
531
532    pub fn exists(path: &Path) -> bool {
533        match active_overlay() {
534            Some(overlay) => overlay.exists(path),
535            None => path.exists(),
536        }
537    }
538
539    pub fn remove_file(path: &Path) -> std::io::Result<()> {
540        let result = match active_overlay() {
541            Some(overlay) => overlay.remove_file(path),
542            None => std::fs::remove_file(path),
543        };
544        if result.is_ok() {
545            record_file_delete(path);
546        }
547        result
548    }
549
550    pub fn create_dir_all(path: &Path) -> std::io::Result<()> {
551        match active_overlay() {
552            Some(overlay) => overlay.create_dir_all(path),
553            None => std::fs::create_dir_all(path),
554        }
555    }
556}
557
#[cfg(test)]
mod tests {
    use super::*;

    /// A write inside the root is visible through the overlay but never
    /// reaches the disk.
    #[test]
    fn writes_land_in_overlay_only() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("hello.txt");
        let overlay = OverlayFs::rooted_at(dir.path());
        overlay.write(&target, b"hi").unwrap();
        // Real disk untouched.
        assert!(!target.exists());
        // Overlay reports it back.
        let seen = overlay.read_to_string(&target).unwrap();
        assert_eq!(seen, "hi");
    }

    /// Without an overlay entry, reads come from the real file.
    #[test]
    fn reads_pass_through_to_underlying_tree() {
        let dir = tempfile::tempdir().unwrap();
        let seed = dir.path().join("seed.txt");
        std::fs::write(&seed, "underlying").unwrap();
        let overlay = OverlayFs::rooted_at(dir.path());
        let seen = overlay.read_to_string(&seed).unwrap();
        assert_eq!(seen, "underlying");
    }

    /// Deleting hides the file in the overlay, leaves it on disk, and
    /// shows up as a single `Deleted` diff entry.
    #[test]
    fn delete_masks_underlying_file() {
        let dir = tempfile::tempdir().unwrap();
        let doomed = dir.path().join("doomed.txt");
        std::fs::write(&doomed, "x").unwrap();
        let overlay = OverlayFs::rooted_at(dir.path());
        overlay.remove_file(&doomed).unwrap();
        assert!(!overlay.exists(&doomed));
        // Real disk untouched.
        assert!(doomed.exists());
        let diff = overlay.diff();
        assert_eq!(diff.len(), 1);
        assert!(matches!(diff[0].kind, DiffKind::Deleted));
    }

    /// Overwriting an on-disk file is `Modified`; writing a new one is
    /// `Added`.
    #[test]
    fn diff_distinguishes_added_vs_modified() {
        let dir = tempfile::tempdir().unwrap();
        let existing = dir.path().join("existing.txt");
        let brand_new = dir.path().join("brand-new.txt");
        std::fs::write(&existing, "v1").unwrap();
        let overlay = OverlayFs::rooted_at(dir.path());
        overlay.write(&existing, b"v2").unwrap();
        overlay.write(&brand_new, b"hi").unwrap();
        let mut diff = overlay.diff();
        diff.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(diff.len(), 2);
        // "brand-new.txt" sorts before "existing.txt".
        assert!(matches!(diff[0].kind, DiffKind::Added { .. }));
        assert!(matches!(diff[1].kind, DiffKind::Modified { .. }));
    }
}