Skip to main content

harn_hostlib/
fs_snapshot.rs

1//! Per-tool-call filesystem snapshots — Gemini-style `/restore` primitives.
2//!
3//! Captures the pre-image of paths touched by a mutating tool call so a
4//! client can roll the change back surgically without losing untracked
5//! work. Snapshot identity is the ACP `toolCallId`, so consumers index
6//! into the same id space the rest of the transcript already records.
7//!
8//! Two capture modes:
9//!
10//! 1. **Explicit** — the caller passes a `paths` list to
11//!    `hostlib_fs_snapshot`; bytes are copied immediately.
12//! 2. **Auto-on-write** — calling `hostlib_fs_snapshot` without `paths`
13//!    registers an open snapshot. The
14//!    [`auto_capture_for_write`] hook fires from inside
15//!    `tools/write_file` and `tools/delete_file` and lazy-copies each
16//!    pre-image into the active snapshot keyed by the current
17//!    [`harn_vm::agent_sessions::current_tool_call_id`].
18//!
19//! Storage layout (per session):
20//!
21//! ```text
22//! .harn/state/snapshots/<session_id>/
23//!   <snapshot_id>/
//!     manifest.json    # path -> { kind, body_hash?, len?, mode? }
//!     bodies/<sha256>  # content-addressed; deduped within this snapshot
26//! ```
27//!
28//! Snapshots are session-scoped and ephemeral. They are not persisted
29//! across machine reboots; consumers that need durable rollback bundle
30//! them into a session via `session/load`.
31
32use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability};
44use crate::tools::args::{
45    build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46    to_agent_path,
47};
48
/// Builtin name used to register the snapshot-capture entry point.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
/// Builtin name used to register the snapshot-restore entry point.
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
/// Builtin name used to register the snapshot-listing entry point.
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
/// Builtin name used to register the snapshot-drop entry point.
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Schema version written into the `version` field of every manifest.
const MANIFEST_VERSION: u32 = 1;
/// Path components of the snapshot state directory, appended to the
/// snapshot root when building on-disk locations.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Default cap on the on-disk footprint of one session's snapshot bundle
/// before the oldest snapshots are evicted. Matches the proposal in
/// [#1720](https://github.com/burin-labs/harn/issues/1720): 1 GiB.
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
61
62/// Hostlib filesystem snapshot capability handle.
63#[derive(Default)]
64pub struct FsSnapshotCapability;
65
66impl HostlibCapability for FsSnapshotCapability {
67    fn module_name(&self) -> &'static str {
68        // Snapshots live under the existing `fs/` schema directory so the
69        // contract surface stays consolidated alongside the staging
70        // primitives.
71        "fs"
72    }
73
74    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
75        registry.register_fn("fs", SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
76        registry.register_fn("fs", RESTORE_BUILTIN, "restore", restore_builtin);
77        registry.register_fn("fs", LIST_BUILTIN, "list_snapshots", list_snapshots_builtin);
78        registry.register_fn("fs", DROP_BUILTIN, "drop_snapshot", drop_snapshot_builtin);
79    }
80}
81
/// Pre-image recorded for a single captured path.
///
/// Serialized as an internally tagged object: the `kind` field carries the
/// snake_case variant name.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path existed at capture time and its bytes were stored.
    File {
        /// Hex-encoded SHA-256 of the captured bytes; also the file name of
        /// the stored body under the snapshot's `bodies/` directory.
        body_hash: String,
        /// Length of the captured body in bytes.
        len: u64,
        /// Mode bits read from the file's metadata on Unix; `None` on other
        /// targets. Omitted from the manifest when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist at capture time.
    Absent,
}
93
/// Serialized form of a snapshot, written to `manifest.json` inside the
/// snapshot directory.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest schema version (`MANIFEST_VERSION` when written).
    version: u32,
    /// Identifier of the snapshot this manifest describes.
    snapshot_id: String,
    /// Scope id the snapshot was created with.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Snapshot root, stored as a lossily converted string.
    root: String,
    /// Capture time copied from the in-memory snapshot state.
    taken_at_ms: i64,
    /// Captured entries keyed by the lossily converted path string.
    entries: BTreeMap<String, SnapshotEntry>,
}
104
/// In-memory record of one snapshot and the pre-images captured into it.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Identifier used to look the snapshot up; set from the scope id when
    /// the snapshot is first registered.
    snapshot_id: String,
    /// Scope id supplied when the snapshot was registered.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root under which the snapshot's state directory is created.
    root: PathBuf,
    /// Timestamp recorded when the snapshot was registered.
    taken_at_ms: i64,
    /// Logical absolute paths (workspace-relative when storage permits).
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
115
/// Per-snapshot summary returned by `list_snapshots`.
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Stable identifier (canonically the ACP toolCallId).
    pub snapshot_id: String,
    /// Caller-chosen scope id passed when the snapshot was created.
    pub scope_id: String,
    /// Wall-clock capture time, milliseconds since the UNIX epoch.
    pub taken_at_ms: i64,
    /// Logical paths captured into the snapshot so far, in agent-path form.
    pub captured_paths: Vec<String>,
    /// Sum of the recorded body lengths of all file entries; paths that
    /// were absent at capture time contribute zero.
    pub byte_count: u64,
}
130
/// Result returned after capturing a new snapshot.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Stable identifier (equal to the requested `scope_id`).
    pub snapshot_id: String,
    /// Paths newly captured by this call. Paths the snapshot already held
    /// an entry for are not listed again.
    pub captured_paths: Vec<String>,
    /// Total bytes captured for `captured_paths` by this call.
    pub byte_count: u64,
}
141
/// Result returned after restoring a snapshot.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// Paths successfully restored, in agent-path form.
    pub restored_paths: Vec<String>,
    /// Paths whose restore failed, as `(path, reason)` pairs with a
    /// human-readable reason.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
152
/// Result returned after dropping a snapshot.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// True when an existing snapshot was removed; false when the session
    /// or the snapshot was not registered.
    pub dropped: bool,
}
161
/// In-memory bookkeeping for all snapshots belonging to one session.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots, in insertion order.
    snapshots: Vec<SnapshotState>,
    /// Bytes currently held in this session's snapshot bundle. We track
    /// this rather than recomputing from `bodies/` so eviction stays
    /// O(snapshots) instead of walking the filesystem on every write.
    byte_count: u64,
    /// Per-session byte cap. Defaults to [`DEFAULT_SESSION_BYTE_CAP`] and
    /// can be overridden with [`configure_session_byte_cap`].
    byte_cap: u64,
}
174
175impl Default for SessionSnapshots {
176    fn default() -> Self {
177        Self {
178            snapshots: Vec::new(),
179            byte_count: 0,
180            byte_cap: DEFAULT_SESSION_BYTE_CAP,
181        }
182    }
183}
184
185static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
186
187fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
188    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
189}
190
191/// Override the byte cap for a specific session and immediately enforce
192/// it. Returns the previous cap.
193///
194/// Primarily intended for tests that want to force eviction without
195/// writing a gigabyte. Production embedders generally leave the default
196/// in place; touching one session never affects another.
197pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
198    let mut guard = sessions()
199        .lock()
200        .expect("fs_snapshot session mutex poisoned");
201    let bundle = guard.entry(session_id.to_string()).or_default();
202    let previous = bundle.byte_cap;
203    bundle.byte_cap = bytes.max(1);
204    enforce_byte_cap(bundle, session_id, None);
205    previous
206}
207
208/// Drop every snapshot registered for `session_id`, both in memory and
209/// on disk. Returns the number of snapshots removed.
210///
211/// ACP hosts should call this on session close so the snapshot bundle
212/// doesn't outlive the conversation. Tests can also call it on
213/// teardown when reusing a session id across cases.
214pub fn drop_session_snapshots(session_id: &str) -> usize {
215    let mut guard = sessions()
216        .lock()
217        .expect("fs_snapshot session mutex poisoned");
218    let Some(bundle) = guard.remove(session_id) else {
219        return 0;
220    };
221    let count = bundle.snapshots.len();
222    for snapshot in &bundle.snapshots {
223        remove_snapshot_dir(snapshot);
224    }
225    count
226}
227
228/// Drop every registered session's snapshots, in memory and on disk.
229/// Returns the number of sessions removed.
230///
231/// [`drop_session_snapshots`] handles a single conversation on ACP
232/// session close. This drains the entire process-global map and is
233/// intended for host reset paths (e.g. the test runner between cases)
234/// where the worker is reused and snapshot bundles would otherwise
235/// accumulate one session at a time.
236pub fn reset_all_sessions() -> usize {
237    let mut guard = sessions()
238        .lock()
239        .expect("fs_snapshot session mutex poisoned");
240    let session_count = guard.len();
241    for bundle in guard.values() {
242        for snapshot in &bundle.snapshots {
243            remove_snapshot_dir(snapshot);
244        }
245    }
246    guard.clear();
247    session_count
248}
249
/// How many sessions currently have a registered bundle. Test-only.
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
258
259/// Take a snapshot. When `paths` is empty the snapshot is "open" — bytes
260/// are captured lazily as `auto_capture_for_write` fires from inside
261/// the mutating tool builtins.
262pub fn snapshot(
263    session_id: &str,
264    scope_id: &str,
265    paths: &[String],
266    root: Option<&Path>,
267) -> Result<SnapshotResult, HostlibError> {
268    validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
269    validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
270    let root = resolve_root(root);
271    let mut guard = sessions()
272        .lock()
273        .expect("fs_snapshot session mutex poisoned");
274    let bundle = guard.entry(session_id.to_string()).or_default();
275    upsert_snapshot(bundle, session_id, scope_id, &root)?;
276    let mut captured_paths = Vec::new();
277    let mut byte_count = 0u64;
278    for raw in paths {
279        let path = normalize_logical(Path::new(raw));
280        let added =
281            capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
282                HostlibError::Backend {
283                    builtin: SNAPSHOT_BUILTIN,
284                    message,
285                }
286            })?;
287        if let Some(bytes) = added {
288            byte_count = byte_count.saturating_add(bytes);
289            captured_paths.push(to_agent_path(&path));
290        }
291    }
292    enforce_byte_cap(bundle, session_id, Some(scope_id));
293    let state = bundle
294        .snapshots
295        .iter()
296        .find(|snap| snap.snapshot_id == scope_id)
297        .expect("snapshot just upserted is protected from byte-cap eviction");
298    persist_manifest(state).map_err(|err| HostlibError::Backend {
299        builtin: SNAPSHOT_BUILTIN,
300        message: err,
301    })?;
302    Ok(SnapshotResult {
303        snapshot_id: state.snapshot_id.clone(),
304        captured_paths,
305        byte_count,
306    })
307}
308
309/// Restore a previously-captured snapshot.
310pub fn restore(
311    session_id: &str,
312    snapshot_id: &str,
313    paths: &[String],
314) -> Result<RestoreResult, HostlibError> {
315    validate_session_id(RESTORE_BUILTIN, session_id)?;
316    validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
317    let mut guard = sessions()
318        .lock()
319        .expect("fs_snapshot session mutex poisoned");
320    let bundle = guard
321        .get_mut(session_id)
322        .ok_or_else(|| HostlibError::Backend {
323            builtin: RESTORE_BUILTIN,
324            message: format!("no snapshots registered for session `{session_id}`"),
325        })?;
326    let state = bundle
327        .snapshots
328        .iter()
329        .find(|snap| snap.snapshot_id == snapshot_id)
330        .cloned()
331        .ok_or_else(|| HostlibError::Backend {
332            builtin: RESTORE_BUILTIN,
333            message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
334        })?;
335    let selected = select_paths(&state, paths);
336    let mut restored_paths = Vec::new();
337    let mut skipped_paths_with_reasons = Vec::new();
338    for path in selected {
339        let Some(entry) = state.entries.get(&path) else {
340            continue;
341        };
342        let label = to_agent_path(&path);
343        match restore_entry(&state, &path, entry) {
344            Ok(()) => restored_paths.push(label),
345            Err(reason) => skipped_paths_with_reasons.push((label, reason)),
346        }
347    }
348    Ok(RestoreResult {
349        snapshot_id: snapshot_id.to_string(),
350        restored_paths,
351        skipped_paths_with_reasons,
352    })
353}
354
355/// List snapshots registered for a session, sorted by capture time.
356pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
357    validate_session_id(LIST_BUILTIN, session_id)?;
358    let guard = sessions()
359        .lock()
360        .expect("fs_snapshot session mutex poisoned");
361    let Some(bundle) = guard.get(session_id) else {
362        return Ok(Vec::new());
363    };
364    let mut summaries: Vec<SnapshotSummary> = bundle
365        .snapshots
366        .iter()
367        .map(|state| SnapshotSummary {
368            snapshot_id: state.snapshot_id.clone(),
369            scope_id: state.scope_id.clone(),
370            taken_at_ms: state.taken_at_ms,
371            captured_paths: state.entries.keys().map(to_agent_path).collect(),
372            byte_count: entry_byte_count(state),
373        })
374        .collect();
375    summaries.sort_by_key(|summary| summary.taken_at_ms);
376    Ok(summaries)
377}
378
379/// Drop a snapshot's in-memory and on-disk state.
380pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
381    validate_session_id(DROP_BUILTIN, session_id)?;
382    validate_scope_id(DROP_BUILTIN, snapshot_id)?;
383    let mut guard = sessions()
384        .lock()
385        .expect("fs_snapshot session mutex poisoned");
386    let Some(bundle) = guard.get_mut(session_id) else {
387        return Ok(DropResult {
388            snapshot_id: snapshot_id.to_string(),
389            dropped: false,
390        });
391    };
392    let position = bundle
393        .snapshots
394        .iter()
395        .position(|snap| snap.snapshot_id == snapshot_id);
396    let dropped = match position {
397        Some(idx) => {
398            let removed = bundle.snapshots.remove(idx);
399            bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
400            remove_snapshot_dir(&removed);
401            true
402        }
403        None => false,
404    };
405    Ok(DropResult {
406        snapshot_id: snapshot_id.to_string(),
407        dropped,
408    })
409}
410
411/// Auto-on-write hook called from the mutating tool builtins.
412///
413/// Captures `path`'s pre-image into the snapshot whose id matches the
414/// current [`harn_vm::agent_sessions::current_tool_call_id`]. The first
415/// write in a tool call auto-opens that snapshot. The hook silently no-ops
416/// when no session is active or no tool-call id is set, which keeps read-only
417/// tools and writes outside active tool scopes cheap.
418pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
419    let Some(session_id) = active_session_id() else {
420        return;
421    };
422    // Record the mutated path against the session BEFORE the snapshot/tool-call
423    // gate below: this is the single chokepoint every hostlib write reaches, so
424    // it is the authoritative source for a session's `files_written` (consumed by
425    // the sub-agent receipt). Recorded unconditionally — even when no restore
426    // snapshot is open (no active tool call) — because the write still happened.
427    harn_vm::agent_sessions::record_session_changed_path(
428        &session_id,
429        normalize_logical(path).to_string_lossy().as_ref(),
430    );
431    let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
432        return;
433    };
434    let mut guard = sessions()
435        .lock()
436        .expect("fs_snapshot session mutex poisoned");
437    let bundle = guard.entry(session_id.clone()).or_default();
438    if !bundle
439        .snapshots
440        .iter()
441        .any(|snap| snap.snapshot_id == snapshot_id)
442    {
443        let root =
444            crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
445        if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
446            tracing::warn!(
447                "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
448            );
449            return;
450        }
451    }
452    let Some(snapshot) = bundle
453        .snapshots
454        .iter()
455        .find(|snap| snap.snapshot_id == snapshot_id)
456    else {
457        return;
458    };
459    let scope_id = snapshot.scope_id.clone();
460    let root = snapshot.root.clone();
461    let key = normalize_logical(path);
462    match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
463        Ok(_added) => {
464            if let Some(state) = bundle
465                .snapshots
466                .iter()
467                .find(|snap| snap.snapshot_id == snapshot_id)
468            {
469                if let Err(err) = persist_manifest(state) {
470                    tracing::warn!(
471                        "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
472                    );
473                }
474            }
475        }
476        Err(err) => {
477            tracing::warn!(
478                "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
479                key.display()
480            );
481        }
482    }
483    enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
484}
485
486fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
487    let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
488    let dict = raw.as_ref();
489    let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
490    let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
491    let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
492    let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
493    let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
494    Ok(build_dict([
495        ("snapshot_id", str_value(&result.snapshot_id)),
496        (
497            "captured_paths",
498            VmValue::List(Arc::new(
499                result
500                    .captured_paths
501                    .into_iter()
502                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
503                    .collect(),
504            )),
505        ),
506        ("byte_count", VmValue::Int(result.byte_count as i64)),
507    ]))
508}
509
510fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
511    let raw = dict_arg(RESTORE_BUILTIN, args)?;
512    let dict = raw.as_ref();
513    let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
514    let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
515    let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
516    let result = restore(&session_id, &snapshot_id, &paths)?;
517    Ok(build_dict([
518        ("snapshot_id", str_value(&result.snapshot_id)),
519        (
520            "restored_paths",
521            VmValue::List(Arc::new(
522                result
523                    .restored_paths
524                    .into_iter()
525                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
526                    .collect(),
527            )),
528        ),
529        (
530            "skipped_paths_with_reasons",
531            VmValue::List(Arc::new(
532                result
533                    .skipped_paths_with_reasons
534                    .into_iter()
535                    .map(|(path, reason)| {
536                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
537                    })
538                    .collect(),
539            )),
540        ),
541    ]))
542}
543
544fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
545    let raw = dict_arg(LIST_BUILTIN, args)?;
546    let dict = raw.as_ref();
547    let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
548    let summaries = list_snapshots(&session_id)?;
549    Ok(build_dict([(
550        "snapshots",
551        VmValue::List(Arc::new(
552            summaries.into_iter().map(snapshot_summary_value).collect(),
553        )),
554    )]))
555}
556
557fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
558    let raw = dict_arg(DROP_BUILTIN, args)?;
559    let dict = raw.as_ref();
560    let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
561    let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
562    let result = drop_snapshot(&session_id, &snapshot_id)?;
563    Ok(build_dict([
564        ("snapshot_id", str_value(&result.snapshot_id)),
565        ("dropped", VmValue::Bool(result.dropped)),
566    ]))
567}
568
569fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
570    build_dict([
571        ("snapshot_id", str_value(&summary.snapshot_id)),
572        ("scope_id", str_value(&summary.scope_id)),
573        ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
574        (
575            "captured_paths",
576            VmValue::List(Arc::new(
577                summary
578                    .captured_paths
579                    .into_iter()
580                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
581                    .collect(),
582            )),
583        ),
584        ("byte_count", VmValue::Int(summary.byte_count as i64)),
585    ])
586}
587
588fn upsert_snapshot(
589    bundle: &mut SessionSnapshots,
590    session_id: &str,
591    scope_id: &str,
592    root: &Path,
593) -> Result<(), HostlibError> {
594    if bundle
595        .snapshots
596        .iter()
597        .any(|snap| snap.snapshot_id == scope_id)
598    {
599        return Ok(());
600    }
601    let state = SnapshotState {
602        snapshot_id: scope_id.to_string(),
603        scope_id: scope_id.to_string(),
604        session_id: session_id.to_string(),
605        root: root.to_path_buf(),
606        taken_at_ms: now_ms(),
607        entries: BTreeMap::new(),
608    };
609    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
610    stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
611        builtin: SNAPSHOT_BUILTIN,
612        message: format!("mkdir {}: {err}", dir.display()),
613    })?;
614    bundle.snapshots.push(state);
615    Ok(())
616}
617
618fn capture_path(
619    bundle: &mut SessionSnapshots,
620    session_id: &str,
621    snapshot_id: &str,
622    path: &Path,
623    root: &Path,
624) -> Result<Option<u64>, String> {
625    let snap_index = bundle
626        .snapshots
627        .iter()
628        .position(|snap| snap.snapshot_id == snapshot_id)
629        .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
630    if bundle.snapshots[snap_index].entries.contains_key(path) {
631        return Ok(None);
632    }
633    let metadata = stdfs::symlink_metadata(path);
634    let (entry, byte_count) = match metadata {
635        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
636        Err(err) => {
637            return Err(format!("stat `{}`: {err}", path.display()));
638        }
639        Ok(metadata) if metadata.is_dir() => {
640            return Err(format!(
641                "snapshot of directory `{}` is not supported yet",
642                path.display()
643            ));
644        }
645        Ok(metadata) if metadata.file_type().is_symlink() => {
646            return Err(format!(
647                "snapshot of symlink `{}` is not supported yet",
648                path.display()
649            ));
650        }
651        Ok(metadata) => {
652            let bytes = stdfs::read(path)
653                .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
654            let body_hash = hex::encode(Sha256::digest(&bytes));
655            let len = bytes.len() as u64;
656            store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
657            #[cfg(unix)]
658            let mode = {
659                use std::os::unix::fs::MetadataExt;
660                Some(metadata.mode())
661            };
662            #[cfg(not(unix))]
663            let mode = {
664                let _ = &metadata;
665                None
666            };
667            (
668                SnapshotEntry::File {
669                    body_hash,
670                    len,
671                    mode,
672                },
673                len,
674            )
675        }
676    };
677    let snap = &mut bundle.snapshots[snap_index];
678    snap.entries.insert(path.to_path_buf(), entry);
679    bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
680    Ok(Some(byte_count))
681}
682
683fn store_body(
684    root: &Path,
685    session_id: &str,
686    snapshot_id: &str,
687    body_hash: &str,
688    bytes: &[u8],
689) -> Result<(), String> {
690    let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
691    stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
692    let body_path = bodies.join(body_hash);
693    if !body_path.exists() {
694        atomic_write(&body_path, bytes)?;
695    }
696    Ok(())
697}
698
699fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
700    match entry {
701        SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
702            Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
703                .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
704            Ok(_) => stdfs::remove_file(path)
705                .map_err(|err| format!("remove_file {}: {err}", path.display())),
706            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
707            Err(err) => Err(format!("stat {}: {err}", path.display())),
708        },
709        SnapshotEntry::File {
710            body_hash, mode, ..
711        } => {
712            let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
713                .join("bodies")
714                .join(body_hash);
715            let bytes = stdfs::read(&body_path)
716                .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
717            atomic_write(path, &bytes)?;
718            #[cfg(unix)]
719            if let Some(bits) = mode {
720                use std::os::unix::fs::PermissionsExt;
721                let permissions = stdfs::Permissions::from_mode(*bits);
722                stdfs::set_permissions(path, permissions)
723                    .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
724            }
725            #[cfg(not(unix))]
726            let _ = mode;
727            Ok(())
728        }
729    }
730}
731
732fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
733    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
734    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
735    let manifest = Manifest {
736        version: MANIFEST_VERSION,
737        snapshot_id: state.snapshot_id.clone(),
738        scope_id: state.scope_id.clone(),
739        session_id: state.session_id.clone(),
740        root: state.root.to_string_lossy().into_owned(),
741        taken_at_ms: state.taken_at_ms,
742        entries: state
743            .entries
744            .iter()
745            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
746            .collect(),
747    };
748    let bytes = serde_json::to_vec_pretty(&manifest)
749        .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
750    atomic_write(&dir.join("manifest.json"), &bytes)
751}
752
753fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
754    if let Some(parent) = path.parent() {
755        stdfs::create_dir_all(parent)
756            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
757    }
758    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
759    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
760    match stdfs::rename(&tmp, path) {
761        Ok(()) => Ok(()),
762        Err(rename_err) => {
763            let _ = stdfs::remove_file(path);
764            stdfs::rename(&tmp, path).map_err(|retry| {
765                // Both renames failed; the temp file would otherwise linger
766                // and accumulate in the snapshot directory.
767                let _ = stdfs::remove_file(&tmp);
768                format!(
769                    "rename {} to {}: {rename_err}; retry: {retry}",
770                    tmp.display(),
771                    path.display()
772                )
773            })
774        }
775    }
776}
777
778/// Evict snapshots oldest-first until the session is back under its byte
779/// cap. `protected` names the snapshot currently being written (if any);
780/// it is never evicted, even when it alone exceeds the cap — otherwise the
781/// caller would lose the very snapshot it just captured (and `snapshot`
782/// would panic re-fetching it). A snapshot larger than the whole cap is
783/// therefore retained: rollback for an in-flight write takes precedence
784/// over the soft budget.
785fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
786    while bundle.byte_count > bundle.byte_cap {
787        let Some(idx) = bundle
788            .snapshots
789            .iter()
790            .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
791        else {
792            break;
793        };
794        let evicted = bundle.snapshots.remove(idx);
795        bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
796        tracing::info!(
797            "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
798            evicted.snapshot_id,
799            bundle.byte_cap,
800        );
801        remove_snapshot_dir(&evicted);
802    }
803}
804
805fn remove_snapshot_dir(state: &SnapshotState) {
806    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
807    let _ = stdfs::remove_dir_all(&dir);
808}
809
810fn entry_byte_count(state: &SnapshotState) -> u64 {
811    state
812        .entries
813        .values()
814        .map(|entry| match entry {
815            SnapshotEntry::File { len, .. } => *len,
816            SnapshotEntry::Absent => 0,
817        })
818        .sum()
819}
820
821fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
822    if paths.is_empty() {
823        return state.entries.keys().cloned().collect();
824    }
825    let requested: BTreeSet<PathBuf> = paths
826        .iter()
827        .map(|path| normalize_logical(Path::new(path)))
828        .collect();
829    state
830        .entries
831        .keys()
832        .filter(|path| requested.contains(*path))
833        .cloned()
834        .collect()
835}
836
837fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
838    if session_id.trim().is_empty() {
839        return Err(HostlibError::InvalidParameter {
840            builtin,
841            param: "session_id",
842            message: "must not be empty".to_string(),
843        });
844    }
845    Ok(())
846}
847
848fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
849    if scope_id.trim().is_empty() {
850        let param = match builtin {
851            SNAPSHOT_BUILTIN => "scope_id",
852            _ => "snapshot_id",
853        };
854        return Err(HostlibError::InvalidParameter {
855            builtin,
856            param,
857            message: "must not be empty".to_string(),
858        });
859    }
860    Ok(())
861}
862
863fn active_session_id() -> Option<String> {
864    harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
865}
866
867fn resolve_root(root: Option<&Path>) -> PathBuf {
868    match root {
869        Some(path) => normalize_logical(path),
870        None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
871    }
872}
873
874fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
875    let mut dir = root.to_path_buf();
876    for component in STATE_REL {
877        dir.push(component);
878    }
879    dir.push(sanitize_component(session_id));
880    dir.push(sanitize_component(snapshot_id));
881    dir
882}
883
884fn sanitize_component(input: &str) -> String {
885    let sanitized: String = input
886        .chars()
887        .map(|ch| match ch {
888            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
889            _ => '_',
890        })
891        .collect();
892    if sanitized == input {
893        sanitized
894    } else {
895        let hash = hex::encode(Sha256::digest(input.as_bytes()));
896        format!("{sanitized}-{}", &hash[..12])
897    }
898}
899
/// Make `path` absolute and collapse `.` / `..` components purely
/// lexically, without touching the filesystem (symlinks are not resolved).
///
/// Relative paths are anchored at the process working directory, or at `"."`
/// if the working directory cannot be read. A `..` removes the last
/// component accumulated so far and is a no-op once nothing is left to pop.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = if path.is_relative() {
        std::env::current_dir()
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(path)
    } else {
        path.to_path_buf()
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut resolved, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    resolved.pop();
                }
                kept => resolved.push(kept),
            }
            resolved
        })
}
920
/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// Returns `0` when the clock reads earlier than the epoch. The millisecond
/// count is a `u128`; it is converted with a checked conversion and
/// saturates at `i64::MAX` instead of silently wrapping like an `as` cast.
fn now_ms() -> i64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| {
            i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX)
        })
}
927
928#[cfg(test)]
929mod tests {
930    use super::*;
931    use std::sync::atomic::{AtomicU64, Ordering};
932    use tempfile::TempDir;
933
934    /// Hand each test its own session id so the process-wide `SESSIONS`
935    /// map isolates them by key — no serialization or process-wide
936    /// reset required.
937    fn unique_session(prefix: &str) -> String {
938        static COUNTER: AtomicU64 = AtomicU64::new(0);
939        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
940        format!("{prefix}-{n}-{}", std::process::id())
941    }
942
943    fn unique_scope() -> String {
944        static COUNTER: AtomicU64 = AtomicU64::new(0);
945        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
946    }
947
948    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
949        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
950        harn_vm::agent_sessions::enter_current_session(id.to_string())
951    }
952
953    #[test]
954    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
955        let dir = TempDir::new().unwrap();
956        let file = dir.path().join("note.txt");
957        stdfs::write(&file, b"v1").unwrap();
958        let session = unique_session("snap-roundtrip");
959        let scope = unique_scope();
960        let _session_guard = enter_session(&session);
961
962        let result = snapshot(
963            &session,
964            &scope,
965            &[file.to_string_lossy().into_owned()],
966            Some(dir.path()),
967        )
968        .unwrap();
969        assert_eq!(result.snapshot_id, scope);
970        assert_eq!(result.captured_paths.len(), 1);
971        assert_eq!(result.byte_count, 2);
972
973        stdfs::write(&file, b"clobbered").unwrap();
974        let restored = restore(&session, &scope, &[]).unwrap();
975        assert_eq!(restored.restored_paths.len(), 1);
976        assert!(restored.skipped_paths_with_reasons.is_empty());
977        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
978    }
979
980    #[test]
981    fn restore_reinstates_deleted_file() {
982        let dir = TempDir::new().unwrap();
983        let file = dir.path().join("doomed.txt");
984        stdfs::write(&file, b"alive").unwrap();
985        let session = unique_session("snap-reinstate");
986        let scope = unique_scope();
987        let _session_guard = enter_session(&session);
988
989        snapshot(
990            &session,
991            &scope,
992            &[file.to_string_lossy().into_owned()],
993            Some(dir.path()),
994        )
995        .unwrap();
996        stdfs::remove_file(&file).unwrap();
997        assert!(!file.exists());
998        let restored = restore(&session, &scope, &[]).unwrap();
999        assert_eq!(restored.restored_paths.len(), 1);
1000        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
1001    }
1002
1003    #[test]
1004    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
1005        let dir = TempDir::new().unwrap();
1006        let file = dir.path().join("new.txt");
1007        assert!(!file.exists());
1008        let session = unique_session("snap-absent");
1009        let scope = unique_scope();
1010        let _session_guard = enter_session(&session);
1011
1012        snapshot(
1013            &session,
1014            &scope,
1015            &[file.to_string_lossy().into_owned()],
1016            Some(dir.path()),
1017        )
1018        .unwrap();
1019        stdfs::write(&file, b"created during call").unwrap();
1020        let restored = restore(&session, &scope, &[]).unwrap();
1021        assert_eq!(restored.restored_paths.len(), 1);
1022        assert!(
1023            !file.exists(),
1024            "restore must delete files that the snapshot saw as absent"
1025        );
1026    }
1027
1028    #[test]
1029    fn list_and_drop_round_trip_through_metadata() {
1030        let dir = TempDir::new().unwrap();
1031        let file = dir.path().join("listed.txt");
1032        stdfs::write(&file, b"abc").unwrap();
1033        let session = unique_session("snap-list");
1034        let scope = unique_scope();
1035        let _session_guard = enter_session(&session);
1036
1037        snapshot(
1038            &session,
1039            &scope,
1040            &[file.to_string_lossy().into_owned()],
1041            Some(dir.path()),
1042        )
1043        .unwrap();
1044        let summaries = list_snapshots(&session).unwrap();
1045        assert_eq!(summaries.len(), 1);
1046        assert_eq!(summaries[0].snapshot_id, scope);
1047        assert_eq!(summaries[0].byte_count, 3);
1048
1049        let dropped = drop_snapshot(&session, &scope).unwrap();
1050        assert!(dropped.dropped);
1051        assert!(list_snapshots(&session).unwrap().is_empty());
1052
1053        let again = drop_snapshot(&session, &scope).unwrap();
1054        assert!(!again.dropped, "second drop must be idempotent");
1055    }
1056
1057    #[test]
1058    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
1059        let dir = TempDir::new().unwrap();
1060        let file = dir.path().join("auto.txt");
1061        stdfs::write(&file, b"pre").unwrap();
1062        let session = unique_session("snap-auto");
1063        let scope = unique_scope();
1064        let _session_guard = enter_session(&session);
1065        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());
1066
1067        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
1068        auto_capture_for_write("hostlib_tools_write_file", &file);
1069        stdfs::write(&file, b"post").unwrap();
1070
1071        let restored = restore(&session, &scope, &[]).unwrap();
1072        assert_eq!(restored.restored_paths.len(), 1);
1073        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
1074    }
1075
1076    #[test]
1077    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
1078        let dir = TempDir::new().unwrap();
1079        let one = dir.path().join("a.txt");
1080        let two = dir.path().join("b.txt");
1081        let session = unique_session("snap-changed");
1082        harn_vm::agent_sessions::clear_session_changed_paths(&session);
1083        let _session_guard = enter_session(&session);
1084
1085        // No active tool call / open snapshot: the write still happened, so the
1086        // path must be recorded for the receipt regardless.
1087        auto_capture_for_write("hostlib_tools_write_file", &one);
1088        auto_capture_for_write("hostlib_tools_write_file", &two);
1089        // A duplicate write of the same path must dedupe.
1090        auto_capture_for_write("hostlib_tools_write_file", &one);
1091
1092        let changed = harn_vm::agent_sessions::session_changed_paths(&session);
1093        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
1094        let expect_one = normalize_logical(&one).to_string_lossy().into_owned();
1095        let expect_two = normalize_logical(&two).to_string_lossy().into_owned();
1096        assert!(
1097            changed.contains(&expect_one),
1098            "path a recorded: {changed:?}"
1099        );
1100        assert!(
1101            changed.contains(&expect_two),
1102            "path b recorded: {changed:?}"
1103        );
1104
1105        // `take` drains so the receipt captures the set exactly once.
1106        let drained = harn_vm::agent_sessions::take_session_changed_paths(&session);
1107        assert_eq!(drained.len(), 2);
1108        assert!(
1109            harn_vm::agent_sessions::session_changed_paths(&session).is_empty(),
1110            "take drains the session's recorded paths"
1111        );
1112    }
1113
1114    #[test]
1115    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
1116        let dir = TempDir::new().unwrap();
1117        let session = unique_session("snap-evict");
1118        let _session_guard = enter_session(&session);
1119
1120        // Per-session cap: only affects this test's session, so other
1121        // tests can run in parallel without seeing the squeeze.
1122        configure_session_byte_cap(&session, 8);
1123
1124        let mk = |name: &str| {
1125            let path = dir.path().join(name);
1126            stdfs::write(&path, b"12345").unwrap();
1127            path
1128        };
1129
1130        let scope_a = unique_scope();
1131        let scope_b = unique_scope();
1132        let a = mk("a.txt");
1133        snapshot(
1134            &session,
1135            &scope_a,
1136            &[a.to_string_lossy().into_owned()],
1137            Some(dir.path()),
1138        )
1139        .unwrap();
1140        let b = mk("b.txt");
1141        snapshot(
1142            &session,
1143            &scope_b,
1144            &[b.to_string_lossy().into_owned()],
1145            Some(dir.path()),
1146        )
1147        .unwrap();
1148
1149        let ids: Vec<String> = list_snapshots(&session)
1150            .unwrap()
1151            .into_iter()
1152            .map(|summary| summary.snapshot_id)
1153            .collect();
1154        assert_eq!(
1155            ids,
1156            vec![scope_b],
1157            "older snapshot must be evicted when the per-session byte cap is exceeded"
1158        );
1159    }
1160
1161    #[test]
1162    fn snapshot_larger_than_cap_is_retained_not_evicted() {
1163        // A single snapshot whose captured bytes exceed the whole cap must
1164        // survive — evicting the snapshot we just took would lose rollback
1165        // for the in-flight write (and previously panicked re-fetching it).
1166        let dir = TempDir::new().unwrap();
1167        let session = unique_session("snap-oversized");
1168        let _session_guard = enter_session(&session);
1169        configure_session_byte_cap(&session, 4);
1170
1171        let scope = unique_scope();
1172        let file = dir.path().join("big.txt");
1173        stdfs::write(&file, b"0123456789").unwrap();
1174        let result = snapshot(
1175            &session,
1176            &scope,
1177            &[file.to_string_lossy().into_owned()],
1178            Some(dir.path()),
1179        )
1180        .unwrap();
1181        assert_eq!(result.byte_count, 10);
1182
1183        let ids: Vec<String> = list_snapshots(&session)
1184            .unwrap()
1185            .into_iter()
1186            .map(|summary| summary.snapshot_id)
1187            .collect();
1188        assert_eq!(
1189            ids,
1190            vec![scope],
1191            "an oversized snapshot must be retained rather than evicting itself"
1192        );
1193    }
1194
1195    #[test]
1196    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
1197        let dir = TempDir::new().unwrap();
1198        let file = dir.path().join("retained.txt");
1199        stdfs::write(&file, b"x").unwrap();
1200        let session = unique_session("snap-drop-session");
1201        let scope_a = unique_scope();
1202        let scope_b = unique_scope();
1203        let _session_guard = enter_session(&session);
1204
1205        snapshot(
1206            &session,
1207            &scope_a,
1208            &[file.to_string_lossy().into_owned()],
1209            Some(dir.path()),
1210        )
1211        .unwrap();
1212        snapshot(
1213            &session,
1214            &scope_b,
1215            &[file.to_string_lossy().into_owned()],
1216            Some(dir.path()),
1217        )
1218        .unwrap();
1219        assert_eq!(list_snapshots(&session).unwrap().len(), 2);
1220
1221        assert_eq!(drop_session_snapshots(&session), 2);
1222        assert!(list_snapshots(&session).unwrap().is_empty());
1223        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
1224    }
1225}