Skip to main content

harn_hostlib/
fs_snapshot.rs

1//! Per-tool-call filesystem snapshots — Gemini-style `/restore` primitives.
2//!
3//! Captures the pre-image of paths touched by a mutating tool call so a
4//! client can roll the change back surgically without losing untracked
5//! work. Snapshot identity is the ACP `toolCallId`, so consumers index
6//! into the same id space the rest of the transcript already records.
7//!
8//! Two capture modes:
9//!
10//! 1. **Explicit** — the caller passes a `paths` list to
11//!    `hostlib_fs_snapshot`; bytes are copied immediately.
12//! 2. **Auto-on-write** — calling `hostlib_fs_snapshot` without `paths`
13//!    registers an open snapshot. The
14//!    [`auto_capture_for_write`] hook fires from inside
15//!    `tools/write_file` and `tools/delete_file` and lazy-copies each
16//!    pre-image into the active snapshot keyed by the current
17//!    [`harn_vm::agent_sessions::current_tool_call_id`].
18//!
19//! Storage layout (per session):
20//!
21//! ```text
22//! .harn/state/snapshots/<session_id>/
23//!   <snapshot_id>/
//!     manifest.json    # path -> { kind, body_hash?, len?, mode? }
//!     bodies/<sha256>  # content-addressed; deduped within this snapshot
26//! ```
27//!
28//! Snapshots are session-scoped and ephemeral. They are not persisted
29//! across machine reboots; consumers that need durable rollback bundle
30//! them into a session via `session/load`.
31
32use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability};
44use crate::tools::args::{
45    build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46};
47
// Builtin names under which the four snapshot entry points are registered.
// Each is also used as the `builtin` tag on errors raised by its entry point.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Schema version stamped into every persisted `manifest.json`.
const MANIFEST_VERSION: u32 = 1;
/// Path components appended to a snapshot root to reach the directory that
/// holds all snapshot state (`.harn/state/snapshots`).
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Default cap on the on-disk footprint of one session's snapshot bundle
/// before the oldest snapshots are evicted. Matches the proposal in
/// [#1720](https://github.com/burin-labs/harn/issues/1720): 1 GiB.
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
60
/// Hostlib filesystem snapshot capability handle.
///
/// A stateless unit struct; its [`HostlibCapability`] implementation
/// registers the snapshot, restore, list and drop builtins.
#[derive(Default)]
pub struct FsSnapshotCapability;
64
/// Wires the snapshot builtins into the hostlib registry under the `fs`
/// module.
impl HostlibCapability for FsSnapshotCapability {
    /// Name of the module this capability contributes to; always `"fs"`.
    fn module_name(&self) -> &'static str {
        // Snapshots live under the existing `fs/` schema directory so the
        // contract surface stays consolidated alongside the staging
        // primitives.
        "fs"
    }

    /// Registers the four snapshot builtins with `registry`.
    ///
    /// NOTE(review): the third `register_fn` argument looks like the short
    /// method name within the module; confirm against `BuiltinRegistry`.
    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
        registry.register_fn("fs", SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
        registry.register_fn("fs", RESTORE_BUILTIN, "restore", restore_builtin);
        registry.register_fn("fs", LIST_BUILTIN, "list_snapshots", list_snapshots_builtin);
        registry.register_fn("fs", DROP_BUILTIN, "drop_snapshot", drop_snapshot_builtin);
    }
}
80
/// What a snapshot recorded for one path at capture time.
///
/// Serialized as an internally tagged object whose `kind` field holds the
/// snake_case variant name.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path was a regular file whose bytes were copied into `bodies/`.
    File {
        /// Hex-encoded SHA-256 of the captured bytes; also the file name of
        /// the stored body.
        body_hash: String,
        /// Length of the captured content in bytes.
        len: u64,
        /// Mode bits read from the file's metadata on Unix; `None` on other
        /// targets, in which case the field is omitted when serialized.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist at capture time; restoring removes whatever
    /// occupies it now.
    Absent,
}
92
/// On-disk form of a snapshot, written to `manifest.json` inside the
/// snapshot directory. Built from a [`SnapshotState`] by `persist_manifest`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest schema version; written as [`MANIFEST_VERSION`].
    version: u32,
    /// Identifier of the snapshot this manifest describes.
    snapshot_id: String,
    /// Scope id supplied when the snapshot was created.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Snapshot root, rendered with a lossy path-to-string conversion.
    root: String,
    /// Creation time as recorded on the in-memory state.
    taken_at_ms: i64,
    /// Captured entries keyed by the lossy string form of each path.
    entries: BTreeMap<String, SnapshotEntry>,
}
103
/// In-memory record of one snapshot held in a session bundle.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Identifier used to look the snapshot up; set to the scope id when the
    /// snapshot is created.
    snapshot_id: String,
    /// Scope id supplied when the snapshot was created.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Directory under which the `.harn/state/snapshots` tree for this
    /// snapshot is located.
    root: PathBuf,
    /// Timestamp taken from `now_ms()` when the snapshot was first opened.
    taken_at_ms: i64,
    /// Logical absolute paths (workspace-relative when storage permits).
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
114
/// Per-snapshot summary returned by `list_snapshots`.
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Stable identifier (canonically the ACP toolCallId).
    pub snapshot_id: String,
    /// Caller-chosen scope id passed when the snapshot was created.
    pub scope_id: String,
    /// Wall-clock capture time, milliseconds since the UNIX epoch.
    pub taken_at_ms: i64,
    /// Logical paths recorded in the snapshot, including paths that were
    /// absent at capture time.
    pub captured_paths: Vec<String>,
    /// Total bytes captured for `captured_paths`: the sum of the recorded
    /// file lengths; absent paths contribute nothing.
    pub byte_count: u64,
}
129
/// Result returned after capturing a new snapshot.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Stable identifier (equal to the requested `scope_id`).
    pub snapshot_id: String,
    /// Paths newly captured by this call. Paths the snapshot already held
    /// are not listed again.
    pub captured_paths: Vec<String>,
    /// Total bytes captured for `captured_paths`.
    pub byte_count: u64,
}
140
/// Result returned after restoring a snapshot.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// Paths successfully restored.
    pub restored_paths: Vec<String>,
    /// Paths skipped, with human-readable reasons, as `(path, reason)`
    /// pairs. A per-path failure lands here instead of failing the call.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
151
/// Result returned after dropping a snapshot.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// True when an existing snapshot was removed; false when the session or
    /// the snapshot was not registered.
    pub dropped: bool,
}
160
/// All snapshots registered for one session, plus the byte accounting used
/// to decide when to evict.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots, in insertion order.
    snapshots: Vec<SnapshotState>,
    /// Bytes currently held in this session's snapshot bundle. We track
    /// this rather than recomputing from `bodies/` so eviction stays
    /// O(snapshots) instead of walking the filesystem on every write.
    byte_count: u64,
    /// Per-session byte cap. Defaults to [`DEFAULT_SESSION_BYTE_CAP`] and
    /// can be overridden with [`configure_session_byte_cap`].
    byte_cap: u64,
}
173
174impl Default for SessionSnapshots {
175    fn default() -> Self {
176        Self {
177            snapshots: Vec::new(),
178            byte_count: 0,
179            byte_cap: DEFAULT_SESSION_BYTE_CAP,
180        }
181    }
182}
183
184static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
185
186fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
187    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
188}
189
190/// Override the byte cap for a specific session and immediately enforce
191/// it. Returns the previous cap.
192///
193/// Primarily intended for tests that want to force eviction without
194/// writing a gigabyte. Production embedders generally leave the default
195/// in place; touching one session never affects another.
196pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
197    let mut guard = sessions()
198        .lock()
199        .expect("fs_snapshot session mutex poisoned");
200    let bundle = guard.entry(session_id.to_string()).or_default();
201    let previous = bundle.byte_cap;
202    bundle.byte_cap = bytes.max(1);
203    enforce_byte_cap(bundle, session_id, None);
204    previous
205}
206
207/// Drop every snapshot registered for `session_id`, both in memory and
208/// on disk. Returns the number of snapshots removed.
209///
210/// ACP hosts should call this on session close so the snapshot bundle
211/// doesn't outlive the conversation. Tests can also call it on
212/// teardown when reusing a session id across cases.
213pub fn drop_session_snapshots(session_id: &str) -> usize {
214    let mut guard = sessions()
215        .lock()
216        .expect("fs_snapshot session mutex poisoned");
217    let Some(bundle) = guard.remove(session_id) else {
218        return 0;
219    };
220    let count = bundle.snapshots.len();
221    for snapshot in &bundle.snapshots {
222        remove_snapshot_dir(snapshot);
223    }
224    count
225}
226
227/// Drop every registered session's snapshots, in memory and on disk.
228/// Returns the number of sessions removed.
229///
230/// [`drop_session_snapshots`] handles a single conversation on ACP
231/// session close. This drains the entire process-global map and is
232/// intended for host reset paths (e.g. the test runner between cases)
233/// where the worker is reused and snapshot bundles would otherwise
234/// accumulate one session at a time.
235pub fn reset_all_sessions() -> usize {
236    let mut guard = sessions()
237        .lock()
238        .expect("fs_snapshot session mutex poisoned");
239    let session_count = guard.len();
240    for bundle in guard.values() {
241        for snapshot in &bundle.snapshots {
242            remove_snapshot_dir(snapshot);
243        }
244    }
245    guard.clear();
246    session_count
247}
248
/// How many sessions currently have an entry in the registry. Test-only.
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
257
258/// Take a snapshot. When `paths` is empty the snapshot is "open" — bytes
259/// are captured lazily as `auto_capture_for_write` fires from inside
260/// the mutating tool builtins.
261pub fn snapshot(
262    session_id: &str,
263    scope_id: &str,
264    paths: &[String],
265    root: Option<&Path>,
266) -> Result<SnapshotResult, HostlibError> {
267    validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
268    validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
269    let root = resolve_root(root);
270    let mut guard = sessions()
271        .lock()
272        .expect("fs_snapshot session mutex poisoned");
273    let bundle = guard.entry(session_id.to_string()).or_default();
274    upsert_snapshot(bundle, session_id, scope_id, &root)?;
275    let mut captured_paths = Vec::new();
276    let mut byte_count = 0u64;
277    for raw in paths {
278        let path = normalize_logical(Path::new(raw));
279        let added =
280            capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
281                HostlibError::Backend {
282                    builtin: SNAPSHOT_BUILTIN,
283                    message,
284                }
285            })?;
286        if let Some(bytes) = added {
287            byte_count = byte_count.saturating_add(bytes);
288            captured_paths.push(path.to_string_lossy().into_owned());
289        }
290    }
291    enforce_byte_cap(bundle, session_id, Some(scope_id));
292    let state = bundle
293        .snapshots
294        .iter()
295        .find(|snap| snap.snapshot_id == scope_id)
296        .expect("snapshot just upserted is protected from byte-cap eviction");
297    persist_manifest(state).map_err(|err| HostlibError::Backend {
298        builtin: SNAPSHOT_BUILTIN,
299        message: err,
300    })?;
301    Ok(SnapshotResult {
302        snapshot_id: state.snapshot_id.clone(),
303        captured_paths,
304        byte_count,
305    })
306}
307
308/// Restore a previously-captured snapshot.
309pub fn restore(
310    session_id: &str,
311    snapshot_id: &str,
312    paths: &[String],
313) -> Result<RestoreResult, HostlibError> {
314    validate_session_id(RESTORE_BUILTIN, session_id)?;
315    validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
316    let mut guard = sessions()
317        .lock()
318        .expect("fs_snapshot session mutex poisoned");
319    let bundle = guard
320        .get_mut(session_id)
321        .ok_or_else(|| HostlibError::Backend {
322            builtin: RESTORE_BUILTIN,
323            message: format!("no snapshots registered for session `{session_id}`"),
324        })?;
325    let state = bundle
326        .snapshots
327        .iter()
328        .find(|snap| snap.snapshot_id == snapshot_id)
329        .cloned()
330        .ok_or_else(|| HostlibError::Backend {
331            builtin: RESTORE_BUILTIN,
332            message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
333        })?;
334    let selected = select_paths(&state, paths);
335    let mut restored_paths = Vec::new();
336    let mut skipped_paths_with_reasons = Vec::new();
337    for path in selected {
338        let Some(entry) = state.entries.get(&path) else {
339            continue;
340        };
341        let label = path.to_string_lossy().into_owned();
342        match restore_entry(&state, &path, entry) {
343            Ok(()) => restored_paths.push(label),
344            Err(reason) => skipped_paths_with_reasons.push((label, reason)),
345        }
346    }
347    Ok(RestoreResult {
348        snapshot_id: snapshot_id.to_string(),
349        restored_paths,
350        skipped_paths_with_reasons,
351    })
352}
353
354/// List snapshots registered for a session, sorted by capture time.
355pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
356    validate_session_id(LIST_BUILTIN, session_id)?;
357    let guard = sessions()
358        .lock()
359        .expect("fs_snapshot session mutex poisoned");
360    let Some(bundle) = guard.get(session_id) else {
361        return Ok(Vec::new());
362    };
363    let mut summaries: Vec<SnapshotSummary> = bundle
364        .snapshots
365        .iter()
366        .map(|state| SnapshotSummary {
367            snapshot_id: state.snapshot_id.clone(),
368            scope_id: state.scope_id.clone(),
369            taken_at_ms: state.taken_at_ms,
370            captured_paths: state
371                .entries
372                .keys()
373                .map(|path| path.to_string_lossy().into_owned())
374                .collect(),
375            byte_count: entry_byte_count(state),
376        })
377        .collect();
378    summaries.sort_by_key(|summary| summary.taken_at_ms);
379    Ok(summaries)
380}
381
382/// Drop a snapshot's in-memory and on-disk state.
383pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
384    validate_session_id(DROP_BUILTIN, session_id)?;
385    validate_scope_id(DROP_BUILTIN, snapshot_id)?;
386    let mut guard = sessions()
387        .lock()
388        .expect("fs_snapshot session mutex poisoned");
389    let Some(bundle) = guard.get_mut(session_id) else {
390        return Ok(DropResult {
391            snapshot_id: snapshot_id.to_string(),
392            dropped: false,
393        });
394    };
395    let position = bundle
396        .snapshots
397        .iter()
398        .position(|snap| snap.snapshot_id == snapshot_id);
399    let dropped = match position {
400        Some(idx) => {
401            let removed = bundle.snapshots.remove(idx);
402            bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
403            remove_snapshot_dir(&removed);
404            true
405        }
406        None => false,
407    };
408    Ok(DropResult {
409        snapshot_id: snapshot_id.to_string(),
410        dropped,
411    })
412}
413
/// Auto-on-write hook called from the mutating tool builtins.
///
/// Captures `path`'s pre-image into the snapshot whose id matches the
/// current [`harn_vm::agent_sessions::current_tool_call_id`]. The first
/// write in a tool call auto-opens that snapshot. The hook silently no-ops
/// when no session is active or no tool-call id is set, which keeps read-only
/// tools and writes outside active tool scopes cheap.
///
/// Note that with an active session but no tool-call id the changed path is
/// still recorded against the session; only the snapshot capture is skipped.
///
/// This function never returns an error: failures to open the snapshot,
/// capture the path, or persist the manifest are logged with
/// `tracing::warn!`. `builtin` is used only to label those log lines.
///
/// # Panics
///
/// Panics if the global session mutex is poisoned.
pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
    let Some(session_id) = active_session_id() else {
        return;
    };
    // Record the mutated path against the session BEFORE the snapshot/tool-call
    // gate below: this is the single chokepoint every hostlib write reaches, so
    // it is the authoritative source for a session's `files_written` (consumed by
    // the sub-agent receipt). Recorded unconditionally — even when no restore
    // snapshot is open (no active tool call) — because the write still happened.
    harn_vm::agent_sessions::record_session_changed_path(
        &session_id,
        normalize_logical(path).to_string_lossy().as_ref(),
    );
    let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
        return;
    };
    let mut guard = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    let bundle = guard.entry(session_id.clone()).or_default();
    // First write of this tool call: open the snapshot, preferring the root
    // configured for the session over the current-directory fallback.
    if !bundle
        .snapshots
        .iter()
        .any(|snap| snap.snapshot_id == snapshot_id)
    {
        let root =
            crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
        if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
            tracing::warn!(
                "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
            );
            return;
        }
    }
    let Some(snapshot) = bundle
        .snapshots
        .iter()
        .find(|snap| snap.snapshot_id == snapshot_id)
    else {
        return;
    };
    // Copy what is needed out of the snapshot so the shared borrow ends before
    // `capture_path` takes the bundle mutably. The stored root is used rather
    // than re-resolving it, so an existing snapshot keeps its original root.
    let scope_id = snapshot.scope_id.clone();
    let root = snapshot.root.clone();
    let key = normalize_logical(path);
    match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
        Ok(_added) => {
            // The manifest is rewritten whether or not the path was newly added.
            if let Some(state) = bundle
                .snapshots
                .iter()
                .find(|snap| snap.snapshot_id == snapshot_id)
            {
                if let Err(err) = persist_manifest(state) {
                    tracing::warn!(
                        "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
                    );
                }
            }
        }
        Err(err) => {
            tracing::warn!(
                "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
                key.display()
            );
        }
    }
    // Runs on success and on failure; the snapshot being written is protected.
    enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
}
488
489fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
490    let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
491    let dict = raw.as_ref();
492    let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
493    let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
494    let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
495    let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
496    let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
497    Ok(build_dict([
498        ("snapshot_id", str_value(&result.snapshot_id)),
499        (
500            "captured_paths",
501            VmValue::List(Arc::new(
502                result
503                    .captured_paths
504                    .into_iter()
505                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
506                    .collect(),
507            )),
508        ),
509        ("byte_count", VmValue::Int(result.byte_count as i64)),
510    ]))
511}
512
513fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
514    let raw = dict_arg(RESTORE_BUILTIN, args)?;
515    let dict = raw.as_ref();
516    let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
517    let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
518    let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
519    let result = restore(&session_id, &snapshot_id, &paths)?;
520    Ok(build_dict([
521        ("snapshot_id", str_value(&result.snapshot_id)),
522        (
523            "restored_paths",
524            VmValue::List(Arc::new(
525                result
526                    .restored_paths
527                    .into_iter()
528                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
529                    .collect(),
530            )),
531        ),
532        (
533            "skipped_paths_with_reasons",
534            VmValue::List(Arc::new(
535                result
536                    .skipped_paths_with_reasons
537                    .into_iter()
538                    .map(|(path, reason)| {
539                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
540                    })
541                    .collect(),
542            )),
543        ),
544    ]))
545}
546
547fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
548    let raw = dict_arg(LIST_BUILTIN, args)?;
549    let dict = raw.as_ref();
550    let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
551    let summaries = list_snapshots(&session_id)?;
552    Ok(build_dict([(
553        "snapshots",
554        VmValue::List(Arc::new(
555            summaries.into_iter().map(snapshot_summary_value).collect(),
556        )),
557    )]))
558}
559
560fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
561    let raw = dict_arg(DROP_BUILTIN, args)?;
562    let dict = raw.as_ref();
563    let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
564    let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
565    let result = drop_snapshot(&session_id, &snapshot_id)?;
566    Ok(build_dict([
567        ("snapshot_id", str_value(&result.snapshot_id)),
568        ("dropped", VmValue::Bool(result.dropped)),
569    ]))
570}
571
572fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
573    build_dict([
574        ("snapshot_id", str_value(&summary.snapshot_id)),
575        ("scope_id", str_value(&summary.scope_id)),
576        ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
577        (
578            "captured_paths",
579            VmValue::List(Arc::new(
580                summary
581                    .captured_paths
582                    .into_iter()
583                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
584                    .collect(),
585            )),
586        ),
587        ("byte_count", VmValue::Int(summary.byte_count as i64)),
588    ])
589}
590
591fn upsert_snapshot(
592    bundle: &mut SessionSnapshots,
593    session_id: &str,
594    scope_id: &str,
595    root: &Path,
596) -> Result<(), HostlibError> {
597    if bundle
598        .snapshots
599        .iter()
600        .any(|snap| snap.snapshot_id == scope_id)
601    {
602        return Ok(());
603    }
604    let state = SnapshotState {
605        snapshot_id: scope_id.to_string(),
606        scope_id: scope_id.to_string(),
607        session_id: session_id.to_string(),
608        root: root.to_path_buf(),
609        taken_at_ms: now_ms(),
610        entries: BTreeMap::new(),
611    };
612    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
613    stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
614        builtin: SNAPSHOT_BUILTIN,
615        message: format!("mkdir {}: {err}", dir.display()),
616    })?;
617    bundle.snapshots.push(state);
618    Ok(())
619}
620
/// Records the pre-image of `path` in snapshot `snapshot_id` of `bundle`.
///
/// Returns `Ok(None)` when the snapshot already has an entry for `path`
/// (the first capture wins), otherwise `Ok(Some(bytes))` with the number of
/// bytes accounted for the new entry (`0` when the path does not exist).
///
/// # Errors
///
/// Returns a message when the snapshot is not registered, when `path` cannot
/// be stat'ed or read, when it is a directory or a symlink (both
/// unsupported), or when the body cannot be stored.
fn capture_path(
    bundle: &mut SessionSnapshots,
    session_id: &str,
    snapshot_id: &str,
    path: &Path,
    root: &Path,
) -> Result<Option<u64>, String> {
    let snap_index = bundle
        .snapshots
        .iter()
        .position(|snap| snap.snapshot_id == snapshot_id)
        .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
    if bundle.snapshots[snap_index].entries.contains_key(path) {
        return Ok(None);
    }
    // `symlink_metadata` does not follow links, so a symlink is seen as a
    // symlink and rejected below instead of being read through.
    let metadata = stdfs::symlink_metadata(path);
    let (entry, byte_count) = match metadata {
        // A missing path is a valid pre-image: restoring it means deleting.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
        Err(err) => {
            return Err(format!("stat `{}`: {err}", path.display()));
        }
        Ok(metadata) if metadata.is_dir() => {
            return Err(format!(
                "snapshot of directory `{}` is not supported yet",
                path.display()
            ));
        }
        Ok(metadata) if metadata.file_type().is_symlink() => {
            return Err(format!(
                "snapshot of symlink `{}` is not supported yet",
                path.display()
            ));
        }
        Ok(metadata) => {
            let bytes = stdfs::read(path)
                .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
            // The body is stored under the hex SHA-256 of its content.
            let body_hash = hex::encode(Sha256::digest(&bytes));
            let len = bytes.len() as u64;
            store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
            // Mode bits are only available (and only restored) on Unix.
            #[cfg(unix)]
            let mode = {
                use std::os::unix::fs::MetadataExt;
                Some(metadata.mode())
            };
            #[cfg(not(unix))]
            let mode = {
                let _ = &metadata;
                None
            };
            (
                SnapshotEntry::File {
                    body_hash,
                    len,
                    mode,
                },
                len,
            )
        }
    };
    let snap = &mut bundle.snapshots[snap_index];
    snap.entries.insert(path.to_path_buf(), entry);
    // Accounting counts every entry's length, even when an identical body is
    // already stored; this matches what `entry_byte_count` later subtracts.
    bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
    Ok(Some(byte_count))
}
685
686fn store_body(
687    root: &Path,
688    session_id: &str,
689    snapshot_id: &str,
690    body_hash: &str,
691    bytes: &[u8],
692) -> Result<(), String> {
693    let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
694    stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
695    let body_path = bodies.join(body_hash);
696    if !body_path.exists() {
697        atomic_write(&body_path, bytes)?;
698    }
699    Ok(())
700}
701
/// Puts `path` back into the state described by `entry`.
///
/// * `Absent`: whatever currently occupies `path` is removed (a directory is
///   removed recursively); an already-missing path counts as success.
/// * `File`: the stored body is read from the snapshot's `bodies/` directory
///   and written over `path`; on Unix the recorded mode bits are re-applied.
///
/// # Errors
///
/// Returns a human-readable reason when any filesystem step fails.
fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
    match entry {
        // `symlink_metadata` does not follow links, so a symlink that now
        // sits at `path` is removed as a file rather than followed.
        SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
            Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
                .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
            Ok(_) => stdfs::remove_file(path)
                .map_err(|err| format!("remove_file {}: {err}", path.display())),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(err) => Err(format!("stat {}: {err}", path.display())),
        },
        SnapshotEntry::File {
            body_hash, mode, ..
        } => {
            let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
                .join("bodies")
                .join(body_hash);
            let bytes = stdfs::read(&body_path)
                .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
            atomic_write(path, &bytes)?;
            // Permissions are applied after the content is in place.
            #[cfg(unix)]
            if let Some(bits) = mode {
                use std::os::unix::fs::PermissionsExt;
                let permissions = stdfs::Permissions::from_mode(*bits);
                stdfs::set_permissions(path, permissions)
                    .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
            }
            #[cfg(not(unix))]
            let _ = mode;
            Ok(())
        }
    }
}
734
735fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
736    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
737    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
738    let manifest = Manifest {
739        version: MANIFEST_VERSION,
740        snapshot_id: state.snapshot_id.clone(),
741        scope_id: state.scope_id.clone(),
742        session_id: state.session_id.clone(),
743        root: state.root.to_string_lossy().into_owned(),
744        taken_at_ms: state.taken_at_ms,
745        entries: state
746            .entries
747            .iter()
748            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
749            .collect(),
750    };
751    let bytes = serde_json::to_vec_pretty(&manifest)
752        .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
753    atomic_write(&dir.join("manifest.json"), &bytes)
754}
755
756fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
757    if let Some(parent) = path.parent() {
758        stdfs::create_dir_all(parent)
759            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
760    }
761    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
762    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
763    match stdfs::rename(&tmp, path) {
764        Ok(()) => Ok(()),
765        Err(rename_err) => {
766            let _ = stdfs::remove_file(path);
767            stdfs::rename(&tmp, path).map_err(|retry| {
768                // Both renames failed; the temp file would otherwise linger
769                // and accumulate in the snapshot directory.
770                let _ = stdfs::remove_file(&tmp);
771                format!(
772                    "rename {} to {}: {rename_err}; retry: {retry}",
773                    tmp.display(),
774                    path.display()
775                )
776            })
777        }
778    }
779}
780
781/// Evict snapshots oldest-first until the session is back under its byte
782/// cap. `protected` names the snapshot currently being written (if any);
783/// it is never evicted, even when it alone exceeds the cap — otherwise the
784/// caller would lose the very snapshot it just captured (and `snapshot`
785/// would panic re-fetching it). A snapshot larger than the whole cap is
786/// therefore retained: rollback for an in-flight write takes precedence
787/// over the soft budget.
788fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
789    while bundle.byte_count > bundle.byte_cap {
790        let Some(idx) = bundle
791            .snapshots
792            .iter()
793            .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
794        else {
795            break;
796        };
797        let evicted = bundle.snapshots.remove(idx);
798        bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
799        tracing::info!(
800            "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
801            evicted.snapshot_id,
802            bundle.byte_cap,
803        );
804        remove_snapshot_dir(&evicted);
805    }
806}
807
808fn remove_snapshot_dir(state: &SnapshotState) {
809    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
810    let _ = stdfs::remove_dir_all(&dir);
811}
812
813fn entry_byte_count(state: &SnapshotState) -> u64 {
814    state
815        .entries
816        .values()
817        .map(|entry| match entry {
818            SnapshotEntry::File { len, .. } => *len,
819            SnapshotEntry::Absent => 0,
820        })
821        .sum()
822}
823
824fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
825    if paths.is_empty() {
826        return state.entries.keys().cloned().collect();
827    }
828    let requested: BTreeSet<PathBuf> = paths
829        .iter()
830        .map(|path| normalize_logical(Path::new(path)))
831        .collect();
832    state
833        .entries
834        .keys()
835        .filter(|path| requested.contains(*path))
836        .cloned()
837        .collect()
838}
839
840fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
841    if session_id.trim().is_empty() {
842        return Err(HostlibError::InvalidParameter {
843            builtin,
844            param: "session_id",
845            message: "must not be empty".to_string(),
846        });
847    }
848    Ok(())
849}
850
851fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
852    if scope_id.trim().is_empty() {
853        let param = match builtin {
854            SNAPSHOT_BUILTIN => "scope_id",
855            _ => "snapshot_id",
856        };
857        return Err(HostlibError::InvalidParameter {
858            builtin,
859            param,
860            message: "must not be empty".to_string(),
861        });
862    }
863    Ok(())
864}
865
866fn active_session_id() -> Option<String> {
867    harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
868}
869
870fn resolve_root(root: Option<&Path>) -> PathBuf {
871    match root {
872        Some(path) => normalize_logical(path),
873        None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
874    }
875}
876
877fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
878    let mut dir = root.to_path_buf();
879    for component in STATE_REL {
880        dir.push(component);
881    }
882    dir.push(sanitize_component(session_id));
883    dir.push(sanitize_component(snapshot_id));
884    dir
885}
886
887fn sanitize_component(input: &str) -> String {
888    let sanitized: String = input
889        .chars()
890        .map(|ch| match ch {
891            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
892            _ => '_',
893        })
894        .collect();
895    if sanitized == input {
896        sanitized
897    } else {
898        let hash = hex::encode(Sha256::digest(input.as_bytes()));
899        format!("{sanitized}-{}", &hash[..12])
900    }
901}
902
/// Make `path` absolute and collapse `.` / `..` purely lexically.
///
/// Relative paths are joined onto the current directory (or onto `"."` if
/// the current directory cannot be read). `.` components are dropped and
/// each `..` removes the component before it. The filesystem is never
/// consulted, so symlinks are not resolved and the path need not exist.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = match path.is_absolute() {
        true => path.to_path_buf(),
        false => {
            let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
            base.join(path)
        }
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut normalized, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    normalized.pop();
                }
                keep => normalized.push(keep),
            }
            normalized
        })
}
923
/// Current wall-clock time as whole milliseconds since the Unix epoch.
///
/// Returns `0` when the system clock reads earlier than the epoch. The
/// millisecond count is clamped to `i64::MAX` before conversion, so an
/// out-of-range value saturates instead of silently wrapping.
fn now_ms() -> i64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // `as_millis` yields a `u128`; after the clamp the cast is lossless.
        .map(|elapsed| elapsed.as_millis().min(i64::MAX as u128) as i64)
        .unwrap_or(0)
}
930
/// Unit tests for snapshot capture, restore, listing, dropping and byte-cap
/// eviction. Each test works in its own temporary directory and under its
/// own session id.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Hand each test its own session id so the process-wide `SESSIONS`
    /// map isolates them by key — no serialization or process-wide
    /// reset required.
    fn unique_session(prefix: &str) -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("{prefix}-{n}-{}", std::process::id())
    }

    /// Produce a fresh `tc-<n>` scope (snapshot) id from a process-wide
    /// counter, so no two calls within the test binary return the same id.
    fn unique_scope() -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Open (or create) the agent session `id` and make it the current
    /// session. The returned guard must be kept alive for the duration of
    /// the test — presumably it leaves the session when dropped (confirm
    /// against `harn_vm::agent_sessions`).
    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
        harn_vm::agent_sessions::enter_current_session(id.to_string())
    }

    /// Snapshot a file with an explicit path list, overwrite it, and check
    /// that `restore` puts the original bytes back.
    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("note.txt");
        stdfs::write(&file, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.snapshot_id, scope);
        assert_eq!(result.captured_paths.len(), 1);
        assert_eq!(result.byte_count, 2);

        stdfs::write(&file, b"clobbered").unwrap();
        // An empty path list restores everything the snapshot captured.
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(restored.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
    }

    /// A file deleted after the snapshot must be recreated by `restore`
    /// with its captured contents.
    #[test]
    fn restore_reinstates_deleted_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("doomed.txt");
        stdfs::write(&file, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::remove_file(&file).unwrap();
        assert!(!file.exists());
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
    }

    /// A path that did not exist at snapshot time and is created afterwards
    /// must be deleted again by `restore`.
    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("new.txt");
        assert!(!file.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::write(&file, b"created during call").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(
            !file.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    /// `list_snapshots` reports the snapshot's id and byte count;
    /// `drop_snapshot` removes it and reports `dropped == false` when
    /// called again for the same id.
    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("listed.txt");
        stdfs::write(&file, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let summaries = list_snapshots(&session).unwrap();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].snapshot_id, scope);
        assert_eq!(summaries[0].byte_count, 3);

        let dropped = drop_snapshot(&session, &scope).unwrap();
        assert!(dropped.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let again = drop_snapshot(&session, &scope).unwrap();
        assert!(!again.dropped, "second drop must be idempotent");
    }

    /// With an open (path-less) snapshot and a current tool call whose id
    /// equals the snapshot id, `auto_capture_for_write` must record the
    /// file's pre-image so that `restore` can bring it back.
    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("auto.txt");
        stdfs::write(&file, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());

        // No paths: registers an open snapshot that is filled in on write.
        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
        auto_capture_for_write("hostlib_tools_write_file", &file);
        stdfs::write(&file, b"post").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
    }

    /// `auto_capture_for_write` must add each written path (normalized,
    /// deduplicated) to the session's changed-path set even when no
    /// snapshot is open, and `take_session_changed_paths` must drain it.
    #[test]
    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
        let dir = TempDir::new().unwrap();
        let one = dir.path().join("a.txt");
        let two = dir.path().join("b.txt");
        let session = unique_session("snap-changed");
        harn_vm::agent_sessions::clear_session_changed_paths(&session);
        let _session_guard = enter_session(&session);

        // No active tool call / open snapshot: the write still happened, so the
        // path must be recorded for the receipt regardless.
        auto_capture_for_write("hostlib_tools_write_file", &one);
        auto_capture_for_write("hostlib_tools_write_file", &two);
        // A duplicate write of the same path must dedupe.
        auto_capture_for_write("hostlib_tools_write_file", &one);

        let changed = harn_vm::agent_sessions::session_changed_paths(&session);
        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
        let expect_one = normalize_logical(&one).to_string_lossy().into_owned();
        let expect_two = normalize_logical(&two).to_string_lossy().into_owned();
        assert!(
            changed.contains(&expect_one),
            "path a recorded: {changed:?}"
        );
        assert!(
            changed.contains(&expect_two),
            "path b recorded: {changed:?}"
        );

        // `take` drains so the receipt captures the set exactly once.
        let drained = harn_vm::agent_sessions::take_session_changed_paths(&session);
        assert_eq!(drained.len(), 2);
        assert!(
            harn_vm::agent_sessions::session_changed_paths(&session).is_empty(),
            "take drains the session's recorded paths"
        );
    }

    /// Two 5-byte snapshots under an 8-byte cap: taking the second must
    /// evict the first, leaving only the newer snapshot listed.
    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        // Per-session cap: only affects this test's session, so other
        // tests can run in parallel without seeing the squeeze.
        configure_session_byte_cap(&session, 8);

        // Creates a 5-byte file in the temp dir and returns its path.
        let mk = |name: &str| {
            let path = dir.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        };

        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let a = mk("a.txt");
        snapshot(
            &session,
            &scope_a,
            &[a.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let b = mk("b.txt");
        snapshot(
            &session,
            &scope_b,
            &[b.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope_b],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    /// A lone 10-byte snapshot under a 4-byte cap must still be listed
    /// after capture.
    #[test]
    fn snapshot_larger_than_cap_is_retained_not_evicted() {
        // A single snapshot whose captured bytes exceed the whole cap must
        // survive — evicting the snapshot we just took would lose rollback
        // for the in-flight write (and previously panicked re-fetching it).
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-oversized");
        let _session_guard = enter_session(&session);
        configure_session_byte_cap(&session, 4);

        let scope = unique_scope();
        let file = dir.path().join("big.txt");
        stdfs::write(&file, b"0123456789").unwrap();
        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.byte_count, 10);

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope],
            "an oversized snapshot must be retained rather than evicting itself"
        );
    }

    /// `drop_session_snapshots` must remove every snapshot of the session,
    /// return how many it removed, and return 0 on a repeat call.
    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("retained.txt");
        stdfs::write(&file, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope_a,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        snapshot(
            &session,
            &scope_b,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}