// harn_hostlib/fs_snapshot.rs

//! Per-tool-call filesystem snapshots — Gemini-style `/restore` primitives.
//!
//! Captures the pre-image of paths touched by a mutating tool call so a
//! client can roll the change back surgically without losing untracked
//! work. Snapshot identity is the ACP `toolCallId`, so consumers index
//! into the same id space the rest of the transcript already records.
//!
//! Two capture modes:
//!
//! 1. **Explicit** — the caller passes a `paths` list to
//!    `hostlib_fs_snapshot`; bytes are copied immediately.
//! 2. **Auto-on-write** — calling `hostlib_fs_snapshot` without `paths`
//!    registers an open snapshot. The
//!    [`auto_capture_for_write`] hook fires from inside
//!    `tools/write_file` and `tools/delete_file` and lazily copies each
//!    pre-image into the active snapshot keyed by the current
//!    [`harn_vm::agent_sessions::current_tool_call_id`].
//!
//! Storage layout (per session):
//!
//! ```text
//! .harn/state/snapshots/<session_id>/
//!   <snapshot_id>/
//!     manifest.json    # path -> { kind, body_hash?, len?, mode? }
//!     bodies/<sha256>  # content-addressed; deduped within the snapshot
//! ```
//!
//! Snapshots are session-scoped and ephemeral. They are not persisted
//! across machine reboots; consumers that need durable rollback bundle
//! them into a session via `session/load`.
31
32use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
44use crate::tools::args::{
45    build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46};
47
// Builtin names. Each one is both the name registered with the
// `BuiltinRegistry` and the `builtin` tag attached to errors raised by the
// corresponding operation.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

// Schema version stamped into every persisted `manifest.json`.
const MANIFEST_VERSION: u32 = 1;
// Path components of the snapshot state directory (`.harn/state/snapshots`);
// presumably joined under the snapshot root by `snapshot_dir` — confirm there.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Default cap on the on-disk footprint of one session's snapshot bundle
/// before the oldest snapshots are evicted. Matches the proposal in
/// [#1720](https://github.com/burin-labs/harn/issues/1720): 1 GiB.
///
/// Override per session with [`configure_session_byte_cap`].
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
60
/// Hostlib filesystem snapshot capability handle.
///
/// A stateless marker type: its [`HostlibCapability`] impl registers the
/// four snapshot builtins (snapshot, restore, list, drop) under the `fs`
/// module. All snapshot state lives in the process-global session registry,
/// not in this value.
#[derive(Default)]
pub struct FsSnapshotCapability;
64
65impl HostlibCapability for FsSnapshotCapability {
66    fn module_name(&self) -> &'static str {
67        // Snapshots live under the existing `fs/` schema directory so the
68        // contract surface stays consolidated alongside the staging
69        // primitives.
70        "fs"
71    }
72
73    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
74        register(registry, SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
75        register(registry, RESTORE_BUILTIN, "restore", restore_builtin);
76        register(
77            registry,
78            LIST_BUILTIN,
79            "list_snapshots",
80            list_snapshots_builtin,
81        );
82        register(
83            registry,
84            DROP_BUILTIN,
85            "drop_snapshot",
86            drop_snapshot_builtin,
87        );
88    }
89}
90
91fn register(
92    registry: &mut BuiltinRegistry,
93    name: &'static str,
94    method: &'static str,
95    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
96) {
97    let handler: SyncHandler = std::sync::Arc::new(runner);
98    registry.register(RegisteredBuiltin {
99        name,
100        module: "fs",
101        method,
102        handler,
103    });
104}
105
/// Pre-image recorded for one path in a snapshot.
///
/// Serialized with an internal `kind` tag whose value is the snake_case
/// variant name (`"file"` or `"absent"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path was a regular file when captured.
    File {
        /// Hex-encoded SHA-256 of the file contents; also the file name of
        /// the stored body under the snapshot's `bodies/` directory.
        body_hash: String,
        /// Length of the captured contents in bytes.
        len: u64,
        /// Unix mode bits read from the file's metadata at capture time;
        /// `None` on non-unix targets and omitted from the JSON when absent.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist when captured; restoring removes whatever is
    /// there now.
    Absent,
}
117
/// On-disk form of a snapshot, written as `manifest.json` in the snapshot
/// directory by `persist_manifest`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest schema version (`MANIFEST_VERSION` at write time).
    version: u32,
    /// Snapshot identifier.
    snapshot_id: String,
    /// Scope id supplied when the snapshot was created.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Snapshot root, rendered lossily as a string.
    root: String,
    /// Capture time in milliseconds (see `SnapshotSummary::taken_at_ms`).
    taken_at_ms: i64,
    /// Captured entries keyed by the lossily-rendered logical path.
    entries: BTreeMap<String, SnapshotEntry>,
}
128
/// In-memory record of one snapshot held in the session registry.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Snapshot identifier; set equal to `scope_id` when the snapshot is
    /// created by `upsert_snapshot`.
    snapshot_id: String,
    /// Scope id supplied when the snapshot was created.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root under which the snapshot's state directory is located.
    root: PathBuf,
    /// Capture time recorded when the snapshot was first opened.
    taken_at_ms: i64,
    /// Logical absolute paths (workspace-relative when storage permits).
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
139
/// Per-snapshot summary returned by `list_snapshots`.
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Stable identifier (canonically the ACP toolCallId).
    pub snapshot_id: String,
    /// Caller-chosen scope id passed when the snapshot was created.
    pub scope_id: String,
    /// Wall-clock capture time, milliseconds since the UNIX epoch.
    pub taken_at_ms: i64,
    /// Every logical path recorded in the snapshot, including paths that
    /// were absent at capture time.
    pub captured_paths: Vec<String>,
    /// Sum of the captured file lengths for `captured_paths`; absent paths
    /// contribute zero.
    pub byte_count: u64,
}
154
/// Result returned after capturing a new snapshot.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Stable identifier (equal to the requested `scope_id`).
    pub snapshot_id: String,
    /// Paths newly captured by this call. Paths the snapshot already held
    /// are not repeated here.
    pub captured_paths: Vec<String>,
    /// Total bytes newly captured for `captured_paths` by this call.
    pub byte_count: u64,
}
165
/// Result returned after restoring a snapshot.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// Paths successfully restored.
    pub restored_paths: Vec<String>,
    /// Paths whose restore failed, as `(path, reason)` pairs with a
    /// human-readable reason. A failure on one path does not abort the rest.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
176
/// Result returned after dropping a snapshot.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Echoed snapshot id.
    pub snapshot_id: String,
    /// True when an existing snapshot was removed; false when the session
    /// or the snapshot was unknown.
    pub dropped: bool,
}
185
/// All snapshots registered for one session, plus its byte accounting.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots, in insertion order. Eviction walks this front-to-back, so
    /// the earliest-inserted snapshot is evicted first.
    snapshots: Vec<SnapshotState>,
    /// Bytes currently held in this session's snapshot bundle. We track
    /// this rather than recomputing from `bodies/` so eviction stays
    /// O(snapshots) instead of walking the filesystem on every write.
    byte_count: u64,
    /// Per-session byte cap. Defaults to [`DEFAULT_SESSION_BYTE_CAP`] and
    /// can be overridden with [`configure_session_byte_cap`].
    byte_cap: u64,
}
198
199impl Default for SessionSnapshots {
200    fn default() -> Self {
201        Self {
202            snapshots: Vec::new(),
203            byte_count: 0,
204            byte_cap: DEFAULT_SESSION_BYTE_CAP,
205        }
206    }
207}
208
// Process-global registry of snapshot bundles keyed by session id. Lazily
// initialized; access it through `sessions()`.
static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
210
211fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
212    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
213}
214
215/// Override the byte cap for a specific session and immediately enforce
216/// it. Returns the previous cap.
217///
218/// Primarily intended for tests that want to force eviction without
219/// writing a gigabyte. Production embedders generally leave the default
220/// in place; touching one session never affects another.
221pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
222    let mut guard = sessions()
223        .lock()
224        .expect("fs_snapshot session mutex poisoned");
225    let bundle = guard.entry(session_id.to_string()).or_default();
226    let previous = bundle.byte_cap;
227    bundle.byte_cap = bytes.max(1);
228    enforce_byte_cap(bundle, session_id, None);
229    previous
230}
231
232/// Drop every snapshot registered for `session_id`, both in memory and
233/// on disk. Returns the number of snapshots removed.
234///
235/// ACP hosts should call this on session close so the snapshot bundle
236/// doesn't outlive the conversation. Tests can also call it on
237/// teardown when reusing a session id across cases.
238pub fn drop_session_snapshots(session_id: &str) -> usize {
239    let mut guard = sessions()
240        .lock()
241        .expect("fs_snapshot session mutex poisoned");
242    let Some(bundle) = guard.remove(session_id) else {
243        return 0;
244    };
245    let count = bundle.snapshots.len();
246    for snapshot in &bundle.snapshots {
247        remove_snapshot_dir(snapshot);
248    }
249    count
250}
251
252/// Drop every registered session's snapshots, in memory and on disk.
253/// Returns the number of sessions removed.
254///
255/// [`drop_session_snapshots`] handles a single conversation on ACP
256/// session close. This drains the entire process-global map and is
257/// intended for host reset paths (e.g. the test runner between cases)
258/// where the worker is reused and snapshot bundles would otherwise
259/// accumulate one session at a time.
260pub fn reset_all_sessions() -> usize {
261    let mut guard = sessions()
262        .lock()
263        .expect("fs_snapshot session mutex poisoned");
264    let session_count = guard.len();
265    for bundle in guard.values() {
266        for snapshot in &bundle.snapshots {
267            remove_snapshot_dir(snapshot);
268        }
269    }
270    guard.clear();
271    session_count
272}
273
/// Number of sessions currently present in the registry. Test-only.
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
282
283/// Take a snapshot. When `paths` is empty the snapshot is "open" — bytes
284/// are captured lazily as `auto_capture_for_write` fires from inside
285/// the mutating tool builtins.
286pub fn snapshot(
287    session_id: &str,
288    scope_id: &str,
289    paths: &[String],
290    root: Option<&Path>,
291) -> Result<SnapshotResult, HostlibError> {
292    validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
293    validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
294    let root = resolve_root(root);
295    let mut guard = sessions()
296        .lock()
297        .expect("fs_snapshot session mutex poisoned");
298    let bundle = guard.entry(session_id.to_string()).or_default();
299    upsert_snapshot(bundle, session_id, scope_id, &root)?;
300    let mut captured_paths = Vec::new();
301    let mut byte_count = 0u64;
302    for raw in paths {
303        let path = normalize_logical(Path::new(raw));
304        let added =
305            capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
306                HostlibError::Backend {
307                    builtin: SNAPSHOT_BUILTIN,
308                    message,
309                }
310            })?;
311        if let Some(bytes) = added {
312            byte_count = byte_count.saturating_add(bytes);
313            captured_paths.push(path.to_string_lossy().into_owned());
314        }
315    }
316    enforce_byte_cap(bundle, session_id, Some(scope_id));
317    let state = bundle
318        .snapshots
319        .iter()
320        .find(|snap| snap.snapshot_id == scope_id)
321        .expect("snapshot just upserted is protected from byte-cap eviction");
322    persist_manifest(state).map_err(|err| HostlibError::Backend {
323        builtin: SNAPSHOT_BUILTIN,
324        message: err,
325    })?;
326    Ok(SnapshotResult {
327        snapshot_id: state.snapshot_id.clone(),
328        captured_paths,
329        byte_count,
330    })
331}
332
333/// Restore a previously-captured snapshot.
334pub fn restore(
335    session_id: &str,
336    snapshot_id: &str,
337    paths: &[String],
338) -> Result<RestoreResult, HostlibError> {
339    validate_session_id(RESTORE_BUILTIN, session_id)?;
340    validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
341    let mut guard = sessions()
342        .lock()
343        .expect("fs_snapshot session mutex poisoned");
344    let bundle = guard
345        .get_mut(session_id)
346        .ok_or_else(|| HostlibError::Backend {
347            builtin: RESTORE_BUILTIN,
348            message: format!("no snapshots registered for session `{session_id}`"),
349        })?;
350    let state = bundle
351        .snapshots
352        .iter()
353        .find(|snap| snap.snapshot_id == snapshot_id)
354        .cloned()
355        .ok_or_else(|| HostlibError::Backend {
356            builtin: RESTORE_BUILTIN,
357            message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
358        })?;
359    let selected = select_paths(&state, paths);
360    let mut restored_paths = Vec::new();
361    let mut skipped_paths_with_reasons = Vec::new();
362    for path in selected {
363        let Some(entry) = state.entries.get(&path) else {
364            continue;
365        };
366        let label = path.to_string_lossy().into_owned();
367        match restore_entry(&state, &path, entry) {
368            Ok(()) => restored_paths.push(label),
369            Err(reason) => skipped_paths_with_reasons.push((label, reason)),
370        }
371    }
372    Ok(RestoreResult {
373        snapshot_id: snapshot_id.to_string(),
374        restored_paths,
375        skipped_paths_with_reasons,
376    })
377}
378
379/// List snapshots registered for a session, sorted by capture time.
380pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
381    validate_session_id(LIST_BUILTIN, session_id)?;
382    let guard = sessions()
383        .lock()
384        .expect("fs_snapshot session mutex poisoned");
385    let Some(bundle) = guard.get(session_id) else {
386        return Ok(Vec::new());
387    };
388    let mut summaries: Vec<SnapshotSummary> = bundle
389        .snapshots
390        .iter()
391        .map(|state| SnapshotSummary {
392            snapshot_id: state.snapshot_id.clone(),
393            scope_id: state.scope_id.clone(),
394            taken_at_ms: state.taken_at_ms,
395            captured_paths: state
396                .entries
397                .keys()
398                .map(|path| path.to_string_lossy().into_owned())
399                .collect(),
400            byte_count: entry_byte_count(state),
401        })
402        .collect();
403    summaries.sort_by_key(|summary| summary.taken_at_ms);
404    Ok(summaries)
405}
406
407/// Drop a snapshot's in-memory and on-disk state.
408pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
409    validate_session_id(DROP_BUILTIN, session_id)?;
410    validate_scope_id(DROP_BUILTIN, snapshot_id)?;
411    let mut guard = sessions()
412        .lock()
413        .expect("fs_snapshot session mutex poisoned");
414    let Some(bundle) = guard.get_mut(session_id) else {
415        return Ok(DropResult {
416            snapshot_id: snapshot_id.to_string(),
417            dropped: false,
418        });
419    };
420    let position = bundle
421        .snapshots
422        .iter()
423        .position(|snap| snap.snapshot_id == snapshot_id);
424    let dropped = match position {
425        Some(idx) => {
426            let removed = bundle.snapshots.remove(idx);
427            bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
428            remove_snapshot_dir(&removed);
429            true
430        }
431        None => false,
432    };
433    Ok(DropResult {
434        snapshot_id: snapshot_id.to_string(),
435        dropped,
436    })
437}
438
/// Auto-on-write hook called from the mutating tool builtins.
///
/// Captures `path`'s pre-image into the snapshot whose id matches the
/// current [`harn_vm::agent_sessions::current_tool_call_id`]. The first
/// write in a tool call auto-opens that snapshot. The hook silently no-ops
/// when no session is active or no tool-call id is set, which keeps read-only
/// tools and writes outside active tool scopes cheap.
///
/// `builtin` names the calling builtin and is used only in log messages.
/// Nothing is returned to the caller: failures to open the snapshot,
/// capture the path, or persist the manifest are logged with
/// `tracing::warn!`.
///
/// # Panics
///
/// Panics if the session registry mutex is poisoned.
pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
    let Some(session_id) = active_session_id() else {
        return;
    };
    // Record the mutated path against the session BEFORE the snapshot/tool-call
    // gate below: this is the single chokepoint every hostlib write reaches, so
    // it is the authoritative source for a session's `files_written` (consumed by
    // the sub-agent receipt). Recorded unconditionally — even when no restore
    // snapshot is open (no active tool call) — because the write still happened.
    harn_vm::agent_sessions::record_session_changed_path(
        &session_id,
        normalize_logical(path).to_string_lossy().as_ref(),
    );
    let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
        return;
    };
    let mut guard = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    let bundle = guard.entry(session_id.clone()).or_default();
    // Auto-open the snapshot on the first write of this tool call. The root
    // is only resolved when a new snapshot actually has to be created.
    if !bundle
        .snapshots
        .iter()
        .any(|snap| snap.snapshot_id == snapshot_id)
    {
        let root =
            crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
        if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
            tracing::warn!(
                "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
            );
            return;
        }
    }
    let Some(snapshot) = bundle
        .snapshots
        .iter()
        .find(|snap| snap.snapshot_id == snapshot_id)
    else {
        return;
    };
    // Copy out what is needed so the shared borrow of `bundle` ends before
    // `capture_path` takes it mutably. An already-open snapshot keeps the
    // root it was created with.
    let scope_id = snapshot.scope_id.clone();
    let root = snapshot.root.clone();
    let key = normalize_logical(path);
    match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
        Ok(_added) => {
            // Persist the manifest after every successful call, including
            // when the path was already captured.
            if let Some(state) = bundle
                .snapshots
                .iter()
                .find(|snap| snap.snapshot_id == snapshot_id)
            {
                if let Err(err) = persist_manifest(state) {
                    tracing::warn!(
                        "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
                    );
                }
            }
        }
        Err(err) => {
            tracing::warn!(
                "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
                key.display()
            );
        }
    }
    // Evict older snapshots if this capture pushed the session over its cap;
    // the snapshot being written is protected.
    enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
}
513
514fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
515    let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
516    let dict = raw.as_ref();
517    let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
518    let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
519    let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
520    let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
521    let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
522    Ok(build_dict([
523        ("snapshot_id", str_value(&result.snapshot_id)),
524        (
525            "captured_paths",
526            VmValue::List(Arc::new(
527                result
528                    .captured_paths
529                    .into_iter()
530                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
531                    .collect(),
532            )),
533        ),
534        ("byte_count", VmValue::Int(result.byte_count as i64)),
535    ]))
536}
537
538fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
539    let raw = dict_arg(RESTORE_BUILTIN, args)?;
540    let dict = raw.as_ref();
541    let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
542    let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
543    let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
544    let result = restore(&session_id, &snapshot_id, &paths)?;
545    Ok(build_dict([
546        ("snapshot_id", str_value(&result.snapshot_id)),
547        (
548            "restored_paths",
549            VmValue::List(Arc::new(
550                result
551                    .restored_paths
552                    .into_iter()
553                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
554                    .collect(),
555            )),
556        ),
557        (
558            "skipped_paths_with_reasons",
559            VmValue::List(Arc::new(
560                result
561                    .skipped_paths_with_reasons
562                    .into_iter()
563                    .map(|(path, reason)| {
564                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
565                    })
566                    .collect(),
567            )),
568        ),
569    ]))
570}
571
572fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
573    let raw = dict_arg(LIST_BUILTIN, args)?;
574    let dict = raw.as_ref();
575    let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
576    let summaries = list_snapshots(&session_id)?;
577    Ok(build_dict([(
578        "snapshots",
579        VmValue::List(Arc::new(
580            summaries.into_iter().map(snapshot_summary_value).collect(),
581        )),
582    )]))
583}
584
585fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
586    let raw = dict_arg(DROP_BUILTIN, args)?;
587    let dict = raw.as_ref();
588    let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
589    let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
590    let result = drop_snapshot(&session_id, &snapshot_id)?;
591    Ok(build_dict([
592        ("snapshot_id", str_value(&result.snapshot_id)),
593        ("dropped", VmValue::Bool(result.dropped)),
594    ]))
595}
596
597fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
598    build_dict([
599        ("snapshot_id", str_value(&summary.snapshot_id)),
600        ("scope_id", str_value(&summary.scope_id)),
601        ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
602        (
603            "captured_paths",
604            VmValue::List(Arc::new(
605                summary
606                    .captured_paths
607                    .into_iter()
608                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
609                    .collect(),
610            )),
611        ),
612        ("byte_count", VmValue::Int(summary.byte_count as i64)),
613    ])
614}
615
616fn upsert_snapshot(
617    bundle: &mut SessionSnapshots,
618    session_id: &str,
619    scope_id: &str,
620    root: &Path,
621) -> Result<(), HostlibError> {
622    if bundle
623        .snapshots
624        .iter()
625        .any(|snap| snap.snapshot_id == scope_id)
626    {
627        return Ok(());
628    }
629    let state = SnapshotState {
630        snapshot_id: scope_id.to_string(),
631        scope_id: scope_id.to_string(),
632        session_id: session_id.to_string(),
633        root: root.to_path_buf(),
634        taken_at_ms: now_ms(),
635        entries: BTreeMap::new(),
636    };
637    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
638    stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
639        builtin: SNAPSHOT_BUILTIN,
640        message: format!("mkdir {}: {err}", dir.display()),
641    })?;
642    bundle.snapshots.push(state);
643    Ok(())
644}
645
646fn capture_path(
647    bundle: &mut SessionSnapshots,
648    session_id: &str,
649    snapshot_id: &str,
650    path: &Path,
651    root: &Path,
652) -> Result<Option<u64>, String> {
653    let snap_index = bundle
654        .snapshots
655        .iter()
656        .position(|snap| snap.snapshot_id == snapshot_id)
657        .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
658    if bundle.snapshots[snap_index].entries.contains_key(path) {
659        return Ok(None);
660    }
661    let metadata = stdfs::symlink_metadata(path);
662    let (entry, byte_count) = match metadata {
663        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
664        Err(err) => {
665            return Err(format!("stat `{}`: {err}", path.display()));
666        }
667        Ok(metadata) if metadata.is_dir() => {
668            return Err(format!(
669                "snapshot of directory `{}` is not supported yet",
670                path.display()
671            ));
672        }
673        Ok(metadata) if metadata.file_type().is_symlink() => {
674            return Err(format!(
675                "snapshot of symlink `{}` is not supported yet",
676                path.display()
677            ));
678        }
679        Ok(metadata) => {
680            let bytes = stdfs::read(path)
681                .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
682            let body_hash = hex::encode(Sha256::digest(&bytes));
683            let len = bytes.len() as u64;
684            store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
685            #[cfg(unix)]
686            let mode = {
687                use std::os::unix::fs::MetadataExt;
688                Some(metadata.mode())
689            };
690            #[cfg(not(unix))]
691            let mode = {
692                let _ = &metadata;
693                None
694            };
695            (
696                SnapshotEntry::File {
697                    body_hash,
698                    len,
699                    mode,
700                },
701                len,
702            )
703        }
704    };
705    let snap = &mut bundle.snapshots[snap_index];
706    snap.entries.insert(path.to_path_buf(), entry);
707    bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
708    Ok(Some(byte_count))
709}
710
711fn store_body(
712    root: &Path,
713    session_id: &str,
714    snapshot_id: &str,
715    body_hash: &str,
716    bytes: &[u8],
717) -> Result<(), String> {
718    let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
719    stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
720    let body_path = bodies.join(body_hash);
721    if !body_path.exists() {
722        atomic_write(&body_path, bytes)?;
723    }
724    Ok(())
725}
726
727fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
728    match entry {
729        SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
730            Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
731                .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
732            Ok(_) => stdfs::remove_file(path)
733                .map_err(|err| format!("remove_file {}: {err}", path.display())),
734            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
735            Err(err) => Err(format!("stat {}: {err}", path.display())),
736        },
737        SnapshotEntry::File {
738            body_hash, mode, ..
739        } => {
740            let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
741                .join("bodies")
742                .join(body_hash);
743            let bytes = stdfs::read(&body_path)
744                .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
745            atomic_write(path, &bytes)?;
746            #[cfg(unix)]
747            if let Some(bits) = mode {
748                use std::os::unix::fs::PermissionsExt;
749                let permissions = stdfs::Permissions::from_mode(*bits);
750                stdfs::set_permissions(path, permissions)
751                    .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
752            }
753            #[cfg(not(unix))]
754            let _ = mode;
755            Ok(())
756        }
757    }
758}
759
760fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
761    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
762    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
763    let manifest = Manifest {
764        version: MANIFEST_VERSION,
765        snapshot_id: state.snapshot_id.clone(),
766        scope_id: state.scope_id.clone(),
767        session_id: state.session_id.clone(),
768        root: state.root.to_string_lossy().into_owned(),
769        taken_at_ms: state.taken_at_ms,
770        entries: state
771            .entries
772            .iter()
773            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
774            .collect(),
775    };
776    let bytes = serde_json::to_vec_pretty(&manifest)
777        .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
778    atomic_write(&dir.join("manifest.json"), &bytes)
779}
780
781fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
782    if let Some(parent) = path.parent() {
783        stdfs::create_dir_all(parent)
784            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
785    }
786    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
787    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
788    match stdfs::rename(&tmp, path) {
789        Ok(()) => Ok(()),
790        Err(rename_err) => {
791            let _ = stdfs::remove_file(path);
792            stdfs::rename(&tmp, path).map_err(|retry| {
793                // Both renames failed; the temp file would otherwise linger
794                // and accumulate in the snapshot directory.
795                let _ = stdfs::remove_file(&tmp);
796                format!(
797                    "rename {} to {}: {rename_err}; retry: {retry}",
798                    tmp.display(),
799                    path.display()
800                )
801            })
802        }
803    }
804}
805
806/// Evict snapshots oldest-first until the session is back under its byte
807/// cap. `protected` names the snapshot currently being written (if any);
808/// it is never evicted, even when it alone exceeds the cap — otherwise the
809/// caller would lose the very snapshot it just captured (and `snapshot`
810/// would panic re-fetching it). A snapshot larger than the whole cap is
811/// therefore retained: rollback for an in-flight write takes precedence
812/// over the soft budget.
813fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
814    while bundle.byte_count > bundle.byte_cap {
815        let Some(idx) = bundle
816            .snapshots
817            .iter()
818            .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
819        else {
820            break;
821        };
822        let evicted = bundle.snapshots.remove(idx);
823        bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
824        tracing::info!(
825            "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
826            evicted.snapshot_id,
827            bundle.byte_cap,
828        );
829        remove_snapshot_dir(&evicted);
830    }
831}
832
833fn remove_snapshot_dir(state: &SnapshotState) {
834    let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
835    let _ = stdfs::remove_dir_all(&dir);
836}
837
838fn entry_byte_count(state: &SnapshotState) -> u64 {
839    state
840        .entries
841        .values()
842        .map(|entry| match entry {
843            SnapshotEntry::File { len, .. } => *len,
844            SnapshotEntry::Absent => 0,
845        })
846        .sum()
847}
848
849fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
850    if paths.is_empty() {
851        return state.entries.keys().cloned().collect();
852    }
853    let requested: BTreeSet<PathBuf> = paths
854        .iter()
855        .map(|path| normalize_logical(Path::new(path)))
856        .collect();
857    state
858        .entries
859        .keys()
860        .filter(|path| requested.contains(*path))
861        .cloned()
862        .collect()
863}
864
865fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
866    if session_id.trim().is_empty() {
867        return Err(HostlibError::InvalidParameter {
868            builtin,
869            param: "session_id",
870            message: "must not be empty".to_string(),
871        });
872    }
873    Ok(())
874}
875
876fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
877    if scope_id.trim().is_empty() {
878        let param = match builtin {
879            SNAPSHOT_BUILTIN => "scope_id",
880            _ => "snapshot_id",
881        };
882        return Err(HostlibError::InvalidParameter {
883            builtin,
884            param,
885            message: "must not be empty".to_string(),
886        });
887    }
888    Ok(())
889}
890
891fn active_session_id() -> Option<String> {
892    harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
893}
894
895fn resolve_root(root: Option<&Path>) -> PathBuf {
896    match root {
897        Some(path) => normalize_logical(path),
898        None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
899    }
900}
901
902fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
903    let mut dir = root.to_path_buf();
904    for component in STATE_REL {
905        dir.push(component);
906    }
907    dir.push(sanitize_component(session_id));
908    dir.push(sanitize_component(snapshot_id));
909    dir
910}
911
912fn sanitize_component(input: &str) -> String {
913    let sanitized: String = input
914        .chars()
915        .map(|ch| match ch {
916            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
917            _ => '_',
918        })
919        .collect();
920    if sanitized == input {
921        sanitized
922    } else {
923        let hash = hex::encode(Sha256::digest(input.as_bytes()));
924        format!("{sanitized}-{}", &hash[..12])
925    }
926}
927
/// Makes `path` absolute and collapses `.` / `..` purely lexically.
///
/// Relative paths are joined onto the current directory (or `"."` if it
/// cannot be determined). `.` components are dropped and each `..` removes
/// the preceding component; a `..` with nothing left to remove is ignored.
/// The filesystem is never consulted, so symlinks are not resolved and the
/// path does not need to exist.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = match path.is_absolute() {
        true => path.to_path_buf(),
        false => std::env::current_dir()
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(path),
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut acc, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    acc.pop();
                }
                kept => acc.push(kept),
            }
            acc
        })
}
948
/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// Returns `0` when the clock reports a time before the epoch. The
/// millisecond count (a `u128`) is converted with a checked conversion and
/// saturates at `i64::MAX` rather than silently wrapping as an `as` cast
/// would.
fn now_ms() -> i64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| {
            i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX)
        })
}
955
#[cfg(test)]
mod tests {
    use super::*;
    use harn_vm::agent_sessions;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Tool name handed to the auto-capture hook by the write tests.
    const WRITE_TOOL: &str = "hostlib_tools_write_file";

    /// Builds a session id no other test in this process uses, so the
    /// process-wide `SESSIONS` map keeps tests apart by key and nothing has
    /// to be serialized or reset between them.
    fn unique_session(prefix: &str) -> String {
        static NEXT: AtomicU64 = AtomicU64::new(0);
        let seq = NEXT.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        format!("{prefix}-{seq}-{pid}")
    }

    /// Builds a fresh scope (tool-call) id of the form `tc-<n>`.
    fn unique_scope() -> String {
        static NEXT: AtomicU64 = AtomicU64::new(0);
        let seq = NEXT.fetch_add(1, Ordering::Relaxed);
        format!("tc-{seq}")
    }

    /// Opens (or creates) session `id` and makes it current for as long as
    /// the returned guard is alive.
    fn enter_session(id: &str) -> agent_sessions::CurrentSessionGuard {
        agent_sessions::open_or_create(Some(id.to_string()));
        agent_sessions::enter_current_session(id.to_string())
    }

    /// Renders a path the way the builtins receive it: as an owned string.
    fn path_arg(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    /// Ids of the snapshots currently listed for `session`, in list order.
    fn snapshot_ids(session: &str) -> Vec<String> {
        list_snapshots(session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect()
    }

    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("note.txt");
        stdfs::write(&target, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let captured = snapshot(
            &session,
            &scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();
        assert_eq!(captured.snapshot_id, scope);
        assert_eq!(captured.captured_paths.len(), 1);
        assert_eq!(captured.byte_count, 2);

        // Overwrite the file, then roll it back to the captured pre-image.
        stdfs::write(&target, b"clobbered").unwrap();
        let outcome = restore(&session, &scope, &[]).unwrap();
        assert_eq!(outcome.restored_paths.len(), 1);
        assert!(outcome.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&target).unwrap(), b"v1");
    }

    #[test]
    fn restore_reinstates_deleted_file() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("doomed.txt");
        stdfs::write(&target, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();

        stdfs::remove_file(&target).unwrap();
        assert!(!target.exists());

        let outcome = restore(&session, &scope, &[]).unwrap();
        assert_eq!(outcome.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&target).unwrap(), b"alive");
    }

    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("new.txt");
        assert!(!target.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        // The path does not exist yet, so the snapshot records it as absent.
        snapshot(
            &session,
            &scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();

        stdfs::write(&target, b"created during call").unwrap();
        let outcome = restore(&session, &scope, &[]).unwrap();
        assert_eq!(outcome.restored_paths.len(), 1);
        assert!(
            !target.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("listed.txt");
        stdfs::write(&target, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();

        let listed = list_snapshots(&session).unwrap();
        assert_eq!(listed.len(), 1);
        assert_eq!(listed[0].snapshot_id, scope);
        assert_eq!(listed[0].byte_count, 3);

        let first_drop = drop_snapshot(&session, &scope).unwrap();
        assert!(first_drop.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let second_drop = drop_snapshot(&session, &scope).unwrap();
        assert!(!second_drop.dropped, "second drop must be idempotent");
    }

    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("auto.txt");
        stdfs::write(&target, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = agent_sessions::enter_current_tool_call(scope.clone());

        // No explicit paths: the snapshot is opened and filled in by the
        // auto-capture hook right before the write happens.
        snapshot(&session, &scope, &[], Some(sandbox.path())).unwrap();
        auto_capture_for_write(WRITE_TOOL, &target);
        stdfs::write(&target, b"post").unwrap();

        let outcome = restore(&session, &scope, &[]).unwrap();
        assert_eq!(outcome.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&target).unwrap(), b"pre");
    }

    #[test]
    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
        let sandbox = TempDir::new().unwrap();
        let first = sandbox.path().join("a.txt");
        let second = sandbox.path().join("b.txt");
        let session = unique_session("snap-changed");
        agent_sessions::clear_session_changed_paths(&session);
        let _session_guard = enter_session(&session);

        // There is no active tool call or open snapshot here, yet the write
        // still happened, so each path must be recorded for the receipt.
        auto_capture_for_write(WRITE_TOOL, &first);
        auto_capture_for_write(WRITE_TOOL, &second);
        // Writing the same path again must not add a second record.
        auto_capture_for_write(WRITE_TOOL, &first);

        let changed = agent_sessions::session_changed_paths(&session);
        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
        let expect_one = path_arg(&normalize_logical(&first));
        let expect_two = path_arg(&normalize_logical(&second));
        assert!(
            changed.contains(&expect_one),
            "path a recorded: {changed:?}"
        );
        assert!(
            changed.contains(&expect_two),
            "path b recorded: {changed:?}"
        );

        // `take` drains, so the receipt sees the set exactly once.
        let drained = agent_sessions::take_session_changed_paths(&session);
        assert_eq!(drained.len(), 2);
        assert!(
            agent_sessions::session_changed_paths(&session).is_empty(),
            "take drains the session's recorded paths"
        );
    }

    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        let sandbox = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        // The cap is per session, so squeezing it here cannot affect tests
        // running in parallel under other session ids.
        configure_session_byte_cap(&session, 8);

        // Each seeded file is 5 bytes: one fits under the cap, two do not.
        let seed_file = |name: &str| -> PathBuf {
            let path = sandbox.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        };

        let older_scope = unique_scope();
        let newer_scope = unique_scope();

        let older_file = seed_file("a.txt");
        snapshot(
            &session,
            &older_scope,
            &[path_arg(&older_file)],
            Some(sandbox.path()),
        )
        .unwrap();

        let newer_file = seed_file("b.txt");
        snapshot(
            &session,
            &newer_scope,
            &[path_arg(&newer_file)],
            Some(sandbox.path()),
        )
        .unwrap();

        assert_eq!(
            snapshot_ids(&session),
            vec![newer_scope],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    #[test]
    fn snapshot_larger_than_cap_is_retained_not_evicted() {
        // One snapshot whose bytes exceed the entire cap has to survive:
        // evicting the snapshot that was just taken would lose rollback for
        // the in-flight write (and used to panic when re-fetching it).
        let sandbox = TempDir::new().unwrap();
        let session = unique_session("snap-oversized");
        let _session_guard = enter_session(&session);
        configure_session_byte_cap(&session, 4);

        let scope = unique_scope();
        let target = sandbox.path().join("big.txt");
        stdfs::write(&target, b"0123456789").unwrap();
        let captured = snapshot(
            &session,
            &scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();
        assert_eq!(captured.byte_count, 10);

        assert_eq!(
            snapshot_ids(&session),
            vec![scope],
            "an oversized snapshot must be retained rather than evicting itself"
        );
    }

    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let sandbox = TempDir::new().unwrap();
        let target = sandbox.path().join("retained.txt");
        stdfs::write(&target, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let first_scope = unique_scope();
        let second_scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &first_scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();
        snapshot(
            &session,
            &second_scope,
            &[path_arg(&target)],
            Some(sandbox.path()),
        )
        .unwrap();
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}