1use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability};
44use crate::tools::args::{
45 build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46};
47
/// Builtin name under which the snapshot operation is registered.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
/// Builtin name under which the restore operation is registered.
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
/// Builtin name under which the snapshot listing operation is registered.
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
/// Builtin name under which the drop operation is registered.
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Version number written into every persisted manifest.
const MANIFEST_VERSION: u32 = 1;
/// Directory components, relative to a snapshot root, under which snapshot
/// state is stored on disk.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Byte cap (1 GiB) a session starts with until it is reconfigured.
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1 << 30;
60
61#[derive(Default)]
63pub struct FsSnapshotCapability;
64
65impl HostlibCapability for FsSnapshotCapability {
66 fn module_name(&self) -> &'static str {
67 "fs"
71 }
72
73 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
74 registry.register_fn("fs", SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
75 registry.register_fn("fs", RESTORE_BUILTIN, "restore", restore_builtin);
76 registry.register_fn("fs", LIST_BUILTIN, "list_snapshots", list_snapshots_builtin);
77 registry.register_fn("fs", DROP_BUILTIN, "drop_snapshot", drop_snapshot_builtin);
78 }
79}
80
81#[derive(Clone, Debug, Serialize, Deserialize)]
82#[serde(tag = "kind", rename_all = "snake_case")]
83enum SnapshotEntry {
84 File {
85 body_hash: String,
86 len: u64,
87 #[serde(default, skip_serializing_if = "Option::is_none")]
88 mode: Option<u32>,
89 },
90 Absent,
91}
92
93#[derive(Clone, Debug, Serialize, Deserialize)]
94struct Manifest {
95 version: u32,
96 snapshot_id: String,
97 scope_id: String,
98 session_id: String,
99 root: String,
100 taken_at_ms: i64,
101 entries: BTreeMap<String, SnapshotEntry>,
102}
103
104#[derive(Clone, Debug)]
105struct SnapshotState {
106 snapshot_id: String,
107 scope_id: String,
108 session_id: String,
109 root: PathBuf,
110 taken_at_ms: i64,
111 entries: BTreeMap<PathBuf, SnapshotEntry>,
113}
114
/// Description of one snapshot as reported by `list_snapshots`.
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Identifier of the snapshot.
    pub snapshot_id: String,
    /// Scope the snapshot was opened for.
    pub scope_id: String,
    /// Creation time in milliseconds since the Unix epoch.
    pub taken_at_ms: i64,
    /// Every path recorded in the snapshot, rendered lossily as strings.
    pub captured_paths: Vec<String>,
    /// Total length in bytes of all captured file bodies.
    pub byte_count: u64,
}
129
/// Outcome of a `snapshot` call.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Identifier of the snapshot that was written to.
    pub snapshot_id: String,
    /// Paths newly recorded by this call (paths already present are omitted).
    pub captured_paths: Vec<String>,
    /// Bytes added to the snapshot by this call.
    pub byte_count: u64,
}
140
/// Outcome of a `restore` call.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Identifier of the snapshot that was restored from.
    pub snapshot_id: String,
    /// Paths that were put back to their captured state.
    pub restored_paths: Vec<String>,
    /// Paths that could not be restored, each paired with the failure reason.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
151
/// Outcome of a `drop_snapshot` call.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Identifier of the snapshot the caller asked to drop.
    pub snapshot_id: String,
    /// `true` when a snapshot with that id existed and was removed.
    pub dropped: bool,
}
160
161#[derive(Debug)]
162struct SessionSnapshots {
163 snapshots: Vec<SnapshotState>,
165 byte_count: u64,
169 byte_cap: u64,
172}
173
174impl Default for SessionSnapshots {
175 fn default() -> Self {
176 Self {
177 snapshots: Vec::new(),
178 byte_count: 0,
179 byte_cap: DEFAULT_SESSION_BYTE_CAP,
180 }
181 }
182}
183
184static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
185
186fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
187 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
188}
189
190pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
197 let mut guard = sessions()
198 .lock()
199 .expect("fs_snapshot session mutex poisoned");
200 let bundle = guard.entry(session_id.to_string()).or_default();
201 let previous = bundle.byte_cap;
202 bundle.byte_cap = bytes.max(1);
203 enforce_byte_cap(bundle, session_id, None);
204 previous
205}
206
207pub fn drop_session_snapshots(session_id: &str) -> usize {
214 let mut guard = sessions()
215 .lock()
216 .expect("fs_snapshot session mutex poisoned");
217 let Some(bundle) = guard.remove(session_id) else {
218 return 0;
219 };
220 let count = bundle.snapshots.len();
221 for snapshot in &bundle.snapshots {
222 remove_snapshot_dir(snapshot);
223 }
224 count
225}
226
227pub fn reset_all_sessions() -> usize {
236 let mut guard = sessions()
237 .lock()
238 .expect("fs_snapshot session mutex poisoned");
239 let session_count = guard.len();
240 for bundle in guard.values() {
241 for snapshot in &bundle.snapshots {
242 remove_snapshot_dir(snapshot);
243 }
244 }
245 guard.clear();
246 session_count
247}
248
/// Number of sessions currently present in the registry (test builds only).
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
257
258pub fn snapshot(
262 session_id: &str,
263 scope_id: &str,
264 paths: &[String],
265 root: Option<&Path>,
266) -> Result<SnapshotResult, HostlibError> {
267 validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
268 validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
269 let root = resolve_root(root);
270 let mut guard = sessions()
271 .lock()
272 .expect("fs_snapshot session mutex poisoned");
273 let bundle = guard.entry(session_id.to_string()).or_default();
274 upsert_snapshot(bundle, session_id, scope_id, &root)?;
275 let mut captured_paths = Vec::new();
276 let mut byte_count = 0u64;
277 for raw in paths {
278 let path = normalize_logical(Path::new(raw));
279 let added =
280 capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
281 HostlibError::Backend {
282 builtin: SNAPSHOT_BUILTIN,
283 message,
284 }
285 })?;
286 if let Some(bytes) = added {
287 byte_count = byte_count.saturating_add(bytes);
288 captured_paths.push(path.to_string_lossy().into_owned());
289 }
290 }
291 enforce_byte_cap(bundle, session_id, Some(scope_id));
292 let state = bundle
293 .snapshots
294 .iter()
295 .find(|snap| snap.snapshot_id == scope_id)
296 .expect("snapshot just upserted is protected from byte-cap eviction");
297 persist_manifest(state).map_err(|err| HostlibError::Backend {
298 builtin: SNAPSHOT_BUILTIN,
299 message: err,
300 })?;
301 Ok(SnapshotResult {
302 snapshot_id: state.snapshot_id.clone(),
303 captured_paths,
304 byte_count,
305 })
306}
307
308pub fn restore(
310 session_id: &str,
311 snapshot_id: &str,
312 paths: &[String],
313) -> Result<RestoreResult, HostlibError> {
314 validate_session_id(RESTORE_BUILTIN, session_id)?;
315 validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
316 let mut guard = sessions()
317 .lock()
318 .expect("fs_snapshot session mutex poisoned");
319 let bundle = guard
320 .get_mut(session_id)
321 .ok_or_else(|| HostlibError::Backend {
322 builtin: RESTORE_BUILTIN,
323 message: format!("no snapshots registered for session `{session_id}`"),
324 })?;
325 let state = bundle
326 .snapshots
327 .iter()
328 .find(|snap| snap.snapshot_id == snapshot_id)
329 .cloned()
330 .ok_or_else(|| HostlibError::Backend {
331 builtin: RESTORE_BUILTIN,
332 message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
333 })?;
334 let selected = select_paths(&state, paths);
335 let mut restored_paths = Vec::new();
336 let mut skipped_paths_with_reasons = Vec::new();
337 for path in selected {
338 let Some(entry) = state.entries.get(&path) else {
339 continue;
340 };
341 let label = path.to_string_lossy().into_owned();
342 match restore_entry(&state, &path, entry) {
343 Ok(()) => restored_paths.push(label),
344 Err(reason) => skipped_paths_with_reasons.push((label, reason)),
345 }
346 }
347 Ok(RestoreResult {
348 snapshot_id: snapshot_id.to_string(),
349 restored_paths,
350 skipped_paths_with_reasons,
351 })
352}
353
354pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
356 validate_session_id(LIST_BUILTIN, session_id)?;
357 let guard = sessions()
358 .lock()
359 .expect("fs_snapshot session mutex poisoned");
360 let Some(bundle) = guard.get(session_id) else {
361 return Ok(Vec::new());
362 };
363 let mut summaries: Vec<SnapshotSummary> = bundle
364 .snapshots
365 .iter()
366 .map(|state| SnapshotSummary {
367 snapshot_id: state.snapshot_id.clone(),
368 scope_id: state.scope_id.clone(),
369 taken_at_ms: state.taken_at_ms,
370 captured_paths: state
371 .entries
372 .keys()
373 .map(|path| path.to_string_lossy().into_owned())
374 .collect(),
375 byte_count: entry_byte_count(state),
376 })
377 .collect();
378 summaries.sort_by_key(|summary| summary.taken_at_ms);
379 Ok(summaries)
380}
381
382pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
384 validate_session_id(DROP_BUILTIN, session_id)?;
385 validate_scope_id(DROP_BUILTIN, snapshot_id)?;
386 let mut guard = sessions()
387 .lock()
388 .expect("fs_snapshot session mutex poisoned");
389 let Some(bundle) = guard.get_mut(session_id) else {
390 return Ok(DropResult {
391 snapshot_id: snapshot_id.to_string(),
392 dropped: false,
393 });
394 };
395 let position = bundle
396 .snapshots
397 .iter()
398 .position(|snap| snap.snapshot_id == snapshot_id);
399 let dropped = match position {
400 Some(idx) => {
401 let removed = bundle.snapshots.remove(idx);
402 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
403 remove_snapshot_dir(&removed);
404 true
405 }
406 None => false,
407 };
408 Ok(DropResult {
409 snapshot_id: snapshot_id.to_string(),
410 dropped,
411 })
412}
413
414pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
422 let Some(session_id) = active_session_id() else {
423 return;
424 };
425 harn_vm::agent_sessions::record_session_changed_path(
431 &session_id,
432 normalize_logical(path).to_string_lossy().as_ref(),
433 );
434 let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
435 return;
436 };
437 let mut guard = sessions()
438 .lock()
439 .expect("fs_snapshot session mutex poisoned");
440 let bundle = guard.entry(session_id.clone()).or_default();
441 if !bundle
442 .snapshots
443 .iter()
444 .any(|snap| snap.snapshot_id == snapshot_id)
445 {
446 let root =
447 crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
448 if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
449 tracing::warn!(
450 "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
451 );
452 return;
453 }
454 }
455 let Some(snapshot) = bundle
456 .snapshots
457 .iter()
458 .find(|snap| snap.snapshot_id == snapshot_id)
459 else {
460 return;
461 };
462 let scope_id = snapshot.scope_id.clone();
463 let root = snapshot.root.clone();
464 let key = normalize_logical(path);
465 match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
466 Ok(_added) => {
467 if let Some(state) = bundle
468 .snapshots
469 .iter()
470 .find(|snap| snap.snapshot_id == snapshot_id)
471 {
472 if let Err(err) = persist_manifest(state) {
473 tracing::warn!(
474 "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
475 );
476 }
477 }
478 }
479 Err(err) => {
480 tracing::warn!(
481 "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
482 key.display()
483 );
484 }
485 }
486 enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
487}
488
489fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
490 let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
491 let dict = raw.as_ref();
492 let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
493 let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
494 let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
495 let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
496 let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
497 Ok(build_dict([
498 ("snapshot_id", str_value(&result.snapshot_id)),
499 (
500 "captured_paths",
501 VmValue::List(Arc::new(
502 result
503 .captured_paths
504 .into_iter()
505 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
506 .collect(),
507 )),
508 ),
509 ("byte_count", VmValue::Int(result.byte_count as i64)),
510 ]))
511}
512
513fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
514 let raw = dict_arg(RESTORE_BUILTIN, args)?;
515 let dict = raw.as_ref();
516 let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
517 let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
518 let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
519 let result = restore(&session_id, &snapshot_id, &paths)?;
520 Ok(build_dict([
521 ("snapshot_id", str_value(&result.snapshot_id)),
522 (
523 "restored_paths",
524 VmValue::List(Arc::new(
525 result
526 .restored_paths
527 .into_iter()
528 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
529 .collect(),
530 )),
531 ),
532 (
533 "skipped_paths_with_reasons",
534 VmValue::List(Arc::new(
535 result
536 .skipped_paths_with_reasons
537 .into_iter()
538 .map(|(path, reason)| {
539 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
540 })
541 .collect(),
542 )),
543 ),
544 ]))
545}
546
547fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
548 let raw = dict_arg(LIST_BUILTIN, args)?;
549 let dict = raw.as_ref();
550 let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
551 let summaries = list_snapshots(&session_id)?;
552 Ok(build_dict([(
553 "snapshots",
554 VmValue::List(Arc::new(
555 summaries.into_iter().map(snapshot_summary_value).collect(),
556 )),
557 )]))
558}
559
560fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
561 let raw = dict_arg(DROP_BUILTIN, args)?;
562 let dict = raw.as_ref();
563 let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
564 let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
565 let result = drop_snapshot(&session_id, &snapshot_id)?;
566 Ok(build_dict([
567 ("snapshot_id", str_value(&result.snapshot_id)),
568 ("dropped", VmValue::Bool(result.dropped)),
569 ]))
570}
571
572fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
573 build_dict([
574 ("snapshot_id", str_value(&summary.snapshot_id)),
575 ("scope_id", str_value(&summary.scope_id)),
576 ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
577 (
578 "captured_paths",
579 VmValue::List(Arc::new(
580 summary
581 .captured_paths
582 .into_iter()
583 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
584 .collect(),
585 )),
586 ),
587 ("byte_count", VmValue::Int(summary.byte_count as i64)),
588 ])
589}
590
591fn upsert_snapshot(
592 bundle: &mut SessionSnapshots,
593 session_id: &str,
594 scope_id: &str,
595 root: &Path,
596) -> Result<(), HostlibError> {
597 if bundle
598 .snapshots
599 .iter()
600 .any(|snap| snap.snapshot_id == scope_id)
601 {
602 return Ok(());
603 }
604 let state = SnapshotState {
605 snapshot_id: scope_id.to_string(),
606 scope_id: scope_id.to_string(),
607 session_id: session_id.to_string(),
608 root: root.to_path_buf(),
609 taken_at_ms: now_ms(),
610 entries: BTreeMap::new(),
611 };
612 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
613 stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
614 builtin: SNAPSHOT_BUILTIN,
615 message: format!("mkdir {}: {err}", dir.display()),
616 })?;
617 bundle.snapshots.push(state);
618 Ok(())
619}
620
621fn capture_path(
622 bundle: &mut SessionSnapshots,
623 session_id: &str,
624 snapshot_id: &str,
625 path: &Path,
626 root: &Path,
627) -> Result<Option<u64>, String> {
628 let snap_index = bundle
629 .snapshots
630 .iter()
631 .position(|snap| snap.snapshot_id == snapshot_id)
632 .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
633 if bundle.snapshots[snap_index].entries.contains_key(path) {
634 return Ok(None);
635 }
636 let metadata = stdfs::symlink_metadata(path);
637 let (entry, byte_count) = match metadata {
638 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
639 Err(err) => {
640 return Err(format!("stat `{}`: {err}", path.display()));
641 }
642 Ok(metadata) if metadata.is_dir() => {
643 return Err(format!(
644 "snapshot of directory `{}` is not supported yet",
645 path.display()
646 ));
647 }
648 Ok(metadata) if metadata.file_type().is_symlink() => {
649 return Err(format!(
650 "snapshot of symlink `{}` is not supported yet",
651 path.display()
652 ));
653 }
654 Ok(metadata) => {
655 let bytes = stdfs::read(path)
656 .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
657 let body_hash = hex::encode(Sha256::digest(&bytes));
658 let len = bytes.len() as u64;
659 store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
660 #[cfg(unix)]
661 let mode = {
662 use std::os::unix::fs::MetadataExt;
663 Some(metadata.mode())
664 };
665 #[cfg(not(unix))]
666 let mode = {
667 let _ = &metadata;
668 None
669 };
670 (
671 SnapshotEntry::File {
672 body_hash,
673 len,
674 mode,
675 },
676 len,
677 )
678 }
679 };
680 let snap = &mut bundle.snapshots[snap_index];
681 snap.entries.insert(path.to_path_buf(), entry);
682 bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
683 Ok(Some(byte_count))
684}
685
686fn store_body(
687 root: &Path,
688 session_id: &str,
689 snapshot_id: &str,
690 body_hash: &str,
691 bytes: &[u8],
692) -> Result<(), String> {
693 let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
694 stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
695 let body_path = bodies.join(body_hash);
696 if !body_path.exists() {
697 atomic_write(&body_path, bytes)?;
698 }
699 Ok(())
700}
701
702fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
703 match entry {
704 SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
705 Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
706 .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
707 Ok(_) => stdfs::remove_file(path)
708 .map_err(|err| format!("remove_file {}: {err}", path.display())),
709 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
710 Err(err) => Err(format!("stat {}: {err}", path.display())),
711 },
712 SnapshotEntry::File {
713 body_hash, mode, ..
714 } => {
715 let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
716 .join("bodies")
717 .join(body_hash);
718 let bytes = stdfs::read(&body_path)
719 .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
720 atomic_write(path, &bytes)?;
721 #[cfg(unix)]
722 if let Some(bits) = mode {
723 use std::os::unix::fs::PermissionsExt;
724 let permissions = stdfs::Permissions::from_mode(*bits);
725 stdfs::set_permissions(path, permissions)
726 .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
727 }
728 #[cfg(not(unix))]
729 let _ = mode;
730 Ok(())
731 }
732 }
733}
734
735fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
736 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
737 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
738 let manifest = Manifest {
739 version: MANIFEST_VERSION,
740 snapshot_id: state.snapshot_id.clone(),
741 scope_id: state.scope_id.clone(),
742 session_id: state.session_id.clone(),
743 root: state.root.to_string_lossy().into_owned(),
744 taken_at_ms: state.taken_at_ms,
745 entries: state
746 .entries
747 .iter()
748 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
749 .collect(),
750 };
751 let bytes = serde_json::to_vec_pretty(&manifest)
752 .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
753 atomic_write(&dir.join("manifest.json"), &bytes)
754}
755
/// Writes `bytes` to `path` by first writing a sibling temporary file and
/// then renaming it over the destination. Missing parent directories are
/// created.
///
/// The temporary name is the full destination file name plus a
/// `.tmp-<pid>-<counter>` suffix. Appending (rather than replacing the
/// extension) keeps `note.txt` and `note.md` from sharing a temporary file,
/// and the process-wide counter keeps repeated writes apart.
///
/// # Errors
///
/// Returns a message when `path` has no file name, a parent directory cannot
/// be created, the temporary file cannot be written, or the rename fails
/// twice. The temporary file is removed on every failure path.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    use std::sync::atomic::{AtomicU64, Ordering};
    static TMP_COUNTER: AtomicU64 = AtomicU64::new(0);

    let Some(file_name) = path.file_name() else {
        return Err(format!("write {}: path has no file name", path.display()));
    };
    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }

    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(
        ".tmp-{}-{}",
        std::process::id(),
        TMP_COUNTER.fetch_add(1, Ordering::Relaxed)
    ));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // Do not leave a partially written temporary file behind.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }
    match stdfs::rename(&tmp, path) {
        Ok(()) => Ok(()),
        Err(rename_err) => {
            // Fallback for platforms where rename cannot replace an existing
            // destination: remove it and try once more. This step is not
            // atomic.
            let _ = stdfs::remove_file(path);
            stdfs::rename(&tmp, path).map_err(|retry| {
                let _ = stdfs::remove_file(&tmp);
                format!(
                    "rename {} to {}: {rename_err}; retry: {retry}",
                    tmp.display(),
                    path.display()
                )
            })
        }
    }
}
780
781fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
789 while bundle.byte_count > bundle.byte_cap {
790 let Some(idx) = bundle
791 .snapshots
792 .iter()
793 .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
794 else {
795 break;
796 };
797 let evicted = bundle.snapshots.remove(idx);
798 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
799 tracing::info!(
800 "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
801 evicted.snapshot_id,
802 bundle.byte_cap,
803 );
804 remove_snapshot_dir(&evicted);
805 }
806}
807
808fn remove_snapshot_dir(state: &SnapshotState) {
809 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
810 let _ = stdfs::remove_dir_all(&dir);
811}
812
813fn entry_byte_count(state: &SnapshotState) -> u64 {
814 state
815 .entries
816 .values()
817 .map(|entry| match entry {
818 SnapshotEntry::File { len, .. } => *len,
819 SnapshotEntry::Absent => 0,
820 })
821 .sum()
822}
823
824fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
825 if paths.is_empty() {
826 return state.entries.keys().cloned().collect();
827 }
828 let requested: BTreeSet<PathBuf> = paths
829 .iter()
830 .map(|path| normalize_logical(Path::new(path)))
831 .collect();
832 state
833 .entries
834 .keys()
835 .filter(|path| requested.contains(*path))
836 .cloned()
837 .collect()
838}
839
840fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
841 if session_id.trim().is_empty() {
842 return Err(HostlibError::InvalidParameter {
843 builtin,
844 param: "session_id",
845 message: "must not be empty".to_string(),
846 });
847 }
848 Ok(())
849}
850
851fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
852 if scope_id.trim().is_empty() {
853 let param = match builtin {
854 SNAPSHOT_BUILTIN => "scope_id",
855 _ => "snapshot_id",
856 };
857 return Err(HostlibError::InvalidParameter {
858 builtin,
859 param,
860 message: "must not be empty".to_string(),
861 });
862 }
863 Ok(())
864}
865
866fn active_session_id() -> Option<String> {
867 harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
868}
869
870fn resolve_root(root: Option<&Path>) -> PathBuf {
871 match root {
872 Some(path) => normalize_logical(path),
873 None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
874 }
875}
876
877fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
878 let mut dir = root.to_path_buf();
879 for component in STATE_REL {
880 dir.push(component);
881 }
882 dir.push(sanitize_component(session_id));
883 dir.push(sanitize_component(snapshot_id));
884 dir
885}
886
887fn sanitize_component(input: &str) -> String {
888 let sanitized: String = input
889 .chars()
890 .map(|ch| match ch {
891 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
892 _ => '_',
893 })
894 .collect();
895 if sanitized == input {
896 sanitized
897 } else {
898 let hash = hex::encode(Sha256::digest(input.as_bytes()));
899 format!("{sanitized}-{}", &hash[..12])
900 }
901}
902
/// Makes `path` absolute and resolves `.` and `..` components lexically,
/// without touching the filesystem (symlinks are not followed).
///
/// Relative paths are joined onto the current working directory, or onto `.`
/// when the working directory cannot be determined. A `..` at the root is
/// dropped.
fn normalize_logical(path: &Path) -> PathBuf {
    let absolute = if path.is_relative() {
        let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
        base.join(path)
    } else {
        path.to_path_buf()
    };
    absolute
        .components()
        .fold(PathBuf::new(), |mut resolved, component| {
            match component {
                Component::CurDir => {}
                Component::ParentDir => {
                    resolved.pop();
                }
                other => resolved.push(other),
            }
            resolved
        })
}
923
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 when the system clock reads earlier than the epoch, and
/// saturates at `i64::MAX` instead of truncating if the millisecond count
/// does not fit in an `i64`.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
930
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Session id that is unique within this process and across processes.
    fn unique_session(prefix: &str) -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("{prefix}-{n}-{}", std::process::id())
    }

    /// Scope id that is unique within this process.
    fn unique_scope() -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Opens (or creates) the session and makes it current until the guard drops.
    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
        harn_vm::agent_sessions::enter_current_session(id.to_string())
    }

    /// Single-element `paths` argument naming `path`.
    fn path_args(path: &Path) -> [String; 1] {
        [path.to_string_lossy().into_owned()]
    }

    /// Ids of the session's snapshots in listing order.
    fn listed_ids(session: &str) -> Vec<String> {
        list_snapshots(session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect()
    }

    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("note.txt");
        stdfs::write(&file, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let result = snapshot(&session, &scope, &path_args(&file), Some(dir.path())).unwrap();
        assert_eq!(result.snapshot_id, scope);
        assert_eq!(result.captured_paths.len(), 1);
        assert_eq!(result.byte_count, 2);

        stdfs::write(&file, b"clobbered").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(restored.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
    }

    #[test]
    fn restore_reinstates_deleted_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("doomed.txt");
        stdfs::write(&file, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(&session, &scope, &path_args(&file), Some(dir.path())).unwrap();
        stdfs::remove_file(&file).unwrap();
        assert!(!file.exists());

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
    }

    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("new.txt");
        assert!(!file.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(&session, &scope, &path_args(&file), Some(dir.path())).unwrap();
        stdfs::write(&file, b"created during call").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(
            !file.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("listed.txt");
        stdfs::write(&file, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(&session, &scope, &path_args(&file), Some(dir.path())).unwrap();
        let summaries = list_snapshots(&session).unwrap();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].snapshot_id, scope);
        assert_eq!(summaries[0].byte_count, 3);

        let dropped = drop_snapshot(&session, &scope).unwrap();
        assert!(dropped.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let again = drop_snapshot(&session, &scope).unwrap();
        assert!(!again.dropped, "second drop must be idempotent");
    }

    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("auto.txt");
        stdfs::write(&file, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());

        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
        auto_capture_for_write("hostlib_tools_write_file", &file);
        stdfs::write(&file, b"post").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
    }

    #[test]
    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
        let dir = TempDir::new().unwrap();
        let one = dir.path().join("a.txt");
        let two = dir.path().join("b.txt");
        let session = unique_session("snap-changed");
        harn_vm::agent_sessions::clear_session_changed_paths(&session);
        let _session_guard = enter_session(&session);

        // The repeated write to `one` must not produce a second record.
        for target in [&one, &two, &one] {
            auto_capture_for_write("hostlib_tools_write_file", target);
        }

        let changed = harn_vm::agent_sessions::session_changed_paths(&session);
        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
        let expect_one = normalize_logical(&one).to_string_lossy().into_owned();
        let expect_two = normalize_logical(&two).to_string_lossy().into_owned();
        assert!(
            changed.contains(&expect_one),
            "path a recorded: {changed:?}"
        );
        assert!(
            changed.contains(&expect_two),
            "path b recorded: {changed:?}"
        );

        let drained = harn_vm::agent_sessions::take_session_changed_paths(&session);
        assert_eq!(drained.len(), 2);
        assert!(
            harn_vm::agent_sessions::session_changed_paths(&session).is_empty(),
            "take drains the session's recorded paths"
        );
    }

    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        /// Writes a five-byte file named `name` inside `dir`.
        fn five_byte_file(dir: &TempDir, name: &str) -> PathBuf {
            let path = dir.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        }

        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        // Two five-byte snapshots cannot both fit under an eight-byte cap.
        configure_session_byte_cap(&session, 8);

        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let a = five_byte_file(&dir, "a.txt");
        snapshot(&session, &scope_a, &path_args(&a), Some(dir.path())).unwrap();
        let b = five_byte_file(&dir, "b.txt");
        snapshot(&session, &scope_b, &path_args(&b), Some(dir.path())).unwrap();

        assert_eq!(
            listed_ids(&session),
            vec![scope_b],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    #[test]
    fn snapshot_larger_than_cap_is_retained_not_evicted() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-oversized");
        let _session_guard = enter_session(&session);
        configure_session_byte_cap(&session, 4);

        let scope = unique_scope();
        let file = dir.path().join("big.txt");
        stdfs::write(&file, b"0123456789").unwrap();
        let result = snapshot(&session, &scope, &path_args(&file), Some(dir.path())).unwrap();
        assert_eq!(result.byte_count, 10);

        assert_eq!(
            listed_ids(&session),
            vec![scope],
            "an oversized snapshot must be retained rather than evicting itself"
        );
    }

    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("retained.txt");
        stdfs::write(&file, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let _session_guard = enter_session(&session);

        for scope in [&scope_a, &scope_b] {
            snapshot(&session, scope, &path_args(&file), Some(dir.path())).unwrap();
        }
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}