1use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
44use crate::tools::args::{
45 build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46};
47
// Builtin names. Each is registered under module `fs` by
// `FsSnapshotCapability::register_builtins` and is also used as the `builtin`
// tag on errors raised by the matching operation.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Version number written into every persisted manifest.
const MANIFEST_VERSION: u32 = 1;
/// Path components, relative to a snapshot root, of the directory under which
/// per-session / per-snapshot state directories are created.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Default per-session byte cap (1 GiB) used until
/// `configure_session_byte_cap` overrides it.
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
60
61#[derive(Default)]
63pub struct FsSnapshotCapability;
64
65impl HostlibCapability for FsSnapshotCapability {
66 fn module_name(&self) -> &'static str {
67 "fs"
71 }
72
73 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
74 register(registry, SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
75 register(registry, RESTORE_BUILTIN, "restore", restore_builtin);
76 register(
77 registry,
78 LIST_BUILTIN,
79 "list_snapshots",
80 list_snapshots_builtin,
81 );
82 register(
83 registry,
84 DROP_BUILTIN,
85 "drop_snapshot",
86 drop_snapshot_builtin,
87 );
88 }
89}
90
91fn register(
92 registry: &mut BuiltinRegistry,
93 name: &'static str,
94 method: &'static str,
95 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
96) {
97 let handler: SyncHandler = std::sync::Arc::new(runner);
98 registry.register(RegisteredBuiltin {
99 name,
100 module: "fs",
101 method,
102 handler,
103 });
104}
105
/// What a snapshot recorded for a single path.
///
/// Serialized with an internal `kind` tag whose values are the snake_case
/// variant names.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path was a regular file when captured.
    File {
        /// Hex-encoded SHA-256 of the file contents; also the file name of the
        /// stored body inside the snapshot's `bodies` directory.
        body_hash: String,
        /// Length of the captured contents in bytes.
        len: u64,
        /// Mode bits recorded on Unix; `None` on other platforms. Omitted from
        /// the serialized form when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist when captured; restoring removes whatever is
    /// there now.
    Absent,
}
117
/// Serialized form of a snapshot, written as `manifest.json` inside the
/// snapshot directory by `persist_manifest`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest format version (`MANIFEST_VERSION` at write time).
    version: u32,
    /// Identifier of the snapshot.
    snapshot_id: String,
    /// Scope the snapshot was taken for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Snapshot root, stored as a (lossily converted) string.
    root: String,
    /// Creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured entries keyed by the (lossily converted) path string.
    entries: BTreeMap<String, SnapshotEntry>,
}
128
/// In-memory record of one snapshot held in a session bundle.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Identifier of the snapshot; `upsert_snapshot` sets it equal to `scope_id`.
    snapshot_id: String,
    /// Scope the snapshot was opened for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root under which the snapshot's state directory lives.
    root: PathBuf,
    /// Creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured entries keyed by normalized path.
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
139
/// One row of the listing returned by `list_snapshots`.
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Identifier of the snapshot.
    pub snapshot_id: String,
    /// Scope the snapshot was opened for.
    pub scope_id: String,
    /// Creation time in milliseconds since the Unix epoch.
    pub taken_at_ms: i64,
    /// Every path recorded in the snapshot (files and absent markers).
    pub captured_paths: Vec<String>,
    /// Sum of the lengths of all captured file bodies.
    pub byte_count: u64,
}
154
/// Outcome of a `snapshot` call.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Identifier of the snapshot that was created or extended.
    pub snapshot_id: String,
    /// Paths newly captured by this call (paths already in the snapshot are
    /// not repeated).
    pub captured_paths: Vec<String>,
    /// Bytes added to the snapshot by this call.
    pub byte_count: u64,
}
165
/// Outcome of a `restore` call.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Identifier of the snapshot that was restored from.
    pub snapshot_id: String,
    /// Paths that were put back to their captured state.
    pub restored_paths: Vec<String>,
    /// Paths that could not be restored, paired with the failure reason.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
176
/// Outcome of a `drop_snapshot` call.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Identifier that was asked to be dropped.
    pub snapshot_id: String,
    /// `true` when a snapshot with that id existed and was removed.
    pub dropped: bool,
}
185
/// All snapshots held for one session, together with byte accounting.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots in creation order: new ones are pushed at the end and
    /// byte-cap eviction scans from the front.
    snapshots: Vec<SnapshotState>,
    /// Running total of captured file bytes across `snapshots`.
    byte_count: u64,
    /// Byte budget; `enforce_byte_cap` evicts snapshots while `byte_count`
    /// exceeds it.
    byte_cap: u64,
}

impl Default for SessionSnapshots {
    /// An empty bundle with the default byte cap.
    fn default() -> Self {
        Self {
            snapshots: Vec::new(),
            byte_count: 0,
            byte_cap: DEFAULT_SESSION_BYTE_CAP,
        }
    }
}
208
209static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
210
211fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
212 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
213}
214
215pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
222 let mut guard = sessions()
223 .lock()
224 .expect("fs_snapshot session mutex poisoned");
225 let bundle = guard.entry(session_id.to_string()).or_default();
226 let previous = bundle.byte_cap;
227 bundle.byte_cap = bytes.max(1);
228 enforce_byte_cap(bundle, session_id, None);
229 previous
230}
231
232pub fn drop_session_snapshots(session_id: &str) -> usize {
239 let mut guard = sessions()
240 .lock()
241 .expect("fs_snapshot session mutex poisoned");
242 let Some(bundle) = guard.remove(session_id) else {
243 return 0;
244 };
245 let count = bundle.snapshots.len();
246 for snapshot in &bundle.snapshots {
247 remove_snapshot_dir(snapshot);
248 }
249 count
250}
251
252pub fn reset_all_sessions() -> usize {
261 let mut guard = sessions()
262 .lock()
263 .expect("fs_snapshot session mutex poisoned");
264 let session_count = guard.len();
265 for bundle in guard.values() {
266 for snapshot in &bundle.snapshots {
267 remove_snapshot_dir(snapshot);
268 }
269 }
270 guard.clear();
271 session_count
272}
273
/// Number of sessions currently present in the registry (test builds only).
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
282
283pub fn snapshot(
287 session_id: &str,
288 scope_id: &str,
289 paths: &[String],
290 root: Option<&Path>,
291) -> Result<SnapshotResult, HostlibError> {
292 validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
293 validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
294 let root = resolve_root(root);
295 let mut guard = sessions()
296 .lock()
297 .expect("fs_snapshot session mutex poisoned");
298 let bundle = guard.entry(session_id.to_string()).or_default();
299 upsert_snapshot(bundle, session_id, scope_id, &root)?;
300 let mut captured_paths = Vec::new();
301 let mut byte_count = 0u64;
302 for raw in paths {
303 let path = normalize_logical(Path::new(raw));
304 let added =
305 capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
306 HostlibError::Backend {
307 builtin: SNAPSHOT_BUILTIN,
308 message,
309 }
310 })?;
311 if let Some(bytes) = added {
312 byte_count = byte_count.saturating_add(bytes);
313 captured_paths.push(path.to_string_lossy().into_owned());
314 }
315 }
316 enforce_byte_cap(bundle, session_id, Some(scope_id));
317 let state = bundle
318 .snapshots
319 .iter()
320 .find(|snap| snap.snapshot_id == scope_id)
321 .expect("snapshot just upserted is protected from byte-cap eviction");
322 persist_manifest(state).map_err(|err| HostlibError::Backend {
323 builtin: SNAPSHOT_BUILTIN,
324 message: err,
325 })?;
326 Ok(SnapshotResult {
327 snapshot_id: state.snapshot_id.clone(),
328 captured_paths,
329 byte_count,
330 })
331}
332
333pub fn restore(
335 session_id: &str,
336 snapshot_id: &str,
337 paths: &[String],
338) -> Result<RestoreResult, HostlibError> {
339 validate_session_id(RESTORE_BUILTIN, session_id)?;
340 validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
341 let mut guard = sessions()
342 .lock()
343 .expect("fs_snapshot session mutex poisoned");
344 let bundle = guard
345 .get_mut(session_id)
346 .ok_or_else(|| HostlibError::Backend {
347 builtin: RESTORE_BUILTIN,
348 message: format!("no snapshots registered for session `{session_id}`"),
349 })?;
350 let state = bundle
351 .snapshots
352 .iter()
353 .find(|snap| snap.snapshot_id == snapshot_id)
354 .cloned()
355 .ok_or_else(|| HostlibError::Backend {
356 builtin: RESTORE_BUILTIN,
357 message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
358 })?;
359 let selected = select_paths(&state, paths);
360 let mut restored_paths = Vec::new();
361 let mut skipped_paths_with_reasons = Vec::new();
362 for path in selected {
363 let Some(entry) = state.entries.get(&path) else {
364 continue;
365 };
366 let label = path.to_string_lossy().into_owned();
367 match restore_entry(&state, &path, entry) {
368 Ok(()) => restored_paths.push(label),
369 Err(reason) => skipped_paths_with_reasons.push((label, reason)),
370 }
371 }
372 Ok(RestoreResult {
373 snapshot_id: snapshot_id.to_string(),
374 restored_paths,
375 skipped_paths_with_reasons,
376 })
377}
378
379pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
381 validate_session_id(LIST_BUILTIN, session_id)?;
382 let guard = sessions()
383 .lock()
384 .expect("fs_snapshot session mutex poisoned");
385 let Some(bundle) = guard.get(session_id) else {
386 return Ok(Vec::new());
387 };
388 let mut summaries: Vec<SnapshotSummary> = bundle
389 .snapshots
390 .iter()
391 .map(|state| SnapshotSummary {
392 snapshot_id: state.snapshot_id.clone(),
393 scope_id: state.scope_id.clone(),
394 taken_at_ms: state.taken_at_ms,
395 captured_paths: state
396 .entries
397 .keys()
398 .map(|path| path.to_string_lossy().into_owned())
399 .collect(),
400 byte_count: entry_byte_count(state),
401 })
402 .collect();
403 summaries.sort_by_key(|summary| summary.taken_at_ms);
404 Ok(summaries)
405}
406
407pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
409 validate_session_id(DROP_BUILTIN, session_id)?;
410 validate_scope_id(DROP_BUILTIN, snapshot_id)?;
411 let mut guard = sessions()
412 .lock()
413 .expect("fs_snapshot session mutex poisoned");
414 let Some(bundle) = guard.get_mut(session_id) else {
415 return Ok(DropResult {
416 snapshot_id: snapshot_id.to_string(),
417 dropped: false,
418 });
419 };
420 let position = bundle
421 .snapshots
422 .iter()
423 .position(|snap| snap.snapshot_id == snapshot_id);
424 let dropped = match position {
425 Some(idx) => {
426 let removed = bundle.snapshots.remove(idx);
427 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
428 remove_snapshot_dir(&removed);
429 true
430 }
431 None => false,
432 };
433 Ok(DropResult {
434 snapshot_id: snapshot_id.to_string(),
435 dropped,
436 })
437}
438
439pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
447 let Some(session_id) = active_session_id() else {
448 return;
449 };
450 harn_vm::agent_sessions::record_session_changed_path(
456 &session_id,
457 normalize_logical(path).to_string_lossy().as_ref(),
458 );
459 let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
460 return;
461 };
462 let mut guard = sessions()
463 .lock()
464 .expect("fs_snapshot session mutex poisoned");
465 let bundle = guard.entry(session_id.clone()).or_default();
466 if !bundle
467 .snapshots
468 .iter()
469 .any(|snap| snap.snapshot_id == snapshot_id)
470 {
471 let root =
472 crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
473 if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
474 tracing::warn!(
475 "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
476 );
477 return;
478 }
479 }
480 let Some(snapshot) = bundle
481 .snapshots
482 .iter()
483 .find(|snap| snap.snapshot_id == snapshot_id)
484 else {
485 return;
486 };
487 let scope_id = snapshot.scope_id.clone();
488 let root = snapshot.root.clone();
489 let key = normalize_logical(path);
490 match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
491 Ok(_added) => {
492 if let Some(state) = bundle
493 .snapshots
494 .iter()
495 .find(|snap| snap.snapshot_id == snapshot_id)
496 {
497 if let Err(err) = persist_manifest(state) {
498 tracing::warn!(
499 "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
500 );
501 }
502 }
503 }
504 Err(err) => {
505 tracing::warn!(
506 "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
507 key.display()
508 );
509 }
510 }
511 enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
512}
513
514fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
515 let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
516 let dict = raw.as_ref();
517 let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
518 let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
519 let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
520 let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
521 let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
522 Ok(build_dict([
523 ("snapshot_id", str_value(&result.snapshot_id)),
524 (
525 "captured_paths",
526 VmValue::List(Arc::new(
527 result
528 .captured_paths
529 .into_iter()
530 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
531 .collect(),
532 )),
533 ),
534 ("byte_count", VmValue::Int(result.byte_count as i64)),
535 ]))
536}
537
538fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
539 let raw = dict_arg(RESTORE_BUILTIN, args)?;
540 let dict = raw.as_ref();
541 let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
542 let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
543 let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
544 let result = restore(&session_id, &snapshot_id, &paths)?;
545 Ok(build_dict([
546 ("snapshot_id", str_value(&result.snapshot_id)),
547 (
548 "restored_paths",
549 VmValue::List(Arc::new(
550 result
551 .restored_paths
552 .into_iter()
553 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
554 .collect(),
555 )),
556 ),
557 (
558 "skipped_paths_with_reasons",
559 VmValue::List(Arc::new(
560 result
561 .skipped_paths_with_reasons
562 .into_iter()
563 .map(|(path, reason)| {
564 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
565 })
566 .collect(),
567 )),
568 ),
569 ]))
570}
571
572fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
573 let raw = dict_arg(LIST_BUILTIN, args)?;
574 let dict = raw.as_ref();
575 let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
576 let summaries = list_snapshots(&session_id)?;
577 Ok(build_dict([(
578 "snapshots",
579 VmValue::List(Arc::new(
580 summaries.into_iter().map(snapshot_summary_value).collect(),
581 )),
582 )]))
583}
584
585fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
586 let raw = dict_arg(DROP_BUILTIN, args)?;
587 let dict = raw.as_ref();
588 let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
589 let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
590 let result = drop_snapshot(&session_id, &snapshot_id)?;
591 Ok(build_dict([
592 ("snapshot_id", str_value(&result.snapshot_id)),
593 ("dropped", VmValue::Bool(result.dropped)),
594 ]))
595}
596
597fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
598 build_dict([
599 ("snapshot_id", str_value(&summary.snapshot_id)),
600 ("scope_id", str_value(&summary.scope_id)),
601 ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
602 (
603 "captured_paths",
604 VmValue::List(Arc::new(
605 summary
606 .captured_paths
607 .into_iter()
608 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
609 .collect(),
610 )),
611 ),
612 ("byte_count", VmValue::Int(summary.byte_count as i64)),
613 ])
614}
615
616fn upsert_snapshot(
617 bundle: &mut SessionSnapshots,
618 session_id: &str,
619 scope_id: &str,
620 root: &Path,
621) -> Result<(), HostlibError> {
622 if bundle
623 .snapshots
624 .iter()
625 .any(|snap| snap.snapshot_id == scope_id)
626 {
627 return Ok(());
628 }
629 let state = SnapshotState {
630 snapshot_id: scope_id.to_string(),
631 scope_id: scope_id.to_string(),
632 session_id: session_id.to_string(),
633 root: root.to_path_buf(),
634 taken_at_ms: now_ms(),
635 entries: BTreeMap::new(),
636 };
637 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
638 stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
639 builtin: SNAPSHOT_BUILTIN,
640 message: format!("mkdir {}: {err}", dir.display()),
641 })?;
642 bundle.snapshots.push(state);
643 Ok(())
644}
645
646fn capture_path(
647 bundle: &mut SessionSnapshots,
648 session_id: &str,
649 snapshot_id: &str,
650 path: &Path,
651 root: &Path,
652) -> Result<Option<u64>, String> {
653 let snap_index = bundle
654 .snapshots
655 .iter()
656 .position(|snap| snap.snapshot_id == snapshot_id)
657 .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
658 if bundle.snapshots[snap_index].entries.contains_key(path) {
659 return Ok(None);
660 }
661 let metadata = stdfs::symlink_metadata(path);
662 let (entry, byte_count) = match metadata {
663 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
664 Err(err) => {
665 return Err(format!("stat `{}`: {err}", path.display()));
666 }
667 Ok(metadata) if metadata.is_dir() => {
668 return Err(format!(
669 "snapshot of directory `{}` is not supported yet",
670 path.display()
671 ));
672 }
673 Ok(metadata) if metadata.file_type().is_symlink() => {
674 return Err(format!(
675 "snapshot of symlink `{}` is not supported yet",
676 path.display()
677 ));
678 }
679 Ok(metadata) => {
680 let bytes = stdfs::read(path)
681 .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
682 let body_hash = hex::encode(Sha256::digest(&bytes));
683 let len = bytes.len() as u64;
684 store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
685 #[cfg(unix)]
686 let mode = {
687 use std::os::unix::fs::MetadataExt;
688 Some(metadata.mode())
689 };
690 #[cfg(not(unix))]
691 let mode = {
692 let _ = &metadata;
693 None
694 };
695 (
696 SnapshotEntry::File {
697 body_hash,
698 len,
699 mode,
700 },
701 len,
702 )
703 }
704 };
705 let snap = &mut bundle.snapshots[snap_index];
706 snap.entries.insert(path.to_path_buf(), entry);
707 bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
708 Ok(Some(byte_count))
709}
710
711fn store_body(
712 root: &Path,
713 session_id: &str,
714 snapshot_id: &str,
715 body_hash: &str,
716 bytes: &[u8],
717) -> Result<(), String> {
718 let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
719 stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
720 let body_path = bodies.join(body_hash);
721 if !body_path.exists() {
722 atomic_write(&body_path, bytes)?;
723 }
724 Ok(())
725}
726
727fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
728 match entry {
729 SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
730 Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
731 .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
732 Ok(_) => stdfs::remove_file(path)
733 .map_err(|err| format!("remove_file {}: {err}", path.display())),
734 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
735 Err(err) => Err(format!("stat {}: {err}", path.display())),
736 },
737 SnapshotEntry::File {
738 body_hash, mode, ..
739 } => {
740 let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
741 .join("bodies")
742 .join(body_hash);
743 let bytes = stdfs::read(&body_path)
744 .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
745 atomic_write(path, &bytes)?;
746 #[cfg(unix)]
747 if let Some(bits) = mode {
748 use std::os::unix::fs::PermissionsExt;
749 let permissions = stdfs::Permissions::from_mode(*bits);
750 stdfs::set_permissions(path, permissions)
751 .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
752 }
753 #[cfg(not(unix))]
754 let _ = mode;
755 Ok(())
756 }
757 }
758}
759
760fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
761 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
762 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
763 let manifest = Manifest {
764 version: MANIFEST_VERSION,
765 snapshot_id: state.snapshot_id.clone(),
766 scope_id: state.scope_id.clone(),
767 session_id: state.session_id.clone(),
768 root: state.root.to_string_lossy().into_owned(),
769 taken_at_ms: state.taken_at_ms,
770 entries: state
771 .entries
772 .iter()
773 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
774 .collect(),
775 };
776 let bytes = serde_json::to_vec_pretty(&manifest)
777 .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
778 atomic_write(&dir.join("manifest.json"), &bytes)
779}
780
/// Writes `bytes` to `path` by first writing a sibling temporary file and then
/// renaming it over the destination, creating parent directories as needed.
///
/// The temporary file is named `<file name>.tmp-<pid>-<seq>`, where `<seq>` is
/// a per-process counter. The suffix is appended to the full file name, so the
/// destination's own extension is kept and two writes issued by this process
/// never share a temporary name.
///
/// If the rename fails, the destination is removed and the rename is retried
/// once (some platforms refuse to rename over an existing file).
///
/// # Errors
/// Returns a message when the parent directory cannot be created, the
/// temporary file cannot be written, or both rename attempts fail. The
/// temporary file is removed (best effort) on every failure path.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }

    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let mut tmp_name = path
        .file_name()
        .map(|name| name.to_os_string())
        .unwrap_or_default();
    tmp_name.push(format!(".tmp-{}-{seq}", std::process::id()));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // A failed write may still have created (or partially filled) the file.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }
    match stdfs::rename(&tmp, path) {
        Ok(()) => Ok(()),
        Err(rename_err) => {
            let _ = stdfs::remove_file(path);
            stdfs::rename(&tmp, path).map_err(|retry| {
                let _ = stdfs::remove_file(&tmp);
                format!(
                    "rename {} to {}: {rename_err}; retry: {retry}",
                    tmp.display(),
                    path.display()
                )
            })
        }
    }
}
805
806fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
814 while bundle.byte_count > bundle.byte_cap {
815 let Some(idx) = bundle
816 .snapshots
817 .iter()
818 .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
819 else {
820 break;
821 };
822 let evicted = bundle.snapshots.remove(idx);
823 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
824 tracing::info!(
825 "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
826 evicted.snapshot_id,
827 bundle.byte_cap,
828 );
829 remove_snapshot_dir(&evicted);
830 }
831}
832
833fn remove_snapshot_dir(state: &SnapshotState) {
834 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
835 let _ = stdfs::remove_dir_all(&dir);
836}
837
838fn entry_byte_count(state: &SnapshotState) -> u64 {
839 state
840 .entries
841 .values()
842 .map(|entry| match entry {
843 SnapshotEntry::File { len, .. } => *len,
844 SnapshotEntry::Absent => 0,
845 })
846 .sum()
847}
848
849fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
850 if paths.is_empty() {
851 return state.entries.keys().cloned().collect();
852 }
853 let requested: BTreeSet<PathBuf> = paths
854 .iter()
855 .map(|path| normalize_logical(Path::new(path)))
856 .collect();
857 state
858 .entries
859 .keys()
860 .filter(|path| requested.contains(*path))
861 .cloned()
862 .collect()
863}
864
865fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
866 if session_id.trim().is_empty() {
867 return Err(HostlibError::InvalidParameter {
868 builtin,
869 param: "session_id",
870 message: "must not be empty".to_string(),
871 });
872 }
873 Ok(())
874}
875
876fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
877 if scope_id.trim().is_empty() {
878 let param = match builtin {
879 SNAPSHOT_BUILTIN => "scope_id",
880 _ => "snapshot_id",
881 };
882 return Err(HostlibError::InvalidParameter {
883 builtin,
884 param,
885 message: "must not be empty".to_string(),
886 });
887 }
888 Ok(())
889}
890
891fn active_session_id() -> Option<String> {
892 harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
893}
894
895fn resolve_root(root: Option<&Path>) -> PathBuf {
896 match root {
897 Some(path) => normalize_logical(path),
898 None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
899 }
900}
901
902fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
903 let mut dir = root.to_path_buf();
904 for component in STATE_REL {
905 dir.push(component);
906 }
907 dir.push(sanitize_component(session_id));
908 dir.push(sanitize_component(snapshot_id));
909 dir
910}
911
912fn sanitize_component(input: &str) -> String {
913 let sanitized: String = input
914 .chars()
915 .map(|ch| match ch {
916 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
917 _ => '_',
918 })
919 .collect();
920 if sanitized == input {
921 sanitized
922 } else {
923 let hash = hex::encode(Sha256::digest(input.as_bytes()));
924 format!("{sanitized}-{}", &hash[..12])
925 }
926}
927
/// Makes `path` absolute and collapses `.` and `..` components purely
/// lexically; the filesystem is not consulted, so symlinks are not resolved.
///
/// A relative path is joined onto the current working directory (or onto `.`
/// when that cannot be determined). A `..` at the root is dropped.
fn normalize_logical(path: &Path) -> PathBuf {
    let absolute = match path.is_absolute() {
        true => path.to_path_buf(),
        false => std::env::current_dir()
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(path),
    };
    absolute
        .components()
        .fold(PathBuf::new(), |mut out, component| {
            match component {
                Component::CurDir => {}
                Component::ParentDir => {
                    out.pop();
                }
                other => out.push(other),
            }
            out
        })
}
948
/// Current wall-clock time in milliseconds since the Unix epoch, or `0` when
/// the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
955
// Unit tests for the snapshot API. Every test uses its own temporary
// directory and a unique session id, because the session registry is a
// process-wide static shared by all tests in the binary.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Builds a session id that is unique within this process (counter) and
    /// carries the process id as well.
    fn unique_session(prefix: &str) -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("{prefix}-{n}-{}", std::process::id())
    }

    /// Builds a scope id that is unique within this process.
    fn unique_scope() -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Opens (or creates) the agent session `id` and makes it the current
    /// session for as long as the returned guard is alive.
    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
        harn_vm::agent_sessions::enter_current_session(id.to_string())
    }

    // Snapshot a file, overwrite it, restore: the original bytes come back.
    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("note.txt");
        stdfs::write(&file, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.snapshot_id, scope);
        assert_eq!(result.captured_paths.len(), 1);
        assert_eq!(result.byte_count, 2);

        stdfs::write(&file, b"clobbered").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(restored.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
    }

    // Snapshot a file, delete it, restore: the file is recreated.
    #[test]
    fn restore_reinstates_deleted_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("doomed.txt");
        stdfs::write(&file, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::remove_file(&file).unwrap();
        assert!(!file.exists());
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
    }

    // Snapshot a path that does not exist, create it, restore: it is deleted.
    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("new.txt");
        assert!(!file.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::write(&file, b"created during call").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(
            !file.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    // Listing reports the snapshot and its byte count; dropping removes it and
    // a second drop reports `dropped == false`.
    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("listed.txt");
        stdfs::write(&file, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let summaries = list_snapshots(&session).unwrap();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].snapshot_id, scope);
        assert_eq!(summaries[0].byte_count, 3);

        let dropped = drop_snapshot(&session, &scope).unwrap();
        assert!(dropped.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let again = drop_snapshot(&session, &scope).unwrap();
        assert!(!again.dropped, "second drop must be idempotent");
    }

    // With a current tool call id, `auto_capture_for_write` stores the
    // pre-image in the snapshot keyed by that id.
    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("auto.txt");
        stdfs::write(&file, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());

        // Open the snapshot explicitly (with no paths) so it lives under `dir`.
        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
        auto_capture_for_write("hostlib_tools_write_file", &file);
        stdfs::write(&file, b"post").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
    }

    // Changed paths are recorded per session, deduplicated, and drained by
    // `take_session_changed_paths`.
    #[test]
    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
        let dir = TempDir::new().unwrap();
        let one = dir.path().join("a.txt");
        let two = dir.path().join("b.txt");
        let session = unique_session("snap-changed");
        harn_vm::agent_sessions::clear_session_changed_paths(&session);
        let _session_guard = enter_session(&session);

        // `one` is reported twice; it must be recorded only once.
        auto_capture_for_write("hostlib_tools_write_file", &one);
        auto_capture_for_write("hostlib_tools_write_file", &two);
        auto_capture_for_write("hostlib_tools_write_file", &one);

        let changed = harn_vm::agent_sessions::session_changed_paths(&session);
        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
        let expect_one = normalize_logical(&one).to_string_lossy().into_owned();
        let expect_two = normalize_logical(&two).to_string_lossy().into_owned();
        assert!(
            changed.contains(&expect_one),
            "path a recorded: {changed:?}"
        );
        assert!(
            changed.contains(&expect_two),
            "path b recorded: {changed:?}"
        );

        let drained = harn_vm::agent_sessions::take_session_changed_paths(&session);
        assert_eq!(drained.len(), 2);
        assert!(
            harn_vm::agent_sessions::session_changed_paths(&session).is_empty(),
            "take drains the session's recorded paths"
        );
    }

    // Two 5-byte snapshots under an 8-byte cap: the older one is evicted.
    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        configure_session_byte_cap(&session, 8);

        // Creates a 5-byte file named `name` inside the temp dir.
        let mk = |name: &str| {
            let path = dir.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        };

        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let a = mk("a.txt");
        snapshot(
            &session,
            &scope_a,
            &[a.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let b = mk("b.txt");
        snapshot(
            &session,
            &scope_b,
            &[b.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope_b],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    // A single snapshot bigger than the cap is kept: the snapshot being
    // written is protected from eviction.
    #[test]
    fn snapshot_larger_than_cap_is_retained_not_evicted() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-oversized");
        let _session_guard = enter_session(&session);
        configure_session_byte_cap(&session, 4);

        let scope = unique_scope();
        let file = dir.path().join("big.txt");
        stdfs::write(&file, b"0123456789").unwrap();
        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.byte_count, 10);

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope],
            "an oversized snapshot must be retained rather than evicting itself"
        );
    }

    // Dropping a session removes all of its snapshots; a second call reports 0.
    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("retained.txt");
        stdfs::write(&file, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope_a,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        snapshot(
            &session,
            &scope_b,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}