1use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::sync::Arc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability};
44use crate::tools::args::{
45 build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46 to_agent_path,
47};
48
/// Builtin name registered for taking a snapshot.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
/// Builtin name registered for restoring a snapshot.
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
/// Builtin name registered for listing a session's snapshots.
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
/// Builtin name registered for dropping a single snapshot.
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Value written to the `version` field of every persisted manifest.
const MANIFEST_VERSION: u32 = 1;
/// Path components, relative to the snapshot root, under which snapshot state is stored.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Byte cap a session starts with until `configure_session_byte_cap` overrides it (1 GiB).
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
61
/// Hostlib capability that registers the filesystem snapshot builtins
/// (`snapshot`, `restore`, `list_snapshots`, `drop_snapshot`) under the `fs` module.
#[derive(Default)]
pub struct FsSnapshotCapability;
65
66impl HostlibCapability for FsSnapshotCapability {
67 fn module_name(&self) -> &'static str {
68 "fs"
72 }
73
74 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
75 registry.register_fn("fs", SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
76 registry.register_fn("fs", RESTORE_BUILTIN, "restore", restore_builtin);
77 registry.register_fn("fs", LIST_BUILTIN, "list_snapshots", list_snapshots_builtin);
78 registry.register_fn("fs", DROP_BUILTIN, "drop_snapshot", drop_snapshot_builtin);
79 }
80}
81
/// Pre-image recorded for a single path in a snapshot.
///
/// Serialized with an internal `kind` tag whose values are the snake_case variant names.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path was a regular file when captured.
    File {
        /// Hex-encoded SHA-256 of the file contents; also the file name of the stored body.
        body_hash: String,
        /// Length of the captured contents in bytes.
        len: u64,
        /// Unix mode bits captured from the file's metadata; `None` on non-unix targets.
        /// Omitted from the serialized form when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist when captured; restoring removes whatever is there now.
    Absent,
}
93
/// On-disk form of a snapshot, written as `manifest.json` inside the snapshot directory.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest format version (`MANIFEST_VERSION` at write time).
    version: u32,
    /// Identifier of the snapshot.
    snapshot_id: String,
    /// Scope the snapshot was opened for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Snapshot root, stored as a lossily converted string.
    root: String,
    /// Creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured entries keyed by the lossily converted path string.
    entries: BTreeMap<String, SnapshotEntry>,
}
104
/// In-memory record of one snapshot held in a session's bundle.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Identifier of the snapshot (set to the scope id when the snapshot is opened).
    snapshot_id: String,
    /// Scope the snapshot was opened for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root under which this snapshot's state directory lives.
    root: PathBuf,
    /// Creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured pre-images keyed by the path handed to `capture_path`.
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
115
/// Summary of one snapshot as returned by [`list_snapshots`].
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Identifier of the snapshot.
    pub snapshot_id: String,
    /// Scope the snapshot was opened for.
    pub scope_id: String,
    /// Creation time in milliseconds since the Unix epoch.
    pub taken_at_ms: i64,
    /// Every path recorded in the snapshot, converted with `to_agent_path`.
    pub captured_paths: Vec<String>,
    /// Sum of the lengths of all captured file bodies.
    pub byte_count: u64,
}
130
/// Outcome of a [`snapshot`] call.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Identifier of the snapshot that received the captures.
    pub snapshot_id: String,
    /// Paths newly captured by this call (paths already recorded are not repeated).
    pub captured_paths: Vec<String>,
    /// Body bytes added by this call.
    pub byte_count: u64,
}
141
/// Outcome of a [`restore`] call.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Identifier of the snapshot that was restored from.
    pub snapshot_id: String,
    /// Paths whose pre-image was successfully reinstated.
    pub restored_paths: Vec<String>,
    /// Paths that could not be restored, each paired with the failure reason.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
152
/// Outcome of a [`drop_snapshot`] call.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Identifier that was requested to be dropped.
    pub snapshot_id: String,
    /// `true` when a snapshot with that id existed and was removed.
    pub dropped: bool,
}
161
/// All snapshots held for one session, plus the session's byte accounting.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots in insertion order; byte-cap eviction removes from the front first.
    snapshots: Vec<SnapshotState>,
    /// Running total of captured body bytes across `snapshots`.
    byte_count: u64,
    /// Limit on `byte_count` enforced by `enforce_byte_cap`.
    byte_cap: u64,
}
174
175impl Default for SessionSnapshots {
176 fn default() -> Self {
177 Self {
178 snapshots: Vec::new(),
179 byte_count: 0,
180 byte_cap: DEFAULT_SESSION_BYTE_CAP,
181 }
182 }
183}
184
185static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
186
187fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
188 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
189}
190
191pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
198 let mut guard = sessions()
199 .lock()
200 .expect("fs_snapshot session mutex poisoned");
201 let bundle = guard.entry(session_id.to_string()).or_default();
202 let previous = bundle.byte_cap;
203 bundle.byte_cap = bytes.max(1);
204 enforce_byte_cap(bundle, session_id, None);
205 previous
206}
207
208pub fn drop_session_snapshots(session_id: &str) -> usize {
215 let mut guard = sessions()
216 .lock()
217 .expect("fs_snapshot session mutex poisoned");
218 let Some(bundle) = guard.remove(session_id) else {
219 return 0;
220 };
221 let count = bundle.snapshots.len();
222 for snapshot in &bundle.snapshots {
223 remove_snapshot_dir(snapshot);
224 }
225 count
226}
227
228pub fn reset_all_sessions() -> usize {
237 let mut guard = sessions()
238 .lock()
239 .expect("fs_snapshot session mutex poisoned");
240 let session_count = guard.len();
241 for bundle in guard.values() {
242 for snapshot in &bundle.snapshots {
243 remove_snapshot_dir(snapshot);
244 }
245 }
246 guard.clear();
247 session_count
248}
249
/// Number of sessions currently registered (test-only helper).
#[cfg(test)]
pub fn session_count() -> usize {
    let registry = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    registry.len()
}
258
259pub fn snapshot(
263 session_id: &str,
264 scope_id: &str,
265 paths: &[String],
266 root: Option<&Path>,
267) -> Result<SnapshotResult, HostlibError> {
268 validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
269 validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
270 let root = resolve_root(root);
271 let mut guard = sessions()
272 .lock()
273 .expect("fs_snapshot session mutex poisoned");
274 let bundle = guard.entry(session_id.to_string()).or_default();
275 upsert_snapshot(bundle, session_id, scope_id, &root)?;
276 let mut captured_paths = Vec::new();
277 let mut byte_count = 0u64;
278 for raw in paths {
279 let path = normalize_logical(Path::new(raw));
280 let added =
281 capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
282 HostlibError::Backend {
283 builtin: SNAPSHOT_BUILTIN,
284 message,
285 }
286 })?;
287 if let Some(bytes) = added {
288 byte_count = byte_count.saturating_add(bytes);
289 captured_paths.push(to_agent_path(&path));
290 }
291 }
292 enforce_byte_cap(bundle, session_id, Some(scope_id));
293 let state = bundle
294 .snapshots
295 .iter()
296 .find(|snap| snap.snapshot_id == scope_id)
297 .expect("snapshot just upserted is protected from byte-cap eviction");
298 persist_manifest(state).map_err(|err| HostlibError::Backend {
299 builtin: SNAPSHOT_BUILTIN,
300 message: err,
301 })?;
302 Ok(SnapshotResult {
303 snapshot_id: state.snapshot_id.clone(),
304 captured_paths,
305 byte_count,
306 })
307}
308
309pub fn restore(
311 session_id: &str,
312 snapshot_id: &str,
313 paths: &[String],
314) -> Result<RestoreResult, HostlibError> {
315 validate_session_id(RESTORE_BUILTIN, session_id)?;
316 validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
317 let mut guard = sessions()
318 .lock()
319 .expect("fs_snapshot session mutex poisoned");
320 let bundle = guard
321 .get_mut(session_id)
322 .ok_or_else(|| HostlibError::Backend {
323 builtin: RESTORE_BUILTIN,
324 message: format!("no snapshots registered for session `{session_id}`"),
325 })?;
326 let state = bundle
327 .snapshots
328 .iter()
329 .find(|snap| snap.snapshot_id == snapshot_id)
330 .cloned()
331 .ok_or_else(|| HostlibError::Backend {
332 builtin: RESTORE_BUILTIN,
333 message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
334 })?;
335 let selected = select_paths(&state, paths);
336 let mut restored_paths = Vec::new();
337 let mut skipped_paths_with_reasons = Vec::new();
338 for path in selected {
339 let Some(entry) = state.entries.get(&path) else {
340 continue;
341 };
342 let label = to_agent_path(&path);
343 match restore_entry(&state, &path, entry) {
344 Ok(()) => restored_paths.push(label),
345 Err(reason) => skipped_paths_with_reasons.push((label, reason)),
346 }
347 }
348 Ok(RestoreResult {
349 snapshot_id: snapshot_id.to_string(),
350 restored_paths,
351 skipped_paths_with_reasons,
352 })
353}
354
355pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
357 validate_session_id(LIST_BUILTIN, session_id)?;
358 let guard = sessions()
359 .lock()
360 .expect("fs_snapshot session mutex poisoned");
361 let Some(bundle) = guard.get(session_id) else {
362 return Ok(Vec::new());
363 };
364 let mut summaries: Vec<SnapshotSummary> = bundle
365 .snapshots
366 .iter()
367 .map(|state| SnapshotSummary {
368 snapshot_id: state.snapshot_id.clone(),
369 scope_id: state.scope_id.clone(),
370 taken_at_ms: state.taken_at_ms,
371 captured_paths: state.entries.keys().map(to_agent_path).collect(),
372 byte_count: entry_byte_count(state),
373 })
374 .collect();
375 summaries.sort_by_key(|summary| summary.taken_at_ms);
376 Ok(summaries)
377}
378
379pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
381 validate_session_id(DROP_BUILTIN, session_id)?;
382 validate_scope_id(DROP_BUILTIN, snapshot_id)?;
383 let mut guard = sessions()
384 .lock()
385 .expect("fs_snapshot session mutex poisoned");
386 let Some(bundle) = guard.get_mut(session_id) else {
387 return Ok(DropResult {
388 snapshot_id: snapshot_id.to_string(),
389 dropped: false,
390 });
391 };
392 let position = bundle
393 .snapshots
394 .iter()
395 .position(|snap| snap.snapshot_id == snapshot_id);
396 let dropped = match position {
397 Some(idx) => {
398 let removed = bundle.snapshots.remove(idx);
399 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
400 remove_snapshot_dir(&removed);
401 true
402 }
403 None => false,
404 };
405 Ok(DropResult {
406 snapshot_id: snapshot_id.to_string(),
407 dropped,
408 })
409}
410
/// Best-effort hook called before a write builtin touches `path`.
///
/// When a session is active, the normalized path is recorded as changed for that
/// session. If a tool call is also active, the path's pre-image is captured into the
/// snapshot keyed by the tool-call id (opening that snapshot if needed), the manifest
/// is persisted, and the session byte cap is enforced. Failures are logged with
/// `tracing::warn!` and never propagated; `builtin` is only used in those log lines.
///
/// # Panics
/// Panics if the session registry mutex is poisoned.
pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
    // No active session: nothing to record or capture.
    let Some(session_id) = active_session_id() else {
        return;
    };
    // The changed path is recorded even when no tool call is active.
    harn_vm::agent_sessions::record_session_changed_path(
        &session_id,
        normalize_logical(path).to_string_lossy().as_ref(),
    );
    // Snapshots are keyed by the current tool-call id; without one there is nothing to capture into.
    let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
        return;
    };
    let mut guard = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    let bundle = guard.entry(session_id.clone()).or_default();
    // Open the snapshot on first write of this tool call, rooted at the session's
    // configured root when one exists.
    if !bundle
        .snapshots
        .iter()
        .any(|snap| snap.snapshot_id == snapshot_id)
    {
        let root =
            crate::fs::configured_session_root(&session_id).unwrap_or_else(|| resolve_root(None));
        if let Err(error) = upsert_snapshot(bundle, &session_id, &snapshot_id, &root) {
            tracing::warn!(
                "fs_snapshot: failed to auto-open snapshot {snapshot_id} in session {session_id} (builtin={builtin}): {error}"
            );
            return;
        }
    }
    let Some(snapshot) = bundle
        .snapshots
        .iter()
        .find(|snap| snap.snapshot_id == snapshot_id)
    else {
        return;
    };
    // Copy what is needed out of the snapshot so `bundle` can be borrowed mutably below.
    let scope_id = snapshot.scope_id.clone();
    // Use the root recorded on the snapshot, which may differ from the one computed above
    // when the snapshot already existed.
    let root = snapshot.root.clone();
    let key = normalize_logical(path);
    match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
        Ok(_added) => {
            if let Some(state) = bundle
                .snapshots
                .iter()
                .find(|snap| snap.snapshot_id == snapshot_id)
            {
                if let Err(err) = persist_manifest(state) {
                    tracing::warn!(
                        "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
                    );
                }
            }
        }
        Err(err) => {
            tracing::warn!(
                "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
                key.display()
            );
        }
    }
    // Evict other snapshots if this capture pushed the session over its cap; the
    // snapshot being written to is protected.
    enforce_byte_cap(bundle, &session_id, Some(&snapshot_id));
}
485
486fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
487 let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
488 let dict = raw.as_ref();
489 let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
490 let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
491 let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
492 let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
493 let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
494 Ok(build_dict([
495 ("snapshot_id", str_value(&result.snapshot_id)),
496 (
497 "captured_paths",
498 VmValue::List(Arc::new(
499 result
500 .captured_paths
501 .into_iter()
502 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
503 .collect(),
504 )),
505 ),
506 ("byte_count", VmValue::Int(result.byte_count as i64)),
507 ]))
508}
509
510fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
511 let raw = dict_arg(RESTORE_BUILTIN, args)?;
512 let dict = raw.as_ref();
513 let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
514 let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
515 let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
516 let result = restore(&session_id, &snapshot_id, &paths)?;
517 Ok(build_dict([
518 ("snapshot_id", str_value(&result.snapshot_id)),
519 (
520 "restored_paths",
521 VmValue::List(Arc::new(
522 result
523 .restored_paths
524 .into_iter()
525 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
526 .collect(),
527 )),
528 ),
529 (
530 "skipped_paths_with_reasons",
531 VmValue::List(Arc::new(
532 result
533 .skipped_paths_with_reasons
534 .into_iter()
535 .map(|(path, reason)| {
536 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
537 })
538 .collect(),
539 )),
540 ),
541 ]))
542}
543
544fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
545 let raw = dict_arg(LIST_BUILTIN, args)?;
546 let dict = raw.as_ref();
547 let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
548 let summaries = list_snapshots(&session_id)?;
549 Ok(build_dict([(
550 "snapshots",
551 VmValue::List(Arc::new(
552 summaries.into_iter().map(snapshot_summary_value).collect(),
553 )),
554 )]))
555}
556
557fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
558 let raw = dict_arg(DROP_BUILTIN, args)?;
559 let dict = raw.as_ref();
560 let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
561 let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
562 let result = drop_snapshot(&session_id, &snapshot_id)?;
563 Ok(build_dict([
564 ("snapshot_id", str_value(&result.snapshot_id)),
565 ("dropped", VmValue::Bool(result.dropped)),
566 ]))
567}
568
569fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
570 build_dict([
571 ("snapshot_id", str_value(&summary.snapshot_id)),
572 ("scope_id", str_value(&summary.scope_id)),
573 ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
574 (
575 "captured_paths",
576 VmValue::List(Arc::new(
577 summary
578 .captured_paths
579 .into_iter()
580 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
581 .collect(),
582 )),
583 ),
584 ("byte_count", VmValue::Int(summary.byte_count as i64)),
585 ])
586}
587
588fn upsert_snapshot(
589 bundle: &mut SessionSnapshots,
590 session_id: &str,
591 scope_id: &str,
592 root: &Path,
593) -> Result<(), HostlibError> {
594 if bundle
595 .snapshots
596 .iter()
597 .any(|snap| snap.snapshot_id == scope_id)
598 {
599 return Ok(());
600 }
601 let state = SnapshotState {
602 snapshot_id: scope_id.to_string(),
603 scope_id: scope_id.to_string(),
604 session_id: session_id.to_string(),
605 root: root.to_path_buf(),
606 taken_at_ms: now_ms(),
607 entries: BTreeMap::new(),
608 };
609 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
610 stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
611 builtin: SNAPSHOT_BUILTIN,
612 message: format!("mkdir {}: {err}", dir.display()),
613 })?;
614 bundle.snapshots.push(state);
615 Ok(())
616}
617
618fn capture_path(
619 bundle: &mut SessionSnapshots,
620 session_id: &str,
621 snapshot_id: &str,
622 path: &Path,
623 root: &Path,
624) -> Result<Option<u64>, String> {
625 let snap_index = bundle
626 .snapshots
627 .iter()
628 .position(|snap| snap.snapshot_id == snapshot_id)
629 .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
630 if bundle.snapshots[snap_index].entries.contains_key(path) {
631 return Ok(None);
632 }
633 let metadata = stdfs::symlink_metadata(path);
634 let (entry, byte_count) = match metadata {
635 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
636 Err(err) => {
637 return Err(format!("stat `{}`: {err}", path.display()));
638 }
639 Ok(metadata) if metadata.is_dir() => {
640 return Err(format!(
641 "snapshot of directory `{}` is not supported yet",
642 path.display()
643 ));
644 }
645 Ok(metadata) if metadata.file_type().is_symlink() => {
646 return Err(format!(
647 "snapshot of symlink `{}` is not supported yet",
648 path.display()
649 ));
650 }
651 Ok(metadata) => {
652 let bytes = stdfs::read(path)
653 .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
654 let body_hash = hex::encode(Sha256::digest(&bytes));
655 let len = bytes.len() as u64;
656 store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
657 #[cfg(unix)]
658 let mode = {
659 use std::os::unix::fs::MetadataExt;
660 Some(metadata.mode())
661 };
662 #[cfg(not(unix))]
663 let mode = {
664 let _ = &metadata;
665 None
666 };
667 (
668 SnapshotEntry::File {
669 body_hash,
670 len,
671 mode,
672 },
673 len,
674 )
675 }
676 };
677 let snap = &mut bundle.snapshots[snap_index];
678 snap.entries.insert(path.to_path_buf(), entry);
679 bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
680 Ok(Some(byte_count))
681}
682
683fn store_body(
684 root: &Path,
685 session_id: &str,
686 snapshot_id: &str,
687 body_hash: &str,
688 bytes: &[u8],
689) -> Result<(), String> {
690 let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
691 stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
692 let body_path = bodies.join(body_hash);
693 if !body_path.exists() {
694 atomic_write(&body_path, bytes)?;
695 }
696 Ok(())
697}
698
699fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
700 match entry {
701 SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
702 Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
703 .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
704 Ok(_) => stdfs::remove_file(path)
705 .map_err(|err| format!("remove_file {}: {err}", path.display())),
706 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
707 Err(err) => Err(format!("stat {}: {err}", path.display())),
708 },
709 SnapshotEntry::File {
710 body_hash, mode, ..
711 } => {
712 let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
713 .join("bodies")
714 .join(body_hash);
715 let bytes = stdfs::read(&body_path)
716 .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
717 atomic_write(path, &bytes)?;
718 #[cfg(unix)]
719 if let Some(bits) = mode {
720 use std::os::unix::fs::PermissionsExt;
721 let permissions = stdfs::Permissions::from_mode(*bits);
722 stdfs::set_permissions(path, permissions)
723 .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
724 }
725 #[cfg(not(unix))]
726 let _ = mode;
727 Ok(())
728 }
729 }
730}
731
732fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
733 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
734 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
735 let manifest = Manifest {
736 version: MANIFEST_VERSION,
737 snapshot_id: state.snapshot_id.clone(),
738 scope_id: state.scope_id.clone(),
739 session_id: state.session_id.clone(),
740 root: state.root.to_string_lossy().into_owned(),
741 taken_at_ms: state.taken_at_ms,
742 entries: state
743 .entries
744 .iter()
745 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
746 .collect(),
747 };
748 let bytes = serde_json::to_vec_pretty(&manifest)
749 .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
750 atomic_write(&dir.join("manifest.json"), &bytes)
751}
752
753fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
754 if let Some(parent) = path.parent() {
755 stdfs::create_dir_all(parent)
756 .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
757 }
758 let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
759 stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
760 match stdfs::rename(&tmp, path) {
761 Ok(()) => Ok(()),
762 Err(rename_err) => {
763 let _ = stdfs::remove_file(path);
764 stdfs::rename(&tmp, path).map_err(|retry| {
765 let _ = stdfs::remove_file(&tmp);
768 format!(
769 "rename {} to {}: {rename_err}; retry: {retry}",
770 tmp.display(),
771 path.display()
772 )
773 })
774 }
775 }
776}
777
778fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str, protected: Option<&str>) {
786 while bundle.byte_count > bundle.byte_cap {
787 let Some(idx) = bundle
788 .snapshots
789 .iter()
790 .position(|snap| Some(snap.snapshot_id.as_str()) != protected)
791 else {
792 break;
793 };
794 let evicted = bundle.snapshots.remove(idx);
795 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
796 tracing::info!(
797 "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
798 evicted.snapshot_id,
799 bundle.byte_cap,
800 );
801 remove_snapshot_dir(&evicted);
802 }
803}
804
805fn remove_snapshot_dir(state: &SnapshotState) {
806 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
807 let _ = stdfs::remove_dir_all(&dir);
808}
809
810fn entry_byte_count(state: &SnapshotState) -> u64 {
811 state
812 .entries
813 .values()
814 .map(|entry| match entry {
815 SnapshotEntry::File { len, .. } => *len,
816 SnapshotEntry::Absent => 0,
817 })
818 .sum()
819}
820
821fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
822 if paths.is_empty() {
823 return state.entries.keys().cloned().collect();
824 }
825 let requested: BTreeSet<PathBuf> = paths
826 .iter()
827 .map(|path| normalize_logical(Path::new(path)))
828 .collect();
829 state
830 .entries
831 .keys()
832 .filter(|path| requested.contains(*path))
833 .cloned()
834 .collect()
835}
836
837fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
838 if session_id.trim().is_empty() {
839 return Err(HostlibError::InvalidParameter {
840 builtin,
841 param: "session_id",
842 message: "must not be empty".to_string(),
843 });
844 }
845 Ok(())
846}
847
848fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
849 if scope_id.trim().is_empty() {
850 let param = match builtin {
851 SNAPSHOT_BUILTIN => "scope_id",
852 _ => "snapshot_id",
853 };
854 return Err(HostlibError::InvalidParameter {
855 builtin,
856 param,
857 message: "must not be empty".to_string(),
858 });
859 }
860 Ok(())
861}
862
863fn active_session_id() -> Option<String> {
864 harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
865}
866
867fn resolve_root(root: Option<&Path>) -> PathBuf {
868 match root {
869 Some(path) => normalize_logical(path),
870 None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
871 }
872}
873
874fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
875 let mut dir = root.to_path_buf();
876 for component in STATE_REL {
877 dir.push(component);
878 }
879 dir.push(sanitize_component(session_id));
880 dir.push(sanitize_component(snapshot_id));
881 dir
882}
883
884fn sanitize_component(input: &str) -> String {
885 let sanitized: String = input
886 .chars()
887 .map(|ch| match ch {
888 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
889 _ => '_',
890 })
891 .collect();
892 if sanitized == input {
893 sanitized
894 } else {
895 let hash = hex::encode(Sha256::digest(input.as_bytes()));
896 format!("{sanitized}-{}", &hash[..12])
897 }
898}
899
/// Makes `path` absolute and resolves `.` and `..` components lexically, without
/// touching the filesystem (symlinks are not followed).
///
/// Relative paths are joined onto the current directory (or `.` if that cannot be
/// determined). A `..` at the root has nothing to pop and is dropped.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = if path.is_relative() {
        std::env::current_dir()
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(path)
    } else {
        path.to_path_buf()
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut resolved, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    resolved.pop();
                }
                kept => resolved.push(kept),
            }
            resolved
        })
}
920
/// Current wall-clock time in milliseconds since the Unix epoch, or `0` when the
/// system clock reports a time before the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
927
/// Unit tests for the snapshot API. Each test uses its own temp directory plus a
/// unique session id and scope id, because all tests share the process-wide
/// session registry.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Builds a session id from `prefix`, a process-wide counter and the process id.
    fn unique_session(prefix: &str) -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("{prefix}-{n}-{}", std::process::id())
    }

    /// Builds a scope id (`tc-<n>`) from a process-wide counter.
    fn unique_scope() -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Opens (or creates) the VM session `id` and makes it the current session for
    /// as long as the returned guard is alive.
    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
        harn_vm::agent_sessions::enter_current_session(id.to_string())
    }

    /// Snapshot a file, overwrite it, restore: the original bytes come back.
    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("note.txt");
        stdfs::write(&file, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.snapshot_id, scope);
        assert_eq!(result.captured_paths.len(), 1);
        assert_eq!(result.byte_count, 2);

        stdfs::write(&file, b"clobbered").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(restored.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
    }

    /// Snapshot a file, delete it, restore: the file is recreated with its bytes.
    #[test]
    fn restore_reinstates_deleted_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("doomed.txt");
        stdfs::write(&file, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::remove_file(&file).unwrap();
        assert!(!file.exists());
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
    }

    /// Snapshot a path that does not exist, create it, restore: the file is deleted.
    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("new.txt");
        assert!(!file.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::write(&file, b"created during call").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(
            !file.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    /// A snapshot shows up in the listing with its byte count, disappears after a
    /// drop, and a second drop reports `dropped == false`.
    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("listed.txt");
        stdfs::write(&file, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let summaries = list_snapshots(&session).unwrap();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].snapshot_id, scope);
        assert_eq!(summaries[0].byte_count, 3);

        let dropped = drop_snapshot(&session, &scope).unwrap();
        assert!(dropped.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let again = drop_snapshot(&session, &scope).unwrap();
        assert!(!again.dropped, "second drop must be idempotent");
    }

    /// With a current tool call set, `auto_capture_for_write` records the pre-image
    /// into the snapshot whose id is the tool-call id.
    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("auto.txt");
        stdfs::write(&file, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());

        // Open the snapshot with an explicit root first; the auto-capture then reuses it.
        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
        auto_capture_for_write("hostlib_tools_write_file", &file);
        stdfs::write(&file, b"post").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
    }

    /// Without a tool call, `auto_capture_for_write` still records the changed paths
    /// for the session, deduplicated, and `take_session_changed_paths` drains them.
    #[test]
    fn auto_capture_records_session_changed_path_for_files_written_receipt() {
        let dir = TempDir::new().unwrap();
        let one = dir.path().join("a.txt");
        let two = dir.path().join("b.txt");
        let session = unique_session("snap-changed");
        harn_vm::agent_sessions::clear_session_changed_paths(&session);
        let _session_guard = enter_session(&session);

        // `one` is reported twice to exercise deduplication.
        auto_capture_for_write("hostlib_tools_write_file", &one);
        auto_capture_for_write("hostlib_tools_write_file", &two);
        auto_capture_for_write("hostlib_tools_write_file", &one);

        let changed = harn_vm::agent_sessions::session_changed_paths(&session);
        assert_eq!(changed.len(), 2, "two distinct paths recorded (deduped)");
        let expect_one = normalize_logical(&one).to_string_lossy().into_owned();
        let expect_two = normalize_logical(&two).to_string_lossy().into_owned();
        assert!(
            changed.contains(&expect_one),
            "path a recorded: {changed:?}"
        );
        assert!(
            changed.contains(&expect_two),
            "path b recorded: {changed:?}"
        );

        let drained = harn_vm::agent_sessions::take_session_changed_paths(&session);
        assert_eq!(drained.len(), 2);
        assert!(
            harn_vm::agent_sessions::session_changed_paths(&session).is_empty(),
            "take drains the session's recorded paths"
        );
    }

    /// Two 5-byte snapshots under an 8-byte cap: the first one is evicted.
    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        // 8 bytes fits one 5-byte file but not two.
        configure_session_byte_cap(&session, 8);

        let mk = |name: &str| {
            let path = dir.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        };

        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let a = mk("a.txt");
        snapshot(
            &session,
            &scope_a,
            &[a.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let b = mk("b.txt");
        snapshot(
            &session,
            &scope_b,
            &[b.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope_b],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    /// A single 10-byte snapshot under a 4-byte cap is kept: the snapshot being
    /// written to is protected from eviction.
    #[test]
    fn snapshot_larger_than_cap_is_retained_not_evicted() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-oversized");
        let _session_guard = enter_session(&session);
        configure_session_byte_cap(&session, 4);

        let scope = unique_scope();
        let file = dir.path().join("big.txt");
        stdfs::write(&file, b"0123456789").unwrap();
        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.byte_count, 10);

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope],
            "an oversized snapshot must be retained rather than evicting itself"
        );
    }

    /// `drop_session_snapshots` removes all snapshots of a session and returns 0
    /// when called again.
    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("retained.txt");
        stdfs::write(&file, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope_a,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        snapshot(
            &session,
            &scope_b,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}