1use std::collections::{BTreeMap, BTreeSet};
33use std::fs as stdfs;
34use std::path::{Component, Path, PathBuf};
35use std::rc::Rc;
36use std::sync::{Mutex, OnceLock};
37
38use harn_vm::VmValue;
39use serde::{Deserialize, Serialize};
40use sha2::{Digest, Sha256};
41
42use crate::error::HostlibError;
43use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
44use crate::tools::args::{
45 build_dict, dict_arg, optional_string, optional_string_list, require_string, str_value,
46};
47
/// Registered name of the builtin that captures a snapshot.
const SNAPSHOT_BUILTIN: &str = "hostlib_fs_snapshot";
/// Registered name of the builtin that restores a snapshot.
const RESTORE_BUILTIN: &str = "hostlib_fs_restore";
/// Registered name of the builtin that lists a session's snapshots.
const LIST_BUILTIN: &str = "hostlib_fs_list_snapshots";
/// Registered name of the builtin that drops a single snapshot.
const DROP_BUILTIN: &str = "hostlib_fs_drop_snapshot";

/// Version number written into every persisted `manifest.json`.
const MANIFEST_VERSION: u32 = 1;
/// Path components, relative to a snapshot root, under which snapshot state is stored.
const STATE_REL: &[&str] = &[".harn", "state", "snapshots"];

/// Byte cap a session starts with until `configure_session_byte_cap` changes it (1 GiB).
pub const DEFAULT_SESSION_BYTE_CAP: u64 = 1024 * 1024 * 1024;
60
/// Hostlib capability that contributes the filesystem snapshot builtins of the `fs` module.
#[derive(Default)]
pub struct FsSnapshotCapability;
64
65impl HostlibCapability for FsSnapshotCapability {
66 fn module_name(&self) -> &'static str {
67 "fs"
71 }
72
73 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
74 register(registry, SNAPSHOT_BUILTIN, "snapshot", snapshot_builtin);
75 register(registry, RESTORE_BUILTIN, "restore", restore_builtin);
76 register(
77 registry,
78 LIST_BUILTIN,
79 "list_snapshots",
80 list_snapshots_builtin,
81 );
82 register(
83 registry,
84 DROP_BUILTIN,
85 "drop_snapshot",
86 drop_snapshot_builtin,
87 );
88 }
89}
90
91fn register(
92 registry: &mut BuiltinRegistry,
93 name: &'static str,
94 method: &'static str,
95 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
96) {
97 let handler: SyncHandler = std::sync::Arc::new(runner);
98 registry.register(RegisteredBuiltin {
99 name,
100 module: "fs",
101 method,
102 handler,
103 });
104}
105
/// What a snapshot recorded for a single path at capture time.
///
/// Serialized with an internal `kind` tag whose values are `file` and `absent`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SnapshotEntry {
    /// The path was a regular file whose contents were stored in the snapshot's `bodies` dir.
    File {
        /// Hex-encoded SHA-256 of the file contents; also the stored body's file name.
        body_hash: String,
        /// Length of the captured contents in bytes.
        len: u64,
        /// Mode bits captured on Unix; `None` on other platforms and omitted from JSON then.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        mode: Option<u32>,
    },
    /// The path did not exist when it was captured.
    Absent,
}
117
/// On-disk (`manifest.json`) form of a snapshot, with paths rendered as strings.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest format version (`MANIFEST_VERSION` when written by this module).
    version: u32,
    /// Identifier of the snapshot.
    snapshot_id: String,
    /// Scope the snapshot was taken for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root directory under which the snapshot state lives, rendered lossily as a string.
    root: String,
    /// Wall-clock creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured entries keyed by the (lossily rendered) path.
    entries: BTreeMap<String, SnapshotEntry>,
}
128
/// In-memory record of one snapshot held by a session.
#[derive(Clone, Debug)]
struct SnapshotState {
    /// Identifier of the snapshot; set to the scope id when the snapshot is created.
    snapshot_id: String,
    /// Scope the snapshot was taken for.
    scope_id: String,
    /// Session that owns the snapshot.
    session_id: String,
    /// Root directory under which this snapshot's state directory is created.
    root: PathBuf,
    /// Wall-clock creation time in milliseconds since the Unix epoch.
    taken_at_ms: i64,
    /// Captured entries keyed by the logically normalized path.
    entries: BTreeMap<PathBuf, SnapshotEntry>,
}
139
/// Summary of one snapshot as returned by [`list_snapshots`].
#[derive(Clone, Debug)]
pub struct SnapshotSummary {
    /// Identifier of the snapshot.
    pub snapshot_id: String,
    /// Scope the snapshot was taken for.
    pub scope_id: String,
    /// Wall-clock creation time in milliseconds since the Unix epoch.
    pub taken_at_ms: i64,
    /// Every path recorded in the snapshot, rendered lossily as strings.
    pub captured_paths: Vec<String>,
    /// Sum of the lengths of all captured file bodies.
    pub byte_count: u64,
}
154
/// Outcome of a [`snapshot`] call.
#[derive(Clone, Debug)]
pub struct SnapshotResult {
    /// Identifier of the snapshot that was created or extended.
    pub snapshot_id: String,
    /// Paths newly recorded by this call (paths already in the snapshot are not repeated).
    pub captured_paths: Vec<String>,
    /// Bytes of file content newly recorded by this call.
    pub byte_count: u64,
}
165
/// Outcome of a [`restore`] call.
#[derive(Clone, Debug)]
pub struct RestoreResult {
    /// Identifier of the snapshot that was restored from.
    pub snapshot_id: String,
    /// Paths that were put back into their captured state.
    pub restored_paths: Vec<String>,
    /// `(path, reason)` pairs for entries whose restoration failed.
    pub skipped_paths_with_reasons: Vec<(String, String)>,
}
176
/// Outcome of a [`drop_snapshot`] call.
#[derive(Clone, Debug)]
pub struct DropResult {
    /// Identifier that was asked to be dropped.
    pub snapshot_id: String,
    /// `true` when a snapshot with that id existed and was removed.
    pub dropped: bool,
}
185
/// All snapshots held for one session together with its byte accounting.
#[derive(Debug)]
struct SessionSnapshots {
    /// Snapshots in creation order; eviction removes from the front.
    snapshots: Vec<SnapshotState>,
    /// Running total of captured file bytes across `snapshots`.
    byte_count: u64,
    /// Total above which snapshots are evicted.
    byte_cap: u64,
}
198
199impl Default for SessionSnapshots {
200 fn default() -> Self {
201 Self {
202 snapshots: Vec::new(),
203 byte_count: 0,
204 byte_cap: DEFAULT_SESSION_BYTE_CAP,
205 }
206 }
207}
208
209static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionSnapshots>>> = OnceLock::new();
210
211fn sessions() -> &'static Mutex<BTreeMap<String, SessionSnapshots>> {
212 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
213}
214
215pub fn configure_session_byte_cap(session_id: &str, bytes: u64) -> u64 {
222 let mut guard = sessions()
223 .lock()
224 .expect("fs_snapshot session mutex poisoned");
225 let bundle = guard.entry(session_id.to_string()).or_default();
226 let previous = bundle.byte_cap;
227 bundle.byte_cap = bytes.max(1);
228 enforce_byte_cap(bundle, session_id);
229 previous
230}
231
232pub fn drop_session_snapshots(session_id: &str) -> usize {
239 let mut guard = sessions()
240 .lock()
241 .expect("fs_snapshot session mutex poisoned");
242 let Some(bundle) = guard.remove(session_id) else {
243 return 0;
244 };
245 let count = bundle.snapshots.len();
246 for snapshot in &bundle.snapshots {
247 remove_snapshot_dir(snapshot);
248 }
249 count
250}
251
252pub fn snapshot(
256 session_id: &str,
257 scope_id: &str,
258 paths: &[String],
259 root: Option<&Path>,
260) -> Result<SnapshotResult, HostlibError> {
261 validate_session_id(SNAPSHOT_BUILTIN, session_id)?;
262 validate_scope_id(SNAPSHOT_BUILTIN, scope_id)?;
263 let root = resolve_root(root);
264 let mut guard = sessions()
265 .lock()
266 .expect("fs_snapshot session mutex poisoned");
267 let bundle = guard.entry(session_id.to_string()).or_default();
268 upsert_snapshot(bundle, session_id, scope_id, &root)?;
269 let mut captured_paths = Vec::new();
270 let mut byte_count = 0u64;
271 for raw in paths {
272 let path = normalize_logical(Path::new(raw));
273 let added =
274 capture_path(bundle, session_id, scope_id, &path, &root).map_err(|message| {
275 HostlibError::Backend {
276 builtin: SNAPSHOT_BUILTIN,
277 message,
278 }
279 })?;
280 if let Some(bytes) = added {
281 byte_count = byte_count.saturating_add(bytes);
282 captured_paths.push(path.to_string_lossy().into_owned());
283 }
284 }
285 enforce_byte_cap(bundle, session_id);
286 let state = bundle
287 .snapshots
288 .iter()
289 .find(|snap| snap.snapshot_id == scope_id)
290 .expect("snapshot just upserted");
291 persist_manifest(state).map_err(|err| HostlibError::Backend {
292 builtin: SNAPSHOT_BUILTIN,
293 message: err,
294 })?;
295 Ok(SnapshotResult {
296 snapshot_id: state.snapshot_id.clone(),
297 captured_paths,
298 byte_count,
299 })
300}
301
302pub fn restore(
304 session_id: &str,
305 snapshot_id: &str,
306 paths: &[String],
307) -> Result<RestoreResult, HostlibError> {
308 validate_session_id(RESTORE_BUILTIN, session_id)?;
309 validate_scope_id(RESTORE_BUILTIN, snapshot_id)?;
310 let mut guard = sessions()
311 .lock()
312 .expect("fs_snapshot session mutex poisoned");
313 let bundle = guard
314 .get_mut(session_id)
315 .ok_or_else(|| HostlibError::Backend {
316 builtin: RESTORE_BUILTIN,
317 message: format!("no snapshots registered for session `{session_id}`"),
318 })?;
319 let state = bundle
320 .snapshots
321 .iter()
322 .find(|snap| snap.snapshot_id == snapshot_id)
323 .cloned()
324 .ok_or_else(|| HostlibError::Backend {
325 builtin: RESTORE_BUILTIN,
326 message: format!("unknown snapshot `{snapshot_id}` for session `{session_id}`"),
327 })?;
328 let selected = select_paths(&state, paths);
329 let mut restored_paths = Vec::new();
330 let mut skipped_paths_with_reasons = Vec::new();
331 for path in selected {
332 let Some(entry) = state.entries.get(&path) else {
333 continue;
334 };
335 let label = path.to_string_lossy().into_owned();
336 match restore_entry(&state, &path, entry) {
337 Ok(()) => restored_paths.push(label),
338 Err(reason) => skipped_paths_with_reasons.push((label, reason)),
339 }
340 }
341 Ok(RestoreResult {
342 snapshot_id: snapshot_id.to_string(),
343 restored_paths,
344 skipped_paths_with_reasons,
345 })
346}
347
348pub fn list_snapshots(session_id: &str) -> Result<Vec<SnapshotSummary>, HostlibError> {
350 validate_session_id(LIST_BUILTIN, session_id)?;
351 let guard = sessions()
352 .lock()
353 .expect("fs_snapshot session mutex poisoned");
354 let Some(bundle) = guard.get(session_id) else {
355 return Ok(Vec::new());
356 };
357 let mut summaries: Vec<SnapshotSummary> = bundle
358 .snapshots
359 .iter()
360 .map(|state| SnapshotSummary {
361 snapshot_id: state.snapshot_id.clone(),
362 scope_id: state.scope_id.clone(),
363 taken_at_ms: state.taken_at_ms,
364 captured_paths: state
365 .entries
366 .keys()
367 .map(|path| path.to_string_lossy().into_owned())
368 .collect(),
369 byte_count: entry_byte_count(state),
370 })
371 .collect();
372 summaries.sort_by_key(|summary| summary.taken_at_ms);
373 Ok(summaries)
374}
375
376pub fn drop_snapshot(session_id: &str, snapshot_id: &str) -> Result<DropResult, HostlibError> {
378 validate_session_id(DROP_BUILTIN, session_id)?;
379 validate_scope_id(DROP_BUILTIN, snapshot_id)?;
380 let mut guard = sessions()
381 .lock()
382 .expect("fs_snapshot session mutex poisoned");
383 let Some(bundle) = guard.get_mut(session_id) else {
384 return Ok(DropResult {
385 snapshot_id: snapshot_id.to_string(),
386 dropped: false,
387 });
388 };
389 let position = bundle
390 .snapshots
391 .iter()
392 .position(|snap| snap.snapshot_id == snapshot_id);
393 let dropped = match position {
394 Some(idx) => {
395 let removed = bundle.snapshots.remove(idx);
396 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&removed));
397 remove_snapshot_dir(&removed);
398 true
399 }
400 None => false,
401 };
402 Ok(DropResult {
403 snapshot_id: snapshot_id.to_string(),
404 dropped,
405 })
406}
407
/// Records the pre-image of `path` in the snapshot of the current tool call, if there is one.
///
/// This is a silent no-op unless all of the following hold: there is a non-blank current
/// session id, there is a current tool-call id, the session has a snapshot bundle, and that
/// bundle contains a snapshot whose id equals the tool-call id.
///
/// Failures are never returned to the caller; they are logged with `tracing::warn!`.
/// `builtin` is only used to label those log lines.
///
/// # Panics
/// Panics if the session mutex is poisoned.
pub(crate) fn auto_capture_for_write(builtin: &'static str, path: &Path) {
    let Some(session_id) = active_session_id() else {
        return;
    };
    // The tool-call id doubles as the snapshot id to look up.
    let Some(snapshot_id) = harn_vm::agent_sessions::current_tool_call_id() else {
        return;
    };
    let mut guard = sessions()
        .lock()
        .expect("fs_snapshot session mutex poisoned");
    let Some(bundle) = guard.get_mut(&session_id) else {
        return;
    };
    let Some(snapshot) = bundle
        .snapshots
        .iter()
        .find(|snap| snap.snapshot_id == snapshot_id)
    else {
        return;
    };
    // Copy what is needed out of the snapshot so the shared borrow of `bundle` ends before
    // `capture_path` borrows it mutably.
    let scope_id = snapshot.scope_id.clone();
    let root = snapshot.root.clone();
    let key = normalize_logical(path);
    match capture_path(bundle, &session_id, &snapshot_id, &key, &root) {
        Ok(_added) => {
            // The manifest is rewritten whether or not the path was newly recorded.
            if let Some(state) = bundle
                .snapshots
                .iter()
                .find(|snap| snap.snapshot_id == snapshot_id)
            {
                if let Err(err) = persist_manifest(state) {
                    tracing::warn!(
                        "fs_snapshot: failed to persist manifest for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}"
                    );
                }
            }
        }
        Err(err) => {
            tracing::warn!(
                "fs_snapshot: failed to auto-capture `{}` for snapshot {snapshot_id} in session {session_id} (scope_id={scope_id}, builtin={builtin}): {err}",
                key.display()
            );
        }
    }
    // Runs after both outcomes; may evict snapshots, including the one just extended.
    enforce_byte_cap(bundle, &session_id);
}
462
463fn snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
464 let raw = dict_arg(SNAPSHOT_BUILTIN, args)?;
465 let dict = raw.as_ref();
466 let session_id = require_string(SNAPSHOT_BUILTIN, dict, "session_id")?;
467 let scope_id = require_string(SNAPSHOT_BUILTIN, dict, "scope_id")?;
468 let paths = optional_string_list(SNAPSHOT_BUILTIN, dict, "paths")?;
469 let root = optional_string(SNAPSHOT_BUILTIN, dict, "root")?.map(PathBuf::from);
470 let result = snapshot(&session_id, &scope_id, &paths, root.as_deref())?;
471 Ok(build_dict([
472 ("snapshot_id", str_value(&result.snapshot_id)),
473 (
474 "captured_paths",
475 VmValue::List(Rc::new(
476 result
477 .captured_paths
478 .into_iter()
479 .map(|path| VmValue::String(Rc::from(path)))
480 .collect(),
481 )),
482 ),
483 ("byte_count", VmValue::Int(result.byte_count as i64)),
484 ]))
485}
486
487fn restore_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
488 let raw = dict_arg(RESTORE_BUILTIN, args)?;
489 let dict = raw.as_ref();
490 let session_id = require_string(RESTORE_BUILTIN, dict, "session_id")?;
491 let snapshot_id = require_string(RESTORE_BUILTIN, dict, "snapshot_id")?;
492 let paths = optional_string_list(RESTORE_BUILTIN, dict, "paths")?;
493 let result = restore(&session_id, &snapshot_id, &paths)?;
494 Ok(build_dict([
495 ("snapshot_id", str_value(&result.snapshot_id)),
496 (
497 "restored_paths",
498 VmValue::List(Rc::new(
499 result
500 .restored_paths
501 .into_iter()
502 .map(|path| VmValue::String(Rc::from(path)))
503 .collect(),
504 )),
505 ),
506 (
507 "skipped_paths_with_reasons",
508 VmValue::List(Rc::new(
509 result
510 .skipped_paths_with_reasons
511 .into_iter()
512 .map(|(path, reason)| {
513 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
514 })
515 .collect(),
516 )),
517 ),
518 ]))
519}
520
521fn list_snapshots_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
522 let raw = dict_arg(LIST_BUILTIN, args)?;
523 let dict = raw.as_ref();
524 let session_id = require_string(LIST_BUILTIN, dict, "session_id")?;
525 let summaries = list_snapshots(&session_id)?;
526 Ok(build_dict([(
527 "snapshots",
528 VmValue::List(Rc::new(
529 summaries.into_iter().map(snapshot_summary_value).collect(),
530 )),
531 )]))
532}
533
534fn drop_snapshot_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
535 let raw = dict_arg(DROP_BUILTIN, args)?;
536 let dict = raw.as_ref();
537 let session_id = require_string(DROP_BUILTIN, dict, "session_id")?;
538 let snapshot_id = require_string(DROP_BUILTIN, dict, "snapshot_id")?;
539 let result = drop_snapshot(&session_id, &snapshot_id)?;
540 Ok(build_dict([
541 ("snapshot_id", str_value(&result.snapshot_id)),
542 ("dropped", VmValue::Bool(result.dropped)),
543 ]))
544}
545
546fn snapshot_summary_value(summary: SnapshotSummary) -> VmValue {
547 build_dict([
548 ("snapshot_id", str_value(&summary.snapshot_id)),
549 ("scope_id", str_value(&summary.scope_id)),
550 ("taken_at_ms", VmValue::Int(summary.taken_at_ms)),
551 (
552 "captured_paths",
553 VmValue::List(Rc::new(
554 summary
555 .captured_paths
556 .into_iter()
557 .map(|path| VmValue::String(Rc::from(path)))
558 .collect(),
559 )),
560 ),
561 ("byte_count", VmValue::Int(summary.byte_count as i64)),
562 ])
563}
564
565fn upsert_snapshot(
566 bundle: &mut SessionSnapshots,
567 session_id: &str,
568 scope_id: &str,
569 root: &Path,
570) -> Result<(), HostlibError> {
571 if bundle
572 .snapshots
573 .iter()
574 .any(|snap| snap.snapshot_id == scope_id)
575 {
576 return Ok(());
577 }
578 let state = SnapshotState {
579 snapshot_id: scope_id.to_string(),
580 scope_id: scope_id.to_string(),
581 session_id: session_id.to_string(),
582 root: root.to_path_buf(),
583 taken_at_ms: now_ms(),
584 entries: BTreeMap::new(),
585 };
586 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
587 stdfs::create_dir_all(dir.join("bodies")).map_err(|err| HostlibError::Backend {
588 builtin: SNAPSHOT_BUILTIN,
589 message: format!("mkdir {}: {err}", dir.display()),
590 })?;
591 bundle.snapshots.push(state);
592 Ok(())
593}
594
595fn capture_path(
596 bundle: &mut SessionSnapshots,
597 session_id: &str,
598 snapshot_id: &str,
599 path: &Path,
600 root: &Path,
601) -> Result<Option<u64>, String> {
602 let snap_index = bundle
603 .snapshots
604 .iter()
605 .position(|snap| snap.snapshot_id == snapshot_id)
606 .ok_or_else(|| format!("snapshot `{snapshot_id}` is not registered"))?;
607 if bundle.snapshots[snap_index].entries.contains_key(path) {
608 return Ok(None);
609 }
610 let metadata = stdfs::symlink_metadata(path);
611 let (entry, byte_count) = match metadata {
612 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (SnapshotEntry::Absent, 0u64),
613 Err(err) => {
614 return Err(format!("stat `{}`: {err}", path.display()));
615 }
616 Ok(metadata) if metadata.is_dir() => {
617 return Err(format!(
618 "snapshot of directory `{}` is not supported yet",
619 path.display()
620 ));
621 }
622 Ok(metadata) if metadata.file_type().is_symlink() => {
623 return Err(format!(
624 "snapshot of symlink `{}` is not supported yet",
625 path.display()
626 ));
627 }
628 Ok(metadata) => {
629 let bytes = stdfs::read(path)
630 .map_err(|err| format!("read `{}` for snapshot: {err}", path.display()))?;
631 let body_hash = hex::encode(Sha256::digest(&bytes));
632 let len = bytes.len() as u64;
633 store_body(root, session_id, snapshot_id, &body_hash, &bytes)?;
634 #[cfg(unix)]
635 let mode = {
636 use std::os::unix::fs::MetadataExt;
637 Some(metadata.mode())
638 };
639 #[cfg(not(unix))]
640 let mode = {
641 let _ = &metadata;
642 None
643 };
644 (
645 SnapshotEntry::File {
646 body_hash,
647 len,
648 mode,
649 },
650 len,
651 )
652 }
653 };
654 let snap = &mut bundle.snapshots[snap_index];
655 snap.entries.insert(path.to_path_buf(), entry);
656 bundle.byte_count = bundle.byte_count.saturating_add(byte_count);
657 Ok(Some(byte_count))
658}
659
660fn store_body(
661 root: &Path,
662 session_id: &str,
663 snapshot_id: &str,
664 body_hash: &str,
665 bytes: &[u8],
666) -> Result<(), String> {
667 let bodies = snapshot_dir(root, session_id, snapshot_id).join("bodies");
668 stdfs::create_dir_all(&bodies).map_err(|err| format!("mkdir {}: {err}", bodies.display()))?;
669 let body_path = bodies.join(body_hash);
670 if !body_path.exists() {
671 atomic_write(&body_path, bytes)?;
672 }
673 Ok(())
674}
675
676fn restore_entry(state: &SnapshotState, path: &Path, entry: &SnapshotEntry) -> Result<(), String> {
677 match entry {
678 SnapshotEntry::Absent => match stdfs::symlink_metadata(path) {
679 Ok(metadata) if metadata.is_dir() => stdfs::remove_dir_all(path)
680 .map_err(|err| format!("remove_dir_all {}: {err}", path.display())),
681 Ok(_) => stdfs::remove_file(path)
682 .map_err(|err| format!("remove_file {}: {err}", path.display())),
683 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
684 Err(err) => Err(format!("stat {}: {err}", path.display())),
685 },
686 SnapshotEntry::File {
687 body_hash, mode, ..
688 } => {
689 let body_path = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id)
690 .join("bodies")
691 .join(body_hash);
692 let bytes = stdfs::read(&body_path)
693 .map_err(|err| format!("read snapshot body `{}`: {err}", body_path.display()))?;
694 atomic_write(path, &bytes)?;
695 #[cfg(unix)]
696 if let Some(bits) = mode {
697 use std::os::unix::fs::PermissionsExt;
698 let permissions = stdfs::Permissions::from_mode(*bits);
699 stdfs::set_permissions(path, permissions)
700 .map_err(|err| format!("set_permissions `{}`: {err}", path.display()))?;
701 }
702 #[cfg(not(unix))]
703 let _ = mode;
704 Ok(())
705 }
706 }
707}
708
709fn persist_manifest(state: &SnapshotState) -> Result<(), String> {
710 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
711 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
712 let manifest = Manifest {
713 version: MANIFEST_VERSION,
714 snapshot_id: state.snapshot_id.clone(),
715 scope_id: state.scope_id.clone(),
716 session_id: state.session_id.clone(),
717 root: state.root.to_string_lossy().into_owned(),
718 taken_at_ms: state.taken_at_ms,
719 entries: state
720 .entries
721 .iter()
722 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
723 .collect(),
724 };
725 let bytes = serde_json::to_vec_pretty(&manifest)
726 .map_err(|err| format!("serialize snapshot manifest: {err}"))?;
727 atomic_write(&dir.join("manifest.json"), &bytes)
728}
729
/// Writes `bytes` to `path` by writing a sibling temporary file and renaming it into place.
///
/// Missing parent directories are created. The temporary file is named
/// `<file name>.tmp-<pid>-<n>`, where `n` is a per-process counter, so concurrent or
/// back-to-back writes never share a temp name and the target's own extension is preserved
/// in it. If the rename fails, the destination is removed and the rename retried once.
///
/// The temporary file is removed again whenever the operation fails, so failed writes do not
/// leave stray files behind.
///
/// # Errors
/// Returns a message when the parent directory cannot be created, the temporary file cannot
/// be written, or both rename attempts fail.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    static TMP_COUNTER: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }

    // Append the suffix to the full file name instead of replacing the extension, so that
    // `a.txt` and `a.md` never map to the same temporary file.
    let seq = TMP_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let mut tmp_name = path
        .file_name()
        .map(|name| name.to_os_string())
        .unwrap_or_default();
    tmp_name.push(format!(".tmp-{}-{seq}", std::process::id()));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // A partially written temp file is useless; do not leave it behind.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }

    let Err(rename_err) = stdfs::rename(&tmp, path) else {
        return Ok(());
    };
    // Fallback for targets where rename cannot replace an existing destination.
    let _ = stdfs::remove_file(path);
    stdfs::rename(&tmp, path).map_err(|retry| {
        let _ = stdfs::remove_file(&tmp);
        format!(
            "rename {} to {}: {rename_err}; retry: {retry}",
            tmp.display(),
            path.display()
        )
    })
}
751
752fn enforce_byte_cap(bundle: &mut SessionSnapshots, session_id: &str) {
753 while bundle.byte_count > bundle.byte_cap && !bundle.snapshots.is_empty() {
754 let evicted = bundle.snapshots.remove(0);
755 bundle.byte_count = bundle.byte_count.saturating_sub(entry_byte_count(&evicted));
756 tracing::info!(
757 "fs_snapshot: evicting snapshot `{}` from session `{session_id}` (over byte cap {})",
758 evicted.snapshot_id,
759 bundle.byte_cap,
760 );
761 remove_snapshot_dir(&evicted);
762 }
763}
764
765fn remove_snapshot_dir(state: &SnapshotState) {
766 let dir = snapshot_dir(&state.root, &state.session_id, &state.snapshot_id);
767 let _ = stdfs::remove_dir_all(&dir);
768}
769
770fn entry_byte_count(state: &SnapshotState) -> u64 {
771 state
772 .entries
773 .values()
774 .map(|entry| match entry {
775 SnapshotEntry::File { len, .. } => *len,
776 SnapshotEntry::Absent => 0,
777 })
778 .sum()
779}
780
781fn select_paths(state: &SnapshotState, paths: &[String]) -> Vec<PathBuf> {
782 if paths.is_empty() {
783 return state.entries.keys().cloned().collect();
784 }
785 let requested: BTreeSet<PathBuf> = paths
786 .iter()
787 .map(|path| normalize_logical(Path::new(path)))
788 .collect();
789 state
790 .entries
791 .keys()
792 .filter(|path| requested.contains(*path))
793 .cloned()
794 .collect()
795}
796
797fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
798 if session_id.trim().is_empty() {
799 return Err(HostlibError::InvalidParameter {
800 builtin,
801 param: "session_id",
802 message: "must not be empty".to_string(),
803 });
804 }
805 Ok(())
806}
807
808fn validate_scope_id(builtin: &'static str, scope_id: &str) -> Result<(), HostlibError> {
809 if scope_id.trim().is_empty() {
810 let param = match builtin {
811 SNAPSHOT_BUILTIN => "scope_id",
812 _ => "snapshot_id",
813 };
814 return Err(HostlibError::InvalidParameter {
815 builtin,
816 param,
817 message: "must not be empty".to_string(),
818 });
819 }
820 Ok(())
821}
822
823fn active_session_id() -> Option<String> {
824 harn_vm::agent_sessions::current_session_id().filter(|id| !id.trim().is_empty())
825}
826
827fn resolve_root(root: Option<&Path>) -> PathBuf {
828 match root {
829 Some(path) => normalize_logical(path),
830 None => normalize_logical(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))),
831 }
832}
833
834fn snapshot_dir(root: &Path, session_id: &str, snapshot_id: &str) -> PathBuf {
835 let mut dir = root.to_path_buf();
836 for component in STATE_REL {
837 dir.push(component);
838 }
839 dir.push(sanitize_component(session_id));
840 dir.push(sanitize_component(snapshot_id));
841 dir
842}
843
844fn sanitize_component(input: &str) -> String {
845 let sanitized: String = input
846 .chars()
847 .map(|ch| match ch {
848 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
849 _ => '_',
850 })
851 .collect();
852 if sanitized == input {
853 sanitized
854 } else {
855 let hash = hex::encode(Sha256::digest(input.as_bytes()));
856 format!("{sanitized}-{}", &hash[..12])
857 }
858}
859
/// Makes `path` absolute and resolves `.` and `..` purely lexically.
///
/// Relative paths are joined onto the current directory (or `.` when that is unavailable).
/// The filesystem is not consulted, so symlinks are not resolved; `..` at the root is a no-op.
fn normalize_logical(path: &Path) -> PathBuf {
    let anchored = if path.is_relative() {
        std::env::current_dir()
            .unwrap_or_else(|_| PathBuf::from("."))
            .join(path)
    } else {
        path.to_path_buf()
    };
    anchored
        .components()
        .fold(PathBuf::new(), |mut resolved, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    resolved.pop();
                }
                kept => resolved.push(kept),
            }
            resolved
        })
}
880
/// Milliseconds since the Unix epoch according to the system clock, or `0` when the clock
/// reports a time before the epoch.
fn now_ms() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
887
/// Tests for the snapshot API. They share the process-global session registry, so every test
/// uses its own session and scope ids and its own temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};
    use tempfile::TempDir;

    /// Session id that is unique within this process (counter) and tagged with the pid.
    fn unique_session(prefix: &str) -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        format!("{prefix}-{n}-{}", std::process::id())
    }

    /// Scope id that is unique within this process.
    fn unique_scope() -> String {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        format!("tc-{}", COUNTER.fetch_add(1, Ordering::Relaxed))
    }

    /// Opens (or creates) the agent session `id` and makes it the current session for as
    /// long as the returned guard is alive.
    fn enter_session(id: &str) -> harn_vm::agent_sessions::CurrentSessionGuard {
        harn_vm::agent_sessions::open_or_create(Some(id.to_string()));
        harn_vm::agent_sessions::enter_current_session(id.to_string())
    }

    /// Overwriting a captured file and restoring brings back the captured bytes.
    #[test]
    fn explicit_snapshot_then_restore_round_trips_file_bytes() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("note.txt");
        stdfs::write(&file, b"v1").unwrap();
        let session = unique_session("snap-roundtrip");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        let result = snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(result.snapshot_id, scope);
        assert_eq!(result.captured_paths.len(), 1);
        assert_eq!(result.byte_count, 2);

        stdfs::write(&file, b"clobbered").unwrap();
        // An empty path list restores everything recorded in the snapshot.
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(restored.skipped_paths_with_reasons.is_empty());
        assert_eq!(stdfs::read(&file).unwrap(), b"v1");
    }

    /// Deleting a captured file and restoring recreates it with the captured bytes.
    #[test]
    fn restore_reinstates_deleted_file() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("doomed.txt");
        stdfs::write(&file, b"alive").unwrap();
        let session = unique_session("snap-reinstate");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::remove_file(&file).unwrap();
        assert!(!file.exists());
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"alive");
    }

    /// A path captured while missing is deleted again by restore if it was created later.
    #[test]
    fn absent_snapshot_means_restore_deletes_paths_created_during_the_call() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("new.txt");
        assert!(!file.exists());
        let session = unique_session("snap-absent");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        stdfs::write(&file, b"created during call").unwrap();
        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert!(
            !file.exists(),
            "restore must delete files that the snapshot saw as absent"
        );
    }

    /// Listing reports the snapshot with its byte count; dropping removes it and a second
    /// drop reports `dropped: false`.
    #[test]
    fn list_and_drop_round_trip_through_metadata() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("listed.txt");
        stdfs::write(&file, b"abc").unwrap();
        let session = unique_session("snap-list");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let summaries = list_snapshots(&session).unwrap();
        assert_eq!(summaries.len(), 1);
        assert_eq!(summaries[0].snapshot_id, scope);
        assert_eq!(summaries[0].byte_count, 3);

        let dropped = drop_snapshot(&session, &scope).unwrap();
        assert!(dropped.dropped);
        assert!(list_snapshots(&session).unwrap().is_empty());

        let again = drop_snapshot(&session, &scope).unwrap();
        assert!(!again.dropped, "second drop must be idempotent");
    }

    /// With a current tool call whose id matches a registered snapshot, the auto-capture hook
    /// records the file's pre-image so a later restore undoes the write.
    #[test]
    fn auto_capture_records_pre_image_keyed_by_current_tool_call_id() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("auto.txt");
        stdfs::write(&file, b"pre").unwrap();
        let session = unique_session("snap-auto");
        let scope = unique_scope();
        let _session_guard = enter_session(&session);
        let _tool_guard = harn_vm::agent_sessions::enter_current_tool_call(scope.clone());

        // Register an empty snapshot first; the hook only adds to existing snapshots.
        snapshot(&session, &scope, &[], Some(dir.path())).unwrap();
        auto_capture_for_write("hostlib_tools_write_file", &file);
        stdfs::write(&file, b"post").unwrap();

        let restored = restore(&session, &scope, &[]).unwrap();
        assert_eq!(restored.restored_paths.len(), 1);
        assert_eq!(stdfs::read(&file).unwrap(), b"pre");
    }

    /// Two 5-byte snapshots under an 8-byte cap: the older one is evicted.
    #[test]
    fn byte_cap_evicts_oldest_snapshot_when_exceeded() {
        let dir = TempDir::new().unwrap();
        let session = unique_session("snap-evict");
        let _session_guard = enter_session(&session);

        // One 5-byte file fits under the cap, two (10 bytes) do not.
        configure_session_byte_cap(&session, 8);

        let mk = |name: &str| {
            let path = dir.path().join(name);
            stdfs::write(&path, b"12345").unwrap();
            path
        };

        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let a = mk("a.txt");
        snapshot(
            &session,
            &scope_a,
            &[a.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        let b = mk("b.txt");
        snapshot(
            &session,
            &scope_b,
            &[b.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();

        let ids: Vec<String> = list_snapshots(&session)
            .unwrap()
            .into_iter()
            .map(|summary| summary.snapshot_id)
            .collect();
        assert_eq!(
            ids,
            vec![scope_b],
            "older snapshot must be evicted when the per-session byte cap is exceeded"
        );
    }

    /// Dropping a session removes all of its snapshots and is safe to repeat.
    #[test]
    fn drop_session_snapshots_removes_every_snapshot_for_a_session() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("retained.txt");
        stdfs::write(&file, b"x").unwrap();
        let session = unique_session("snap-drop-session");
        let scope_a = unique_scope();
        let scope_b = unique_scope();
        let _session_guard = enter_session(&session);

        snapshot(
            &session,
            &scope_a,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        snapshot(
            &session,
            &scope_b,
            &[file.to_string_lossy().into_owned()],
            Some(dir.path()),
        )
        .unwrap();
        assert_eq!(list_snapshots(&session).unwrap().len(), 2);

        assert_eq!(drop_session_snapshots(&session), 2);
        assert!(list_snapshots(&session).unwrap().is_empty());
        assert_eq!(drop_session_snapshots(&session), 0, "idempotent");
    }
}