1use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability};
24use crate::tools::args::{
25 build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26 require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Builtin identifiers registered by `FsCapability::register_builtins`; the same
// strings are used as the `builtin` label on errors raised by this module.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Schema version written into every manifest; `load_state` rejects any other value.
const MANIFEST_VERSION: u32 = 1;
// Path components of the staged-state area.
// NOTE(review): presumably joined under the session root by `session_dir` — confirm there.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
41#[derive(Default)]
43pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46 fn module_name(&self) -> &'static str {
47 "fs"
48 }
49
50 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51 registry.register_fn("fs", SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52 registry.register_fn("fs", STATUS_BUILTIN, "staged_status", staged_status_builtin);
53 registry.register_fn("fs", COMMIT_BUILTIN, "commit_staged", commit_staged_builtin);
54 registry.register_fn(
55 "fs",
56 DISCARD_BUILTIN,
57 "discard_staged",
58 discard_staged_builtin,
59 );
60 registry.register_gated_fn(
63 "fs",
64 SAFE_TEXT_PATCH_BUILTIN,
65 "safe_text_patch",
66 safe_text_patch_builtin,
67 );
68 registry.register_gated_fn("fs", READ_TEXT_BUILTIN, "read_text", read_text_builtin);
69 registry.register_fn(
70 "fs",
71 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
72 "emit_safe_text_patch_result",
73 emit_safe_text_patch_result_builtin,
74 );
75 }
76}
77
/// Write mode of a filesystem session.
///
/// Serialized in lowercase (`"immediate"` / `"staged"`), matching
/// [`FsMode::as_str`] and the strings accepted by `FsMode::parse`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Staging is bypassed: the `stage_*_or_none` helpers return `None` in this mode.
    Immediate,
    /// Writes and deletes are recorded as pending entries until committed or discarded.
    Staged,
}
87
88impl FsMode {
89 fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
90 match raw {
91 "immediate" => Ok(Self::Immediate),
92 "staged" => Ok(Self::Staged),
93 other => Err(HostlibError::InvalidParameter {
94 builtin,
95 param: "mode",
96 message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
97 }),
98 }
99 }
100
101 pub fn as_str(self) -> &'static str {
103 match self {
104 Self::Immediate => "immediate",
105 Self::Staged => "staged",
106 }
107 }
108}
109
/// On-disk (JSON) form of a session's staged state, written by `persist_state`
/// and read back by `load_state`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Schema version; must equal `MANIFEST_VERSION` to be loaded.
    version: u32,
    /// Session the manifest belongs to; checked against the requested id on load.
    session_id: String,
    /// Mode the session was in when the manifest was written.
    mode: FsMode,
    /// Session root, stored as a (lossily converted) string.
    root: String,
    /// Pending entries keyed by their (lossily converted) path string.
    entries: BTreeMap<String, StagedEntry>,
}
118
/// A single pending change recorded for a path.
///
/// Serialized with an internal `kind` tag in snake_case (`"write"` / `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// Pending file write whose content lives in the session's `bodies` directory.
    Write {
        /// Hex SHA-256 of the body; also the body's file name (see `write_body`).
        body_hash: String,
        /// Length of the staged body in bytes.
        len: u64,
        /// `now_ms()` value captured when the entry was staged.
        created_at_ms: i64,
    },
    /// Pending deletion of a path that existed on disk when it was staged.
    Delete {
        /// The `recursive` flag the caller passed when staging the delete.
        recursive: bool,
        /// `now_ms()` value captured when the entry was staged.
        created_at_ms: i64,
    },
}
132
133impl StagedEntry {
134 fn created_at_ms(&self) -> i64 {
135 match self {
136 Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
137 *created_at_ms
138 }
139 }
140 }
141
142 fn body_len(&self) -> u64 {
143 match self {
144 Self::Write { len, .. } => *len,
145 Self::Delete { .. } => 0,
146 }
147 }
148}
149
/// In-memory state of one session, cached in the `SESSIONS` map and mirrored to
/// disk through `persist_state`.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier the state is stored under.
    session_id: String,
    /// Current write mode of the session.
    mode: FsMode,
    /// Root under which the session's state directory is resolved.
    root: PathBuf,
    /// Pending entries keyed by normalized path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
157
/// Result of staging a write through `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not already exist in the overlay view.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
163
/// One entry of a directory listing returned by the staged `read_dir`.
///
/// NOTE(review): values are filled in by `overlay_read_dir`, which is not shown
/// here; field meanings below are taken from the names — confirm against it.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name (presumably the final path component).
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub(crate) is_symlink: bool,
    /// Entry size in bytes.
    pub(crate) size: u64,
}
171
/// Snapshot of a session's pending changes, as returned by [`staged_status`].
///
/// NOTE(review): populated by `status_from_state`, which is not shown here;
/// field descriptions follow the names — confirm against that function.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// One item per pending entry.
    pub pending_writes: Vec<PendingWrite>,
    /// Total size of pending bodies, in bytes.
    pub total_bytes_pending: u64,
    /// Age in milliseconds of the oldest pending entry.
    pub oldest_pending_age_ms: i64,
}
182
/// Summary of a single pending entry inside [`StagedStatus`].
///
/// NOTE(review): constructed outside the visible code; descriptions follow the
/// field names — confirm against `status_from_state`.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Path of the pending entry.
    pub path: String,
    /// Static label for the entry type (presumably `"write"` or `"delete"`).
    pub kind: &'static str,
    /// Bytes the change would add.
    pub bytes_added: u64,
    /// Bytes the change would remove.
    pub bytes_removed: u64,
}
195
/// Result of [`set_mode`].
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode the session was in before the call changed it.
    pub previous_mode: FsMode,
}
202
/// Result of [`commit_staged`].
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths whose staged entry was applied and removed from the pending set.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for entries that failed the scope check or the commit itself.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
211
/// Result of [`discard_staged`].
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose pending entry was dropped without being applied.
    pub discarded_paths: Vec<String>,
}
218
219static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
220
221fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
222 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
223}
224
225fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
229 sessions()
230 .lock()
231 .expect("hostlib fs session mutex poisoned")
232}
233
234pub fn configure_session_root(session_id: &str, root: &Path) {
239 if session_id.trim().is_empty() {
240 return;
241 }
242 let root = normalize_logical(root);
243 let mut guard = lock_sessions();
244 match guard.get_mut(session_id) {
245 Some(state) if state.entries.is_empty() => {
246 state.root = root;
247 }
248 Some(_) => {}
249 None => {
250 let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
251 session_id: session_id.to_string(),
252 mode: FsMode::Immediate,
253 root,
254 entries: BTreeMap::new(),
255 });
256 guard.insert(session_id.to_string(), state);
257 }
258 }
259}
260
261pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
263 if session_id.trim().is_empty() {
264 return None;
265 }
266 let guard = lock_sessions();
267 guard.get(session_id).map(|state| state.root.clone())
268}
269
270pub fn set_mode(
272 session_id: &str,
273 mode: FsMode,
274 root: Option<&Path>,
275) -> Result<SetModeResult, HostlibError> {
276 validate_session_id(SET_MODE_BUILTIN, session_id)?;
277 let mut guard = lock_sessions();
278 let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
279 let previous_mode = state.mode;
280 state.mode = mode;
281 persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
282 builtin: SET_MODE_BUILTIN,
283 message: err,
284 })?;
285 guard.insert(session_id.to_string(), state);
286 Ok(SetModeResult { previous_mode })
287}
288
289pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
291 validate_session_id(STATUS_BUILTIN, session_id)?;
292 let mut guard = lock_sessions();
293 let state = state_for_locked(&mut guard, session_id, None)?;
294 let status = status_from_state(&state);
295 guard.insert(session_id.to_string(), state);
296 Ok(status)
297}
298
299pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
301 validate_session_id(COMMIT_BUILTIN, session_id)?;
302 let mut guard = lock_sessions();
303 let mut state = state_for_locked(&mut guard, session_id, None)?;
304 let selected = selected_paths(&state, paths);
305 let mut committed_paths = Vec::new();
306 let mut failed_paths_with_reasons = Vec::new();
307
308 for path in selected {
309 let Some(entry) = state.entries.get(&path).cloned() else {
310 continue;
311 };
312 let path_label = path.to_string_lossy().into_owned();
313 let access = match entry {
319 StagedEntry::Write { .. } => FsAccess::Write,
320 StagedEntry::Delete { .. } => FsAccess::Delete,
321 };
322 if let Err(violation) = check_fs_path_scope(&path, access) {
323 failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
324 continue;
325 }
326 match commit_entry(&state, &path, &entry) {
327 Ok(()) => {
328 state.entries.remove(&path);
329 committed_paths.push(path_label);
330 }
331 Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
332 }
333 }
334
335 persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
336 builtin: COMMIT_BUILTIN,
337 message: err,
338 })?;
339 emit_staged_update(&state);
340 guard.insert(session_id.to_string(), state);
341 Ok(CommitResult {
342 committed_paths,
343 failed_paths_with_reasons,
344 })
345}
346
347pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
349 validate_session_id(DISCARD_BUILTIN, session_id)?;
350 let mut guard = lock_sessions();
351 let mut state = state_for_locked(&mut guard, session_id, None)?;
352 let selected = selected_paths(&state, paths);
353 let mut discarded_paths = Vec::new();
354 for path in selected {
355 if state.entries.remove(&path).is_some() {
356 discarded_paths.push(path.to_string_lossy().into_owned());
357 }
358 }
359 persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
360 builtin: DISCARD_BUILTIN,
361 message: err,
362 })?;
363 emit_staged_update(&state);
364 guard.insert(session_id.to_string(), state);
365 Ok(DiscardResult { discarded_paths })
366}
367
368pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
375 validate_session_id(DISCARD_BUILTIN, session_id)?;
376 let mut guard = lock_sessions();
377 let state = match guard.remove(session_id) {
378 Some(state) => state,
379 None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
380 HostlibError::Backend {
381 builtin: DISCARD_BUILTIN,
382 message: err,
383 }
384 })?,
385 };
386 let dir = session_dir(&state.root, &state.session_id);
387 match stdfs::remove_dir_all(&dir) {
388 Ok(()) => Ok(()),
389 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
390 Err(err) => Err(HostlibError::Backend {
391 builtin: DISCARD_BUILTIN,
392 message: format!("remove staged session {}: {err}", dir.display()),
393 }),
394 }
395}
396
397pub(crate) fn read(
398 path: &Path,
399 explicit_session_id: Option<&str>,
400) -> Option<std::io::Result<Vec<u8>>> {
401 let session_id = active_session_id(explicit_session_id)?;
402 let mut guard = lock_sessions();
403 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
404 let result = if state.mode == FsMode::Staged {
405 overlay_read(&state, path)
406 } else {
407 None
408 };
409 guard.insert(session_id, state);
410 result
411}
412
413pub(crate) fn read_to_string(
414 path: &Path,
415 explicit_session_id: Option<&str>,
416) -> Option<std::io::Result<String>> {
417 read(path, explicit_session_id).map(|result| {
418 result.and_then(|bytes| {
419 String::from_utf8(bytes).map_err(|err| {
420 std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
421 })
422 })
423 })
424}
425
426pub(crate) fn read_dir(
427 path: &Path,
428 explicit_session_id: Option<&str>,
429) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
430 let session_id = active_session_id(explicit_session_id)?;
431 let mut guard = lock_sessions();
432 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
433 let result = if state.mode == FsMode::Staged {
434 Some(overlay_read_dir(&state, path))
435 } else {
436 None
437 };
438 guard.insert(session_id, state);
439 result
440}
441
442pub(crate) fn stage_write_or_none(
443 builtin: &'static str,
444 path: &Path,
445 bytes: &[u8],
446 create_parents: bool,
447 overwrite: bool,
448 explicit_session_id: Option<&str>,
449) -> Result<Option<WriteOutcome>, HostlibError> {
450 let Some(session_id) = active_session_id(explicit_session_id) else {
451 return Ok(None);
452 };
453 let mut guard = lock_sessions();
454 let mut state = state_for_locked(&mut guard, &session_id, None)?;
455 if state.mode != FsMode::Staged {
456 guard.insert(session_id, state);
457 return Ok(None);
458 }
459
460 let key = normalize_logical(path);
461 let existed = overlay_exists(&state, &key);
462 if existed && !overwrite {
463 guard.insert(session_id, state);
464 return Err(HostlibError::Backend {
465 builtin,
466 message: format!("`{}` exists and overwrite=false", key.display()),
467 });
468 }
469 if !create_parents && !parent_exists(&state, &key) {
470 guard.insert(session_id, state);
471 return Err(HostlibError::Backend {
472 builtin,
473 message: format!("parent directory for `{}` does not exist", key.display()),
474 });
475 }
476
477 let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
478 builtin,
479 message: err,
480 })?;
481 state.entries.insert(
482 key.clone(),
483 StagedEntry::Write {
484 body_hash: hash,
485 len: bytes.len() as u64,
486 created_at_ms: now_ms(),
487 },
488 );
489 persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
490 builtin,
491 message: err,
492 })?;
493 emit_staged_update(&state);
494 guard.insert(session_id, state);
495 Ok(Some(WriteOutcome {
496 created: !existed,
497 bytes_written: bytes.len(),
498 }))
499}
500
501pub(crate) fn stage_delete_or_none(
502 builtin: &'static str,
503 path: &Path,
504 recursive: bool,
505 explicit_session_id: Option<&str>,
506) -> Result<Option<bool>, HostlibError> {
507 let Some(session_id) = active_session_id(explicit_session_id) else {
508 return Ok(None);
509 };
510 let mut guard = lock_sessions();
511 let mut state = state_for_locked(&mut guard, &session_id, None)?;
512 if state.mode != FsMode::Staged {
513 guard.insert(session_id, state);
514 return Ok(None);
515 }
516
517 let key = normalize_logical(path);
518 let staged_targets = staged_paths_under(&state, &key);
519 let disk_exists = key.exists();
520 if !disk_exists && staged_targets.is_empty() {
521 guard.insert(session_id, state);
522 return Ok(Some(false));
523 }
524
525 if !disk_exists {
526 for staged in staged_targets {
527 state.entries.remove(&staged);
528 }
529 } else {
530 validate_delete_shape(builtin, &key, recursive)?;
531 for staged in staged_targets {
532 state.entries.remove(&staged);
533 }
534 state.entries.insert(
535 key.clone(),
536 StagedEntry::Delete {
537 recursive,
538 created_at_ms: now_ms(),
539 },
540 );
541 }
542 persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
543 builtin,
544 message: err,
545 })?;
546 emit_staged_update(&state);
547 guard.insert(session_id, state);
548 Ok(Some(true))
549}
550
/// Outcome of [`safe_text_patch`].
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Whether the patch was applied, rejected as stale, or was a no-op.
    pub result: SafeTextPatchResult,
    /// `sha256:<hex>` label of the content found before the call (the hash of
    /// empty content when the file did not exist).
    pub current_hash: String,
    /// `sha256:<hex>` label of the requested new content.
    pub after_hash: String,
    /// `true` only when the patch was applied and the file did not exist before.
    pub created: bool,
    /// Length of the new content when applied; 0 for stale-base and no-op results.
    pub bytes_written: usize,
}
567
/// Classification of a [`safe_text_patch`] call.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The new content was written (or staged).
    Applied,
    /// The caller's expected hash did not match the current content; nothing was written.
    StaleBase,
    /// The file already held exactly the requested content; nothing was written.
    NoOp,
}

impl SafeTextPatchResult {
    /// Stable snake_case label used in builtin results.
    fn as_str(self) -> &'static str {
        let label = match self {
            SafeTextPatchResult::NoOp => "no_op",
            SafeTextPatchResult::StaleBase => "stale_base",
            SafeTextPatchResult::Applied => "applied",
        };
        label
    }
}
591
592fn hash_label(bytes: &[u8]) -> String {
596 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
597}
598
599pub fn safe_text_patch(
621 path: &Path,
622 content: &str,
623 expected_hash: Option<&str>,
624 session_id: Option<&str>,
625 create_parents: bool,
626 overwrite: bool,
627) -> Result<SafeTextPatchOutcome, HostlibError> {
628 let new_bytes = content.as_bytes();
629 let after_hash = hash_label(new_bytes);
630
631 if let Some(outcome) = safe_text_patch_staged(
632 path,
633 new_bytes,
634 expected_hash,
635 session_id,
636 create_parents,
637 overwrite,
638 &after_hash,
639 )? {
640 return Ok(outcome);
641 }
642
643 safe_text_patch_disk(
644 path,
645 new_bytes,
646 expected_hash,
647 create_parents,
648 overwrite,
649 after_hash,
650 )
651}
652
653#[allow(clippy::too_many_arguments)]
659fn safe_text_patch_staged(
660 path: &Path,
661 new_bytes: &[u8],
662 expected_hash: Option<&str>,
663 session_id: Option<&str>,
664 create_parents: bool,
665 overwrite: bool,
666 after_hash: &str,
667) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
668 let Some(session) = active_session_id(session_id) else {
669 return Ok(None);
670 };
671 let mut guard = lock_sessions();
672 let mut state = state_for_locked(&mut guard, &session, None)?;
673 if state.mode != FsMode::Staged {
674 guard.insert(session, state);
675 return Ok(None);
676 }
677
678 let key = normalize_logical(path);
679 let (existing_bytes, existed) = match overlay_read(&state, path) {
680 Some(Ok(bytes)) => (bytes, true),
681 Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
682 Some(Err(err)) => {
683 guard.insert(session, state);
684 return Err(HostlibError::Backend {
685 builtin: SAFE_TEXT_PATCH_BUILTIN,
686 message: format!("read `{}`: {err}", path.display()),
687 });
688 }
689 None => match stdfs::read(path) {
690 Ok(bytes) => (bytes, true),
691 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
692 Err(err) => {
693 guard.insert(session, state);
694 return Err(HostlibError::Backend {
695 builtin: SAFE_TEXT_PATCH_BUILTIN,
696 message: format!("read `{}`: {err}", path.display()),
697 });
698 }
699 },
700 };
701 let current_hash = hash_label(&existing_bytes);
702
703 if let Some(expected) = expected_hash {
704 if expected != current_hash {
705 guard.insert(session, state);
706 return Ok(Some(SafeTextPatchOutcome {
707 result: SafeTextPatchResult::StaleBase,
708 current_hash,
709 after_hash: after_hash.to_string(),
710 created: false,
711 bytes_written: 0,
712 }));
713 }
714 }
715
716 if existed && existing_bytes == new_bytes {
717 guard.insert(session, state);
718 return Ok(Some(SafeTextPatchOutcome {
719 result: SafeTextPatchResult::NoOp,
720 current_hash,
721 after_hash: after_hash.to_string(),
722 created: false,
723 bytes_written: 0,
724 }));
725 }
726
727 let overlay_existed = overlay_exists(&state, &key);
728 if overlay_existed && !overwrite {
729 guard.insert(session, state);
730 return Err(HostlibError::Backend {
731 builtin: SAFE_TEXT_PATCH_BUILTIN,
732 message: format!("`{}` exists and overwrite=false", key.display()),
733 });
734 }
735 if !create_parents && !parent_exists(&state, &key) {
736 guard.insert(session, state);
737 return Err(HostlibError::Backend {
738 builtin: SAFE_TEXT_PATCH_BUILTIN,
739 message: format!("parent directory for `{}` does not exist", key.display()),
740 });
741 }
742
743 let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
744 builtin: SAFE_TEXT_PATCH_BUILTIN,
745 message: err,
746 })?;
747 state.entries.insert(
748 key.clone(),
749 StagedEntry::Write {
750 body_hash,
751 len: new_bytes.len() as u64,
752 created_at_ms: now_ms(),
753 },
754 );
755 persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
756 builtin: SAFE_TEXT_PATCH_BUILTIN,
757 message: err,
758 })?;
759 emit_staged_update(&state);
760 guard.insert(session, state);
761
762 Ok(Some(SafeTextPatchOutcome {
763 result: SafeTextPatchResult::Applied,
764 current_hash,
765 after_hash: after_hash.to_string(),
766 created: !existed,
767 bytes_written: new_bytes.len(),
768 }))
769}
770
771fn safe_text_patch_disk(
776 path: &Path,
777 new_bytes: &[u8],
778 expected_hash: Option<&str>,
779 create_parents: bool,
780 overwrite: bool,
781 after_hash: String,
782) -> Result<SafeTextPatchOutcome, HostlibError> {
783 let (existing_bytes, existed) = match stdfs::read(path) {
784 Ok(bytes) => (bytes, true),
785 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
786 Err(err) => {
787 return Err(HostlibError::Backend {
788 builtin: SAFE_TEXT_PATCH_BUILTIN,
789 message: format!("read `{}`: {err}", path.display()),
790 });
791 }
792 };
793 let current_hash = hash_label(&existing_bytes);
794
795 if let Some(expected) = expected_hash {
796 if expected != current_hash {
797 return Ok(SafeTextPatchOutcome {
798 result: SafeTextPatchResult::StaleBase,
799 current_hash,
800 after_hash,
801 created: false,
802 bytes_written: 0,
803 });
804 }
805 }
806
807 if existed && existing_bytes == new_bytes {
808 return Ok(SafeTextPatchOutcome {
809 result: SafeTextPatchResult::NoOp,
810 current_hash,
811 after_hash,
812 created: false,
813 bytes_written: 0,
814 });
815 }
816 if existed && !overwrite {
817 return Err(HostlibError::Backend {
818 builtin: SAFE_TEXT_PATCH_BUILTIN,
819 message: format!("`{}` exists and overwrite=false", path.display()),
820 });
821 }
822 if !create_parents {
823 if let Some(parent) = path.parent() {
824 if !parent.as_os_str().is_empty() && !parent.is_dir() {
825 return Err(HostlibError::Backend {
826 builtin: SAFE_TEXT_PATCH_BUILTIN,
827 message: format!(
828 "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
829 path.display()
830 ),
831 });
832 }
833 }
834 }
835
836 crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
837 atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
838 builtin: SAFE_TEXT_PATCH_BUILTIN,
839 message: format!("write `{}`: {err}", path.display()),
840 })?;
841
842 Ok(SafeTextPatchOutcome {
843 result: SafeTextPatchResult::Applied,
844 current_hash,
845 after_hash,
846 created: !existed,
847 bytes_written: new_bytes.len(),
848 })
849}
850
851fn read_existing(
856 builtin: &'static str,
857 path: &Path,
858 session_id: Option<&str>,
859) -> Result<(Vec<u8>, bool), HostlibError> {
860 if let Some(result) = read(path, session_id) {
861 return match result {
862 Ok(bytes) => Ok((bytes, true)),
863 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
864 Err(err) => Err(HostlibError::Backend {
865 builtin,
866 message: format!("read `{}`: {err}", path.display()),
867 }),
868 };
869 }
870 match stdfs::read(path) {
871 Ok(bytes) => Ok((bytes, true)),
872 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
873 Err(err) => Err(HostlibError::Backend {
874 builtin,
875 message: format!("read `{}`: {err}", path.display()),
876 }),
877 }
878}
879
880fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
881 let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
882 let dict = raw.as_ref();
883 let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
884 let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
885 let path = Path::new(&path_str);
886 enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
887
888 let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
889 let hash = hash_label(&bytes);
890 let content = match std::str::from_utf8(&bytes) {
891 Ok(s) => s.to_string(),
892 Err(err) => {
893 return Err(HostlibError::Backend {
894 builtin: READ_TEXT_BUILTIN,
895 message: format!("`{path_str}` is not valid UTF-8: {err}"),
896 });
897 }
898 };
899 let bytes_len = bytes.len() as i64;
900 Ok(build_dict([
901 ("path", str_value(&path_str)),
902 ("content", str_value(&content)),
903 ("sha256", str_value(&hash)),
904 ("size", VmValue::Int(bytes_len)),
905 ("exists", VmValue::Bool(existed)),
906 ]))
907}
908
909fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
910 let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
911 let dict = raw.as_ref();
912
913 let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
914 let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
915 let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
916 let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
917 let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
918 let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
919
920 enforce_path_scope(
921 SAFE_TEXT_PATCH_BUILTIN,
922 Path::new(&path_str),
923 FsAccess::Write,
924 )?;
925 let outcome = safe_text_patch(
926 Path::new(&path_str),
927 &content,
928 expected_hash.as_deref(),
929 session_id.as_deref(),
930 create_parents,
931 overwrite,
932 )?;
933
934 let entries: Vec<(&'static str, VmValue)> = vec![
935 ("path", str_value(&path_str)),
936 ("result", str_value(outcome.result.as_str())),
937 (
938 "applied",
939 VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
940 ),
941 (
942 "stale_base",
943 VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
944 ),
945 ("current_hash", str_value(&outcome.current_hash)),
946 ("before_sha256", str_value(&outcome.current_hash)),
947 ("after_sha256", str_value(&outcome.after_hash)),
948 ("created", VmValue::Bool(outcome.created)),
949 ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
950 (
951 "expected_hash",
952 match expected_hash.as_deref() {
953 Some(hash) => str_value(hash),
954 None => VmValue::Nil,
955 },
956 ),
957 ];
958 Ok(build_dict(entries))
959}
960
961fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
962 let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
963 let dict = raw.as_ref();
964
965 let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
966 let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
967 let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
968 let bytes_written = optional_int(
969 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
970 dict,
971 "bytes_written",
972 0,
973 )?;
974 let failed_hunk_index = match dict.get("failed_hunk_index") {
975 None | Some(VmValue::Nil) => None,
976 Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
977 Some(other) => {
978 return Err(HostlibError::InvalidParameter {
979 builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
980 param: "failed_hunk_index",
981 message: format!("expected non-negative integer, got {}", other.type_name()),
982 });
983 }
984 };
985 let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
986 .or_else(harn_vm::agent_sessions::current_session_id);
987
988 if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
989 harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
990 session_id,
991 path,
992 result,
993 hunks_count: hunks_count.max(0) as usize,
994 bytes_written: bytes_written.max(0) as u64,
995 failed_hunk_index,
996 });
997 Ok(VmValue::Bool(true))
998 } else {
999 Ok(VmValue::Bool(false))
1003 }
1004}
1005
1006fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1007 let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1008 let dict = raw.as_ref();
1009 let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1010 let mode = FsMode::parse(
1011 SET_MODE_BUILTIN,
1012 &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1013 )?;
1014 let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1015 let result = set_mode(&session_id, mode, root.as_deref())?;
1016 Ok(build_dict([(
1017 "previous_mode",
1018 str_value(result.previous_mode.as_str()),
1019 )]))
1020}
1021
1022fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1023 let raw = dict_arg(STATUS_BUILTIN, args)?;
1024 let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1025 Ok(status_to_value(staged_status(&session_id)?))
1026}
1027
1028fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1029 let raw = dict_arg(COMMIT_BUILTIN, args)?;
1030 let dict = raw.as_ref();
1031 let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1032 let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1033 Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1034}
1035
1036fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1037 let raw = dict_arg(DISCARD_BUILTIN, args)?;
1038 let dict = raw.as_ref();
1039 let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1040 let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1041 Ok(discard_result_to_value(discard_staged(
1042 &session_id,
1043 &paths,
1044 )?))
1045}
1046
1047fn state_for_locked(
1048 guard: &mut BTreeMap<String, SessionState>,
1049 session_id: &str,
1050 root: Option<PathBuf>,
1051) -> Result<SessionState, HostlibError> {
1052 if let Some(existing) = guard.get(session_id) {
1053 let mut state = existing.clone();
1054 if let Some(root) = root {
1055 if state.entries.is_empty() {
1056 state.root = root;
1057 }
1058 }
1059 return Ok(state);
1060 }
1061 let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1062 builtin: SET_MODE_BUILTIN,
1063 message: err,
1064 })?;
1065 Ok(state)
1066}
1067
1068fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1069 let root = root.unwrap_or_else(default_root);
1070 let manifest_path = manifest_path(&root, session_id);
1071 if manifest_path.exists() {
1072 let text = stdfs::read_to_string(&manifest_path)
1073 .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1074 let manifest: Manifest = serde_json::from_str(&text)
1075 .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1076 if manifest.version != MANIFEST_VERSION {
1077 return Err(format!(
1078 "unsupported staged fs manifest version {} in {}",
1079 manifest.version,
1080 manifest_path.display()
1081 ));
1082 }
1083 if manifest.session_id != session_id {
1084 return Err(format!(
1085 "staged fs manifest session id mismatch in {}",
1086 manifest_path.display()
1087 ));
1088 }
1089 return Ok(SessionState {
1090 session_id: manifest.session_id,
1091 mode: manifest.mode,
1092 root: normalize_logical(Path::new(&manifest.root)),
1093 entries: manifest
1094 .entries
1095 .into_iter()
1096 .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1097 .collect(),
1098 });
1099 }
1100 Ok(SessionState {
1101 session_id: session_id.to_string(),
1102 mode: FsMode::Immediate,
1103 root,
1104 entries: BTreeMap::new(),
1105 })
1106}
1107
1108fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1109 let dir = session_dir(&state.root, &state.session_id);
1110 stdfs::create_dir_all(dir.join("bodies"))
1111 .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1112 let manifest = Manifest {
1113 version: MANIFEST_VERSION,
1114 session_id: state.session_id.clone(),
1115 mode: state.mode,
1116 root: state.root.to_string_lossy().into_owned(),
1117 entries: state
1118 .entries
1119 .iter()
1120 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1121 .collect(),
1122 };
1123 let bytes = serde_json::to_vec_pretty(&manifest)
1124 .map_err(|err| format!("serialize staged manifest: {err}"))?;
1125 atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1126 append_journal(state, op, path)?;
1127 prune_unreferenced_bodies(state);
1128 Ok(())
1129}
1130
1131fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1132 let dir = session_dir(&state.root, &state.session_id);
1133 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1134 let line = serde_json::to_string(&serde_json::json!({
1135 "ts_ms": now_ms(),
1136 "op": op,
1137 "path": path.map(|path| path.to_string_lossy().into_owned()),
1138 "pending_count": state.entries.len(),
1139 }))
1140 .map_err(|err| format!("serialize staged journal: {err}"))?;
1141 let mut file = stdfs::OpenOptions::new()
1142 .create(true)
1143 .append(true)
1144 .open(dir.join("journal.jsonl"))
1145 .map_err(|err| format!("open staged journal: {err}"))?;
1146 writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1147}
1148
1149fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1150 let hash = hex::encode(Sha256::digest(bytes));
1151 let path = session_dir(&state.root, &state.session_id)
1152 .join("bodies")
1153 .join(&hash);
1154 if !path.exists() {
1155 atomic_write(&path, bytes)?;
1156 }
1157 Ok(hash)
1158}
1159
1160fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1161 stdfs::read(
1162 session_dir(&state.root, &state.session_id)
1163 .join("bodies")
1164 .join(hash),
1165 )
1166}
1167
1168fn prune_unreferenced_bodies(state: &SessionState) {
1169 let live: BTreeSet<String> = state
1170 .entries
1171 .values()
1172 .filter_map(|entry| match entry {
1173 StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1174 StagedEntry::Delete { .. } => None,
1175 })
1176 .collect();
1177 let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1178 let Ok(entries) = stdfs::read_dir(&body_dir) else {
1179 return;
1180 };
1181 for entry in entries.flatten() {
1182 let name = entry.file_name().to_string_lossy().into_owned();
1183 if !live.contains(&name) {
1184 let _ = stdfs::remove_file(entry.path());
1185 }
1186 }
1187}
1188
1189fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1190 if let Some(parent) = path.parent() {
1191 stdfs::create_dir_all(parent)
1192 .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1193 }
1194 let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1195 stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1196 match stdfs::rename(&tmp, path) {
1197 Ok(()) => Ok(()),
1198 Err(err) => {
1199 let _ = stdfs::remove_file(path);
1200 stdfs::rename(&tmp, path).map_err(|retry| {
1201 format!(
1202 "rename {} to {}: {err}; retry: {retry}",
1203 tmp.display(),
1204 path.display()
1205 )
1206 })
1207 }
1208 }
1209}
1210
1211fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1212 match entry {
1213 StagedEntry::Write { body_hash, .. } => {
1214 let bytes = read_body(state, body_hash)
1215 .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1216 atomic_write(path, &bytes)
1217 }
1218 StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1219 Ok(metadata) if metadata.is_dir() => {
1220 if *recursive {
1221 stdfs::remove_dir_all(path)
1222 .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1223 } else {
1224 stdfs::remove_dir(path)
1225 .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1226 }
1227 }
1228 Ok(_) => stdfs::remove_file(path)
1229 .map_err(|err| format!("remove_file {}: {err}", path.display())),
1230 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1231 Err(err) => Err(format!("stat {}: {err}", path.display())),
1232 },
1233 }
1234}
1235
1236fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1237 let key = normalize_logical(path);
1238 if let Some(entry) = state.entries.get(&key) {
1239 return Some(match entry {
1240 StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1241 StagedEntry::Delete { .. } => Err(not_found(&key)),
1242 });
1243 }
1244 if deleted_ancestor(state, &key) {
1245 return Some(Err(not_found(&key)));
1246 }
1247 None
1248}
1249
1250fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1251 let dir_key = normalize_logical(path);
1252 if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1253 || deleted_ancestor(state, &dir_key)
1254 || matches!(
1255 state.entries.get(&dir_key),
1256 Some(StagedEntry::Delete { .. })
1257 )
1258 {
1259 return Err(not_found(&dir_key));
1260 }
1261 if !path.exists() && !has_staged_descendant(state, &dir_key) {
1262 return Err(not_found(&dir_key));
1263 }
1264
1265 let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1266 if path.exists() {
1267 for entry in stdfs::read_dir(path)? {
1268 let entry = entry?;
1269 let name = entry.file_name().to_string_lossy().into_owned();
1270 let file_type = entry.file_type().ok();
1271 let metadata = entry.metadata().ok();
1272 entries.insert(
1273 name.clone(),
1274 OverlayDirEntry {
1275 name,
1276 is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1277 is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1278 size: metadata.map(|m| m.len()).unwrap_or(0),
1279 },
1280 );
1281 }
1282 }
1283
1284 for (path, entry) in &state.entries {
1285 let Some(name) = overlay_child_name(path, &dir_key) else {
1286 continue;
1287 };
1288 match entry {
1289 StagedEntry::Write { len, .. } => {
1290 let is_dir = path.parent() != Some(dir_key.as_path());
1291 entries.insert(
1292 name.clone(),
1293 OverlayDirEntry {
1294 name,
1295 is_dir,
1296 is_symlink: false,
1297 size: if is_dir { 0 } else { *len },
1298 },
1299 );
1300 }
1301 StagedEntry::Delete { .. } => {
1302 if path.parent() == Some(dir_key.as_path()) {
1303 entries.remove(&name);
1304 }
1305 }
1306 }
1307 }
1308
1309 Ok(entries.into_values().collect())
1310}
1311
/// Returns the name of the first path component of `path` below `dir`.
///
/// Yields `None` when `path` is not underneath `dir`, when it is `dir` itself,
/// or when the first remaining component is not a normal name (for example
/// `..`).
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Some(Component::Normal(name)) = relative.components().next() {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1321
1322fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1323 if let Some(entry) = state.entries.get(path) {
1324 return matches!(entry, StagedEntry::Write { .. });
1325 }
1326 if deleted_ancestor(state, path) {
1327 return false;
1328 }
1329 if has_staged_descendant(state, path) {
1330 return true;
1331 }
1332 path.exists()
1333}
1334
1335fn parent_exists(state: &SessionState, path: &Path) -> bool {
1336 let Some(parent) = path.parent() else {
1337 return true;
1338 };
1339 if parent.as_os_str().is_empty() {
1340 return true;
1341 }
1342 if let Some(entry) = state.entries.get(parent) {
1343 return !matches!(entry, StagedEntry::Delete { .. });
1344 }
1345 if deleted_ancestor(state, parent) {
1346 return false;
1347 }
1348 if has_staged_descendant(state, parent) {
1349 return true;
1350 }
1351 parent.is_dir()
1352}
1353
1354fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1355 state.entries.iter().any(|(candidate, entry)| {
1356 matches!(entry, StagedEntry::Delete { .. })
1357 && path != candidate.as_path()
1358 && path.starts_with(candidate)
1359 })
1360}
1361
1362fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1363 state.entries.iter().any(|(candidate, entry)| {
1364 matches!(entry, StagedEntry::Write { .. })
1365 && candidate != path
1366 && candidate.starts_with(path)
1367 })
1368}
1369
1370fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1371 state
1372 .entries
1373 .keys()
1374 .filter(|candidate| *candidate == path || candidate.starts_with(path))
1375 .cloned()
1376 .collect()
1377}
1378
1379fn validate_delete_shape(
1380 builtin: &'static str,
1381 path: &Path,
1382 recursive: bool,
1383) -> Result<(), HostlibError> {
1384 let Ok(metadata) = stdfs::symlink_metadata(path) else {
1385 return Ok(());
1386 };
1387 if metadata.is_dir() && !recursive {
1388 let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1389 builtin,
1390 message: format!("read_dir `{}`: {err}", path.display()),
1391 })?;
1392 if entries.next().is_some() {
1393 return Err(HostlibError::Backend {
1394 builtin,
1395 message: format!(
1396 "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1397 path.display()
1398 ),
1399 });
1400 }
1401 }
1402 Ok(())
1403}
1404
1405fn status_from_state(state: &SessionState) -> StagedStatus {
1406 let now = now_ms();
1407 let mut pending_writes = Vec::new();
1408 let mut total_bytes_pending = 0u64;
1409 let mut oldest = None;
1410 for (path, entry) in &state.entries {
1411 total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1412 oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1413 old.min(entry.created_at_ms())
1414 }));
1415 let (kind, bytes_added, bytes_removed) = match entry {
1416 StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1417 StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1418 };
1419 pending_writes.push(PendingWrite {
1420 path: path.to_string_lossy().into_owned(),
1421 kind,
1422 bytes_added,
1423 bytes_removed,
1424 });
1425 }
1426 StagedStatus {
1427 pending_writes,
1428 total_bytes_pending,
1429 oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1430 }
1431}
1432
1433fn disk_size(path: &Path) -> Option<u64> {
1434 let metadata = stdfs::symlink_metadata(path).ok()?;
1435 if metadata.is_file() {
1436 return Some(metadata.len());
1437 }
1438 if metadata.is_dir() {
1439 let mut total = 0u64;
1440 for entry in walkdir::WalkDir::new(path)
1441 .into_iter()
1442 .filter_map(Result::ok)
1443 {
1444 if let Ok(metadata) = entry.metadata() {
1445 if metadata.is_file() {
1446 total = total.saturating_add(metadata.len());
1447 }
1448 }
1449 }
1450 return Some(total);
1451 }
1452 Some(metadata.len())
1453}
1454
1455fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1456 if paths.is_empty() {
1457 return state.entries.keys().cloned().collect();
1458 }
1459 let selected: BTreeSet<PathBuf> = paths
1460 .iter()
1461 .map(|path| normalize_logical(Path::new(path)))
1462 .collect();
1463 state
1464 .entries
1465 .keys()
1466 .filter(|path| selected.contains(*path))
1467 .cloned()
1468 .collect()
1469}
1470
1471fn active_session_id(explicit: Option<&str>) -> Option<String> {
1472 explicit
1473 .map(str::to_string)
1474 .or_else(harn_vm::agent_sessions::current_session_id)
1475 .filter(|id| !id.trim().is_empty())
1476}
1477
1478fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1479 if session_id.trim().is_empty() {
1480 return Err(HostlibError::InvalidParameter {
1481 builtin,
1482 param: "session_id",
1483 message: "must not be empty".to_string(),
1484 });
1485 }
1486 Ok(())
1487}
1488
/// Returns the process's current working directory, or `"."` when it cannot
/// be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1492
1493fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1494 let mut dir = root.to_path_buf();
1495 for component in STATE_REL {
1496 dir.push(component);
1497 }
1498 dir.push(sanitize_component(session_id));
1499 dir
1500}
1501
1502fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1503 session_dir(root, session_id).join("manifest.json")
1504}
1505
1506fn sanitize_component(input: &str) -> String {
1507 let sanitized: String = input
1508 .chars()
1509 .map(|ch| match ch {
1510 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1511 _ => '_',
1512 })
1513 .collect();
1514 let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1520 if sanitized == input && !is_dotted {
1521 sanitized
1522 } else {
1523 let hash = hex::encode(Sha256::digest(input.as_bytes()));
1524 format!("{sanitized}-{}", &hash[..12])
1525 }
1526}
1527
1528fn normalize_logical(path: &Path) -> PathBuf {
1529 let absolute = if path.is_absolute() {
1530 path.to_path_buf()
1531 } else {
1532 default_root().join(path)
1533 };
1534 let mut out = PathBuf::new();
1535 for component in absolute.components() {
1536 match component {
1537 Component::ParentDir => {
1538 out.pop();
1539 }
1540 Component::CurDir => {}
1541 other => out.push(other),
1542 }
1543 }
1544 out
1545}
1546
/// Builds the `NotFound` I/O error reported for paths hidden by the overlay.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1553
/// Returns the current wall-clock time as milliseconds since the Unix epoch,
/// or `0` when the system clock reads earlier than the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1560
1561fn emit_staged_update(state: &SessionState) {
1562 let status = status_from_state(state);
1563 harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1564 session_id: state.session_id.clone(),
1565 pending_count: status.pending_writes.len(),
1566 total_bytes: status.total_bytes_pending,
1567 });
1568}
1569
1570fn pending_write_to_value(write: PendingWrite) -> VmValue {
1571 build_dict([
1572 ("path", str_value(&write.path)),
1573 ("kind", str_value(write.kind)),
1574 ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1575 ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1576 ])
1577}
1578
1579fn status_to_value(status: StagedStatus) -> VmValue {
1580 build_dict([
1581 (
1582 "pending_writes",
1583 VmValue::List(Arc::new(
1584 status
1585 .pending_writes
1586 .into_iter()
1587 .map(pending_write_to_value)
1588 .collect(),
1589 )),
1590 ),
1591 (
1592 "total_bytes_pending",
1593 VmValue::Int(status.total_bytes_pending as i64),
1594 ),
1595 (
1596 "oldest_pending_age_ms",
1597 VmValue::Int(status.oldest_pending_age_ms),
1598 ),
1599 ])
1600}
1601
1602fn commit_result_to_value(result: CommitResult) -> VmValue {
1603 build_dict([
1604 (
1605 "committed_paths",
1606 VmValue::List(Arc::new(
1607 result
1608 .committed_paths
1609 .into_iter()
1610 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1611 .collect(),
1612 )),
1613 ),
1614 (
1615 "failed_paths_with_reasons",
1616 VmValue::List(Arc::new(
1617 result
1618 .failed_paths_with_reasons
1619 .into_iter()
1620 .map(|(path, reason)| {
1621 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1622 })
1623 .collect(),
1624 )),
1625 ),
1626 ])
1627}
1628
1629fn discard_result_to_value(result: DiscardResult) -> VmValue {
1630 build_dict([(
1631 "discarded_paths",
1632 VmValue::List(Arc::new(
1633 result
1634 .discarded_paths
1635 .into_iter()
1636 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1637 .collect(),
1638 )),
1639 )])
1640}
1641
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Empty and dot-only ids must be rewritten so they can never act as a
    /// `.` or `..` path component.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        let hostile = ["..", ".", "...", ""];
        for evil in hostile {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let only_dots = safe.bytes().all(|b| b == b'.');
            assert!(!only_dots, "`{evil}` -> `{safe}` is still all dots");

            // Parsed as a path, the result must consist of plain names only.
            let all_normal = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(all_normal, "`{safe}` contains a traversal component");
        }
    }

    /// Ids made only of allowed characters are returned unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// A `..` session id must not let the session directory leave the staged
    /// state root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let has_parent_dir = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!has_parent_dir, "session_dir({dir:?}) escapes via `..`");

        let staged = STATE_REL
            .iter()
            .fold(std::path::PathBuf::from("/workspace"), |acc, part| {
                acc.join(part)
            });
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}