1use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25 build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26 require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Names under which the `fs` capability registers its builtins. The same
// constants are used as the `builtin` label on errors raised by the handlers.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Version stamped into every persisted manifest; `load_state` rejects any
// manifest carrying a different value.
const MANIFEST_VERSION: u32 = 1;
// Path components of the staged-state directory.
// NOTE(review): presumably joined onto the session root by `session_dir`,
// which is not visible in this part of the file — confirm.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Zero-sized capability type whose [`HostlibCapability`] implementation
/// registers the `fs` module's builtins.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46 fn module_name(&self) -> &'static str {
47 "fs"
48 }
49
50 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51 register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52 register(
53 registry,
54 STATUS_BUILTIN,
55 "staged_status",
56 staged_status_builtin,
57 );
58 register(
59 registry,
60 COMMIT_BUILTIN,
61 "commit_staged",
62 commit_staged_builtin,
63 );
64 register(
65 registry,
66 DISCARD_BUILTIN,
67 "discard_staged",
68 discard_staged_builtin,
69 );
70 register_gated(
73 registry,
74 SAFE_TEXT_PATCH_BUILTIN,
75 "safe_text_patch",
76 safe_text_patch_builtin,
77 );
78 register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79 register(
80 registry,
81 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82 "emit_safe_text_patch_result",
83 emit_safe_text_patch_result_builtin,
84 );
85 }
86}
87
88fn register(
89 registry: &mut BuiltinRegistry,
90 name: &'static str,
91 method: &'static str,
92 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94 let handler: SyncHandler = std::sync::Arc::new(runner);
95 registry.register(RegisteredBuiltin {
96 name,
97 module: "fs",
98 method,
99 handler,
100 });
101}
102
103fn register_gated(
104 registry: &mut BuiltinRegistry,
105 name: &'static str,
106 method: &'static str,
107 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109 registry.register(RegisteredBuiltin {
110 name,
111 module: "fs",
112 method,
113 handler: crate::tools::permissions::gated_handler(name, runner),
114 });
115}
116
/// Filesystem mode of a session; serialized as a lowercase string
/// (`"immediate"` / `"staged"`).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// No overlay: `read`/`read_dir` return `None` and the `stage_*_or_none`
    /// helpers return `Ok(None)` for a session in this mode. Fresh sessions
    /// start here.
    Immediate,
    /// Writes and deletes are recorded as staged entries of the session
    /// until they are committed or discarded.
    Staged,
}
126
127impl FsMode {
128 fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129 match raw {
130 "immediate" => Ok(Self::Immediate),
131 "staged" => Ok(Self::Staged),
132 other => Err(HostlibError::InvalidParameter {
133 builtin,
134 param: "mode",
135 message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136 }),
137 }
138 }
139
140 pub fn as_str(self) -> &'static str {
142 match self {
143 Self::Immediate => "immediate",
144 Self::Staged => "staged",
145 }
146 }
147}
148
/// On-disk (JSON) form of a session, written by `persist_state` and read
/// back by `load_state`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Format version; must equal `MANIFEST_VERSION` to be loadable.
    version: u32,
    /// Session the manifest belongs to; checked against the requested id on load.
    session_id: String,
    /// Filesystem mode of the session at the time it was persisted.
    mode: FsMode,
    /// Session root, stored as a (lossy) string.
    root: String,
    /// Staged entries keyed by their path rendered as a (lossy) string.
    entries: BTreeMap<String, StagedEntry>,
}
157
/// One pending change for a path; serialized with a `kind` tag in
/// snake_case (`"write"` / `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// Pending write whose content lives in the session's `bodies` directory.
    Write {
        /// Hex SHA-256 of the content; also the body's file name (see `write_body`).
        body_hash: String,
        /// Length of the staged content in bytes.
        len: u64,
        /// `now_ms()` value captured when the entry was staged.
        created_at_ms: i64,
    },
    /// Pending deletion of a path that existed on disk when it was staged.
    Delete {
        /// The `recursive` flag the delete was staged with.
        recursive: bool,
        /// `now_ms()` value captured when the entry was staged.
        created_at_ms: i64,
    },
}
171
172impl StagedEntry {
173 fn created_at_ms(&self) -> i64 {
174 match self {
175 Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176 *created_at_ms
177 }
178 }
179 }
180
181 fn body_len(&self) -> u64 {
182 match self {
183 Self::Write { len, .. } => *len,
184 Self::Delete { .. } => 0,
185 }
186 }
187}
188
/// In-memory state of one session, cached in the global session map and
/// mirrored to disk as a `Manifest`.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier the state is cached and persisted under.
    session_id: String,
    /// Current filesystem mode; `FsMode::Immediate` for fresh sessions.
    mode: FsMode,
    /// Root passed to `session_dir`/`manifest_path` to locate the session's files.
    root: PathBuf,
    /// Pending changes keyed by normalized path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
196
/// Result of staging a write via `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not exist in the overlay before the write.
    pub(crate) created: bool,
    /// Number of bytes in the staged content.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry as returned by the overlay-aware `read_dir`.
// NOTE(review): the entries are built by `overlay_read_dir`, which is not
// visible here; the field notes below are inferred from names — confirm.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Presumably the entry's file name (not a full path).
    pub(crate) name: String,
    /// Presumably whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Presumably whether the entry is a symbolic link.
    pub(crate) is_symlink: bool,
    /// Presumably the entry's size in bytes.
    pub(crate) size: u64,
}
210
/// Summary of a session's pending changes, as returned by `staged_status`.
// NOTE(review): values are computed by `status_from_state`, which is not
// visible here; the field notes below are inferred from names — confirm.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// Presumably one item per staged entry.
    pub pending_writes: Vec<PendingWrite>,
    /// Presumably the summed size of all pending content, in bytes.
    pub total_bytes_pending: u64,
    /// Presumably the age of the oldest staged entry, in milliseconds.
    pub oldest_pending_age_ms: i64,
}
221
/// One pending change listed in [`StagedStatus::pending_writes`].
// NOTE(review): instances are constructed outside the visible part of the
// file; the field notes below are inferred from names — confirm.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Presumably the staged path rendered as a string.
    pub path: String,
    /// Presumably a label for the kind of change (write vs. delete).
    pub kind: &'static str,
    /// Presumably the number of bytes the change adds.
    pub bytes_added: u64,
    /// Presumably the number of bytes the change removes.
    pub bytes_removed: u64,
}
234
/// Result of [`set_mode`].
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode the session was in before `set_mode` replaced it.
    pub previous_mode: FsMode,
}
241
/// Result of [`commit_staged`].
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths whose staged entry was applied and removed from the session.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for entries that failed the path-scope check
    /// or could not be applied; these entries stay staged.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Result of [`discard_staged`].
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entry was dropped from the session.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
268 sessions()
269 .lock()
270 .expect("hostlib fs session mutex poisoned")
271}
272
273pub fn configure_session_root(session_id: &str, root: &Path) {
278 if session_id.trim().is_empty() {
279 return;
280 }
281 let root = normalize_logical(root);
282 let mut guard = lock_sessions();
283 match guard.get_mut(session_id) {
284 Some(state) if state.entries.is_empty() => {
285 state.root = root;
286 }
287 Some(_) => {}
288 None => {
289 let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
290 session_id: session_id.to_string(),
291 mode: FsMode::Immediate,
292 root,
293 entries: BTreeMap::new(),
294 });
295 guard.insert(session_id.to_string(), state);
296 }
297 }
298}
299
300pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
302 if session_id.trim().is_empty() {
303 return None;
304 }
305 let guard = lock_sessions();
306 guard.get(session_id).map(|state| state.root.clone())
307}
308
309pub fn set_mode(
311 session_id: &str,
312 mode: FsMode,
313 root: Option<&Path>,
314) -> Result<SetModeResult, HostlibError> {
315 validate_session_id(SET_MODE_BUILTIN, session_id)?;
316 let mut guard = lock_sessions();
317 let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
318 let previous_mode = state.mode;
319 state.mode = mode;
320 persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
321 builtin: SET_MODE_BUILTIN,
322 message: err,
323 })?;
324 guard.insert(session_id.to_string(), state);
325 Ok(SetModeResult { previous_mode })
326}
327
328pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
330 validate_session_id(STATUS_BUILTIN, session_id)?;
331 let mut guard = lock_sessions();
332 let state = state_for_locked(&mut guard, session_id, None)?;
333 let status = status_from_state(&state);
334 guard.insert(session_id.to_string(), state);
335 Ok(status)
336}
337
338pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
340 validate_session_id(COMMIT_BUILTIN, session_id)?;
341 let mut guard = lock_sessions();
342 let mut state = state_for_locked(&mut guard, session_id, None)?;
343 let selected = selected_paths(&state, paths);
344 let mut committed_paths = Vec::new();
345 let mut failed_paths_with_reasons = Vec::new();
346
347 for path in selected {
348 let Some(entry) = state.entries.get(&path).cloned() else {
349 continue;
350 };
351 let path_label = path.to_string_lossy().into_owned();
352 let access = match entry {
358 StagedEntry::Write { .. } => FsAccess::Write,
359 StagedEntry::Delete { .. } => FsAccess::Delete,
360 };
361 if let Err(violation) = check_fs_path_scope(&path, access) {
362 failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
363 continue;
364 }
365 match commit_entry(&state, &path, &entry) {
366 Ok(()) => {
367 state.entries.remove(&path);
368 committed_paths.push(path_label);
369 }
370 Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
371 }
372 }
373
374 persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
375 builtin: COMMIT_BUILTIN,
376 message: err,
377 })?;
378 emit_staged_update(&state);
379 guard.insert(session_id.to_string(), state);
380 Ok(CommitResult {
381 committed_paths,
382 failed_paths_with_reasons,
383 })
384}
385
386pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
388 validate_session_id(DISCARD_BUILTIN, session_id)?;
389 let mut guard = lock_sessions();
390 let mut state = state_for_locked(&mut guard, session_id, None)?;
391 let selected = selected_paths(&state, paths);
392 let mut discarded_paths = Vec::new();
393 for path in selected {
394 if state.entries.remove(&path).is_some() {
395 discarded_paths.push(path.to_string_lossy().into_owned());
396 }
397 }
398 persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
399 builtin: DISCARD_BUILTIN,
400 message: err,
401 })?;
402 emit_staged_update(&state);
403 guard.insert(session_id.to_string(), state);
404 Ok(DiscardResult { discarded_paths })
405}
406
407pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
414 validate_session_id(DISCARD_BUILTIN, session_id)?;
415 let mut guard = lock_sessions();
416 let state = match guard.remove(session_id) {
417 Some(state) => state,
418 None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
419 HostlibError::Backend {
420 builtin: DISCARD_BUILTIN,
421 message: err,
422 }
423 })?,
424 };
425 let dir = session_dir(&state.root, &state.session_id);
426 match stdfs::remove_dir_all(&dir) {
427 Ok(()) => Ok(()),
428 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
429 Err(err) => Err(HostlibError::Backend {
430 builtin: DISCARD_BUILTIN,
431 message: format!("remove staged session {}: {err}", dir.display()),
432 }),
433 }
434}
435
436pub(crate) fn read(
437 path: &Path,
438 explicit_session_id: Option<&str>,
439) -> Option<std::io::Result<Vec<u8>>> {
440 let session_id = active_session_id(explicit_session_id)?;
441 let mut guard = lock_sessions();
442 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
443 let result = if state.mode == FsMode::Staged {
444 overlay_read(&state, path)
445 } else {
446 None
447 };
448 guard.insert(session_id, state);
449 result
450}
451
452pub(crate) fn read_to_string(
453 path: &Path,
454 explicit_session_id: Option<&str>,
455) -> Option<std::io::Result<String>> {
456 read(path, explicit_session_id).map(|result| {
457 result.and_then(|bytes| {
458 String::from_utf8(bytes).map_err(|err| {
459 std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
460 })
461 })
462 })
463}
464
465pub(crate) fn read_dir(
466 path: &Path,
467 explicit_session_id: Option<&str>,
468) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
469 let session_id = active_session_id(explicit_session_id)?;
470 let mut guard = lock_sessions();
471 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
472 let result = if state.mode == FsMode::Staged {
473 Some(overlay_read_dir(&state, path))
474 } else {
475 None
476 };
477 guard.insert(session_id, state);
478 result
479}
480
481pub(crate) fn stage_write_or_none(
482 builtin: &'static str,
483 path: &Path,
484 bytes: &[u8],
485 create_parents: bool,
486 overwrite: bool,
487 explicit_session_id: Option<&str>,
488) -> Result<Option<WriteOutcome>, HostlibError> {
489 let Some(session_id) = active_session_id(explicit_session_id) else {
490 return Ok(None);
491 };
492 let mut guard = lock_sessions();
493 let mut state = state_for_locked(&mut guard, &session_id, None)?;
494 if state.mode != FsMode::Staged {
495 guard.insert(session_id, state);
496 return Ok(None);
497 }
498
499 let key = normalize_logical(path);
500 let existed = overlay_exists(&state, &key);
501 if existed && !overwrite {
502 guard.insert(session_id, state);
503 return Err(HostlibError::Backend {
504 builtin,
505 message: format!("`{}` exists and overwrite=false", key.display()),
506 });
507 }
508 if !create_parents && !parent_exists(&state, &key) {
509 guard.insert(session_id, state);
510 return Err(HostlibError::Backend {
511 builtin,
512 message: format!("parent directory for `{}` does not exist", key.display()),
513 });
514 }
515
516 let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
517 builtin,
518 message: err,
519 })?;
520 state.entries.insert(
521 key.clone(),
522 StagedEntry::Write {
523 body_hash: hash,
524 len: bytes.len() as u64,
525 created_at_ms: now_ms(),
526 },
527 );
528 persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
529 builtin,
530 message: err,
531 })?;
532 emit_staged_update(&state);
533 guard.insert(session_id, state);
534 Ok(Some(WriteOutcome {
535 created: !existed,
536 bytes_written: bytes.len(),
537 }))
538}
539
540pub(crate) fn stage_delete_or_none(
541 builtin: &'static str,
542 path: &Path,
543 recursive: bool,
544 explicit_session_id: Option<&str>,
545) -> Result<Option<bool>, HostlibError> {
546 let Some(session_id) = active_session_id(explicit_session_id) else {
547 return Ok(None);
548 };
549 let mut guard = lock_sessions();
550 let mut state = state_for_locked(&mut guard, &session_id, None)?;
551 if state.mode != FsMode::Staged {
552 guard.insert(session_id, state);
553 return Ok(None);
554 }
555
556 let key = normalize_logical(path);
557 let staged_targets = staged_paths_under(&state, &key);
558 let disk_exists = key.exists();
559 if !disk_exists && staged_targets.is_empty() {
560 guard.insert(session_id, state);
561 return Ok(Some(false));
562 }
563
564 if !disk_exists {
565 for staged in staged_targets {
566 state.entries.remove(&staged);
567 }
568 } else {
569 validate_delete_shape(builtin, &key, recursive)?;
570 for staged in staged_targets {
571 state.entries.remove(&staged);
572 }
573 state.entries.insert(
574 key.clone(),
575 StagedEntry::Delete {
576 recursive,
577 created_at_ms: now_ms(),
578 },
579 );
580 }
581 persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
582 builtin,
583 message: err,
584 })?;
585 emit_staged_update(&state);
586 guard.insert(session_id, state);
587 Ok(Some(true))
588}
589
/// Result of [`safe_text_patch`].
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Whether the patch was applied, rejected as stale, or was a no-op.
    pub result: SafeTextPatchResult,
    /// `sha256:`-prefixed hash of the content found before patching (the hash
    /// of empty content when the file did not exist).
    pub current_hash: String,
    /// `sha256:`-prefixed hash of the requested new content.
    pub after_hash: String,
    /// `true` only when the patch was applied and the file did not exist before.
    pub created: bool,
    /// Length of the new content when applied, `0` otherwise.
    pub bytes_written: usize,
}
606
/// Outcome category of a safe text patch.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The new content was written (or staged).
    Applied,
    /// The caller's expected hash did not match the current content; nothing
    /// was written.
    StaleBase,
    /// The existing content already equals the new content; nothing was written.
    NoOp,
}

impl SafeTextPatchResult {
    /// Snake-case label of the variant, as reported to scripts.
    fn as_str(self) -> &'static str {
        match self {
            Self::NoOp => "no_op",
            Self::StaleBase => "stale_base",
            Self::Applied => "applied",
        }
    }
}
630
631fn hash_label(bytes: &[u8]) -> String {
635 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
636}
637
638pub fn safe_text_patch(
660 path: &Path,
661 content: &str,
662 expected_hash: Option<&str>,
663 session_id: Option<&str>,
664 create_parents: bool,
665 overwrite: bool,
666) -> Result<SafeTextPatchOutcome, HostlibError> {
667 let new_bytes = content.as_bytes();
668 let after_hash = hash_label(new_bytes);
669
670 if let Some(outcome) = safe_text_patch_staged(
671 path,
672 new_bytes,
673 expected_hash,
674 session_id,
675 create_parents,
676 overwrite,
677 &after_hash,
678 )? {
679 return Ok(outcome);
680 }
681
682 safe_text_patch_disk(
683 path,
684 new_bytes,
685 expected_hash,
686 create_parents,
687 overwrite,
688 after_hash,
689 )
690}
691
692#[allow(clippy::too_many_arguments)]
698fn safe_text_patch_staged(
699 path: &Path,
700 new_bytes: &[u8],
701 expected_hash: Option<&str>,
702 session_id: Option<&str>,
703 create_parents: bool,
704 overwrite: bool,
705 after_hash: &str,
706) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
707 let Some(session) = active_session_id(session_id) else {
708 return Ok(None);
709 };
710 let mut guard = lock_sessions();
711 let mut state = state_for_locked(&mut guard, &session, None)?;
712 if state.mode != FsMode::Staged {
713 guard.insert(session, state);
714 return Ok(None);
715 }
716
717 let key = normalize_logical(path);
718 let (existing_bytes, existed) = match overlay_read(&state, path) {
719 Some(Ok(bytes)) => (bytes, true),
720 Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
721 Some(Err(err)) => {
722 guard.insert(session, state);
723 return Err(HostlibError::Backend {
724 builtin: SAFE_TEXT_PATCH_BUILTIN,
725 message: format!("read `{}`: {err}", path.display()),
726 });
727 }
728 None => match stdfs::read(path) {
729 Ok(bytes) => (bytes, true),
730 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
731 Err(err) => {
732 guard.insert(session, state);
733 return Err(HostlibError::Backend {
734 builtin: SAFE_TEXT_PATCH_BUILTIN,
735 message: format!("read `{}`: {err}", path.display()),
736 });
737 }
738 },
739 };
740 let current_hash = hash_label(&existing_bytes);
741
742 if let Some(expected) = expected_hash {
743 if expected != current_hash {
744 guard.insert(session, state);
745 return Ok(Some(SafeTextPatchOutcome {
746 result: SafeTextPatchResult::StaleBase,
747 current_hash,
748 after_hash: after_hash.to_string(),
749 created: false,
750 bytes_written: 0,
751 }));
752 }
753 }
754
755 if existed && existing_bytes == new_bytes {
756 guard.insert(session, state);
757 return Ok(Some(SafeTextPatchOutcome {
758 result: SafeTextPatchResult::NoOp,
759 current_hash,
760 after_hash: after_hash.to_string(),
761 created: false,
762 bytes_written: 0,
763 }));
764 }
765
766 let overlay_existed = overlay_exists(&state, &key);
767 if overlay_existed && !overwrite {
768 guard.insert(session, state);
769 return Err(HostlibError::Backend {
770 builtin: SAFE_TEXT_PATCH_BUILTIN,
771 message: format!("`{}` exists and overwrite=false", key.display()),
772 });
773 }
774 if !create_parents && !parent_exists(&state, &key) {
775 guard.insert(session, state);
776 return Err(HostlibError::Backend {
777 builtin: SAFE_TEXT_PATCH_BUILTIN,
778 message: format!("parent directory for `{}` does not exist", key.display()),
779 });
780 }
781
782 let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
783 builtin: SAFE_TEXT_PATCH_BUILTIN,
784 message: err,
785 })?;
786 state.entries.insert(
787 key.clone(),
788 StagedEntry::Write {
789 body_hash,
790 len: new_bytes.len() as u64,
791 created_at_ms: now_ms(),
792 },
793 );
794 persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
795 builtin: SAFE_TEXT_PATCH_BUILTIN,
796 message: err,
797 })?;
798 emit_staged_update(&state);
799 guard.insert(session, state);
800
801 Ok(Some(SafeTextPatchOutcome {
802 result: SafeTextPatchResult::Applied,
803 current_hash,
804 after_hash: after_hash.to_string(),
805 created: !existed,
806 bytes_written: new_bytes.len(),
807 }))
808}
809
810fn safe_text_patch_disk(
815 path: &Path,
816 new_bytes: &[u8],
817 expected_hash: Option<&str>,
818 create_parents: bool,
819 overwrite: bool,
820 after_hash: String,
821) -> Result<SafeTextPatchOutcome, HostlibError> {
822 let (existing_bytes, existed) = match stdfs::read(path) {
823 Ok(bytes) => (bytes, true),
824 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
825 Err(err) => {
826 return Err(HostlibError::Backend {
827 builtin: SAFE_TEXT_PATCH_BUILTIN,
828 message: format!("read `{}`: {err}", path.display()),
829 });
830 }
831 };
832 let current_hash = hash_label(&existing_bytes);
833
834 if let Some(expected) = expected_hash {
835 if expected != current_hash {
836 return Ok(SafeTextPatchOutcome {
837 result: SafeTextPatchResult::StaleBase,
838 current_hash,
839 after_hash,
840 created: false,
841 bytes_written: 0,
842 });
843 }
844 }
845
846 if existed && existing_bytes == new_bytes {
847 return Ok(SafeTextPatchOutcome {
848 result: SafeTextPatchResult::NoOp,
849 current_hash,
850 after_hash,
851 created: false,
852 bytes_written: 0,
853 });
854 }
855 if existed && !overwrite {
856 return Err(HostlibError::Backend {
857 builtin: SAFE_TEXT_PATCH_BUILTIN,
858 message: format!("`{}` exists and overwrite=false", path.display()),
859 });
860 }
861 if !create_parents {
862 if let Some(parent) = path.parent() {
863 if !parent.as_os_str().is_empty() && !parent.is_dir() {
864 return Err(HostlibError::Backend {
865 builtin: SAFE_TEXT_PATCH_BUILTIN,
866 message: format!(
867 "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
868 path.display()
869 ),
870 });
871 }
872 }
873 }
874
875 crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
876 atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
877 builtin: SAFE_TEXT_PATCH_BUILTIN,
878 message: format!("write `{}`: {err}", path.display()),
879 })?;
880
881 Ok(SafeTextPatchOutcome {
882 result: SafeTextPatchResult::Applied,
883 current_hash,
884 after_hash,
885 created: !existed,
886 bytes_written: new_bytes.len(),
887 })
888}
889
890fn read_existing(
895 builtin: &'static str,
896 path: &Path,
897 session_id: Option<&str>,
898) -> Result<(Vec<u8>, bool), HostlibError> {
899 if let Some(result) = read(path, session_id) {
900 return match result {
901 Ok(bytes) => Ok((bytes, true)),
902 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
903 Err(err) => Err(HostlibError::Backend {
904 builtin,
905 message: format!("read `{}`: {err}", path.display()),
906 }),
907 };
908 }
909 match stdfs::read(path) {
910 Ok(bytes) => Ok((bytes, true)),
911 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
912 Err(err) => Err(HostlibError::Backend {
913 builtin,
914 message: format!("read `{}`: {err}", path.display()),
915 }),
916 }
917}
918
919fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
920 let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
921 let dict = raw.as_ref();
922 let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
923 let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
924 let path = Path::new(&path_str);
925 enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
926
927 let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
928 let hash = hash_label(&bytes);
929 let content = match std::str::from_utf8(&bytes) {
930 Ok(s) => s.to_string(),
931 Err(err) => {
932 return Err(HostlibError::Backend {
933 builtin: READ_TEXT_BUILTIN,
934 message: format!("`{path_str}` is not valid UTF-8: {err}"),
935 });
936 }
937 };
938 let bytes_len = bytes.len() as i64;
939 Ok(build_dict([
940 ("path", str_value(&path_str)),
941 ("content", str_value(&content)),
942 ("sha256", str_value(&hash)),
943 ("size", VmValue::Int(bytes_len)),
944 ("exists", VmValue::Bool(existed)),
945 ]))
946}
947
948fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
949 let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
950 let dict = raw.as_ref();
951
952 let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
953 let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
954 let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
955 let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
956 let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
957 let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
958
959 enforce_path_scope(
960 SAFE_TEXT_PATCH_BUILTIN,
961 Path::new(&path_str),
962 FsAccess::Write,
963 )?;
964 let outcome = safe_text_patch(
965 Path::new(&path_str),
966 &content,
967 expected_hash.as_deref(),
968 session_id.as_deref(),
969 create_parents,
970 overwrite,
971 )?;
972
973 let entries: Vec<(&'static str, VmValue)> = vec![
974 ("path", str_value(&path_str)),
975 ("result", str_value(outcome.result.as_str())),
976 (
977 "applied",
978 VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
979 ),
980 (
981 "stale_base",
982 VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
983 ),
984 ("current_hash", str_value(&outcome.current_hash)),
985 ("before_sha256", str_value(&outcome.current_hash)),
986 ("after_sha256", str_value(&outcome.after_hash)),
987 ("created", VmValue::Bool(outcome.created)),
988 ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
989 (
990 "expected_hash",
991 match expected_hash.as_deref() {
992 Some(hash) => str_value(hash),
993 None => VmValue::Nil,
994 },
995 ),
996 ];
997 Ok(build_dict(entries))
998}
999
1000fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1001 let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1002 let dict = raw.as_ref();
1003
1004 let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1005 let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1006 let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1007 let bytes_written = optional_int(
1008 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1009 dict,
1010 "bytes_written",
1011 0,
1012 )?;
1013 let failed_hunk_index = match dict.get("failed_hunk_index") {
1014 None | Some(VmValue::Nil) => None,
1015 Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1016 Some(other) => {
1017 return Err(HostlibError::InvalidParameter {
1018 builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1019 param: "failed_hunk_index",
1020 message: format!("expected non-negative integer, got {}", other.type_name()),
1021 });
1022 }
1023 };
1024 let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1025 .or_else(harn_vm::agent_sessions::current_session_id);
1026
1027 if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1028 harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1029 session_id,
1030 path,
1031 result,
1032 hunks_count: hunks_count.max(0) as usize,
1033 bytes_written: bytes_written.max(0) as u64,
1034 failed_hunk_index,
1035 });
1036 Ok(VmValue::Bool(true))
1037 } else {
1038 Ok(VmValue::Bool(false))
1042 }
1043}
1044
1045fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1046 let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1047 let dict = raw.as_ref();
1048 let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1049 let mode = FsMode::parse(
1050 SET_MODE_BUILTIN,
1051 &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1052 )?;
1053 let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1054 let result = set_mode(&session_id, mode, root.as_deref())?;
1055 Ok(build_dict([(
1056 "previous_mode",
1057 str_value(result.previous_mode.as_str()),
1058 )]))
1059}
1060
1061fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1062 let raw = dict_arg(STATUS_BUILTIN, args)?;
1063 let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1064 Ok(status_to_value(staged_status(&session_id)?))
1065}
1066
1067fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1068 let raw = dict_arg(COMMIT_BUILTIN, args)?;
1069 let dict = raw.as_ref();
1070 let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1071 let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1072 Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1073}
1074
1075fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1076 let raw = dict_arg(DISCARD_BUILTIN, args)?;
1077 let dict = raw.as_ref();
1078 let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1079 let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1080 Ok(discard_result_to_value(discard_staged(
1081 &session_id,
1082 &paths,
1083 )?))
1084}
1085
1086fn state_for_locked(
1087 guard: &mut BTreeMap<String, SessionState>,
1088 session_id: &str,
1089 root: Option<PathBuf>,
1090) -> Result<SessionState, HostlibError> {
1091 if let Some(existing) = guard.get(session_id) {
1092 let mut state = existing.clone();
1093 if let Some(root) = root {
1094 if state.entries.is_empty() {
1095 state.root = root;
1096 }
1097 }
1098 return Ok(state);
1099 }
1100 let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1101 builtin: SET_MODE_BUILTIN,
1102 message: err,
1103 })?;
1104 Ok(state)
1105}
1106
1107fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1108 let root = root.unwrap_or_else(default_root);
1109 let manifest_path = manifest_path(&root, session_id);
1110 if manifest_path.exists() {
1111 let text = stdfs::read_to_string(&manifest_path)
1112 .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1113 let manifest: Manifest = serde_json::from_str(&text)
1114 .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1115 if manifest.version != MANIFEST_VERSION {
1116 return Err(format!(
1117 "unsupported staged fs manifest version {} in {}",
1118 manifest.version,
1119 manifest_path.display()
1120 ));
1121 }
1122 if manifest.session_id != session_id {
1123 return Err(format!(
1124 "staged fs manifest session id mismatch in {}",
1125 manifest_path.display()
1126 ));
1127 }
1128 return Ok(SessionState {
1129 session_id: manifest.session_id,
1130 mode: manifest.mode,
1131 root: normalize_logical(Path::new(&manifest.root)),
1132 entries: manifest
1133 .entries
1134 .into_iter()
1135 .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1136 .collect(),
1137 });
1138 }
1139 Ok(SessionState {
1140 session_id: session_id.to_string(),
1141 mode: FsMode::Immediate,
1142 root,
1143 entries: BTreeMap::new(),
1144 })
1145}
1146
1147fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1148 let dir = session_dir(&state.root, &state.session_id);
1149 stdfs::create_dir_all(dir.join("bodies"))
1150 .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1151 let manifest = Manifest {
1152 version: MANIFEST_VERSION,
1153 session_id: state.session_id.clone(),
1154 mode: state.mode,
1155 root: state.root.to_string_lossy().into_owned(),
1156 entries: state
1157 .entries
1158 .iter()
1159 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1160 .collect(),
1161 };
1162 let bytes = serde_json::to_vec_pretty(&manifest)
1163 .map_err(|err| format!("serialize staged manifest: {err}"))?;
1164 atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1165 append_journal(state, op, path)?;
1166 prune_unreferenced_bodies(state);
1167 Ok(())
1168}
1169
1170fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1171 let dir = session_dir(&state.root, &state.session_id);
1172 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1173 let line = serde_json::to_string(&serde_json::json!({
1174 "ts_ms": now_ms(),
1175 "op": op,
1176 "path": path.map(|path| path.to_string_lossy().into_owned()),
1177 "pending_count": state.entries.len(),
1178 }))
1179 .map_err(|err| format!("serialize staged journal: {err}"))?;
1180 let mut file = stdfs::OpenOptions::new()
1181 .create(true)
1182 .append(true)
1183 .open(dir.join("journal.jsonl"))
1184 .map_err(|err| format!("open staged journal: {err}"))?;
1185 writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1186}
1187
1188fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1189 let hash = hex::encode(Sha256::digest(bytes));
1190 let path = session_dir(&state.root, &state.session_id)
1191 .join("bodies")
1192 .join(&hash);
1193 if !path.exists() {
1194 atomic_write(&path, bytes)?;
1195 }
1196 Ok(hash)
1197}
1198
1199fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1200 stdfs::read(
1201 session_dir(&state.root, &state.session_id)
1202 .join("bodies")
1203 .join(hash),
1204 )
1205}
1206
1207fn prune_unreferenced_bodies(state: &SessionState) {
1208 let live: BTreeSet<String> = state
1209 .entries
1210 .values()
1211 .filter_map(|entry| match entry {
1212 StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1213 StagedEntry::Delete { .. } => None,
1214 })
1215 .collect();
1216 let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1217 let Ok(entries) = stdfs::read_dir(&body_dir) else {
1218 return;
1219 };
1220 for entry in entries.flatten() {
1221 let name = entry.file_name().to_string_lossy().into_owned();
1222 if !live.contains(&name) {
1223 let _ = stdfs::remove_file(entry.path());
1224 }
1225 }
1226}
1227
1228fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1229 if let Some(parent) = path.parent() {
1230 stdfs::create_dir_all(parent)
1231 .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1232 }
1233 let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1234 stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1235 match stdfs::rename(&tmp, path) {
1236 Ok(()) => Ok(()),
1237 Err(err) => {
1238 let _ = stdfs::remove_file(path);
1239 stdfs::rename(&tmp, path).map_err(|retry| {
1240 format!(
1241 "rename {} to {}: {err}; retry: {retry}",
1242 tmp.display(),
1243 path.display()
1244 )
1245 })
1246 }
1247 }
1248}
1249
1250fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1251 match entry {
1252 StagedEntry::Write { body_hash, .. } => {
1253 let bytes = read_body(state, body_hash)
1254 .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1255 atomic_write(path, &bytes)
1256 }
1257 StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1258 Ok(metadata) if metadata.is_dir() => {
1259 if *recursive {
1260 stdfs::remove_dir_all(path)
1261 .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1262 } else {
1263 stdfs::remove_dir(path)
1264 .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1265 }
1266 }
1267 Ok(_) => stdfs::remove_file(path)
1268 .map_err(|err| format!("remove_file {}: {err}", path.display())),
1269 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1270 Err(err) => Err(format!("stat {}: {err}", path.display())),
1271 },
1272 }
1273}
1274
1275fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1276 let key = normalize_logical(path);
1277 if let Some(entry) = state.entries.get(&key) {
1278 return Some(match entry {
1279 StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1280 StagedEntry::Delete { .. } => Err(not_found(&key)),
1281 });
1282 }
1283 if deleted_ancestor(state, &key) {
1284 return Some(Err(not_found(&key)));
1285 }
1286 None
1287}
1288
1289fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1290 let dir_key = normalize_logical(path);
1291 if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1292 || deleted_ancestor(state, &dir_key)
1293 || matches!(
1294 state.entries.get(&dir_key),
1295 Some(StagedEntry::Delete { .. })
1296 )
1297 {
1298 return Err(not_found(&dir_key));
1299 }
1300 if !path.exists() && !has_staged_descendant(state, &dir_key) {
1301 return Err(not_found(&dir_key));
1302 }
1303
1304 let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1305 if path.exists() {
1306 for entry in stdfs::read_dir(path)? {
1307 let entry = entry?;
1308 let name = entry.file_name().to_string_lossy().into_owned();
1309 let file_type = entry.file_type().ok();
1310 let metadata = entry.metadata().ok();
1311 entries.insert(
1312 name.clone(),
1313 OverlayDirEntry {
1314 name,
1315 is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1316 is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1317 size: metadata.map(|m| m.len()).unwrap_or(0),
1318 },
1319 );
1320 }
1321 }
1322
1323 for (path, entry) in &state.entries {
1324 let Some(name) = overlay_child_name(path, &dir_key) else {
1325 continue;
1326 };
1327 match entry {
1328 StagedEntry::Write { len, .. } => {
1329 let is_dir = path.parent() != Some(dir_key.as_path());
1330 entries.insert(
1331 name.clone(),
1332 OverlayDirEntry {
1333 name,
1334 is_dir,
1335 is_symlink: false,
1336 size: if is_dir { 0 } else { *len },
1337 },
1338 );
1339 }
1340 StagedEntry::Delete { .. } => {
1341 if path.parent() == Some(dir_key.as_path()) {
1342 entries.remove(&name);
1343 }
1344 }
1345 }
1346 }
1347
1348 Ok(entries.into_values().collect())
1349}
1350
/// Returns the name of the first path component of `path` below `dir`.
///
/// Yields `None` when `path` is not under `dir`, when it equals `dir`, or when
/// the first remaining component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    match relative.components().next() {
        Some(Component::Normal(name)) => Some(name.to_string_lossy().into_owned()),
        _ => None,
    }
}
1360
1361fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1362 if let Some(entry) = state.entries.get(path) {
1363 return matches!(entry, StagedEntry::Write { .. });
1364 }
1365 if deleted_ancestor(state, path) {
1366 return false;
1367 }
1368 if has_staged_descendant(state, path) {
1369 return true;
1370 }
1371 path.exists()
1372}
1373
1374fn parent_exists(state: &SessionState, path: &Path) -> bool {
1375 let Some(parent) = path.parent() else {
1376 return true;
1377 };
1378 if parent.as_os_str().is_empty() {
1379 return true;
1380 }
1381 if let Some(entry) = state.entries.get(parent) {
1382 return !matches!(entry, StagedEntry::Delete { .. });
1383 }
1384 if deleted_ancestor(state, parent) {
1385 return false;
1386 }
1387 if has_staged_descendant(state, parent) {
1388 return true;
1389 }
1390 parent.is_dir()
1391}
1392
1393fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1394 state.entries.iter().any(|(candidate, entry)| {
1395 matches!(entry, StagedEntry::Delete { .. })
1396 && path != candidate.as_path()
1397 && path.starts_with(candidate)
1398 })
1399}
1400
1401fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1402 state.entries.iter().any(|(candidate, entry)| {
1403 matches!(entry, StagedEntry::Write { .. })
1404 && candidate != path
1405 && candidate.starts_with(path)
1406 })
1407}
1408
1409fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1410 state
1411 .entries
1412 .keys()
1413 .filter(|candidate| *candidate == path || candidate.starts_with(path))
1414 .cloned()
1415 .collect()
1416}
1417
1418fn validate_delete_shape(
1419 builtin: &'static str,
1420 path: &Path,
1421 recursive: bool,
1422) -> Result<(), HostlibError> {
1423 let Ok(metadata) = stdfs::symlink_metadata(path) else {
1424 return Ok(());
1425 };
1426 if metadata.is_dir() && !recursive {
1427 let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1428 builtin,
1429 message: format!("read_dir `{}`: {err}", path.display()),
1430 })?;
1431 if entries.next().is_some() {
1432 return Err(HostlibError::Backend {
1433 builtin,
1434 message: format!(
1435 "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1436 path.display()
1437 ),
1438 });
1439 }
1440 }
1441 Ok(())
1442}
1443
1444fn status_from_state(state: &SessionState) -> StagedStatus {
1445 let now = now_ms();
1446 let mut pending_writes = Vec::new();
1447 let mut total_bytes_pending = 0u64;
1448 let mut oldest = None;
1449 for (path, entry) in &state.entries {
1450 total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1451 oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1452 old.min(entry.created_at_ms())
1453 }));
1454 let (kind, bytes_added, bytes_removed) = match entry {
1455 StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1456 StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1457 };
1458 pending_writes.push(PendingWrite {
1459 path: path.to_string_lossy().into_owned(),
1460 kind,
1461 bytes_added,
1462 bytes_removed,
1463 });
1464 }
1465 StagedStatus {
1466 pending_writes,
1467 total_bytes_pending,
1468 oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1469 }
1470}
1471
1472fn disk_size(path: &Path) -> Option<u64> {
1473 let metadata = stdfs::symlink_metadata(path).ok()?;
1474 if metadata.is_file() {
1475 return Some(metadata.len());
1476 }
1477 if metadata.is_dir() {
1478 let mut total = 0u64;
1479 for entry in walkdir::WalkDir::new(path)
1480 .into_iter()
1481 .filter_map(Result::ok)
1482 {
1483 if let Ok(metadata) = entry.metadata() {
1484 if metadata.is_file() {
1485 total = total.saturating_add(metadata.len());
1486 }
1487 }
1488 }
1489 return Some(total);
1490 }
1491 Some(metadata.len())
1492}
1493
1494fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1495 if paths.is_empty() {
1496 return state.entries.keys().cloned().collect();
1497 }
1498 let selected: BTreeSet<PathBuf> = paths
1499 .iter()
1500 .map(|path| normalize_logical(Path::new(path)))
1501 .collect();
1502 state
1503 .entries
1504 .keys()
1505 .filter(|path| selected.contains(*path))
1506 .cloned()
1507 .collect()
1508}
1509
1510fn active_session_id(explicit: Option<&str>) -> Option<String> {
1511 explicit
1512 .map(str::to_string)
1513 .or_else(harn_vm::agent_sessions::current_session_id)
1514 .filter(|id| !id.trim().is_empty())
1515}
1516
1517fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1518 if session_id.trim().is_empty() {
1519 return Err(HostlibError::InvalidParameter {
1520 builtin,
1521 param: "session_id",
1522 message: "must not be empty".to_string(),
1523 });
1524 }
1525 Ok(())
1526}
1527
/// Returns the process's current working directory, or `.` when it cannot be
/// determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1531
1532fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1533 let mut dir = root.to_path_buf();
1534 for component in STATE_REL {
1535 dir.push(component);
1536 }
1537 dir.push(sanitize_component(session_id));
1538 dir
1539}
1540
1541fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1542 session_dir(root, session_id).join("manifest.json")
1543}
1544
1545fn sanitize_component(input: &str) -> String {
1546 let sanitized: String = input
1547 .chars()
1548 .map(|ch| match ch {
1549 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1550 _ => '_',
1551 })
1552 .collect();
1553 let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1559 if sanitized == input && !is_dotted {
1560 sanitized
1561 } else {
1562 let hash = hex::encode(Sha256::digest(input.as_bytes()));
1563 format!("{sanitized}-{}", &hash[..12])
1564 }
1565}
1566
1567fn normalize_logical(path: &Path) -> PathBuf {
1568 let absolute = if path.is_absolute() {
1569 path.to_path_buf()
1570 } else {
1571 default_root().join(path)
1572 };
1573 let mut out = PathBuf::new();
1574 for component in absolute.components() {
1575 match component {
1576 Component::ParentDir => {
1577 out.pop();
1578 }
1579 Component::CurDir => {}
1580 other => out.push(other),
1581 }
1582 }
1583 out
1584}
1585
/// Builds the `NotFound` I/O error used for paths hidden by the staged overlay.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1592
/// Returns the current wall-clock time as milliseconds since the Unix epoch,
/// or 0 when the system clock reads earlier than the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1599
1600fn emit_staged_update(state: &SessionState) {
1601 let status = status_from_state(state);
1602 harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1603 session_id: state.session_id.clone(),
1604 pending_count: status.pending_writes.len(),
1605 total_bytes: status.total_bytes_pending,
1606 });
1607}
1608
1609fn pending_write_to_value(write: PendingWrite) -> VmValue {
1610 build_dict([
1611 ("path", str_value(&write.path)),
1612 ("kind", str_value(write.kind)),
1613 ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1614 ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1615 ])
1616}
1617
1618fn status_to_value(status: StagedStatus) -> VmValue {
1619 build_dict([
1620 (
1621 "pending_writes",
1622 VmValue::List(Arc::new(
1623 status
1624 .pending_writes
1625 .into_iter()
1626 .map(pending_write_to_value)
1627 .collect(),
1628 )),
1629 ),
1630 (
1631 "total_bytes_pending",
1632 VmValue::Int(status.total_bytes_pending as i64),
1633 ),
1634 (
1635 "oldest_pending_age_ms",
1636 VmValue::Int(status.oldest_pending_age_ms),
1637 ),
1638 ])
1639}
1640
1641fn commit_result_to_value(result: CommitResult) -> VmValue {
1642 build_dict([
1643 (
1644 "committed_paths",
1645 VmValue::List(Arc::new(
1646 result
1647 .committed_paths
1648 .into_iter()
1649 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1650 .collect(),
1651 )),
1652 ),
1653 (
1654 "failed_paths_with_reasons",
1655 VmValue::List(Arc::new(
1656 result
1657 .failed_paths_with_reasons
1658 .into_iter()
1659 .map(|(path, reason)| {
1660 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1661 })
1662 .collect(),
1663 )),
1664 ),
1665 ])
1666}
1667
1668fn discard_result_to_value(result: DiscardResult) -> VmValue {
1669 build_dict([(
1670 "discarded_paths",
1671 VmValue::List(Arc::new(
1672 result
1673 .discarded_paths
1674 .into_iter()
1675 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1676 .collect(),
1677 )),
1678 )])
1679}
1680
/// Tests that session ids can never act as path-traversal tokens once they
/// have been sanitized into a directory name.
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Empty and dots-only ids must be rewritten into a normal path component.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        for evil in ["..", ".", "...", ""] {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let only_dots = safe.chars().all(|ch| ch == '.');
            assert!(!only_dots, "`{evil}` -> `{safe}` is still all dots");

            let has_special = Path::new(&safe)
                .components()
                .any(|part| !matches!(part, Component::Normal(_)));
            assert!(!has_special, "`{safe}` contains a traversal component");
        }
    }

    /// Ids made only of allowed characters are returned unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        assert_eq!(sanitize_component("abc-123_v2.0"), "abc-123_v2.0");
    }

    /// A `..` session id must still resolve beneath the staged state root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let escapes = dir
            .components()
            .any(|part| matches!(part, Component::ParentDir));
        assert!(!escapes, "session_dir({dir:?}) escapes via `..`");

        let staged = STATE_REL
            .iter()
            .fold(std::path::PathBuf::from("/workspace"), |root, part| {
                root.join(part)
            });
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}