1use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25 build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26 require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Names under which this module's builtins are registered. The same constants
// are used to label errors raised by the corresponding handlers.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Schema version written into every manifest; `load_state` rejects any other value.
const MANIFEST_VERSION: u32 = 1;
// Path components `.harn/state/staged`.
// NOTE(review): presumably joined onto the session root by `session_dir` — confirm.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Hostlib capability that registers the `fs` module builtins (mode switching,
/// staged status/commit/discard, safe text patch and text read).
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46 fn module_name(&self) -> &'static str {
47 "fs"
48 }
49
50 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51 register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52 register(
53 registry,
54 STATUS_BUILTIN,
55 "staged_status",
56 staged_status_builtin,
57 );
58 register(
59 registry,
60 COMMIT_BUILTIN,
61 "commit_staged",
62 commit_staged_builtin,
63 );
64 register(
65 registry,
66 DISCARD_BUILTIN,
67 "discard_staged",
68 discard_staged_builtin,
69 );
70 register_gated(
73 registry,
74 SAFE_TEXT_PATCH_BUILTIN,
75 "safe_text_patch",
76 safe_text_patch_builtin,
77 );
78 register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79 register(
80 registry,
81 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82 "emit_safe_text_patch_result",
83 emit_safe_text_patch_result_builtin,
84 );
85 }
86}
87
88fn register(
89 registry: &mut BuiltinRegistry,
90 name: &'static str,
91 method: &'static str,
92 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94 let handler: SyncHandler = std::sync::Arc::new(runner);
95 registry.register(RegisteredBuiltin {
96 name,
97 module: "fs",
98 method,
99 handler,
100 });
101}
102
103fn register_gated(
104 registry: &mut BuiltinRegistry,
105 name: &'static str,
106 method: &'static str,
107 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109 registry.register(RegisteredBuiltin {
110 name,
111 module: "fs",
112 method,
113 handler: crate::tools::permissions::gated_handler(name, runner),
114 });
115}
116
/// Filesystem mode of a session. Serialized in lowercase (`"immediate"` /
/// `"staged"`).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Staging is bypassed: the `stage_*_or_none` helpers and the overlay
    /// readers return `None` so callers fall through to the real filesystem.
    Immediate,
    /// Writes and deletes are recorded as staged entries until committed or
    /// discarded.
    Staged,
}
126
127impl FsMode {
128 fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129 match raw {
130 "immediate" => Ok(Self::Immediate),
131 "staged" => Ok(Self::Staged),
132 other => Err(HostlibError::InvalidParameter {
133 builtin,
134 param: "mode",
135 message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136 }),
137 }
138 }
139
140 pub fn as_str(self) -> &'static str {
142 match self {
143 Self::Immediate => "immediate",
144 Self::Staged => "staged",
145 }
146 }
147}
148
/// Serialized form of a session's staged state: written as pretty JSON by
/// `persist_state` and parsed back by `load_state`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    // Must equal `MANIFEST_VERSION` for the manifest to load.
    version: u32,
    // Must match the session id the manifest is being loaded for.
    session_id: String,
    mode: FsMode,
    // Session root, stored as a (lossily converted) string.
    root: String,
    // Staged entries keyed by the string form of their path.
    entries: BTreeMap<String, StagedEntry>,
}
157
/// A single pending change recorded for a path. Serialized with a `kind` tag
/// in snake_case (`"write"` / `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// Pending file content.
    Write {
        // Hex SHA-256 of the content; also the file name of the stored body
        // under the session's `bodies` directory (see `write_body`).
        body_hash: String,
        // Length of the staged content in bytes.
        len: u64,
        // Value of `now_ms()` when the entry was staged.
        created_at_ms: i64,
    },
    /// Pending removal of a path.
    Delete {
        // The `recursive` flag the delete was requested with.
        recursive: bool,
        // Value of `now_ms()` when the entry was staged.
        created_at_ms: i64,
    },
}
171
172impl StagedEntry {
173 fn created_at_ms(&self) -> i64 {
174 match self {
175 Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176 *created_at_ms
177 }
178 }
179 }
180
181 fn body_len(&self) -> u64 {
182 match self {
183 Self::Write { len, .. } => *len,
184 Self::Delete { .. } => 0,
185 }
186 }
187}
188
/// In-memory state of one session, cached in the global session registry and
/// mirrored to disk through `persist_state`.
#[derive(Clone, Debug)]
struct SessionState {
    session_id: String,
    mode: FsMode,
    // Root passed to `session_dir` / `manifest_path` to locate the session's files.
    root: PathBuf,
    // Pending changes keyed by normalized path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
196
/// Result of staging a write via `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    // True when `overlay_exists` reported the path as absent before the write.
    pub(crate) created: bool,
    // Length of the staged content in bytes.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry as returned by the overlay-aware `read_dir`.
// NOTE(review): values are produced by `overlay_read_dir`, which is not
// visible here — field semantics below are inferred from the names only.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    pub(crate) name: String,
    pub(crate) is_dir: bool,
    pub(crate) is_symlink: bool,
    // presumably the size in bytes — confirm against `overlay_read_dir`.
    pub(crate) size: u64,
}
210
/// Summary of a session's pending staged changes, returned by `staged_status`.
// NOTE(review): built by `status_from_state`, which is not visible here —
// the per-field notes are assumptions based on the names.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// One record per pending staged entry (presumably).
    pub pending_writes: Vec<PendingWrite>,
    /// Presumably the sum of staged body lengths — confirm.
    pub total_bytes_pending: u64,
    /// Presumably the age in milliseconds of the oldest pending entry — confirm.
    pub oldest_pending_age_ms: i64,
}
221
/// Description of a single pending staged change, as listed in `StagedStatus`.
// NOTE(review): populated outside the visible code; field meanings are
// inferred from the names and should be confirmed.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Path the pending change applies to.
    pub path: String,
    /// Static label for the kind of change (exact values not visible here).
    pub kind: &'static str,
    /// Presumably bytes the change would add — confirm.
    pub bytes_added: u64,
    /// Presumably bytes the change would remove — confirm.
    pub bytes_removed: u64,
}
234
/// Result of `set_mode`.
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode the session was in before the switch.
    pub previous_mode: FsMode,
}
241
/// Result of `commit_staged`.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths whose staged entry was committed and removed from the staged set.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for entries that failed the scope check or the
    /// commit itself; these entries stay staged.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Result of `discard_staged`.
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entry was dropped.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264pub fn configure_session_root(session_id: &str, root: &Path) {
269 if session_id.trim().is_empty() {
270 return;
271 }
272 let root = normalize_logical(root);
273 let mut guard = sessions()
274 .lock()
275 .expect("hostlib fs session mutex poisoned");
276 match guard.get_mut(session_id) {
277 Some(state) if state.entries.is_empty() => {
278 state.root = root;
279 }
280 Some(_) => {}
281 None => {
282 let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
283 session_id: session_id.to_string(),
284 mode: FsMode::Immediate,
285 root,
286 entries: BTreeMap::new(),
287 });
288 guard.insert(session_id.to_string(), state);
289 }
290 }
291}
292
293pub fn set_mode(
295 session_id: &str,
296 mode: FsMode,
297 root: Option<&Path>,
298) -> Result<SetModeResult, HostlibError> {
299 validate_session_id(SET_MODE_BUILTIN, session_id)?;
300 let mut guard = sessions()
301 .lock()
302 .expect("hostlib fs session mutex poisoned");
303 let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
304 let previous_mode = state.mode;
305 state.mode = mode;
306 persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
307 builtin: SET_MODE_BUILTIN,
308 message: err,
309 })?;
310 guard.insert(session_id.to_string(), state);
311 Ok(SetModeResult { previous_mode })
312}
313
314pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
316 validate_session_id(STATUS_BUILTIN, session_id)?;
317 let mut guard = sessions()
318 .lock()
319 .expect("hostlib fs session mutex poisoned");
320 let state = state_for_locked(&mut guard, session_id, None)?;
321 let status = status_from_state(&state);
322 guard.insert(session_id.to_string(), state);
323 Ok(status)
324}
325
326pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
328 validate_session_id(COMMIT_BUILTIN, session_id)?;
329 let mut guard = sessions()
330 .lock()
331 .expect("hostlib fs session mutex poisoned");
332 let mut state = state_for_locked(&mut guard, session_id, None)?;
333 let selected = selected_paths(&state, paths);
334 let mut committed_paths = Vec::new();
335 let mut failed_paths_with_reasons = Vec::new();
336
337 for path in selected {
338 let Some(entry) = state.entries.get(&path).cloned() else {
339 continue;
340 };
341 let path_label = path.to_string_lossy().into_owned();
342 let access = match entry {
348 StagedEntry::Write { .. } => FsAccess::Write,
349 StagedEntry::Delete { .. } => FsAccess::Delete,
350 };
351 if let Err(violation) = check_fs_path_scope(&path, access) {
352 failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
353 continue;
354 }
355 match commit_entry(&state, &path, &entry) {
356 Ok(()) => {
357 state.entries.remove(&path);
358 committed_paths.push(path_label);
359 }
360 Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
361 }
362 }
363
364 persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
365 builtin: COMMIT_BUILTIN,
366 message: err,
367 })?;
368 emit_staged_update(&state);
369 guard.insert(session_id.to_string(), state);
370 Ok(CommitResult {
371 committed_paths,
372 failed_paths_with_reasons,
373 })
374}
375
376pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
378 validate_session_id(DISCARD_BUILTIN, session_id)?;
379 let mut guard = sessions()
380 .lock()
381 .expect("hostlib fs session mutex poisoned");
382 let mut state = state_for_locked(&mut guard, session_id, None)?;
383 let selected = selected_paths(&state, paths);
384 let mut discarded_paths = Vec::new();
385 for path in selected {
386 if state.entries.remove(&path).is_some() {
387 discarded_paths.push(path.to_string_lossy().into_owned());
388 }
389 }
390 persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
391 builtin: DISCARD_BUILTIN,
392 message: err,
393 })?;
394 emit_staged_update(&state);
395 guard.insert(session_id.to_string(), state);
396 Ok(DiscardResult { discarded_paths })
397}
398
399pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
406 validate_session_id(DISCARD_BUILTIN, session_id)?;
407 let mut guard = sessions()
408 .lock()
409 .expect("hostlib fs session mutex poisoned");
410 let state = match guard.remove(session_id) {
411 Some(state) => state,
412 None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
413 HostlibError::Backend {
414 builtin: DISCARD_BUILTIN,
415 message: err,
416 }
417 })?,
418 };
419 let dir = session_dir(&state.root, &state.session_id);
420 match stdfs::remove_dir_all(&dir) {
421 Ok(()) => Ok(()),
422 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
423 Err(err) => Err(HostlibError::Backend {
424 builtin: DISCARD_BUILTIN,
425 message: format!("remove staged session {}: {err}", dir.display()),
426 }),
427 }
428}
429
430pub(crate) fn read(
431 path: &Path,
432 explicit_session_id: Option<&str>,
433) -> Option<std::io::Result<Vec<u8>>> {
434 let session_id = active_session_id(explicit_session_id)?;
435 let mut guard = sessions()
436 .lock()
437 .expect("hostlib fs session mutex poisoned");
438 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
439 let result = if state.mode == FsMode::Staged {
440 overlay_read(&state, path)
441 } else {
442 None
443 };
444 guard.insert(session_id, state);
445 result
446}
447
448pub(crate) fn read_to_string(
449 path: &Path,
450 explicit_session_id: Option<&str>,
451) -> Option<std::io::Result<String>> {
452 read(path, explicit_session_id).map(|result| {
453 result.and_then(|bytes| {
454 String::from_utf8(bytes).map_err(|err| {
455 std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
456 })
457 })
458 })
459}
460
461pub(crate) fn read_dir(
462 path: &Path,
463 explicit_session_id: Option<&str>,
464) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
465 let session_id = active_session_id(explicit_session_id)?;
466 let mut guard = sessions()
467 .lock()
468 .expect("hostlib fs session mutex poisoned");
469 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
470 let result = if state.mode == FsMode::Staged {
471 Some(overlay_read_dir(&state, path))
472 } else {
473 None
474 };
475 guard.insert(session_id, state);
476 result
477}
478
479pub(crate) fn stage_write_or_none(
480 builtin: &'static str,
481 path: &Path,
482 bytes: &[u8],
483 create_parents: bool,
484 overwrite: bool,
485 explicit_session_id: Option<&str>,
486) -> Result<Option<WriteOutcome>, HostlibError> {
487 let Some(session_id) = active_session_id(explicit_session_id) else {
488 return Ok(None);
489 };
490 let mut guard = sessions()
491 .lock()
492 .expect("hostlib fs session mutex poisoned");
493 let mut state = state_for_locked(&mut guard, &session_id, None)?;
494 if state.mode != FsMode::Staged {
495 guard.insert(session_id, state);
496 return Ok(None);
497 }
498
499 let key = normalize_logical(path);
500 let existed = overlay_exists(&state, &key);
501 if existed && !overwrite {
502 guard.insert(session_id, state);
503 return Err(HostlibError::Backend {
504 builtin,
505 message: format!("`{}` exists and overwrite=false", key.display()),
506 });
507 }
508 if !create_parents && !parent_exists(&state, &key) {
509 guard.insert(session_id, state);
510 return Err(HostlibError::Backend {
511 builtin,
512 message: format!("parent directory for `{}` does not exist", key.display()),
513 });
514 }
515
516 let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
517 builtin,
518 message: err,
519 })?;
520 state.entries.insert(
521 key.clone(),
522 StagedEntry::Write {
523 body_hash: hash,
524 len: bytes.len() as u64,
525 created_at_ms: now_ms(),
526 },
527 );
528 persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
529 builtin,
530 message: err,
531 })?;
532 emit_staged_update(&state);
533 guard.insert(session_id, state);
534 Ok(Some(WriteOutcome {
535 created: !existed,
536 bytes_written: bytes.len(),
537 }))
538}
539
540pub(crate) fn stage_delete_or_none(
541 builtin: &'static str,
542 path: &Path,
543 recursive: bool,
544 explicit_session_id: Option<&str>,
545) -> Result<Option<bool>, HostlibError> {
546 let Some(session_id) = active_session_id(explicit_session_id) else {
547 return Ok(None);
548 };
549 let mut guard = sessions()
550 .lock()
551 .expect("hostlib fs session mutex poisoned");
552 let mut state = state_for_locked(&mut guard, &session_id, None)?;
553 if state.mode != FsMode::Staged {
554 guard.insert(session_id, state);
555 return Ok(None);
556 }
557
558 let key = normalize_logical(path);
559 let staged_targets = staged_paths_under(&state, &key);
560 let disk_exists = key.exists();
561 if !disk_exists && staged_targets.is_empty() {
562 guard.insert(session_id, state);
563 return Ok(Some(false));
564 }
565
566 if !disk_exists {
567 for staged in staged_targets {
568 state.entries.remove(&staged);
569 }
570 } else {
571 validate_delete_shape(builtin, &key, recursive)?;
572 for staged in staged_targets {
573 state.entries.remove(&staged);
574 }
575 state.entries.insert(
576 key.clone(),
577 StagedEntry::Delete {
578 recursive,
579 created_at_ms: now_ms(),
580 },
581 );
582 }
583 persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
584 builtin,
585 message: err,
586 })?;
587 emit_staged_update(&state);
588 guard.insert(session_id, state);
589 Ok(Some(true))
590}
591
/// Outcome of `safe_text_patch`.
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Whether the patch was applied, rejected as stale, or was a no-op.
    pub result: SafeTextPatchResult,
    /// `sha256:<hex>` label of the content found before patching (the hash of
    /// empty content when the file did not exist).
    pub current_hash: String,
    /// `sha256:<hex>` label of the requested new content.
    pub after_hash: String,
    /// True only when the patch was applied and the file did not exist before.
    pub created: bool,
    /// Length of the new content when applied; `0` for stale-base and no-op.
    pub bytes_written: usize,
}
608
/// Classification of a `safe_text_patch` attempt.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The new content was written (or staged).
    Applied,
    /// The caller's expected hash did not match the current content; nothing
    /// was written.
    StaleBase,
    /// The existing content already equals the requested content; nothing was
    /// written.
    NoOp,
}

impl SafeTextPatchResult {
    /// Returns the snake_case label for this result.
    fn as_str(self) -> &'static str {
        match self {
            SafeTextPatchResult::NoOp => "no_op",
            SafeTextPatchResult::StaleBase => "stale_base",
            SafeTextPatchResult::Applied => "applied",
        }
    }
}
632
633fn hash_label(bytes: &[u8]) -> String {
637 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
638}
639
640pub fn safe_text_patch(
662 path: &Path,
663 content: &str,
664 expected_hash: Option<&str>,
665 session_id: Option<&str>,
666 create_parents: bool,
667 overwrite: bool,
668) -> Result<SafeTextPatchOutcome, HostlibError> {
669 let new_bytes = content.as_bytes();
670 let after_hash = hash_label(new_bytes);
671
672 if let Some(outcome) = safe_text_patch_staged(
673 path,
674 new_bytes,
675 expected_hash,
676 session_id,
677 create_parents,
678 overwrite,
679 &after_hash,
680 )? {
681 return Ok(outcome);
682 }
683
684 safe_text_patch_disk(
685 path,
686 new_bytes,
687 expected_hash,
688 create_parents,
689 overwrite,
690 after_hash,
691 )
692}
693
694#[allow(clippy::too_many_arguments)]
700fn safe_text_patch_staged(
701 path: &Path,
702 new_bytes: &[u8],
703 expected_hash: Option<&str>,
704 session_id: Option<&str>,
705 create_parents: bool,
706 overwrite: bool,
707 after_hash: &str,
708) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
709 let Some(session) = active_session_id(session_id) else {
710 return Ok(None);
711 };
712 let mut guard = sessions()
713 .lock()
714 .expect("hostlib fs session mutex poisoned");
715 let mut state = state_for_locked(&mut guard, &session, None)?;
716 if state.mode != FsMode::Staged {
717 guard.insert(session, state);
718 return Ok(None);
719 }
720
721 let key = normalize_logical(path);
722 let (existing_bytes, existed) = match overlay_read(&state, path) {
723 Some(Ok(bytes)) => (bytes, true),
724 Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
725 Some(Err(err)) => {
726 guard.insert(session, state);
727 return Err(HostlibError::Backend {
728 builtin: SAFE_TEXT_PATCH_BUILTIN,
729 message: format!("read `{}`: {err}", path.display()),
730 });
731 }
732 None => match stdfs::read(path) {
733 Ok(bytes) => (bytes, true),
734 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
735 Err(err) => {
736 guard.insert(session, state);
737 return Err(HostlibError::Backend {
738 builtin: SAFE_TEXT_PATCH_BUILTIN,
739 message: format!("read `{}`: {err}", path.display()),
740 });
741 }
742 },
743 };
744 let current_hash = hash_label(&existing_bytes);
745
746 if let Some(expected) = expected_hash {
747 if expected != current_hash {
748 guard.insert(session, state);
749 return Ok(Some(SafeTextPatchOutcome {
750 result: SafeTextPatchResult::StaleBase,
751 current_hash,
752 after_hash: after_hash.to_string(),
753 created: false,
754 bytes_written: 0,
755 }));
756 }
757 }
758
759 if existed && existing_bytes == new_bytes {
760 guard.insert(session, state);
761 return Ok(Some(SafeTextPatchOutcome {
762 result: SafeTextPatchResult::NoOp,
763 current_hash,
764 after_hash: after_hash.to_string(),
765 created: false,
766 bytes_written: 0,
767 }));
768 }
769
770 let overlay_existed = overlay_exists(&state, &key);
771 if overlay_existed && !overwrite {
772 guard.insert(session, state);
773 return Err(HostlibError::Backend {
774 builtin: SAFE_TEXT_PATCH_BUILTIN,
775 message: format!("`{}` exists and overwrite=false", key.display()),
776 });
777 }
778 if !create_parents && !parent_exists(&state, &key) {
779 guard.insert(session, state);
780 return Err(HostlibError::Backend {
781 builtin: SAFE_TEXT_PATCH_BUILTIN,
782 message: format!("parent directory for `{}` does not exist", key.display()),
783 });
784 }
785
786 let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
787 builtin: SAFE_TEXT_PATCH_BUILTIN,
788 message: err,
789 })?;
790 state.entries.insert(
791 key.clone(),
792 StagedEntry::Write {
793 body_hash,
794 len: new_bytes.len() as u64,
795 created_at_ms: now_ms(),
796 },
797 );
798 persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
799 builtin: SAFE_TEXT_PATCH_BUILTIN,
800 message: err,
801 })?;
802 emit_staged_update(&state);
803 guard.insert(session, state);
804
805 Ok(Some(SafeTextPatchOutcome {
806 result: SafeTextPatchResult::Applied,
807 current_hash,
808 after_hash: after_hash.to_string(),
809 created: !existed,
810 bytes_written: new_bytes.len(),
811 }))
812}
813
814fn safe_text_patch_disk(
819 path: &Path,
820 new_bytes: &[u8],
821 expected_hash: Option<&str>,
822 create_parents: bool,
823 overwrite: bool,
824 after_hash: String,
825) -> Result<SafeTextPatchOutcome, HostlibError> {
826 let (existing_bytes, existed) = match stdfs::read(path) {
827 Ok(bytes) => (bytes, true),
828 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
829 Err(err) => {
830 return Err(HostlibError::Backend {
831 builtin: SAFE_TEXT_PATCH_BUILTIN,
832 message: format!("read `{}`: {err}", path.display()),
833 });
834 }
835 };
836 let current_hash = hash_label(&existing_bytes);
837
838 if let Some(expected) = expected_hash {
839 if expected != current_hash {
840 return Ok(SafeTextPatchOutcome {
841 result: SafeTextPatchResult::StaleBase,
842 current_hash,
843 after_hash,
844 created: false,
845 bytes_written: 0,
846 });
847 }
848 }
849
850 if existed && existing_bytes == new_bytes {
851 return Ok(SafeTextPatchOutcome {
852 result: SafeTextPatchResult::NoOp,
853 current_hash,
854 after_hash,
855 created: false,
856 bytes_written: 0,
857 });
858 }
859 if existed && !overwrite {
860 return Err(HostlibError::Backend {
861 builtin: SAFE_TEXT_PATCH_BUILTIN,
862 message: format!("`{}` exists and overwrite=false", path.display()),
863 });
864 }
865 if !create_parents {
866 if let Some(parent) = path.parent() {
867 if !parent.as_os_str().is_empty() && !parent.is_dir() {
868 return Err(HostlibError::Backend {
869 builtin: SAFE_TEXT_PATCH_BUILTIN,
870 message: format!(
871 "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
872 path.display()
873 ),
874 });
875 }
876 }
877 }
878
879 crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
880 atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
881 builtin: SAFE_TEXT_PATCH_BUILTIN,
882 message: format!("write `{}`: {err}", path.display()),
883 })?;
884
885 Ok(SafeTextPatchOutcome {
886 result: SafeTextPatchResult::Applied,
887 current_hash,
888 after_hash,
889 created: !existed,
890 bytes_written: new_bytes.len(),
891 })
892}
893
894fn read_existing(
899 builtin: &'static str,
900 path: &Path,
901 session_id: Option<&str>,
902) -> Result<(Vec<u8>, bool), HostlibError> {
903 if let Some(result) = read(path, session_id) {
904 return match result {
905 Ok(bytes) => Ok((bytes, true)),
906 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
907 Err(err) => Err(HostlibError::Backend {
908 builtin,
909 message: format!("read `{}`: {err}", path.display()),
910 }),
911 };
912 }
913 match stdfs::read(path) {
914 Ok(bytes) => Ok((bytes, true)),
915 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
916 Err(err) => Err(HostlibError::Backend {
917 builtin,
918 message: format!("read `{}`: {err}", path.display()),
919 }),
920 }
921}
922
923fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
924 let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
925 let dict = raw.as_ref();
926 let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
927 let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
928 let path = Path::new(&path_str);
929 enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
930
931 let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
932 let hash = hash_label(&bytes);
933 let content = match std::str::from_utf8(&bytes) {
934 Ok(s) => s.to_string(),
935 Err(err) => {
936 return Err(HostlibError::Backend {
937 builtin: READ_TEXT_BUILTIN,
938 message: format!("`{path_str}` is not valid UTF-8: {err}"),
939 });
940 }
941 };
942 let bytes_len = bytes.len() as i64;
943 Ok(build_dict([
944 ("path", str_value(&path_str)),
945 ("content", str_value(&content)),
946 ("sha256", str_value(&hash)),
947 ("size", VmValue::Int(bytes_len)),
948 ("exists", VmValue::Bool(existed)),
949 ]))
950}
951
952fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
953 let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
954 let dict = raw.as_ref();
955
956 let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
957 let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
958 let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
959 let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
960 let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
961 let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
962
963 enforce_path_scope(
964 SAFE_TEXT_PATCH_BUILTIN,
965 Path::new(&path_str),
966 FsAccess::Write,
967 )?;
968 let outcome = safe_text_patch(
969 Path::new(&path_str),
970 &content,
971 expected_hash.as_deref(),
972 session_id.as_deref(),
973 create_parents,
974 overwrite,
975 )?;
976
977 let entries: Vec<(&'static str, VmValue)> = vec![
978 ("path", str_value(&path_str)),
979 ("result", str_value(outcome.result.as_str())),
980 (
981 "applied",
982 VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
983 ),
984 (
985 "stale_base",
986 VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
987 ),
988 ("current_hash", str_value(&outcome.current_hash)),
989 ("before_sha256", str_value(&outcome.current_hash)),
990 ("after_sha256", str_value(&outcome.after_hash)),
991 ("created", VmValue::Bool(outcome.created)),
992 ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
993 (
994 "expected_hash",
995 match expected_hash.as_deref() {
996 Some(hash) => str_value(hash),
997 None => VmValue::Nil,
998 },
999 ),
1000 ];
1001 Ok(build_dict(entries))
1002}
1003
1004fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1005 let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1006 let dict = raw.as_ref();
1007
1008 let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1009 let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1010 let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1011 let bytes_written = optional_int(
1012 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1013 dict,
1014 "bytes_written",
1015 0,
1016 )?;
1017 let failed_hunk_index = match dict.get("failed_hunk_index") {
1018 None | Some(VmValue::Nil) => None,
1019 Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1020 Some(other) => {
1021 return Err(HostlibError::InvalidParameter {
1022 builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1023 param: "failed_hunk_index",
1024 message: format!("expected non-negative integer, got {}", other.type_name()),
1025 });
1026 }
1027 };
1028 let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1029 .or_else(harn_vm::agent_sessions::current_session_id);
1030
1031 if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1032 harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1033 session_id,
1034 path,
1035 result,
1036 hunks_count: hunks_count.max(0) as usize,
1037 bytes_written: bytes_written.max(0) as u64,
1038 failed_hunk_index,
1039 });
1040 Ok(VmValue::Bool(true))
1041 } else {
1042 Ok(VmValue::Bool(false))
1046 }
1047}
1048
1049fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1050 let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1051 let dict = raw.as_ref();
1052 let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1053 let mode = FsMode::parse(
1054 SET_MODE_BUILTIN,
1055 &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1056 )?;
1057 let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1058 let result = set_mode(&session_id, mode, root.as_deref())?;
1059 Ok(build_dict([(
1060 "previous_mode",
1061 str_value(result.previous_mode.as_str()),
1062 )]))
1063}
1064
1065fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1066 let raw = dict_arg(STATUS_BUILTIN, args)?;
1067 let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1068 Ok(status_to_value(staged_status(&session_id)?))
1069}
1070
1071fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1072 let raw = dict_arg(COMMIT_BUILTIN, args)?;
1073 let dict = raw.as_ref();
1074 let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1075 let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1076 Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1077}
1078
1079fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1080 let raw = dict_arg(DISCARD_BUILTIN, args)?;
1081 let dict = raw.as_ref();
1082 let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1083 let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1084 Ok(discard_result_to_value(discard_staged(
1085 &session_id,
1086 &paths,
1087 )?))
1088}
1089
1090fn state_for_locked(
1091 guard: &mut BTreeMap<String, SessionState>,
1092 session_id: &str,
1093 root: Option<PathBuf>,
1094) -> Result<SessionState, HostlibError> {
1095 if let Some(existing) = guard.get(session_id) {
1096 let mut state = existing.clone();
1097 if let Some(root) = root {
1098 if state.entries.is_empty() {
1099 state.root = root;
1100 }
1101 }
1102 return Ok(state);
1103 }
1104 let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1105 builtin: SET_MODE_BUILTIN,
1106 message: err,
1107 })?;
1108 Ok(state)
1109}
1110
1111fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1112 let root = root.unwrap_or_else(default_root);
1113 let manifest_path = manifest_path(&root, session_id);
1114 if manifest_path.exists() {
1115 let text = stdfs::read_to_string(&manifest_path)
1116 .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1117 let manifest: Manifest = serde_json::from_str(&text)
1118 .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1119 if manifest.version != MANIFEST_VERSION {
1120 return Err(format!(
1121 "unsupported staged fs manifest version {} in {}",
1122 manifest.version,
1123 manifest_path.display()
1124 ));
1125 }
1126 if manifest.session_id != session_id {
1127 return Err(format!(
1128 "staged fs manifest session id mismatch in {}",
1129 manifest_path.display()
1130 ));
1131 }
1132 return Ok(SessionState {
1133 session_id: manifest.session_id,
1134 mode: manifest.mode,
1135 root: normalize_logical(Path::new(&manifest.root)),
1136 entries: manifest
1137 .entries
1138 .into_iter()
1139 .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1140 .collect(),
1141 });
1142 }
1143 Ok(SessionState {
1144 session_id: session_id.to_string(),
1145 mode: FsMode::Immediate,
1146 root,
1147 entries: BTreeMap::new(),
1148 })
1149}
1150
1151fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1152 let dir = session_dir(&state.root, &state.session_id);
1153 stdfs::create_dir_all(dir.join("bodies"))
1154 .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1155 let manifest = Manifest {
1156 version: MANIFEST_VERSION,
1157 session_id: state.session_id.clone(),
1158 mode: state.mode,
1159 root: state.root.to_string_lossy().into_owned(),
1160 entries: state
1161 .entries
1162 .iter()
1163 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1164 .collect(),
1165 };
1166 let bytes = serde_json::to_vec_pretty(&manifest)
1167 .map_err(|err| format!("serialize staged manifest: {err}"))?;
1168 atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1169 append_journal(state, op, path)?;
1170 prune_unreferenced_bodies(state);
1171 Ok(())
1172}
1173
1174fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1175 let dir = session_dir(&state.root, &state.session_id);
1176 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1177 let line = serde_json::to_string(&serde_json::json!({
1178 "ts_ms": now_ms(),
1179 "op": op,
1180 "path": path.map(|path| path.to_string_lossy().into_owned()),
1181 "pending_count": state.entries.len(),
1182 }))
1183 .map_err(|err| format!("serialize staged journal: {err}"))?;
1184 let mut file = stdfs::OpenOptions::new()
1185 .create(true)
1186 .append(true)
1187 .open(dir.join("journal.jsonl"))
1188 .map_err(|err| format!("open staged journal: {err}"))?;
1189 writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1190}
1191
1192fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1193 let hash = hex::encode(Sha256::digest(bytes));
1194 let path = session_dir(&state.root, &state.session_id)
1195 .join("bodies")
1196 .join(&hash);
1197 if !path.exists() {
1198 atomic_write(&path, bytes)?;
1199 }
1200 Ok(hash)
1201}
1202
1203fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1204 stdfs::read(
1205 session_dir(&state.root, &state.session_id)
1206 .join("bodies")
1207 .join(hash),
1208 )
1209}
1210
1211fn prune_unreferenced_bodies(state: &SessionState) {
1212 let live: BTreeSet<String> = state
1213 .entries
1214 .values()
1215 .filter_map(|entry| match entry {
1216 StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1217 StagedEntry::Delete { .. } => None,
1218 })
1219 .collect();
1220 let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1221 let Ok(entries) = stdfs::read_dir(&body_dir) else {
1222 return;
1223 };
1224 for entry in entries.flatten() {
1225 let name = entry.file_name().to_string_lossy().into_owned();
1226 if !live.contains(&name) {
1227 let _ = stdfs::remove_file(entry.path());
1228 }
1229 }
1230}
1231
1232fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1233 if let Some(parent) = path.parent() {
1234 stdfs::create_dir_all(parent)
1235 .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1236 }
1237 let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1238 stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1239 match stdfs::rename(&tmp, path) {
1240 Ok(()) => Ok(()),
1241 Err(err) => {
1242 let _ = stdfs::remove_file(path);
1243 stdfs::rename(&tmp, path).map_err(|retry| {
1244 format!(
1245 "rename {} to {}: {err}; retry: {retry}",
1246 tmp.display(),
1247 path.display()
1248 )
1249 })
1250 }
1251 }
1252}
1253
1254fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1255 match entry {
1256 StagedEntry::Write { body_hash, .. } => {
1257 let bytes = read_body(state, body_hash)
1258 .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1259 atomic_write(path, &bytes)
1260 }
1261 StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1262 Ok(metadata) if metadata.is_dir() => {
1263 if *recursive {
1264 stdfs::remove_dir_all(path)
1265 .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1266 } else {
1267 stdfs::remove_dir(path)
1268 .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1269 }
1270 }
1271 Ok(_) => stdfs::remove_file(path)
1272 .map_err(|err| format!("remove_file {}: {err}", path.display())),
1273 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1274 Err(err) => Err(format!("stat {}: {err}", path.display())),
1275 },
1276 }
1277}
1278
1279fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1280 let key = normalize_logical(path);
1281 if let Some(entry) = state.entries.get(&key) {
1282 return Some(match entry {
1283 StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1284 StagedEntry::Delete { .. } => Err(not_found(&key)),
1285 });
1286 }
1287 if deleted_ancestor(state, &key) {
1288 return Some(Err(not_found(&key)));
1289 }
1290 None
1291}
1292
1293fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1294 let dir_key = normalize_logical(path);
1295 if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1296 || deleted_ancestor(state, &dir_key)
1297 || matches!(
1298 state.entries.get(&dir_key),
1299 Some(StagedEntry::Delete { .. })
1300 )
1301 {
1302 return Err(not_found(&dir_key));
1303 }
1304 if !path.exists() && !has_staged_descendant(state, &dir_key) {
1305 return Err(not_found(&dir_key));
1306 }
1307
1308 let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1309 if path.exists() {
1310 for entry in stdfs::read_dir(path)? {
1311 let entry = entry?;
1312 let name = entry.file_name().to_string_lossy().into_owned();
1313 let file_type = entry.file_type().ok();
1314 let metadata = entry.metadata().ok();
1315 entries.insert(
1316 name.clone(),
1317 OverlayDirEntry {
1318 name,
1319 is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1320 is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1321 size: metadata.map(|m| m.len()).unwrap_or(0),
1322 },
1323 );
1324 }
1325 }
1326
1327 for (path, entry) in &state.entries {
1328 let Some(name) = overlay_child_name(path, &dir_key) else {
1329 continue;
1330 };
1331 match entry {
1332 StagedEntry::Write { len, .. } => {
1333 let is_dir = path.parent() != Some(dir_key.as_path());
1334 entries.insert(
1335 name.clone(),
1336 OverlayDirEntry {
1337 name,
1338 is_dir,
1339 is_symlink: false,
1340 size: if is_dir { 0 } else { *len },
1341 },
1342 );
1343 }
1344 StagedEntry::Delete { .. } => {
1345 if path.parent() == Some(dir_key.as_path()) {
1346 entries.remove(&name);
1347 }
1348 }
1349 }
1350 }
1351
1352 Ok(entries.into_values().collect())
1353}
1354
/// Returns the name of the first path component of `path` below `dir`.
///
/// Yields `None` when `path` is not under `dir`, when it equals `dir`, or
/// when the first remaining component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Some(Component::Normal(name)) = relative.components().next() {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1364
1365fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1366 if let Some(entry) = state.entries.get(path) {
1367 return matches!(entry, StagedEntry::Write { .. });
1368 }
1369 if deleted_ancestor(state, path) {
1370 return false;
1371 }
1372 if has_staged_descendant(state, path) {
1373 return true;
1374 }
1375 path.exists()
1376}
1377
1378fn parent_exists(state: &SessionState, path: &Path) -> bool {
1379 let Some(parent) = path.parent() else {
1380 return true;
1381 };
1382 if parent.as_os_str().is_empty() {
1383 return true;
1384 }
1385 if let Some(entry) = state.entries.get(parent) {
1386 return !matches!(entry, StagedEntry::Delete { .. });
1387 }
1388 if deleted_ancestor(state, parent) {
1389 return false;
1390 }
1391 if has_staged_descendant(state, parent) {
1392 return true;
1393 }
1394 parent.is_dir()
1395}
1396
1397fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1398 state.entries.iter().any(|(candidate, entry)| {
1399 matches!(entry, StagedEntry::Delete { .. })
1400 && path != candidate.as_path()
1401 && path.starts_with(candidate)
1402 })
1403}
1404
1405fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1406 state.entries.iter().any(|(candidate, entry)| {
1407 matches!(entry, StagedEntry::Write { .. })
1408 && candidate != path
1409 && candidate.starts_with(path)
1410 })
1411}
1412
1413fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1414 state
1415 .entries
1416 .keys()
1417 .filter(|candidate| *candidate == path || candidate.starts_with(path))
1418 .cloned()
1419 .collect()
1420}
1421
1422fn validate_delete_shape(
1423 builtin: &'static str,
1424 path: &Path,
1425 recursive: bool,
1426) -> Result<(), HostlibError> {
1427 let Ok(metadata) = stdfs::symlink_metadata(path) else {
1428 return Ok(());
1429 };
1430 if metadata.is_dir() && !recursive {
1431 let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1432 builtin,
1433 message: format!("read_dir `{}`: {err}", path.display()),
1434 })?;
1435 if entries.next().is_some() {
1436 return Err(HostlibError::Backend {
1437 builtin,
1438 message: format!(
1439 "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1440 path.display()
1441 ),
1442 });
1443 }
1444 }
1445 Ok(())
1446}
1447
1448fn status_from_state(state: &SessionState) -> StagedStatus {
1449 let now = now_ms();
1450 let mut pending_writes = Vec::new();
1451 let mut total_bytes_pending = 0u64;
1452 let mut oldest = None;
1453 for (path, entry) in &state.entries {
1454 total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1455 oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1456 old.min(entry.created_at_ms())
1457 }));
1458 let (kind, bytes_added, bytes_removed) = match entry {
1459 StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1460 StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1461 };
1462 pending_writes.push(PendingWrite {
1463 path: path.to_string_lossy().into_owned(),
1464 kind,
1465 bytes_added,
1466 bytes_removed,
1467 });
1468 }
1469 StagedStatus {
1470 pending_writes,
1471 total_bytes_pending,
1472 oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1473 }
1474}
1475
1476fn disk_size(path: &Path) -> Option<u64> {
1477 let metadata = stdfs::symlink_metadata(path).ok()?;
1478 if metadata.is_file() {
1479 return Some(metadata.len());
1480 }
1481 if metadata.is_dir() {
1482 let mut total = 0u64;
1483 for entry in walkdir::WalkDir::new(path)
1484 .into_iter()
1485 .filter_map(Result::ok)
1486 {
1487 if let Ok(metadata) = entry.metadata() {
1488 if metadata.is_file() {
1489 total = total.saturating_add(metadata.len());
1490 }
1491 }
1492 }
1493 return Some(total);
1494 }
1495 Some(metadata.len())
1496}
1497
1498fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1499 if paths.is_empty() {
1500 return state.entries.keys().cloned().collect();
1501 }
1502 let selected: BTreeSet<PathBuf> = paths
1503 .iter()
1504 .map(|path| normalize_logical(Path::new(path)))
1505 .collect();
1506 state
1507 .entries
1508 .keys()
1509 .filter(|path| selected.contains(*path))
1510 .cloned()
1511 .collect()
1512}
1513
1514fn active_session_id(explicit: Option<&str>) -> Option<String> {
1515 explicit
1516 .map(str::to_string)
1517 .or_else(harn_vm::agent_sessions::current_session_id)
1518 .filter(|id| !id.trim().is_empty())
1519}
1520
1521fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1522 if session_id.trim().is_empty() {
1523 return Err(HostlibError::InvalidParameter {
1524 builtin,
1525 param: "session_id",
1526 message: "must not be empty".to_string(),
1527 });
1528 }
1529 Ok(())
1530}
1531
/// Returns the process's current working directory, or `.` if it cannot be
/// determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1535
1536fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1537 let mut dir = root.to_path_buf();
1538 for component in STATE_REL {
1539 dir.push(component);
1540 }
1541 dir.push(sanitize_component(session_id));
1542 dir
1543}
1544
1545fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1546 session_dir(root, session_id).join("manifest.json")
1547}
1548
1549fn sanitize_component(input: &str) -> String {
1550 let sanitized: String = input
1551 .chars()
1552 .map(|ch| match ch {
1553 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1554 _ => '_',
1555 })
1556 .collect();
1557 let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1563 if sanitized == input && !is_dotted {
1564 sanitized
1565 } else {
1566 let hash = hex::encode(Sha256::digest(input.as_bytes()));
1567 format!("{sanitized}-{}", &hash[..12])
1568 }
1569}
1570
1571fn normalize_logical(path: &Path) -> PathBuf {
1572 let absolute = if path.is_absolute() {
1573 path.to_path_buf()
1574 } else {
1575 default_root().join(path)
1576 };
1577 let mut out = PathBuf::new();
1578 for component in absolute.components() {
1579 match component {
1580 Component::ParentDir => {
1581 out.pop();
1582 }
1583 Component::CurDir => {}
1584 other => out.push(other),
1585 }
1586 }
1587 out
1588}
1589
/// Builds the `NotFound` I/O error reported for paths hidden by the overlay.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1596
/// Returns the current wall-clock time in milliseconds since the Unix epoch,
/// or 0 if the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1603
1604fn emit_staged_update(state: &SessionState) {
1605 let status = status_from_state(state);
1606 harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1607 session_id: state.session_id.clone(),
1608 pending_count: status.pending_writes.len(),
1609 total_bytes: status.total_bytes_pending,
1610 });
1611}
1612
1613fn pending_write_to_value(write: PendingWrite) -> VmValue {
1614 build_dict([
1615 ("path", str_value(&write.path)),
1616 ("kind", str_value(write.kind)),
1617 ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1618 ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1619 ])
1620}
1621
1622fn status_to_value(status: StagedStatus) -> VmValue {
1623 build_dict([
1624 (
1625 "pending_writes",
1626 VmValue::List(Arc::new(
1627 status
1628 .pending_writes
1629 .into_iter()
1630 .map(pending_write_to_value)
1631 .collect(),
1632 )),
1633 ),
1634 (
1635 "total_bytes_pending",
1636 VmValue::Int(status.total_bytes_pending as i64),
1637 ),
1638 (
1639 "oldest_pending_age_ms",
1640 VmValue::Int(status.oldest_pending_age_ms),
1641 ),
1642 ])
1643}
1644
1645fn commit_result_to_value(result: CommitResult) -> VmValue {
1646 build_dict([
1647 (
1648 "committed_paths",
1649 VmValue::List(Arc::new(
1650 result
1651 .committed_paths
1652 .into_iter()
1653 .map(|path| VmValue::String(Arc::from(path)))
1654 .collect(),
1655 )),
1656 ),
1657 (
1658 "failed_paths_with_reasons",
1659 VmValue::List(Arc::new(
1660 result
1661 .failed_paths_with_reasons
1662 .into_iter()
1663 .map(|(path, reason)| {
1664 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1665 })
1666 .collect(),
1667 )),
1668 ),
1669 ])
1670}
1671
1672fn discard_result_to_value(result: DiscardResult) -> VmValue {
1673 build_dict([(
1674 "discarded_paths",
1675 VmValue::List(Arc::new(
1676 result
1677 .discarded_paths
1678 .into_iter()
1679 .map(|path| VmValue::String(Arc::from(path)))
1680 .collect(),
1681 )),
1682 )])
1683}
1684
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Session ids made only of dots (or empty) must never come back as a
    /// `.`/`..` token or any other non-normal path component.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        for evil in ["..", ".", "...", ""] {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let all_dots = safe.bytes().all(|b| b == b'.');
            assert!(!all_dots, "`{evil}` -> `{safe}` is still all dots");

            let only_normal = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(only_normal, "`{safe}` contains a traversal component");
        }
    }

    /// Ids that already consist of allowed characters are returned unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        assert_eq!(sanitize_component("abc-123_v2.0"), "abc-123_v2.0");
    }

    /// A `..` session id must not let the session directory climb out of the
    /// staged-state root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let has_parent_dir = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!has_parent_dir, "session_dir({dir:?}) escapes via `..`");

        let mut staged = Path::new("/workspace").to_path_buf();
        staged.extend(STATE_REL);
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}