1use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25 build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26 require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Names under which the `fs` builtins are registered; the same names are
// attached to the `HostlibError` values those builtins produce.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

/// Manifest schema version written by `persist_state`; `load_state` rejects
/// manifests carrying any other value.
const MANIFEST_VERSION: u32 = 1;
/// Path components `.harn/state/staged`. Presumably resolved relative to the
/// session root by the path helpers; the consumer is not in this part of
/// the file, so confirm there.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Hostlib capability that registers the `fs` module builtins.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46 fn module_name(&self) -> &'static str {
47 "fs"
48 }
49
50 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51 register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52 register(
53 registry,
54 STATUS_BUILTIN,
55 "staged_status",
56 staged_status_builtin,
57 );
58 register(
59 registry,
60 COMMIT_BUILTIN,
61 "commit_staged",
62 commit_staged_builtin,
63 );
64 register(
65 registry,
66 DISCARD_BUILTIN,
67 "discard_staged",
68 discard_staged_builtin,
69 );
70 register_gated(
73 registry,
74 SAFE_TEXT_PATCH_BUILTIN,
75 "safe_text_patch",
76 safe_text_patch_builtin,
77 );
78 register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79 register(
80 registry,
81 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82 "emit_safe_text_patch_result",
83 emit_safe_text_patch_result_builtin,
84 );
85 }
86}
87
88fn register(
89 registry: &mut BuiltinRegistry,
90 name: &'static str,
91 method: &'static str,
92 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94 let handler: SyncHandler = std::sync::Arc::new(runner);
95 registry.register(RegisteredBuiltin {
96 name,
97 module: "fs",
98 method,
99 handler,
100 });
101}
102
103fn register_gated(
104 registry: &mut BuiltinRegistry,
105 name: &'static str,
106 method: &'static str,
107 runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109 registry.register(RegisteredBuiltin {
110 name,
111 module: "fs",
112 method,
113 handler: crate::tools::permissions::gated_handler(name, runner),
114 });
115}
116
/// Write mode of a filesystem session. Serialized in lowercase
/// (`"immediate"` / `"staged"`).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// The staging helpers in this file return `None` in this mode, leaving
    /// the operation to the caller's own path.
    Immediate,
    /// Writes and deletes are recorded as staged entries in the session
    /// state until they are committed or discarded.
    Staged,
}
126
127impl FsMode {
128 fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129 match raw {
130 "immediate" => Ok(Self::Immediate),
131 "staged" => Ok(Self::Staged),
132 other => Err(HostlibError::InvalidParameter {
133 builtin,
134 param: "mode",
135 message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136 }),
137 }
138 }
139
140 pub fn as_str(self) -> &'static str {
142 match self {
143 Self::Immediate => "immediate",
144 Self::Staged => "staged",
145 }
146 }
147}
148
/// Serialized form of a session's state, written by `persist_state` and
/// read back by `load_state`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Schema version; must equal `MANIFEST_VERSION` to be loaded.
    version: u32,
    /// Session this manifest belongs to; checked against the requested id on load.
    session_id: String,
    /// Mode the session was in when the manifest was written.
    mode: FsMode,
    /// Session root, stored as a lossily converted string.
    root: String,
    /// Staged entries keyed by their path, stored as a lossily converted string.
    entries: BTreeMap<String, StagedEntry>,
}
157
/// One pending change recorded for a path. Serialized with a `kind` tag in
/// snake_case (`"write"` / `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// A staged file write.
    Write {
        /// Hex SHA-256 of the body; also the file name under the session's
        /// `bodies` directory (see `write_body`).
        body_hash: String,
        /// Length of the staged body in bytes.
        len: u64,
        /// Value of `now_ms()` when the entry was staged.
        created_at_ms: i64,
    },
    /// A staged deletion.
    Delete {
        /// The `recursive` flag passed when the deletion was staged.
        recursive: bool,
        /// Value of `now_ms()` when the entry was staged.
        created_at_ms: i64,
    },
}
171
172impl StagedEntry {
173 fn created_at_ms(&self) -> i64 {
174 match self {
175 Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176 *created_at_ms
177 }
178 }
179 }
180
181 fn body_len(&self) -> u64 {
182 match self {
183 Self::Write { len, .. } => *len,
184 Self::Delete { .. } => 0,
185 }
186 }
187}
188
/// In-memory state of one filesystem session, cached in the `SESSIONS` map
/// and mirrored to disk by `persist_state`.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier of the session.
    session_id: String,
    /// Current write mode.
    mode: FsMode,
    /// Root used to locate the session's on-disk state directory.
    root: PathBuf,
    /// Pending changes keyed by normalized path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
196
/// Result of staging a write through `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not already exist according to `overlay_exists`.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry returned by the overlay-aware `read_dir`.
///
/// NOTE(review): the producer (`overlay_read_dir`) is not in this part of
/// the file; the field descriptions below are inferred from the names and
/// should be confirmed against it.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name (presumably the final path component).
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symlink.
    pub(crate) is_symlink: bool,
    /// Entry size (presumably in bytes).
    pub(crate) size: u64,
}
210
/// Summary of a session's staged changes, as returned by [`staged_status`].
///
/// NOTE(review): populated by `status_from_state`, which is not in this part
/// of the file; field meanings below are inferred from the names.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// The pending changes.
    pub pending_writes: Vec<PendingWrite>,
    /// Presumably the summed size of all pending write bodies, in bytes.
    pub total_bytes_pending: u64,
    /// Presumably the age, in milliseconds, of the oldest pending entry.
    pub oldest_pending_age_ms: i64,
}
221
/// One pending change listed in [`StagedStatus`].
///
/// NOTE(review): the code that fills these fields is not in this part of the
/// file; field meanings below are inferred from the names.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Path the change applies to.
    pub path: String,
    /// Kind of change (presumably mirrors the `StagedEntry` variant).
    pub kind: &'static str,
    /// Presumably the number of bytes the change adds.
    pub bytes_added: u64,
    /// Presumably the number of bytes the change removes.
    pub bytes_removed: u64,
}
234
/// Result of [`set_mode`].
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode the session was in before the call.
    pub previous_mode: FsMode,
}
241
/// Result of [`commit_staged`].
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths whose staged entry was committed and removed from the session.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for entries that failed the scope check or the
    /// commit itself; these entries remain staged.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Result of [`discard_staged`].
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entry was removed without being applied.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264pub fn configure_session_root(session_id: &str, root: &Path) {
269 if session_id.trim().is_empty() {
270 return;
271 }
272 let root = normalize_logical(root);
273 let mut guard = sessions()
274 .lock()
275 .expect("hostlib fs session mutex poisoned");
276 match guard.get_mut(session_id) {
277 Some(state) if state.entries.is_empty() => {
278 state.root = root;
279 }
280 Some(_) => {}
281 None => {
282 let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
283 session_id: session_id.to_string(),
284 mode: FsMode::Immediate,
285 root,
286 entries: BTreeMap::new(),
287 });
288 guard.insert(session_id.to_string(), state);
289 }
290 }
291}
292
293pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
295 if session_id.trim().is_empty() {
296 return None;
297 }
298 let guard = sessions()
299 .lock()
300 .expect("hostlib fs session mutex poisoned");
301 guard.get(session_id).map(|state| state.root.clone())
302}
303
304pub fn set_mode(
306 session_id: &str,
307 mode: FsMode,
308 root: Option<&Path>,
309) -> Result<SetModeResult, HostlibError> {
310 validate_session_id(SET_MODE_BUILTIN, session_id)?;
311 let mut guard = sessions()
312 .lock()
313 .expect("hostlib fs session mutex poisoned");
314 let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
315 let previous_mode = state.mode;
316 state.mode = mode;
317 persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
318 builtin: SET_MODE_BUILTIN,
319 message: err,
320 })?;
321 guard.insert(session_id.to_string(), state);
322 Ok(SetModeResult { previous_mode })
323}
324
325pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
327 validate_session_id(STATUS_BUILTIN, session_id)?;
328 let mut guard = sessions()
329 .lock()
330 .expect("hostlib fs session mutex poisoned");
331 let state = state_for_locked(&mut guard, session_id, None)?;
332 let status = status_from_state(&state);
333 guard.insert(session_id.to_string(), state);
334 Ok(status)
335}
336
337pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
339 validate_session_id(COMMIT_BUILTIN, session_id)?;
340 let mut guard = sessions()
341 .lock()
342 .expect("hostlib fs session mutex poisoned");
343 let mut state = state_for_locked(&mut guard, session_id, None)?;
344 let selected = selected_paths(&state, paths);
345 let mut committed_paths = Vec::new();
346 let mut failed_paths_with_reasons = Vec::new();
347
348 for path in selected {
349 let Some(entry) = state.entries.get(&path).cloned() else {
350 continue;
351 };
352 let path_label = path.to_string_lossy().into_owned();
353 let access = match entry {
359 StagedEntry::Write { .. } => FsAccess::Write,
360 StagedEntry::Delete { .. } => FsAccess::Delete,
361 };
362 if let Err(violation) = check_fs_path_scope(&path, access) {
363 failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
364 continue;
365 }
366 match commit_entry(&state, &path, &entry) {
367 Ok(()) => {
368 state.entries.remove(&path);
369 committed_paths.push(path_label);
370 }
371 Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
372 }
373 }
374
375 persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
376 builtin: COMMIT_BUILTIN,
377 message: err,
378 })?;
379 emit_staged_update(&state);
380 guard.insert(session_id.to_string(), state);
381 Ok(CommitResult {
382 committed_paths,
383 failed_paths_with_reasons,
384 })
385}
386
387pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
389 validate_session_id(DISCARD_BUILTIN, session_id)?;
390 let mut guard = sessions()
391 .lock()
392 .expect("hostlib fs session mutex poisoned");
393 let mut state = state_for_locked(&mut guard, session_id, None)?;
394 let selected = selected_paths(&state, paths);
395 let mut discarded_paths = Vec::new();
396 for path in selected {
397 if state.entries.remove(&path).is_some() {
398 discarded_paths.push(path.to_string_lossy().into_owned());
399 }
400 }
401 persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
402 builtin: DISCARD_BUILTIN,
403 message: err,
404 })?;
405 emit_staged_update(&state);
406 guard.insert(session_id.to_string(), state);
407 Ok(DiscardResult { discarded_paths })
408}
409
410pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
417 validate_session_id(DISCARD_BUILTIN, session_id)?;
418 let mut guard = sessions()
419 .lock()
420 .expect("hostlib fs session mutex poisoned");
421 let state = match guard.remove(session_id) {
422 Some(state) => state,
423 None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
424 HostlibError::Backend {
425 builtin: DISCARD_BUILTIN,
426 message: err,
427 }
428 })?,
429 };
430 let dir = session_dir(&state.root, &state.session_id);
431 match stdfs::remove_dir_all(&dir) {
432 Ok(()) => Ok(()),
433 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
434 Err(err) => Err(HostlibError::Backend {
435 builtin: DISCARD_BUILTIN,
436 message: format!("remove staged session {}: {err}", dir.display()),
437 }),
438 }
439}
440
441pub(crate) fn read(
442 path: &Path,
443 explicit_session_id: Option<&str>,
444) -> Option<std::io::Result<Vec<u8>>> {
445 let session_id = active_session_id(explicit_session_id)?;
446 let mut guard = sessions()
447 .lock()
448 .expect("hostlib fs session mutex poisoned");
449 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
450 let result = if state.mode == FsMode::Staged {
451 overlay_read(&state, path)
452 } else {
453 None
454 };
455 guard.insert(session_id, state);
456 result
457}
458
459pub(crate) fn read_to_string(
460 path: &Path,
461 explicit_session_id: Option<&str>,
462) -> Option<std::io::Result<String>> {
463 read(path, explicit_session_id).map(|result| {
464 result.and_then(|bytes| {
465 String::from_utf8(bytes).map_err(|err| {
466 std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
467 })
468 })
469 })
470}
471
472pub(crate) fn read_dir(
473 path: &Path,
474 explicit_session_id: Option<&str>,
475) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
476 let session_id = active_session_id(explicit_session_id)?;
477 let mut guard = sessions()
478 .lock()
479 .expect("hostlib fs session mutex poisoned");
480 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
481 let result = if state.mode == FsMode::Staged {
482 Some(overlay_read_dir(&state, path))
483 } else {
484 None
485 };
486 guard.insert(session_id, state);
487 result
488}
489
490pub(crate) fn stage_write_or_none(
491 builtin: &'static str,
492 path: &Path,
493 bytes: &[u8],
494 create_parents: bool,
495 overwrite: bool,
496 explicit_session_id: Option<&str>,
497) -> Result<Option<WriteOutcome>, HostlibError> {
498 let Some(session_id) = active_session_id(explicit_session_id) else {
499 return Ok(None);
500 };
501 let mut guard = sessions()
502 .lock()
503 .expect("hostlib fs session mutex poisoned");
504 let mut state = state_for_locked(&mut guard, &session_id, None)?;
505 if state.mode != FsMode::Staged {
506 guard.insert(session_id, state);
507 return Ok(None);
508 }
509
510 let key = normalize_logical(path);
511 let existed = overlay_exists(&state, &key);
512 if existed && !overwrite {
513 guard.insert(session_id, state);
514 return Err(HostlibError::Backend {
515 builtin,
516 message: format!("`{}` exists and overwrite=false", key.display()),
517 });
518 }
519 if !create_parents && !parent_exists(&state, &key) {
520 guard.insert(session_id, state);
521 return Err(HostlibError::Backend {
522 builtin,
523 message: format!("parent directory for `{}` does not exist", key.display()),
524 });
525 }
526
527 let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
528 builtin,
529 message: err,
530 })?;
531 state.entries.insert(
532 key.clone(),
533 StagedEntry::Write {
534 body_hash: hash,
535 len: bytes.len() as u64,
536 created_at_ms: now_ms(),
537 },
538 );
539 persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
540 builtin,
541 message: err,
542 })?;
543 emit_staged_update(&state);
544 guard.insert(session_id, state);
545 Ok(Some(WriteOutcome {
546 created: !existed,
547 bytes_written: bytes.len(),
548 }))
549}
550
551pub(crate) fn stage_delete_or_none(
552 builtin: &'static str,
553 path: &Path,
554 recursive: bool,
555 explicit_session_id: Option<&str>,
556) -> Result<Option<bool>, HostlibError> {
557 let Some(session_id) = active_session_id(explicit_session_id) else {
558 return Ok(None);
559 };
560 let mut guard = sessions()
561 .lock()
562 .expect("hostlib fs session mutex poisoned");
563 let mut state = state_for_locked(&mut guard, &session_id, None)?;
564 if state.mode != FsMode::Staged {
565 guard.insert(session_id, state);
566 return Ok(None);
567 }
568
569 let key = normalize_logical(path);
570 let staged_targets = staged_paths_under(&state, &key);
571 let disk_exists = key.exists();
572 if !disk_exists && staged_targets.is_empty() {
573 guard.insert(session_id, state);
574 return Ok(Some(false));
575 }
576
577 if !disk_exists {
578 for staged in staged_targets {
579 state.entries.remove(&staged);
580 }
581 } else {
582 validate_delete_shape(builtin, &key, recursive)?;
583 for staged in staged_targets {
584 state.entries.remove(&staged);
585 }
586 state.entries.insert(
587 key.clone(),
588 StagedEntry::Delete {
589 recursive,
590 created_at_ms: now_ms(),
591 },
592 );
593 }
594 persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
595 builtin,
596 message: err,
597 })?;
598 emit_staged_update(&state);
599 guard.insert(session_id, state);
600 Ok(Some(true))
601}
602
/// Outcome of [`safe_text_patch`].
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// What happened: applied, rejected as stale, or skipped as a no-op.
    pub result: SafeTextPatchResult,
    /// `sha256:`-prefixed hash of the content found before the patch (the
    /// hash of empty content when the file did not exist).
    pub current_hash: String,
    /// `sha256:`-prefixed hash of the requested new content.
    pub after_hash: String,
    /// `true` only when the patch was applied and the file did not exist before.
    pub created: bool,
    /// Length of the new content when applied; `0` otherwise.
    pub bytes_written: usize,
}
619
/// Classification of a [`safe_text_patch`] call.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The new content was written (or staged).
    Applied,
    /// The caller's expected hash did not match the current content; nothing
    /// was written.
    StaleBase,
    /// The file already held exactly the requested content; nothing was
    /// written.
    NoOp,
}

impl SafeTextPatchResult {
    /// Returns the snake_case label used in builtin results.
    fn as_str(self) -> &'static str {
        use SafeTextPatchResult::{Applied, NoOp, StaleBase};

        match self {
            NoOp => "no_op",
            StaleBase => "stale_base",
            Applied => "applied",
        }
    }
}
643
644fn hash_label(bytes: &[u8]) -> String {
648 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
649}
650
651pub fn safe_text_patch(
673 path: &Path,
674 content: &str,
675 expected_hash: Option<&str>,
676 session_id: Option<&str>,
677 create_parents: bool,
678 overwrite: bool,
679) -> Result<SafeTextPatchOutcome, HostlibError> {
680 let new_bytes = content.as_bytes();
681 let after_hash = hash_label(new_bytes);
682
683 if let Some(outcome) = safe_text_patch_staged(
684 path,
685 new_bytes,
686 expected_hash,
687 session_id,
688 create_parents,
689 overwrite,
690 &after_hash,
691 )? {
692 return Ok(outcome);
693 }
694
695 safe_text_patch_disk(
696 path,
697 new_bytes,
698 expected_hash,
699 create_parents,
700 overwrite,
701 after_hash,
702 )
703}
704
705#[allow(clippy::too_many_arguments)]
711fn safe_text_patch_staged(
712 path: &Path,
713 new_bytes: &[u8],
714 expected_hash: Option<&str>,
715 session_id: Option<&str>,
716 create_parents: bool,
717 overwrite: bool,
718 after_hash: &str,
719) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
720 let Some(session) = active_session_id(session_id) else {
721 return Ok(None);
722 };
723 let mut guard = sessions()
724 .lock()
725 .expect("hostlib fs session mutex poisoned");
726 let mut state = state_for_locked(&mut guard, &session, None)?;
727 if state.mode != FsMode::Staged {
728 guard.insert(session, state);
729 return Ok(None);
730 }
731
732 let key = normalize_logical(path);
733 let (existing_bytes, existed) = match overlay_read(&state, path) {
734 Some(Ok(bytes)) => (bytes, true),
735 Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
736 Some(Err(err)) => {
737 guard.insert(session, state);
738 return Err(HostlibError::Backend {
739 builtin: SAFE_TEXT_PATCH_BUILTIN,
740 message: format!("read `{}`: {err}", path.display()),
741 });
742 }
743 None => match stdfs::read(path) {
744 Ok(bytes) => (bytes, true),
745 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
746 Err(err) => {
747 guard.insert(session, state);
748 return Err(HostlibError::Backend {
749 builtin: SAFE_TEXT_PATCH_BUILTIN,
750 message: format!("read `{}`: {err}", path.display()),
751 });
752 }
753 },
754 };
755 let current_hash = hash_label(&existing_bytes);
756
757 if let Some(expected) = expected_hash {
758 if expected != current_hash {
759 guard.insert(session, state);
760 return Ok(Some(SafeTextPatchOutcome {
761 result: SafeTextPatchResult::StaleBase,
762 current_hash,
763 after_hash: after_hash.to_string(),
764 created: false,
765 bytes_written: 0,
766 }));
767 }
768 }
769
770 if existed && existing_bytes == new_bytes {
771 guard.insert(session, state);
772 return Ok(Some(SafeTextPatchOutcome {
773 result: SafeTextPatchResult::NoOp,
774 current_hash,
775 after_hash: after_hash.to_string(),
776 created: false,
777 bytes_written: 0,
778 }));
779 }
780
781 let overlay_existed = overlay_exists(&state, &key);
782 if overlay_existed && !overwrite {
783 guard.insert(session, state);
784 return Err(HostlibError::Backend {
785 builtin: SAFE_TEXT_PATCH_BUILTIN,
786 message: format!("`{}` exists and overwrite=false", key.display()),
787 });
788 }
789 if !create_parents && !parent_exists(&state, &key) {
790 guard.insert(session, state);
791 return Err(HostlibError::Backend {
792 builtin: SAFE_TEXT_PATCH_BUILTIN,
793 message: format!("parent directory for `{}` does not exist", key.display()),
794 });
795 }
796
797 let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
798 builtin: SAFE_TEXT_PATCH_BUILTIN,
799 message: err,
800 })?;
801 state.entries.insert(
802 key.clone(),
803 StagedEntry::Write {
804 body_hash,
805 len: new_bytes.len() as u64,
806 created_at_ms: now_ms(),
807 },
808 );
809 persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
810 builtin: SAFE_TEXT_PATCH_BUILTIN,
811 message: err,
812 })?;
813 emit_staged_update(&state);
814 guard.insert(session, state);
815
816 Ok(Some(SafeTextPatchOutcome {
817 result: SafeTextPatchResult::Applied,
818 current_hash,
819 after_hash: after_hash.to_string(),
820 created: !existed,
821 bytes_written: new_bytes.len(),
822 }))
823}
824
825fn safe_text_patch_disk(
830 path: &Path,
831 new_bytes: &[u8],
832 expected_hash: Option<&str>,
833 create_parents: bool,
834 overwrite: bool,
835 after_hash: String,
836) -> Result<SafeTextPatchOutcome, HostlibError> {
837 let (existing_bytes, existed) = match stdfs::read(path) {
838 Ok(bytes) => (bytes, true),
839 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
840 Err(err) => {
841 return Err(HostlibError::Backend {
842 builtin: SAFE_TEXT_PATCH_BUILTIN,
843 message: format!("read `{}`: {err}", path.display()),
844 });
845 }
846 };
847 let current_hash = hash_label(&existing_bytes);
848
849 if let Some(expected) = expected_hash {
850 if expected != current_hash {
851 return Ok(SafeTextPatchOutcome {
852 result: SafeTextPatchResult::StaleBase,
853 current_hash,
854 after_hash,
855 created: false,
856 bytes_written: 0,
857 });
858 }
859 }
860
861 if existed && existing_bytes == new_bytes {
862 return Ok(SafeTextPatchOutcome {
863 result: SafeTextPatchResult::NoOp,
864 current_hash,
865 after_hash,
866 created: false,
867 bytes_written: 0,
868 });
869 }
870 if existed && !overwrite {
871 return Err(HostlibError::Backend {
872 builtin: SAFE_TEXT_PATCH_BUILTIN,
873 message: format!("`{}` exists and overwrite=false", path.display()),
874 });
875 }
876 if !create_parents {
877 if let Some(parent) = path.parent() {
878 if !parent.as_os_str().is_empty() && !parent.is_dir() {
879 return Err(HostlibError::Backend {
880 builtin: SAFE_TEXT_PATCH_BUILTIN,
881 message: format!(
882 "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
883 path.display()
884 ),
885 });
886 }
887 }
888 }
889
890 crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
891 atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
892 builtin: SAFE_TEXT_PATCH_BUILTIN,
893 message: format!("write `{}`: {err}", path.display()),
894 })?;
895
896 Ok(SafeTextPatchOutcome {
897 result: SafeTextPatchResult::Applied,
898 current_hash,
899 after_hash,
900 created: !existed,
901 bytes_written: new_bytes.len(),
902 })
903}
904
905fn read_existing(
910 builtin: &'static str,
911 path: &Path,
912 session_id: Option<&str>,
913) -> Result<(Vec<u8>, bool), HostlibError> {
914 if let Some(result) = read(path, session_id) {
915 return match result {
916 Ok(bytes) => Ok((bytes, true)),
917 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
918 Err(err) => Err(HostlibError::Backend {
919 builtin,
920 message: format!("read `{}`: {err}", path.display()),
921 }),
922 };
923 }
924 match stdfs::read(path) {
925 Ok(bytes) => Ok((bytes, true)),
926 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
927 Err(err) => Err(HostlibError::Backend {
928 builtin,
929 message: format!("read `{}`: {err}", path.display()),
930 }),
931 }
932}
933
934fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
935 let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
936 let dict = raw.as_ref();
937 let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
938 let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
939 let path = Path::new(&path_str);
940 enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
941
942 let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
943 let hash = hash_label(&bytes);
944 let content = match std::str::from_utf8(&bytes) {
945 Ok(s) => s.to_string(),
946 Err(err) => {
947 return Err(HostlibError::Backend {
948 builtin: READ_TEXT_BUILTIN,
949 message: format!("`{path_str}` is not valid UTF-8: {err}"),
950 });
951 }
952 };
953 let bytes_len = bytes.len() as i64;
954 Ok(build_dict([
955 ("path", str_value(&path_str)),
956 ("content", str_value(&content)),
957 ("sha256", str_value(&hash)),
958 ("size", VmValue::Int(bytes_len)),
959 ("exists", VmValue::Bool(existed)),
960 ]))
961}
962
963fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
964 let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
965 let dict = raw.as_ref();
966
967 let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
968 let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
969 let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
970 let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
971 let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
972 let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
973
974 enforce_path_scope(
975 SAFE_TEXT_PATCH_BUILTIN,
976 Path::new(&path_str),
977 FsAccess::Write,
978 )?;
979 let outcome = safe_text_patch(
980 Path::new(&path_str),
981 &content,
982 expected_hash.as_deref(),
983 session_id.as_deref(),
984 create_parents,
985 overwrite,
986 )?;
987
988 let entries: Vec<(&'static str, VmValue)> = vec![
989 ("path", str_value(&path_str)),
990 ("result", str_value(outcome.result.as_str())),
991 (
992 "applied",
993 VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
994 ),
995 (
996 "stale_base",
997 VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
998 ),
999 ("current_hash", str_value(&outcome.current_hash)),
1000 ("before_sha256", str_value(&outcome.current_hash)),
1001 ("after_sha256", str_value(&outcome.after_hash)),
1002 ("created", VmValue::Bool(outcome.created)),
1003 ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
1004 (
1005 "expected_hash",
1006 match expected_hash.as_deref() {
1007 Some(hash) => str_value(hash),
1008 None => VmValue::Nil,
1009 },
1010 ),
1011 ];
1012 Ok(build_dict(entries))
1013}
1014
1015fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1016 let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1017 let dict = raw.as_ref();
1018
1019 let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1020 let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1021 let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1022 let bytes_written = optional_int(
1023 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1024 dict,
1025 "bytes_written",
1026 0,
1027 )?;
1028 let failed_hunk_index = match dict.get("failed_hunk_index") {
1029 None | Some(VmValue::Nil) => None,
1030 Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1031 Some(other) => {
1032 return Err(HostlibError::InvalidParameter {
1033 builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1034 param: "failed_hunk_index",
1035 message: format!("expected non-negative integer, got {}", other.type_name()),
1036 });
1037 }
1038 };
1039 let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1040 .or_else(harn_vm::agent_sessions::current_session_id);
1041
1042 if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1043 harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1044 session_id,
1045 path,
1046 result,
1047 hunks_count: hunks_count.max(0) as usize,
1048 bytes_written: bytes_written.max(0) as u64,
1049 failed_hunk_index,
1050 });
1051 Ok(VmValue::Bool(true))
1052 } else {
1053 Ok(VmValue::Bool(false))
1057 }
1058}
1059
1060fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1061 let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1062 let dict = raw.as_ref();
1063 let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1064 let mode = FsMode::parse(
1065 SET_MODE_BUILTIN,
1066 &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1067 )?;
1068 let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1069 let result = set_mode(&session_id, mode, root.as_deref())?;
1070 Ok(build_dict([(
1071 "previous_mode",
1072 str_value(result.previous_mode.as_str()),
1073 )]))
1074}
1075
1076fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1077 let raw = dict_arg(STATUS_BUILTIN, args)?;
1078 let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1079 Ok(status_to_value(staged_status(&session_id)?))
1080}
1081
1082fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1083 let raw = dict_arg(COMMIT_BUILTIN, args)?;
1084 let dict = raw.as_ref();
1085 let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1086 let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1087 Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1088}
1089
1090fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1091 let raw = dict_arg(DISCARD_BUILTIN, args)?;
1092 let dict = raw.as_ref();
1093 let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1094 let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1095 Ok(discard_result_to_value(discard_staged(
1096 &session_id,
1097 &paths,
1098 )?))
1099}
1100
1101fn state_for_locked(
1102 guard: &mut BTreeMap<String, SessionState>,
1103 session_id: &str,
1104 root: Option<PathBuf>,
1105) -> Result<SessionState, HostlibError> {
1106 if let Some(existing) = guard.get(session_id) {
1107 let mut state = existing.clone();
1108 if let Some(root) = root {
1109 if state.entries.is_empty() {
1110 state.root = root;
1111 }
1112 }
1113 return Ok(state);
1114 }
1115 let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1116 builtin: SET_MODE_BUILTIN,
1117 message: err,
1118 })?;
1119 Ok(state)
1120}
1121
1122fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1123 let root = root.unwrap_or_else(default_root);
1124 let manifest_path = manifest_path(&root, session_id);
1125 if manifest_path.exists() {
1126 let text = stdfs::read_to_string(&manifest_path)
1127 .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1128 let manifest: Manifest = serde_json::from_str(&text)
1129 .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1130 if manifest.version != MANIFEST_VERSION {
1131 return Err(format!(
1132 "unsupported staged fs manifest version {} in {}",
1133 manifest.version,
1134 manifest_path.display()
1135 ));
1136 }
1137 if manifest.session_id != session_id {
1138 return Err(format!(
1139 "staged fs manifest session id mismatch in {}",
1140 manifest_path.display()
1141 ));
1142 }
1143 return Ok(SessionState {
1144 session_id: manifest.session_id,
1145 mode: manifest.mode,
1146 root: normalize_logical(Path::new(&manifest.root)),
1147 entries: manifest
1148 .entries
1149 .into_iter()
1150 .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1151 .collect(),
1152 });
1153 }
1154 Ok(SessionState {
1155 session_id: session_id.to_string(),
1156 mode: FsMode::Immediate,
1157 root,
1158 entries: BTreeMap::new(),
1159 })
1160}
1161
1162fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1163 let dir = session_dir(&state.root, &state.session_id);
1164 stdfs::create_dir_all(dir.join("bodies"))
1165 .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1166 let manifest = Manifest {
1167 version: MANIFEST_VERSION,
1168 session_id: state.session_id.clone(),
1169 mode: state.mode,
1170 root: state.root.to_string_lossy().into_owned(),
1171 entries: state
1172 .entries
1173 .iter()
1174 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1175 .collect(),
1176 };
1177 let bytes = serde_json::to_vec_pretty(&manifest)
1178 .map_err(|err| format!("serialize staged manifest: {err}"))?;
1179 atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1180 append_journal(state, op, path)?;
1181 prune_unreferenced_bodies(state);
1182 Ok(())
1183}
1184
1185fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1186 let dir = session_dir(&state.root, &state.session_id);
1187 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1188 let line = serde_json::to_string(&serde_json::json!({
1189 "ts_ms": now_ms(),
1190 "op": op,
1191 "path": path.map(|path| path.to_string_lossy().into_owned()),
1192 "pending_count": state.entries.len(),
1193 }))
1194 .map_err(|err| format!("serialize staged journal: {err}"))?;
1195 let mut file = stdfs::OpenOptions::new()
1196 .create(true)
1197 .append(true)
1198 .open(dir.join("journal.jsonl"))
1199 .map_err(|err| format!("open staged journal: {err}"))?;
1200 writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1201}
1202
1203fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1204 let hash = hex::encode(Sha256::digest(bytes));
1205 let path = session_dir(&state.root, &state.session_id)
1206 .join("bodies")
1207 .join(&hash);
1208 if !path.exists() {
1209 atomic_write(&path, bytes)?;
1210 }
1211 Ok(hash)
1212}
1213
1214fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1215 stdfs::read(
1216 session_dir(&state.root, &state.session_id)
1217 .join("bodies")
1218 .join(hash),
1219 )
1220}
1221
1222fn prune_unreferenced_bodies(state: &SessionState) {
1223 let live: BTreeSet<String> = state
1224 .entries
1225 .values()
1226 .filter_map(|entry| match entry {
1227 StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1228 StagedEntry::Delete { .. } => None,
1229 })
1230 .collect();
1231 let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1232 let Ok(entries) = stdfs::read_dir(&body_dir) else {
1233 return;
1234 };
1235 for entry in entries.flatten() {
1236 let name = entry.file_name().to_string_lossy().into_owned();
1237 if !live.contains(&name) {
1238 let _ = stdfs::remove_file(entry.path());
1239 }
1240 }
1241}
1242
1243fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1244 if let Some(parent) = path.parent() {
1245 stdfs::create_dir_all(parent)
1246 .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1247 }
1248 let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1249 stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1250 match stdfs::rename(&tmp, path) {
1251 Ok(()) => Ok(()),
1252 Err(err) => {
1253 let _ = stdfs::remove_file(path);
1254 stdfs::rename(&tmp, path).map_err(|retry| {
1255 format!(
1256 "rename {} to {}: {err}; retry: {retry}",
1257 tmp.display(),
1258 path.display()
1259 )
1260 })
1261 }
1262 }
1263}
1264
1265fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1266 match entry {
1267 StagedEntry::Write { body_hash, .. } => {
1268 let bytes = read_body(state, body_hash)
1269 .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1270 atomic_write(path, &bytes)
1271 }
1272 StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1273 Ok(metadata) if metadata.is_dir() => {
1274 if *recursive {
1275 stdfs::remove_dir_all(path)
1276 .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1277 } else {
1278 stdfs::remove_dir(path)
1279 .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1280 }
1281 }
1282 Ok(_) => stdfs::remove_file(path)
1283 .map_err(|err| format!("remove_file {}: {err}", path.display())),
1284 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1285 Err(err) => Err(format!("stat {}: {err}", path.display())),
1286 },
1287 }
1288}
1289
1290fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1291 let key = normalize_logical(path);
1292 if let Some(entry) = state.entries.get(&key) {
1293 return Some(match entry {
1294 StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1295 StagedEntry::Delete { .. } => Err(not_found(&key)),
1296 });
1297 }
1298 if deleted_ancestor(state, &key) {
1299 return Some(Err(not_found(&key)));
1300 }
1301 None
1302}
1303
1304fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1305 let dir_key = normalize_logical(path);
1306 if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1307 || deleted_ancestor(state, &dir_key)
1308 || matches!(
1309 state.entries.get(&dir_key),
1310 Some(StagedEntry::Delete { .. })
1311 )
1312 {
1313 return Err(not_found(&dir_key));
1314 }
1315 if !path.exists() && !has_staged_descendant(state, &dir_key) {
1316 return Err(not_found(&dir_key));
1317 }
1318
1319 let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1320 if path.exists() {
1321 for entry in stdfs::read_dir(path)? {
1322 let entry = entry?;
1323 let name = entry.file_name().to_string_lossy().into_owned();
1324 let file_type = entry.file_type().ok();
1325 let metadata = entry.metadata().ok();
1326 entries.insert(
1327 name.clone(),
1328 OverlayDirEntry {
1329 name,
1330 is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1331 is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1332 size: metadata.map(|m| m.len()).unwrap_or(0),
1333 },
1334 );
1335 }
1336 }
1337
1338 for (path, entry) in &state.entries {
1339 let Some(name) = overlay_child_name(path, &dir_key) else {
1340 continue;
1341 };
1342 match entry {
1343 StagedEntry::Write { len, .. } => {
1344 let is_dir = path.parent() != Some(dir_key.as_path());
1345 entries.insert(
1346 name.clone(),
1347 OverlayDirEntry {
1348 name,
1349 is_dir,
1350 is_symlink: false,
1351 size: if is_dir { 0 } else { *len },
1352 },
1353 );
1354 }
1355 StagedEntry::Delete { .. } => {
1356 if path.parent() == Some(dir_key.as_path()) {
1357 entries.remove(&name);
1358 }
1359 }
1360 }
1361 }
1362
1363 Ok(entries.into_values().collect())
1364}
1365
/// Returns the first path component of `path` relative to `dir`.
///
/// Yields `None` when `path` is not below `dir`, when it equals `dir`, or
/// when the first remaining component is not a normal name (for example
/// `..`). Non-UTF-8 names are converted lossily.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    match relative.components().next()? {
        Component::Normal(name) => Some(name.to_string_lossy().into_owned()),
        Component::Prefix(_) | Component::RootDir | Component::CurDir | Component::ParentDir => {
            None
        }
    }
}
1375
1376fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1377 if let Some(entry) = state.entries.get(path) {
1378 return matches!(entry, StagedEntry::Write { .. });
1379 }
1380 if deleted_ancestor(state, path) {
1381 return false;
1382 }
1383 if has_staged_descendant(state, path) {
1384 return true;
1385 }
1386 path.exists()
1387}
1388
1389fn parent_exists(state: &SessionState, path: &Path) -> bool {
1390 let Some(parent) = path.parent() else {
1391 return true;
1392 };
1393 if parent.as_os_str().is_empty() {
1394 return true;
1395 }
1396 if let Some(entry) = state.entries.get(parent) {
1397 return !matches!(entry, StagedEntry::Delete { .. });
1398 }
1399 if deleted_ancestor(state, parent) {
1400 return false;
1401 }
1402 if has_staged_descendant(state, parent) {
1403 return true;
1404 }
1405 parent.is_dir()
1406}
1407
1408fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1409 state.entries.iter().any(|(candidate, entry)| {
1410 matches!(entry, StagedEntry::Delete { .. })
1411 && path != candidate.as_path()
1412 && path.starts_with(candidate)
1413 })
1414}
1415
1416fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1417 state.entries.iter().any(|(candidate, entry)| {
1418 matches!(entry, StagedEntry::Write { .. })
1419 && candidate != path
1420 && candidate.starts_with(path)
1421 })
1422}
1423
1424fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1425 state
1426 .entries
1427 .keys()
1428 .filter(|candidate| *candidate == path || candidate.starts_with(path))
1429 .cloned()
1430 .collect()
1431}
1432
1433fn validate_delete_shape(
1434 builtin: &'static str,
1435 path: &Path,
1436 recursive: bool,
1437) -> Result<(), HostlibError> {
1438 let Ok(metadata) = stdfs::symlink_metadata(path) else {
1439 return Ok(());
1440 };
1441 if metadata.is_dir() && !recursive {
1442 let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1443 builtin,
1444 message: format!("read_dir `{}`: {err}", path.display()),
1445 })?;
1446 if entries.next().is_some() {
1447 return Err(HostlibError::Backend {
1448 builtin,
1449 message: format!(
1450 "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1451 path.display()
1452 ),
1453 });
1454 }
1455 }
1456 Ok(())
1457}
1458
1459fn status_from_state(state: &SessionState) -> StagedStatus {
1460 let now = now_ms();
1461 let mut pending_writes = Vec::new();
1462 let mut total_bytes_pending = 0u64;
1463 let mut oldest = None;
1464 for (path, entry) in &state.entries {
1465 total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1466 oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1467 old.min(entry.created_at_ms())
1468 }));
1469 let (kind, bytes_added, bytes_removed) = match entry {
1470 StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1471 StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1472 };
1473 pending_writes.push(PendingWrite {
1474 path: path.to_string_lossy().into_owned(),
1475 kind,
1476 bytes_added,
1477 bytes_removed,
1478 });
1479 }
1480 StagedStatus {
1481 pending_writes,
1482 total_bytes_pending,
1483 oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1484 }
1485}
1486
1487fn disk_size(path: &Path) -> Option<u64> {
1488 let metadata = stdfs::symlink_metadata(path).ok()?;
1489 if metadata.is_file() {
1490 return Some(metadata.len());
1491 }
1492 if metadata.is_dir() {
1493 let mut total = 0u64;
1494 for entry in walkdir::WalkDir::new(path)
1495 .into_iter()
1496 .filter_map(Result::ok)
1497 {
1498 if let Ok(metadata) = entry.metadata() {
1499 if metadata.is_file() {
1500 total = total.saturating_add(metadata.len());
1501 }
1502 }
1503 }
1504 return Some(total);
1505 }
1506 Some(metadata.len())
1507}
1508
1509fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1510 if paths.is_empty() {
1511 return state.entries.keys().cloned().collect();
1512 }
1513 let selected: BTreeSet<PathBuf> = paths
1514 .iter()
1515 .map(|path| normalize_logical(Path::new(path)))
1516 .collect();
1517 state
1518 .entries
1519 .keys()
1520 .filter(|path| selected.contains(*path))
1521 .cloned()
1522 .collect()
1523}
1524
1525fn active_session_id(explicit: Option<&str>) -> Option<String> {
1526 explicit
1527 .map(str::to_string)
1528 .or_else(harn_vm::agent_sessions::current_session_id)
1529 .filter(|id| !id.trim().is_empty())
1530}
1531
1532fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1533 if session_id.trim().is_empty() {
1534 return Err(HostlibError::InvalidParameter {
1535 builtin,
1536 param: "session_id",
1537 message: "must not be empty".to_string(),
1538 });
1539 }
1540 Ok(())
1541}
1542
/// Returns the process's current working directory, or `.` when it cannot be
/// determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1546
1547fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1548 let mut dir = root.to_path_buf();
1549 for component in STATE_REL {
1550 dir.push(component);
1551 }
1552 dir.push(sanitize_component(session_id));
1553 dir
1554}
1555
1556fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1557 session_dir(root, session_id).join("manifest.json")
1558}
1559
1560fn sanitize_component(input: &str) -> String {
1561 let sanitized: String = input
1562 .chars()
1563 .map(|ch| match ch {
1564 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1565 _ => '_',
1566 })
1567 .collect();
1568 let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1574 if sanitized == input && !is_dotted {
1575 sanitized
1576 } else {
1577 let hash = hex::encode(Sha256::digest(input.as_bytes()));
1578 format!("{sanitized}-{}", &hash[..12])
1579 }
1580}
1581
1582fn normalize_logical(path: &Path) -> PathBuf {
1583 let absolute = if path.is_absolute() {
1584 path.to_path_buf()
1585 } else {
1586 default_root().join(path)
1587 };
1588 let mut out = PathBuf::new();
1589 for component in absolute.components() {
1590 match component {
1591 Component::ParentDir => {
1592 out.pop();
1593 }
1594 Component::CurDir => {}
1595 other => out.push(other),
1596 }
1597 }
1598 out
1599}
1600
/// Builds the `NotFound` I/O error reported for `path` by the staged overlay.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1607
/// Returns the current wall-clock time in milliseconds since the Unix epoch,
/// or `0` when the system clock reads earlier than the epoch.
fn now_ms() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1614
1615fn emit_staged_update(state: &SessionState) {
1616 let status = status_from_state(state);
1617 harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1618 session_id: state.session_id.clone(),
1619 pending_count: status.pending_writes.len(),
1620 total_bytes: status.total_bytes_pending,
1621 });
1622}
1623
1624fn pending_write_to_value(write: PendingWrite) -> VmValue {
1625 build_dict([
1626 ("path", str_value(&write.path)),
1627 ("kind", str_value(write.kind)),
1628 ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1629 ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1630 ])
1631}
1632
1633fn status_to_value(status: StagedStatus) -> VmValue {
1634 build_dict([
1635 (
1636 "pending_writes",
1637 VmValue::List(Arc::new(
1638 status
1639 .pending_writes
1640 .into_iter()
1641 .map(pending_write_to_value)
1642 .collect(),
1643 )),
1644 ),
1645 (
1646 "total_bytes_pending",
1647 VmValue::Int(status.total_bytes_pending as i64),
1648 ),
1649 (
1650 "oldest_pending_age_ms",
1651 VmValue::Int(status.oldest_pending_age_ms),
1652 ),
1653 ])
1654}
1655
1656fn commit_result_to_value(result: CommitResult) -> VmValue {
1657 build_dict([
1658 (
1659 "committed_paths",
1660 VmValue::List(Arc::new(
1661 result
1662 .committed_paths
1663 .into_iter()
1664 .map(|path| VmValue::String(Arc::from(path)))
1665 .collect(),
1666 )),
1667 ),
1668 (
1669 "failed_paths_with_reasons",
1670 VmValue::List(Arc::new(
1671 result
1672 .failed_paths_with_reasons
1673 .into_iter()
1674 .map(|(path, reason)| {
1675 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1676 })
1677 .collect(),
1678 )),
1679 ),
1680 ])
1681}
1682
1683fn discard_result_to_value(result: DiscardResult) -> VmValue {
1684 build_dict([(
1685 "discarded_paths",
1686 VmValue::List(Arc::new(
1687 result
1688 .discarded_paths
1689 .into_iter()
1690 .map(|path| VmValue::String(Arc::from(path)))
1691 .collect(),
1692 )),
1693 )])
1694}
1695
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path, PathBuf};

    /// Ids that are empty or consist only of dots must be rewritten into an
    /// ordinary file name.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        for evil in ["..", ".", "...", ""] {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let still_dots = safe.bytes().all(|b| b == b'.');
            assert!(!still_dots, "`{evil}` -> `{safe}` is still all dots");

            // Parsed as a path, the result must contain only normal names.
            let only_normal = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(only_normal, "`{safe}` contains a traversal component");
        }
    }

    /// Ids made of allowed characters are returned untouched.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), "abc-123_v2.0");
    }

    /// A `..` session id must not move the session directory out of the
    /// staged-state root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let has_parent_dir = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!has_parent_dir, "session_dir({dir:?}) escapes via `..`");

        let mut staged = PathBuf::from("/workspace");
        staged.extend(STATE_REL);
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}