1use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability};
24use crate::tools::args::{
25 build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26 require_string, str_value, to_agent_path,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Registered names of the `fs` builtins. Each constant is the name handed to
// the registry and also the `builtin` label attached to errors raised on that
// builtin's behalf.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

/// Manifest schema version: written by `persist_state`, and `load_state`
/// rejects any manifest that carries a different value.
const MANIFEST_VERSION: u32 = 1;
/// Directory components of the staged-state area.
// NOTE(review): presumably joined under the session root by `session_dir`,
// which is not visible in this section — confirm.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Stateless capability handle that registers the `fs` module's builtins
/// through its `HostlibCapability` implementation.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46 fn module_name(&self) -> &'static str {
47 "fs"
48 }
49
50 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51 registry.register_fn("fs", SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52 registry.register_fn("fs", STATUS_BUILTIN, "staged_status", staged_status_builtin);
53 registry.register_fn("fs", COMMIT_BUILTIN, "commit_staged", commit_staged_builtin);
54 registry.register_fn(
55 "fs",
56 DISCARD_BUILTIN,
57 "discard_staged",
58 discard_staged_builtin,
59 );
60 registry.register_gated_fn(
63 "fs",
64 SAFE_TEXT_PATCH_BUILTIN,
65 "safe_text_patch",
66 safe_text_patch_builtin,
67 );
68 registry.register_gated_fn("fs", READ_TEXT_BUILTIN, "read_text", read_text_builtin);
69 registry.register_fn(
70 "fs",
71 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
72 "emit_safe_text_patch_result",
73 emit_safe_text_patch_result_builtin,
74 );
75 }
76}
77
/// How filesystem writes are handled for a session.
///
/// Serialized in lowercase (`"immediate"` / `"staged"`), matching the strings
/// accepted by `FsMode::parse` and produced by `FsMode::as_str`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Staging is off: the `stage_*_or_none` helpers return `None` so callers
    /// act on the real filesystem.
    Immediate,
    /// Writes and deletes are recorded in the session overlay until they are
    /// committed or discarded.
    Staged,
}
87
88impl FsMode {
89 fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
90 match raw {
91 "immediate" => Ok(Self::Immediate),
92 "staged" => Ok(Self::Staged),
93 other => Err(HostlibError::InvalidParameter {
94 builtin,
95 param: "mode",
96 message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
97 }),
98 }
99 }
100
101 pub fn as_str(self) -> &'static str {
103 match self {
104 Self::Immediate => "immediate",
105 Self::Staged => "staged",
106 }
107 }
108}
109
/// Serialized form of a session's staged state.
///
/// Built and written as pretty-printed JSON by `persist_state`, parsed back by
/// `load_state`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Schema version; `load_state` requires it to equal `MANIFEST_VERSION`.
    version: u32,
    /// Session the manifest belongs to; `load_state` rejects a mismatch.
    session_id: String,
    /// Filesystem mode in effect when the manifest was written.
    mode: FsMode,
    /// Session root, stored as a lossy string rendering of the path.
    root: String,
    /// Staged entries keyed by the lossy string rendering of each path.
    entries: BTreeMap<String, StagedEntry>,
}
118
/// One pending change recorded in the session overlay.
///
/// Serialized with an internal `kind` tag in snake_case.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// A staged file write.
    Write {
        /// Hex SHA-256 of the body; also the file name of the body under the
        /// session's `bodies` directory (see `write_body`).
        body_hash: String,
        /// Length of the staged body in bytes.
        len: u64,
        /// Value of `now_ms()` at the moment the write was staged.
        created_at_ms: i64,
    },
    /// A staged deletion.
    Delete {
        /// The `recursive` flag the caller passed when staging the delete.
        recursive: bool,
        /// Value of `now_ms()` at the moment the delete was staged.
        created_at_ms: i64,
    },
}
132
133impl StagedEntry {
134 fn created_at_ms(&self) -> i64 {
135 match self {
136 Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
137 *created_at_ms
138 }
139 }
140 }
141
142 fn body_len(&self) -> u64 {
143 match self {
144 Self::Write { len, .. } => *len,
145 Self::Delete { .. } => 0,
146 }
147 }
148}
149
/// In-memory staged state for one session, cached in `SESSIONS`.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier of the owning session.
    session_id: String,
    /// Current filesystem mode for the session.
    mode: FsMode,
    /// Root under which the session's state directory is resolved.
    root: PathBuf,
    /// Pending entries keyed by path as returned from `normalize_logical`.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
157
/// Result of staging a write through `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not already exist in the overlay view.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
163
/// One entry of a directory listing returned by `read_dir` in staged mode.
// NOTE(review): the listing is built by `overlay_read_dir`, which is not
// visible in this section; field meanings below follow the field names only —
// confirm.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symlink.
    pub(crate) is_symlink: bool,
    /// Entry size (presumably in bytes — confirm).
    pub(crate) size: u64,
}
171
/// Summary of a session's pending staged changes, returned by `staged_status`.
// NOTE(review): values are computed by `status_from_state`, which is not
// visible in this section; descriptions follow the field names — confirm.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// Pending changes, one record per staged path.
    pub pending_writes: Vec<PendingWrite>,
    /// Total bytes pending across the staged entries.
    pub total_bytes_pending: u64,
    /// Age in milliseconds of the oldest pending entry.
    pub oldest_pending_age_ms: i64,
}
182
/// A single pending change as reported inside `StagedStatus`.
// NOTE(review): populated outside this section; descriptions follow the field
// names — confirm against `status_from_state`.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Path of the staged change.
    pub path: String,
    /// Static label for the kind of change.
    pub kind: &'static str,
    /// Bytes added by the change.
    pub bytes_added: u64,
    /// Bytes removed by the change.
    pub bytes_removed: u64,
}
195
/// Result of `set_mode`.
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode the session was in before `set_mode` replaced it.
    pub previous_mode: FsMode,
}
202
/// Result of `commit_staged`.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Agent-facing labels of the paths whose staged entry was applied.
    pub committed_paths: Vec<String>,
    /// `(path label, reason)` pairs for entries that were not applied, either
    /// because the path-scope check refused them or because applying failed.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
211
/// Result of `discard_staged`.
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Agent-facing labels of the paths whose staged entry was dropped.
    pub discarded_paths: Vec<String>,
}
218
/// Process-wide cache of session state keyed by session id; lazily created by
/// `sessions()` and accessed through `lock_sessions()`.
static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
220
221fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
222 SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
223}
224
225fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
229 sessions()
230 .lock()
231 .expect("hostlib fs session mutex poisoned")
232}
233
234pub fn configure_session_root(session_id: &str, root: &Path) {
239 if session_id.trim().is_empty() {
240 return;
241 }
242 let root = normalize_logical(root);
243 let mut guard = lock_sessions();
244 match guard.get_mut(session_id) {
245 Some(state) if state.entries.is_empty() => {
246 state.root = root;
247 }
248 Some(_) => {}
249 None => {
250 let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
251 session_id: session_id.to_string(),
252 mode: FsMode::Immediate,
253 root,
254 entries: BTreeMap::new(),
255 });
256 guard.insert(session_id.to_string(), state);
257 }
258 }
259}
260
261pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
263 if session_id.trim().is_empty() {
264 return None;
265 }
266 let guard = lock_sessions();
267 guard.get(session_id).map(|state| state.root.clone())
268}
269
270pub fn set_mode(
272 session_id: &str,
273 mode: FsMode,
274 root: Option<&Path>,
275) -> Result<SetModeResult, HostlibError> {
276 validate_session_id(SET_MODE_BUILTIN, session_id)?;
277 let mut guard = lock_sessions();
278 let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
279 let previous_mode = state.mode;
280 state.mode = mode;
281 persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
282 builtin: SET_MODE_BUILTIN,
283 message: err,
284 })?;
285 guard.insert(session_id.to_string(), state);
286 Ok(SetModeResult { previous_mode })
287}
288
289pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
291 validate_session_id(STATUS_BUILTIN, session_id)?;
292 let mut guard = lock_sessions();
293 let state = state_for_locked(&mut guard, session_id, None)?;
294 let status = status_from_state(&state);
295 guard.insert(session_id.to_string(), state);
296 Ok(status)
297}
298
299pub(crate) fn staged_pending_paths(session_id: &str) -> Result<BTreeSet<PathBuf>, HostlibError> {
306 validate_session_id(STATUS_BUILTIN, session_id)?;
307 let mut guard = lock_sessions();
308 let state = state_for_locked(&mut guard, session_id, None)?;
309 let paths = state.entries.keys().cloned().collect();
310 guard.insert(session_id.to_string(), state);
311 Ok(paths)
312}
313
314pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
316 validate_session_id(COMMIT_BUILTIN, session_id)?;
317 let mut guard = lock_sessions();
318 let mut state = state_for_locked(&mut guard, session_id, None)?;
319 let selected = selected_paths(&state, paths);
320 let mut committed_paths = Vec::new();
321 let mut failed_paths_with_reasons = Vec::new();
322
323 for path in selected {
324 let Some(entry) = state.entries.get(&path).cloned() else {
325 continue;
326 };
327 let path_label = to_agent_path(&path);
328 let access = match entry {
334 StagedEntry::Write { .. } => FsAccess::Write,
335 StagedEntry::Delete { .. } => FsAccess::Delete,
336 };
337 if let Err(violation) = check_fs_path_scope(&path, access) {
338 failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
339 continue;
340 }
341 match commit_entry(&state, &path, &entry) {
342 Ok(()) => {
343 state.entries.remove(&path);
344 committed_paths.push(path_label);
345 }
346 Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
347 }
348 }
349
350 persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
351 builtin: COMMIT_BUILTIN,
352 message: err,
353 })?;
354 emit_staged_update(&state);
355 guard.insert(session_id.to_string(), state);
356 Ok(CommitResult {
357 committed_paths,
358 failed_paths_with_reasons,
359 })
360}
361
362pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
364 validate_session_id(DISCARD_BUILTIN, session_id)?;
365 let mut guard = lock_sessions();
366 let mut state = state_for_locked(&mut guard, session_id, None)?;
367 let selected = selected_paths(&state, paths);
368 let mut discarded_paths = Vec::new();
369 for path in selected {
370 if state.entries.remove(&path).is_some() {
371 discarded_paths.push(to_agent_path(&path));
372 }
373 }
374 persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
375 builtin: DISCARD_BUILTIN,
376 message: err,
377 })?;
378 emit_staged_update(&state);
379 guard.insert(session_id.to_string(), state);
380 Ok(DiscardResult { discarded_paths })
381}
382
383pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
390 validate_session_id(DISCARD_BUILTIN, session_id)?;
391 let mut guard = lock_sessions();
392 let state = match guard.remove(session_id) {
393 Some(state) => state,
394 None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
395 HostlibError::Backend {
396 builtin: DISCARD_BUILTIN,
397 message: err,
398 }
399 })?,
400 };
401 let dir = session_dir(&state.root, &state.session_id);
402 match stdfs::remove_dir_all(&dir) {
403 Ok(()) => Ok(()),
404 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
405 Err(err) => Err(HostlibError::Backend {
406 builtin: DISCARD_BUILTIN,
407 message: format!("remove staged session {}: {err}", dir.display()),
408 }),
409 }
410}
411
412pub(crate) fn read(
413 path: &Path,
414 explicit_session_id: Option<&str>,
415) -> Option<std::io::Result<Vec<u8>>> {
416 let session_id = active_session_id(explicit_session_id)?;
417 let mut guard = lock_sessions();
418 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
419 let result = if state.mode == FsMode::Staged {
420 overlay_read(&state, path)
421 } else {
422 None
423 };
424 guard.insert(session_id, state);
425 result
426}
427
428pub(crate) fn read_to_string(
429 path: &Path,
430 explicit_session_id: Option<&str>,
431) -> Option<std::io::Result<String>> {
432 read(path, explicit_session_id).map(|result| {
433 result.and_then(|bytes| {
434 String::from_utf8(bytes).map_err(|err| {
435 std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
436 })
437 })
438 })
439}
440
441pub(crate) fn read_dir(
442 path: &Path,
443 explicit_session_id: Option<&str>,
444) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
445 let session_id = active_session_id(explicit_session_id)?;
446 let mut guard = lock_sessions();
447 let state = state_for_locked(&mut guard, &session_id, None).ok()?;
448 let result = if state.mode == FsMode::Staged {
449 Some(overlay_read_dir(&state, path))
450 } else {
451 None
452 };
453 guard.insert(session_id, state);
454 result
455}
456
457pub(crate) fn stage_write_or_none(
458 builtin: &'static str,
459 path: &Path,
460 bytes: &[u8],
461 create_parents: bool,
462 overwrite: bool,
463 explicit_session_id: Option<&str>,
464) -> Result<Option<WriteOutcome>, HostlibError> {
465 let Some(session_id) = active_session_id(explicit_session_id) else {
466 return Ok(None);
467 };
468 let mut guard = lock_sessions();
469 let mut state = state_for_locked(&mut guard, &session_id, None)?;
470 if state.mode != FsMode::Staged {
471 guard.insert(session_id, state);
472 return Ok(None);
473 }
474
475 let key = normalize_logical(path);
476 let existed = overlay_exists(&state, &key);
477 if existed && !overwrite {
478 guard.insert(session_id, state);
479 return Err(HostlibError::Backend {
480 builtin,
481 message: format!("`{}` exists and overwrite=false", key.display()),
482 });
483 }
484 if !create_parents && !parent_exists(&state, &key) {
485 guard.insert(session_id, state);
486 return Err(HostlibError::Backend {
487 builtin,
488 message: format!("parent directory for `{}` does not exist", key.display()),
489 });
490 }
491
492 let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
493 builtin,
494 message: err,
495 })?;
496 state.entries.insert(
497 key.clone(),
498 StagedEntry::Write {
499 body_hash: hash,
500 len: bytes.len() as u64,
501 created_at_ms: now_ms(),
502 },
503 );
504 persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
505 builtin,
506 message: err,
507 })?;
508 emit_staged_update(&state);
509 guard.insert(session_id, state);
510 Ok(Some(WriteOutcome {
511 created: !existed,
512 bytes_written: bytes.len(),
513 }))
514}
515
516pub(crate) fn stage_delete_or_none(
517 builtin: &'static str,
518 path: &Path,
519 recursive: bool,
520 explicit_session_id: Option<&str>,
521) -> Result<Option<bool>, HostlibError> {
522 let Some(session_id) = active_session_id(explicit_session_id) else {
523 return Ok(None);
524 };
525 let mut guard = lock_sessions();
526 let mut state = state_for_locked(&mut guard, &session_id, None)?;
527 if state.mode != FsMode::Staged {
528 guard.insert(session_id, state);
529 return Ok(None);
530 }
531
532 let key = normalize_logical(path);
533 let staged_targets = staged_paths_under(&state, &key);
534 let disk_exists = key.exists();
535 if !disk_exists && staged_targets.is_empty() {
536 guard.insert(session_id, state);
537 return Ok(Some(false));
538 }
539
540 if !disk_exists {
541 for staged in staged_targets {
542 state.entries.remove(&staged);
543 }
544 } else {
545 validate_delete_shape(builtin, &key, recursive)?;
546 for staged in staged_targets {
547 state.entries.remove(&staged);
548 }
549 state.entries.insert(
550 key.clone(),
551 StagedEntry::Delete {
552 recursive,
553 created_at_ms: now_ms(),
554 },
555 );
556 }
557 persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
558 builtin,
559 message: err,
560 })?;
561 emit_staged_update(&state);
562 guard.insert(session_id, state);
563 Ok(Some(true))
564}
565
/// Outcome of a `safe_text_patch` call.
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Whether the patch was applied, refused as stale, or was a no-op.
    pub result: SafeTextPatchResult,
    /// `sha256:`-prefixed hash of the content found before the call (the hash
    /// of empty content when the file did not exist).
    pub current_hash: String,
    /// `sha256:`-prefixed hash of the requested new content.
    pub after_hash: String,
    /// `true` only when the patch was applied and no prior content existed.
    pub created: bool,
    /// Bytes written or staged; 0 unless the patch was applied.
    pub bytes_written: usize,
}
582
/// Classification of a `safe_text_patch` attempt.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The new content was written (or staged).
    Applied,
    /// The caller's expected hash did not match the current content.
    StaleBase,
    /// The existing content already equals the requested content.
    NoOp,
}

impl SafeTextPatchResult {
    /// Snake-case label used in builtin results.
    fn as_str(self) -> &'static str {
        let label = match self {
            Self::NoOp => "no_op",
            Self::StaleBase => "stale_base",
            Self::Applied => "applied",
        };
        label
    }
}
606
607fn hash_label(bytes: &[u8]) -> String {
611 format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
612}
613
614pub fn safe_text_patch(
636 path: &Path,
637 content: &str,
638 expected_hash: Option<&str>,
639 session_id: Option<&str>,
640 create_parents: bool,
641 overwrite: bool,
642) -> Result<SafeTextPatchOutcome, HostlibError> {
643 let new_bytes = content.as_bytes();
644 let after_hash = hash_label(new_bytes);
645
646 if let Some(outcome) = safe_text_patch_staged(
647 path,
648 new_bytes,
649 expected_hash,
650 session_id,
651 create_parents,
652 overwrite,
653 &after_hash,
654 )? {
655 return Ok(outcome);
656 }
657
658 safe_text_patch_disk(
659 path,
660 new_bytes,
661 expected_hash,
662 create_parents,
663 overwrite,
664 after_hash,
665 )
666}
667
668#[allow(clippy::too_many_arguments)]
674fn safe_text_patch_staged(
675 path: &Path,
676 new_bytes: &[u8],
677 expected_hash: Option<&str>,
678 session_id: Option<&str>,
679 create_parents: bool,
680 overwrite: bool,
681 after_hash: &str,
682) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
683 let Some(session) = active_session_id(session_id) else {
684 return Ok(None);
685 };
686 let mut guard = lock_sessions();
687 let mut state = state_for_locked(&mut guard, &session, None)?;
688 if state.mode != FsMode::Staged {
689 guard.insert(session, state);
690 return Ok(None);
691 }
692
693 let key = normalize_logical(path);
694 let (existing_bytes, existed) = match overlay_read(&state, path) {
695 Some(Ok(bytes)) => (bytes, true),
696 Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
697 Some(Err(err)) => {
698 guard.insert(session, state);
699 return Err(HostlibError::Backend {
700 builtin: SAFE_TEXT_PATCH_BUILTIN,
701 message: format!("read `{}`: {err}", path.display()),
702 });
703 }
704 None => match stdfs::read(path) {
705 Ok(bytes) => (bytes, true),
706 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
707 Err(err) => {
708 guard.insert(session, state);
709 return Err(HostlibError::Backend {
710 builtin: SAFE_TEXT_PATCH_BUILTIN,
711 message: format!("read `{}`: {err}", path.display()),
712 });
713 }
714 },
715 };
716 let current_hash = hash_label(&existing_bytes);
717
718 if let Some(expected) = expected_hash {
719 if expected != current_hash {
720 guard.insert(session, state);
721 return Ok(Some(SafeTextPatchOutcome {
722 result: SafeTextPatchResult::StaleBase,
723 current_hash,
724 after_hash: after_hash.to_string(),
725 created: false,
726 bytes_written: 0,
727 }));
728 }
729 }
730
731 if existed && existing_bytes == new_bytes {
732 guard.insert(session, state);
733 return Ok(Some(SafeTextPatchOutcome {
734 result: SafeTextPatchResult::NoOp,
735 current_hash,
736 after_hash: after_hash.to_string(),
737 created: false,
738 bytes_written: 0,
739 }));
740 }
741
742 let overlay_existed = overlay_exists(&state, &key);
743 if overlay_existed && !overwrite {
744 guard.insert(session, state);
745 return Err(HostlibError::Backend {
746 builtin: SAFE_TEXT_PATCH_BUILTIN,
747 message: format!("`{}` exists and overwrite=false", key.display()),
748 });
749 }
750 if !create_parents && !parent_exists(&state, &key) {
751 guard.insert(session, state);
752 return Err(HostlibError::Backend {
753 builtin: SAFE_TEXT_PATCH_BUILTIN,
754 message: format!("parent directory for `{}` does not exist", key.display()),
755 });
756 }
757
758 let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
759 builtin: SAFE_TEXT_PATCH_BUILTIN,
760 message: err,
761 })?;
762 state.entries.insert(
763 key.clone(),
764 StagedEntry::Write {
765 body_hash,
766 len: new_bytes.len() as u64,
767 created_at_ms: now_ms(),
768 },
769 );
770 persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
771 builtin: SAFE_TEXT_PATCH_BUILTIN,
772 message: err,
773 })?;
774 emit_staged_update(&state);
775 guard.insert(session, state);
776
777 Ok(Some(SafeTextPatchOutcome {
778 result: SafeTextPatchResult::Applied,
779 current_hash,
780 after_hash: after_hash.to_string(),
781 created: !existed,
782 bytes_written: new_bytes.len(),
783 }))
784}
785
786fn safe_text_patch_disk(
791 path: &Path,
792 new_bytes: &[u8],
793 expected_hash: Option<&str>,
794 create_parents: bool,
795 overwrite: bool,
796 after_hash: String,
797) -> Result<SafeTextPatchOutcome, HostlibError> {
798 let (existing_bytes, existed) = match stdfs::read(path) {
799 Ok(bytes) => (bytes, true),
800 Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
801 Err(err) => {
802 return Err(HostlibError::Backend {
803 builtin: SAFE_TEXT_PATCH_BUILTIN,
804 message: format!("read `{}`: {err}", path.display()),
805 });
806 }
807 };
808 let current_hash = hash_label(&existing_bytes);
809
810 if let Some(expected) = expected_hash {
811 if expected != current_hash {
812 return Ok(SafeTextPatchOutcome {
813 result: SafeTextPatchResult::StaleBase,
814 current_hash,
815 after_hash,
816 created: false,
817 bytes_written: 0,
818 });
819 }
820 }
821
822 if existed && existing_bytes == new_bytes {
823 return Ok(SafeTextPatchOutcome {
824 result: SafeTextPatchResult::NoOp,
825 current_hash,
826 after_hash,
827 created: false,
828 bytes_written: 0,
829 });
830 }
831 if existed && !overwrite {
832 return Err(HostlibError::Backend {
833 builtin: SAFE_TEXT_PATCH_BUILTIN,
834 message: format!("`{}` exists and overwrite=false", path.display()),
835 });
836 }
837 if !create_parents {
838 if let Some(parent) = path.parent() {
839 if !parent.as_os_str().is_empty() && !parent.is_dir() {
840 return Err(HostlibError::Backend {
841 builtin: SAFE_TEXT_PATCH_BUILTIN,
842 message: format!(
843 "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
844 path.display()
845 ),
846 });
847 }
848 }
849 }
850
851 crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
852 atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
853 builtin: SAFE_TEXT_PATCH_BUILTIN,
854 message: format!("write `{}`: {err}", path.display()),
855 })?;
856
857 Ok(SafeTextPatchOutcome {
858 result: SafeTextPatchResult::Applied,
859 current_hash,
860 after_hash,
861 created: !existed,
862 bytes_written: new_bytes.len(),
863 })
864}
865
866fn read_existing(
871 builtin: &'static str,
872 path: &Path,
873 session_id: Option<&str>,
874) -> Result<(Vec<u8>, bool), HostlibError> {
875 if let Some(result) = read(path, session_id) {
876 return match result {
877 Ok(bytes) => Ok((bytes, true)),
878 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
879 Err(err) => Err(HostlibError::Backend {
880 builtin,
881 message: format!("read `{}`: {err}", path.display()),
882 }),
883 };
884 }
885 match stdfs::read(path) {
886 Ok(bytes) => Ok((bytes, true)),
887 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
888 Err(err) => Err(HostlibError::Backend {
889 builtin,
890 message: format!("read `{}`: {err}", path.display()),
891 }),
892 }
893}
894
895fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
896 let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
897 let dict = raw.as_ref();
898 let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
899 let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
900 let path = Path::new(&path_str);
901 enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
902
903 let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
904 let hash = hash_label(&bytes);
905 let content = match std::str::from_utf8(&bytes) {
906 Ok(s) => s.to_string(),
907 Err(err) => {
908 return Err(HostlibError::Backend {
909 builtin: READ_TEXT_BUILTIN,
910 message: format!("`{path_str}` is not valid UTF-8: {err}"),
911 });
912 }
913 };
914 let bytes_len = bytes.len() as i64;
915 Ok(build_dict([
916 ("path", str_value(&path_str)),
917 ("content", str_value(&content)),
918 ("sha256", str_value(&hash)),
919 ("size", VmValue::Int(bytes_len)),
920 ("exists", VmValue::Bool(existed)),
921 ]))
922}
923
924fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
925 let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
926 let dict = raw.as_ref();
927
928 let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
929 let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
930 let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
931 let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
932 let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
933 let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
934
935 enforce_path_scope(
936 SAFE_TEXT_PATCH_BUILTIN,
937 Path::new(&path_str),
938 FsAccess::Write,
939 )?;
940 let outcome = safe_text_patch(
941 Path::new(&path_str),
942 &content,
943 expected_hash.as_deref(),
944 session_id.as_deref(),
945 create_parents,
946 overwrite,
947 )?;
948
949 let entries: Vec<(&'static str, VmValue)> = vec![
950 ("path", str_value(&path_str)),
951 ("result", str_value(outcome.result.as_str())),
952 (
953 "applied",
954 VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
955 ),
956 (
957 "stale_base",
958 VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
959 ),
960 ("current_hash", str_value(&outcome.current_hash)),
961 ("before_sha256", str_value(&outcome.current_hash)),
962 ("after_sha256", str_value(&outcome.after_hash)),
963 ("created", VmValue::Bool(outcome.created)),
964 ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
965 (
966 "expected_hash",
967 match expected_hash.as_deref() {
968 Some(hash) => str_value(hash),
969 None => VmValue::Nil,
970 },
971 ),
972 ];
973 Ok(build_dict(entries))
974}
975
976fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
977 let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
978 let dict = raw.as_ref();
979
980 let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
981 let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
982 let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
983 let bytes_written = optional_int(
984 EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
985 dict,
986 "bytes_written",
987 0,
988 )?;
989 let failed_hunk_index = match dict.get("failed_hunk_index") {
990 None | Some(VmValue::Nil) => None,
991 Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
992 Some(other) => {
993 return Err(HostlibError::InvalidParameter {
994 builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
995 param: "failed_hunk_index",
996 message: format!("expected non-negative integer, got {}", other.type_name()),
997 });
998 }
999 };
1000 let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1001 .or_else(harn_vm::agent_sessions::current_session_id);
1002
1003 if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1004 harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1005 session_id,
1006 path,
1007 result,
1008 hunks_count: hunks_count.max(0) as usize,
1009 bytes_written: bytes_written.max(0) as u64,
1010 failed_hunk_index,
1011 });
1012 Ok(VmValue::Bool(true))
1013 } else {
1014 Ok(VmValue::Bool(false))
1018 }
1019}
1020
1021fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1022 let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1023 let dict = raw.as_ref();
1024 let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1025 let mode = FsMode::parse(
1026 SET_MODE_BUILTIN,
1027 &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1028 )?;
1029 let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1030 let result = set_mode(&session_id, mode, root.as_deref())?;
1031 Ok(build_dict([(
1032 "previous_mode",
1033 str_value(result.previous_mode.as_str()),
1034 )]))
1035}
1036
1037fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1038 let raw = dict_arg(STATUS_BUILTIN, args)?;
1039 let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1040 Ok(status_to_value(staged_status(&session_id)?))
1041}
1042
1043fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1044 let raw = dict_arg(COMMIT_BUILTIN, args)?;
1045 let dict = raw.as_ref();
1046 let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1047 let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1048 Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1049}
1050
1051fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1052 let raw = dict_arg(DISCARD_BUILTIN, args)?;
1053 let dict = raw.as_ref();
1054 let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1055 let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1056 Ok(discard_result_to_value(discard_staged(
1057 &session_id,
1058 &paths,
1059 )?))
1060}
1061
1062fn state_for_locked(
1063 guard: &mut BTreeMap<String, SessionState>,
1064 session_id: &str,
1065 root: Option<PathBuf>,
1066) -> Result<SessionState, HostlibError> {
1067 if let Some(existing) = guard.get(session_id) {
1068 let mut state = existing.clone();
1069 if let Some(root) = root {
1070 if state.entries.is_empty() {
1071 state.root = root;
1072 }
1073 }
1074 return Ok(state);
1075 }
1076 let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1077 builtin: SET_MODE_BUILTIN,
1078 message: err,
1079 })?;
1080 Ok(state)
1081}
1082
1083fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1084 let root = root.unwrap_or_else(default_root);
1085 let manifest_path = manifest_path(&root, session_id);
1086 if manifest_path.exists() {
1087 let text = stdfs::read_to_string(&manifest_path)
1088 .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1089 let manifest: Manifest = serde_json::from_str(&text)
1090 .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1091 if manifest.version != MANIFEST_VERSION {
1092 return Err(format!(
1093 "unsupported staged fs manifest version {} in {}",
1094 manifest.version,
1095 manifest_path.display()
1096 ));
1097 }
1098 if manifest.session_id != session_id {
1099 return Err(format!(
1100 "staged fs manifest session id mismatch in {}",
1101 manifest_path.display()
1102 ));
1103 }
1104 return Ok(SessionState {
1105 session_id: manifest.session_id,
1106 mode: manifest.mode,
1107 root: normalize_logical(Path::new(&manifest.root)),
1108 entries: manifest
1109 .entries
1110 .into_iter()
1111 .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1112 .collect(),
1113 });
1114 }
1115 Ok(SessionState {
1116 session_id: session_id.to_string(),
1117 mode: FsMode::Immediate,
1118 root,
1119 entries: BTreeMap::new(),
1120 })
1121}
1122
1123fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1124 let dir = session_dir(&state.root, &state.session_id);
1125 stdfs::create_dir_all(dir.join("bodies"))
1126 .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1127 let manifest = Manifest {
1128 version: MANIFEST_VERSION,
1129 session_id: state.session_id.clone(),
1130 mode: state.mode,
1131 root: state.root.to_string_lossy().into_owned(),
1132 entries: state
1133 .entries
1134 .iter()
1135 .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1136 .collect(),
1137 };
1138 let bytes = serde_json::to_vec_pretty(&manifest)
1139 .map_err(|err| format!("serialize staged manifest: {err}"))?;
1140 atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1141 append_journal(state, op, path)?;
1142 prune_unreferenced_bodies(state);
1143 Ok(())
1144}
1145
1146fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1147 let dir = session_dir(&state.root, &state.session_id);
1148 stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1149 let line = serde_json::to_string(&serde_json::json!({
1150 "ts_ms": now_ms(),
1151 "op": op,
1152 "path": path.map(|path| path.to_string_lossy().into_owned()), "pending_count": state.entries.len(),
1154 }))
1155 .map_err(|err| format!("serialize staged journal: {err}"))?;
1156 let mut file = stdfs::OpenOptions::new()
1157 .create(true)
1158 .append(true)
1159 .open(dir.join("journal.jsonl"))
1160 .map_err(|err| format!("open staged journal: {err}"))?;
1161 writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1162}
1163
1164fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1165 let hash = hex::encode(Sha256::digest(bytes));
1166 let path = session_dir(&state.root, &state.session_id)
1167 .join("bodies")
1168 .join(&hash);
1169 if !path.exists() {
1170 atomic_write(&path, bytes)?;
1171 }
1172 Ok(hash)
1173}
1174
1175fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1176 stdfs::read(
1177 session_dir(&state.root, &state.session_id)
1178 .join("bodies")
1179 .join(hash),
1180 )
1181}
1182
1183fn prune_unreferenced_bodies(state: &SessionState) {
1184 let live: BTreeSet<String> = state
1185 .entries
1186 .values()
1187 .filter_map(|entry| match entry {
1188 StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1189 StagedEntry::Delete { .. } => None,
1190 })
1191 .collect();
1192 let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1193 let Ok(entries) = stdfs::read_dir(&body_dir) else {
1194 return;
1195 };
1196 for entry in entries.flatten() {
1197 let name = entry.file_name().to_string_lossy().into_owned();
1198 if !live.contains(&name) {
1199 let _ = stdfs::remove_file(entry.path());
1200 }
1201 }
1202}
1203
/// Writes `bytes` to `path` by first writing a sibling temp file and then
/// renaming it over the destination.
///
/// Missing parent directories are created. The temp file name is the full
/// destination file name plus a `.tmp-<pid>-<millis>-<seq>` suffix, so two
/// destinations that differ only in extension (`a.rs` / `a.txt`) never
/// share a temp path, and concurrent writers inside this process are kept
/// apart by the sequence number.
///
/// If the first rename fails, the destination is removed and the rename is
/// retried once. The temp file is removed whenever the operation fails, so
/// no `*.tmp-*` residue is left next to the destination.
///
/// # Errors
///
/// Returns a descriptive string when `path` has no file name, or when
/// creating the parent, writing the temp file, or renaming fails.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    // Process-wide counter that keeps temp names unique within one millisecond.
    static NEXT_TMP_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let Some(file_name) = path.file_name() else {
        return Err(format!("write {}: path has no file name", path.display()));
    };
    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }

    let stamp_ms = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0);
    let seq = NEXT_TMP_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    // Append to the whole file name instead of replacing the extension.
    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(".tmp-{}-{stamp_ms}-{seq}", std::process::id()));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // A partially written temp file is useless; do not leave it behind.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }

    match stdfs::rename(&tmp, path) {
        Ok(()) => Ok(()),
        Err(err) => {
            // Fallback for targets where rename refuses to replace an
            // existing destination: drop the destination and retry once.
            let _ = stdfs::remove_file(path);
            stdfs::rename(&tmp, path).map_err(|retry| {
                let _ = stdfs::remove_file(&tmp);
                format!(
                    "rename {} to {}: {err}; retry: {retry}",
                    tmp.display(),
                    path.display()
                )
            })
        }
    }
}
1225
1226fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1227 match entry {
1228 StagedEntry::Write { body_hash, .. } => {
1229 let bytes = read_body(state, body_hash)
1230 .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1231 atomic_write(path, &bytes)
1232 }
1233 StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1234 Ok(metadata) if metadata.is_dir() => {
1235 if *recursive {
1236 stdfs::remove_dir_all(path)
1237 .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1238 } else {
1239 stdfs::remove_dir(path)
1240 .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1241 }
1242 }
1243 Ok(_) => stdfs::remove_file(path)
1244 .map_err(|err| format!("remove_file {}: {err}", path.display())),
1245 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1246 Err(err) => Err(format!("stat {}: {err}", path.display())),
1247 },
1248 }
1249}
1250
1251fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1252 let key = normalize_logical(path);
1253 if let Some(entry) = state.entries.get(&key) {
1254 return Some(match entry {
1255 StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1256 StagedEntry::Delete { .. } => Err(not_found(&key)),
1257 });
1258 }
1259 if deleted_ancestor(state, &key) {
1260 return Some(Err(not_found(&key)));
1261 }
1262 None
1263}
1264
1265fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1266 let dir_key = normalize_logical(path);
1267 if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1268 || deleted_ancestor(state, &dir_key)
1269 || matches!(
1270 state.entries.get(&dir_key),
1271 Some(StagedEntry::Delete { .. })
1272 )
1273 {
1274 return Err(not_found(&dir_key));
1275 }
1276 if !path.exists() && !has_staged_descendant(state, &dir_key) {
1277 return Err(not_found(&dir_key));
1278 }
1279
1280 let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1281 if path.exists() {
1282 for entry in stdfs::read_dir(path)? {
1283 let entry = entry?;
1284 let name = entry.file_name().to_string_lossy().into_owned();
1285 let file_type = entry.file_type().ok();
1286 let metadata = entry.metadata().ok();
1287 entries.insert(
1288 name.clone(),
1289 OverlayDirEntry {
1290 name,
1291 is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1292 is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1293 size: metadata.map(|m| m.len()).unwrap_or(0),
1294 },
1295 );
1296 }
1297 }
1298
1299 for (path, entry) in &state.entries {
1300 let Some(name) = overlay_child_name(path, &dir_key) else {
1301 continue;
1302 };
1303 match entry {
1304 StagedEntry::Write { len, .. } => {
1305 let is_dir = path.parent() != Some(dir_key.as_path());
1306 entries.insert(
1307 name.clone(),
1308 OverlayDirEntry {
1309 name,
1310 is_dir,
1311 is_symlink: false,
1312 size: if is_dir { 0 } else { *len },
1313 },
1314 );
1315 }
1316 StagedEntry::Delete { .. } => {
1317 if path.parent() == Some(dir_key.as_path()) {
1318 entries.remove(&name);
1319 }
1320 }
1321 }
1322 }
1323
1324 Ok(entries.into_values().collect())
1325}
1326
/// Returns the name of the first path component of `path` below `dir`.
///
/// Yields `None` when `path` is not under `dir`, when `path` equals `dir`,
/// or when the first remaining component is not a normal name (e.g. `..`).
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Some(Component::Normal(name)) = relative.components().next() {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1336
1337fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1338 if let Some(entry) = state.entries.get(path) {
1339 return matches!(entry, StagedEntry::Write { .. });
1340 }
1341 if deleted_ancestor(state, path) {
1342 return false;
1343 }
1344 if has_staged_descendant(state, path) {
1345 return true;
1346 }
1347 path.exists()
1348}
1349
1350fn parent_exists(state: &SessionState, path: &Path) -> bool {
1351 let Some(parent) = path.parent() else {
1352 return true;
1353 };
1354 if parent.as_os_str().is_empty() {
1355 return true;
1356 }
1357 if let Some(entry) = state.entries.get(parent) {
1358 return !matches!(entry, StagedEntry::Delete { .. });
1359 }
1360 if deleted_ancestor(state, parent) {
1361 return false;
1362 }
1363 if has_staged_descendant(state, parent) {
1364 return true;
1365 }
1366 parent.is_dir()
1367}
1368
1369fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1370 state.entries.iter().any(|(candidate, entry)| {
1371 matches!(entry, StagedEntry::Delete { .. })
1372 && path != candidate.as_path()
1373 && path.starts_with(candidate)
1374 })
1375}
1376
1377fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1378 state.entries.iter().any(|(candidate, entry)| {
1379 matches!(entry, StagedEntry::Write { .. })
1380 && candidate != path
1381 && candidate.starts_with(path)
1382 })
1383}
1384
1385fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1386 state
1387 .entries
1388 .keys()
1389 .filter(|candidate| *candidate == path || candidate.starts_with(path))
1390 .cloned()
1391 .collect()
1392}
1393
1394fn validate_delete_shape(
1395 builtin: &'static str,
1396 path: &Path,
1397 recursive: bool,
1398) -> Result<(), HostlibError> {
1399 let Ok(metadata) = stdfs::symlink_metadata(path) else {
1400 return Ok(());
1401 };
1402 if metadata.is_dir() && !recursive {
1403 let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1404 builtin,
1405 message: format!("read_dir `{}`: {err}", path.display()),
1406 })?;
1407 if entries.next().is_some() {
1408 return Err(HostlibError::Backend {
1409 builtin,
1410 message: format!(
1411 "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1412 path.display()
1413 ),
1414 });
1415 }
1416 }
1417 Ok(())
1418}
1419
1420fn status_from_state(state: &SessionState) -> StagedStatus {
1421 let now = now_ms();
1422 let mut pending_writes = Vec::new();
1423 let mut total_bytes_pending = 0u64;
1424 let mut oldest = None;
1425 for (path, entry) in &state.entries {
1426 total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1427 oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1428 old.min(entry.created_at_ms())
1429 }));
1430 let (kind, bytes_added, bytes_removed) = match entry {
1431 StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1432 StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1433 };
1434 pending_writes.push(PendingWrite {
1435 path: to_agent_path(path),
1436 kind,
1437 bytes_added,
1438 bytes_removed,
1439 });
1440 }
1441 StagedStatus {
1442 pending_writes,
1443 total_bytes_pending,
1444 oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1445 }
1446}
1447
1448fn disk_size(path: &Path) -> Option<u64> {
1449 let metadata = stdfs::symlink_metadata(path).ok()?;
1450 if metadata.is_file() {
1451 return Some(metadata.len());
1452 }
1453 if metadata.is_dir() {
1454 let mut total = 0u64;
1455 for entry in walkdir::WalkDir::new(path)
1456 .into_iter()
1457 .filter_map(Result::ok)
1458 {
1459 if let Ok(metadata) = entry.metadata() {
1460 if metadata.is_file() {
1461 total = total.saturating_add(metadata.len());
1462 }
1463 }
1464 }
1465 return Some(total);
1466 }
1467 Some(metadata.len())
1468}
1469
1470fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1471 if paths.is_empty() {
1472 return state.entries.keys().cloned().collect();
1473 }
1474 let selected: BTreeSet<PathBuf> = paths
1475 .iter()
1476 .map(|path| normalize_logical(Path::new(path)))
1477 .collect();
1478 state
1479 .entries
1480 .keys()
1481 .filter(|path| selected.contains(*path))
1482 .cloned()
1483 .collect()
1484}
1485
1486fn active_session_id(explicit: Option<&str>) -> Option<String> {
1487 explicit
1488 .map(str::to_string)
1489 .or_else(harn_vm::agent_sessions::current_session_id)
1490 .filter(|id| !id.trim().is_empty())
1491}
1492
1493fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1494 if session_id.trim().is_empty() {
1495 return Err(HostlibError::InvalidParameter {
1496 builtin,
1497 param: "session_id",
1498 message: "must not be empty".to_string(),
1499 });
1500 }
1501 Ok(())
1502}
1503
/// Returns the process's current directory, or `"."` when it cannot be
/// determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1507
1508fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1509 let mut dir = root.to_path_buf();
1510 for component in STATE_REL {
1511 dir.push(component);
1512 }
1513 dir.push(sanitize_component(session_id));
1514 dir
1515}
1516
1517fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1518 session_dir(root, session_id).join("manifest.json")
1519}
1520
1521fn sanitize_component(input: &str) -> String {
1522 let sanitized: String = input
1523 .chars()
1524 .map(|ch| match ch {
1525 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1526 _ => '_',
1527 })
1528 .collect();
1529 let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1535 if sanitized == input && !is_dotted {
1536 sanitized
1537 } else {
1538 let hash = hex::encode(Sha256::digest(input.as_bytes()));
1539 format!("{sanitized}-{}", &hash[..12])
1540 }
1541}
1542
1543fn normalize_logical(path: &Path) -> PathBuf {
1544 let absolute = if path.is_absolute() {
1545 path.to_path_buf()
1546 } else {
1547 default_root().join(path)
1548 };
1549 let mut out = PathBuf::new();
1550 for component in absolute.components() {
1551 match component {
1552 Component::ParentDir => {
1553 out.pop();
1554 }
1555 Component::CurDir => {}
1556 other => out.push(other),
1557 }
1558 }
1559 out
1560}
1561
/// Builds the `NotFound` I/O error used for paths the overlay hides or that
/// do not exist.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1568
/// Milliseconds since the Unix epoch according to the system clock, or `0`
/// if the clock reads earlier than the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1575
1576fn emit_staged_update(state: &SessionState) {
1577 let status = status_from_state(state);
1578 harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1579 session_id: state.session_id.clone(),
1580 pending_count: status.pending_writes.len(),
1581 total_bytes: status.total_bytes_pending,
1582 });
1583}
1584
1585fn pending_write_to_value(write: PendingWrite) -> VmValue {
1586 build_dict([
1587 ("path", str_value(&write.path)),
1588 ("kind", str_value(write.kind)),
1589 ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1590 ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1591 ])
1592}
1593
1594fn status_to_value(status: StagedStatus) -> VmValue {
1595 build_dict([
1596 (
1597 "pending_writes",
1598 VmValue::List(Arc::new(
1599 status
1600 .pending_writes
1601 .into_iter()
1602 .map(pending_write_to_value)
1603 .collect(),
1604 )),
1605 ),
1606 (
1607 "total_bytes_pending",
1608 VmValue::Int(status.total_bytes_pending as i64),
1609 ),
1610 (
1611 "oldest_pending_age_ms",
1612 VmValue::Int(status.oldest_pending_age_ms),
1613 ),
1614 ])
1615}
1616
1617fn commit_result_to_value(result: CommitResult) -> VmValue {
1618 build_dict([
1619 (
1620 "committed_paths",
1621 VmValue::List(Arc::new(
1622 result
1623 .committed_paths
1624 .into_iter()
1625 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1626 .collect(),
1627 )),
1628 ),
1629 (
1630 "failed_paths_with_reasons",
1631 VmValue::List(Arc::new(
1632 result
1633 .failed_paths_with_reasons
1634 .into_iter()
1635 .map(|(path, reason)| {
1636 build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1637 })
1638 .collect(),
1639 )),
1640 ),
1641 ])
1642}
1643
1644fn discard_result_to_value(result: DiscardResult) -> VmValue {
1645 build_dict([(
1646 "discarded_paths",
1647 VmValue::List(Arc::new(
1648 result
1649 .discarded_paths
1650 .into_iter()
1651 .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1652 .collect(),
1653 )),
1654 )])
1655}
1656
#[cfg(test)]
mod staged_path_tests {
    use super::{set_mode, stage_write_or_none, staged_pending_paths, staged_status, FsMode};
    use tempfile::tempdir;

    /// Stages one write and checks that the pending-path set holds the
    /// native filesystem path while the status reports the agent-facing
    /// form of the same path.
    #[test]
    fn staged_pending_paths_preserve_native_filesystem_paths() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path().canonicalize().expect("canonical tempdir");
        let target = root.join("src").join("lib.rs");

        // Process id and thread name keep the session id unique per test run.
        let thread = std::thread::current();
        let session_id = format!(
            "native-paths-{}-{}",
            std::process::id(),
            thread.name().unwrap_or("test")
        );

        set_mode(&session_id, FsMode::Staged, Some(&root)).expect("set staged mode");
        let staged = stage_write_or_none(
            "test",
            &target,
            b"fn beta() {}\n",
            true,
            true,
            Some(&session_id),
        )
        .expect("stage write");
        staged.expect("staged write");

        let native_paths = staged_pending_paths(&session_id).expect("pending paths");
        let expected = std::collections::BTreeSet::from([target.clone()]);
        assert_eq!(native_paths, expected);

        let status = staged_status(&session_id).expect("staged status");
        assert_eq!(status.pending_writes.len(), 1);
        let reported = &status.pending_writes[0].path;
        assert_eq!(*reported, crate::tools::args::to_agent_path(&target));

        let _ = super::remove_session_state(&session_id, Some(&root));
    }
}
1701
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Inputs that are empty or consist only of dots must never come back
    /// unchanged, all-dots, or as anything but plain name components.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        let hostile = ["..", ".", "...", ""];
        for evil in hostile {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let has_non_dot = safe.bytes().any(|b| b != b'.');
            assert!(has_non_dot, "`{evil}` -> `{safe}` is still all dots");

            let only_normal = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(only_normal, "`{safe}` contains a traversal component");
        }
    }

    /// An id made only of allowed characters is returned untouched.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// A `..` session id must not yield a directory outside the staged root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let escapes = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!escapes, "session_dir({dir:?}) escapes via `..`");

        let mut staged = std::path::PathBuf::from("/workspace");
        staged.extend(STATE_REL);
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}