Skip to main content

harn_hostlib/
fs.rs

1//! Session-scoped staged filesystem mode.
2//!
3//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
4//! mutations land in a durable per-session overlay under
5//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
6//! that overlay first, so agent loops see their own pending writes without
7//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Wire names of the builtins registered by `FsCapability::register_builtins`.
// The same constants are passed as the `builtin` tag when building errors.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Manifest schema version. NOTE(review): presumably stored in
// `Manifest::version` — confirm at the site that builds the manifest.
const MANIFEST_VERSION: u32 = 1;
// Path components of the staging area (`.harn/state/staged`). The module docs
// place each session overlay under `.harn/state/staged/<session_id>/`.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Hostlib filesystem capability handle.
///
/// A field-less marker type; its [`HostlibCapability`] implementation is what
/// registers the `fs` module builtins.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        register(
53            registry,
54            STATUS_BUILTIN,
55            "staged_status",
56            staged_status_builtin,
57        );
58        register(
59            registry,
60            COMMIT_BUILTIN,
61            "commit_staged",
62            commit_staged_builtin,
63        );
64        register(
65            registry,
66            DISCARD_BUILTIN,
67            "discard_staged",
68            discard_staged_builtin,
69        );
70        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
71        // they share the deterministic-tools gate with `tools::*` file I/O.
72        register_gated(
73            registry,
74            SAFE_TEXT_PATCH_BUILTIN,
75            "safe_text_patch",
76            safe_text_patch_builtin,
77        );
78        register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79        register(
80            registry,
81            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82            "emit_safe_text_patch_result",
83            emit_safe_text_patch_result_builtin,
84        );
85    }
86}
87
88fn register(
89    registry: &mut BuiltinRegistry,
90    name: &'static str,
91    method: &'static str,
92    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94    let handler: SyncHandler = std::sync::Arc::new(runner);
95    registry.register(RegisteredBuiltin {
96        name,
97        module: "fs",
98        method,
99        handler,
100    });
101}
102
103fn register_gated(
104    registry: &mut BuiltinRegistry,
105    name: &'static str,
106    method: &'static str,
107    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109    registry.register(RegisteredBuiltin {
110        name,
111        module: "fs",
112        method,
113        handler: crate::tools::permissions::gated_handler(name, runner),
114    });
115}
116
/// Filesystem mode for one hostlib session.
///
/// Serialized by serde as the lowercase variant name (`"immediate"` /
/// `"staged"`), matching the strings returned by [`FsMode::as_str`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Mutations apply to the working tree immediately.
    Immediate,
    /// Mutations are recorded in the staging layer until committed.
    Staged,
}
126
127impl FsMode {
128    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129        match raw {
130            "immediate" => Ok(Self::Immediate),
131            "staged" => Ok(Self::Staged),
132            other => Err(HostlibError::InvalidParameter {
133                builtin,
134                param: "mode",
135                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136            }),
137        }
138    }
139
140    /// Wire string used by hostlib schemas.
141    pub fn as_str(self) -> &'static str {
142        match self {
143            Self::Immediate => "immediate",
144            Self::Staged => "staged",
145        }
146    }
147}
148
/// Serializable snapshot of one session's staged state.
///
/// Mirrors `SessionState`, but with string-typed `root` and entry keys so it
/// can round-trip through serde.
/// NOTE(review): presumably this is the on-disk manifest written by
/// `persist_state` and read by `load_state` — confirm there.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    // Schema version; presumably `MANIFEST_VERSION` — confirm at the write site.
    version: u32,
    // Session this manifest belongs to.
    session_id: String,
    // Filesystem mode recorded for the session.
    mode: FsMode,
    // Workspace root, as a string.
    root: String,
    // Staged changes keyed by path string.
    entries: BTreeMap<String, StagedEntry>,
}
157
/// One pending change recorded in a session's staging layer.
///
/// Serialized with an internal `kind` tag in snake_case (`"write"` /
/// `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// A staged file body waiting to be written to the target path.
    Write {
        // Identifier returned by `write_body` for the stored body.
        body_hash: String,
        // Length in bytes of the staged body.
        len: u64,
        // `now_ms()` timestamp taken when the entry was staged.
        created_at_ms: i64,
    },
    /// A staged removal of the target path.
    Delete {
        // The caller's `recursive` flag, recorded as given.
        recursive: bool,
        // `now_ms()` timestamp taken when the entry was staged.
        created_at_ms: i64,
    },
}
171
172impl StagedEntry {
173    fn created_at_ms(&self) -> i64 {
174        match self {
175            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176                *created_at_ms
177            }
178        }
179    }
180
181    fn body_len(&self) -> u64 {
182        match self {
183            Self::Write { len, .. } => *len,
184            Self::Delete { .. } => 0,
185        }
186    }
187}
188
/// In-memory state for one session, stored in the process-wide `SESSIONS`
/// table under its session id.
#[derive(Clone, Debug)]
struct SessionState {
    // Id of the session this state belongs to.
    session_id: String,
    // Whether mutations are staged or applied immediately.
    mode: FsMode,
    // Workspace root; the session's overlay directory is derived from it
    // via `session_dir(&root, &session_id)`.
    root: PathBuf,
    // Pending changes keyed by the `normalize_logical` form of the target path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
196
/// Result of staging a write through `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when `overlay_exists` reported no existing file at the path
    /// before this write was staged.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry as reported by the overlay-aware `read_dir`.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name. NOTE(review): presumably the final path component only —
    /// confirm in `overlay_read_dir`.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub(crate) is_symlink: bool,
    /// Entry size. NOTE(review): presumably bytes — confirm in
    /// `overlay_read_dir`.
    pub(crate) size: u64,
}
210
/// Summary of staged filesystem changes for one session.
///
/// Returned by [`staged_status`], which builds it from the session state via
/// `status_from_state`.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// Pending path changes, sorted by path.
    pub pending_writes: Vec<PendingWrite>,
    /// Bytes stored in staged write bodies.
    pub total_bytes_pending: u64,
    /// Age in milliseconds of the oldest pending change, or 0 when empty.
    pub oldest_pending_age_ms: i64,
}
221
/// One pending staged filesystem change.
///
/// Listed in [`StagedStatus::pending_writes`].
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Absolute path affected by this staged change.
    pub path: String,
    /// Change kind (`write`, `delete`, or reserved future `move`).
    pub kind: &'static str,
    /// Bytes the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes the final staged view removes at this path.
    pub bytes_removed: u64,
}
234
/// Result returned after changing a session's filesystem mode.
///
/// Produced by [`set_mode`].
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode active before the change.
    pub previous_mode: FsMode,
}
241
/// Result returned after applying staged changes to disk.
///
/// Produced by [`commit_staged`]. A selected path appears in exactly one of
/// the two lists.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths successfully applied to disk.
    pub committed_paths: Vec<String>,
    /// Paths that failed to apply, as `(path, reason)` pairs. The reason is
    /// either a path-scope violation message or the error from applying the
    /// entry.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Result returned after dropping staged changes.
///
/// Produced by [`discard_staged`].
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entries were removed.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264/// Remember the workspace root associated with a live session.
265///
266/// ACP calls this when a prompt starts so Harn code can call
267/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
268pub fn configure_session_root(session_id: &str, root: &Path) {
269    if session_id.trim().is_empty() {
270        return;
271    }
272    let root = normalize_logical(root);
273    let mut guard = sessions()
274        .lock()
275        .expect("hostlib fs session mutex poisoned");
276    match guard.get_mut(session_id) {
277        Some(state) if state.entries.is_empty() => {
278            state.root = root;
279        }
280        Some(_) => {}
281        None => {
282            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
283                session_id: session_id.to_string(),
284                mode: FsMode::Immediate,
285                root,
286                entries: BTreeMap::new(),
287            });
288            guard.insert(session_id.to_string(), state);
289        }
290    }
291}
292
293/// Set a session's filesystem mode.
294pub fn set_mode(
295    session_id: &str,
296    mode: FsMode,
297    root: Option<&Path>,
298) -> Result<SetModeResult, HostlibError> {
299    validate_session_id(SET_MODE_BUILTIN, session_id)?;
300    let mut guard = sessions()
301        .lock()
302        .expect("hostlib fs session mutex poisoned");
303    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
304    let previous_mode = state.mode;
305    state.mode = mode;
306    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
307        builtin: SET_MODE_BUILTIN,
308        message: err,
309    })?;
310    guard.insert(session_id.to_string(), state);
311    Ok(SetModeResult { previous_mode })
312}
313
314/// Return the staged status for a session.
315pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
316    validate_session_id(STATUS_BUILTIN, session_id)?;
317    let mut guard = sessions()
318        .lock()
319        .expect("hostlib fs session mutex poisoned");
320    let state = state_for_locked(&mut guard, session_id, None)?;
321    let status = status_from_state(&state);
322    guard.insert(session_id.to_string(), state);
323    Ok(status)
324}
325
326/// Commit staged changes for all paths or for a filtered path list.
327pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
328    validate_session_id(COMMIT_BUILTIN, session_id)?;
329    let mut guard = sessions()
330        .lock()
331        .expect("hostlib fs session mutex poisoned");
332    let mut state = state_for_locked(&mut guard, session_id, None)?;
333    let selected = selected_paths(&state, paths);
334    let mut committed_paths = Vec::new();
335    let mut failed_paths_with_reasons = Vec::new();
336
337    for path in selected {
338        let Some(entry) = state.entries.get(&path).cloned() else {
339            continue;
340        };
341        let path_label = path.to_string_lossy().into_owned();
342        // The overlay always lives inside the workspace, but commit flushes
343        // to the *target* working-tree path. Enforce workspace-root scope
344        // against that target so a staged entry — possibly persisted under
345        // a looser policy in an earlier session — can never write outside
346        // the roots active at commit time.
347        let access = match entry {
348            StagedEntry::Write { .. } => FsAccess::Write,
349            StagedEntry::Delete { .. } => FsAccess::Delete,
350        };
351        if let Err(violation) = check_fs_path_scope(&path, access) {
352            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
353            continue;
354        }
355        match commit_entry(&state, &path, &entry) {
356            Ok(()) => {
357                state.entries.remove(&path);
358                committed_paths.push(path_label);
359            }
360            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
361        }
362    }
363
364    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
365        builtin: COMMIT_BUILTIN,
366        message: err,
367    })?;
368    emit_staged_update(&state);
369    guard.insert(session_id.to_string(), state);
370    Ok(CommitResult {
371        committed_paths,
372        failed_paths_with_reasons,
373    })
374}
375
376/// Discard staged changes for all paths or for a filtered path list.
377pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
378    validate_session_id(DISCARD_BUILTIN, session_id)?;
379    let mut guard = sessions()
380        .lock()
381        .expect("hostlib fs session mutex poisoned");
382    let mut state = state_for_locked(&mut guard, session_id, None)?;
383    let selected = selected_paths(&state, paths);
384    let mut discarded_paths = Vec::new();
385    for path in selected {
386        if state.entries.remove(&path).is_some() {
387            discarded_paths.push(path.to_string_lossy().into_owned());
388        }
389    }
390    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
391        builtin: DISCARD_BUILTIN,
392        message: err,
393    })?;
394    emit_staged_update(&state);
395    guard.insert(session_id.to_string(), state);
396    Ok(DiscardResult { discarded_paths })
397}
398
399/// Remove all persisted staged-fs state for a caller-owned throw-away session.
400///
401/// Normal agent sessions keep their manifest after `discard_staged` so hosts can
402/// continue reporting session state. Transient dry-run sessions own their ids,
403/// though, and should remove both the in-memory entry and on-disk overlay after
404/// their preview is rendered.
405pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
406    validate_session_id(DISCARD_BUILTIN, session_id)?;
407    let mut guard = sessions()
408        .lock()
409        .expect("hostlib fs session mutex poisoned");
410    let state = match guard.remove(session_id) {
411        Some(state) => state,
412        None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
413            HostlibError::Backend {
414                builtin: DISCARD_BUILTIN,
415                message: err,
416            }
417        })?,
418    };
419    let dir = session_dir(&state.root, &state.session_id);
420    match stdfs::remove_dir_all(&dir) {
421        Ok(()) => Ok(()),
422        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
423        Err(err) => Err(HostlibError::Backend {
424            builtin: DISCARD_BUILTIN,
425            message: format!("remove staged session {}: {err}", dir.display()),
426        }),
427    }
428}
429
430pub(crate) fn read(
431    path: &Path,
432    explicit_session_id: Option<&str>,
433) -> Option<std::io::Result<Vec<u8>>> {
434    let session_id = active_session_id(explicit_session_id)?;
435    let mut guard = sessions()
436        .lock()
437        .expect("hostlib fs session mutex poisoned");
438    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
439    let result = if state.mode == FsMode::Staged {
440        overlay_read(&state, path)
441    } else {
442        None
443    };
444    guard.insert(session_id, state);
445    result
446}
447
448pub(crate) fn read_to_string(
449    path: &Path,
450    explicit_session_id: Option<&str>,
451) -> Option<std::io::Result<String>> {
452    read(path, explicit_session_id).map(|result| {
453        result.and_then(|bytes| {
454            String::from_utf8(bytes).map_err(|err| {
455                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
456            })
457        })
458    })
459}
460
461pub(crate) fn read_dir(
462    path: &Path,
463    explicit_session_id: Option<&str>,
464) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
465    let session_id = active_session_id(explicit_session_id)?;
466    let mut guard = sessions()
467        .lock()
468        .expect("hostlib fs session mutex poisoned");
469    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
470    let result = if state.mode == FsMode::Staged {
471        Some(overlay_read_dir(&state, path))
472    } else {
473        None
474    };
475    guard.insert(session_id, state);
476    result
477}
478
479pub(crate) fn stage_write_or_none(
480    builtin: &'static str,
481    path: &Path,
482    bytes: &[u8],
483    create_parents: bool,
484    overwrite: bool,
485    explicit_session_id: Option<&str>,
486) -> Result<Option<WriteOutcome>, HostlibError> {
487    let Some(session_id) = active_session_id(explicit_session_id) else {
488        return Ok(None);
489    };
490    let mut guard = sessions()
491        .lock()
492        .expect("hostlib fs session mutex poisoned");
493    let mut state = state_for_locked(&mut guard, &session_id, None)?;
494    if state.mode != FsMode::Staged {
495        guard.insert(session_id, state);
496        return Ok(None);
497    }
498
499    let key = normalize_logical(path);
500    let existed = overlay_exists(&state, &key);
501    if existed && !overwrite {
502        guard.insert(session_id, state);
503        return Err(HostlibError::Backend {
504            builtin,
505            message: format!("`{}` exists and overwrite=false", key.display()),
506        });
507    }
508    if !create_parents && !parent_exists(&state, &key) {
509        guard.insert(session_id, state);
510        return Err(HostlibError::Backend {
511            builtin,
512            message: format!("parent directory for `{}` does not exist", key.display()),
513        });
514    }
515
516    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
517        builtin,
518        message: err,
519    })?;
520    state.entries.insert(
521        key.clone(),
522        StagedEntry::Write {
523            body_hash: hash,
524            len: bytes.len() as u64,
525            created_at_ms: now_ms(),
526        },
527    );
528    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
529        builtin,
530        message: err,
531    })?;
532    emit_staged_update(&state);
533    guard.insert(session_id, state);
534    Ok(Some(WriteOutcome {
535        created: !existed,
536        bytes_written: bytes.len(),
537    }))
538}
539
540pub(crate) fn stage_delete_or_none(
541    builtin: &'static str,
542    path: &Path,
543    recursive: bool,
544    explicit_session_id: Option<&str>,
545) -> Result<Option<bool>, HostlibError> {
546    let Some(session_id) = active_session_id(explicit_session_id) else {
547        return Ok(None);
548    };
549    let mut guard = sessions()
550        .lock()
551        .expect("hostlib fs session mutex poisoned");
552    let mut state = state_for_locked(&mut guard, &session_id, None)?;
553    if state.mode != FsMode::Staged {
554        guard.insert(session_id, state);
555        return Ok(None);
556    }
557
558    let key = normalize_logical(path);
559    let staged_targets = staged_paths_under(&state, &key);
560    let disk_exists = key.exists();
561    if !disk_exists && staged_targets.is_empty() {
562        guard.insert(session_id, state);
563        return Ok(Some(false));
564    }
565
566    if !disk_exists {
567        for staged in staged_targets {
568            state.entries.remove(&staged);
569        }
570    } else {
571        validate_delete_shape(builtin, &key, recursive)?;
572        for staged in staged_targets {
573            state.entries.remove(&staged);
574        }
575        state.entries.insert(
576            key.clone(),
577            StagedEntry::Delete {
578                recursive,
579                created_at_ms: now_ms(),
580            },
581        );
582    }
583    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
584        builtin,
585        message: err,
586    })?;
587    emit_staged_update(&state);
588    guard.insert(session_id, state);
589    Ok(Some(true))
590}
591
/// Outcome of one [`safe_text_patch`] call. `result` carries the structured
/// discriminant used by the wire/JSON shape; bytes on disk (or in the staged
/// overlay) changed only when it is `Applied`.
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
    pub result: SafeTextPatchResult,
    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed.
    /// A missing file is hashed as empty content.
    pub current_hash: String,
    /// `sha256:HEX` of the requested post-image.
    pub after_hash: String,
    /// `true` when the write was applied and the file did not exist before
    /// the call. Always `false` on `stale_base` or `no_op`.
    pub created: bool,
    /// Bytes written; `0` on `stale_base` or `no_op`.
    pub bytes_written: usize,
}
608
/// Discriminant for a [`safe_text_patch`] outcome.
///
/// The stale-base check runs before the no-op check, so a mismatched
/// `expected_hash` reports `StaleBase` even when the content would not change.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// Pre-image hash matched (or no expected hash supplied) and the
    /// post-image differs from the pre-image — bytes were written.
    Applied,
    /// `expected_hash` did not match the observed pre-image hash; no
    /// bytes were written. Callers should re-read and retry.
    StaleBase,
    /// Pre-image hash matched and the post-image equals the pre-image —
    /// skipped the write to avoid spurious timestamps and overlay churn.
    NoOp,
}
622
623impl SafeTextPatchResult {
624    fn as_str(self) -> &'static str {
625        match self {
626            Self::Applied => "applied",
627            Self::StaleBase => "stale_base",
628            Self::NoOp => "no_op",
629        }
630    }
631}
632
633/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
634/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
635/// safe-text-patch surface.
636fn hash_label(bytes: &[u8]) -> String {
637    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
638}
639
640/// Atomic compare-and-swap-style text write.
641///
642/// Reads the current bytes at `path` through the staged-fs overlay (when a
643/// session is active) so concurrent agent edits see each other's pending
644/// writes. If `expected_hash` is supplied and differs from the observed
645/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
646/// mutating any state. On a hash match the post-image is written through
647/// the same overlay path, keeping the read and the write atomic with
648/// respect to other staged-fs consumers in the same process.
649///
650/// Atomicity:
651///
652/// - When a session is in staged mode, the read, hash check, and write
653///   all happen under a single acquisition of the sessions mutex, so a
654///   sibling thread cannot stage a write into the window between the
655///   pre-image snapshot and the commit.
656/// - When the call routes through disk (no active session, or session in
657///   immediate mode), the write goes through an atomic rename-into-place
658///   so partial-write tearing is impossible. Cross-process races are
659///   intentionally out of scope — the staged-fs overlay is the
660///   collision-rejection layer.
661pub fn safe_text_patch(
662    path: &Path,
663    content: &str,
664    expected_hash: Option<&str>,
665    session_id: Option<&str>,
666    create_parents: bool,
667    overwrite: bool,
668) -> Result<SafeTextPatchOutcome, HostlibError> {
669    let new_bytes = content.as_bytes();
670    let after_hash = hash_label(new_bytes);
671
672    if let Some(outcome) = safe_text_patch_staged(
673        path,
674        new_bytes,
675        expected_hash,
676        session_id,
677        create_parents,
678        overwrite,
679        &after_hash,
680    )? {
681        return Ok(outcome);
682    }
683
684    safe_text_patch_disk(
685        path,
686        new_bytes,
687        expected_hash,
688        create_parents,
689        overwrite,
690        after_hash,
691    )
692}
693
/// Atomic CAS path for a session in `staged` mode. Holds the sessions
/// mutex through the entire read → hash → check → write so concurrent
/// agents in the same process cannot race the snapshot. Returns `None`
/// when no session is active or the session is in `immediate` mode, so
/// the caller can fall through to the disk path.
///
/// Check order: stale base first, then no-op, then the `overwrite` and
/// parent-directory validations, and only then the write.
///
/// # Errors
///
/// `HostlibError::Backend` tagged with `SAFE_TEXT_PATCH_BUILTIN` when the
/// pre-image cannot be read, the overlay already has the path and
/// `overwrite` is false, the parent is missing and `create_parents` is
/// false, or storing the body / persisting the session fails.
#[allow(clippy::too_many_arguments)]
fn safe_text_patch_staged(
    path: &Path,
    new_bytes: &[u8],
    expected_hash: Option<&str>,
    session_id: Option<&str>,
    create_parents: bool,
    overwrite: bool,
    after_hash: &str,
) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
    let Some(session) = active_session_id(session_id) else {
        return Ok(None);
    };
    let mut guard = sessions()
        .lock()
        .expect("hostlib fs session mutex poisoned");
    let mut state = state_for_locked(&mut guard, &session, None)?;
    if state.mode != FsMode::Staged {
        // Not staging: store the state back and let the disk path handle it.
        guard.insert(session, state);
        return Ok(None);
    }

    let key = normalize_logical(path);
    // Pre-image: prefer what the overlay reports; when the overlay returns
    // `None` for this path, read the file from disk. A missing file is
    // treated as empty content.
    let (existing_bytes, existed) = match overlay_read(&state, path) {
        Some(Ok(bytes)) => (bytes, true),
        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
        Some(Err(err)) => {
            guard.insert(session, state);
            return Err(HostlibError::Backend {
                builtin: SAFE_TEXT_PATCH_BUILTIN,
                message: format!("read `{}`: {err}", path.display()),
            });
        }
        None => match stdfs::read(path) {
            Ok(bytes) => (bytes, true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
            Err(err) => {
                guard.insert(session, state);
                return Err(HostlibError::Backend {
                    builtin: SAFE_TEXT_PATCH_BUILTIN,
                    message: format!("read `{}`: {err}", path.display()),
                });
            }
        },
    };
    let current_hash = hash_label(&existing_bytes);

    // Stale base: the caller's snapshot no longer matches; write nothing.
    if let Some(expected) = expected_hash {
        if expected != current_hash {
            guard.insert(session, state);
            return Ok(Some(SafeTextPatchOutcome {
                result: SafeTextPatchResult::StaleBase,
                current_hash,
                after_hash: after_hash.to_string(),
                created: false,
                bytes_written: 0,
            }));
        }
    }

    // No-op: identical content already present; skip the overlay write.
    if existed && existing_bytes == new_bytes {
        guard.insert(session, state);
        return Ok(Some(SafeTextPatchOutcome {
            result: SafeTextPatchResult::NoOp,
            current_hash,
            after_hash: after_hash.to_string(),
            created: false,
            bytes_written: 0,
        }));
    }

    // Same validations as `stage_write_or_none`; the unmodified state is
    // stored back before each rejection.
    let overlay_existed = overlay_exists(&state, &key);
    if overlay_existed && !overwrite {
        guard.insert(session, state);
        return Err(HostlibError::Backend {
            builtin: SAFE_TEXT_PATCH_BUILTIN,
            message: format!("`{}` exists and overwrite=false", key.display()),
        });
    }
    if !create_parents && !parent_exists(&state, &key) {
        guard.insert(session, state);
        return Err(HostlibError::Backend {
            builtin: SAFE_TEXT_PATCH_BUILTIN,
            message: format!("parent directory for `{}` does not exist", key.display()),
        });
    }

    // NOTE(review): the two `?` exits below return without storing `state`
    // back into the table, unlike the validation exits above — confirm this
    // is intended given how `state_for_locked` hands the state out.
    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
        builtin: SAFE_TEXT_PATCH_BUILTIN,
        message: err,
    })?;
    state.entries.insert(
        key.clone(),
        StagedEntry::Write {
            body_hash,
            len: new_bytes.len() as u64,
            created_at_ms: now_ms(),
        },
    );
    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
        builtin: SAFE_TEXT_PATCH_BUILTIN,
        message: err,
    })?;
    emit_staged_update(&state);
    guard.insert(session, state);

    Ok(Some(SafeTextPatchOutcome {
        result: SafeTextPatchResult::Applied,
        current_hash,
        after_hash: after_hash.to_string(),
        // `existed` reflects the pre-image read above (overlay or disk).
        created: !existed,
        bytes_written: new_bytes.len(),
    }))
}
813
814/// Disk path for callers without an active staged session. Uses
815/// `atomic_write` so the post-image lands via rename-into-place rather
816/// than an open/truncate/write/close sequence — readers either see the
817/// pre-image or the post-image, never a torn write.
818fn safe_text_patch_disk(
819    path: &Path,
820    new_bytes: &[u8],
821    expected_hash: Option<&str>,
822    create_parents: bool,
823    overwrite: bool,
824    after_hash: String,
825) -> Result<SafeTextPatchOutcome, HostlibError> {
826    let (existing_bytes, existed) = match stdfs::read(path) {
827        Ok(bytes) => (bytes, true),
828        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
829        Err(err) => {
830            return Err(HostlibError::Backend {
831                builtin: SAFE_TEXT_PATCH_BUILTIN,
832                message: format!("read `{}`: {err}", path.display()),
833            });
834        }
835    };
836    let current_hash = hash_label(&existing_bytes);
837
838    if let Some(expected) = expected_hash {
839        if expected != current_hash {
840            return Ok(SafeTextPatchOutcome {
841                result: SafeTextPatchResult::StaleBase,
842                current_hash,
843                after_hash,
844                created: false,
845                bytes_written: 0,
846            });
847        }
848    }
849
850    if existed && existing_bytes == new_bytes {
851        return Ok(SafeTextPatchOutcome {
852            result: SafeTextPatchResult::NoOp,
853            current_hash,
854            after_hash,
855            created: false,
856            bytes_written: 0,
857        });
858    }
859    if existed && !overwrite {
860        return Err(HostlibError::Backend {
861            builtin: SAFE_TEXT_PATCH_BUILTIN,
862            message: format!("`{}` exists and overwrite=false", path.display()),
863        });
864    }
865    if !create_parents {
866        if let Some(parent) = path.parent() {
867            if !parent.as_os_str().is_empty() && !parent.is_dir() {
868                return Err(HostlibError::Backend {
869                    builtin: SAFE_TEXT_PATCH_BUILTIN,
870                    message: format!(
871                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
872                        path.display()
873                    ),
874                });
875            }
876        }
877    }
878
879    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
880    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
881        builtin: SAFE_TEXT_PATCH_BUILTIN,
882        message: format!("write `{}`: {err}", path.display()),
883    })?;
884
885    Ok(SafeTextPatchOutcome {
886        result: SafeTextPatchResult::Applied,
887        current_hash,
888        after_hash,
889        created: !existed,
890        bytes_written: new_bytes.len(),
891    })
892}
893
894/// Read the pre-image through the staged-fs overlay (when active),
895/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
896/// `builtin` is the caller's tag — used so backend errors point at the
897/// right builtin name in diagnostics.
898fn read_existing(
899    builtin: &'static str,
900    path: &Path,
901    session_id: Option<&str>,
902) -> Result<(Vec<u8>, bool), HostlibError> {
903    if let Some(result) = read(path, session_id) {
904        return match result {
905            Ok(bytes) => Ok((bytes, true)),
906            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
907            Err(err) => Err(HostlibError::Backend {
908                builtin,
909                message: format!("read `{}`: {err}", path.display()),
910            }),
911        };
912    }
913    match stdfs::read(path) {
914        Ok(bytes) => Ok((bytes, true)),
915        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
916        Err(err) => Err(HostlibError::Backend {
917            builtin,
918            message: format!("read `{}`: {err}", path.display()),
919        }),
920    }
921}
922
923fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
924    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
925    let dict = raw.as_ref();
926    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
927    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
928    let path = Path::new(&path_str);
929    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
930
931    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
932    let hash = hash_label(&bytes);
933    let content = match std::str::from_utf8(&bytes) {
934        Ok(s) => s.to_string(),
935        Err(err) => {
936            return Err(HostlibError::Backend {
937                builtin: READ_TEXT_BUILTIN,
938                message: format!("`{path_str}` is not valid UTF-8: {err}"),
939            });
940        }
941    };
942    let bytes_len = bytes.len() as i64;
943    Ok(build_dict([
944        ("path", str_value(&path_str)),
945        ("content", str_value(&content)),
946        ("sha256", str_value(&hash)),
947        ("size", VmValue::Int(bytes_len)),
948        ("exists", VmValue::Bool(existed)),
949    ]))
950}
951
952fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
953    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
954    let dict = raw.as_ref();
955
956    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
957    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
958    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
959    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
960    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
961    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
962
963    enforce_path_scope(
964        SAFE_TEXT_PATCH_BUILTIN,
965        Path::new(&path_str),
966        FsAccess::Write,
967    )?;
968    let outcome = safe_text_patch(
969        Path::new(&path_str),
970        &content,
971        expected_hash.as_deref(),
972        session_id.as_deref(),
973        create_parents,
974        overwrite,
975    )?;
976
977    let entries: Vec<(&'static str, VmValue)> = vec![
978        ("path", str_value(&path_str)),
979        ("result", str_value(outcome.result.as_str())),
980        (
981            "applied",
982            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
983        ),
984        (
985            "stale_base",
986            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
987        ),
988        ("current_hash", str_value(&outcome.current_hash)),
989        ("before_sha256", str_value(&outcome.current_hash)),
990        ("after_sha256", str_value(&outcome.after_hash)),
991        ("created", VmValue::Bool(outcome.created)),
992        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
993        (
994            "expected_hash",
995            match expected_hash.as_deref() {
996                Some(hash) => str_value(hash),
997                None => VmValue::Nil,
998            },
999        ),
1000    ];
1001    Ok(build_dict(entries))
1002}
1003
1004fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1005    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1006    let dict = raw.as_ref();
1007
1008    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1009    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1010    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1011    let bytes_written = optional_int(
1012        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1013        dict,
1014        "bytes_written",
1015        0,
1016    )?;
1017    let failed_hunk_index = match dict.get("failed_hunk_index") {
1018        None | Some(VmValue::Nil) => None,
1019        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1020        Some(other) => {
1021            return Err(HostlibError::InvalidParameter {
1022                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1023                param: "failed_hunk_index",
1024                message: format!("expected non-negative integer, got {}", other.type_name()),
1025            });
1026        }
1027    };
1028    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1029        .or_else(harn_vm::agent_sessions::current_session_id);
1030
1031    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1032        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1033            session_id,
1034            path,
1035            result,
1036            hunks_count: hunks_count.max(0) as usize,
1037            bytes_written: bytes_written.max(0) as u64,
1038            failed_hunk_index,
1039        });
1040        Ok(VmValue::Bool(true))
1041    } else {
1042        // Silently no-op when no session is active — telemetry without a
1043        // session has nowhere to route. Caller can opt in by always
1044        // passing session_id explicitly.
1045        Ok(VmValue::Bool(false))
1046    }
1047}
1048
1049fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1050    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1051    let dict = raw.as_ref();
1052    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1053    let mode = FsMode::parse(
1054        SET_MODE_BUILTIN,
1055        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1056    )?;
1057    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1058    let result = set_mode(&session_id, mode, root.as_deref())?;
1059    Ok(build_dict([(
1060        "previous_mode",
1061        str_value(result.previous_mode.as_str()),
1062    )]))
1063}
1064
1065fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1066    let raw = dict_arg(STATUS_BUILTIN, args)?;
1067    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1068    Ok(status_to_value(staged_status(&session_id)?))
1069}
1070
1071fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1072    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1073    let dict = raw.as_ref();
1074    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1075    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1076    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1077}
1078
1079fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1080    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1081    let dict = raw.as_ref();
1082    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1083    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1084    Ok(discard_result_to_value(discard_staged(
1085        &session_id,
1086        &paths,
1087    )?))
1088}
1089
1090fn state_for_locked(
1091    guard: &mut BTreeMap<String, SessionState>,
1092    session_id: &str,
1093    root: Option<PathBuf>,
1094) -> Result<SessionState, HostlibError> {
1095    if let Some(existing) = guard.get(session_id) {
1096        let mut state = existing.clone();
1097        if let Some(root) = root {
1098            if state.entries.is_empty() {
1099                state.root = root;
1100            }
1101        }
1102        return Ok(state);
1103    }
1104    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1105        builtin: SET_MODE_BUILTIN,
1106        message: err,
1107    })?;
1108    Ok(state)
1109}
1110
1111fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1112    let root = root.unwrap_or_else(default_root);
1113    let manifest_path = manifest_path(&root, session_id);
1114    if manifest_path.exists() {
1115        let text = stdfs::read_to_string(&manifest_path)
1116            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1117        let manifest: Manifest = serde_json::from_str(&text)
1118            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1119        if manifest.version != MANIFEST_VERSION {
1120            return Err(format!(
1121                "unsupported staged fs manifest version {} in {}",
1122                manifest.version,
1123                manifest_path.display()
1124            ));
1125        }
1126        if manifest.session_id != session_id {
1127            return Err(format!(
1128                "staged fs manifest session id mismatch in {}",
1129                manifest_path.display()
1130            ));
1131        }
1132        return Ok(SessionState {
1133            session_id: manifest.session_id,
1134            mode: manifest.mode,
1135            root: normalize_logical(Path::new(&manifest.root)),
1136            entries: manifest
1137                .entries
1138                .into_iter()
1139                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1140                .collect(),
1141        });
1142    }
1143    Ok(SessionState {
1144        session_id: session_id.to_string(),
1145        mode: FsMode::Immediate,
1146        root,
1147        entries: BTreeMap::new(),
1148    })
1149}
1150
1151fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1152    let dir = session_dir(&state.root, &state.session_id);
1153    stdfs::create_dir_all(dir.join("bodies"))
1154        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1155    let manifest = Manifest {
1156        version: MANIFEST_VERSION,
1157        session_id: state.session_id.clone(),
1158        mode: state.mode,
1159        root: state.root.to_string_lossy().into_owned(),
1160        entries: state
1161            .entries
1162            .iter()
1163            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1164            .collect(),
1165    };
1166    let bytes = serde_json::to_vec_pretty(&manifest)
1167        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1168    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1169    append_journal(state, op, path)?;
1170    prune_unreferenced_bodies(state);
1171    Ok(())
1172}
1173
1174fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1175    let dir = session_dir(&state.root, &state.session_id);
1176    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1177    let line = serde_json::to_string(&serde_json::json!({
1178        "ts_ms": now_ms(),
1179        "op": op,
1180        "path": path.map(|path| path.to_string_lossy().into_owned()),
1181        "pending_count": state.entries.len(),
1182    }))
1183    .map_err(|err| format!("serialize staged journal: {err}"))?;
1184    let mut file = stdfs::OpenOptions::new()
1185        .create(true)
1186        .append(true)
1187        .open(dir.join("journal.jsonl"))
1188        .map_err(|err| format!("open staged journal: {err}"))?;
1189    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1190}
1191
1192fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1193    let hash = hex::encode(Sha256::digest(bytes));
1194    let path = session_dir(&state.root, &state.session_id)
1195        .join("bodies")
1196        .join(&hash);
1197    if !path.exists() {
1198        atomic_write(&path, bytes)?;
1199    }
1200    Ok(hash)
1201}
1202
1203fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1204    stdfs::read(
1205        session_dir(&state.root, &state.session_id)
1206            .join("bodies")
1207            .join(hash),
1208    )
1209}
1210
1211fn prune_unreferenced_bodies(state: &SessionState) {
1212    let live: BTreeSet<String> = state
1213        .entries
1214        .values()
1215        .filter_map(|entry| match entry {
1216            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1217            StagedEntry::Delete { .. } => None,
1218        })
1219        .collect();
1220    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1221    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1222        return;
1223    };
1224    for entry in entries.flatten() {
1225        let name = entry.file_name().to_string_lossy().into_owned();
1226        if !live.contains(&name) {
1227            let _ = stdfs::remove_file(entry.path());
1228        }
1229    }
1230}
1231
1232fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1233    if let Some(parent) = path.parent() {
1234        stdfs::create_dir_all(parent)
1235            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1236    }
1237    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1238    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1239    match stdfs::rename(&tmp, path) {
1240        Ok(()) => Ok(()),
1241        Err(err) => {
1242            let _ = stdfs::remove_file(path);
1243            stdfs::rename(&tmp, path).map_err(|retry| {
1244                format!(
1245                    "rename {} to {}: {err}; retry: {retry}",
1246                    tmp.display(),
1247                    path.display()
1248                )
1249            })
1250        }
1251    }
1252}
1253
1254fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1255    match entry {
1256        StagedEntry::Write { body_hash, .. } => {
1257            let bytes = read_body(state, body_hash)
1258                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1259            atomic_write(path, &bytes)
1260        }
1261        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1262            Ok(metadata) if metadata.is_dir() => {
1263                if *recursive {
1264                    stdfs::remove_dir_all(path)
1265                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1266                } else {
1267                    stdfs::remove_dir(path)
1268                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1269                }
1270            }
1271            Ok(_) => stdfs::remove_file(path)
1272                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1273            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1274            Err(err) => Err(format!("stat {}: {err}", path.display())),
1275        },
1276    }
1277}
1278
1279fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1280    let key = normalize_logical(path);
1281    if let Some(entry) = state.entries.get(&key) {
1282        return Some(match entry {
1283            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1284            StagedEntry::Delete { .. } => Err(not_found(&key)),
1285        });
1286    }
1287    if deleted_ancestor(state, &key) {
1288        return Some(Err(not_found(&key)));
1289    }
1290    None
1291}
1292
1293fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1294    let dir_key = normalize_logical(path);
1295    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1296        || deleted_ancestor(state, &dir_key)
1297        || matches!(
1298            state.entries.get(&dir_key),
1299            Some(StagedEntry::Delete { .. })
1300        )
1301    {
1302        return Err(not_found(&dir_key));
1303    }
1304    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1305        return Err(not_found(&dir_key));
1306    }
1307
1308    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1309    if path.exists() {
1310        for entry in stdfs::read_dir(path)? {
1311            let entry = entry?;
1312            let name = entry.file_name().to_string_lossy().into_owned();
1313            let file_type = entry.file_type().ok();
1314            let metadata = entry.metadata().ok();
1315            entries.insert(
1316                name.clone(),
1317                OverlayDirEntry {
1318                    name,
1319                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1320                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1321                    size: metadata.map(|m| m.len()).unwrap_or(0),
1322                },
1323            );
1324        }
1325    }
1326
1327    for (path, entry) in &state.entries {
1328        let Some(name) = overlay_child_name(path, &dir_key) else {
1329            continue;
1330        };
1331        match entry {
1332            StagedEntry::Write { len, .. } => {
1333                let is_dir = path.parent() != Some(dir_key.as_path());
1334                entries.insert(
1335                    name.clone(),
1336                    OverlayDirEntry {
1337                        name,
1338                        is_dir,
1339                        is_symlink: false,
1340                        size: if is_dir { 0 } else { *len },
1341                    },
1342                );
1343            }
1344            StagedEntry::Delete { .. } => {
1345                if path.parent() == Some(dir_key.as_path()) {
1346                    entries.remove(&name);
1347                }
1348            }
1349        }
1350    }
1351
1352    Ok(entries.into_values().collect())
1353}
1354
/// Name of the immediate child of `dir` through which `path` is reached.
///
/// Returns `None` when `path` is not below `dir`, is `dir` itself, or the
/// first component after `dir` is not a normal path component.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Some(Component::Normal(name)) = relative.components().next() {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1364
1365fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1366    if let Some(entry) = state.entries.get(path) {
1367        return matches!(entry, StagedEntry::Write { .. });
1368    }
1369    if deleted_ancestor(state, path) {
1370        return false;
1371    }
1372    if has_staged_descendant(state, path) {
1373        return true;
1374    }
1375    path.exists()
1376}
1377
1378fn parent_exists(state: &SessionState, path: &Path) -> bool {
1379    let Some(parent) = path.parent() else {
1380        return true;
1381    };
1382    if parent.as_os_str().is_empty() {
1383        return true;
1384    }
1385    if let Some(entry) = state.entries.get(parent) {
1386        return !matches!(entry, StagedEntry::Delete { .. });
1387    }
1388    if deleted_ancestor(state, parent) {
1389        return false;
1390    }
1391    if has_staged_descendant(state, parent) {
1392        return true;
1393    }
1394    parent.is_dir()
1395}
1396
1397fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1398    state.entries.iter().any(|(candidate, entry)| {
1399        matches!(entry, StagedEntry::Delete { .. })
1400            && path != candidate.as_path()
1401            && path.starts_with(candidate)
1402    })
1403}
1404
1405fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1406    state.entries.iter().any(|(candidate, entry)| {
1407        matches!(entry, StagedEntry::Write { .. })
1408            && candidate != path
1409            && candidate.starts_with(path)
1410    })
1411}
1412
1413fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1414    state
1415        .entries
1416        .keys()
1417        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1418        .cloned()
1419        .collect()
1420}
1421
1422fn validate_delete_shape(
1423    builtin: &'static str,
1424    path: &Path,
1425    recursive: bool,
1426) -> Result<(), HostlibError> {
1427    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1428        return Ok(());
1429    };
1430    if metadata.is_dir() && !recursive {
1431        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1432            builtin,
1433            message: format!("read_dir `{}`: {err}", path.display()),
1434        })?;
1435        if entries.next().is_some() {
1436            return Err(HostlibError::Backend {
1437                builtin,
1438                message: format!(
1439                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1440                    path.display()
1441                ),
1442            });
1443        }
1444    }
1445    Ok(())
1446}
1447
1448fn status_from_state(state: &SessionState) -> StagedStatus {
1449    let now = now_ms();
1450    let mut pending_writes = Vec::new();
1451    let mut total_bytes_pending = 0u64;
1452    let mut oldest = None;
1453    for (path, entry) in &state.entries {
1454        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1455        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1456            old.min(entry.created_at_ms())
1457        }));
1458        let (kind, bytes_added, bytes_removed) = match entry {
1459            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1460            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1461        };
1462        pending_writes.push(PendingWrite {
1463            path: path.to_string_lossy().into_owned(),
1464            kind,
1465            bytes_added,
1466            bytes_removed,
1467        });
1468    }
1469    StagedStatus {
1470        pending_writes,
1471        total_bytes_pending,
1472        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1473    }
1474}
1475
1476fn disk_size(path: &Path) -> Option<u64> {
1477    let metadata = stdfs::symlink_metadata(path).ok()?;
1478    if metadata.is_file() {
1479        return Some(metadata.len());
1480    }
1481    if metadata.is_dir() {
1482        let mut total = 0u64;
1483        for entry in walkdir::WalkDir::new(path)
1484            .into_iter()
1485            .filter_map(Result::ok)
1486        {
1487            if let Ok(metadata) = entry.metadata() {
1488                if metadata.is_file() {
1489                    total = total.saturating_add(metadata.len());
1490                }
1491            }
1492        }
1493        return Some(total);
1494    }
1495    Some(metadata.len())
1496}
1497
1498fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1499    if paths.is_empty() {
1500        return state.entries.keys().cloned().collect();
1501    }
1502    let selected: BTreeSet<PathBuf> = paths
1503        .iter()
1504        .map(|path| normalize_logical(Path::new(path)))
1505        .collect();
1506    state
1507        .entries
1508        .keys()
1509        .filter(|path| selected.contains(*path))
1510        .cloned()
1511        .collect()
1512}
1513
1514fn active_session_id(explicit: Option<&str>) -> Option<String> {
1515    explicit
1516        .map(str::to_string)
1517        .or_else(harn_vm::agent_sessions::current_session_id)
1518        .filter(|id| !id.trim().is_empty())
1519}
1520
1521fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1522    if session_id.trim().is_empty() {
1523        return Err(HostlibError::InvalidParameter {
1524            builtin,
1525            param: "session_id",
1526            message: "must not be empty".to_string(),
1527        });
1528    }
1529    Ok(())
1530}
1531
/// Root used when the caller supplies none: the process's current working
/// directory, or `.` if that cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1535
1536fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1537    let mut dir = root.to_path_buf();
1538    for component in STATE_REL {
1539        dir.push(component);
1540    }
1541    dir.push(sanitize_component(session_id));
1542    dir
1543}
1544
1545fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1546    session_dir(root, session_id).join("manifest.json")
1547}
1548
1549fn sanitize_component(input: &str) -> String {
1550    let sanitized: String = input
1551        .chars()
1552        .map(|ch| match ch {
1553            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1554            _ => '_',
1555        })
1556        .collect();
1557    if sanitized == input {
1558        sanitized
1559    } else {
1560        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1561        format!("{sanitized}-{}", &hash[..12])
1562    }
1563}
1564
1565fn normalize_logical(path: &Path) -> PathBuf {
1566    let absolute = if path.is_absolute() {
1567        path.to_path_buf()
1568    } else {
1569        default_root().join(path)
1570    };
1571    let mut out = PathBuf::new();
1572    for component in absolute.components() {
1573        match component {
1574            Component::ParentDir => {
1575                out.pop();
1576            }
1577            Component::CurDir => {}
1578            other => out.push(other),
1579        }
1580    }
1581    out
1582}
1583
/// Build the `NotFound` I/O error used for paths the overlay treats as
/// deleted or missing.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1590
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reports a time before the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1597
1598fn emit_staged_update(state: &SessionState) {
1599    let status = status_from_state(state);
1600    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1601        session_id: state.session_id.clone(),
1602        pending_count: status.pending_writes.len(),
1603        total_bytes: status.total_bytes_pending,
1604    });
1605}
1606
1607fn pending_write_to_value(write: PendingWrite) -> VmValue {
1608    build_dict([
1609        ("path", str_value(&write.path)),
1610        ("kind", str_value(write.kind)),
1611        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1612        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1613    ])
1614}
1615
1616fn status_to_value(status: StagedStatus) -> VmValue {
1617    build_dict([
1618        (
1619            "pending_writes",
1620            VmValue::List(Arc::new(
1621                status
1622                    .pending_writes
1623                    .into_iter()
1624                    .map(pending_write_to_value)
1625                    .collect(),
1626            )),
1627        ),
1628        (
1629            "total_bytes_pending",
1630            VmValue::Int(status.total_bytes_pending as i64),
1631        ),
1632        (
1633            "oldest_pending_age_ms",
1634            VmValue::Int(status.oldest_pending_age_ms),
1635        ),
1636    ])
1637}
1638
1639fn commit_result_to_value(result: CommitResult) -> VmValue {
1640    build_dict([
1641        (
1642            "committed_paths",
1643            VmValue::List(Arc::new(
1644                result
1645                    .committed_paths
1646                    .into_iter()
1647                    .map(|path| VmValue::String(Arc::from(path)))
1648                    .collect(),
1649            )),
1650        ),
1651        (
1652            "failed_paths_with_reasons",
1653            VmValue::List(Arc::new(
1654                result
1655                    .failed_paths_with_reasons
1656                    .into_iter()
1657                    .map(|(path, reason)| {
1658                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1659                    })
1660                    .collect(),
1661            )),
1662        ),
1663    ])
1664}
1665
1666fn discard_result_to_value(result: DiscardResult) -> VmValue {
1667    build_dict([(
1668        "discarded_paths",
1669        VmValue::List(Arc::new(
1670            result
1671                .discarded_paths
1672                .into_iter()
1673                .map(|path| VmValue::String(Arc::from(path)))
1674                .collect(),
1675        )),
1676    )])
1677}