Skip to main content

harn_hostlib/
fs.rs

1//! Session-scoped staged filesystem mode.
2//!
3//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
4//! mutations land in a durable per-session overlay under
5//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
6//! that overlay first, so agent loops see their own pending writes without
7//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Wire names of the builtins this module registers in
// `FsCapability::register_builtins`. They are also used as the `builtin` tag
// on `HostlibError` values raised by the corresponding code paths.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Manifest schema version.
// NOTE(review): presumably the value stored in `Manifest::version` — confirm.
const MANIFEST_VERSION: u32 = 1;
// Path components of the staging directory (`.harn/state/staged`); the module
// docs describe per-session overlays living beneath it.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
41/// Hostlib filesystem capability handle.
42#[derive(Default)]
43pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        register(
53            registry,
54            STATUS_BUILTIN,
55            "staged_status",
56            staged_status_builtin,
57        );
58        register(
59            registry,
60            COMMIT_BUILTIN,
61            "commit_staged",
62            commit_staged_builtin,
63        );
64        register(
65            registry,
66            DISCARD_BUILTIN,
67            "discard_staged",
68            discard_staged_builtin,
69        );
70        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
71        // they share the deterministic-tools gate with `tools::*` file I/O.
72        register_gated(
73            registry,
74            SAFE_TEXT_PATCH_BUILTIN,
75            "safe_text_patch",
76            safe_text_patch_builtin,
77        );
78        register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79        register(
80            registry,
81            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82            "emit_safe_text_patch_result",
83            emit_safe_text_patch_result_builtin,
84        );
85    }
86}
87
88fn register(
89    registry: &mut BuiltinRegistry,
90    name: &'static str,
91    method: &'static str,
92    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94    let handler: SyncHandler = std::sync::Arc::new(runner);
95    registry.register(RegisteredBuiltin {
96        name,
97        module: "fs",
98        method,
99        handler,
100    });
101}
102
103fn register_gated(
104    registry: &mut BuiltinRegistry,
105    name: &'static str,
106    method: &'static str,
107    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109    registry.register(RegisteredBuiltin {
110        name,
111        module: "fs",
112        method,
113        handler: crate::tools::permissions::gated_handler(name, runner),
114    });
115}
116
/// Filesystem mode for one hostlib session.
///
/// Serialized by serde as the lowercase variant name (`"immediate"` /
/// `"staged"`), matching the wire strings handled by `FsMode::parse` and
/// `FsMode::as_str`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Mutations apply to the working tree immediately.
    Immediate,
    /// Mutations are recorded in the staging layer until committed.
    Staged,
}
126
127impl FsMode {
128    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129        match raw {
130            "immediate" => Ok(Self::Immediate),
131            "staged" => Ok(Self::Staged),
132            other => Err(HostlibError::InvalidParameter {
133                builtin,
134                param: "mode",
135                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136            }),
137        }
138    }
139
140    /// Wire string used by hostlib schemas.
141    pub fn as_str(self) -> &'static str {
142        match self {
143            Self::Immediate => "immediate",
144            Self::Staged => "staged",
145        }
146    }
147}
148
/// Serializable (serde) description of one session's staged-fs state.
///
/// NOTE(review): presumably the on-disk form written by `persist_state` and
/// read back by `load_state` — confirm against those helpers.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest format version.
    /// NOTE(review): presumably `MANIFEST_VERSION` — confirm.
    version: u32,
    /// Identifier of the session this manifest belongs to.
    session_id: String,
    /// Filesystem mode of the session.
    mode: FsMode,
    /// Workspace root, stored as a string.
    root: String,
    /// Staged entries keyed by string.
    /// NOTE(review): keys are presumably the affected paths — confirm.
    entries: BTreeMap<String, StagedEntry>,
}
157
/// One staged change to a single path.
///
/// Serialized with an internal `kind` tag in snake_case (`"write"` /
/// `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// A pending write whose body was stored via `write_body`.
    Write {
        /// Hash returned by `write_body` for the staged bytes.
        body_hash: String,
        /// Length of the staged body in bytes.
        len: u64,
        /// Timestamp (`now_ms()`) taken when the entry was staged.
        created_at_ms: i64,
    },
    /// A pending deletion.
    Delete {
        /// The `recursive` flag supplied by the caller that staged the delete.
        recursive: bool,
        /// Timestamp (`now_ms()`) taken when the entry was staged.
        created_at_ms: i64,
    },
}
171
172impl StagedEntry {
173    fn created_at_ms(&self) -> i64 {
174        match self {
175            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176                *created_at_ms
177            }
178        }
179    }
180
181    fn body_len(&self) -> u64 {
182        match self {
183            Self::Write { len, .. } => *len,
184            Self::Delete { .. } => 0,
185        }
186    }
187}
188
/// In-memory state of one hostlib session, stored in the `SESSIONS` map.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier of the session.
    session_id: String,
    /// Current filesystem mode.
    mode: FsMode,
    /// Workspace root associated with the session.
    root: PathBuf,
    /// Staged entries, keyed by the `normalize_logical`-normalized target path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
196
/// Result of staging a write via `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not already exist in the overlay-aware view.
    pub(crate) created: bool,
    /// Number of bytes staged (the length of the supplied body).
    pub(crate) bytes_written: usize,
}
202
/// One directory entry in the listing returned by `read_dir` for a staged
/// session.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name. NOTE(review): presumably the final path component — confirm.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symlink.
    pub(crate) is_symlink: bool,
    /// Entry size. NOTE(review): presumably in bytes — confirm.
    pub(crate) size: u64,
}
210
/// Summary of staged filesystem changes for one session, as returned by
/// [`staged_status`].
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// Pending path changes, sorted by path.
    pub pending_writes: Vec<PendingWrite>,
    /// Bytes stored in staged write bodies.
    pub total_bytes_pending: u64,
    /// Age in milliseconds of the oldest pending change, or 0 when empty.
    pub oldest_pending_age_ms: i64,
}
221
/// One pending staged filesystem change, as listed in
/// [`StagedStatus::pending_writes`].
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Absolute path affected by this staged change.
    pub path: String,
    /// Change kind (`write`, `delete`, or reserved future `move`).
    pub kind: &'static str,
    /// Bytes the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes the final staged view removes at this path.
    pub bytes_removed: u64,
}
234
/// Result returned by [`set_mode`] after changing a session's filesystem mode.
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode that was active before the change.
    pub previous_mode: FsMode,
}
241
/// Result returned by [`commit_staged`] after applying staged changes to disk.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths successfully applied to disk.
    pub committed_paths: Vec<String>,
    /// Paths that failed to apply, as `(path, reason)` pairs with
    /// human-readable reasons. This includes paths rejected by the
    /// workspace-scope check at commit time.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Result returned by [`discard_staged`] after dropping staged changes.
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entries were removed.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264/// Remember the workspace root associated with a live session.
265///
266/// ACP calls this when a prompt starts so Harn code can call
267/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
268pub fn configure_session_root(session_id: &str, root: &Path) {
269    if session_id.trim().is_empty() {
270        return;
271    }
272    let root = normalize_logical(root);
273    let mut guard = sessions()
274        .lock()
275        .expect("hostlib fs session mutex poisoned");
276    match guard.get_mut(session_id) {
277        Some(state) if state.entries.is_empty() => {
278            state.root = root;
279        }
280        Some(_) => {}
281        None => {
282            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
283                session_id: session_id.to_string(),
284                mode: FsMode::Immediate,
285                root,
286                entries: BTreeMap::new(),
287            });
288            guard.insert(session_id.to_string(), state);
289        }
290    }
291}
292
293/// Return the root currently associated with a hostlib session.
294pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
295    if session_id.trim().is_empty() {
296        return None;
297    }
298    let guard = sessions()
299        .lock()
300        .expect("hostlib fs session mutex poisoned");
301    guard.get(session_id).map(|state| state.root.clone())
302}
303
304/// Set a session's filesystem mode.
305pub fn set_mode(
306    session_id: &str,
307    mode: FsMode,
308    root: Option<&Path>,
309) -> Result<SetModeResult, HostlibError> {
310    validate_session_id(SET_MODE_BUILTIN, session_id)?;
311    let mut guard = sessions()
312        .lock()
313        .expect("hostlib fs session mutex poisoned");
314    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
315    let previous_mode = state.mode;
316    state.mode = mode;
317    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
318        builtin: SET_MODE_BUILTIN,
319        message: err,
320    })?;
321    guard.insert(session_id.to_string(), state);
322    Ok(SetModeResult { previous_mode })
323}
324
325/// Return the staged status for a session.
326pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
327    validate_session_id(STATUS_BUILTIN, session_id)?;
328    let mut guard = sessions()
329        .lock()
330        .expect("hostlib fs session mutex poisoned");
331    let state = state_for_locked(&mut guard, session_id, None)?;
332    let status = status_from_state(&state);
333    guard.insert(session_id.to_string(), state);
334    Ok(status)
335}
336
337/// Commit staged changes for all paths or for a filtered path list.
338pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
339    validate_session_id(COMMIT_BUILTIN, session_id)?;
340    let mut guard = sessions()
341        .lock()
342        .expect("hostlib fs session mutex poisoned");
343    let mut state = state_for_locked(&mut guard, session_id, None)?;
344    let selected = selected_paths(&state, paths);
345    let mut committed_paths = Vec::new();
346    let mut failed_paths_with_reasons = Vec::new();
347
348    for path in selected {
349        let Some(entry) = state.entries.get(&path).cloned() else {
350            continue;
351        };
352        let path_label = path.to_string_lossy().into_owned();
353        // The overlay always lives inside the workspace, but commit flushes
354        // to the *target* working-tree path. Enforce workspace-root scope
355        // against that target so a staged entry — possibly persisted under
356        // a looser policy in an earlier session — can never write outside
357        // the roots active at commit time.
358        let access = match entry {
359            StagedEntry::Write { .. } => FsAccess::Write,
360            StagedEntry::Delete { .. } => FsAccess::Delete,
361        };
362        if let Err(violation) = check_fs_path_scope(&path, access) {
363            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
364            continue;
365        }
366        match commit_entry(&state, &path, &entry) {
367            Ok(()) => {
368                state.entries.remove(&path);
369                committed_paths.push(path_label);
370            }
371            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
372        }
373    }
374
375    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
376        builtin: COMMIT_BUILTIN,
377        message: err,
378    })?;
379    emit_staged_update(&state);
380    guard.insert(session_id.to_string(), state);
381    Ok(CommitResult {
382        committed_paths,
383        failed_paths_with_reasons,
384    })
385}
386
387/// Discard staged changes for all paths or for a filtered path list.
388pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
389    validate_session_id(DISCARD_BUILTIN, session_id)?;
390    let mut guard = sessions()
391        .lock()
392        .expect("hostlib fs session mutex poisoned");
393    let mut state = state_for_locked(&mut guard, session_id, None)?;
394    let selected = selected_paths(&state, paths);
395    let mut discarded_paths = Vec::new();
396    for path in selected {
397        if state.entries.remove(&path).is_some() {
398            discarded_paths.push(path.to_string_lossy().into_owned());
399        }
400    }
401    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
402        builtin: DISCARD_BUILTIN,
403        message: err,
404    })?;
405    emit_staged_update(&state);
406    guard.insert(session_id.to_string(), state);
407    Ok(DiscardResult { discarded_paths })
408}
409
410/// Remove all persisted staged-fs state for a caller-owned throw-away session.
411///
412/// Normal agent sessions keep their manifest after `discard_staged` so hosts can
413/// continue reporting session state. Transient dry-run sessions own their ids,
414/// though, and should remove both the in-memory entry and on-disk overlay after
415/// their preview is rendered.
416pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
417    validate_session_id(DISCARD_BUILTIN, session_id)?;
418    let mut guard = sessions()
419        .lock()
420        .expect("hostlib fs session mutex poisoned");
421    let state = match guard.remove(session_id) {
422        Some(state) => state,
423        None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
424            HostlibError::Backend {
425                builtin: DISCARD_BUILTIN,
426                message: err,
427            }
428        })?,
429    };
430    let dir = session_dir(&state.root, &state.session_id);
431    match stdfs::remove_dir_all(&dir) {
432        Ok(()) => Ok(()),
433        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
434        Err(err) => Err(HostlibError::Backend {
435            builtin: DISCARD_BUILTIN,
436            message: format!("remove staged session {}: {err}", dir.display()),
437        }),
438    }
439}
440
441pub(crate) fn read(
442    path: &Path,
443    explicit_session_id: Option<&str>,
444) -> Option<std::io::Result<Vec<u8>>> {
445    let session_id = active_session_id(explicit_session_id)?;
446    let mut guard = sessions()
447        .lock()
448        .expect("hostlib fs session mutex poisoned");
449    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
450    let result = if state.mode == FsMode::Staged {
451        overlay_read(&state, path)
452    } else {
453        None
454    };
455    guard.insert(session_id, state);
456    result
457}
458
459pub(crate) fn read_to_string(
460    path: &Path,
461    explicit_session_id: Option<&str>,
462) -> Option<std::io::Result<String>> {
463    read(path, explicit_session_id).map(|result| {
464        result.and_then(|bytes| {
465            String::from_utf8(bytes).map_err(|err| {
466                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
467            })
468        })
469    })
470}
471
472pub(crate) fn read_dir(
473    path: &Path,
474    explicit_session_id: Option<&str>,
475) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
476    let session_id = active_session_id(explicit_session_id)?;
477    let mut guard = sessions()
478        .lock()
479        .expect("hostlib fs session mutex poisoned");
480    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
481    let result = if state.mode == FsMode::Staged {
482        Some(overlay_read_dir(&state, path))
483    } else {
484        None
485    };
486    guard.insert(session_id, state);
487    result
488}
489
490pub(crate) fn stage_write_or_none(
491    builtin: &'static str,
492    path: &Path,
493    bytes: &[u8],
494    create_parents: bool,
495    overwrite: bool,
496    explicit_session_id: Option<&str>,
497) -> Result<Option<WriteOutcome>, HostlibError> {
498    let Some(session_id) = active_session_id(explicit_session_id) else {
499        return Ok(None);
500    };
501    let mut guard = sessions()
502        .lock()
503        .expect("hostlib fs session mutex poisoned");
504    let mut state = state_for_locked(&mut guard, &session_id, None)?;
505    if state.mode != FsMode::Staged {
506        guard.insert(session_id, state);
507        return Ok(None);
508    }
509
510    let key = normalize_logical(path);
511    let existed = overlay_exists(&state, &key);
512    if existed && !overwrite {
513        guard.insert(session_id, state);
514        return Err(HostlibError::Backend {
515            builtin,
516            message: format!("`{}` exists and overwrite=false", key.display()),
517        });
518    }
519    if !create_parents && !parent_exists(&state, &key) {
520        guard.insert(session_id, state);
521        return Err(HostlibError::Backend {
522            builtin,
523            message: format!("parent directory for `{}` does not exist", key.display()),
524        });
525    }
526
527    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
528        builtin,
529        message: err,
530    })?;
531    state.entries.insert(
532        key.clone(),
533        StagedEntry::Write {
534            body_hash: hash,
535            len: bytes.len() as u64,
536            created_at_ms: now_ms(),
537        },
538    );
539    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
540        builtin,
541        message: err,
542    })?;
543    emit_staged_update(&state);
544    guard.insert(session_id, state);
545    Ok(Some(WriteOutcome {
546        created: !existed,
547        bytes_written: bytes.len(),
548    }))
549}
550
551pub(crate) fn stage_delete_or_none(
552    builtin: &'static str,
553    path: &Path,
554    recursive: bool,
555    explicit_session_id: Option<&str>,
556) -> Result<Option<bool>, HostlibError> {
557    let Some(session_id) = active_session_id(explicit_session_id) else {
558        return Ok(None);
559    };
560    let mut guard = sessions()
561        .lock()
562        .expect("hostlib fs session mutex poisoned");
563    let mut state = state_for_locked(&mut guard, &session_id, None)?;
564    if state.mode != FsMode::Staged {
565        guard.insert(session_id, state);
566        return Ok(None);
567    }
568
569    let key = normalize_logical(path);
570    let staged_targets = staged_paths_under(&state, &key);
571    let disk_exists = key.exists();
572    if !disk_exists && staged_targets.is_empty() {
573        guard.insert(session_id, state);
574        return Ok(Some(false));
575    }
576
577    if !disk_exists {
578        for staged in staged_targets {
579            state.entries.remove(&staged);
580        }
581    } else {
582        validate_delete_shape(builtin, &key, recursive)?;
583        for staged in staged_targets {
584            state.entries.remove(&staged);
585        }
586        state.entries.insert(
587            key.clone(),
588            StagedEntry::Delete {
589                recursive,
590                created_at_ms: now_ms(),
591            },
592        );
593    }
594    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
595        builtin,
596        message: err,
597    })?;
598    emit_staged_update(&state);
599    guard.insert(session_id, state);
600    Ok(Some(true))
601}
602
/// Outcome of one [`safe_text_patch`] call. `result` carries the structured
/// discriminant used by the wire/JSON shape; the on-disk (or staged-overlay)
/// bytes changed only when it is [`SafeTextPatchResult::Applied`].
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
    pub result: SafeTextPatchResult,
    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed. A
    /// missing file is hashed as empty content.
    pub current_hash: String,
    /// `sha256:HEX` of the requested post-image.
    pub after_hash: String,
    /// `true` when the call applied the patch and the file did not exist
    /// before it; always `false` on `stale_base` or `no_op`.
    pub created: bool,
    /// Bytes written; `0` on `stale_base` or `no_op`.
    pub bytes_written: usize,
}
619
/// Discriminant for a [`safe_text_patch`] outcome.
///
/// The staleness check runs before the no-op check, so a mismatching
/// `expected_hash` yields `StaleBase` even when the requested content equals
/// the current content.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// Pre-image hash matched (or no expected hash supplied) and the
    /// post-image differs from the pre-image — bytes were written.
    Applied,
    /// `expected_hash` did not match the observed pre-image hash; no
    /// bytes were written. Callers should re-read and retry.
    StaleBase,
    /// Pre-image hash matched and the post-image equals the pre-image —
    /// skipped the write to avoid spurious timestamps and overlay churn.
    NoOp,
}
633
634impl SafeTextPatchResult {
635    fn as_str(self) -> &'static str {
636        match self {
637            Self::Applied => "applied",
638            Self::StaleBase => "stale_base",
639            Self::NoOp => "no_op",
640        }
641    }
642}
643
644/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
645/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
646/// safe-text-patch surface.
647fn hash_label(bytes: &[u8]) -> String {
648    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
649}
650
651/// Atomic compare-and-swap-style text write.
652///
653/// Reads the current bytes at `path` through the staged-fs overlay (when a
654/// session is active) so concurrent agent edits see each other's pending
655/// writes. If `expected_hash` is supplied and differs from the observed
656/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
657/// mutating any state. On a hash match the post-image is written through
658/// the same overlay path, keeping the read and the write atomic with
659/// respect to other staged-fs consumers in the same process.
660///
661/// Atomicity:
662///
663/// - When a session is in staged mode, the read, hash check, and write
664///   all happen under a single acquisition of the sessions mutex, so a
665///   sibling thread cannot stage a write into the window between the
666///   pre-image snapshot and the commit.
667/// - When the call routes through disk (no active session, or session in
668///   immediate mode), the write goes through an atomic rename-into-place
669///   so partial-write tearing is impossible. Cross-process races are
670///   intentionally out of scope — the staged-fs overlay is the
671///   collision-rejection layer.
672pub fn safe_text_patch(
673    path: &Path,
674    content: &str,
675    expected_hash: Option<&str>,
676    session_id: Option<&str>,
677    create_parents: bool,
678    overwrite: bool,
679) -> Result<SafeTextPatchOutcome, HostlibError> {
680    let new_bytes = content.as_bytes();
681    let after_hash = hash_label(new_bytes);
682
683    if let Some(outcome) = safe_text_patch_staged(
684        path,
685        new_bytes,
686        expected_hash,
687        session_id,
688        create_parents,
689        overwrite,
690        &after_hash,
691    )? {
692        return Ok(outcome);
693    }
694
695    safe_text_patch_disk(
696        path,
697        new_bytes,
698        expected_hash,
699        create_parents,
700        overwrite,
701        after_hash,
702    )
703}
704
705/// Atomic CAS path for a session in `staged` mode. Holds the sessions
706/// mutex through the entire read → hash → check → write so concurrent
707/// agents in the same process cannot race the snapshot. Returns `None`
708/// when no session is active or the session is in `immediate` mode, so
709/// the caller can fall through to the disk path.
710#[allow(clippy::too_many_arguments)]
711fn safe_text_patch_staged(
712    path: &Path,
713    new_bytes: &[u8],
714    expected_hash: Option<&str>,
715    session_id: Option<&str>,
716    create_parents: bool,
717    overwrite: bool,
718    after_hash: &str,
719) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
720    let Some(session) = active_session_id(session_id) else {
721        return Ok(None);
722    };
723    let mut guard = sessions()
724        .lock()
725        .expect("hostlib fs session mutex poisoned");
726    let mut state = state_for_locked(&mut guard, &session, None)?;
727    if state.mode != FsMode::Staged {
728        guard.insert(session, state);
729        return Ok(None);
730    }
731
732    let key = normalize_logical(path);
733    let (existing_bytes, existed) = match overlay_read(&state, path) {
734        Some(Ok(bytes)) => (bytes, true),
735        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
736        Some(Err(err)) => {
737            guard.insert(session, state);
738            return Err(HostlibError::Backend {
739                builtin: SAFE_TEXT_PATCH_BUILTIN,
740                message: format!("read `{}`: {err}", path.display()),
741            });
742        }
743        None => match stdfs::read(path) {
744            Ok(bytes) => (bytes, true),
745            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
746            Err(err) => {
747                guard.insert(session, state);
748                return Err(HostlibError::Backend {
749                    builtin: SAFE_TEXT_PATCH_BUILTIN,
750                    message: format!("read `{}`: {err}", path.display()),
751                });
752            }
753        },
754    };
755    let current_hash = hash_label(&existing_bytes);
756
757    if let Some(expected) = expected_hash {
758        if expected != current_hash {
759            guard.insert(session, state);
760            return Ok(Some(SafeTextPatchOutcome {
761                result: SafeTextPatchResult::StaleBase,
762                current_hash,
763                after_hash: after_hash.to_string(),
764                created: false,
765                bytes_written: 0,
766            }));
767        }
768    }
769
770    if existed && existing_bytes == new_bytes {
771        guard.insert(session, state);
772        return Ok(Some(SafeTextPatchOutcome {
773            result: SafeTextPatchResult::NoOp,
774            current_hash,
775            after_hash: after_hash.to_string(),
776            created: false,
777            bytes_written: 0,
778        }));
779    }
780
781    let overlay_existed = overlay_exists(&state, &key);
782    if overlay_existed && !overwrite {
783        guard.insert(session, state);
784        return Err(HostlibError::Backend {
785            builtin: SAFE_TEXT_PATCH_BUILTIN,
786            message: format!("`{}` exists and overwrite=false", key.display()),
787        });
788    }
789    if !create_parents && !parent_exists(&state, &key) {
790        guard.insert(session, state);
791        return Err(HostlibError::Backend {
792            builtin: SAFE_TEXT_PATCH_BUILTIN,
793            message: format!("parent directory for `{}` does not exist", key.display()),
794        });
795    }
796
797    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
798        builtin: SAFE_TEXT_PATCH_BUILTIN,
799        message: err,
800    })?;
801    state.entries.insert(
802        key.clone(),
803        StagedEntry::Write {
804            body_hash,
805            len: new_bytes.len() as u64,
806            created_at_ms: now_ms(),
807        },
808    );
809    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
810        builtin: SAFE_TEXT_PATCH_BUILTIN,
811        message: err,
812    })?;
813    emit_staged_update(&state);
814    guard.insert(session, state);
815
816    Ok(Some(SafeTextPatchOutcome {
817        result: SafeTextPatchResult::Applied,
818        current_hash,
819        after_hash: after_hash.to_string(),
820        created: !existed,
821        bytes_written: new_bytes.len(),
822    }))
823}
824
825/// Disk path for callers without an active staged session. Uses
826/// `atomic_write` so the post-image lands via rename-into-place rather
827/// than an open/truncate/write/close sequence — readers either see the
828/// pre-image or the post-image, never a torn write.
829fn safe_text_patch_disk(
830    path: &Path,
831    new_bytes: &[u8],
832    expected_hash: Option<&str>,
833    create_parents: bool,
834    overwrite: bool,
835    after_hash: String,
836) -> Result<SafeTextPatchOutcome, HostlibError> {
837    let (existing_bytes, existed) = match stdfs::read(path) {
838        Ok(bytes) => (bytes, true),
839        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
840        Err(err) => {
841            return Err(HostlibError::Backend {
842                builtin: SAFE_TEXT_PATCH_BUILTIN,
843                message: format!("read `{}`: {err}", path.display()),
844            });
845        }
846    };
847    let current_hash = hash_label(&existing_bytes);
848
849    if let Some(expected) = expected_hash {
850        if expected != current_hash {
851            return Ok(SafeTextPatchOutcome {
852                result: SafeTextPatchResult::StaleBase,
853                current_hash,
854                after_hash,
855                created: false,
856                bytes_written: 0,
857            });
858        }
859    }
860
861    if existed && existing_bytes == new_bytes {
862        return Ok(SafeTextPatchOutcome {
863            result: SafeTextPatchResult::NoOp,
864            current_hash,
865            after_hash,
866            created: false,
867            bytes_written: 0,
868        });
869    }
870    if existed && !overwrite {
871        return Err(HostlibError::Backend {
872            builtin: SAFE_TEXT_PATCH_BUILTIN,
873            message: format!("`{}` exists and overwrite=false", path.display()),
874        });
875    }
876    if !create_parents {
877        if let Some(parent) = path.parent() {
878            if !parent.as_os_str().is_empty() && !parent.is_dir() {
879                return Err(HostlibError::Backend {
880                    builtin: SAFE_TEXT_PATCH_BUILTIN,
881                    message: format!(
882                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
883                        path.display()
884                    ),
885                });
886            }
887        }
888    }
889
890    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
891    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
892        builtin: SAFE_TEXT_PATCH_BUILTIN,
893        message: format!("write `{}`: {err}", path.display()),
894    })?;
895
896    Ok(SafeTextPatchOutcome {
897        result: SafeTextPatchResult::Applied,
898        current_hash,
899        after_hash,
900        created: !existed,
901        bytes_written: new_bytes.len(),
902    })
903}
904
905/// Read the pre-image through the staged-fs overlay (when active),
906/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
907/// `builtin` is the caller's tag — used so backend errors point at the
908/// right builtin name in diagnostics.
909fn read_existing(
910    builtin: &'static str,
911    path: &Path,
912    session_id: Option<&str>,
913) -> Result<(Vec<u8>, bool), HostlibError> {
914    if let Some(result) = read(path, session_id) {
915        return match result {
916            Ok(bytes) => Ok((bytes, true)),
917            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
918            Err(err) => Err(HostlibError::Backend {
919                builtin,
920                message: format!("read `{}`: {err}", path.display()),
921            }),
922        };
923    }
924    match stdfs::read(path) {
925        Ok(bytes) => Ok((bytes, true)),
926        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
927        Err(err) => Err(HostlibError::Backend {
928            builtin,
929            message: format!("read `{}`: {err}", path.display()),
930        }),
931    }
932}
933
934fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
935    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
936    let dict = raw.as_ref();
937    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
938    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
939    let path = Path::new(&path_str);
940    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
941
942    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
943    let hash = hash_label(&bytes);
944    let content = match std::str::from_utf8(&bytes) {
945        Ok(s) => s.to_string(),
946        Err(err) => {
947            return Err(HostlibError::Backend {
948                builtin: READ_TEXT_BUILTIN,
949                message: format!("`{path_str}` is not valid UTF-8: {err}"),
950            });
951        }
952    };
953    let bytes_len = bytes.len() as i64;
954    Ok(build_dict([
955        ("path", str_value(&path_str)),
956        ("content", str_value(&content)),
957        ("sha256", str_value(&hash)),
958        ("size", VmValue::Int(bytes_len)),
959        ("exists", VmValue::Bool(existed)),
960    ]))
961}
962
963fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
964    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
965    let dict = raw.as_ref();
966
967    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
968    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
969    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
970    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
971    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
972    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
973
974    enforce_path_scope(
975        SAFE_TEXT_PATCH_BUILTIN,
976        Path::new(&path_str),
977        FsAccess::Write,
978    )?;
979    let outcome = safe_text_patch(
980        Path::new(&path_str),
981        &content,
982        expected_hash.as_deref(),
983        session_id.as_deref(),
984        create_parents,
985        overwrite,
986    )?;
987
988    let entries: Vec<(&'static str, VmValue)> = vec![
989        ("path", str_value(&path_str)),
990        ("result", str_value(outcome.result.as_str())),
991        (
992            "applied",
993            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
994        ),
995        (
996            "stale_base",
997            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
998        ),
999        ("current_hash", str_value(&outcome.current_hash)),
1000        ("before_sha256", str_value(&outcome.current_hash)),
1001        ("after_sha256", str_value(&outcome.after_hash)),
1002        ("created", VmValue::Bool(outcome.created)),
1003        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
1004        (
1005            "expected_hash",
1006            match expected_hash.as_deref() {
1007                Some(hash) => str_value(hash),
1008                None => VmValue::Nil,
1009            },
1010        ),
1011    ];
1012    Ok(build_dict(entries))
1013}
1014
1015fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1016    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1017    let dict = raw.as_ref();
1018
1019    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1020    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1021    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1022    let bytes_written = optional_int(
1023        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1024        dict,
1025        "bytes_written",
1026        0,
1027    )?;
1028    let failed_hunk_index = match dict.get("failed_hunk_index") {
1029        None | Some(VmValue::Nil) => None,
1030        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1031        Some(other) => {
1032            return Err(HostlibError::InvalidParameter {
1033                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1034                param: "failed_hunk_index",
1035                message: format!("expected non-negative integer, got {}", other.type_name()),
1036            });
1037        }
1038    };
1039    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1040        .or_else(harn_vm::agent_sessions::current_session_id);
1041
1042    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1043        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1044            session_id,
1045            path,
1046            result,
1047            hunks_count: hunks_count.max(0) as usize,
1048            bytes_written: bytes_written.max(0) as u64,
1049            failed_hunk_index,
1050        });
1051        Ok(VmValue::Bool(true))
1052    } else {
1053        // Silently no-op when no session is active — telemetry without a
1054        // session has nowhere to route. Caller can opt in by always
1055        // passing session_id explicitly.
1056        Ok(VmValue::Bool(false))
1057    }
1058}
1059
1060fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1061    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1062    let dict = raw.as_ref();
1063    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1064    let mode = FsMode::parse(
1065        SET_MODE_BUILTIN,
1066        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1067    )?;
1068    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1069    let result = set_mode(&session_id, mode, root.as_deref())?;
1070    Ok(build_dict([(
1071        "previous_mode",
1072        str_value(result.previous_mode.as_str()),
1073    )]))
1074}
1075
1076fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1077    let raw = dict_arg(STATUS_BUILTIN, args)?;
1078    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1079    Ok(status_to_value(staged_status(&session_id)?))
1080}
1081
1082fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1083    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1084    let dict = raw.as_ref();
1085    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1086    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1087    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1088}
1089
1090fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1091    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1092    let dict = raw.as_ref();
1093    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1094    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1095    Ok(discard_result_to_value(discard_staged(
1096        &session_id,
1097        &paths,
1098    )?))
1099}
1100
1101fn state_for_locked(
1102    guard: &mut BTreeMap<String, SessionState>,
1103    session_id: &str,
1104    root: Option<PathBuf>,
1105) -> Result<SessionState, HostlibError> {
1106    if let Some(existing) = guard.get(session_id) {
1107        let mut state = existing.clone();
1108        if let Some(root) = root {
1109            if state.entries.is_empty() {
1110                state.root = root;
1111            }
1112        }
1113        return Ok(state);
1114    }
1115    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1116        builtin: SET_MODE_BUILTIN,
1117        message: err,
1118    })?;
1119    Ok(state)
1120}
1121
1122fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1123    let root = root.unwrap_or_else(default_root);
1124    let manifest_path = manifest_path(&root, session_id);
1125    if manifest_path.exists() {
1126        let text = stdfs::read_to_string(&manifest_path)
1127            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1128        let manifest: Manifest = serde_json::from_str(&text)
1129            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1130        if manifest.version != MANIFEST_VERSION {
1131            return Err(format!(
1132                "unsupported staged fs manifest version {} in {}",
1133                manifest.version,
1134                manifest_path.display()
1135            ));
1136        }
1137        if manifest.session_id != session_id {
1138            return Err(format!(
1139                "staged fs manifest session id mismatch in {}",
1140                manifest_path.display()
1141            ));
1142        }
1143        return Ok(SessionState {
1144            session_id: manifest.session_id,
1145            mode: manifest.mode,
1146            root: normalize_logical(Path::new(&manifest.root)),
1147            entries: manifest
1148                .entries
1149                .into_iter()
1150                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1151                .collect(),
1152        });
1153    }
1154    Ok(SessionState {
1155        session_id: session_id.to_string(),
1156        mode: FsMode::Immediate,
1157        root,
1158        entries: BTreeMap::new(),
1159    })
1160}
1161
1162fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1163    let dir = session_dir(&state.root, &state.session_id);
1164    stdfs::create_dir_all(dir.join("bodies"))
1165        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1166    let manifest = Manifest {
1167        version: MANIFEST_VERSION,
1168        session_id: state.session_id.clone(),
1169        mode: state.mode,
1170        root: state.root.to_string_lossy().into_owned(),
1171        entries: state
1172            .entries
1173            .iter()
1174            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1175            .collect(),
1176    };
1177    let bytes = serde_json::to_vec_pretty(&manifest)
1178        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1179    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1180    append_journal(state, op, path)?;
1181    prune_unreferenced_bodies(state);
1182    Ok(())
1183}
1184
1185fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1186    let dir = session_dir(&state.root, &state.session_id);
1187    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1188    let line = serde_json::to_string(&serde_json::json!({
1189        "ts_ms": now_ms(),
1190        "op": op,
1191        "path": path.map(|path| path.to_string_lossy().into_owned()),
1192        "pending_count": state.entries.len(),
1193    }))
1194    .map_err(|err| format!("serialize staged journal: {err}"))?;
1195    let mut file = stdfs::OpenOptions::new()
1196        .create(true)
1197        .append(true)
1198        .open(dir.join("journal.jsonl"))
1199        .map_err(|err| format!("open staged journal: {err}"))?;
1200    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1201}
1202
1203fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1204    let hash = hex::encode(Sha256::digest(bytes));
1205    let path = session_dir(&state.root, &state.session_id)
1206        .join("bodies")
1207        .join(&hash);
1208    if !path.exists() {
1209        atomic_write(&path, bytes)?;
1210    }
1211    Ok(hash)
1212}
1213
1214fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1215    stdfs::read(
1216        session_dir(&state.root, &state.session_id)
1217            .join("bodies")
1218            .join(hash),
1219    )
1220}
1221
1222fn prune_unreferenced_bodies(state: &SessionState) {
1223    let live: BTreeSet<String> = state
1224        .entries
1225        .values()
1226        .filter_map(|entry| match entry {
1227            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1228            StagedEntry::Delete { .. } => None,
1229        })
1230        .collect();
1231    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1232    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1233        return;
1234    };
1235    for entry in entries.flatten() {
1236        let name = entry.file_name().to_string_lossy().into_owned();
1237        if !live.contains(&name) {
1238            let _ = stdfs::remove_file(entry.path());
1239        }
1240    }
1241}
1242
/// Writes `bytes` to `path` by writing a sibling temporary file and renaming
/// it over the destination.
///
/// The parent directory is created if needed. The temporary name is the full
/// destination file name plus a `.tmp-<pid>-<nanos>-<seq>` suffix, so two
/// destinations that differ only in extension (`a.rs` / `a.txt`) never share
/// a temporary file, and neither do two writes issued by this process within
/// the same clock tick.
///
/// If the rename fails, the destination is removed and the rename is retried
/// once. The temporary file is removed on every failure path so aborted
/// writes do not leave stray files behind.
///
/// # Errors
/// Returns a descriptive string when `path` has no file name, or when the
/// directory creation, the temporary write, or both rename attempts fail.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    // Per-process sequence number that keeps temporary names unique.
    static TMP_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }
    // Without a file name there is nothing to derive a sibling temp name
    // from; refuse rather than write to the destination path directly.
    let Some(file_name) = path.file_name() else {
        return Err(format!("write {}: path has no file name", path.display()));
    };

    let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.subsec_nanos())
        .unwrap_or(0);
    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(".tmp-{}-{nanos}-{seq}", std::process::id()));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // A partially written temp file is useless; drop it.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }
    match stdfs::rename(&tmp, path) {
        Ok(()) => Ok(()),
        Err(err) => {
            // Fallback for a rename that will not replace an existing
            // destination: remove it and try once more.
            let _ = stdfs::remove_file(path);
            stdfs::rename(&tmp, path).map_err(|retry| {
                let _ = stdfs::remove_file(&tmp);
                format!(
                    "rename {} to {}: {err}; retry: {retry}",
                    tmp.display(),
                    path.display()
                )
            })
        }
    }
}
1264
1265fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1266    match entry {
1267        StagedEntry::Write { body_hash, .. } => {
1268            let bytes = read_body(state, body_hash)
1269                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1270            atomic_write(path, &bytes)
1271        }
1272        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1273            Ok(metadata) if metadata.is_dir() => {
1274                if *recursive {
1275                    stdfs::remove_dir_all(path)
1276                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1277                } else {
1278                    stdfs::remove_dir(path)
1279                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1280                }
1281            }
1282            Ok(_) => stdfs::remove_file(path)
1283                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1284            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1285            Err(err) => Err(format!("stat {}: {err}", path.display())),
1286        },
1287    }
1288}
1289
1290fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1291    let key = normalize_logical(path);
1292    if let Some(entry) = state.entries.get(&key) {
1293        return Some(match entry {
1294            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1295            StagedEntry::Delete { .. } => Err(not_found(&key)),
1296        });
1297    }
1298    if deleted_ancestor(state, &key) {
1299        return Some(Err(not_found(&key)));
1300    }
1301    None
1302}
1303
1304fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1305    let dir_key = normalize_logical(path);
1306    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1307        || deleted_ancestor(state, &dir_key)
1308        || matches!(
1309            state.entries.get(&dir_key),
1310            Some(StagedEntry::Delete { .. })
1311        )
1312    {
1313        return Err(not_found(&dir_key));
1314    }
1315    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1316        return Err(not_found(&dir_key));
1317    }
1318
1319    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1320    if path.exists() {
1321        for entry in stdfs::read_dir(path)? {
1322            let entry = entry?;
1323            let name = entry.file_name().to_string_lossy().into_owned();
1324            let file_type = entry.file_type().ok();
1325            let metadata = entry.metadata().ok();
1326            entries.insert(
1327                name.clone(),
1328                OverlayDirEntry {
1329                    name,
1330                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1331                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1332                    size: metadata.map(|m| m.len()).unwrap_or(0),
1333                },
1334            );
1335        }
1336    }
1337
1338    for (path, entry) in &state.entries {
1339        let Some(name) = overlay_child_name(path, &dir_key) else {
1340            continue;
1341        };
1342        match entry {
1343            StagedEntry::Write { len, .. } => {
1344                let is_dir = path.parent() != Some(dir_key.as_path());
1345                entries.insert(
1346                    name.clone(),
1347                    OverlayDirEntry {
1348                        name,
1349                        is_dir,
1350                        is_symlink: false,
1351                        size: if is_dir { 0 } else { *len },
1352                    },
1353                );
1354            }
1355            StagedEntry::Delete { .. } => {
1356                if path.parent() == Some(dir_key.as_path()) {
1357                    entries.remove(&name);
1358                }
1359            }
1360        }
1361    }
1362
1363    Ok(entries.into_values().collect())
1364}
1365
/// Returns the name of the first path component of `path` below `dir`.
///
/// Yields `None` when `path` is not under `dir`, when it equals `dir`, or
/// when the first remaining component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    match relative.components().next() {
        Some(Component::Normal(name)) => Some(name.to_string_lossy().into_owned()),
        _ => None,
    }
}
1375
1376fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1377    if let Some(entry) = state.entries.get(path) {
1378        return matches!(entry, StagedEntry::Write { .. });
1379    }
1380    if deleted_ancestor(state, path) {
1381        return false;
1382    }
1383    if has_staged_descendant(state, path) {
1384        return true;
1385    }
1386    path.exists()
1387}
1388
1389fn parent_exists(state: &SessionState, path: &Path) -> bool {
1390    let Some(parent) = path.parent() else {
1391        return true;
1392    };
1393    if parent.as_os_str().is_empty() {
1394        return true;
1395    }
1396    if let Some(entry) = state.entries.get(parent) {
1397        return !matches!(entry, StagedEntry::Delete { .. });
1398    }
1399    if deleted_ancestor(state, parent) {
1400        return false;
1401    }
1402    if has_staged_descendant(state, parent) {
1403        return true;
1404    }
1405    parent.is_dir()
1406}
1407
1408fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1409    state.entries.iter().any(|(candidate, entry)| {
1410        matches!(entry, StagedEntry::Delete { .. })
1411            && path != candidate.as_path()
1412            && path.starts_with(candidate)
1413    })
1414}
1415
1416fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1417    state.entries.iter().any(|(candidate, entry)| {
1418        matches!(entry, StagedEntry::Write { .. })
1419            && candidate != path
1420            && candidate.starts_with(path)
1421    })
1422}
1423
1424fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1425    state
1426        .entries
1427        .keys()
1428        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1429        .cloned()
1430        .collect()
1431}
1432
1433fn validate_delete_shape(
1434    builtin: &'static str,
1435    path: &Path,
1436    recursive: bool,
1437) -> Result<(), HostlibError> {
1438    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1439        return Ok(());
1440    };
1441    if metadata.is_dir() && !recursive {
1442        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1443            builtin,
1444            message: format!("read_dir `{}`: {err}", path.display()),
1445        })?;
1446        if entries.next().is_some() {
1447            return Err(HostlibError::Backend {
1448                builtin,
1449                message: format!(
1450                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1451                    path.display()
1452                ),
1453            });
1454        }
1455    }
1456    Ok(())
1457}
1458
1459fn status_from_state(state: &SessionState) -> StagedStatus {
1460    let now = now_ms();
1461    let mut pending_writes = Vec::new();
1462    let mut total_bytes_pending = 0u64;
1463    let mut oldest = None;
1464    for (path, entry) in &state.entries {
1465        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1466        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1467            old.min(entry.created_at_ms())
1468        }));
1469        let (kind, bytes_added, bytes_removed) = match entry {
1470            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1471            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1472        };
1473        pending_writes.push(PendingWrite {
1474            path: path.to_string_lossy().into_owned(),
1475            kind,
1476            bytes_added,
1477            bytes_removed,
1478        });
1479    }
1480    StagedStatus {
1481        pending_writes,
1482        total_bytes_pending,
1483        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1484    }
1485}
1486
1487fn disk_size(path: &Path) -> Option<u64> {
1488    let metadata = stdfs::symlink_metadata(path).ok()?;
1489    if metadata.is_file() {
1490        return Some(metadata.len());
1491    }
1492    if metadata.is_dir() {
1493        let mut total = 0u64;
1494        for entry in walkdir::WalkDir::new(path)
1495            .into_iter()
1496            .filter_map(Result::ok)
1497        {
1498            if let Ok(metadata) = entry.metadata() {
1499                if metadata.is_file() {
1500                    total = total.saturating_add(metadata.len());
1501                }
1502            }
1503        }
1504        return Some(total);
1505    }
1506    Some(metadata.len())
1507}
1508
1509fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1510    if paths.is_empty() {
1511        return state.entries.keys().cloned().collect();
1512    }
1513    let selected: BTreeSet<PathBuf> = paths
1514        .iter()
1515        .map(|path| normalize_logical(Path::new(path)))
1516        .collect();
1517    state
1518        .entries
1519        .keys()
1520        .filter(|path| selected.contains(*path))
1521        .cloned()
1522        .collect()
1523}
1524
1525fn active_session_id(explicit: Option<&str>) -> Option<String> {
1526    explicit
1527        .map(str::to_string)
1528        .or_else(harn_vm::agent_sessions::current_session_id)
1529        .filter(|id| !id.trim().is_empty())
1530}
1531
1532fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1533    if session_id.trim().is_empty() {
1534        return Err(HostlibError::InvalidParameter {
1535            builtin,
1536            param: "session_id",
1537            message: "must not be empty".to_string(),
1538        });
1539    }
1540    Ok(())
1541}
1542
/// The root used when none is supplied: the process's current directory, or
/// `"."` when that cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1546
1547fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1548    let mut dir = root.to_path_buf();
1549    for component in STATE_REL {
1550        dir.push(component);
1551    }
1552    dir.push(sanitize_component(session_id));
1553    dir
1554}
1555
1556fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1557    session_dir(root, session_id).join("manifest.json")
1558}
1559
1560fn sanitize_component(input: &str) -> String {
1561    let sanitized: String = input
1562        .chars()
1563        .map(|ch| match ch {
1564            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1565            _ => '_',
1566        })
1567        .collect();
1568    // `.` is allowed inside a name, but a component that is empty or *only*
1569    // dots (`.`, `..`, `...`) is a path-traversal / current-dir token, not a
1570    // safe single component — `session_dir`'s `dir.push("..")` would escape
1571    // the staged-state root. Force the hashed form so the result is always a
1572    // genuine, traversal-free directory name.
1573    let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1574    if sanitized == input && !is_dotted {
1575        sanitized
1576    } else {
1577        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1578        format!("{sanitized}-{}", &hash[..12])
1579    }
1580}
1581
1582fn normalize_logical(path: &Path) -> PathBuf {
1583    let absolute = if path.is_absolute() {
1584        path.to_path_buf()
1585    } else {
1586        default_root().join(path)
1587    };
1588    let mut out = PathBuf::new();
1589    for component in absolute.components() {
1590        match component {
1591            Component::ParentDir => {
1592                out.pop();
1593            }
1594            Component::CurDir => {}
1595            other => out.push(other),
1596        }
1597    }
1598    out
1599}
1600
/// Builds the `NotFound` I/O error the overlay reports for `path`.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1607
/// Milliseconds since the Unix epoch according to the system clock, or `0`
/// when the clock reads earlier than the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1614
1615fn emit_staged_update(state: &SessionState) {
1616    let status = status_from_state(state);
1617    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1618        session_id: state.session_id.clone(),
1619        pending_count: status.pending_writes.len(),
1620        total_bytes: status.total_bytes_pending,
1621    });
1622}
1623
1624fn pending_write_to_value(write: PendingWrite) -> VmValue {
1625    build_dict([
1626        ("path", str_value(&write.path)),
1627        ("kind", str_value(write.kind)),
1628        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1629        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1630    ])
1631}
1632
1633fn status_to_value(status: StagedStatus) -> VmValue {
1634    build_dict([
1635        (
1636            "pending_writes",
1637            VmValue::List(Arc::new(
1638                status
1639                    .pending_writes
1640                    .into_iter()
1641                    .map(pending_write_to_value)
1642                    .collect(),
1643            )),
1644        ),
1645        (
1646            "total_bytes_pending",
1647            VmValue::Int(status.total_bytes_pending as i64),
1648        ),
1649        (
1650            "oldest_pending_age_ms",
1651            VmValue::Int(status.oldest_pending_age_ms),
1652        ),
1653    ])
1654}
1655
1656fn commit_result_to_value(result: CommitResult) -> VmValue {
1657    build_dict([
1658        (
1659            "committed_paths",
1660            VmValue::List(Arc::new(
1661                result
1662                    .committed_paths
1663                    .into_iter()
1664                    .map(|path| VmValue::String(Arc::from(path)))
1665                    .collect(),
1666            )),
1667        ),
1668        (
1669            "failed_paths_with_reasons",
1670            VmValue::List(Arc::new(
1671                result
1672                    .failed_paths_with_reasons
1673                    .into_iter()
1674                    .map(|(path, reason)| {
1675                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1676                    })
1677                    .collect(),
1678            )),
1679        ),
1680    ])
1681}
1682
1683fn discard_result_to_value(result: DiscardResult) -> VmValue {
1684    build_dict([(
1685        "discarded_paths",
1686        VmValue::List(Arc::new(
1687            result
1688                .discarded_paths
1689                .into_iter()
1690                .map(|path| VmValue::String(Arc::from(path)))
1691                .collect(),
1692        )),
1693    )])
1694}
1695
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Empty and dots-only ids must be rewritten: left verbatim they would be
    /// pushed onto the state path as current-dir / parent-dir tokens.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        for evil in ["..", ".", "...", ""] {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");
            assert!(
                safe.bytes().any(|b| b != b'.'),
                "`{evil}` -> `{safe}` is still all dots"
            );
            // Every component of the result must be a plain name.
            let traversal_free = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(traversal_free, "`{safe}` contains a traversal component");
        }
    }

    /// An id made only of allowed characters is returned unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// Even for the id `..` the session directory stays below the staged
    /// state directory and contains no parent-dir component.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        assert!(
            dir.components().all(|c| !matches!(c, Component::ParentDir)),
            "session_dir({dir:?}) escapes via `..`"
        );
        let staged = STATE_REL
            .iter()
            .fold(std::path::PathBuf::from("/workspace"), |acc, part| {
                acc.join(part)
            });
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}