Skip to main content

harn_hostlib/
fs.rs

1//! Session-scoped staged filesystem mode.
2//!
3//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
4//! mutations land in a durable per-session overlay under
5//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
6//! that overlay first, so agent loops see their own pending writes without
7//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value, to_agent_path,
27};
28use crate::tools::permissions::enforce_path_scope;
29
/// Registered name of the `set_mode` builtin.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
/// Registered name of the `staged_status` builtin.
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
/// Registered name of the `commit_staged` builtin.
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
/// Registered name of the `discard_staged` builtin.
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
/// Registered name of the `safe_text_patch` builtin.
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
/// Registered name of the `read_text` builtin.
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
/// Registered name of the `emit_safe_text_patch_result` builtin.
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

/// NOTE(review): presumably the value written to `Manifest::version` — confirm
/// against the persistence code.
const MANIFEST_VERSION: u32 = 1;
/// Path components of the staging area (`.harn/state/staged`); per the module
/// docs each session's overlay lives in a `<session_id>` directory below it.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
41/// Hostlib filesystem capability handle.
42#[derive(Default)]
43pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        registry.register_fn("fs", SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        registry.register_fn("fs", STATUS_BUILTIN, "staged_status", staged_status_builtin);
53        registry.register_fn("fs", COMMIT_BUILTIN, "commit_staged", commit_staged_builtin);
54        registry.register_fn(
55            "fs",
56            DISCARD_BUILTIN,
57            "discard_staged",
58            discard_staged_builtin,
59        );
60        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
61        // they share the deterministic-tools gate with `tools::*` file I/O.
62        registry.register_gated_fn(
63            "fs",
64            SAFE_TEXT_PATCH_BUILTIN,
65            "safe_text_patch",
66            safe_text_patch_builtin,
67        );
68        registry.register_gated_fn("fs", READ_TEXT_BUILTIN, "read_text", read_text_builtin);
69        registry.register_fn(
70            "fs",
71            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
72            "emit_safe_text_patch_result",
73            emit_safe_text_patch_result_builtin,
74        );
75    }
76}
77
78/// Filesystem mode for one hostlib session.
79#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
80#[serde(rename_all = "lowercase")]
81pub enum FsMode {
82    /// Mutations apply to the working tree immediately.
83    Immediate,
84    /// Mutations are recorded in the staging layer until committed.
85    Staged,
86}
87
88impl FsMode {
89    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
90        match raw {
91            "immediate" => Ok(Self::Immediate),
92            "staged" => Ok(Self::Staged),
93            other => Err(HostlibError::InvalidParameter {
94                builtin,
95                param: "mode",
96                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
97            }),
98        }
99    }
100
101    /// Wire string used by hostlib schemas.
102    pub fn as_str(self) -> &'static str {
103        match self {
104            Self::Immediate => "immediate",
105            Self::Staged => "staged",
106        }
107    }
108}
109
110#[derive(Clone, Debug, Serialize, Deserialize)]
111struct Manifest {
112    version: u32,
113    session_id: String,
114    mode: FsMode,
115    root: String,
116    entries: BTreeMap<String, StagedEntry>,
117}
118
119#[derive(Clone, Debug, Serialize, Deserialize)]
120#[serde(tag = "kind", rename_all = "snake_case")]
121enum StagedEntry {
122    Write {
123        body_hash: String,
124        len: u64,
125        created_at_ms: i64,
126    },
127    Delete {
128        recursive: bool,
129        created_at_ms: i64,
130    },
131}
132
133impl StagedEntry {
134    fn created_at_ms(&self) -> i64 {
135        match self {
136            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
137                *created_at_ms
138            }
139        }
140    }
141
142    fn body_len(&self) -> u64 {
143        match self {
144            Self::Write { len, .. } => *len,
145            Self::Delete { .. } => 0,
146        }
147    }
148}
149
150#[derive(Clone, Debug)]
151struct SessionState {
152    session_id: String,
153    mode: FsMode,
154    root: PathBuf,
155    entries: BTreeMap<PathBuf, StagedEntry>,
156}
157
/// What a staged write did, as reported back to the calling builtin.
#[derive(Debug, Clone)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not exist before the write.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
163
/// One directory entry as reported by an overlay directory listing.
#[derive(Debug, Clone)]
pub(crate) struct OverlayDirEntry {
    /// Entry name.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symlink.
    pub(crate) is_symlink: bool,
    /// Entry size in bytes.
    pub(crate) size: u64,
}
171
172/// Summary of staged filesystem changes for one session.
173#[derive(Clone, Debug)]
174pub struct StagedStatus {
175    /// Pending path changes, sorted by path.
176    pub pending_writes: Vec<PendingWrite>,
177    /// Bytes stored in staged write bodies.
178    pub total_bytes_pending: u64,
179    /// Age in milliseconds of the oldest pending change, or 0 when empty.
180    pub oldest_pending_age_ms: i64,
181}
182
/// A single staged filesystem change awaiting commit or discard.
#[derive(Debug, Clone)]
pub struct PendingWrite {
    /// Absolute path the staged change applies to.
    pub path: String,
    /// Kind of change: `write`, `delete`, or the reserved future `move`.
    pub kind: &'static str,
    /// Bytes that the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes that the final staged view removes at this path.
    pub bytes_removed: u64,
}
195
196/// Result returned after changing a session's filesystem mode.
197#[derive(Clone, Debug)]
198pub struct SetModeResult {
199    /// Mode active before the change.
200    pub previous_mode: FsMode,
201}
202
/// Outcome of flushing staged changes to disk.
#[derive(Debug, Clone)]
pub struct CommitResult {
    /// Paths whose staged change was applied to disk.
    pub committed_paths: Vec<String>,
    /// Paths that could not be applied, each paired with a human-readable
    /// reason.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
211
/// Outcome of throwing away staged changes.
#[derive(Debug, Clone)]
pub struct DiscardResult {
    /// Paths whose staged entry was dropped.
    pub discarded_paths: Vec<String>,
}
218
219static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
220
221fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
222    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
223}
224
225/// Lock the session map, panicking with one canonical message if a prior
226/// holder poisoned the mutex. Every accessor goes through here so the poison
227/// policy and message live in exactly one place.
228fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
229    sessions()
230        .lock()
231        .expect("hostlib fs session mutex poisoned")
232}
233
234/// Remember the workspace root associated with a live session.
235///
236/// ACP calls this when a prompt starts so Harn code can call
237/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
238pub fn configure_session_root(session_id: &str, root: &Path) {
239    if session_id.trim().is_empty() {
240        return;
241    }
242    let root = normalize_logical(root);
243    let mut guard = lock_sessions();
244    match guard.get_mut(session_id) {
245        Some(state) if state.entries.is_empty() => {
246            state.root = root;
247        }
248        Some(_) => {}
249        None => {
250            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
251                session_id: session_id.to_string(),
252                mode: FsMode::Immediate,
253                root,
254                entries: BTreeMap::new(),
255            });
256            guard.insert(session_id.to_string(), state);
257        }
258    }
259}
260
261/// Return the root currently associated with a hostlib session.
262pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
263    if session_id.trim().is_empty() {
264        return None;
265    }
266    let guard = lock_sessions();
267    guard.get(session_id).map(|state| state.root.clone())
268}
269
270/// Set a session's filesystem mode.
271pub fn set_mode(
272    session_id: &str,
273    mode: FsMode,
274    root: Option<&Path>,
275) -> Result<SetModeResult, HostlibError> {
276    validate_session_id(SET_MODE_BUILTIN, session_id)?;
277    let mut guard = lock_sessions();
278    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
279    let previous_mode = state.mode;
280    state.mode = mode;
281    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
282        builtin: SET_MODE_BUILTIN,
283        message: err,
284    })?;
285    guard.insert(session_id.to_string(), state);
286    Ok(SetModeResult { previous_mode })
287}
288
289/// Return the staged status for a session.
290pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
291    validate_session_id(STATUS_BUILTIN, session_id)?;
292    let mut guard = lock_sessions();
293    let state = state_for_locked(&mut guard, session_id, None)?;
294    let status = status_from_state(&state);
295    guard.insert(session_id.to_string(), state);
296    Ok(status)
297}
298
299/// Return native filesystem paths for every pending staged entry.
300///
301/// Public staged status normalizes paths for the agent/tool surface. Internal
302/// callers that need to read from the filesystem must keep the native
303/// [`PathBuf`]s, especially on Windows where slash-normalized display strings
304/// are not always valid filesystem paths.
305pub(crate) fn staged_pending_paths(session_id: &str) -> Result<BTreeSet<PathBuf>, HostlibError> {
306    validate_session_id(STATUS_BUILTIN, session_id)?;
307    let mut guard = lock_sessions();
308    let state = state_for_locked(&mut guard, session_id, None)?;
309    let paths = state.entries.keys().cloned().collect();
310    guard.insert(session_id.to_string(), state);
311    Ok(paths)
312}
313
314/// Commit staged changes for all paths or for a filtered path list.
315pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
316    validate_session_id(COMMIT_BUILTIN, session_id)?;
317    let mut guard = lock_sessions();
318    let mut state = state_for_locked(&mut guard, session_id, None)?;
319    let selected = selected_paths(&state, paths);
320    let mut committed_paths = Vec::new();
321    let mut failed_paths_with_reasons = Vec::new();
322
323    for path in selected {
324        let Some(entry) = state.entries.get(&path).cloned() else {
325            continue;
326        };
327        let path_label = to_agent_path(&path);
328        // The overlay always lives inside the workspace, but commit flushes
329        // to the *target* working-tree path. Enforce workspace-root scope
330        // against that target so a staged entry — possibly persisted under
331        // a looser policy in an earlier session — can never write outside
332        // the roots active at commit time.
333        let access = match entry {
334            StagedEntry::Write { .. } => FsAccess::Write,
335            StagedEntry::Delete { .. } => FsAccess::Delete,
336        };
337        if let Err(violation) = check_fs_path_scope(&path, access) {
338            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
339            continue;
340        }
341        match commit_entry(&state, &path, &entry) {
342            Ok(()) => {
343                state.entries.remove(&path);
344                committed_paths.push(path_label);
345            }
346            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
347        }
348    }
349
350    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
351        builtin: COMMIT_BUILTIN,
352        message: err,
353    })?;
354    emit_staged_update(&state);
355    guard.insert(session_id.to_string(), state);
356    Ok(CommitResult {
357        committed_paths,
358        failed_paths_with_reasons,
359    })
360}
361
362/// Discard staged changes for all paths or for a filtered path list.
363pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
364    validate_session_id(DISCARD_BUILTIN, session_id)?;
365    let mut guard = lock_sessions();
366    let mut state = state_for_locked(&mut guard, session_id, None)?;
367    let selected = selected_paths(&state, paths);
368    let mut discarded_paths = Vec::new();
369    for path in selected {
370        if state.entries.remove(&path).is_some() {
371            discarded_paths.push(to_agent_path(&path));
372        }
373    }
374    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
375        builtin: DISCARD_BUILTIN,
376        message: err,
377    })?;
378    emit_staged_update(&state);
379    guard.insert(session_id.to_string(), state);
380    Ok(DiscardResult { discarded_paths })
381}
382
383/// Remove all persisted staged-fs state for a caller-owned throw-away session.
384///
385/// Normal agent sessions keep their manifest after `discard_staged` so hosts can
386/// continue reporting session state. Transient dry-run sessions own their ids,
387/// though, and should remove both the in-memory entry and on-disk overlay after
388/// their preview is rendered.
389pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
390    validate_session_id(DISCARD_BUILTIN, session_id)?;
391    let mut guard = lock_sessions();
392    let state = match guard.remove(session_id) {
393        Some(state) => state,
394        None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
395            HostlibError::Backend {
396                builtin: DISCARD_BUILTIN,
397                message: err,
398            }
399        })?,
400    };
401    let dir = session_dir(&state.root, &state.session_id);
402    match stdfs::remove_dir_all(&dir) {
403        Ok(()) => Ok(()),
404        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
405        Err(err) => Err(HostlibError::Backend {
406            builtin: DISCARD_BUILTIN,
407            message: format!("remove staged session {}: {err}", dir.display()),
408        }),
409    }
410}
411
412pub(crate) fn read(
413    path: &Path,
414    explicit_session_id: Option<&str>,
415) -> Option<std::io::Result<Vec<u8>>> {
416    let session_id = active_session_id(explicit_session_id)?;
417    let mut guard = lock_sessions();
418    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
419    let result = if state.mode == FsMode::Staged {
420        overlay_read(&state, path)
421    } else {
422        None
423    };
424    guard.insert(session_id, state);
425    result
426}
427
428pub(crate) fn read_to_string(
429    path: &Path,
430    explicit_session_id: Option<&str>,
431) -> Option<std::io::Result<String>> {
432    read(path, explicit_session_id).map(|result| {
433        result.and_then(|bytes| {
434            String::from_utf8(bytes).map_err(|err| {
435                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
436            })
437        })
438    })
439}
440
441pub(crate) fn read_dir(
442    path: &Path,
443    explicit_session_id: Option<&str>,
444) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
445    let session_id = active_session_id(explicit_session_id)?;
446    let mut guard = lock_sessions();
447    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
448    let result = if state.mode == FsMode::Staged {
449        Some(overlay_read_dir(&state, path))
450    } else {
451        None
452    };
453    guard.insert(session_id, state);
454    result
455}
456
457pub(crate) fn stage_write_or_none(
458    builtin: &'static str,
459    path: &Path,
460    bytes: &[u8],
461    create_parents: bool,
462    overwrite: bool,
463    explicit_session_id: Option<&str>,
464) -> Result<Option<WriteOutcome>, HostlibError> {
465    let Some(session_id) = active_session_id(explicit_session_id) else {
466        return Ok(None);
467    };
468    let mut guard = lock_sessions();
469    let mut state = state_for_locked(&mut guard, &session_id, None)?;
470    if state.mode != FsMode::Staged {
471        guard.insert(session_id, state);
472        return Ok(None);
473    }
474
475    let key = normalize_logical(path);
476    let existed = overlay_exists(&state, &key);
477    if existed && !overwrite {
478        guard.insert(session_id, state);
479        return Err(HostlibError::Backend {
480            builtin,
481            message: format!("`{}` exists and overwrite=false", key.display()),
482        });
483    }
484    if !create_parents && !parent_exists(&state, &key) {
485        guard.insert(session_id, state);
486        return Err(HostlibError::Backend {
487            builtin,
488            message: format!("parent directory for `{}` does not exist", key.display()),
489        });
490    }
491
492    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
493        builtin,
494        message: err,
495    })?;
496    state.entries.insert(
497        key.clone(),
498        StagedEntry::Write {
499            body_hash: hash,
500            len: bytes.len() as u64,
501            created_at_ms: now_ms(),
502        },
503    );
504    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
505        builtin,
506        message: err,
507    })?;
508    emit_staged_update(&state);
509    guard.insert(session_id, state);
510    Ok(Some(WriteOutcome {
511        created: !existed,
512        bytes_written: bytes.len(),
513    }))
514}
515
516pub(crate) fn stage_delete_or_none(
517    builtin: &'static str,
518    path: &Path,
519    recursive: bool,
520    explicit_session_id: Option<&str>,
521) -> Result<Option<bool>, HostlibError> {
522    let Some(session_id) = active_session_id(explicit_session_id) else {
523        return Ok(None);
524    };
525    let mut guard = lock_sessions();
526    let mut state = state_for_locked(&mut guard, &session_id, None)?;
527    if state.mode != FsMode::Staged {
528        guard.insert(session_id, state);
529        return Ok(None);
530    }
531
532    let key = normalize_logical(path);
533    let staged_targets = staged_paths_under(&state, &key);
534    let disk_exists = key.exists();
535    if !disk_exists && staged_targets.is_empty() {
536        guard.insert(session_id, state);
537        return Ok(Some(false));
538    }
539
540    if !disk_exists {
541        for staged in staged_targets {
542            state.entries.remove(&staged);
543        }
544    } else {
545        validate_delete_shape(builtin, &key, recursive)?;
546        for staged in staged_targets {
547            state.entries.remove(&staged);
548        }
549        state.entries.insert(
550            key.clone(),
551            StagedEntry::Delete {
552                recursive,
553                created_at_ms: now_ms(),
554            },
555        );
556    }
557    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
558        builtin,
559        message: err,
560    })?;
561    emit_staged_update(&state);
562    guard.insert(session_id, state);
563    Ok(Some(true))
564}
565
566/// Outcome of one [`safe_text_patch`] call. `applied` says whether the
567/// on-disk (or staged-overlay) bytes changed; `result` carries the
568/// structured discriminant used by the wire/JSON shape.
569#[derive(Clone, Debug)]
570pub struct SafeTextPatchOutcome {
571    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
572    pub result: SafeTextPatchResult,
573    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed.
574    pub current_hash: String,
575    /// `sha256:HEX` of the requested post-image.
576    pub after_hash: String,
577    /// `true` when the file did not exist before the call.
578    pub created: bool,
579    /// Bytes written; `0` on `stale_base` or `no_op`.
580    pub bytes_written: usize,
581}
582
/// How a [`safe_text_patch`] call resolved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeTextPatchResult {
    /// The pre-image hash matched (or none was supplied) and the post-image
    /// differs from the pre-image, so bytes were written.
    Applied,
    /// `expected_hash` disagreed with the observed pre-image hash; nothing
    /// was written. Callers should re-read and retry.
    StaleBase,
    /// The pre-image hash matched and the post-image is identical to the
    /// pre-image; the write was skipped to avoid spurious timestamps and
    /// overlay churn.
    NoOp,
}

impl SafeTextPatchResult {
    /// Wire string for this discriminant.
    fn as_str(self) -> &'static str {
        match self {
            SafeTextPatchResult::NoOp => "no_op",
            SafeTextPatchResult::StaleBase => "stale_base",
            SafeTextPatchResult::Applied => "applied",
        }
    }
}
606
607/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
608/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
609/// safe-text-patch surface.
610fn hash_label(bytes: &[u8]) -> String {
611    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
612}
613
614/// Atomic compare-and-swap-style text write.
615///
616/// Reads the current bytes at `path` through the staged-fs overlay (when a
617/// session is active) so concurrent agent edits see each other's pending
618/// writes. If `expected_hash` is supplied and differs from the observed
619/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
620/// mutating any state. On a hash match the post-image is written through
621/// the same overlay path, keeping the read and the write atomic with
622/// respect to other staged-fs consumers in the same process.
623///
624/// Atomicity:
625///
626/// - When a session is in staged mode, the read, hash check, and write
627///   all happen under a single acquisition of the sessions mutex, so a
628///   sibling thread cannot stage a write into the window between the
629///   pre-image snapshot and the commit.
630/// - When the call routes through disk (no active session, or session in
631///   immediate mode), the write goes through an atomic rename-into-place
632///   so partial-write tearing is impossible. Cross-process races are
633///   intentionally out of scope — the staged-fs overlay is the
634///   collision-rejection layer.
635pub fn safe_text_patch(
636    path: &Path,
637    content: &str,
638    expected_hash: Option<&str>,
639    session_id: Option<&str>,
640    create_parents: bool,
641    overwrite: bool,
642) -> Result<SafeTextPatchOutcome, HostlibError> {
643    let new_bytes = content.as_bytes();
644    let after_hash = hash_label(new_bytes);
645
646    if let Some(outcome) = safe_text_patch_staged(
647        path,
648        new_bytes,
649        expected_hash,
650        session_id,
651        create_parents,
652        overwrite,
653        &after_hash,
654    )? {
655        return Ok(outcome);
656    }
657
658    safe_text_patch_disk(
659        path,
660        new_bytes,
661        expected_hash,
662        create_parents,
663        overwrite,
664        after_hash,
665    )
666}
667
668/// Atomic CAS path for a session in `staged` mode. Holds the sessions
669/// mutex through the entire read → hash → check → write so concurrent
670/// agents in the same process cannot race the snapshot. Returns `None`
671/// when no session is active or the session is in `immediate` mode, so
672/// the caller can fall through to the disk path.
673#[allow(clippy::too_many_arguments)]
674fn safe_text_patch_staged(
675    path: &Path,
676    new_bytes: &[u8],
677    expected_hash: Option<&str>,
678    session_id: Option<&str>,
679    create_parents: bool,
680    overwrite: bool,
681    after_hash: &str,
682) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
683    let Some(session) = active_session_id(session_id) else {
684        return Ok(None);
685    };
686    let mut guard = lock_sessions();
687    let mut state = state_for_locked(&mut guard, &session, None)?;
688    if state.mode != FsMode::Staged {
689        guard.insert(session, state);
690        return Ok(None);
691    }
692
693    let key = normalize_logical(path);
694    let (existing_bytes, existed) = match overlay_read(&state, path) {
695        Some(Ok(bytes)) => (bytes, true),
696        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
697        Some(Err(err)) => {
698            guard.insert(session, state);
699            return Err(HostlibError::Backend {
700                builtin: SAFE_TEXT_PATCH_BUILTIN,
701                message: format!("read `{}`: {err}", path.display()),
702            });
703        }
704        None => match stdfs::read(path) {
705            Ok(bytes) => (bytes, true),
706            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
707            Err(err) => {
708                guard.insert(session, state);
709                return Err(HostlibError::Backend {
710                    builtin: SAFE_TEXT_PATCH_BUILTIN,
711                    message: format!("read `{}`: {err}", path.display()),
712                });
713            }
714        },
715    };
716    let current_hash = hash_label(&existing_bytes);
717
718    if let Some(expected) = expected_hash {
719        if expected != current_hash {
720            guard.insert(session, state);
721            return Ok(Some(SafeTextPatchOutcome {
722                result: SafeTextPatchResult::StaleBase,
723                current_hash,
724                after_hash: after_hash.to_string(),
725                created: false,
726                bytes_written: 0,
727            }));
728        }
729    }
730
731    if existed && existing_bytes == new_bytes {
732        guard.insert(session, state);
733        return Ok(Some(SafeTextPatchOutcome {
734            result: SafeTextPatchResult::NoOp,
735            current_hash,
736            after_hash: after_hash.to_string(),
737            created: false,
738            bytes_written: 0,
739        }));
740    }
741
742    let overlay_existed = overlay_exists(&state, &key);
743    if overlay_existed && !overwrite {
744        guard.insert(session, state);
745        return Err(HostlibError::Backend {
746            builtin: SAFE_TEXT_PATCH_BUILTIN,
747            message: format!("`{}` exists and overwrite=false", key.display()),
748        });
749    }
750    if !create_parents && !parent_exists(&state, &key) {
751        guard.insert(session, state);
752        return Err(HostlibError::Backend {
753            builtin: SAFE_TEXT_PATCH_BUILTIN,
754            message: format!("parent directory for `{}` does not exist", key.display()),
755        });
756    }
757
758    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
759        builtin: SAFE_TEXT_PATCH_BUILTIN,
760        message: err,
761    })?;
762    state.entries.insert(
763        key.clone(),
764        StagedEntry::Write {
765            body_hash,
766            len: new_bytes.len() as u64,
767            created_at_ms: now_ms(),
768        },
769    );
770    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
771        builtin: SAFE_TEXT_PATCH_BUILTIN,
772        message: err,
773    })?;
774    emit_staged_update(&state);
775    guard.insert(session, state);
776
777    Ok(Some(SafeTextPatchOutcome {
778        result: SafeTextPatchResult::Applied,
779        current_hash,
780        after_hash: after_hash.to_string(),
781        created: !existed,
782        bytes_written: new_bytes.len(),
783    }))
784}
785
786/// Disk path for callers without an active staged session. Uses
787/// `atomic_write` so the post-image lands via rename-into-place rather
788/// than an open/truncate/write/close sequence — readers either see the
789/// pre-image or the post-image, never a torn write.
790fn safe_text_patch_disk(
791    path: &Path,
792    new_bytes: &[u8],
793    expected_hash: Option<&str>,
794    create_parents: bool,
795    overwrite: bool,
796    after_hash: String,
797) -> Result<SafeTextPatchOutcome, HostlibError> {
798    let (existing_bytes, existed) = match stdfs::read(path) {
799        Ok(bytes) => (bytes, true),
800        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
801        Err(err) => {
802            return Err(HostlibError::Backend {
803                builtin: SAFE_TEXT_PATCH_BUILTIN,
804                message: format!("read `{}`: {err}", path.display()),
805            });
806        }
807    };
808    let current_hash = hash_label(&existing_bytes);
809
810    if let Some(expected) = expected_hash {
811        if expected != current_hash {
812            return Ok(SafeTextPatchOutcome {
813                result: SafeTextPatchResult::StaleBase,
814                current_hash,
815                after_hash,
816                created: false,
817                bytes_written: 0,
818            });
819        }
820    }
821
822    if existed && existing_bytes == new_bytes {
823        return Ok(SafeTextPatchOutcome {
824            result: SafeTextPatchResult::NoOp,
825            current_hash,
826            after_hash,
827            created: false,
828            bytes_written: 0,
829        });
830    }
831    if existed && !overwrite {
832        return Err(HostlibError::Backend {
833            builtin: SAFE_TEXT_PATCH_BUILTIN,
834            message: format!("`{}` exists and overwrite=false", path.display()),
835        });
836    }
837    if !create_parents {
838        if let Some(parent) = path.parent() {
839            if !parent.as_os_str().is_empty() && !parent.is_dir() {
840                return Err(HostlibError::Backend {
841                    builtin: SAFE_TEXT_PATCH_BUILTIN,
842                    message: format!(
843                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
844                        path.display()
845                    ),
846                });
847            }
848        }
849    }
850
851    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
852    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
853        builtin: SAFE_TEXT_PATCH_BUILTIN,
854        message: format!("write `{}`: {err}", path.display()),
855    })?;
856
857    Ok(SafeTextPatchOutcome {
858        result: SafeTextPatchResult::Applied,
859        current_hash,
860        after_hash,
861        created: !existed,
862        bytes_written: new_bytes.len(),
863    })
864}
865
866/// Read the pre-image through the staged-fs overlay (when active),
867/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
868/// `builtin` is the caller's tag — used so backend errors point at the
869/// right builtin name in diagnostics.
870fn read_existing(
871    builtin: &'static str,
872    path: &Path,
873    session_id: Option<&str>,
874) -> Result<(Vec<u8>, bool), HostlibError> {
875    if let Some(result) = read(path, session_id) {
876        return match result {
877            Ok(bytes) => Ok((bytes, true)),
878            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
879            Err(err) => Err(HostlibError::Backend {
880                builtin,
881                message: format!("read `{}`: {err}", path.display()),
882            }),
883        };
884    }
885    match stdfs::read(path) {
886        Ok(bytes) => Ok((bytes, true)),
887        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
888        Err(err) => Err(HostlibError::Backend {
889            builtin,
890            message: format!("read `{}`: {err}", path.display()),
891        }),
892    }
893}
894
895fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
896    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
897    let dict = raw.as_ref();
898    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
899    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
900    let path = Path::new(&path_str);
901    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
902
903    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
904    let hash = hash_label(&bytes);
905    let content = match std::str::from_utf8(&bytes) {
906        Ok(s) => s.to_string(),
907        Err(err) => {
908            return Err(HostlibError::Backend {
909                builtin: READ_TEXT_BUILTIN,
910                message: format!("`{path_str}` is not valid UTF-8: {err}"),
911            });
912        }
913    };
914    let bytes_len = bytes.len() as i64;
915    Ok(build_dict([
916        ("path", str_value(&path_str)),
917        ("content", str_value(&content)),
918        ("sha256", str_value(&hash)),
919        ("size", VmValue::Int(bytes_len)),
920        ("exists", VmValue::Bool(existed)),
921    ]))
922}
923
924fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
925    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
926    let dict = raw.as_ref();
927
928    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
929    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
930    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
931    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
932    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
933    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
934
935    enforce_path_scope(
936        SAFE_TEXT_PATCH_BUILTIN,
937        Path::new(&path_str),
938        FsAccess::Write,
939    )?;
940    let outcome = safe_text_patch(
941        Path::new(&path_str),
942        &content,
943        expected_hash.as_deref(),
944        session_id.as_deref(),
945        create_parents,
946        overwrite,
947    )?;
948
949    let entries: Vec<(&'static str, VmValue)> = vec![
950        ("path", str_value(&path_str)),
951        ("result", str_value(outcome.result.as_str())),
952        (
953            "applied",
954            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
955        ),
956        (
957            "stale_base",
958            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
959        ),
960        ("current_hash", str_value(&outcome.current_hash)),
961        ("before_sha256", str_value(&outcome.current_hash)),
962        ("after_sha256", str_value(&outcome.after_hash)),
963        ("created", VmValue::Bool(outcome.created)),
964        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
965        (
966            "expected_hash",
967            match expected_hash.as_deref() {
968                Some(hash) => str_value(hash),
969                None => VmValue::Nil,
970            },
971        ),
972    ];
973    Ok(build_dict(entries))
974}
975
976fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
977    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
978    let dict = raw.as_ref();
979
980    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
981    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
982    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
983    let bytes_written = optional_int(
984        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
985        dict,
986        "bytes_written",
987        0,
988    )?;
989    let failed_hunk_index = match dict.get("failed_hunk_index") {
990        None | Some(VmValue::Nil) => None,
991        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
992        Some(other) => {
993            return Err(HostlibError::InvalidParameter {
994                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
995                param: "failed_hunk_index",
996                message: format!("expected non-negative integer, got {}", other.type_name()),
997            });
998        }
999    };
1000    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1001        .or_else(harn_vm::agent_sessions::current_session_id);
1002
1003    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1004        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1005            session_id,
1006            path,
1007            result,
1008            hunks_count: hunks_count.max(0) as usize,
1009            bytes_written: bytes_written.max(0) as u64,
1010            failed_hunk_index,
1011        });
1012        Ok(VmValue::Bool(true))
1013    } else {
1014        // Silently no-op when no session is active — telemetry without a
1015        // session has nowhere to route. Caller can opt in by always
1016        // passing session_id explicitly.
1017        Ok(VmValue::Bool(false))
1018    }
1019}
1020
1021fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1022    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1023    let dict = raw.as_ref();
1024    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1025    let mode = FsMode::parse(
1026        SET_MODE_BUILTIN,
1027        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1028    )?;
1029    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1030    let result = set_mode(&session_id, mode, root.as_deref())?;
1031    Ok(build_dict([(
1032        "previous_mode",
1033        str_value(result.previous_mode.as_str()),
1034    )]))
1035}
1036
1037fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1038    let raw = dict_arg(STATUS_BUILTIN, args)?;
1039    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1040    Ok(status_to_value(staged_status(&session_id)?))
1041}
1042
1043fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1044    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1045    let dict = raw.as_ref();
1046    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1047    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1048    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1049}
1050
1051fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1052    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1053    let dict = raw.as_ref();
1054    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1055    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1056    Ok(discard_result_to_value(discard_staged(
1057        &session_id,
1058        &paths,
1059    )?))
1060}
1061
1062fn state_for_locked(
1063    guard: &mut BTreeMap<String, SessionState>,
1064    session_id: &str,
1065    root: Option<PathBuf>,
1066) -> Result<SessionState, HostlibError> {
1067    if let Some(existing) = guard.get(session_id) {
1068        let mut state = existing.clone();
1069        if let Some(root) = root {
1070            if state.entries.is_empty() {
1071                state.root = root;
1072            }
1073        }
1074        return Ok(state);
1075    }
1076    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1077        builtin: SET_MODE_BUILTIN,
1078        message: err,
1079    })?;
1080    Ok(state)
1081}
1082
1083fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1084    let root = root.unwrap_or_else(default_root);
1085    let manifest_path = manifest_path(&root, session_id);
1086    if manifest_path.exists() {
1087        let text = stdfs::read_to_string(&manifest_path)
1088            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1089        let manifest: Manifest = serde_json::from_str(&text)
1090            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1091        if manifest.version != MANIFEST_VERSION {
1092            return Err(format!(
1093                "unsupported staged fs manifest version {} in {}",
1094                manifest.version,
1095                manifest_path.display()
1096            ));
1097        }
1098        if manifest.session_id != session_id {
1099            return Err(format!(
1100                "staged fs manifest session id mismatch in {}",
1101                manifest_path.display()
1102            ));
1103        }
1104        return Ok(SessionState {
1105            session_id: manifest.session_id,
1106            mode: manifest.mode,
1107            root: normalize_logical(Path::new(&manifest.root)),
1108            entries: manifest
1109                .entries
1110                .into_iter()
1111                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1112                .collect(),
1113        });
1114    }
1115    Ok(SessionState {
1116        session_id: session_id.to_string(),
1117        mode: FsMode::Immediate,
1118        root,
1119        entries: BTreeMap::new(),
1120    })
1121}
1122
1123fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1124    let dir = session_dir(&state.root, &state.session_id);
1125    stdfs::create_dir_all(dir.join("bodies"))
1126        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1127    let manifest = Manifest {
1128        version: MANIFEST_VERSION,
1129        session_id: state.session_id.clone(),
1130        mode: state.mode,
1131        root: state.root.to_string_lossy().into_owned(),
1132        entries: state
1133            .entries
1134            .iter()
1135            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1136            .collect(),
1137    };
1138    let bytes = serde_json::to_vec_pretty(&manifest)
1139        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1140    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1141    append_journal(state, op, path)?;
1142    prune_unreferenced_bodies(state);
1143    Ok(())
1144}
1145
1146fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1147    let dir = session_dir(&state.root, &state.session_id);
1148    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1149    let line = serde_json::to_string(&serde_json::json!({
1150        "ts_ms": now_ms(),
1151        "op": op,
1152        "path": path.map(|path| path.to_string_lossy().into_owned()), // agent-path-ok: on-disk journal.jsonl audit line, never returned to the agent
1153        "pending_count": state.entries.len(),
1154    }))
1155    .map_err(|err| format!("serialize staged journal: {err}"))?;
1156    let mut file = stdfs::OpenOptions::new()
1157        .create(true)
1158        .append(true)
1159        .open(dir.join("journal.jsonl"))
1160        .map_err(|err| format!("open staged journal: {err}"))?;
1161    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1162}
1163
1164fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1165    let hash = hex::encode(Sha256::digest(bytes));
1166    let path = session_dir(&state.root, &state.session_id)
1167        .join("bodies")
1168        .join(&hash);
1169    if !path.exists() {
1170        atomic_write(&path, bytes)?;
1171    }
1172    Ok(hash)
1173}
1174
1175fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1176    stdfs::read(
1177        session_dir(&state.root, &state.session_id)
1178            .join("bodies")
1179            .join(hash),
1180    )
1181}
1182
1183fn prune_unreferenced_bodies(state: &SessionState) {
1184    let live: BTreeSet<String> = state
1185        .entries
1186        .values()
1187        .filter_map(|entry| match entry {
1188            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1189            StagedEntry::Delete { .. } => None,
1190        })
1191        .collect();
1192    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1193    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1194        return;
1195    };
1196    for entry in entries.flatten() {
1197        let name = entry.file_name().to_string_lossy().into_owned();
1198        if !live.contains(&name) {
1199            let _ = stdfs::remove_file(entry.path());
1200        }
1201    }
1202}
1203
/// Write `bytes` to `path` by staging them in a sibling temp file and
/// renaming that file over the destination. Missing parent directories are
/// created first.
///
/// The temp file is named `<file name>.tmp-<pid>-<seq>`, where `<seq>` is a
/// process-wide counter. Appending to the full file name (instead of
/// replacing its extension) and using a counter (instead of a millisecond
/// timestamp) keeps concurrent writes from sharing a temp file, both for
/// siblings such as `a.rs` / `a.txt` and for repeated writes to one path.
///
/// If the first rename fails, the destination is removed and the rename is
/// retried once.
///
/// # Errors
/// Returns a human-readable message when the parent directory cannot be
/// created, `path` has no final file-name component, the temp file cannot
/// be written, or both rename attempts fail. The temp file is removed on
/// failure unless the destination was already removed by the retry path,
/// in which case it is kept as the only surviving copy of the data.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Distinguishes temp files created by this process; the pid in the name
    // distinguishes processes.
    static TEMP_SEQ: AtomicUsize = AtomicUsize::new(0);

    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }
    let Some(file_name) = path.file_name() else {
        return Err(format!("write {}: path has no file name", path.display()));
    };

    let seq = TEMP_SEQ.fetch_add(1, Ordering::Relaxed);
    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(".tmp-{}-{}", std::process::id(), seq));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // Do not leave a partially written temp file behind.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }

    let Err(err) = stdfs::rename(&tmp, path) else {
        return Ok(());
    };

    // Fallback for platforms/filesystems where rename cannot replace an
    // existing destination: remove it and try once more.
    let destination_removed = stdfs::remove_file(path).is_ok();
    stdfs::rename(&tmp, path).map_err(|retry| {
        if !destination_removed {
            // Destination is untouched, so the temp file is just litter.
            let _ = stdfs::remove_file(&tmp);
        }
        format!(
            "rename {} to {}: {err}; retry: {retry}",
            tmp.display(),
            path.display()
        )
    })
}
1225
1226fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1227    match entry {
1228        StagedEntry::Write { body_hash, .. } => {
1229            let bytes = read_body(state, body_hash)
1230                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1231            atomic_write(path, &bytes)
1232        }
1233        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1234            Ok(metadata) if metadata.is_dir() => {
1235                if *recursive {
1236                    stdfs::remove_dir_all(path)
1237                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1238                } else {
1239                    stdfs::remove_dir(path)
1240                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1241                }
1242            }
1243            Ok(_) => stdfs::remove_file(path)
1244                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1245            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1246            Err(err) => Err(format!("stat {}: {err}", path.display())),
1247        },
1248    }
1249}
1250
1251fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1252    let key = normalize_logical(path);
1253    if let Some(entry) = state.entries.get(&key) {
1254        return Some(match entry {
1255            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1256            StagedEntry::Delete { .. } => Err(not_found(&key)),
1257        });
1258    }
1259    if deleted_ancestor(state, &key) {
1260        return Some(Err(not_found(&key)));
1261    }
1262    None
1263}
1264
1265fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1266    let dir_key = normalize_logical(path);
1267    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1268        || deleted_ancestor(state, &dir_key)
1269        || matches!(
1270            state.entries.get(&dir_key),
1271            Some(StagedEntry::Delete { .. })
1272        )
1273    {
1274        return Err(not_found(&dir_key));
1275    }
1276    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1277        return Err(not_found(&dir_key));
1278    }
1279
1280    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1281    if path.exists() {
1282        for entry in stdfs::read_dir(path)? {
1283            let entry = entry?;
1284            let name = entry.file_name().to_string_lossy().into_owned();
1285            let file_type = entry.file_type().ok();
1286            let metadata = entry.metadata().ok();
1287            entries.insert(
1288                name.clone(),
1289                OverlayDirEntry {
1290                    name,
1291                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1292                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1293                    size: metadata.map(|m| m.len()).unwrap_or(0),
1294                },
1295            );
1296        }
1297    }
1298
1299    for (path, entry) in &state.entries {
1300        let Some(name) = overlay_child_name(path, &dir_key) else {
1301            continue;
1302        };
1303        match entry {
1304            StagedEntry::Write { len, .. } => {
1305                let is_dir = path.parent() != Some(dir_key.as_path());
1306                entries.insert(
1307                    name.clone(),
1308                    OverlayDirEntry {
1309                        name,
1310                        is_dir,
1311                        is_symlink: false,
1312                        size: if is_dir { 0 } else { *len },
1313                    },
1314                );
1315            }
1316            StagedEntry::Delete { .. } => {
1317                if path.parent() == Some(dir_key.as_path()) {
1318                    entries.remove(&name);
1319                }
1320            }
1321        }
1322    }
1323
1324    Ok(entries.into_values().collect())
1325}
1326
/// Name of the first path component of `path` below `dir`.
///
/// Returns `None` when `path` is not under `dir`, when it equals `dir`, or
/// when the first remaining component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Component::Normal(name) = relative.components().next()? {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1336
1337fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1338    if let Some(entry) = state.entries.get(path) {
1339        return matches!(entry, StagedEntry::Write { .. });
1340    }
1341    if deleted_ancestor(state, path) {
1342        return false;
1343    }
1344    if has_staged_descendant(state, path) {
1345        return true;
1346    }
1347    path.exists()
1348}
1349
1350fn parent_exists(state: &SessionState, path: &Path) -> bool {
1351    let Some(parent) = path.parent() else {
1352        return true;
1353    };
1354    if parent.as_os_str().is_empty() {
1355        return true;
1356    }
1357    if let Some(entry) = state.entries.get(parent) {
1358        return !matches!(entry, StagedEntry::Delete { .. });
1359    }
1360    if deleted_ancestor(state, parent) {
1361        return false;
1362    }
1363    if has_staged_descendant(state, parent) {
1364        return true;
1365    }
1366    parent.is_dir()
1367}
1368
1369fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1370    state.entries.iter().any(|(candidate, entry)| {
1371        matches!(entry, StagedEntry::Delete { .. })
1372            && path != candidate.as_path()
1373            && path.starts_with(candidate)
1374    })
1375}
1376
1377fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1378    state.entries.iter().any(|(candidate, entry)| {
1379        matches!(entry, StagedEntry::Write { .. })
1380            && candidate != path
1381            && candidate.starts_with(path)
1382    })
1383}
1384
1385fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1386    state
1387        .entries
1388        .keys()
1389        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1390        .cloned()
1391        .collect()
1392}
1393
1394fn validate_delete_shape(
1395    builtin: &'static str,
1396    path: &Path,
1397    recursive: bool,
1398) -> Result<(), HostlibError> {
1399    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1400        return Ok(());
1401    };
1402    if metadata.is_dir() && !recursive {
1403        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1404            builtin,
1405            message: format!("read_dir `{}`: {err}", path.display()),
1406        })?;
1407        if entries.next().is_some() {
1408            return Err(HostlibError::Backend {
1409                builtin,
1410                message: format!(
1411                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1412                    path.display()
1413                ),
1414            });
1415        }
1416    }
1417    Ok(())
1418}
1419
1420fn status_from_state(state: &SessionState) -> StagedStatus {
1421    let now = now_ms();
1422    let mut pending_writes = Vec::new();
1423    let mut total_bytes_pending = 0u64;
1424    let mut oldest = None;
1425    for (path, entry) in &state.entries {
1426        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1427        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1428            old.min(entry.created_at_ms())
1429        }));
1430        let (kind, bytes_added, bytes_removed) = match entry {
1431            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1432            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1433        };
1434        pending_writes.push(PendingWrite {
1435            path: to_agent_path(path),
1436            kind,
1437            bytes_added,
1438            bytes_removed,
1439        });
1440    }
1441    StagedStatus {
1442        pending_writes,
1443        total_bytes_pending,
1444        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1445    }
1446}
1447
1448fn disk_size(path: &Path) -> Option<u64> {
1449    let metadata = stdfs::symlink_metadata(path).ok()?;
1450    if metadata.is_file() {
1451        return Some(metadata.len());
1452    }
1453    if metadata.is_dir() {
1454        let mut total = 0u64;
1455        for entry in walkdir::WalkDir::new(path)
1456            .into_iter()
1457            .filter_map(Result::ok)
1458        {
1459            if let Ok(metadata) = entry.metadata() {
1460                if metadata.is_file() {
1461                    total = total.saturating_add(metadata.len());
1462                }
1463            }
1464        }
1465        return Some(total);
1466    }
1467    Some(metadata.len())
1468}
1469
1470fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1471    if paths.is_empty() {
1472        return state.entries.keys().cloned().collect();
1473    }
1474    let selected: BTreeSet<PathBuf> = paths
1475        .iter()
1476        .map(|path| normalize_logical(Path::new(path)))
1477        .collect();
1478    state
1479        .entries
1480        .keys()
1481        .filter(|path| selected.contains(*path))
1482        .cloned()
1483        .collect()
1484}
1485
1486fn active_session_id(explicit: Option<&str>) -> Option<String> {
1487    explicit
1488        .map(str::to_string)
1489        .or_else(harn_vm::agent_sessions::current_session_id)
1490        .filter(|id| !id.trim().is_empty())
1491}
1492
1493fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1494    if session_id.trim().is_empty() {
1495        return Err(HostlibError::InvalidParameter {
1496            builtin,
1497            param: "session_id",
1498            message: "must not be empty".to_string(),
1499        });
1500    }
1501    Ok(())
1502}
1503
/// Root used when the caller supplies none: the process's current
/// directory, or `.` if that cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1507
1508fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1509    let mut dir = root.to_path_buf();
1510    for component in STATE_REL {
1511        dir.push(component);
1512    }
1513    dir.push(sanitize_component(session_id));
1514    dir
1515}
1516
1517fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1518    session_dir(root, session_id).join("manifest.json")
1519}
1520
1521fn sanitize_component(input: &str) -> String {
1522    let sanitized: String = input
1523        .chars()
1524        .map(|ch| match ch {
1525            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1526            _ => '_',
1527        })
1528        .collect();
1529    // `.` is allowed inside a name, but a component that is empty or *only*
1530    // dots (`.`, `..`, `...`) is a path-traversal / current-dir token, not a
1531    // safe single component — `session_dir`'s `dir.push("..")` would escape
1532    // the staged-state root. Force the hashed form so the result is always a
1533    // genuine, traversal-free directory name.
1534    let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1535    if sanitized == input && !is_dotted {
1536        sanitized
1537    } else {
1538        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1539        format!("{sanitized}-{}", &hash[..12])
1540    }
1541}
1542
1543fn normalize_logical(path: &Path) -> PathBuf {
1544    let absolute = if path.is_absolute() {
1545        path.to_path_buf()
1546    } else {
1547        default_root().join(path)
1548    };
1549    let mut out = PathBuf::new();
1550    for component in absolute.components() {
1551        match component {
1552            Component::ParentDir => {
1553                out.pop();
1554            }
1555            Component::CurDir => {}
1556            other => out.push(other),
1557        }
1558    }
1559    out
1560}
1561
/// Build the `NotFound` I/O error the overlay reports for a path that is
/// staged for deletion or otherwise absent.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1568
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reads earlier than the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1575
1576fn emit_staged_update(state: &SessionState) {
1577    let status = status_from_state(state);
1578    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1579        session_id: state.session_id.clone(),
1580        pending_count: status.pending_writes.len(),
1581        total_bytes: status.total_bytes_pending,
1582    });
1583}
1584
1585fn pending_write_to_value(write: PendingWrite) -> VmValue {
1586    build_dict([
1587        ("path", str_value(&write.path)),
1588        ("kind", str_value(write.kind)),
1589        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1590        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1591    ])
1592}
1593
1594fn status_to_value(status: StagedStatus) -> VmValue {
1595    build_dict([
1596        (
1597            "pending_writes",
1598            VmValue::List(Arc::new(
1599                status
1600                    .pending_writes
1601                    .into_iter()
1602                    .map(pending_write_to_value)
1603                    .collect(),
1604            )),
1605        ),
1606        (
1607            "total_bytes_pending",
1608            VmValue::Int(status.total_bytes_pending as i64),
1609        ),
1610        (
1611            "oldest_pending_age_ms",
1612            VmValue::Int(status.oldest_pending_age_ms),
1613        ),
1614    ])
1615}
1616
1617fn commit_result_to_value(result: CommitResult) -> VmValue {
1618    build_dict([
1619        (
1620            "committed_paths",
1621            VmValue::List(Arc::new(
1622                result
1623                    .committed_paths
1624                    .into_iter()
1625                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1626                    .collect(),
1627            )),
1628        ),
1629        (
1630            "failed_paths_with_reasons",
1631            VmValue::List(Arc::new(
1632                result
1633                    .failed_paths_with_reasons
1634                    .into_iter()
1635                    .map(|(path, reason)| {
1636                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1637                    })
1638                    .collect(),
1639            )),
1640        ),
1641    ])
1642}
1643
1644fn discard_result_to_value(result: DiscardResult) -> VmValue {
1645    build_dict([(
1646        "discarded_paths",
1647        VmValue::List(Arc::new(
1648            result
1649                .discarded_paths
1650                .into_iter()
1651                .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1652                .collect(),
1653        )),
1654    )])
1655}
1656
#[cfg(test)]
mod staged_path_tests {
    use super::{set_mode, stage_write_or_none, staged_pending_paths, staged_status, FsMode};
    use tempfile::tempdir;

    /// A staged write is reported under its native filesystem path by
    /// `staged_pending_paths`, and under the agent-facing form of that path
    /// by `staged_status`.
    #[test]
    fn staged_pending_paths_preserve_native_filesystem_paths() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path().canonicalize().expect("canonical tempdir");
        let target = root.join("src").join("lib.rs");

        // Process id + thread name keeps the session id unique per test run.
        let thread = std::thread::current();
        let session_id = format!(
            "native-paths-{}-{}",
            std::process::id(),
            thread.name().unwrap_or("test")
        );

        set_mode(&session_id, FsMode::Staged, Some(&root)).expect("set staged mode");
        stage_write_or_none(
            "test",
            &target,
            b"fn beta() {}\n",
            true,
            true,
            Some(&session_id),
        )
        .expect("stage write")
        .expect("staged write");

        let expected_native = std::collections::BTreeSet::from([target.clone()]);
        let native_paths = staged_pending_paths(&session_id).expect("pending paths");
        assert_eq!(native_paths, expected_native);

        let status = staged_status(&session_id).expect("staged status");
        assert_eq!(status.pending_writes.len(), 1);
        let expected_agent_path = crate::tools::args::to_agent_path(&target);
        assert_eq!(status.pending_writes[0].path, expected_agent_path);

        let _ = super::remove_session_state(&session_id, Some(&root));
    }
}
1701
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// `.`, `..`, `...` and the empty string must never come back verbatim;
    /// otherwise the `dir.push(..)` in `session_dir` could escape the
    /// staged-state root.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        for evil in ["..", ".", "...", ""] {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let all_dots = safe.bytes().all(|b| b == b'.');
            assert!(!all_dots, "`{evil}` -> `{safe}` is still all dots");

            // Every component must be a normal name (no ParentDir/CurDir).
            let only_normal = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(only_normal, "`{safe}` contains a traversal component");
        }
    }

    /// Ids made only of allowed characters are kept as they are.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// Even for a `..` session id, the session directory has no parent-dir
    /// component and stays below `<root>/<STATE_REL>`.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");
        let escapes = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!escapes, "session_dir({dir:?}) escapes via `..`");

        let mut staged = std::path::PathBuf::from("/workspace");
        staged.extend(STATE_REL);
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}