Skip to main content

harn_hostlib/
fs.rs

1//! Session-scoped staged filesystem mode.
2//!
3//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
4//! mutations land in a durable per-session overlay under
5//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
6//! that overlay first, so agent loops see their own pending writes without
7//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Registered names of the staged-filesystem builtins. Each one is bound to
// its handler in `FsCapability::register_builtins` and is also used as the
// `builtin` tag on errors raised by that handler's code path.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

// Version number of the staged-session manifest format.
// NOTE(review): the code that stamps/checks it is outside this view — confirm.
const MANIFEST_VERSION: u32 = 1;
// Path components of the staging area (`.harn/state/staged`), matching the
// overlay location named in the module docs.
// NOTE(review): presumably joined onto the session root by `session_dir` — confirm.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Hostlib filesystem capability handle.
///
/// A field-less unit struct; its [`HostlibCapability`] implementation is what
/// registers the `fs` module builtins.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        registry.register_fn("fs", SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        registry.register_fn("fs", STATUS_BUILTIN, "staged_status", staged_status_builtin);
53        registry.register_fn("fs", COMMIT_BUILTIN, "commit_staged", commit_staged_builtin);
54        registry.register_fn(
55            "fs",
56            DISCARD_BUILTIN,
57            "discard_staged",
58            discard_staged_builtin,
59        );
60        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
61        // they share the deterministic-tools gate with `tools::*` file I/O.
62        registry.register_gated_fn(
63            "fs",
64            SAFE_TEXT_PATCH_BUILTIN,
65            "safe_text_patch",
66            safe_text_patch_builtin,
67        );
68        registry.register_gated_fn("fs", READ_TEXT_BUILTIN, "read_text", read_text_builtin);
69        registry.register_fn(
70            "fs",
71            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
72            "emit_safe_text_patch_result",
73            emit_safe_text_patch_result_builtin,
74        );
75    }
76}
77
/// Filesystem mode for one hostlib session.
///
/// Serialized in lowercase (`"immediate"` / `"staged"`), the same strings
/// `FsMode::as_str` returns.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FsMode {
    /// Mutations apply to the working tree immediately.
    Immediate,
    /// Mutations are recorded in the staging layer until committed.
    Staged,
}
87
88impl FsMode {
89    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
90        match raw {
91            "immediate" => Ok(Self::Immediate),
92            "staged" => Ok(Self::Staged),
93            other => Err(HostlibError::InvalidParameter {
94                builtin,
95                param: "mode",
96                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
97            }),
98        }
99    }
100
101    /// Wire string used by hostlib schemas.
102    pub fn as_str(self) -> &'static str {
103        match self {
104            Self::Immediate => "immediate",
105            Self::Staged => "staged",
106        }
107    }
108}
109
/// Serializable record of one session's staged state.
///
/// NOTE(review): presumably the on-disk manifest written by `persist_state`
/// and read back by `load_state`; both live outside this view — confirm.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct Manifest {
    /// Manifest format version. NOTE(review): presumably `MANIFEST_VERSION` — confirm.
    version: u32,
    /// Identifier of the session this manifest belongs to.
    session_id: String,
    /// Filesystem mode recorded for the session.
    mode: FsMode,
    /// Session root, stored in string form.
    root: String,
    /// Staged entries keyed by path in string form.
    entries: BTreeMap<String, StagedEntry>,
}
118
/// One staged change recorded for a path.
///
/// Serialized with an internal `kind` tag in snake_case (`"write"` or
/// `"delete"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum StagedEntry {
    /// A pending write whose body is stored separately in the staging layer.
    Write {
        /// Hash returned by `write_body` for the staged body.
        body_hash: String,
        /// Length of the staged body in bytes.
        len: u64,
        /// `now_ms()` timestamp taken when the entry was staged.
        created_at_ms: i64,
    },
    /// A pending delete of a path.
    Delete {
        /// Recursive flag passed by the caller that staged the delete.
        recursive: bool,
        /// `now_ms()` timestamp taken when the entry was staged.
        created_at_ms: i64,
    },
}
132
133impl StagedEntry {
134    fn created_at_ms(&self) -> i64 {
135        match self {
136            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
137                *created_at_ms
138            }
139        }
140    }
141
142    fn body_len(&self) -> u64 {
143        match self {
144            Self::Write { len, .. } => *len,
145            Self::Delete { .. } => 0,
146        }
147    }
148}
149
/// In-memory state for one session, stored in the process-wide session map
/// keyed by session id.
#[derive(Clone, Debug)]
struct SessionState {
    /// Identifier of the session.
    session_id: String,
    /// Current filesystem mode; new sessions start in `FsMode::Immediate`.
    mode: FsMode,
    /// Root associated with the session (normalized via `normalize_logical`
    /// when configured through `configure_session_root`).
    root: PathBuf,
    /// Pending staged changes keyed by normalized path.
    entries: BTreeMap<PathBuf, StagedEntry>,
}
157
/// Result of staging a write through `stage_write_or_none`.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path did not already exist in the overlay view.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
163
/// One directory entry as returned by the overlay-aware `read_dir`.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symlink.
    pub(crate) is_symlink: bool,
    /// Entry size. NOTE(review): presumably in bytes — confirm against `overlay_read_dir`.
    pub(crate) size: u64,
}
171
/// Summary of staged filesystem changes for one session.
///
/// Returned by `staged_status`, which builds it from the session state via
/// `status_from_state`.
#[derive(Clone, Debug)]
pub struct StagedStatus {
    /// Pending path changes, sorted by path.
    pub pending_writes: Vec<PendingWrite>,
    /// Bytes stored in staged write bodies.
    pub total_bytes_pending: u64,
    /// Age in milliseconds of the oldest pending change, or 0 when empty.
    pub oldest_pending_age_ms: i64,
}
182
/// One pending staged filesystem change.
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Absolute path affected by this staged change.
    pub path: String,
    /// Change kind (`write`, `delete`, or reserved future `move`).
    pub kind: &'static str,
    /// Bytes the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes the final staged view removes at this path.
    pub bytes_removed: u64,
}
195
/// Result returned after changing a session's filesystem mode.
///
/// Produced by `set_mode`.
#[derive(Clone, Debug)]
pub struct SetModeResult {
    /// Mode active before the change.
    pub previous_mode: FsMode,
}
202
/// Result returned after applying staged changes to disk.
///
/// Each entry `commit_staged` attempts lands in exactly one of the two lists.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths successfully applied to disk.
    pub committed_paths: Vec<String>,
    /// Paths that failed to apply, with human-readable reasons
    /// (`(path, reason)` pairs).
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
211
/// Result returned after dropping staged changes.
///
/// Produced by `discard_staged`.
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entries were removed.
    pub discarded_paths: Vec<String>,
}
218
/// Process-wide map of session id to in-memory session state, created lazily
/// on first access through `sessions()`.
static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
220
221fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
222    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
223}
224
225/// Lock the session map, panicking with one canonical message if a prior
226/// holder poisoned the mutex. Every accessor goes through here so the poison
227/// policy and message live in exactly one place.
228fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
229    sessions()
230        .lock()
231        .expect("hostlib fs session mutex poisoned")
232}
233
234/// Remember the workspace root associated with a live session.
235///
236/// ACP calls this when a prompt starts so Harn code can call
237/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
238pub fn configure_session_root(session_id: &str, root: &Path) {
239    if session_id.trim().is_empty() {
240        return;
241    }
242    let root = normalize_logical(root);
243    let mut guard = lock_sessions();
244    match guard.get_mut(session_id) {
245        Some(state) if state.entries.is_empty() => {
246            state.root = root;
247        }
248        Some(_) => {}
249        None => {
250            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
251                session_id: session_id.to_string(),
252                mode: FsMode::Immediate,
253                root,
254                entries: BTreeMap::new(),
255            });
256            guard.insert(session_id.to_string(), state);
257        }
258    }
259}
260
261/// Return the root currently associated with a hostlib session.
262pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
263    if session_id.trim().is_empty() {
264        return None;
265    }
266    let guard = lock_sessions();
267    guard.get(session_id).map(|state| state.root.clone())
268}
269
270/// Set a session's filesystem mode.
271pub fn set_mode(
272    session_id: &str,
273    mode: FsMode,
274    root: Option<&Path>,
275) -> Result<SetModeResult, HostlibError> {
276    validate_session_id(SET_MODE_BUILTIN, session_id)?;
277    let mut guard = lock_sessions();
278    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
279    let previous_mode = state.mode;
280    state.mode = mode;
281    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
282        builtin: SET_MODE_BUILTIN,
283        message: err,
284    })?;
285    guard.insert(session_id.to_string(), state);
286    Ok(SetModeResult { previous_mode })
287}
288
289/// Return the staged status for a session.
290pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
291    validate_session_id(STATUS_BUILTIN, session_id)?;
292    let mut guard = lock_sessions();
293    let state = state_for_locked(&mut guard, session_id, None)?;
294    let status = status_from_state(&state);
295    guard.insert(session_id.to_string(), state);
296    Ok(status)
297}
298
299/// Commit staged changes for all paths or for a filtered path list.
300pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
301    validate_session_id(COMMIT_BUILTIN, session_id)?;
302    let mut guard = lock_sessions();
303    let mut state = state_for_locked(&mut guard, session_id, None)?;
304    let selected = selected_paths(&state, paths);
305    let mut committed_paths = Vec::new();
306    let mut failed_paths_with_reasons = Vec::new();
307
308    for path in selected {
309        let Some(entry) = state.entries.get(&path).cloned() else {
310            continue;
311        };
312        let path_label = path.to_string_lossy().into_owned();
313        // The overlay always lives inside the workspace, but commit flushes
314        // to the *target* working-tree path. Enforce workspace-root scope
315        // against that target so a staged entry — possibly persisted under
316        // a looser policy in an earlier session — can never write outside
317        // the roots active at commit time.
318        let access = match entry {
319            StagedEntry::Write { .. } => FsAccess::Write,
320            StagedEntry::Delete { .. } => FsAccess::Delete,
321        };
322        if let Err(violation) = check_fs_path_scope(&path, access) {
323            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
324            continue;
325        }
326        match commit_entry(&state, &path, &entry) {
327            Ok(()) => {
328                state.entries.remove(&path);
329                committed_paths.push(path_label);
330            }
331            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
332        }
333    }
334
335    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
336        builtin: COMMIT_BUILTIN,
337        message: err,
338    })?;
339    emit_staged_update(&state);
340    guard.insert(session_id.to_string(), state);
341    Ok(CommitResult {
342        committed_paths,
343        failed_paths_with_reasons,
344    })
345}
346
347/// Discard staged changes for all paths or for a filtered path list.
348pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
349    validate_session_id(DISCARD_BUILTIN, session_id)?;
350    let mut guard = lock_sessions();
351    let mut state = state_for_locked(&mut guard, session_id, None)?;
352    let selected = selected_paths(&state, paths);
353    let mut discarded_paths = Vec::new();
354    for path in selected {
355        if state.entries.remove(&path).is_some() {
356            discarded_paths.push(path.to_string_lossy().into_owned());
357        }
358    }
359    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
360        builtin: DISCARD_BUILTIN,
361        message: err,
362    })?;
363    emit_staged_update(&state);
364    guard.insert(session_id.to_string(), state);
365    Ok(DiscardResult { discarded_paths })
366}
367
368/// Remove all persisted staged-fs state for a caller-owned throw-away session.
369///
370/// Normal agent sessions keep their manifest after `discard_staged` so hosts can
371/// continue reporting session state. Transient dry-run sessions own their ids,
372/// though, and should remove both the in-memory entry and on-disk overlay after
373/// their preview is rendered.
374pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
375    validate_session_id(DISCARD_BUILTIN, session_id)?;
376    let mut guard = lock_sessions();
377    let state = match guard.remove(session_id) {
378        Some(state) => state,
379        None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
380            HostlibError::Backend {
381                builtin: DISCARD_BUILTIN,
382                message: err,
383            }
384        })?,
385    };
386    let dir = session_dir(&state.root, &state.session_id);
387    match stdfs::remove_dir_all(&dir) {
388        Ok(()) => Ok(()),
389        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
390        Err(err) => Err(HostlibError::Backend {
391            builtin: DISCARD_BUILTIN,
392            message: format!("remove staged session {}: {err}", dir.display()),
393        }),
394    }
395}
396
397pub(crate) fn read(
398    path: &Path,
399    explicit_session_id: Option<&str>,
400) -> Option<std::io::Result<Vec<u8>>> {
401    let session_id = active_session_id(explicit_session_id)?;
402    let mut guard = lock_sessions();
403    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
404    let result = if state.mode == FsMode::Staged {
405        overlay_read(&state, path)
406    } else {
407        None
408    };
409    guard.insert(session_id, state);
410    result
411}
412
413pub(crate) fn read_to_string(
414    path: &Path,
415    explicit_session_id: Option<&str>,
416) -> Option<std::io::Result<String>> {
417    read(path, explicit_session_id).map(|result| {
418        result.and_then(|bytes| {
419            String::from_utf8(bytes).map_err(|err| {
420                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
421            })
422        })
423    })
424}
425
426pub(crate) fn read_dir(
427    path: &Path,
428    explicit_session_id: Option<&str>,
429) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
430    let session_id = active_session_id(explicit_session_id)?;
431    let mut guard = lock_sessions();
432    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
433    let result = if state.mode == FsMode::Staged {
434        Some(overlay_read_dir(&state, path))
435    } else {
436        None
437    };
438    guard.insert(session_id, state);
439    result
440}
441
442pub(crate) fn stage_write_or_none(
443    builtin: &'static str,
444    path: &Path,
445    bytes: &[u8],
446    create_parents: bool,
447    overwrite: bool,
448    explicit_session_id: Option<&str>,
449) -> Result<Option<WriteOutcome>, HostlibError> {
450    let Some(session_id) = active_session_id(explicit_session_id) else {
451        return Ok(None);
452    };
453    let mut guard = lock_sessions();
454    let mut state = state_for_locked(&mut guard, &session_id, None)?;
455    if state.mode != FsMode::Staged {
456        guard.insert(session_id, state);
457        return Ok(None);
458    }
459
460    let key = normalize_logical(path);
461    let existed = overlay_exists(&state, &key);
462    if existed && !overwrite {
463        guard.insert(session_id, state);
464        return Err(HostlibError::Backend {
465            builtin,
466            message: format!("`{}` exists and overwrite=false", key.display()),
467        });
468    }
469    if !create_parents && !parent_exists(&state, &key) {
470        guard.insert(session_id, state);
471        return Err(HostlibError::Backend {
472            builtin,
473            message: format!("parent directory for `{}` does not exist", key.display()),
474        });
475    }
476
477    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
478        builtin,
479        message: err,
480    })?;
481    state.entries.insert(
482        key.clone(),
483        StagedEntry::Write {
484            body_hash: hash,
485            len: bytes.len() as u64,
486            created_at_ms: now_ms(),
487        },
488    );
489    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
490        builtin,
491        message: err,
492    })?;
493    emit_staged_update(&state);
494    guard.insert(session_id, state);
495    Ok(Some(WriteOutcome {
496        created: !existed,
497        bytes_written: bytes.len(),
498    }))
499}
500
501pub(crate) fn stage_delete_or_none(
502    builtin: &'static str,
503    path: &Path,
504    recursive: bool,
505    explicit_session_id: Option<&str>,
506) -> Result<Option<bool>, HostlibError> {
507    let Some(session_id) = active_session_id(explicit_session_id) else {
508        return Ok(None);
509    };
510    let mut guard = lock_sessions();
511    let mut state = state_for_locked(&mut guard, &session_id, None)?;
512    if state.mode != FsMode::Staged {
513        guard.insert(session_id, state);
514        return Ok(None);
515    }
516
517    let key = normalize_logical(path);
518    let staged_targets = staged_paths_under(&state, &key);
519    let disk_exists = key.exists();
520    if !disk_exists && staged_targets.is_empty() {
521        guard.insert(session_id, state);
522        return Ok(Some(false));
523    }
524
525    if !disk_exists {
526        for staged in staged_targets {
527            state.entries.remove(&staged);
528        }
529    } else {
530        validate_delete_shape(builtin, &key, recursive)?;
531        for staged in staged_targets {
532            state.entries.remove(&staged);
533        }
534        state.entries.insert(
535            key.clone(),
536            StagedEntry::Delete {
537                recursive,
538                created_at_ms: now_ms(),
539            },
540        );
541    }
542    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
543        builtin,
544        message: err,
545    })?;
546    emit_staged_update(&state);
547    guard.insert(session_id, state);
548    Ok(Some(true))
549}
550
/// Outcome of one [`safe_text_patch`] call. `result` carries the structured
/// discriminant used by the wire/JSON shape; bytes changed on disk (or in
/// the staged overlay) only when it is `Applied`.
#[derive(Clone, Debug)]
pub struct SafeTextPatchOutcome {
    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
    pub result: SafeTextPatchResult,
    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed.
    /// A missing file is hashed as empty content.
    pub current_hash: String,
    /// `sha256:HEX` of the requested post-image.
    pub after_hash: String,
    /// `true` when the call applied and the file did not exist before it;
    /// always `false` on `stale_base` or `no_op`.
    pub created: bool,
    /// Bytes written; `0` on `stale_base` or `no_op`.
    pub bytes_written: usize,
}
567
/// Which of the three ways a [`safe_text_patch`] call ended.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The pre-image hash matched (or no expected hash was supplied) and the
    /// post-image differs from the pre-image, so bytes were written.
    Applied,
    /// `expected_hash` disagreed with the observed pre-image hash, so nothing
    /// was written. Callers should re-read and retry.
    StaleBase,
    /// The pre-image hash matched and the post-image equals the pre-image;
    /// the write was skipped to avoid spurious timestamps and overlay churn.
    NoOp,
}

impl SafeTextPatchResult {
    /// Wire label for this discriminant.
    fn as_str(self) -> &'static str {
        use SafeTextPatchResult::{Applied, NoOp, StaleBase};
        match self {
            NoOp => "no_op",
            StaleBase => "stale_base",
            Applied => "applied",
        }
    }
}
591
592/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
593/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
594/// safe-text-patch surface.
595fn hash_label(bytes: &[u8]) -> String {
596    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
597}
598
599/// Atomic compare-and-swap-style text write.
600///
601/// Reads the current bytes at `path` through the staged-fs overlay (when a
602/// session is active) so concurrent agent edits see each other's pending
603/// writes. If `expected_hash` is supplied and differs from the observed
604/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
605/// mutating any state. On a hash match the post-image is written through
606/// the same overlay path, keeping the read and the write atomic with
607/// respect to other staged-fs consumers in the same process.
608///
609/// Atomicity:
610///
611/// - When a session is in staged mode, the read, hash check, and write
612///   all happen under a single acquisition of the sessions mutex, so a
613///   sibling thread cannot stage a write into the window between the
614///   pre-image snapshot and the commit.
615/// - When the call routes through disk (no active session, or session in
616///   immediate mode), the write goes through an atomic rename-into-place
617///   so partial-write tearing is impossible. Cross-process races are
618///   intentionally out of scope — the staged-fs overlay is the
619///   collision-rejection layer.
620pub fn safe_text_patch(
621    path: &Path,
622    content: &str,
623    expected_hash: Option<&str>,
624    session_id: Option<&str>,
625    create_parents: bool,
626    overwrite: bool,
627) -> Result<SafeTextPatchOutcome, HostlibError> {
628    let new_bytes = content.as_bytes();
629    let after_hash = hash_label(new_bytes);
630
631    if let Some(outcome) = safe_text_patch_staged(
632        path,
633        new_bytes,
634        expected_hash,
635        session_id,
636        create_parents,
637        overwrite,
638        &after_hash,
639    )? {
640        return Ok(outcome);
641    }
642
643    safe_text_patch_disk(
644        path,
645        new_bytes,
646        expected_hash,
647        create_parents,
648        overwrite,
649        after_hash,
650    )
651}
652
/// Atomic CAS path for a session in `staged` mode. Holds the sessions
/// mutex through the entire read → hash → check → write so concurrent
/// agents in the same process cannot race the snapshot. Returns `None`
/// when no session is active or the session is in `immediate` mode, so
/// the caller can fall through to the disk path.
///
/// `after_hash` is the precomputed `sha256:HEX` label of `new_bytes`; it is
/// copied into every outcome this function returns.
///
/// # Errors
///
/// Returns `HostlibError::Backend` tagged with the safe-text-patch builtin
/// when the pre-image cannot be read (other than "not found"), when the path
/// exists in the overlay view and `overwrite` is false, when the parent is
/// missing and `create_parents` is false, or when storing the body or
/// persisting the state fails. Also fails when the session state cannot be
/// obtained.
// The argument list mirrors `safe_text_patch` plus the precomputed hash.
#[allow(clippy::too_many_arguments)]
fn safe_text_patch_staged(
    path: &Path,
    new_bytes: &[u8],
    expected_hash: Option<&str>,
    session_id: Option<&str>,
    create_parents: bool,
    overwrite: bool,
    after_hash: &str,
) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
    let Some(session) = active_session_id(session_id) else {
        return Ok(None);
    };
    // The guard stays alive until the function returns, so every step below
    // runs under the sessions lock.
    let mut guard = lock_sessions();
    let mut state = state_for_locked(&mut guard, &session, None)?;
    if state.mode != FsMode::Staged {
        guard.insert(session, state);
        return Ok(None);
    }

    let key = normalize_logical(path);
    // Pre-image: the overlay answers first; `None` from the overlay falls
    // back to a direct disk read. "Not found" on either side means the file
    // does not exist yet and is treated as empty content.
    let (existing_bytes, existed) = match overlay_read(&state, path) {
        Some(Ok(bytes)) => (bytes, true),
        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
        Some(Err(err)) => {
            guard.insert(session, state);
            return Err(HostlibError::Backend {
                builtin: SAFE_TEXT_PATCH_BUILTIN,
                message: format!("read `{}`: {err}", path.display()),
            });
        }
        None => match stdfs::read(path) {
            Ok(bytes) => (bytes, true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
            Err(err) => {
                guard.insert(session, state);
                return Err(HostlibError::Backend {
                    builtin: SAFE_TEXT_PATCH_BUILTIN,
                    message: format!("read `{}`: {err}", path.display()),
                });
            }
        },
    };
    let current_hash = hash_label(&existing_bytes);

    // Compare-and-swap guard: a mismatching expected hash ends the call
    // before anything is staged.
    if let Some(expected) = expected_hash {
        if expected != current_hash {
            guard.insert(session, state);
            return Ok(Some(SafeTextPatchOutcome {
                result: SafeTextPatchResult::StaleBase,
                current_hash,
                after_hash: after_hash.to_string(),
                created: false,
                bytes_written: 0,
            }));
        }
    }

    // Identical content on an existing file: skip the write entirely.
    if existed && existing_bytes == new_bytes {
        guard.insert(session, state);
        return Ok(Some(SafeTextPatchOutcome {
            result: SafeTextPatchResult::NoOp,
            current_hash,
            after_hash: after_hash.to_string(),
            created: false,
            bytes_written: 0,
        }));
    }

    // Same preconditions `stage_write_or_none` enforces before staging.
    let overlay_existed = overlay_exists(&state, &key);
    if overlay_existed && !overwrite {
        guard.insert(session, state);
        return Err(HostlibError::Backend {
            builtin: SAFE_TEXT_PATCH_BUILTIN,
            message: format!("`{}` exists and overwrite=false", key.display()),
        });
    }
    if !create_parents && !parent_exists(&state, &key) {
        guard.insert(session, state);
        return Err(HostlibError::Backend {
            builtin: SAFE_TEXT_PATCH_BUILTIN,
            message: format!("parent directory for `{}` does not exist", key.display()),
        });
    }

    // NOTE(review): the two `?` exits below return without `guard.insert`,
    // unlike the earlier exits — whether that matters depends on
    // `state_for_locked`, which is outside this view.
    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
        builtin: SAFE_TEXT_PATCH_BUILTIN,
        message: err,
    })?;
    state.entries.insert(
        key.clone(),
        StagedEntry::Write {
            body_hash,
            len: new_bytes.len() as u64,
            created_at_ms: now_ms(),
        },
    );
    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
        builtin: SAFE_TEXT_PATCH_BUILTIN,
        message: err,
    })?;
    emit_staged_update(&state);
    guard.insert(session, state);

    Ok(Some(SafeTextPatchOutcome {
        result: SafeTextPatchResult::Applied,
        current_hash,
        after_hash: after_hash.to_string(),
        created: !existed,
        bytes_written: new_bytes.len(),
    }))
}
770
771/// Disk path for callers without an active staged session. Uses
772/// `atomic_write` so the post-image lands via rename-into-place rather
773/// than an open/truncate/write/close sequence — readers either see the
774/// pre-image or the post-image, never a torn write.
775fn safe_text_patch_disk(
776    path: &Path,
777    new_bytes: &[u8],
778    expected_hash: Option<&str>,
779    create_parents: bool,
780    overwrite: bool,
781    after_hash: String,
782) -> Result<SafeTextPatchOutcome, HostlibError> {
783    let (existing_bytes, existed) = match stdfs::read(path) {
784        Ok(bytes) => (bytes, true),
785        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
786        Err(err) => {
787            return Err(HostlibError::Backend {
788                builtin: SAFE_TEXT_PATCH_BUILTIN,
789                message: format!("read `{}`: {err}", path.display()),
790            });
791        }
792    };
793    let current_hash = hash_label(&existing_bytes);
794
795    if let Some(expected) = expected_hash {
796        if expected != current_hash {
797            return Ok(SafeTextPatchOutcome {
798                result: SafeTextPatchResult::StaleBase,
799                current_hash,
800                after_hash,
801                created: false,
802                bytes_written: 0,
803            });
804        }
805    }
806
807    if existed && existing_bytes == new_bytes {
808        return Ok(SafeTextPatchOutcome {
809            result: SafeTextPatchResult::NoOp,
810            current_hash,
811            after_hash,
812            created: false,
813            bytes_written: 0,
814        });
815    }
816    if existed && !overwrite {
817        return Err(HostlibError::Backend {
818            builtin: SAFE_TEXT_PATCH_BUILTIN,
819            message: format!("`{}` exists and overwrite=false", path.display()),
820        });
821    }
822    if !create_parents {
823        if let Some(parent) = path.parent() {
824            if !parent.as_os_str().is_empty() && !parent.is_dir() {
825                return Err(HostlibError::Backend {
826                    builtin: SAFE_TEXT_PATCH_BUILTIN,
827                    message: format!(
828                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
829                        path.display()
830                    ),
831                });
832            }
833        }
834    }
835
836    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
837    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
838        builtin: SAFE_TEXT_PATCH_BUILTIN,
839        message: format!("write `{}`: {err}", path.display()),
840    })?;
841
842    Ok(SafeTextPatchOutcome {
843        result: SafeTextPatchResult::Applied,
844        current_hash,
845        after_hash,
846        created: !existed,
847        bytes_written: new_bytes.len(),
848    })
849}
850
851/// Read the pre-image through the staged-fs overlay (when active),
852/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
853/// `builtin` is the caller's tag — used so backend errors point at the
854/// right builtin name in diagnostics.
855fn read_existing(
856    builtin: &'static str,
857    path: &Path,
858    session_id: Option<&str>,
859) -> Result<(Vec<u8>, bool), HostlibError> {
860    if let Some(result) = read(path, session_id) {
861        return match result {
862            Ok(bytes) => Ok((bytes, true)),
863            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
864            Err(err) => Err(HostlibError::Backend {
865                builtin,
866                message: format!("read `{}`: {err}", path.display()),
867            }),
868        };
869    }
870    match stdfs::read(path) {
871        Ok(bytes) => Ok((bytes, true)),
872        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
873        Err(err) => Err(HostlibError::Backend {
874            builtin,
875            message: format!("read `{}`: {err}", path.display()),
876        }),
877    }
878}
879
880fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
881    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
882    let dict = raw.as_ref();
883    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
884    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
885    let path = Path::new(&path_str);
886    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
887
888    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
889    let hash = hash_label(&bytes);
890    let content = match std::str::from_utf8(&bytes) {
891        Ok(s) => s.to_string(),
892        Err(err) => {
893            return Err(HostlibError::Backend {
894                builtin: READ_TEXT_BUILTIN,
895                message: format!("`{path_str}` is not valid UTF-8: {err}"),
896            });
897        }
898    };
899    let bytes_len = bytes.len() as i64;
900    Ok(build_dict([
901        ("path", str_value(&path_str)),
902        ("content", str_value(&content)),
903        ("sha256", str_value(&hash)),
904        ("size", VmValue::Int(bytes_len)),
905        ("exists", VmValue::Bool(existed)),
906    ]))
907}
908
909fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
910    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
911    let dict = raw.as_ref();
912
913    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
914    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
915    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
916    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
917    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
918    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
919
920    enforce_path_scope(
921        SAFE_TEXT_PATCH_BUILTIN,
922        Path::new(&path_str),
923        FsAccess::Write,
924    )?;
925    let outcome = safe_text_patch(
926        Path::new(&path_str),
927        &content,
928        expected_hash.as_deref(),
929        session_id.as_deref(),
930        create_parents,
931        overwrite,
932    )?;
933
934    let entries: Vec<(&'static str, VmValue)> = vec![
935        ("path", str_value(&path_str)),
936        ("result", str_value(outcome.result.as_str())),
937        (
938            "applied",
939            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
940        ),
941        (
942            "stale_base",
943            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
944        ),
945        ("current_hash", str_value(&outcome.current_hash)),
946        ("before_sha256", str_value(&outcome.current_hash)),
947        ("after_sha256", str_value(&outcome.after_hash)),
948        ("created", VmValue::Bool(outcome.created)),
949        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
950        (
951            "expected_hash",
952            match expected_hash.as_deref() {
953                Some(hash) => str_value(hash),
954                None => VmValue::Nil,
955            },
956        ),
957    ];
958    Ok(build_dict(entries))
959}
960
961fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
962    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
963    let dict = raw.as_ref();
964
965    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
966    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
967    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
968    let bytes_written = optional_int(
969        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
970        dict,
971        "bytes_written",
972        0,
973    )?;
974    let failed_hunk_index = match dict.get("failed_hunk_index") {
975        None | Some(VmValue::Nil) => None,
976        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
977        Some(other) => {
978            return Err(HostlibError::InvalidParameter {
979                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
980                param: "failed_hunk_index",
981                message: format!("expected non-negative integer, got {}", other.type_name()),
982            });
983        }
984    };
985    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
986        .or_else(harn_vm::agent_sessions::current_session_id);
987
988    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
989        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
990            session_id,
991            path,
992            result,
993            hunks_count: hunks_count.max(0) as usize,
994            bytes_written: bytes_written.max(0) as u64,
995            failed_hunk_index,
996        });
997        Ok(VmValue::Bool(true))
998    } else {
999        // Silently no-op when no session is active — telemetry without a
1000        // session has nowhere to route. Caller can opt in by always
1001        // passing session_id explicitly.
1002        Ok(VmValue::Bool(false))
1003    }
1004}
1005
1006fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1007    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1008    let dict = raw.as_ref();
1009    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1010    let mode = FsMode::parse(
1011        SET_MODE_BUILTIN,
1012        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1013    )?;
1014    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1015    let result = set_mode(&session_id, mode, root.as_deref())?;
1016    Ok(build_dict([(
1017        "previous_mode",
1018        str_value(result.previous_mode.as_str()),
1019    )]))
1020}
1021
1022fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1023    let raw = dict_arg(STATUS_BUILTIN, args)?;
1024    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1025    Ok(status_to_value(staged_status(&session_id)?))
1026}
1027
1028fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1029    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1030    let dict = raw.as_ref();
1031    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1032    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1033    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1034}
1035
1036fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1037    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1038    let dict = raw.as_ref();
1039    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1040    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1041    Ok(discard_result_to_value(discard_staged(
1042        &session_id,
1043        &paths,
1044    )?))
1045}
1046
1047fn state_for_locked(
1048    guard: &mut BTreeMap<String, SessionState>,
1049    session_id: &str,
1050    root: Option<PathBuf>,
1051) -> Result<SessionState, HostlibError> {
1052    if let Some(existing) = guard.get(session_id) {
1053        let mut state = existing.clone();
1054        if let Some(root) = root {
1055            if state.entries.is_empty() {
1056                state.root = root;
1057            }
1058        }
1059        return Ok(state);
1060    }
1061    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1062        builtin: SET_MODE_BUILTIN,
1063        message: err,
1064    })?;
1065    Ok(state)
1066}
1067
1068fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1069    let root = root.unwrap_or_else(default_root);
1070    let manifest_path = manifest_path(&root, session_id);
1071    if manifest_path.exists() {
1072        let text = stdfs::read_to_string(&manifest_path)
1073            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1074        let manifest: Manifest = serde_json::from_str(&text)
1075            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1076        if manifest.version != MANIFEST_VERSION {
1077            return Err(format!(
1078                "unsupported staged fs manifest version {} in {}",
1079                manifest.version,
1080                manifest_path.display()
1081            ));
1082        }
1083        if manifest.session_id != session_id {
1084            return Err(format!(
1085                "staged fs manifest session id mismatch in {}",
1086                manifest_path.display()
1087            ));
1088        }
1089        return Ok(SessionState {
1090            session_id: manifest.session_id,
1091            mode: manifest.mode,
1092            root: normalize_logical(Path::new(&manifest.root)),
1093            entries: manifest
1094                .entries
1095                .into_iter()
1096                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1097                .collect(),
1098        });
1099    }
1100    Ok(SessionState {
1101        session_id: session_id.to_string(),
1102        mode: FsMode::Immediate,
1103        root,
1104        entries: BTreeMap::new(),
1105    })
1106}
1107
1108fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1109    let dir = session_dir(&state.root, &state.session_id);
1110    stdfs::create_dir_all(dir.join("bodies"))
1111        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1112    let manifest = Manifest {
1113        version: MANIFEST_VERSION,
1114        session_id: state.session_id.clone(),
1115        mode: state.mode,
1116        root: state.root.to_string_lossy().into_owned(),
1117        entries: state
1118            .entries
1119            .iter()
1120            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1121            .collect(),
1122    };
1123    let bytes = serde_json::to_vec_pretty(&manifest)
1124        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1125    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1126    append_journal(state, op, path)?;
1127    prune_unreferenced_bodies(state);
1128    Ok(())
1129}
1130
1131fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1132    let dir = session_dir(&state.root, &state.session_id);
1133    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1134    let line = serde_json::to_string(&serde_json::json!({
1135        "ts_ms": now_ms(),
1136        "op": op,
1137        "path": path.map(|path| path.to_string_lossy().into_owned()),
1138        "pending_count": state.entries.len(),
1139    }))
1140    .map_err(|err| format!("serialize staged journal: {err}"))?;
1141    let mut file = stdfs::OpenOptions::new()
1142        .create(true)
1143        .append(true)
1144        .open(dir.join("journal.jsonl"))
1145        .map_err(|err| format!("open staged journal: {err}"))?;
1146    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1147}
1148
1149fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1150    let hash = hex::encode(Sha256::digest(bytes));
1151    let path = session_dir(&state.root, &state.session_id)
1152        .join("bodies")
1153        .join(&hash);
1154    if !path.exists() {
1155        atomic_write(&path, bytes)?;
1156    }
1157    Ok(hash)
1158}
1159
1160fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1161    stdfs::read(
1162        session_dir(&state.root, &state.session_id)
1163            .join("bodies")
1164            .join(hash),
1165    )
1166}
1167
1168fn prune_unreferenced_bodies(state: &SessionState) {
1169    let live: BTreeSet<String> = state
1170        .entries
1171        .values()
1172        .filter_map(|entry| match entry {
1173            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1174            StagedEntry::Delete { .. } => None,
1175        })
1176        .collect();
1177    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1178    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1179        return;
1180    };
1181    for entry in entries.flatten() {
1182        let name = entry.file_name().to_string_lossy().into_owned();
1183        if !live.contains(&name) {
1184            let _ = stdfs::remove_file(entry.path());
1185        }
1186    }
1187}
1188
1189fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1190    if let Some(parent) = path.parent() {
1191        stdfs::create_dir_all(parent)
1192            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1193    }
1194    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1195    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1196    match stdfs::rename(&tmp, path) {
1197        Ok(()) => Ok(()),
1198        Err(err) => {
1199            let _ = stdfs::remove_file(path);
1200            stdfs::rename(&tmp, path).map_err(|retry| {
1201                format!(
1202                    "rename {} to {}: {err}; retry: {retry}",
1203                    tmp.display(),
1204                    path.display()
1205                )
1206            })
1207        }
1208    }
1209}
1210
1211fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1212    match entry {
1213        StagedEntry::Write { body_hash, .. } => {
1214            let bytes = read_body(state, body_hash)
1215                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1216            atomic_write(path, &bytes)
1217        }
1218        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1219            Ok(metadata) if metadata.is_dir() => {
1220                if *recursive {
1221                    stdfs::remove_dir_all(path)
1222                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1223                } else {
1224                    stdfs::remove_dir(path)
1225                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1226                }
1227            }
1228            Ok(_) => stdfs::remove_file(path)
1229                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1230            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1231            Err(err) => Err(format!("stat {}: {err}", path.display())),
1232        },
1233    }
1234}
1235
1236fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1237    let key = normalize_logical(path);
1238    if let Some(entry) = state.entries.get(&key) {
1239        return Some(match entry {
1240            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1241            StagedEntry::Delete { .. } => Err(not_found(&key)),
1242        });
1243    }
1244    if deleted_ancestor(state, &key) {
1245        return Some(Err(not_found(&key)));
1246    }
1247    None
1248}
1249
1250fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1251    let dir_key = normalize_logical(path);
1252    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1253        || deleted_ancestor(state, &dir_key)
1254        || matches!(
1255            state.entries.get(&dir_key),
1256            Some(StagedEntry::Delete { .. })
1257        )
1258    {
1259        return Err(not_found(&dir_key));
1260    }
1261    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1262        return Err(not_found(&dir_key));
1263    }
1264
1265    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1266    if path.exists() {
1267        for entry in stdfs::read_dir(path)? {
1268            let entry = entry?;
1269            let name = entry.file_name().to_string_lossy().into_owned();
1270            let file_type = entry.file_type().ok();
1271            let metadata = entry.metadata().ok();
1272            entries.insert(
1273                name.clone(),
1274                OverlayDirEntry {
1275                    name,
1276                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1277                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1278                    size: metadata.map(|m| m.len()).unwrap_or(0),
1279                },
1280            );
1281        }
1282    }
1283
1284    for (path, entry) in &state.entries {
1285        let Some(name) = overlay_child_name(path, &dir_key) else {
1286            continue;
1287        };
1288        match entry {
1289            StagedEntry::Write { len, .. } => {
1290                let is_dir = path.parent() != Some(dir_key.as_path());
1291                entries.insert(
1292                    name.clone(),
1293                    OverlayDirEntry {
1294                        name,
1295                        is_dir,
1296                        is_symlink: false,
1297                        size: if is_dir { 0 } else { *len },
1298                    },
1299                );
1300            }
1301            StagedEntry::Delete { .. } => {
1302                if path.parent() == Some(dir_key.as_path()) {
1303                    entries.remove(&name);
1304                }
1305            }
1306        }
1307    }
1308
1309    Ok(entries.into_values().collect())
1310}
1311
/// Name of the first path component of `path` below `dir`.
///
/// Returns `None` when `path` is not under `dir`, when the two are equal
/// (nothing is left after stripping the prefix), or when the first remaining
/// component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let remainder = path.strip_prefix(dir).ok()?;
    if let Component::Normal(name) = remainder.components().next()? {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1321
1322fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1323    if let Some(entry) = state.entries.get(path) {
1324        return matches!(entry, StagedEntry::Write { .. });
1325    }
1326    if deleted_ancestor(state, path) {
1327        return false;
1328    }
1329    if has_staged_descendant(state, path) {
1330        return true;
1331    }
1332    path.exists()
1333}
1334
1335fn parent_exists(state: &SessionState, path: &Path) -> bool {
1336    let Some(parent) = path.parent() else {
1337        return true;
1338    };
1339    if parent.as_os_str().is_empty() {
1340        return true;
1341    }
1342    if let Some(entry) = state.entries.get(parent) {
1343        return !matches!(entry, StagedEntry::Delete { .. });
1344    }
1345    if deleted_ancestor(state, parent) {
1346        return false;
1347    }
1348    if has_staged_descendant(state, parent) {
1349        return true;
1350    }
1351    parent.is_dir()
1352}
1353
1354fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1355    state.entries.iter().any(|(candidate, entry)| {
1356        matches!(entry, StagedEntry::Delete { .. })
1357            && path != candidate.as_path()
1358            && path.starts_with(candidate)
1359    })
1360}
1361
1362fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1363    state.entries.iter().any(|(candidate, entry)| {
1364        matches!(entry, StagedEntry::Write { .. })
1365            && candidate != path
1366            && candidate.starts_with(path)
1367    })
1368}
1369
1370fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1371    state
1372        .entries
1373        .keys()
1374        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1375        .cloned()
1376        .collect()
1377}
1378
1379fn validate_delete_shape(
1380    builtin: &'static str,
1381    path: &Path,
1382    recursive: bool,
1383) -> Result<(), HostlibError> {
1384    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1385        return Ok(());
1386    };
1387    if metadata.is_dir() && !recursive {
1388        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1389            builtin,
1390            message: format!("read_dir `{}`: {err}", path.display()),
1391        })?;
1392        if entries.next().is_some() {
1393            return Err(HostlibError::Backend {
1394                builtin,
1395                message: format!(
1396                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1397                    path.display()
1398                ),
1399            });
1400        }
1401    }
1402    Ok(())
1403}
1404
1405fn status_from_state(state: &SessionState) -> StagedStatus {
1406    let now = now_ms();
1407    let mut pending_writes = Vec::new();
1408    let mut total_bytes_pending = 0u64;
1409    let mut oldest = None;
1410    for (path, entry) in &state.entries {
1411        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1412        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1413            old.min(entry.created_at_ms())
1414        }));
1415        let (kind, bytes_added, bytes_removed) = match entry {
1416            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1417            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1418        };
1419        pending_writes.push(PendingWrite {
1420            path: path.to_string_lossy().into_owned(),
1421            kind,
1422            bytes_added,
1423            bytes_removed,
1424        });
1425    }
1426    StagedStatus {
1427        pending_writes,
1428        total_bytes_pending,
1429        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1430    }
1431}
1432
1433fn disk_size(path: &Path) -> Option<u64> {
1434    let metadata = stdfs::symlink_metadata(path).ok()?;
1435    if metadata.is_file() {
1436        return Some(metadata.len());
1437    }
1438    if metadata.is_dir() {
1439        let mut total = 0u64;
1440        for entry in walkdir::WalkDir::new(path)
1441            .into_iter()
1442            .filter_map(Result::ok)
1443        {
1444            if let Ok(metadata) = entry.metadata() {
1445                if metadata.is_file() {
1446                    total = total.saturating_add(metadata.len());
1447                }
1448            }
1449        }
1450        return Some(total);
1451    }
1452    Some(metadata.len())
1453}
1454
1455fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1456    if paths.is_empty() {
1457        return state.entries.keys().cloned().collect();
1458    }
1459    let selected: BTreeSet<PathBuf> = paths
1460        .iter()
1461        .map(|path| normalize_logical(Path::new(path)))
1462        .collect();
1463    state
1464        .entries
1465        .keys()
1466        .filter(|path| selected.contains(*path))
1467        .cloned()
1468        .collect()
1469}
1470
1471fn active_session_id(explicit: Option<&str>) -> Option<String> {
1472    explicit
1473        .map(str::to_string)
1474        .or_else(harn_vm::agent_sessions::current_session_id)
1475        .filter(|id| !id.trim().is_empty())
1476}
1477
1478fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1479    if session_id.trim().is_empty() {
1480        return Err(HostlibError::InvalidParameter {
1481            builtin,
1482            param: "session_id",
1483            message: "must not be empty".to_string(),
1484        });
1485    }
1486    Ok(())
1487}
1488
/// Default root for staged state: the process's current directory, or `.`
/// when the current directory cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1492
1493fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1494    let mut dir = root.to_path_buf();
1495    for component in STATE_REL {
1496        dir.push(component);
1497    }
1498    dir.push(sanitize_component(session_id));
1499    dir
1500}
1501
1502fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1503    session_dir(root, session_id).join("manifest.json")
1504}
1505
1506fn sanitize_component(input: &str) -> String {
1507    let sanitized: String = input
1508        .chars()
1509        .map(|ch| match ch {
1510            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1511            _ => '_',
1512        })
1513        .collect();
1514    // `.` is allowed inside a name, but a component that is empty or *only*
1515    // dots (`.`, `..`, `...`) is a path-traversal / current-dir token, not a
1516    // safe single component — `session_dir`'s `dir.push("..")` would escape
1517    // the staged-state root. Force the hashed form so the result is always a
1518    // genuine, traversal-free directory name.
1519    let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1520    if sanitized == input && !is_dotted {
1521        sanitized
1522    } else {
1523        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1524        format!("{sanitized}-{}", &hash[..12])
1525    }
1526}
1527
1528fn normalize_logical(path: &Path) -> PathBuf {
1529    let absolute = if path.is_absolute() {
1530        path.to_path_buf()
1531    } else {
1532        default_root().join(path)
1533    };
1534    let mut out = PathBuf::new();
1535    for component in absolute.components() {
1536        match component {
1537            Component::ParentDir => {
1538                out.pop();
1539            }
1540            Component::CurDir => {}
1541            other => out.push(other),
1542        }
1543    }
1544    out
1545}
1546
/// Build the `NotFound` I/O error the overlay reports for a path that is
/// staged as deleted or absent.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1553
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reports a time before the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1560
1561fn emit_staged_update(state: &SessionState) {
1562    let status = status_from_state(state);
1563    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1564        session_id: state.session_id.clone(),
1565        pending_count: status.pending_writes.len(),
1566        total_bytes: status.total_bytes_pending,
1567    });
1568}
1569
1570fn pending_write_to_value(write: PendingWrite) -> VmValue {
1571    build_dict([
1572        ("path", str_value(&write.path)),
1573        ("kind", str_value(write.kind)),
1574        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1575        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1576    ])
1577}
1578
1579fn status_to_value(status: StagedStatus) -> VmValue {
1580    build_dict([
1581        (
1582            "pending_writes",
1583            VmValue::List(Arc::new(
1584                status
1585                    .pending_writes
1586                    .into_iter()
1587                    .map(pending_write_to_value)
1588                    .collect(),
1589            )),
1590        ),
1591        (
1592            "total_bytes_pending",
1593            VmValue::Int(status.total_bytes_pending as i64),
1594        ),
1595        (
1596            "oldest_pending_age_ms",
1597            VmValue::Int(status.oldest_pending_age_ms),
1598        ),
1599    ])
1600}
1601
1602fn commit_result_to_value(result: CommitResult) -> VmValue {
1603    build_dict([
1604        (
1605            "committed_paths",
1606            VmValue::List(Arc::new(
1607                result
1608                    .committed_paths
1609                    .into_iter()
1610                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1611                    .collect(),
1612            )),
1613        ),
1614        (
1615            "failed_paths_with_reasons",
1616            VmValue::List(Arc::new(
1617                result
1618                    .failed_paths_with_reasons
1619                    .into_iter()
1620                    .map(|(path, reason)| {
1621                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1622                    })
1623                    .collect(),
1624            )),
1625        ),
1626    ])
1627}
1628
1629fn discard_result_to_value(result: DiscardResult) -> VmValue {
1630    build_dict([(
1631        "discarded_paths",
1632        VmValue::List(Arc::new(
1633            result
1634                .discarded_paths
1635                .into_iter()
1636                .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1637                .collect(),
1638        )),
1639    )])
1640}
1641
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Empty and dots-only ids must never come back verbatim. If they did,
    /// pushing one onto the session path would step out of the staged-state
    /// root.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        const HOSTILE: [&str; 4] = ["..", ".", "...", ""];
        for raw in HOSTILE {
            let cleaned = sanitize_component(raw);
            assert_ne!(cleaned, raw, "`{}` passed through unsanitized", raw);

            let still_dots = cleaned.bytes().all(|b| b == b'.');
            assert!(!still_dots, "`{}` -> `{}` is still all dots", raw, cleaned);

            // Every component of the result is a normal name, with no
            // ParentDir or CurDir among them.
            let traversal_free = Path::new(&cleaned)
                .components()
                .all(|part| matches!(part, Component::Normal(_)));
            assert!(traversal_free, "`{}` contains a traversal component", cleaned);
        }
    }

    /// Ids made only of allowed characters are returned unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// Even for a `..` session id the session directory stays below the
    /// staged-state root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");

        let has_parent_dir = dir.components().any(|part| part == Component::ParentDir);
        assert!(!has_parent_dir, "session_dir({:?}) escapes via `..`", dir);

        let mut staged = std::path::PathBuf::from("/workspace");
        for segment in STATE_REL {
            staged.push(segment);
        }
        assert!(dir.starts_with(&staged), "{:?} not under {:?}", dir, staged);
    }
}