Skip to main content

harn_hostlib/
fs.rs

//! Session-scoped staged filesystem mode.
//!
//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
//! mutations land in a durable per-session overlay under
//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
//! that overlay first, so agent loops see their own pending writes without
//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::sync::Arc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Wire names of the builtins registered by this module. The same constants
// are used as the `builtin` tag on the `HostlibError` values raised below, so
// diagnostics name the builtin that failed.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

/// Schema version for the staged-state manifest.
/// NOTE(review): presumably stamped into `Manifest::version` — confirm at the write site.
const MANIFEST_VERSION: u32 = 1;
/// Path components of the staging area (`.harn/state/staged`); per the module
/// docs each session's overlay lives in a `<session_id>` directory below it.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Capability handle for the hostlib `fs` module.
///
/// The type is a stateless unit struct; it exists so the module can be
/// registered through the `HostlibCapability` trait.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        register(
53            registry,
54            STATUS_BUILTIN,
55            "staged_status",
56            staged_status_builtin,
57        );
58        register(
59            registry,
60            COMMIT_BUILTIN,
61            "commit_staged",
62            commit_staged_builtin,
63        );
64        register(
65            registry,
66            DISCARD_BUILTIN,
67            "discard_staged",
68            discard_staged_builtin,
69        );
70        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
71        // they share the deterministic-tools gate with `tools::*` file I/O.
72        register_gated(
73            registry,
74            SAFE_TEXT_PATCH_BUILTIN,
75            "safe_text_patch",
76            safe_text_patch_builtin,
77        );
78        register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79        register(
80            registry,
81            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82            "emit_safe_text_patch_result",
83            emit_safe_text_patch_result_builtin,
84        );
85    }
86}
87
88fn register(
89    registry: &mut BuiltinRegistry,
90    name: &'static str,
91    method: &'static str,
92    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94    let handler: SyncHandler = std::sync::Arc::new(runner);
95    registry.register(RegisteredBuiltin {
96        name,
97        module: "fs",
98        method,
99        handler,
100    });
101}
102
103fn register_gated(
104    registry: &mut BuiltinRegistry,
105    name: &'static str,
106    method: &'static str,
107    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109    registry.register(RegisteredBuiltin {
110        name,
111        module: "fs",
112        method,
113        handler: crate::tools::permissions::gated_handler(name, runner),
114    });
115}
116
117/// Filesystem mode for one hostlib session.
118#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
119#[serde(rename_all = "lowercase")]
120pub enum FsMode {
121    /// Mutations apply to the working tree immediately.
122    Immediate,
123    /// Mutations are recorded in the staging layer until committed.
124    Staged,
125}
126
127impl FsMode {
128    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129        match raw {
130            "immediate" => Ok(Self::Immediate),
131            "staged" => Ok(Self::Staged),
132            other => Err(HostlibError::InvalidParameter {
133                builtin,
134                param: "mode",
135                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136            }),
137        }
138    }
139
140    /// Wire string used by hostlib schemas.
141    pub fn as_str(self) -> &'static str {
142        match self {
143            Self::Immediate => "immediate",
144            Self::Staged => "staged",
145        }
146    }
147}
148
149#[derive(Clone, Debug, Serialize, Deserialize)]
150struct Manifest {
151    version: u32,
152    session_id: String,
153    mode: FsMode,
154    root: String,
155    entries: BTreeMap<String, StagedEntry>,
156}
157
158#[derive(Clone, Debug, Serialize, Deserialize)]
159#[serde(tag = "kind", rename_all = "snake_case")]
160enum StagedEntry {
161    Write {
162        body_hash: String,
163        len: u64,
164        created_at_ms: i64,
165    },
166    Delete {
167        recursive: bool,
168        created_at_ms: i64,
169    },
170}
171
172impl StagedEntry {
173    fn created_at_ms(&self) -> i64 {
174        match self {
175            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176                *created_at_ms
177            }
178        }
179    }
180
181    fn body_len(&self) -> u64 {
182        match self {
183            Self::Write { len, .. } => *len,
184            Self::Delete { .. } => 0,
185        }
186    }
187}
188
189#[derive(Clone, Debug)]
190struct SessionState {
191    session_id: String,
192    mode: FsMode,
193    root: PathBuf,
194    entries: BTreeMap<PathBuf, StagedEntry>,
195}
196
/// Result of staging a write.
#[derive(Clone, Debug)]
pub(crate) struct WriteOutcome {
    /// `true` when the path was not already present in the overlay view.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry as reported by the overlay-aware directory listing.
#[derive(Clone, Debug)]
pub(crate) struct OverlayDirEntry {
    /// Entry name.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub(crate) is_symlink: bool,
    /// Entry size.
    pub(crate) size: u64,
}
210
211/// Summary of staged filesystem changes for one session.
212#[derive(Clone, Debug)]
213pub struct StagedStatus {
214    /// Pending path changes, sorted by path.
215    pub pending_writes: Vec<PendingWrite>,
216    /// Bytes stored in staged write bodies.
217    pub total_bytes_pending: u64,
218    /// Age in milliseconds of the oldest pending change, or 0 when empty.
219    pub oldest_pending_age_ms: i64,
220}
221
/// A single pending change reported by [`StagedStatus`].
#[derive(Clone, Debug)]
pub struct PendingWrite {
    /// Absolute path the staged change applies to.
    pub path: String,
    /// Kind of change: `write`, `delete`, or the reserved future `move`.
    pub kind: &'static str,
    /// Bytes the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes the final staged view removes at this path.
    pub bytes_removed: u64,
}
234
235/// Result returned after changing a session's filesystem mode.
236#[derive(Clone, Debug)]
237pub struct SetModeResult {
238    /// Mode active before the change.
239    pub previous_mode: FsMode,
240}
241
/// Outcome of flushing staged changes to disk.
#[derive(Clone, Debug)]
pub struct CommitResult {
    /// Paths that were applied to disk.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for paths that could not be applied; the
    /// reason is human-readable.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Outcome of dropping staged changes.
#[derive(Clone, Debug)]
pub struct DiscardResult {
    /// Paths whose staged entries were dropped.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264/// Lock the session map, panicking with one canonical message if a prior
265/// holder poisoned the mutex. Every accessor goes through here so the poison
266/// policy and message live in exactly one place.
267fn lock_sessions() -> std::sync::MutexGuard<'static, BTreeMap<String, SessionState>> {
268    sessions()
269        .lock()
270        .expect("hostlib fs session mutex poisoned")
271}
272
273/// Remember the workspace root associated with a live session.
274///
275/// ACP calls this when a prompt starts so Harn code can call
276/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
277pub fn configure_session_root(session_id: &str, root: &Path) {
278    if session_id.trim().is_empty() {
279        return;
280    }
281    let root = normalize_logical(root);
282    let mut guard = lock_sessions();
283    match guard.get_mut(session_id) {
284        Some(state) if state.entries.is_empty() => {
285            state.root = root;
286        }
287        Some(_) => {}
288        None => {
289            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
290                session_id: session_id.to_string(),
291                mode: FsMode::Immediate,
292                root,
293                entries: BTreeMap::new(),
294            });
295            guard.insert(session_id.to_string(), state);
296        }
297    }
298}
299
300/// Return the root currently associated with a hostlib session.
301pub fn configured_session_root(session_id: &str) -> Option<PathBuf> {
302    if session_id.trim().is_empty() {
303        return None;
304    }
305    let guard = lock_sessions();
306    guard.get(session_id).map(|state| state.root.clone())
307}
308
309/// Set a session's filesystem mode.
310pub fn set_mode(
311    session_id: &str,
312    mode: FsMode,
313    root: Option<&Path>,
314) -> Result<SetModeResult, HostlibError> {
315    validate_session_id(SET_MODE_BUILTIN, session_id)?;
316    let mut guard = lock_sessions();
317    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
318    let previous_mode = state.mode;
319    state.mode = mode;
320    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
321        builtin: SET_MODE_BUILTIN,
322        message: err,
323    })?;
324    guard.insert(session_id.to_string(), state);
325    Ok(SetModeResult { previous_mode })
326}
327
328/// Return the staged status for a session.
329pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
330    validate_session_id(STATUS_BUILTIN, session_id)?;
331    let mut guard = lock_sessions();
332    let state = state_for_locked(&mut guard, session_id, None)?;
333    let status = status_from_state(&state);
334    guard.insert(session_id.to_string(), state);
335    Ok(status)
336}
337
338/// Commit staged changes for all paths or for a filtered path list.
339pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
340    validate_session_id(COMMIT_BUILTIN, session_id)?;
341    let mut guard = lock_sessions();
342    let mut state = state_for_locked(&mut guard, session_id, None)?;
343    let selected = selected_paths(&state, paths);
344    let mut committed_paths = Vec::new();
345    let mut failed_paths_with_reasons = Vec::new();
346
347    for path in selected {
348        let Some(entry) = state.entries.get(&path).cloned() else {
349            continue;
350        };
351        let path_label = path.to_string_lossy().into_owned();
352        // The overlay always lives inside the workspace, but commit flushes
353        // to the *target* working-tree path. Enforce workspace-root scope
354        // against that target so a staged entry — possibly persisted under
355        // a looser policy in an earlier session — can never write outside
356        // the roots active at commit time.
357        let access = match entry {
358            StagedEntry::Write { .. } => FsAccess::Write,
359            StagedEntry::Delete { .. } => FsAccess::Delete,
360        };
361        if let Err(violation) = check_fs_path_scope(&path, access) {
362            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
363            continue;
364        }
365        match commit_entry(&state, &path, &entry) {
366            Ok(()) => {
367                state.entries.remove(&path);
368                committed_paths.push(path_label);
369            }
370            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
371        }
372    }
373
374    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
375        builtin: COMMIT_BUILTIN,
376        message: err,
377    })?;
378    emit_staged_update(&state);
379    guard.insert(session_id.to_string(), state);
380    Ok(CommitResult {
381        committed_paths,
382        failed_paths_with_reasons,
383    })
384}
385
386/// Discard staged changes for all paths or for a filtered path list.
387pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
388    validate_session_id(DISCARD_BUILTIN, session_id)?;
389    let mut guard = lock_sessions();
390    let mut state = state_for_locked(&mut guard, session_id, None)?;
391    let selected = selected_paths(&state, paths);
392    let mut discarded_paths = Vec::new();
393    for path in selected {
394        if state.entries.remove(&path).is_some() {
395            discarded_paths.push(path.to_string_lossy().into_owned());
396        }
397    }
398    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
399        builtin: DISCARD_BUILTIN,
400        message: err,
401    })?;
402    emit_staged_update(&state);
403    guard.insert(session_id.to_string(), state);
404    Ok(DiscardResult { discarded_paths })
405}
406
407/// Remove all persisted staged-fs state for a caller-owned throw-away session.
408///
409/// Normal agent sessions keep their manifest after `discard_staged` so hosts can
410/// continue reporting session state. Transient dry-run sessions own their ids,
411/// though, and should remove both the in-memory entry and on-disk overlay after
412/// their preview is rendered.
413pub fn remove_session_state(session_id: &str, root: Option<&Path>) -> Result<(), HostlibError> {
414    validate_session_id(DISCARD_BUILTIN, session_id)?;
415    let mut guard = lock_sessions();
416    let state = match guard.remove(session_id) {
417        Some(state) => state,
418        None => load_state(session_id, root.map(normalize_logical)).map_err(|err| {
419            HostlibError::Backend {
420                builtin: DISCARD_BUILTIN,
421                message: err,
422            }
423        })?,
424    };
425    let dir = session_dir(&state.root, &state.session_id);
426    match stdfs::remove_dir_all(&dir) {
427        Ok(()) => Ok(()),
428        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
429        Err(err) => Err(HostlibError::Backend {
430            builtin: DISCARD_BUILTIN,
431            message: format!("remove staged session {}: {err}", dir.display()),
432        }),
433    }
434}
435
436pub(crate) fn read(
437    path: &Path,
438    explicit_session_id: Option<&str>,
439) -> Option<std::io::Result<Vec<u8>>> {
440    let session_id = active_session_id(explicit_session_id)?;
441    let mut guard = lock_sessions();
442    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
443    let result = if state.mode == FsMode::Staged {
444        overlay_read(&state, path)
445    } else {
446        None
447    };
448    guard.insert(session_id, state);
449    result
450}
451
452pub(crate) fn read_to_string(
453    path: &Path,
454    explicit_session_id: Option<&str>,
455) -> Option<std::io::Result<String>> {
456    read(path, explicit_session_id).map(|result| {
457        result.and_then(|bytes| {
458            String::from_utf8(bytes).map_err(|err| {
459                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
460            })
461        })
462    })
463}
464
465pub(crate) fn read_dir(
466    path: &Path,
467    explicit_session_id: Option<&str>,
468) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
469    let session_id = active_session_id(explicit_session_id)?;
470    let mut guard = lock_sessions();
471    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
472    let result = if state.mode == FsMode::Staged {
473        Some(overlay_read_dir(&state, path))
474    } else {
475        None
476    };
477    guard.insert(session_id, state);
478    result
479}
480
481pub(crate) fn stage_write_or_none(
482    builtin: &'static str,
483    path: &Path,
484    bytes: &[u8],
485    create_parents: bool,
486    overwrite: bool,
487    explicit_session_id: Option<&str>,
488) -> Result<Option<WriteOutcome>, HostlibError> {
489    let Some(session_id) = active_session_id(explicit_session_id) else {
490        return Ok(None);
491    };
492    let mut guard = lock_sessions();
493    let mut state = state_for_locked(&mut guard, &session_id, None)?;
494    if state.mode != FsMode::Staged {
495        guard.insert(session_id, state);
496        return Ok(None);
497    }
498
499    let key = normalize_logical(path);
500    let existed = overlay_exists(&state, &key);
501    if existed && !overwrite {
502        guard.insert(session_id, state);
503        return Err(HostlibError::Backend {
504            builtin,
505            message: format!("`{}` exists and overwrite=false", key.display()),
506        });
507    }
508    if !create_parents && !parent_exists(&state, &key) {
509        guard.insert(session_id, state);
510        return Err(HostlibError::Backend {
511            builtin,
512            message: format!("parent directory for `{}` does not exist", key.display()),
513        });
514    }
515
516    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
517        builtin,
518        message: err,
519    })?;
520    state.entries.insert(
521        key.clone(),
522        StagedEntry::Write {
523            body_hash: hash,
524            len: bytes.len() as u64,
525            created_at_ms: now_ms(),
526        },
527    );
528    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
529        builtin,
530        message: err,
531    })?;
532    emit_staged_update(&state);
533    guard.insert(session_id, state);
534    Ok(Some(WriteOutcome {
535        created: !existed,
536        bytes_written: bytes.len(),
537    }))
538}
539
540pub(crate) fn stage_delete_or_none(
541    builtin: &'static str,
542    path: &Path,
543    recursive: bool,
544    explicit_session_id: Option<&str>,
545) -> Result<Option<bool>, HostlibError> {
546    let Some(session_id) = active_session_id(explicit_session_id) else {
547        return Ok(None);
548    };
549    let mut guard = lock_sessions();
550    let mut state = state_for_locked(&mut guard, &session_id, None)?;
551    if state.mode != FsMode::Staged {
552        guard.insert(session_id, state);
553        return Ok(None);
554    }
555
556    let key = normalize_logical(path);
557    let staged_targets = staged_paths_under(&state, &key);
558    let disk_exists = key.exists();
559    if !disk_exists && staged_targets.is_empty() {
560        guard.insert(session_id, state);
561        return Ok(Some(false));
562    }
563
564    if !disk_exists {
565        for staged in staged_targets {
566            state.entries.remove(&staged);
567        }
568    } else {
569        validate_delete_shape(builtin, &key, recursive)?;
570        for staged in staged_targets {
571            state.entries.remove(&staged);
572        }
573        state.entries.insert(
574            key.clone(),
575            StagedEntry::Delete {
576                recursive,
577                created_at_ms: now_ms(),
578            },
579        );
580    }
581    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
582        builtin,
583        message: err,
584    })?;
585    emit_staged_update(&state);
586    guard.insert(session_id, state);
587    Ok(Some(true))
588}
589
590/// Outcome of one [`safe_text_patch`] call. `applied` says whether the
591/// on-disk (or staged-overlay) bytes changed; `result` carries the
592/// structured discriminant used by the wire/JSON shape.
593#[derive(Clone, Debug)]
594pub struct SafeTextPatchOutcome {
595    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
596    pub result: SafeTextPatchResult,
597    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed.
598    pub current_hash: String,
599    /// `sha256:HEX` of the requested post-image.
600    pub after_hash: String,
601    /// `true` when the file did not exist before the call.
602    pub created: bool,
603    /// Bytes written; `0` on `stale_base` or `no_op`.
604    pub bytes_written: usize,
605}
606
/// The three possible results of a [`safe_text_patch`] call.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeTextPatchResult {
    /// The pre-image hash matched (or no expected hash was supplied) and
    /// the post-image differs from the pre-image, so bytes were written.
    Applied,
    /// `expected_hash` did not match the observed pre-image hash, so no
    /// bytes were written. Callers should re-read and retry.
    StaleBase,
    /// The pre-image hash matched and the post-image equals the pre-image;
    /// the write was skipped to avoid spurious timestamps and overlay churn.
    NoOp,
}

impl SafeTextPatchResult {
    /// Snake-case label for this result.
    fn as_str(self) -> &'static str {
        match self {
            Self::NoOp => "no_op",
            Self::StaleBase => "stale_base",
            Self::Applied => "applied",
        }
    }
}
630
631/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
632/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
633/// safe-text-patch surface.
634fn hash_label(bytes: &[u8]) -> String {
635    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
636}
637
638/// Atomic compare-and-swap-style text write.
639///
640/// Reads the current bytes at `path` through the staged-fs overlay (when a
641/// session is active) so concurrent agent edits see each other's pending
642/// writes. If `expected_hash` is supplied and differs from the observed
643/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
644/// mutating any state. On a hash match the post-image is written through
645/// the same overlay path, keeping the read and the write atomic with
646/// respect to other staged-fs consumers in the same process.
647///
648/// Atomicity:
649///
650/// - When a session is in staged mode, the read, hash check, and write
651///   all happen under a single acquisition of the sessions mutex, so a
652///   sibling thread cannot stage a write into the window between the
653///   pre-image snapshot and the commit.
654/// - When the call routes through disk (no active session, or session in
655///   immediate mode), the write goes through an atomic rename-into-place
656///   so partial-write tearing is impossible. Cross-process races are
657///   intentionally out of scope — the staged-fs overlay is the
658///   collision-rejection layer.
659pub fn safe_text_patch(
660    path: &Path,
661    content: &str,
662    expected_hash: Option<&str>,
663    session_id: Option<&str>,
664    create_parents: bool,
665    overwrite: bool,
666) -> Result<SafeTextPatchOutcome, HostlibError> {
667    let new_bytes = content.as_bytes();
668    let after_hash = hash_label(new_bytes);
669
670    if let Some(outcome) = safe_text_patch_staged(
671        path,
672        new_bytes,
673        expected_hash,
674        session_id,
675        create_parents,
676        overwrite,
677        &after_hash,
678    )? {
679        return Ok(outcome);
680    }
681
682    safe_text_patch_disk(
683        path,
684        new_bytes,
685        expected_hash,
686        create_parents,
687        overwrite,
688        after_hash,
689    )
690}
691
692/// Atomic CAS path for a session in `staged` mode. Holds the sessions
693/// mutex through the entire read → hash → check → write so concurrent
694/// agents in the same process cannot race the snapshot. Returns `None`
695/// when no session is active or the session is in `immediate` mode, so
696/// the caller can fall through to the disk path.
697#[allow(clippy::too_many_arguments)]
698fn safe_text_patch_staged(
699    path: &Path,
700    new_bytes: &[u8],
701    expected_hash: Option<&str>,
702    session_id: Option<&str>,
703    create_parents: bool,
704    overwrite: bool,
705    after_hash: &str,
706) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
707    let Some(session) = active_session_id(session_id) else {
708        return Ok(None);
709    };
710    let mut guard = lock_sessions();
711    let mut state = state_for_locked(&mut guard, &session, None)?;
712    if state.mode != FsMode::Staged {
713        guard.insert(session, state);
714        return Ok(None);
715    }
716
717    let key = normalize_logical(path);
718    let (existing_bytes, existed) = match overlay_read(&state, path) {
719        Some(Ok(bytes)) => (bytes, true),
720        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
721        Some(Err(err)) => {
722            guard.insert(session, state);
723            return Err(HostlibError::Backend {
724                builtin: SAFE_TEXT_PATCH_BUILTIN,
725                message: format!("read `{}`: {err}", path.display()),
726            });
727        }
728        None => match stdfs::read(path) {
729            Ok(bytes) => (bytes, true),
730            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
731            Err(err) => {
732                guard.insert(session, state);
733                return Err(HostlibError::Backend {
734                    builtin: SAFE_TEXT_PATCH_BUILTIN,
735                    message: format!("read `{}`: {err}", path.display()),
736                });
737            }
738        },
739    };
740    let current_hash = hash_label(&existing_bytes);
741
742    if let Some(expected) = expected_hash {
743        if expected != current_hash {
744            guard.insert(session, state);
745            return Ok(Some(SafeTextPatchOutcome {
746                result: SafeTextPatchResult::StaleBase,
747                current_hash,
748                after_hash: after_hash.to_string(),
749                created: false,
750                bytes_written: 0,
751            }));
752        }
753    }
754
755    if existed && existing_bytes == new_bytes {
756        guard.insert(session, state);
757        return Ok(Some(SafeTextPatchOutcome {
758            result: SafeTextPatchResult::NoOp,
759            current_hash,
760            after_hash: after_hash.to_string(),
761            created: false,
762            bytes_written: 0,
763        }));
764    }
765
766    let overlay_existed = overlay_exists(&state, &key);
767    if overlay_existed && !overwrite {
768        guard.insert(session, state);
769        return Err(HostlibError::Backend {
770            builtin: SAFE_TEXT_PATCH_BUILTIN,
771            message: format!("`{}` exists and overwrite=false", key.display()),
772        });
773    }
774    if !create_parents && !parent_exists(&state, &key) {
775        guard.insert(session, state);
776        return Err(HostlibError::Backend {
777            builtin: SAFE_TEXT_PATCH_BUILTIN,
778            message: format!("parent directory for `{}` does not exist", key.display()),
779        });
780    }
781
782    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
783        builtin: SAFE_TEXT_PATCH_BUILTIN,
784        message: err,
785    })?;
786    state.entries.insert(
787        key.clone(),
788        StagedEntry::Write {
789            body_hash,
790            len: new_bytes.len() as u64,
791            created_at_ms: now_ms(),
792        },
793    );
794    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
795        builtin: SAFE_TEXT_PATCH_BUILTIN,
796        message: err,
797    })?;
798    emit_staged_update(&state);
799    guard.insert(session, state);
800
801    Ok(Some(SafeTextPatchOutcome {
802        result: SafeTextPatchResult::Applied,
803        current_hash,
804        after_hash: after_hash.to_string(),
805        created: !existed,
806        bytes_written: new_bytes.len(),
807    }))
808}
809
810/// Disk path for callers without an active staged session. Uses
811/// `atomic_write` so the post-image lands via rename-into-place rather
812/// than an open/truncate/write/close sequence — readers either see the
813/// pre-image or the post-image, never a torn write.
814fn safe_text_patch_disk(
815    path: &Path,
816    new_bytes: &[u8],
817    expected_hash: Option<&str>,
818    create_parents: bool,
819    overwrite: bool,
820    after_hash: String,
821) -> Result<SafeTextPatchOutcome, HostlibError> {
822    let (existing_bytes, existed) = match stdfs::read(path) {
823        Ok(bytes) => (bytes, true),
824        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
825        Err(err) => {
826            return Err(HostlibError::Backend {
827                builtin: SAFE_TEXT_PATCH_BUILTIN,
828                message: format!("read `{}`: {err}", path.display()),
829            });
830        }
831    };
832    let current_hash = hash_label(&existing_bytes);
833
834    if let Some(expected) = expected_hash {
835        if expected != current_hash {
836            return Ok(SafeTextPatchOutcome {
837                result: SafeTextPatchResult::StaleBase,
838                current_hash,
839                after_hash,
840                created: false,
841                bytes_written: 0,
842            });
843        }
844    }
845
846    if existed && existing_bytes == new_bytes {
847        return Ok(SafeTextPatchOutcome {
848            result: SafeTextPatchResult::NoOp,
849            current_hash,
850            after_hash,
851            created: false,
852            bytes_written: 0,
853        });
854    }
855    if existed && !overwrite {
856        return Err(HostlibError::Backend {
857            builtin: SAFE_TEXT_PATCH_BUILTIN,
858            message: format!("`{}` exists and overwrite=false", path.display()),
859        });
860    }
861    if !create_parents {
862        if let Some(parent) = path.parent() {
863            if !parent.as_os_str().is_empty() && !parent.is_dir() {
864                return Err(HostlibError::Backend {
865                    builtin: SAFE_TEXT_PATCH_BUILTIN,
866                    message: format!(
867                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
868                        path.display()
869                    ),
870                });
871            }
872        }
873    }
874
875    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
876    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
877        builtin: SAFE_TEXT_PATCH_BUILTIN,
878        message: format!("write `{}`: {err}", path.display()),
879    })?;
880
881    Ok(SafeTextPatchOutcome {
882        result: SafeTextPatchResult::Applied,
883        current_hash,
884        after_hash,
885        created: !existed,
886        bytes_written: new_bytes.len(),
887    })
888}
889
890/// Read the pre-image through the staged-fs overlay (when active),
891/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
892/// `builtin` is the caller's tag — used so backend errors point at the
893/// right builtin name in diagnostics.
894fn read_existing(
895    builtin: &'static str,
896    path: &Path,
897    session_id: Option<&str>,
898) -> Result<(Vec<u8>, bool), HostlibError> {
899    if let Some(result) = read(path, session_id) {
900        return match result {
901            Ok(bytes) => Ok((bytes, true)),
902            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
903            Err(err) => Err(HostlibError::Backend {
904                builtin,
905                message: format!("read `{}`: {err}", path.display()),
906            }),
907        };
908    }
909    match stdfs::read(path) {
910        Ok(bytes) => Ok((bytes, true)),
911        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
912        Err(err) => Err(HostlibError::Backend {
913            builtin,
914            message: format!("read `{}`: {err}", path.display()),
915        }),
916    }
917}
918
919fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
920    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
921    let dict = raw.as_ref();
922    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
923    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
924    let path = Path::new(&path_str);
925    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
926
927    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
928    let hash = hash_label(&bytes);
929    let content = match std::str::from_utf8(&bytes) {
930        Ok(s) => s.to_string(),
931        Err(err) => {
932            return Err(HostlibError::Backend {
933                builtin: READ_TEXT_BUILTIN,
934                message: format!("`{path_str}` is not valid UTF-8: {err}"),
935            });
936        }
937    };
938    let bytes_len = bytes.len() as i64;
939    Ok(build_dict([
940        ("path", str_value(&path_str)),
941        ("content", str_value(&content)),
942        ("sha256", str_value(&hash)),
943        ("size", VmValue::Int(bytes_len)),
944        ("exists", VmValue::Bool(existed)),
945    ]))
946}
947
948fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
949    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
950    let dict = raw.as_ref();
951
952    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
953    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
954    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
955    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
956    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
957    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
958
959    enforce_path_scope(
960        SAFE_TEXT_PATCH_BUILTIN,
961        Path::new(&path_str),
962        FsAccess::Write,
963    )?;
964    let outcome = safe_text_patch(
965        Path::new(&path_str),
966        &content,
967        expected_hash.as_deref(),
968        session_id.as_deref(),
969        create_parents,
970        overwrite,
971    )?;
972
973    let entries: Vec<(&'static str, VmValue)> = vec![
974        ("path", str_value(&path_str)),
975        ("result", str_value(outcome.result.as_str())),
976        (
977            "applied",
978            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
979        ),
980        (
981            "stale_base",
982            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
983        ),
984        ("current_hash", str_value(&outcome.current_hash)),
985        ("before_sha256", str_value(&outcome.current_hash)),
986        ("after_sha256", str_value(&outcome.after_hash)),
987        ("created", VmValue::Bool(outcome.created)),
988        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
989        (
990            "expected_hash",
991            match expected_hash.as_deref() {
992                Some(hash) => str_value(hash),
993                None => VmValue::Nil,
994            },
995        ),
996    ];
997    Ok(build_dict(entries))
998}
999
1000fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1001    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
1002    let dict = raw.as_ref();
1003
1004    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
1005    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
1006    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
1007    let bytes_written = optional_int(
1008        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1009        dict,
1010        "bytes_written",
1011        0,
1012    )?;
1013    let failed_hunk_index = match dict.get("failed_hunk_index") {
1014        None | Some(VmValue::Nil) => None,
1015        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
1016        Some(other) => {
1017            return Err(HostlibError::InvalidParameter {
1018                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
1019                param: "failed_hunk_index",
1020                message: format!("expected non-negative integer, got {}", other.type_name()),
1021            });
1022        }
1023    };
1024    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
1025        .or_else(harn_vm::agent_sessions::current_session_id);
1026
1027    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1028        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1029            session_id,
1030            path,
1031            result,
1032            hunks_count: hunks_count.max(0) as usize,
1033            bytes_written: bytes_written.max(0) as u64,
1034            failed_hunk_index,
1035        });
1036        Ok(VmValue::Bool(true))
1037    } else {
1038        // Silently no-op when no session is active — telemetry without a
1039        // session has nowhere to route. Caller can opt in by always
1040        // passing session_id explicitly.
1041        Ok(VmValue::Bool(false))
1042    }
1043}
1044
1045fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1046    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1047    let dict = raw.as_ref();
1048    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1049    let mode = FsMode::parse(
1050        SET_MODE_BUILTIN,
1051        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1052    )?;
1053    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1054    let result = set_mode(&session_id, mode, root.as_deref())?;
1055    Ok(build_dict([(
1056        "previous_mode",
1057        str_value(result.previous_mode.as_str()),
1058    )]))
1059}
1060
1061fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1062    let raw = dict_arg(STATUS_BUILTIN, args)?;
1063    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1064    Ok(status_to_value(staged_status(&session_id)?))
1065}
1066
1067fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1068    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1069    let dict = raw.as_ref();
1070    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1071    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1072    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1073}
1074
1075fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1076    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1077    let dict = raw.as_ref();
1078    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1079    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1080    Ok(discard_result_to_value(discard_staged(
1081        &session_id,
1082        &paths,
1083    )?))
1084}
1085
1086fn state_for_locked(
1087    guard: &mut BTreeMap<String, SessionState>,
1088    session_id: &str,
1089    root: Option<PathBuf>,
1090) -> Result<SessionState, HostlibError> {
1091    if let Some(existing) = guard.get(session_id) {
1092        let mut state = existing.clone();
1093        if let Some(root) = root {
1094            if state.entries.is_empty() {
1095                state.root = root;
1096            }
1097        }
1098        return Ok(state);
1099    }
1100    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1101        builtin: SET_MODE_BUILTIN,
1102        message: err,
1103    })?;
1104    Ok(state)
1105}
1106
1107fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1108    let root = root.unwrap_or_else(default_root);
1109    let manifest_path = manifest_path(&root, session_id);
1110    if manifest_path.exists() {
1111        let text = stdfs::read_to_string(&manifest_path)
1112            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1113        let manifest: Manifest = serde_json::from_str(&text)
1114            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1115        if manifest.version != MANIFEST_VERSION {
1116            return Err(format!(
1117                "unsupported staged fs manifest version {} in {}",
1118                manifest.version,
1119                manifest_path.display()
1120            ));
1121        }
1122        if manifest.session_id != session_id {
1123            return Err(format!(
1124                "staged fs manifest session id mismatch in {}",
1125                manifest_path.display()
1126            ));
1127        }
1128        return Ok(SessionState {
1129            session_id: manifest.session_id,
1130            mode: manifest.mode,
1131            root: normalize_logical(Path::new(&manifest.root)),
1132            entries: manifest
1133                .entries
1134                .into_iter()
1135                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1136                .collect(),
1137        });
1138    }
1139    Ok(SessionState {
1140        session_id: session_id.to_string(),
1141        mode: FsMode::Immediate,
1142        root,
1143        entries: BTreeMap::new(),
1144    })
1145}
1146
1147fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1148    let dir = session_dir(&state.root, &state.session_id);
1149    stdfs::create_dir_all(dir.join("bodies"))
1150        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1151    let manifest = Manifest {
1152        version: MANIFEST_VERSION,
1153        session_id: state.session_id.clone(),
1154        mode: state.mode,
1155        root: state.root.to_string_lossy().into_owned(),
1156        entries: state
1157            .entries
1158            .iter()
1159            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1160            .collect(),
1161    };
1162    let bytes = serde_json::to_vec_pretty(&manifest)
1163        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1164    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1165    append_journal(state, op, path)?;
1166    prune_unreferenced_bodies(state);
1167    Ok(())
1168}
1169
1170fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1171    let dir = session_dir(&state.root, &state.session_id);
1172    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1173    let line = serde_json::to_string(&serde_json::json!({
1174        "ts_ms": now_ms(),
1175        "op": op,
1176        "path": path.map(|path| path.to_string_lossy().into_owned()),
1177        "pending_count": state.entries.len(),
1178    }))
1179    .map_err(|err| format!("serialize staged journal: {err}"))?;
1180    let mut file = stdfs::OpenOptions::new()
1181        .create(true)
1182        .append(true)
1183        .open(dir.join("journal.jsonl"))
1184        .map_err(|err| format!("open staged journal: {err}"))?;
1185    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1186}
1187
1188fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1189    let hash = hex::encode(Sha256::digest(bytes));
1190    let path = session_dir(&state.root, &state.session_id)
1191        .join("bodies")
1192        .join(&hash);
1193    if !path.exists() {
1194        atomic_write(&path, bytes)?;
1195    }
1196    Ok(hash)
1197}
1198
1199fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1200    stdfs::read(
1201        session_dir(&state.root, &state.session_id)
1202            .join("bodies")
1203            .join(hash),
1204    )
1205}
1206
1207fn prune_unreferenced_bodies(state: &SessionState) {
1208    let live: BTreeSet<String> = state
1209        .entries
1210        .values()
1211        .filter_map(|entry| match entry {
1212            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1213            StagedEntry::Delete { .. } => None,
1214        })
1215        .collect();
1216    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1217    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1218        return;
1219    };
1220    for entry in entries.flatten() {
1221        let name = entry.file_name().to_string_lossy().into_owned();
1222        if !live.contains(&name) {
1223            let _ = stdfs::remove_file(entry.path());
1224        }
1225    }
1226}
1227
/// Write `bytes` to `path` by staging them in a sibling temp file and
/// renaming that file over the destination.
///
/// Missing parent directories are created first. The temp file lives in
/// the destination's directory and is named
/// `<file name>.tmp-<pid>-<sequence>`. The suffix is appended to the
/// *whole* file name, so targets that share a stem (`foo.rs` and
/// `foo.txt`) never share a temp file, and the per-process sequence number
/// keeps concurrent writes in one process apart.
///
/// If the first rename fails, the destination is removed and the rename is
/// retried once — presumably for platforms or filesystems where a rename
/// cannot replace an existing file. That fallback is not atomic. If the
/// retry also fails, the temp file is left in place so the new content is
/// still on disk at the path named in the error.
///
/// # Errors
///
/// Returns a message when `path` has no file name, when the parent
/// directory cannot be created, when the temp file cannot be written (any
/// partial temp file is removed), or when both rename attempts fail.
fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
    // Distinguishes temp files created by this process, even within the
    // same millisecond.
    static NEXT_TMP_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let Some(file_name) = path.file_name() else {
        return Err(format!("write {}: path has no file name", path.display()));
    };

    if let Some(parent) = path.parent() {
        stdfs::create_dir_all(parent)
            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
    }

    let sequence = NEXT_TMP_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let mut tmp_name = file_name.to_os_string();
    tmp_name.push(format!(".tmp-{}-{}", std::process::id(), sequence));
    let tmp = path.with_file_name(tmp_name);

    if let Err(err) = stdfs::write(&tmp, bytes) {
        // Do not leave a truncated temp file next to the destination.
        let _ = stdfs::remove_file(&tmp);
        return Err(format!("write {}: {err}", tmp.display()));
    }

    let Err(err) = stdfs::rename(&tmp, path) else {
        return Ok(());
    };
    // Fallback: clear the destination and try once more.
    let _ = stdfs::remove_file(path);
    stdfs::rename(&tmp, path).map_err(|retry| {
        format!(
            "rename {} to {}: {err}; retry: {retry}",
            tmp.display(),
            path.display()
        )
    })
}
1249
1250fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1251    match entry {
1252        StagedEntry::Write { body_hash, .. } => {
1253            let bytes = read_body(state, body_hash)
1254                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1255            atomic_write(path, &bytes)
1256        }
1257        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1258            Ok(metadata) if metadata.is_dir() => {
1259                if *recursive {
1260                    stdfs::remove_dir_all(path)
1261                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1262                } else {
1263                    stdfs::remove_dir(path)
1264                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1265                }
1266            }
1267            Ok(_) => stdfs::remove_file(path)
1268                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1269            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1270            Err(err) => Err(format!("stat {}: {err}", path.display())),
1271        },
1272    }
1273}
1274
1275fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1276    let key = normalize_logical(path);
1277    if let Some(entry) = state.entries.get(&key) {
1278        return Some(match entry {
1279            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1280            StagedEntry::Delete { .. } => Err(not_found(&key)),
1281        });
1282    }
1283    if deleted_ancestor(state, &key) {
1284        return Some(Err(not_found(&key)));
1285    }
1286    None
1287}
1288
1289fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1290    let dir_key = normalize_logical(path);
1291    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1292        || deleted_ancestor(state, &dir_key)
1293        || matches!(
1294            state.entries.get(&dir_key),
1295            Some(StagedEntry::Delete { .. })
1296        )
1297    {
1298        return Err(not_found(&dir_key));
1299    }
1300    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1301        return Err(not_found(&dir_key));
1302    }
1303
1304    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1305    if path.exists() {
1306        for entry in stdfs::read_dir(path)? {
1307            let entry = entry?;
1308            let name = entry.file_name().to_string_lossy().into_owned();
1309            let file_type = entry.file_type().ok();
1310            let metadata = entry.metadata().ok();
1311            entries.insert(
1312                name.clone(),
1313                OverlayDirEntry {
1314                    name,
1315                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1316                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1317                    size: metadata.map(|m| m.len()).unwrap_or(0),
1318                },
1319            );
1320        }
1321    }
1322
1323    for (path, entry) in &state.entries {
1324        let Some(name) = overlay_child_name(path, &dir_key) else {
1325            continue;
1326        };
1327        match entry {
1328            StagedEntry::Write { len, .. } => {
1329                let is_dir = path.parent() != Some(dir_key.as_path());
1330                entries.insert(
1331                    name.clone(),
1332                    OverlayDirEntry {
1333                        name,
1334                        is_dir,
1335                        is_symlink: false,
1336                        size: if is_dir { 0 } else { *len },
1337                    },
1338                );
1339            }
1340            StagedEntry::Delete { .. } => {
1341                if path.parent() == Some(dir_key.as_path()) {
1342                    entries.remove(&name);
1343                }
1344            }
1345        }
1346    }
1347
1348    Ok(entries.into_values().collect())
1349}
1350
/// Name of the first path component of `path` below `dir`.
///
/// Returns `None` when `path` is not under `dir`, when the two are equal
/// (nothing remains after the prefix), or when the first remaining
/// component is not a normal name.
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let remainder = path.strip_prefix(dir).ok()?;
    match remainder.components().next()? {
        Component::Normal(name) => Some(name.to_string_lossy().into_owned()),
        _ => None,
    }
}
1360
1361fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1362    if let Some(entry) = state.entries.get(path) {
1363        return matches!(entry, StagedEntry::Write { .. });
1364    }
1365    if deleted_ancestor(state, path) {
1366        return false;
1367    }
1368    if has_staged_descendant(state, path) {
1369        return true;
1370    }
1371    path.exists()
1372}
1373
1374fn parent_exists(state: &SessionState, path: &Path) -> bool {
1375    let Some(parent) = path.parent() else {
1376        return true;
1377    };
1378    if parent.as_os_str().is_empty() {
1379        return true;
1380    }
1381    if let Some(entry) = state.entries.get(parent) {
1382        return !matches!(entry, StagedEntry::Delete { .. });
1383    }
1384    if deleted_ancestor(state, parent) {
1385        return false;
1386    }
1387    if has_staged_descendant(state, parent) {
1388        return true;
1389    }
1390    parent.is_dir()
1391}
1392
1393fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1394    state.entries.iter().any(|(candidate, entry)| {
1395        matches!(entry, StagedEntry::Delete { .. })
1396            && path != candidate.as_path()
1397            && path.starts_with(candidate)
1398    })
1399}
1400
1401fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1402    state.entries.iter().any(|(candidate, entry)| {
1403        matches!(entry, StagedEntry::Write { .. })
1404            && candidate != path
1405            && candidate.starts_with(path)
1406    })
1407}
1408
1409fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1410    state
1411        .entries
1412        .keys()
1413        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1414        .cloned()
1415        .collect()
1416}
1417
1418fn validate_delete_shape(
1419    builtin: &'static str,
1420    path: &Path,
1421    recursive: bool,
1422) -> Result<(), HostlibError> {
1423    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1424        return Ok(());
1425    };
1426    if metadata.is_dir() && !recursive {
1427        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1428            builtin,
1429            message: format!("read_dir `{}`: {err}", path.display()),
1430        })?;
1431        if entries.next().is_some() {
1432            return Err(HostlibError::Backend {
1433                builtin,
1434                message: format!(
1435                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1436                    path.display()
1437                ),
1438            });
1439        }
1440    }
1441    Ok(())
1442}
1443
1444fn status_from_state(state: &SessionState) -> StagedStatus {
1445    let now = now_ms();
1446    let mut pending_writes = Vec::new();
1447    let mut total_bytes_pending = 0u64;
1448    let mut oldest = None;
1449    for (path, entry) in &state.entries {
1450        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1451        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1452            old.min(entry.created_at_ms())
1453        }));
1454        let (kind, bytes_added, bytes_removed) = match entry {
1455            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1456            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1457        };
1458        pending_writes.push(PendingWrite {
1459            path: path.to_string_lossy().into_owned(),
1460            kind,
1461            bytes_added,
1462            bytes_removed,
1463        });
1464    }
1465    StagedStatus {
1466        pending_writes,
1467        total_bytes_pending,
1468        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1469    }
1470}
1471
1472fn disk_size(path: &Path) -> Option<u64> {
1473    let metadata = stdfs::symlink_metadata(path).ok()?;
1474    if metadata.is_file() {
1475        return Some(metadata.len());
1476    }
1477    if metadata.is_dir() {
1478        let mut total = 0u64;
1479        for entry in walkdir::WalkDir::new(path)
1480            .into_iter()
1481            .filter_map(Result::ok)
1482        {
1483            if let Ok(metadata) = entry.metadata() {
1484                if metadata.is_file() {
1485                    total = total.saturating_add(metadata.len());
1486                }
1487            }
1488        }
1489        return Some(total);
1490    }
1491    Some(metadata.len())
1492}
1493
1494fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1495    if paths.is_empty() {
1496        return state.entries.keys().cloned().collect();
1497    }
1498    let selected: BTreeSet<PathBuf> = paths
1499        .iter()
1500        .map(|path| normalize_logical(Path::new(path)))
1501        .collect();
1502    state
1503        .entries
1504        .keys()
1505        .filter(|path| selected.contains(*path))
1506        .cloned()
1507        .collect()
1508}
1509
1510fn active_session_id(explicit: Option<&str>) -> Option<String> {
1511    explicit
1512        .map(str::to_string)
1513        .or_else(harn_vm::agent_sessions::current_session_id)
1514        .filter(|id| !id.trim().is_empty())
1515}
1516
1517fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1518    if session_id.trim().is_empty() {
1519        return Err(HostlibError::InvalidParameter {
1520            builtin,
1521            param: "session_id",
1522            message: "must not be empty".to_string(),
1523        });
1524    }
1525    Ok(())
1526}
1527
/// Root used when the caller supplies none: the process's current
/// directory, or `"."` when that cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1531
1532fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1533    let mut dir = root.to_path_buf();
1534    for component in STATE_REL {
1535        dir.push(component);
1536    }
1537    dir.push(sanitize_component(session_id));
1538    dir
1539}
1540
1541fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1542    session_dir(root, session_id).join("manifest.json")
1543}
1544
1545fn sanitize_component(input: &str) -> String {
1546    let sanitized: String = input
1547        .chars()
1548        .map(|ch| match ch {
1549            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1550            _ => '_',
1551        })
1552        .collect();
1553    // `.` is allowed inside a name, but a component that is empty or *only*
1554    // dots (`.`, `..`, `...`) is a path-traversal / current-dir token, not a
1555    // safe single component — `session_dir`'s `dir.push("..")` would escape
1556    // the staged-state root. Force the hashed form so the result is always a
1557    // genuine, traversal-free directory name.
1558    let is_dotted = sanitized.is_empty() || sanitized.bytes().all(|b| b == b'.');
1559    if sanitized == input && !is_dotted {
1560        sanitized
1561    } else {
1562        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1563        format!("{sanitized}-{}", &hash[..12])
1564    }
1565}
1566
1567fn normalize_logical(path: &Path) -> PathBuf {
1568    let absolute = if path.is_absolute() {
1569        path.to_path_buf()
1570    } else {
1571        default_root().join(path)
1572    };
1573    let mut out = PathBuf::new();
1574    for component in absolute.components() {
1575        match component {
1576            Component::ParentDir => {
1577                out.pop();
1578            }
1579            Component::CurDir => {}
1580            other => out.push(other),
1581        }
1582    }
1583    out
1584}
1585
/// Build the `NotFound` I/O error reported for a path that is deleted or
/// missing in the staged view.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1592
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reads earlier than the epoch.
fn now_ms() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1599
1600fn emit_staged_update(state: &SessionState) {
1601    let status = status_from_state(state);
1602    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1603        session_id: state.session_id.clone(),
1604        pending_count: status.pending_writes.len(),
1605        total_bytes: status.total_bytes_pending,
1606    });
1607}
1608
1609fn pending_write_to_value(write: PendingWrite) -> VmValue {
1610    build_dict([
1611        ("path", str_value(&write.path)),
1612        ("kind", str_value(write.kind)),
1613        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1614        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1615    ])
1616}
1617
1618fn status_to_value(status: StagedStatus) -> VmValue {
1619    build_dict([
1620        (
1621            "pending_writes",
1622            VmValue::List(Arc::new(
1623                status
1624                    .pending_writes
1625                    .into_iter()
1626                    .map(pending_write_to_value)
1627                    .collect(),
1628            )),
1629        ),
1630        (
1631            "total_bytes_pending",
1632            VmValue::Int(status.total_bytes_pending as i64),
1633        ),
1634        (
1635            "oldest_pending_age_ms",
1636            VmValue::Int(status.oldest_pending_age_ms),
1637        ),
1638    ])
1639}
1640
1641fn commit_result_to_value(result: CommitResult) -> VmValue {
1642    build_dict([
1643        (
1644            "committed_paths",
1645            VmValue::List(Arc::new(
1646                result
1647                    .committed_paths
1648                    .into_iter()
1649                    .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1650                    .collect(),
1651            )),
1652        ),
1653        (
1654            "failed_paths_with_reasons",
1655            VmValue::List(Arc::new(
1656                result
1657                    .failed_paths_with_reasons
1658                    .into_iter()
1659                    .map(|(path, reason)| {
1660                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1661                    })
1662                    .collect(),
1663            )),
1664        ),
1665    ])
1666}
1667
1668fn discard_result_to_value(result: DiscardResult) -> VmValue {
1669    build_dict([(
1670        "discarded_paths",
1671        VmValue::List(Arc::new(
1672            result
1673                .discarded_paths
1674                .into_iter()
1675                .map(|path| VmValue::String(arcstr::ArcStr::from(path)))
1676                .collect(),
1677        )),
1678    )])
1679}
1680
#[cfg(test)]
mod sanitize_tests {
    use super::{sanitize_component, session_dir, STATE_REL};
    use std::path::{Component, Path};

    /// Session ids that are empty or consist only of dots must never be
    /// used verbatim: pushed onto a path they are current-dir / parent-dir
    /// tokens, not names.
    #[test]
    fn dotted_session_ids_are_never_traversal_tokens() {
        let hostile = ["..", ".", "...", ""];
        for evil in hostile {
            let safe = sanitize_component(evil);
            assert_ne!(safe, evil, "`{evil}` passed through unsanitized");

            let all_dots = safe.bytes().all(|b| b == b'.');
            assert!(!all_dots, "`{evil}` -> `{safe}` is still all dots");

            // Every component must be a plain name (no ParentDir/CurDir).
            let plain_names_only = Path::new(&safe)
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            assert!(plain_names_only, "`{safe}` contains a traversal component");
        }
    }

    /// Ids made only of allowed characters come back unchanged.
    #[test]
    fn ordinary_session_ids_pass_through() {
        let id = "abc-123_v2.0";
        assert_eq!(sanitize_component(id), id);
    }

    /// Even for the id `..`, the session directory stays inside the
    /// staged-state directory under the root.
    #[test]
    fn session_dir_stays_under_staged_root() {
        let dir = session_dir(Path::new("/workspace"), "..");

        let has_parent_component = dir.components().any(|c| matches!(c, Component::ParentDir));
        assert!(!has_parent_component, "session_dir({dir:?}) escapes via `..`");

        let staged = STATE_REL
            .iter()
            .fold(std::path::PathBuf::from("/workspace"), |acc, part| {
                acc.join(part)
            });
        assert!(dir.starts_with(&staged), "{dir:?} not under {staged:?}");
    }
}