Skip to main content

harn_hostlib/
fs.rs

1//! Session-scoped staged filesystem mode.
2//!
3//! `hostlib_fs_set_mode({session_id, mode: "staged"})` makes hostlib file
4//! mutations land in a durable per-session overlay under
5//! `.harn/state/staged/<session_id>/`. Reads made by the same session consult
6//! that overlay first, so agent loops see their own pending writes without
7//! touching the working tree until `hostlib_fs_commit_staged`.
8
9use std::collections::{BTreeMap, BTreeSet};
10use std::fs as stdfs;
11use std::io::Write;
12use std::path::{Component, Path, PathBuf};
13use std::rc::Rc;
14use std::sync::{Mutex, OnceLock};
15
16use harn_vm::agent_events::AgentEvent;
17use harn_vm::process_sandbox::{check_fs_path_scope, FsAccess};
18use harn_vm::VmValue;
19use serde::{Deserialize, Serialize};
20use sha2::{Digest, Sha256};
21
22use crate::error::HostlibError;
23use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
24use crate::tools::args::{
25    build_dict, dict_arg, optional_bool, optional_int, optional_string, optional_string_list,
26    require_string, str_value,
27};
28use crate::tools::permissions::enforce_path_scope;
29
// Builtin names under which the `fs` module's handlers are registered. They
// are also used as the `builtin` tag in errors raised by those handlers.
const SET_MODE_BUILTIN: &str = "hostlib_fs_set_mode";
const STATUS_BUILTIN: &str = "hostlib_fs_staged_status";
const COMMIT_BUILTIN: &str = "hostlib_fs_commit_staged";
const DISCARD_BUILTIN: &str = "hostlib_fs_discard_staged";
const SAFE_TEXT_PATCH_BUILTIN: &str = "hostlib_fs_safe_text_patch";
const READ_TEXT_BUILTIN: &str = "hostlib_fs_read_text";
const EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN: &str = "hostlib_fs_emit_safe_text_patch_result";

/// Version number for the staged-session manifest format.
// NOTE(review): presumably stored in `Manifest::version`; the code that
// writes the manifest is outside this excerpt — confirm.
const MANIFEST_VERSION: u32 = 1;
/// Path components of the staging directory (`.harn/state/staged`), matching
/// the per-session overlay location described in the module docs.
const STATE_REL: &[&str] = &[".harn", "state", "staged"];
40
/// Capability handle for the hostlib `fs` module.
///
/// A field-less unit struct; it exists so the `fs` builtins can be registered
/// through the [`HostlibCapability`] trait.
#[derive(Default)]
pub struct FsCapability;
44
45impl HostlibCapability for FsCapability {
46    fn module_name(&self) -> &'static str {
47        "fs"
48    }
49
50    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
51        register(registry, SET_MODE_BUILTIN, "set_mode", set_mode_builtin);
52        register(
53            registry,
54            STATUS_BUILTIN,
55            "staged_status",
56            staged_status_builtin,
57        );
58        register(
59            registry,
60            COMMIT_BUILTIN,
61            "commit_staged",
62            commit_staged_builtin,
63        );
64        register(
65            registry,
66            DISCARD_BUILTIN,
67            "discard_staged",
68            discard_staged_builtin,
69        );
70        // `safe_text_patch` and `read_text` touch arbitrary host paths, so
71        // they share the deterministic-tools gate with `tools::*` file I/O.
72        register_gated(
73            registry,
74            SAFE_TEXT_PATCH_BUILTIN,
75            "safe_text_patch",
76            safe_text_patch_builtin,
77        );
78        register_gated(registry, READ_TEXT_BUILTIN, "read_text", read_text_builtin);
79        register(
80            registry,
81            EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
82            "emit_safe_text_patch_result",
83            emit_safe_text_patch_result_builtin,
84        );
85    }
86}
87
88fn register(
89    registry: &mut BuiltinRegistry,
90    name: &'static str,
91    method: &'static str,
92    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
93) {
94    let handler: SyncHandler = std::sync::Arc::new(runner);
95    registry.register(RegisteredBuiltin {
96        name,
97        module: "fs",
98        method,
99        handler,
100    });
101}
102
103fn register_gated(
104    registry: &mut BuiltinRegistry,
105    name: &'static str,
106    method: &'static str,
107    runner: fn(&[VmValue]) -> Result<VmValue, HostlibError>,
108) {
109    registry.register(RegisteredBuiltin {
110        name,
111        module: "fs",
112        method,
113        handler: crate::tools::permissions::gated_handler(name, runner),
114    });
115}
116
117/// Filesystem mode for one hostlib session.
118#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
119#[serde(rename_all = "lowercase")]
120pub enum FsMode {
121    /// Mutations apply to the working tree immediately.
122    Immediate,
123    /// Mutations are recorded in the staging layer until committed.
124    Staged,
125}
126
127impl FsMode {
128    fn parse(builtin: &'static str, raw: &str) -> Result<Self, HostlibError> {
129        match raw {
130            "immediate" => Ok(Self::Immediate),
131            "staged" => Ok(Self::Staged),
132            other => Err(HostlibError::InvalidParameter {
133                builtin,
134                param: "mode",
135                message: format!("expected \"immediate\" or \"staged\", got `{other}`"),
136            }),
137        }
138    }
139
140    /// Wire string used by hostlib schemas.
141    pub fn as_str(self) -> &'static str {
142        match self {
143            Self::Immediate => "immediate",
144            Self::Staged => "staged",
145        }
146    }
147}
148
149#[derive(Clone, Debug, Serialize, Deserialize)]
150struct Manifest {
151    version: u32,
152    session_id: String,
153    mode: FsMode,
154    root: String,
155    entries: BTreeMap<String, StagedEntry>,
156}
157
158#[derive(Clone, Debug, Serialize, Deserialize)]
159#[serde(tag = "kind", rename_all = "snake_case")]
160enum StagedEntry {
161    Write {
162        body_hash: String,
163        len: u64,
164        created_at_ms: i64,
165    },
166    Delete {
167        recursive: bool,
168        created_at_ms: i64,
169    },
170}
171
172impl StagedEntry {
173    fn created_at_ms(&self) -> i64 {
174        match self {
175            Self::Write { created_at_ms, .. } | Self::Delete { created_at_ms, .. } => {
176                *created_at_ms
177            }
178        }
179    }
180
181    fn body_len(&self) -> u64 {
182        match self {
183            Self::Write { len, .. } => *len,
184            Self::Delete { .. } => 0,
185        }
186    }
187}
188
189#[derive(Clone, Debug)]
190struct SessionState {
191    session_id: String,
192    mode: FsMode,
193    root: PathBuf,
194    entries: BTreeMap<PathBuf, StagedEntry>,
195}
196
/// What a staged write did.
#[derive(Debug, Clone)]
pub(crate) struct WriteOutcome {
    /// `true` when the target did not already exist in the overlay view.
    pub(crate) created: bool,
    /// Number of bytes in the staged body.
    pub(crate) bytes_written: usize,
}
202
/// One directory entry returned by an overlay-aware directory listing.
#[derive(Debug, Clone)]
pub(crate) struct OverlayDirEntry {
    /// Entry name.
    pub(crate) name: String,
    /// Whether the entry is a directory.
    pub(crate) is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub(crate) is_symlink: bool,
    /// Entry size.
    // NOTE(review): presumably bytes; the code that fills this in is outside
    // this excerpt — confirm.
    pub(crate) size: u64,
}
210
211/// Summary of staged filesystem changes for one session.
212#[derive(Clone, Debug)]
213pub struct StagedStatus {
214    /// Pending path changes, sorted by path.
215    pub pending_writes: Vec<PendingWrite>,
216    /// Bytes stored in staged write bodies.
217    pub total_bytes_pending: u64,
218    /// Age in milliseconds of the oldest pending change, or 0 when empty.
219    pub oldest_pending_age_ms: i64,
220}
221
/// A single staged filesystem change, as reported in [`StagedStatus`].
#[derive(Debug, Clone)]
pub struct PendingWrite {
    /// Absolute path the change applies to.
    pub path: String,
    /// Kind of change: `write`, `delete`, or `move` (reserved for future use).
    pub kind: &'static str,
    /// Bytes the final staged view adds at this path.
    pub bytes_added: u64,
    /// Bytes the final staged view removes at this path.
    pub bytes_removed: u64,
}
234
235/// Result returned after changing a session's filesystem mode.
236#[derive(Clone, Debug)]
237pub struct SetModeResult {
238    /// Mode active before the change.
239    pub previous_mode: FsMode,
240}
241
/// Outcome of [`commit_staged`]: which staged paths reached disk and which
/// did not.
#[derive(Debug, Clone)]
pub struct CommitResult {
    /// Paths whose staged change was applied to disk.
    pub committed_paths: Vec<String>,
    /// `(path, reason)` pairs for changes that could not be applied; the
    /// reason is human-readable.
    pub failed_paths_with_reasons: Vec<(String, String)>,
}
250
/// Outcome of [`discard_staged`].
#[derive(Debug, Clone)]
pub struct DiscardResult {
    /// Paths whose staged entry was dropped.
    pub discarded_paths: Vec<String>,
}
257
258static SESSIONS: OnceLock<Mutex<BTreeMap<String, SessionState>>> = OnceLock::new();
259
260fn sessions() -> &'static Mutex<BTreeMap<String, SessionState>> {
261    SESSIONS.get_or_init(|| Mutex::new(BTreeMap::new()))
262}
263
264/// Remember the workspace root associated with a live session.
265///
266/// ACP calls this when a prompt starts so Harn code can call
267/// `hostlib_fs_set_mode({session_id, mode})` without also passing a root.
268pub fn configure_session_root(session_id: &str, root: &Path) {
269    if session_id.trim().is_empty() {
270        return;
271    }
272    let root = normalize_logical(root);
273    let mut guard = sessions()
274        .lock()
275        .expect("hostlib fs session mutex poisoned");
276    match guard.get_mut(session_id) {
277        Some(state) if state.entries.is_empty() => {
278            state.root = root;
279        }
280        Some(_) => {}
281        None => {
282            let state = load_state(session_id, Some(root.clone())).unwrap_or(SessionState {
283                session_id: session_id.to_string(),
284                mode: FsMode::Immediate,
285                root,
286                entries: BTreeMap::new(),
287            });
288            guard.insert(session_id.to_string(), state);
289        }
290    }
291}
292
293/// Set a session's filesystem mode.
294pub fn set_mode(
295    session_id: &str,
296    mode: FsMode,
297    root: Option<&Path>,
298) -> Result<SetModeResult, HostlibError> {
299    validate_session_id(SET_MODE_BUILTIN, session_id)?;
300    let mut guard = sessions()
301        .lock()
302        .expect("hostlib fs session mutex poisoned");
303    let mut state = state_for_locked(&mut guard, session_id, root.map(normalize_logical))?;
304    let previous_mode = state.mode;
305    state.mode = mode;
306    persist_state(&state, "set_mode", None).map_err(|err| HostlibError::Backend {
307        builtin: SET_MODE_BUILTIN,
308        message: err,
309    })?;
310    guard.insert(session_id.to_string(), state);
311    Ok(SetModeResult { previous_mode })
312}
313
314/// Return the staged status for a session.
315pub fn staged_status(session_id: &str) -> Result<StagedStatus, HostlibError> {
316    validate_session_id(STATUS_BUILTIN, session_id)?;
317    let mut guard = sessions()
318        .lock()
319        .expect("hostlib fs session mutex poisoned");
320    let state = state_for_locked(&mut guard, session_id, None)?;
321    let status = status_from_state(&state);
322    guard.insert(session_id.to_string(), state);
323    Ok(status)
324}
325
326/// Commit staged changes for all paths or for a filtered path list.
327pub fn commit_staged(session_id: &str, paths: &[String]) -> Result<CommitResult, HostlibError> {
328    validate_session_id(COMMIT_BUILTIN, session_id)?;
329    let mut guard = sessions()
330        .lock()
331        .expect("hostlib fs session mutex poisoned");
332    let mut state = state_for_locked(&mut guard, session_id, None)?;
333    let selected = selected_paths(&state, paths);
334    let mut committed_paths = Vec::new();
335    let mut failed_paths_with_reasons = Vec::new();
336
337    for path in selected {
338        let Some(entry) = state.entries.get(&path).cloned() else {
339            continue;
340        };
341        let path_label = path.to_string_lossy().into_owned();
342        // The overlay always lives inside the workspace, but commit flushes
343        // to the *target* working-tree path. Enforce workspace-root scope
344        // against that target so a staged entry — possibly persisted under
345        // a looser policy in an earlier session — can never write outside
346        // the roots active at commit time.
347        let access = match entry {
348            StagedEntry::Write { .. } => FsAccess::Write,
349            StagedEntry::Delete { .. } => FsAccess::Delete,
350        };
351        if let Err(violation) = check_fs_path_scope(&path, access) {
352            failed_paths_with_reasons.push((path_label, violation.message(COMMIT_BUILTIN)));
353            continue;
354        }
355        match commit_entry(&state, &path, &entry) {
356            Ok(()) => {
357                state.entries.remove(&path);
358                committed_paths.push(path_label);
359            }
360            Err(reason) => failed_paths_with_reasons.push((path_label, reason)),
361        }
362    }
363
364    persist_state(&state, "commit_staged", None).map_err(|err| HostlibError::Backend {
365        builtin: COMMIT_BUILTIN,
366        message: err,
367    })?;
368    emit_staged_update(&state);
369    guard.insert(session_id.to_string(), state);
370    Ok(CommitResult {
371        committed_paths,
372        failed_paths_with_reasons,
373    })
374}
375
376/// Discard staged changes for all paths or for a filtered path list.
377pub fn discard_staged(session_id: &str, paths: &[String]) -> Result<DiscardResult, HostlibError> {
378    validate_session_id(DISCARD_BUILTIN, session_id)?;
379    let mut guard = sessions()
380        .lock()
381        .expect("hostlib fs session mutex poisoned");
382    let mut state = state_for_locked(&mut guard, session_id, None)?;
383    let selected = selected_paths(&state, paths);
384    let mut discarded_paths = Vec::new();
385    for path in selected {
386        if state.entries.remove(&path).is_some() {
387            discarded_paths.push(path.to_string_lossy().into_owned());
388        }
389    }
390    persist_state(&state, "discard_staged", None).map_err(|err| HostlibError::Backend {
391        builtin: DISCARD_BUILTIN,
392        message: err,
393    })?;
394    emit_staged_update(&state);
395    guard.insert(session_id.to_string(), state);
396    Ok(DiscardResult { discarded_paths })
397}
398
399pub(crate) fn read(
400    path: &Path,
401    explicit_session_id: Option<&str>,
402) -> Option<std::io::Result<Vec<u8>>> {
403    let session_id = active_session_id(explicit_session_id)?;
404    let mut guard = sessions()
405        .lock()
406        .expect("hostlib fs session mutex poisoned");
407    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
408    let result = if state.mode == FsMode::Staged {
409        overlay_read(&state, path)
410    } else {
411        None
412    };
413    guard.insert(session_id, state);
414    result
415}
416
417pub(crate) fn read_to_string(
418    path: &Path,
419    explicit_session_id: Option<&str>,
420) -> Option<std::io::Result<String>> {
421    read(path, explicit_session_id).map(|result| {
422        result.and_then(|bytes| {
423            String::from_utf8(bytes).map_err(|err| {
424                std::io::Error::new(std::io::ErrorKind::InvalidData, err.to_string())
425            })
426        })
427    })
428}
429
430pub(crate) fn read_dir(
431    path: &Path,
432    explicit_session_id: Option<&str>,
433) -> Option<std::io::Result<Vec<OverlayDirEntry>>> {
434    let session_id = active_session_id(explicit_session_id)?;
435    let mut guard = sessions()
436        .lock()
437        .expect("hostlib fs session mutex poisoned");
438    let state = state_for_locked(&mut guard, &session_id, None).ok()?;
439    let result = if state.mode == FsMode::Staged {
440        Some(overlay_read_dir(&state, path))
441    } else {
442        None
443    };
444    guard.insert(session_id, state);
445    result
446}
447
448pub(crate) fn stage_write_or_none(
449    builtin: &'static str,
450    path: &Path,
451    bytes: &[u8],
452    create_parents: bool,
453    overwrite: bool,
454    explicit_session_id: Option<&str>,
455) -> Result<Option<WriteOutcome>, HostlibError> {
456    let Some(session_id) = active_session_id(explicit_session_id) else {
457        return Ok(None);
458    };
459    let mut guard = sessions()
460        .lock()
461        .expect("hostlib fs session mutex poisoned");
462    let mut state = state_for_locked(&mut guard, &session_id, None)?;
463    if state.mode != FsMode::Staged {
464        guard.insert(session_id, state);
465        return Ok(None);
466    }
467
468    let key = normalize_logical(path);
469    let existed = overlay_exists(&state, &key);
470    if existed && !overwrite {
471        guard.insert(session_id, state);
472        return Err(HostlibError::Backend {
473            builtin,
474            message: format!("`{}` exists and overwrite=false", key.display()),
475        });
476    }
477    if !create_parents && !parent_exists(&state, &key) {
478        guard.insert(session_id, state);
479        return Err(HostlibError::Backend {
480            builtin,
481            message: format!("parent directory for `{}` does not exist", key.display()),
482        });
483    }
484
485    let hash = write_body(&state, bytes).map_err(|err| HostlibError::Backend {
486        builtin,
487        message: err,
488    })?;
489    state.entries.insert(
490        key.clone(),
491        StagedEntry::Write {
492            body_hash: hash,
493            len: bytes.len() as u64,
494            created_at_ms: now_ms(),
495        },
496    );
497    persist_state(&state, "write", Some(&key)).map_err(|err| HostlibError::Backend {
498        builtin,
499        message: err,
500    })?;
501    emit_staged_update(&state);
502    guard.insert(session_id, state);
503    Ok(Some(WriteOutcome {
504        created: !existed,
505        bytes_written: bytes.len(),
506    }))
507}
508
509pub(crate) fn stage_delete_or_none(
510    builtin: &'static str,
511    path: &Path,
512    recursive: bool,
513    explicit_session_id: Option<&str>,
514) -> Result<Option<bool>, HostlibError> {
515    let Some(session_id) = active_session_id(explicit_session_id) else {
516        return Ok(None);
517    };
518    let mut guard = sessions()
519        .lock()
520        .expect("hostlib fs session mutex poisoned");
521    let mut state = state_for_locked(&mut guard, &session_id, None)?;
522    if state.mode != FsMode::Staged {
523        guard.insert(session_id, state);
524        return Ok(None);
525    }
526
527    let key = normalize_logical(path);
528    let staged_targets = staged_paths_under(&state, &key);
529    let disk_exists = key.exists();
530    if !disk_exists && staged_targets.is_empty() {
531        guard.insert(session_id, state);
532        return Ok(Some(false));
533    }
534
535    if !disk_exists {
536        for staged in staged_targets {
537            state.entries.remove(&staged);
538        }
539    } else {
540        validate_delete_shape(builtin, &key, recursive)?;
541        for staged in staged_targets {
542            state.entries.remove(&staged);
543        }
544        state.entries.insert(
545            key.clone(),
546            StagedEntry::Delete {
547                recursive,
548                created_at_ms: now_ms(),
549            },
550        );
551    }
552    persist_state(&state, "delete", Some(&key)).map_err(|err| HostlibError::Backend {
553        builtin,
554        message: err,
555    })?;
556    emit_staged_update(&state);
557    guard.insert(session_id, state);
558    Ok(Some(true))
559}
560
561/// Outcome of one [`safe_text_patch`] call. `applied` says whether the
562/// on-disk (or staged-overlay) bytes changed; `result` carries the
563/// structured discriminant used by the wire/JSON shape.
564#[derive(Clone, Debug)]
565pub struct SafeTextPatchOutcome {
566    /// Discriminant: `"applied"`, `"stale_base"`, or `"no_op"`.
567    pub result: SafeTextPatchResult,
568    /// `sha256:HEX` of the pre-image (overlay-aware) the call observed.
569    pub current_hash: String,
570    /// `sha256:HEX` of the requested post-image.
571    pub after_hash: String,
572    /// `true` when the file did not exist before the call.
573    pub created: bool,
574    /// Bytes written; `0` on `stale_base` or `no_op`.
575    pub bytes_written: usize,
576}
577
/// Discriminant for a [`safe_text_patch`] outcome.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeTextPatchResult {
    /// The pre-image hash matched (or none was supplied) and the post-image
    /// differs from the pre-image, so bytes were written.
    Applied,
    /// `expected_hash` differed from the observed pre-image hash; nothing was
    /// written. Callers should re-read and retry.
    StaleBase,
    /// The pre-image hash matched and the post-image equals the pre-image;
    /// the write was skipped to avoid spurious timestamps and overlay churn.
    NoOp,
}

impl SafeTextPatchResult {
    /// Wire spelling of the discriminant.
    fn as_str(self) -> &'static str {
        match self {
            Self::NoOp => "no_op",
            Self::StaleBase => "stale_base",
            Self::Applied => "applied",
        }
    }
}
601
602/// Format `bytes` as the `sha256:HEX` label used in `before_sha256` /
603/// `after_sha256` / `current_hash` / `expected_hash` everywhere in the
604/// safe-text-patch surface.
605fn hash_label(bytes: &[u8]) -> String {
606    format!("sha256:{}", hex::encode(Sha256::digest(bytes)))
607}
608
609/// Atomic compare-and-swap-style text write.
610///
611/// Reads the current bytes at `path` through the staged-fs overlay (when a
612/// session is active) so concurrent agent edits see each other's pending
613/// writes. If `expected_hash` is supplied and differs from the observed
614/// `sha256:HEX`, returns `SafeTextPatchResult::StaleBase` without
615/// mutating any state. On a hash match the post-image is written through
616/// the same overlay path, keeping the read and the write atomic with
617/// respect to other staged-fs consumers in the same process.
618///
619/// Atomicity:
620///
621/// - When a session is in staged mode, the read, hash check, and write
622///   all happen under a single acquisition of the sessions mutex, so a
623///   sibling thread cannot stage a write into the window between the
624///   pre-image snapshot and the commit.
625/// - When the call routes through disk (no active session, or session in
626///   immediate mode), the write goes through an atomic rename-into-place
627///   so partial-write tearing is impossible. Cross-process races are
628///   intentionally out of scope — the staged-fs overlay is the
629///   collision-rejection layer.
630pub fn safe_text_patch(
631    path: &Path,
632    content: &str,
633    expected_hash: Option<&str>,
634    session_id: Option<&str>,
635    create_parents: bool,
636    overwrite: bool,
637) -> Result<SafeTextPatchOutcome, HostlibError> {
638    let new_bytes = content.as_bytes();
639    let after_hash = hash_label(new_bytes);
640
641    if let Some(outcome) = safe_text_patch_staged(
642        path,
643        new_bytes,
644        expected_hash,
645        session_id,
646        create_parents,
647        overwrite,
648        &after_hash,
649    )? {
650        return Ok(outcome);
651    }
652
653    safe_text_patch_disk(
654        path,
655        new_bytes,
656        expected_hash,
657        create_parents,
658        overwrite,
659        after_hash,
660    )
661}
662
663/// Atomic CAS path for a session in `staged` mode. Holds the sessions
664/// mutex through the entire read → hash → check → write so concurrent
665/// agents in the same process cannot race the snapshot. Returns `None`
666/// when no session is active or the session is in `immediate` mode, so
667/// the caller can fall through to the disk path.
668#[allow(clippy::too_many_arguments)]
669fn safe_text_patch_staged(
670    path: &Path,
671    new_bytes: &[u8],
672    expected_hash: Option<&str>,
673    session_id: Option<&str>,
674    create_parents: bool,
675    overwrite: bool,
676    after_hash: &str,
677) -> Result<Option<SafeTextPatchOutcome>, HostlibError> {
678    let Some(session) = active_session_id(session_id) else {
679        return Ok(None);
680    };
681    let mut guard = sessions()
682        .lock()
683        .expect("hostlib fs session mutex poisoned");
684    let mut state = state_for_locked(&mut guard, &session, None)?;
685    if state.mode != FsMode::Staged {
686        guard.insert(session, state);
687        return Ok(None);
688    }
689
690    let key = normalize_logical(path);
691    let (existing_bytes, existed) = match overlay_read(&state, path) {
692        Some(Ok(bytes)) => (bytes, true),
693        Some(Err(err)) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
694        Some(Err(err)) => {
695            guard.insert(session, state);
696            return Err(HostlibError::Backend {
697                builtin: SAFE_TEXT_PATCH_BUILTIN,
698                message: format!("read `{}`: {err}", path.display()),
699            });
700        }
701        None => match stdfs::read(path) {
702            Ok(bytes) => (bytes, true),
703            Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
704            Err(err) => {
705                guard.insert(session, state);
706                return Err(HostlibError::Backend {
707                    builtin: SAFE_TEXT_PATCH_BUILTIN,
708                    message: format!("read `{}`: {err}", path.display()),
709                });
710            }
711        },
712    };
713    let current_hash = hash_label(&existing_bytes);
714
715    if let Some(expected) = expected_hash {
716        if expected != current_hash {
717            guard.insert(session, state);
718            return Ok(Some(SafeTextPatchOutcome {
719                result: SafeTextPatchResult::StaleBase,
720                current_hash,
721                after_hash: after_hash.to_string(),
722                created: false,
723                bytes_written: 0,
724            }));
725        }
726    }
727
728    if existed && existing_bytes == new_bytes {
729        guard.insert(session, state);
730        return Ok(Some(SafeTextPatchOutcome {
731            result: SafeTextPatchResult::NoOp,
732            current_hash,
733            after_hash: after_hash.to_string(),
734            created: false,
735            bytes_written: 0,
736        }));
737    }
738
739    let overlay_existed = overlay_exists(&state, &key);
740    if overlay_existed && !overwrite {
741        guard.insert(session, state);
742        return Err(HostlibError::Backend {
743            builtin: SAFE_TEXT_PATCH_BUILTIN,
744            message: format!("`{}` exists and overwrite=false", key.display()),
745        });
746    }
747    if !create_parents && !parent_exists(&state, &key) {
748        guard.insert(session, state);
749        return Err(HostlibError::Backend {
750            builtin: SAFE_TEXT_PATCH_BUILTIN,
751            message: format!("parent directory for `{}` does not exist", key.display()),
752        });
753    }
754
755    let body_hash = write_body(&state, new_bytes).map_err(|err| HostlibError::Backend {
756        builtin: SAFE_TEXT_PATCH_BUILTIN,
757        message: err,
758    })?;
759    state.entries.insert(
760        key.clone(),
761        StagedEntry::Write {
762            body_hash,
763            len: new_bytes.len() as u64,
764            created_at_ms: now_ms(),
765        },
766    );
767    persist_state(&state, "safe_text_patch", Some(&key)).map_err(|err| HostlibError::Backend {
768        builtin: SAFE_TEXT_PATCH_BUILTIN,
769        message: err,
770    })?;
771    emit_staged_update(&state);
772    guard.insert(session, state);
773
774    Ok(Some(SafeTextPatchOutcome {
775        result: SafeTextPatchResult::Applied,
776        current_hash,
777        after_hash: after_hash.to_string(),
778        created: !existed,
779        bytes_written: new_bytes.len(),
780    }))
781}
782
783/// Disk path for callers without an active staged session. Uses
784/// `atomic_write` so the post-image lands via rename-into-place rather
785/// than an open/truncate/write/close sequence — readers either see the
786/// pre-image or the post-image, never a torn write.
787fn safe_text_patch_disk(
788    path: &Path,
789    new_bytes: &[u8],
790    expected_hash: Option<&str>,
791    create_parents: bool,
792    overwrite: bool,
793    after_hash: String,
794) -> Result<SafeTextPatchOutcome, HostlibError> {
795    let (existing_bytes, existed) = match stdfs::read(path) {
796        Ok(bytes) => (bytes, true),
797        Err(err) if err.kind() == std::io::ErrorKind::NotFound => (Vec::new(), false),
798        Err(err) => {
799            return Err(HostlibError::Backend {
800                builtin: SAFE_TEXT_PATCH_BUILTIN,
801                message: format!("read `{}`: {err}", path.display()),
802            });
803        }
804    };
805    let current_hash = hash_label(&existing_bytes);
806
807    if let Some(expected) = expected_hash {
808        if expected != current_hash {
809            return Ok(SafeTextPatchOutcome {
810                result: SafeTextPatchResult::StaleBase,
811                current_hash,
812                after_hash,
813                created: false,
814                bytes_written: 0,
815            });
816        }
817    }
818
819    if existed && existing_bytes == new_bytes {
820        return Ok(SafeTextPatchOutcome {
821            result: SafeTextPatchResult::NoOp,
822            current_hash,
823            after_hash,
824            created: false,
825            bytes_written: 0,
826        });
827    }
828    if existed && !overwrite {
829        return Err(HostlibError::Backend {
830            builtin: SAFE_TEXT_PATCH_BUILTIN,
831            message: format!("`{}` exists and overwrite=false", path.display()),
832        });
833    }
834    if !create_parents {
835        if let Some(parent) = path.parent() {
836            if !parent.as_os_str().is_empty() && !parent.is_dir() {
837                return Err(HostlibError::Backend {
838                    builtin: SAFE_TEXT_PATCH_BUILTIN,
839                    message: format!(
840                        "parent directory for `{}` does not exist (pass create_parents=true to mkdir)",
841                        path.display()
842                    ),
843                });
844            }
845        }
846    }
847
848    crate::fs_snapshot::auto_capture_for_write(SAFE_TEXT_PATCH_BUILTIN, path);
849    atomic_write(path, new_bytes).map_err(|err| HostlibError::Backend {
850        builtin: SAFE_TEXT_PATCH_BUILTIN,
851        message: format!("write `{}`: {err}", path.display()),
852    })?;
853
854    Ok(SafeTextPatchOutcome {
855        result: SafeTextPatchResult::Applied,
856        current_hash,
857        after_hash,
858        created: !existed,
859        bytes_written: new_bytes.len(),
860    })
861}
862
863/// Read the pre-image through the staged-fs overlay (when active),
864/// falling back to disk. Returns `(bytes, existed_on_disk_or_overlay)`.
865/// `builtin` is the caller's tag — used so backend errors point at the
866/// right builtin name in diagnostics.
867fn read_existing(
868    builtin: &'static str,
869    path: &Path,
870    session_id: Option<&str>,
871) -> Result<(Vec<u8>, bool), HostlibError> {
872    if let Some(result) = read(path, session_id) {
873        return match result {
874            Ok(bytes) => Ok((bytes, true)),
875            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
876            Err(err) => Err(HostlibError::Backend {
877                builtin,
878                message: format!("read `{}`: {err}", path.display()),
879            }),
880        };
881    }
882    match stdfs::read(path) {
883        Ok(bytes) => Ok((bytes, true)),
884        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok((Vec::new(), false)),
885        Err(err) => Err(HostlibError::Backend {
886            builtin,
887            message: format!("read `{}`: {err}", path.display()),
888        }),
889    }
890}
891
892fn read_text_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
893    let raw = dict_arg(READ_TEXT_BUILTIN, args)?;
894    let dict = raw.as_ref();
895    let path_str = require_string(READ_TEXT_BUILTIN, dict, "path")?;
896    let session_id = optional_string(READ_TEXT_BUILTIN, dict, "session_id")?;
897    let path = Path::new(&path_str);
898    enforce_path_scope(READ_TEXT_BUILTIN, path, FsAccess::Read)?;
899
900    let (bytes, existed) = read_existing(READ_TEXT_BUILTIN, path, session_id.as_deref())?;
901    let hash = hash_label(&bytes);
902    let content = match std::str::from_utf8(&bytes) {
903        Ok(s) => s.to_string(),
904        Err(err) => {
905            return Err(HostlibError::Backend {
906                builtin: READ_TEXT_BUILTIN,
907                message: format!("`{path_str}` is not valid UTF-8: {err}"),
908            });
909        }
910    };
911    let bytes_len = bytes.len() as i64;
912    Ok(build_dict([
913        ("path", str_value(&path_str)),
914        ("content", str_value(&content)),
915        ("sha256", str_value(&hash)),
916        ("size", VmValue::Int(bytes_len)),
917        ("exists", VmValue::Bool(existed)),
918    ]))
919}
920
921fn safe_text_patch_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
922    let raw = dict_arg(SAFE_TEXT_PATCH_BUILTIN, args)?;
923    let dict = raw.as_ref();
924
925    let path_str = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "path")?;
926    let content = require_string(SAFE_TEXT_PATCH_BUILTIN, dict, "content")?;
927    let expected_hash = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "expected_hash")?;
928    let session_id = optional_string(SAFE_TEXT_PATCH_BUILTIN, dict, "session_id")?;
929    let create_parents = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "create_parents", true)?;
930    let overwrite = optional_bool(SAFE_TEXT_PATCH_BUILTIN, dict, "overwrite", true)?;
931
932    enforce_path_scope(
933        SAFE_TEXT_PATCH_BUILTIN,
934        Path::new(&path_str),
935        FsAccess::Write,
936    )?;
937    let outcome = safe_text_patch(
938        Path::new(&path_str),
939        &content,
940        expected_hash.as_deref(),
941        session_id.as_deref(),
942        create_parents,
943        overwrite,
944    )?;
945
946    let entries: Vec<(&'static str, VmValue)> = vec![
947        ("path", str_value(&path_str)),
948        ("result", str_value(outcome.result.as_str())),
949        (
950            "applied",
951            VmValue::Bool(outcome.result == SafeTextPatchResult::Applied),
952        ),
953        (
954            "stale_base",
955            VmValue::Bool(outcome.result == SafeTextPatchResult::StaleBase),
956        ),
957        ("current_hash", str_value(&outcome.current_hash)),
958        ("before_sha256", str_value(&outcome.current_hash)),
959        ("after_sha256", str_value(&outcome.after_hash)),
960        ("created", VmValue::Bool(outcome.created)),
961        ("bytes_written", VmValue::Int(outcome.bytes_written as i64)),
962        (
963            "expected_hash",
964            match expected_hash.as_deref() {
965                Some(hash) => str_value(hash),
966                None => VmValue::Nil,
967            },
968        ),
969    ];
970    Ok(build_dict(entries))
971}
972
973fn emit_safe_text_patch_result_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
974    let raw = dict_arg(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, args)?;
975    let dict = raw.as_ref();
976
977    let path = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "path")?;
978    let result = require_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "result")?;
979    let hunks_count = optional_int(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "hunks_count", 0)?;
980    let bytes_written = optional_int(
981        EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
982        dict,
983        "bytes_written",
984        0,
985    )?;
986    let failed_hunk_index = match dict.get("failed_hunk_index") {
987        None | Some(VmValue::Nil) => None,
988        Some(VmValue::Int(n)) if *n >= 0 => Some(*n as usize),
989        Some(other) => {
990            return Err(HostlibError::InvalidParameter {
991                builtin: EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN,
992                param: "failed_hunk_index",
993                message: format!("expected non-negative integer, got {}", other.type_name()),
994            });
995        }
996    };
997    let session_id = optional_string(EMIT_SAFE_TEXT_PATCH_RESULT_BUILTIN, dict, "session_id")?
998        .or_else(harn_vm::agent_sessions::current_session_id);
999
1000    if let Some(session_id) = session_id.filter(|s| !s.trim().is_empty()) {
1001        harn_vm::agent_events::emit_event(&AgentEvent::SafeTextPatchResult {
1002            session_id,
1003            path,
1004            result,
1005            hunks_count: hunks_count.max(0) as usize,
1006            bytes_written: bytes_written.max(0) as u64,
1007            failed_hunk_index,
1008        });
1009        Ok(VmValue::Bool(true))
1010    } else {
1011        // Silently no-op when no session is active — telemetry without a
1012        // session has nowhere to route. Caller can opt in by always
1013        // passing session_id explicitly.
1014        Ok(VmValue::Bool(false))
1015    }
1016}
1017
1018fn set_mode_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1019    let raw = dict_arg(SET_MODE_BUILTIN, args)?;
1020    let dict = raw.as_ref();
1021    let session_id = require_string(SET_MODE_BUILTIN, dict, "session_id")?;
1022    let mode = FsMode::parse(
1023        SET_MODE_BUILTIN,
1024        &require_string(SET_MODE_BUILTIN, dict, "mode")?,
1025    )?;
1026    let root = optional_string(SET_MODE_BUILTIN, dict, "root")?.map(PathBuf::from);
1027    let result = set_mode(&session_id, mode, root.as_deref())?;
1028    Ok(build_dict([(
1029        "previous_mode",
1030        str_value(result.previous_mode.as_str()),
1031    )]))
1032}
1033
1034fn staged_status_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1035    let raw = dict_arg(STATUS_BUILTIN, args)?;
1036    let session_id = require_string(STATUS_BUILTIN, raw.as_ref(), "session_id")?;
1037    Ok(status_to_value(staged_status(&session_id)?))
1038}
1039
1040fn commit_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1041    let raw = dict_arg(COMMIT_BUILTIN, args)?;
1042    let dict = raw.as_ref();
1043    let session_id = require_string(COMMIT_BUILTIN, dict, "session_id")?;
1044    let paths = optional_string_list(COMMIT_BUILTIN, dict, "paths")?;
1045    Ok(commit_result_to_value(commit_staged(&session_id, &paths)?))
1046}
1047
1048fn discard_staged_builtin(args: &[VmValue]) -> Result<VmValue, HostlibError> {
1049    let raw = dict_arg(DISCARD_BUILTIN, args)?;
1050    let dict = raw.as_ref();
1051    let session_id = require_string(DISCARD_BUILTIN, dict, "session_id")?;
1052    let paths = optional_string_list(DISCARD_BUILTIN, dict, "paths")?;
1053    Ok(discard_result_to_value(discard_staged(
1054        &session_id,
1055        &paths,
1056    )?))
1057}
1058
1059fn state_for_locked(
1060    guard: &mut BTreeMap<String, SessionState>,
1061    session_id: &str,
1062    root: Option<PathBuf>,
1063) -> Result<SessionState, HostlibError> {
1064    if let Some(existing) = guard.get(session_id) {
1065        let mut state = existing.clone();
1066        if let Some(root) = root {
1067            if state.entries.is_empty() {
1068                state.root = root;
1069            }
1070        }
1071        return Ok(state);
1072    }
1073    let state = load_state(session_id, root).map_err(|err| HostlibError::Backend {
1074        builtin: SET_MODE_BUILTIN,
1075        message: err,
1076    })?;
1077    Ok(state)
1078}
1079
1080fn load_state(session_id: &str, root: Option<PathBuf>) -> Result<SessionState, String> {
1081    let root = root.unwrap_or_else(default_root);
1082    let manifest_path = manifest_path(&root, session_id);
1083    if manifest_path.exists() {
1084        let text = stdfs::read_to_string(&manifest_path)
1085            .map_err(|err| format!("read {}: {err}", manifest_path.display()))?;
1086        let manifest: Manifest = serde_json::from_str(&text)
1087            .map_err(|err| format!("parse {}: {err}", manifest_path.display()))?;
1088        if manifest.version != MANIFEST_VERSION {
1089            return Err(format!(
1090                "unsupported staged fs manifest version {} in {}",
1091                manifest.version,
1092                manifest_path.display()
1093            ));
1094        }
1095        if manifest.session_id != session_id {
1096            return Err(format!(
1097                "staged fs manifest session id mismatch in {}",
1098                manifest_path.display()
1099            ));
1100        }
1101        return Ok(SessionState {
1102            session_id: manifest.session_id,
1103            mode: manifest.mode,
1104            root: normalize_logical(Path::new(&manifest.root)),
1105            entries: manifest
1106                .entries
1107                .into_iter()
1108                .map(|(path, entry)| (normalize_logical(Path::new(&path)), entry))
1109                .collect(),
1110        });
1111    }
1112    Ok(SessionState {
1113        session_id: session_id.to_string(),
1114        mode: FsMode::Immediate,
1115        root,
1116        entries: BTreeMap::new(),
1117    })
1118}
1119
1120fn persist_state(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1121    let dir = session_dir(&state.root, &state.session_id);
1122    stdfs::create_dir_all(dir.join("bodies"))
1123        .map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1124    let manifest = Manifest {
1125        version: MANIFEST_VERSION,
1126        session_id: state.session_id.clone(),
1127        mode: state.mode,
1128        root: state.root.to_string_lossy().into_owned(),
1129        entries: state
1130            .entries
1131            .iter()
1132            .map(|(path, entry)| (path.to_string_lossy().into_owned(), entry.clone()))
1133            .collect(),
1134    };
1135    let bytes = serde_json::to_vec_pretty(&manifest)
1136        .map_err(|err| format!("serialize staged manifest: {err}"))?;
1137    atomic_write(&manifest_path(&state.root, &state.session_id), &bytes)?;
1138    append_journal(state, op, path)?;
1139    prune_unreferenced_bodies(state);
1140    Ok(())
1141}
1142
1143fn append_journal(state: &SessionState, op: &str, path: Option<&Path>) -> Result<(), String> {
1144    let dir = session_dir(&state.root, &state.session_id);
1145    stdfs::create_dir_all(&dir).map_err(|err| format!("mkdir {}: {err}", dir.display()))?;
1146    let line = serde_json::to_string(&serde_json::json!({
1147        "ts_ms": now_ms(),
1148        "op": op,
1149        "path": path.map(|path| path.to_string_lossy().into_owned()),
1150        "pending_count": state.entries.len(),
1151    }))
1152    .map_err(|err| format!("serialize staged journal: {err}"))?;
1153    let mut file = stdfs::OpenOptions::new()
1154        .create(true)
1155        .append(true)
1156        .open(dir.join("journal.jsonl"))
1157        .map_err(|err| format!("open staged journal: {err}"))?;
1158    writeln!(file, "{line}").map_err(|err| format!("write staged journal: {err}"))
1159}
1160
1161fn write_body(state: &SessionState, bytes: &[u8]) -> Result<String, String> {
1162    let hash = hex::encode(Sha256::digest(bytes));
1163    let path = session_dir(&state.root, &state.session_id)
1164        .join("bodies")
1165        .join(&hash);
1166    if !path.exists() {
1167        atomic_write(&path, bytes)?;
1168    }
1169    Ok(hash)
1170}
1171
1172fn read_body(state: &SessionState, hash: &str) -> std::io::Result<Vec<u8>> {
1173    stdfs::read(
1174        session_dir(&state.root, &state.session_id)
1175            .join("bodies")
1176            .join(hash),
1177    )
1178}
1179
1180fn prune_unreferenced_bodies(state: &SessionState) {
1181    let live: BTreeSet<String> = state
1182        .entries
1183        .values()
1184        .filter_map(|entry| match entry {
1185            StagedEntry::Write { body_hash, .. } => Some(body_hash.clone()),
1186            StagedEntry::Delete { .. } => None,
1187        })
1188        .collect();
1189    let body_dir = session_dir(&state.root, &state.session_id).join("bodies");
1190    let Ok(entries) = stdfs::read_dir(&body_dir) else {
1191        return;
1192    };
1193    for entry in entries.flatten() {
1194        let name = entry.file_name().to_string_lossy().into_owned();
1195        if !live.contains(&name) {
1196            let _ = stdfs::remove_file(entry.path());
1197        }
1198    }
1199}
1200
1201fn atomic_write(path: &Path, bytes: &[u8]) -> Result<(), String> {
1202    if let Some(parent) = path.parent() {
1203        stdfs::create_dir_all(parent)
1204            .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
1205    }
1206    let tmp = path.with_extension(format!("tmp-{}-{}", std::process::id(), now_ms()));
1207    stdfs::write(&tmp, bytes).map_err(|err| format!("write {}: {err}", tmp.display()))?;
1208    match stdfs::rename(&tmp, path) {
1209        Ok(()) => Ok(()),
1210        Err(err) => {
1211            let _ = stdfs::remove_file(path);
1212            stdfs::rename(&tmp, path).map_err(|retry| {
1213                format!(
1214                    "rename {} to {}: {err}; retry: {retry}",
1215                    tmp.display(),
1216                    path.display()
1217                )
1218            })
1219        }
1220    }
1221}
1222
1223fn commit_entry(state: &SessionState, path: &Path, entry: &StagedEntry) -> Result<(), String> {
1224    match entry {
1225        StagedEntry::Write { body_hash, .. } => {
1226            let bytes = read_body(state, body_hash)
1227                .map_err(|err| format!("read staged body for {}: {err}", path.display()))?;
1228            atomic_write(path, &bytes)
1229        }
1230        StagedEntry::Delete { recursive, .. } => match stdfs::symlink_metadata(path) {
1231            Ok(metadata) if metadata.is_dir() => {
1232                if *recursive {
1233                    stdfs::remove_dir_all(path)
1234                        .map_err(|err| format!("remove_dir_all {}: {err}", path.display()))
1235                } else {
1236                    stdfs::remove_dir(path)
1237                        .map_err(|err| format!("remove_dir {}: {err}", path.display()))
1238                }
1239            }
1240            Ok(_) => stdfs::remove_file(path)
1241                .map_err(|err| format!("remove_file {}: {err}", path.display())),
1242            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1243            Err(err) => Err(format!("stat {}: {err}", path.display())),
1244        },
1245    }
1246}
1247
1248fn overlay_read(state: &SessionState, path: &Path) -> Option<std::io::Result<Vec<u8>>> {
1249    let key = normalize_logical(path);
1250    if let Some(entry) = state.entries.get(&key) {
1251        return Some(match entry {
1252            StagedEntry::Write { body_hash, .. } => read_body(state, body_hash),
1253            StagedEntry::Delete { .. } => Err(not_found(&key)),
1254        });
1255    }
1256    if deleted_ancestor(state, &key) {
1257        return Some(Err(not_found(&key)));
1258    }
1259    None
1260}
1261
1262fn overlay_read_dir(state: &SessionState, path: &Path) -> std::io::Result<Vec<OverlayDirEntry>> {
1263    let dir_key = normalize_logical(path);
1264    if matches!(state.entries.get(&dir_key), Some(StagedEntry::Write { .. }))
1265        || deleted_ancestor(state, &dir_key)
1266        || matches!(
1267            state.entries.get(&dir_key),
1268            Some(StagedEntry::Delete { .. })
1269        )
1270    {
1271        return Err(not_found(&dir_key));
1272    }
1273    if !path.exists() && !has_staged_descendant(state, &dir_key) {
1274        return Err(not_found(&dir_key));
1275    }
1276
1277    let mut entries: BTreeMap<String, OverlayDirEntry> = BTreeMap::new();
1278    if path.exists() {
1279        for entry in stdfs::read_dir(path)? {
1280            let entry = entry?;
1281            let name = entry.file_name().to_string_lossy().into_owned();
1282            let file_type = entry.file_type().ok();
1283            let metadata = entry.metadata().ok();
1284            entries.insert(
1285                name.clone(),
1286                OverlayDirEntry {
1287                    name,
1288                    is_dir: file_type.is_some_and(|ty| ty.is_dir()),
1289                    is_symlink: file_type.is_some_and(|ty| ty.is_symlink()),
1290                    size: metadata.map(|m| m.len()).unwrap_or(0),
1291                },
1292            );
1293        }
1294    }
1295
1296    for (path, entry) in &state.entries {
1297        let Some(name) = overlay_child_name(path, &dir_key) else {
1298            continue;
1299        };
1300        match entry {
1301            StagedEntry::Write { len, .. } => {
1302                let is_dir = path.parent() != Some(dir_key.as_path());
1303                entries.insert(
1304                    name.clone(),
1305                    OverlayDirEntry {
1306                        name,
1307                        is_dir,
1308                        is_symlink: false,
1309                        size: if is_dir { 0 } else { *len },
1310                    },
1311                );
1312            }
1313            StagedEntry::Delete { .. } => {
1314                if path.parent() == Some(dir_key.as_path()) {
1315                    entries.remove(&name);
1316                }
1317            }
1318        }
1319    }
1320
1321    Ok(entries.into_values().collect())
1322}
1323
/// Name of the immediate child of `dir` that `path` lives under.
///
/// Returns `None` when `path` is not below `dir`, when it equals `dir`, or
/// when the first component after `dir` is not a normal name (for example
/// `..`).
fn overlay_child_name(path: &Path, dir: &Path) -> Option<String> {
    let relative = path.strip_prefix(dir).ok()?;
    if let Some(Component::Normal(name)) = relative.components().next() {
        Some(name.to_string_lossy().into_owned())
    } else {
        None
    }
}
1333
1334fn overlay_exists(state: &SessionState, path: &Path) -> bool {
1335    if let Some(entry) = state.entries.get(path) {
1336        return matches!(entry, StagedEntry::Write { .. });
1337    }
1338    if deleted_ancestor(state, path) {
1339        return false;
1340    }
1341    if has_staged_descendant(state, path) {
1342        return true;
1343    }
1344    path.exists()
1345}
1346
1347fn parent_exists(state: &SessionState, path: &Path) -> bool {
1348    let Some(parent) = path.parent() else {
1349        return true;
1350    };
1351    if parent.as_os_str().is_empty() {
1352        return true;
1353    }
1354    if let Some(entry) = state.entries.get(parent) {
1355        return !matches!(entry, StagedEntry::Delete { .. });
1356    }
1357    if deleted_ancestor(state, parent) {
1358        return false;
1359    }
1360    if has_staged_descendant(state, parent) {
1361        return true;
1362    }
1363    parent.is_dir()
1364}
1365
1366fn deleted_ancestor(state: &SessionState, path: &Path) -> bool {
1367    state.entries.iter().any(|(candidate, entry)| {
1368        matches!(entry, StagedEntry::Delete { .. })
1369            && path != candidate.as_path()
1370            && path.starts_with(candidate)
1371    })
1372}
1373
1374fn has_staged_descendant(state: &SessionState, path: &Path) -> bool {
1375    state.entries.iter().any(|(candidate, entry)| {
1376        matches!(entry, StagedEntry::Write { .. })
1377            && candidate != path
1378            && candidate.starts_with(path)
1379    })
1380}
1381
1382fn staged_paths_under(state: &SessionState, path: &Path) -> Vec<PathBuf> {
1383    state
1384        .entries
1385        .keys()
1386        .filter(|candidate| *candidate == path || candidate.starts_with(path))
1387        .cloned()
1388        .collect()
1389}
1390
1391fn validate_delete_shape(
1392    builtin: &'static str,
1393    path: &Path,
1394    recursive: bool,
1395) -> Result<(), HostlibError> {
1396    let Ok(metadata) = stdfs::symlink_metadata(path) else {
1397        return Ok(());
1398    };
1399    if metadata.is_dir() && !recursive {
1400        let mut entries = stdfs::read_dir(path).map_err(|err| HostlibError::Backend {
1401            builtin,
1402            message: format!("read_dir `{}`: {err}", path.display()),
1403        })?;
1404        if entries.next().is_some() {
1405            return Err(HostlibError::Backend {
1406                builtin,
1407                message: format!(
1408                    "remove_dir `{}` (pass recursive=true to delete non-empty dirs): directory not empty",
1409                    path.display()
1410                ),
1411            });
1412        }
1413    }
1414    Ok(())
1415}
1416
1417fn status_from_state(state: &SessionState) -> StagedStatus {
1418    let now = now_ms();
1419    let mut pending_writes = Vec::new();
1420    let mut total_bytes_pending = 0u64;
1421    let mut oldest = None;
1422    for (path, entry) in &state.entries {
1423        total_bytes_pending = total_bytes_pending.saturating_add(entry.body_len());
1424        oldest = Some(oldest.map_or(entry.created_at_ms(), |old: i64| {
1425            old.min(entry.created_at_ms())
1426        }));
1427        let (kind, bytes_added, bytes_removed) = match entry {
1428            StagedEntry::Write { len, .. } => ("write", *len, disk_size(path).unwrap_or(0)),
1429            StagedEntry::Delete { .. } => ("delete", 0, disk_size(path).unwrap_or(0)),
1430        };
1431        pending_writes.push(PendingWrite {
1432            path: path.to_string_lossy().into_owned(),
1433            kind,
1434            bytes_added,
1435            bytes_removed,
1436        });
1437    }
1438    StagedStatus {
1439        pending_writes,
1440        total_bytes_pending,
1441        oldest_pending_age_ms: oldest.map(|old| now.saturating_sub(old)).unwrap_or(0),
1442    }
1443}
1444
1445fn disk_size(path: &Path) -> Option<u64> {
1446    let metadata = stdfs::symlink_metadata(path).ok()?;
1447    if metadata.is_file() {
1448        return Some(metadata.len());
1449    }
1450    if metadata.is_dir() {
1451        let mut total = 0u64;
1452        for entry in walkdir::WalkDir::new(path)
1453            .into_iter()
1454            .filter_map(Result::ok)
1455        {
1456            if let Ok(metadata) = entry.metadata() {
1457                if metadata.is_file() {
1458                    total = total.saturating_add(metadata.len());
1459                }
1460            }
1461        }
1462        return Some(total);
1463    }
1464    Some(metadata.len())
1465}
1466
1467fn selected_paths(state: &SessionState, paths: &[String]) -> Vec<PathBuf> {
1468    if paths.is_empty() {
1469        return state.entries.keys().cloned().collect();
1470    }
1471    let selected: BTreeSet<PathBuf> = paths
1472        .iter()
1473        .map(|path| normalize_logical(Path::new(path)))
1474        .collect();
1475    state
1476        .entries
1477        .keys()
1478        .filter(|path| selected.contains(*path))
1479        .cloned()
1480        .collect()
1481}
1482
1483fn active_session_id(explicit: Option<&str>) -> Option<String> {
1484    explicit
1485        .map(str::to_string)
1486        .or_else(harn_vm::agent_sessions::current_session_id)
1487        .filter(|id| !id.trim().is_empty())
1488}
1489
1490fn validate_session_id(builtin: &'static str, session_id: &str) -> Result<(), HostlibError> {
1491    if session_id.trim().is_empty() {
1492        return Err(HostlibError::InvalidParameter {
1493            builtin,
1494            param: "session_id",
1495            message: "must not be empty".to_string(),
1496        });
1497    }
1498    Ok(())
1499}
1500
/// Default root: the process's current directory, or `.` when the current
/// directory cannot be determined.
fn default_root() -> PathBuf {
    match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    }
}
1504
1505fn session_dir(root: &Path, session_id: &str) -> PathBuf {
1506    let mut dir = root.to_path_buf();
1507    for component in STATE_REL {
1508        dir.push(component);
1509    }
1510    dir.push(sanitize_component(session_id));
1511    dir
1512}
1513
1514fn manifest_path(root: &Path, session_id: &str) -> PathBuf {
1515    session_dir(root, session_id).join("manifest.json")
1516}
1517
1518fn sanitize_component(input: &str) -> String {
1519    let sanitized: String = input
1520        .chars()
1521        .map(|ch| match ch {
1522            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => ch,
1523            _ => '_',
1524        })
1525        .collect();
1526    if sanitized == input {
1527        sanitized
1528    } else {
1529        let hash = hex::encode(Sha256::digest(input.as_bytes()));
1530        format!("{sanitized}-{}", &hash[..12])
1531    }
1532}
1533
1534fn normalize_logical(path: &Path) -> PathBuf {
1535    let absolute = if path.is_absolute() {
1536        path.to_path_buf()
1537    } else {
1538        default_root().join(path)
1539    };
1540    let mut out = PathBuf::new();
1541    for component in absolute.components() {
1542        match component {
1543            Component::ParentDir => {
1544                out.pop();
1545            }
1546            Component::CurDir => {}
1547            other => out.push(other),
1548        }
1549    }
1550    out
1551}
1552
/// Build the `NotFound` I/O error used for paths the overlay reports as
/// deleted or absent.
fn not_found(path: &Path) -> std::io::Error {
    let message = format!("staged fs: {} is deleted or absent", path.display());
    std::io::Error::new(std::io::ErrorKind::NotFound, message)
}
1559
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reads earlier than the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
1566
1567fn emit_staged_update(state: &SessionState) {
1568    let status = status_from_state(state);
1569    harn_vm::agent_events::emit_event(&AgentEvent::StagedWritesPending {
1570        session_id: state.session_id.clone(),
1571        pending_count: status.pending_writes.len(),
1572        total_bytes: status.total_bytes_pending,
1573    });
1574}
1575
1576fn pending_write_to_value(write: PendingWrite) -> VmValue {
1577    build_dict([
1578        ("path", str_value(&write.path)),
1579        ("kind", str_value(write.kind)),
1580        ("bytes_added", VmValue::Int(write.bytes_added as i64)),
1581        ("bytes_removed", VmValue::Int(write.bytes_removed as i64)),
1582    ])
1583}
1584
1585fn status_to_value(status: StagedStatus) -> VmValue {
1586    build_dict([
1587        (
1588            "pending_writes",
1589            VmValue::List(Rc::new(
1590                status
1591                    .pending_writes
1592                    .into_iter()
1593                    .map(pending_write_to_value)
1594                    .collect(),
1595            )),
1596        ),
1597        (
1598            "total_bytes_pending",
1599            VmValue::Int(status.total_bytes_pending as i64),
1600        ),
1601        (
1602            "oldest_pending_age_ms",
1603            VmValue::Int(status.oldest_pending_age_ms),
1604        ),
1605    ])
1606}
1607
1608fn commit_result_to_value(result: CommitResult) -> VmValue {
1609    build_dict([
1610        (
1611            "committed_paths",
1612            VmValue::List(Rc::new(
1613                result
1614                    .committed_paths
1615                    .into_iter()
1616                    .map(|path| VmValue::String(Rc::from(path)))
1617                    .collect(),
1618            )),
1619        ),
1620        (
1621            "failed_paths_with_reasons",
1622            VmValue::List(Rc::new(
1623                result
1624                    .failed_paths_with_reasons
1625                    .into_iter()
1626                    .map(|(path, reason)| {
1627                        build_dict([("path", str_value(&path)), ("reason", str_value(&reason))])
1628                    })
1629                    .collect(),
1630            )),
1631        ),
1632    ])
1633}
1634
1635fn discard_result_to_value(result: DiscardResult) -> VmValue {
1636    build_dict([(
1637        "discarded_paths",
1638        VmValue::List(Rc::new(
1639            result
1640                .discarded_paths
1641                .into_iter()
1642                .map(|path| VmValue::String(Rc::from(path)))
1643                .collect(),
1644        )),
1645    )])
1646}