Skip to main content

fluers_runtime/
local_env.rs

1//! The real local-filesystem `SessionEnv`.
2//!
3//! Tools run against a real directory on disk via `tokio::fs` +
4//! `tokio::process`. **Confinement is fd-anchored**: every read, write, search,
5//! and exec cwd is resolved off a single held root fd via `openat`
6//! per-component walks with `O_NOFOLLOW` + an authoritative `fstat` on the
7//! opened leaf fd. There is no canonicalize-then-contain step in any data path,
8//! so a symlink/hardlink swapped between the containment check and the operation
9//! cannot redirect a read (exfil) or a write/exec (data loss).
10//!
11//! See `SECURITY.md`: this is *not* an OS-level sandbox (no chroot/landlock/
12//! UID separation). The fd-anchoring closes the TOCTOU class the path-based
13//! `resolve()` had; it does not turn this into a security boundary against a
14//! determined adversary until OS isolation lands.
15
16use std::ffi::OsStr;
17use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
18use std::path::{Component, Path, PathBuf};
19use std::sync::Arc;
20
21use async_trait::async_trait;
22use rustix::fs::{fstat, ftruncate, mkdirat, open, openat, Dir, FileType, Mode, OFlags};
23use rustix::io::Errno;
24use tokio::io::{AsyncReadExt, AsyncWriteExt};
25use tokio::process::Command;
26use tokio_util::sync::CancellationToken;
27
28// `fcntl(F_GETPATH)` is apple-only; it backs `fd_real_path` on macOS.
29#[cfg(target_os = "macos")]
30use rustix::fs::getpath;
31// `/proc/self/fd/N` readlink needs the raw fd int on Linux.
32#[cfg(target_os = "linux")]
33use std::os::fd::AsRawFd;
34
35use crate::env::{Limits, SessionEnv, ShellResult};
36use crate::error::{RuntimeError, RuntimeResult};
37
/// POSIX `st_mode` masks (stable, platform-independent) for the regular-file
/// check — avoids pulling `libc` just for `S_ISREG`.
///
/// `S_IFMT`: ANDed with `st_mode` to isolate the file-type bits.
const ST_MODE_TYPE_MASK: u32 = 0o170_000; // S_IFMT
/// `S_IFREG`: the value the masked `st_mode` is compared against to decide
/// that an opened leaf is a regular file.
const ST_MODE_REGULAR: u32 = 0o100_000; // S_IFREG
42
/// A `SessionEnv` backed by a real local directory.
///
/// Every filesystem operation in this type is anchored on `root_fd`. The two
/// sandbox fields are set together: `new` leaves both `None`, while
/// `new_with_sandbox` sets both to `Some`.
pub struct LocalSessionEnv {
    /// Held fd over the canonical root: the anchor for every fd-anchored walk.
    /// Opened once at construction with `O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC`,
    /// so root-path re-resolution never re-enters any data hot path. Because
    /// the root is pinned by fd (not path), renaming/symlinking the root *path*
    /// after construction cannot redirect a subsequent operation. `OwnedFd` is
    /// `Send + Sync` on Unix.
    root_fd: OwnedFd,
    // NOTE(review): `limits` is stored at construction but is not read by any
    // code visible in this part of the file, hence the lint allowance —
    // presumably retained for later enforcement; confirm before removing.
    #[allow(dead_code)]
    limits: Limits,
    /// Optional process-sandbox backend (WP-2 slot). `None` ⇒ spawn behaviour
    /// is byte-identical to pre-WP-2. When `Some`, every exec/grep argv is
    /// passed through `ProcessSandbox::wrap` and the returned env additions
    /// survive `env_clear()`.
    exec_sandbox: Option<Arc<dyn crate::process_sandbox::ProcessSandbox>>,
    /// Policy matching `exec_sandbox` (profile + egress). `Some` iff the
    /// backend is `Some`.
    sandbox_policy: Option<crate::process_sandbox::SandboxPolicy>,
}
63
64impl LocalSessionEnv {
65    /// Create an env rooted at `root`. The directory is canonicalized; if it
66    /// does not exist it is created. An fd is held over the canonical root for
67    /// the lifetime of the env.
68    pub async fn new(root: impl Into<PathBuf>, limits: Limits) -> RuntimeResult<Self> {
69        Self::build(root, limits, None, None).await
70    }
71
72    /// Crate-private constructor that attaches a (already `prepare`d) process-
73    /// sandbox backend. Called by `LocalSandbox::env_for` on the path where a
74    /// backend is ACTIVELY attached (FullyEnforced, or Partial+Degrade). When
75    /// `env_for` degrades to no backend (Unavailable+Degrade), it calls
76    /// [`LocalSessionEnv::new`] instead — there is no "backend None + policy
77    /// Some" state, so this constructor takes a non-optional backend.
78    pub(crate) async fn new_with_sandbox(
79        root: impl Into<PathBuf>,
80        limits: Limits,
81        backend: Arc<dyn crate::process_sandbox::ProcessSandbox>,
82        policy: crate::process_sandbox::SandboxPolicy,
83    ) -> RuntimeResult<Self> {
84        Self::build(root, limits, Some(backend), Some(policy)).await
85    }
86
87    /// Shared construction for both entry points.
88    async fn build(
89        root: impl Into<PathBuf>,
90        limits: Limits,
91        exec_sandbox: Option<Arc<dyn crate::process_sandbox::ProcessSandbox>>,
92        sandbox_policy: Option<crate::process_sandbox::SandboxPolicy>,
93    ) -> RuntimeResult<Self> {
94        let root = root.into();
95        tokio::fs::create_dir_all(&root)
96            .await
97            .map_err(RuntimeError::Io)?;
98        let canon = tokio::fs::canonicalize(&root)
99            .await
100            .map_err(RuntimeError::Io)?;
101        // Hold an fd over the canonical root. Opened with O_NOFOLLOW (reject a
102        // root swapped to a symlink since construction) + O_DIRECTORY +
103        // O_CLOEXEC. From here on, no operation re-resolves the root *path* —
104        // they all anchor off this fd.
105        let root_flags = OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
106        let root_fd = open(&canon, root_flags, Mode::empty())
107            .map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
108        Ok(Self {
109            root_fd,
110            limits,
111            exec_sandbox,
112            sandbox_policy,
113        })
114    }
115
116    /// Wrap a `sh -c <command>` argv through the attached process-sandbox
117    /// backend (if any), returning the (possibly rewritten) argv plus any env
118    /// additions the backend requires.
119    ///
120    /// With no backend, returns the original argv and an empty env — the spawn
121    /// sites then behave byte-identically to pre-WP-2. With a backend, the
122    /// shell runs *inside* the boundary (the wrap target is `sh`, not the raw
123    /// command). An empty wrapped argv is rejected loud (a backend that drops
124    /// the command is a misconfiguration).
125    fn wrap_shell_argv(
126        &self,
127        argv: Vec<String>,
128        cwd: Option<PathBuf>,
129    ) -> RuntimeResult<crate::process_sandbox::WrappedCommand> {
130        let Some(backend) = &self.exec_sandbox else {
131            return Ok(crate::process_sandbox::WrappedCommand {
132                argv,
133                env: std::collections::BTreeMap::new(),
134            });
135        };
136        let policy = self
137            .sandbox_policy
138            .as_ref()
139            .ok_or_else(|| RuntimeError::Sandbox("backend set without policy".into()))?;
140        let ctx = crate::process_sandbox::ExecSandboxContext {
141            workspace_path: Self::fd_real_path(self.root_fd.as_fd())?,
142            cwd,
143            profile: policy.profile,
144            egress: policy.egress.clone(),
145        };
146        let wrapped = backend.wrap(&argv, &ctx)?;
147        if wrapped.argv.is_empty() {
148            return Err(RuntimeError::Sandbox(
149                "process-sandbox backend returned an empty argv".into(),
150            ));
151        }
152        Ok(wrapped)
153    }
154
155    /// Validate a model-supplied relative path and return its `Normal`
156    /// components (skipping `.`). Rejects absolute paths and any `..`
157    /// component up front — the fd walk itself then enforces containment, so
158    /// there is no canonicalize-then-contain step anywhere in the data path.
159    fn normal_components<'a>(&self, rel: &'a Path) -> RuntimeResult<Vec<&'a OsStr>> {
160        if rel.is_absolute() {
161            return Err(RuntimeError::Sandbox(format!(
162                "absolute paths are not allowed: `{}`",
163                rel.display()
164            )));
165        }
166        if rel.components().any(|c| matches!(c, Component::ParentDir)) {
167            return Err(RuntimeError::Sandbox(format!(
168                "`..` is not allowed in paths: `{}`",
169                rel.display()
170            )));
171        }
172        Ok(rel
173            .components()
174            .filter_map(|c| match c {
175                Component::Normal(name) => Some(name),
176                // `CurDir` (".") is skipped; `ParentDir`/absolute are
177                // pre-rejected above.
178                _ => None,
179            })
180            .collect())
181    }
182
183    /// Open `rel` for reading via an fd-anchored walk from the held root fd
184    /// (B-Swift Phase C1a / #4). Closes the path-based TOCTOU at the daemon
185    /// read: every component is opened with `O_NOFOLLOW` (symlink → `ELOOP`),
186    /// and the leaf is `fstat`'d on the SAME fd we hand back for reading — so a
187    /// symlink/hardlink swap between confinement and the read cannot exfiltrate.
188    /// Mirrors the Swift `readFdAnchored`.
189    ///
190    /// Returns the opened regular-file `File` and its size in bytes (the size is
191    /// authoritative — taken off the open fd, not the path).
192    fn open_anchored_read(&self, rel: &Path) -> RuntimeResult<(std::fs::File, u64)> {
193        let names = self.normal_components(rel)?;
194        if names.is_empty() {
195            return Err(RuntimeError::Sandbox(format!(
196                "read path has no components: `{}`",
197                rel.display()
198            )));
199        }
200
201        let oflag = OFlags::RDONLY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
202        // Walk: hold every opened fd in `chain` so intermediates stay alive
203        // until the next level is opened; the last element is the leaf.
204        let mut chain: Vec<OwnedFd> = Vec::new();
205        for name in names {
206            let dir = match chain.last() {
207                Some(f) => f.as_fd(),
208                None => self.root_fd.as_fd(),
209            };
210            let fd = match openat(dir, name, oflag, Mode::empty()) {
211                Ok(fd) => fd,
212                Err(Errno::LOOP) => {
213                    return Err(RuntimeError::Sandbox(format!(
214                        "symlinks are not allowed in read paths: `{}`",
215                        rel.display()
216                    )));
217                }
218                Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
219            };
220            chain.push(fd);
221        }
222        let leaf_owned = chain
223            .pop()
224            .ok_or_else(|| RuntimeError::Sandbox("read path has no components".to_string()))?;
225        // Remaining `chain` (intermediates) drops here → their fds close.
226
227        // Authoritative leaf check: fstat the OPENED fd (not the path).
228        let stat =
229            fstat(leaf_owned.as_fd()).map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
230        if (stat.st_mode as u32 & ST_MODE_TYPE_MASK) != ST_MODE_REGULAR {
231            return Err(RuntimeError::Sandbox(format!(
232                "not a regular file: `{}`",
233                rel.display()
234            )));
235        }
236        if stat.st_nlink > 1 {
237            // Hardlink exfil (`ln secret in_root; read in_root/link`) — mirrors
238            // the Swift-side C2/#3 reject. Authoritative here: fstat off the
239            // open fd, not the path.
240            return Err(RuntimeError::Sandbox(format!(
241                "multiple hard links — can't safely confine: `{}`",
242                rel.display()
243            )));
244        }
245        let size = stat.st_size.max(0) as u64;
246        Ok((std::fs::File::from(leaf_owned), size))
247    }
248
249    /// Open an existing directory `rel` via an fd-anchored walk from the held
250    /// root fd (B-Swift Phase C1b). Used to pin an exec `cwd` by fd (passed to
251    /// the child as `/dev/fd/N`). Every component is opened with
252    /// `O_DIRECTORY | O_NOFOLLOW`, so a symlinked intermediate dir → `ELOOP`
253    /// → reject (never followed).
254    fn open_anchored_dir(&self, rel: &Path) -> RuntimeResult<OwnedFd> {
255        let names = self.normal_components(rel)?;
256        let oflag = OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
257        // Open "." relative to the held root → an independent owned starting fd,
258        // so we never borrow `root_fd` across the walk.
259        let mut cur = openat(self.root_fd.as_fd(), ".", oflag, Mode::empty())
260            .map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
261        for name in names {
262            let next = match openat(cur.as_fd(), name, oflag, Mode::empty()) {
263                Ok(fd) => fd,
264                Err(Errno::LOOP) => {
265                    return Err(RuntimeError::Sandbox(format!(
266                        "symlinked directories are not allowed: `{}`",
267                        rel.display()
268                    )));
269                }
270                Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
271            };
272            cur = next;
273        }
274        Ok(cur)
275    }
276
277    /// Derive the real on-disk path of an already-open directory fd — macOS
278    /// `fcntl(F_GETPATH)`, Linux `/proc/self/fd/N`. The path comes from the
279    /// *inode* the fd names, NOT from any model-supplied input string, so a
280    /// symlink swap on the input path between the fd-anchored open and the
281    /// spawn/search can't redirect the operation. (`/dev/fd/N` as a `cwd` is
282    /// Linux-only — macOS fdescfs rejects `chdir` to it with `ENOTDIR`, so the
283    /// inode path is the portable fd-anchored handle.) A post-open *move* of the
284    /// directory is a residual race outside the threat model: this is not an OS
285    /// sandbox, and moving the dir requires write access under the confined root.
286    fn fd_real_path(fd: BorrowedFd<'_>) -> RuntimeResult<PathBuf> {
287        #[cfg(target_os = "macos")]
288        {
289            use std::os::unix::ffi::OsStrExt;
290            let c = getpath(fd).map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
291            Ok(PathBuf::from(OsStr::from_bytes(c.to_bytes())))
292        }
293        #[cfg(target_os = "linux")]
294        {
295            let raw = fd.as_raw_fd();
296            std::fs::read_link(format!("/proc/self/fd/{raw}")).map_err(RuntimeError::Io)
297        }
298        #[cfg(not(any(target_os = "macos", target_os = "linux")))]
299        {
300            let _ = fd;
301            Err(RuntimeError::Sandbox(
302                "fd-derived directory path is unsupported on this platform".into(),
303            ))
304        }
305    }
306
307    /// Resolve a grep search path to its real INODE path, fd-anchored from the
308    /// held root fd. Every component is opened `O_NOFOLLOW`; a symlink anywhere
309    /// in the path (including a symlinked dir passed explicitly) is rejected
310    /// outright — `rg --no-follow` would otherwise follow an explicit
311    /// symlinked-dir argument and leak its contents. The returned path is the
312    /// inode's path (from `fd_real_path`), so a swap on the input can't redirect
313    /// the search. Handles directory and file leaf targets; `.`/empty → root.
314    fn search_path_inode(&self, p: &str) -> RuntimeResult<PathBuf> {
315        let names = self.normal_components(Path::new(p))?;
316        if names.is_empty() {
317            // `.` or empty path → the root.
318            return Self::fd_real_path(self.root_fd.as_fd());
319        }
320        let dir_oflag = OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
321        let file_oflag = OFlags::RDONLY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
322        let (parents, last) = names.split_at(names.len() - 1);
323        let mut parent = openat(self.root_fd.as_fd(), ".", dir_oflag, Mode::empty())
324            .map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
325        for name in parents.iter().copied() {
326            parent = match openat(parent.as_fd(), name, dir_oflag, Mode::empty()) {
327                Ok(fd) => fd,
328                Err(Errno::LOOP) => {
329                    return Err(RuntimeError::Sandbox(format!(
330                        "symlinked search path is not allowed: `{p}`"
331                    )))
332                }
333                Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
334            };
335        }
336        let last_name = last[0];
337        // Leaf: try dir, fall back to file (a file grep target). `O_NOFOLLOW`
338        // in both means a symlink leaf → `ELOOP` → reject.
339        let leaf_fd = match openat(parent.as_fd(), last_name, dir_oflag, Mode::empty()) {
340            Ok(fd) => fd,
341            Err(Errno::NOTDIR) => {
342                match openat(parent.as_fd(), last_name, file_oflag, Mode::empty()) {
343                    Ok(fd) => fd,
344                    Err(Errno::LOOP) => {
345                        return Err(RuntimeError::Sandbox(format!(
346                            "symlinked search path is not allowed: `{p}`"
347                        )))
348                    }
349                    Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
350                }
351            }
352            Err(Errno::LOOP) => {
353                return Err(RuntimeError::Sandbox(format!(
354                    "symlinked search path is not allowed: `{p}`"
355                )))
356            }
357            Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
358        };
359        Self::fd_real_path(leaf_fd.as_fd())
360    }
361
362    /// Open `rel` for writing via an fd-anchored walk from the held root fd
363    /// (B-Swift Phase C1b — the critical counterpart of `open_anchored_read`).
364    ///
365    /// Invariants:
366    /// - Parent dirs are created with a `mkdirat` walk from the root fd (each
367    ///   level opened `O_NOFOLLOW`); `mkdirat` does not follow a symlink at the
368    ///   target name, and the follow-up `openat(O_DIRECTORY|O_NOFOLLOW)` rejects
369    ///   a symlinked intermediate outright.
370    /// - The leaf is opened `WRONLY | CREATE | NOFOLLOW` — `O_NOFOLLOW` rejects
371    ///   a symlink leaf outright (`ELOOP`). Critically, `O_TRUNC` is **not**
372    ///   passed: truncation is deferred to `ftruncate` *after* the hardlink
373    ///   check, so a write through a hardlink can never mutate before the
374    ///   confinement decision.
375    /// - The opened leaf fd is `fstat`'d (authoritative): non-regular files are
376    ///   rejected, and `st_nlink > 1` is rejected — a write through a hardlink
377    ///   mutates every name in the set (silent cross-target data loss).
378    /// - The caller truncates + writes off the SAME fd.
379    fn open_anchored_write(&self, rel: &Path) -> RuntimeResult<OwnedFd> {
380        let names = self.normal_components(rel)?;
381        let (parents, leaf) = names.split_at(names.len().saturating_sub(1));
382        let leaf_name = leaf.first().copied().ok_or_else(|| {
383            RuntimeError::Sandbox(format!("write path has no file name: `{}`", rel.display()))
384        })?;
385
386        let dir_oflag = OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC;
387        // mkdirat default mode mirrors std's `create_dir` (0o777 & !umask);
388        // files below use 0o666 & !umask (std's `fs::write` default).
389        let dir_mode = Mode::RWXU | Mode::RWXG | Mode::RWXO;
390        let file_mode = Mode::RUSR | Mode::WUSR | Mode::RGRP | Mode::WGRP | Mode::ROTH | Mode::WOTH;
391
392        let mut parent = openat(self.root_fd.as_fd(), ".", dir_oflag, Mode::empty())
393            .map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
394        for name in parents.iter().copied() {
395            let next = match openat(parent.as_fd(), name, dir_oflag, Mode::empty()) {
396                Ok(fd) => fd,
397                Err(Errno::NOENT) => {
398                    // Create the missing intermediate dir. `mkdirat` does NOT
399                    // follow a symlink at `name` (it would fail EEXIST); the
400                    // reopen below re-establishes the fd-anchored position.
401                    // EEXIST from mkdirat means another writer created it
402                    // concurrently — that's safe; just reopen it.
403                    if let Err(e) = mkdirat(parent.as_fd(), name, dir_mode) {
404                        if e != Errno::EXIST {
405                            return Err(RuntimeError::Io(std::io::Error::from(e)));
406                        }
407                    }
408                    match openat(parent.as_fd(), name, dir_oflag, Mode::empty()) {
409                        Ok(fd) => fd,
410                        Err(Errno::LOOP) => {
411                            return Err(RuntimeError::Sandbox(format!(
412                                "symlinked directories are not allowed: `{}`",
413                                rel.display()
414                            )));
415                        }
416                        Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
417                    }
418                }
419                Err(Errno::LOOP) => {
420                    return Err(RuntimeError::Sandbox(format!(
421                        "symlinked directories are not allowed: `{}`",
422                        rel.display()
423                    )));
424                }
425                Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
426            };
427            parent = next;
428        }
429
430        // Leaf: CREATE + NOFOLLOW, but deliberately NO TRUNC — truncate after
431        // the nlink check so a hardlink can't be mutated pre-decision.
432        let leaf_oflag = OFlags::WRONLY | OFlags::CREATE | OFlags::NOFOLLOW | OFlags::CLOEXEC;
433        let leaf_fd = match openat(parent.as_fd(), leaf_name, leaf_oflag, file_mode) {
434            Ok(fd) => fd,
435            Err(Errno::LOOP) => {
436                return Err(RuntimeError::Sandbox(format!(
437                    "symlink leaf is not allowed: `{}`",
438                    rel.display()
439                )));
440            }
441            Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
442        };
443
444        // Authoritative confinement checks off the OPEN fd (not the path).
445        let stat = fstat(leaf_fd.as_fd()).map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
446        if (stat.st_mode as u32 & ST_MODE_TYPE_MASK) != ST_MODE_REGULAR {
447            return Err(RuntimeError::Sandbox(format!(
448                "not a regular file: `{}`",
449                rel.display()
450            )));
451        }
452        if stat.st_nlink > 1 {
453            // A write through a hardlink mutates every name in the set — reject,
454            // mirroring the read-side decision.
455            return Err(RuntimeError::Sandbox(format!(
456                "multiple hard links — can't safely confine: `{}`",
457                rel.display()
458            )));
459        }
460        Ok(leaf_fd)
461    }
462}
463
464#[async_trait]
465impl SessionEnv for LocalSessionEnv {
466    async fn read_file(
467        &self,
468        path: &Path,
469        max_lines: usize,
470        max_bytes: usize,
471    ) -> RuntimeResult<String> {
472        // B-Swift Phase C1a / #4: fd-anchored open + read from the SAME fd
473        // (closes the check-then-use TOCTOU the path-based read had).
474        //
475        // Bounded read (0.5.2): the output is capped at `max_bytes` anyway
476        // (apply_read_limits truncates beyond it), so reading the whole file
477        // first would OOM on a multi-GB file. Read at most `max_bytes` and
478        // trim any partial UTF-8 char at the cut. Memory is thus bounded by
479        // `max_bytes`, independent of the on-disk size.
480        let (file, _size) = self.open_anchored_read(path)?;
481        let (raw, truncated_at_cap) = read_bounded_string(file, max_bytes).await?;
482        let mut out = apply_read_limits(raw, max_lines, max_bytes);
483        // If the bounded read cut the file short (file > max_bytes) and
484        // apply_read_limits didn't itself add a truncation marker, surface that
485        // the content was capped — preserves the original oversized-file
486        // indicator that the unbounded read had.
487        if truncated_at_cap && !out.contains("[... truncated") {
488            out.push_str(&format!("\n[... truncated at {max_bytes} bytes ...]"));
489        }
490        Ok(out)
491    }
492
493    async fn read_file_full(&self, path: &Path, max_bytes: usize) -> RuntimeResult<String> {
494        // B-Swift Phase C1a / #4: size + read off the SAME open fd. The old
495        // path-based metadata check raced the read; now the size gate is
496        // authoritative (fstat off the open fd) and the read uses that fd.
497        let (file, size) = self.open_anchored_read(path)?;
498        let size = size as usize;
499        if size > max_bytes {
500            return Err(RuntimeError::FileTooLarge {
501                path: path.display().to_string(),
502                size,
503                max: max_bytes,
504            });
505        }
506        let mut file = tokio::fs::File::from_std(file);
507        let mut raw = String::new();
508        file.read_to_string(&mut raw)
509            .await
510            .map_err(RuntimeError::Io)?;
511        Ok(raw)
512    }
513
514    async fn write_file(&self, path: &Path, content: &str) -> RuntimeResult<()> {
515        // B-Swift Phase C1b: fd-anchored write. Open the leaf off the held root
516        // fd (mkdirat-walking parents), fstat for hardlink confinement, THEN
517        // truncate + write off the SAME fd. No path re-resolution in any step.
518        let leaf_fd = self.open_anchored_write(path)?;
519        // Truncate AFTER the nlink check (the open deliberately omitted O_TRUNC).
520        ftruncate(&leaf_fd, 0).map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
521        let mut file = tokio::fs::File::from_std(std::fs::File::from(leaf_fd));
522        file.write_all(content.as_bytes())
523            .await
524            .map_err(RuntimeError::Io)?;
525        // Flush before returning: a subsequent `fstat` (e.g. a size-gated
526        // `read_file_full`) must observe the full new size. `write_all`'s await
527        // dispatches the pwrite on the blocking pool, but tokio `File`'s close is
528        // deferred on drop — without this barrier the size was intermittently
529        // not yet visible to a following `fstat` under parallel load (a rare
530        // flake that returned a stale/short size). `flush` completes the pending
531        // async write without an `fsync` (no durability/perf cost vs `sync_all`).
532        file.flush().await.map_err(RuntimeError::Io)?;
533        Ok(())
534    }
535
536    async fn exec(
537        &self,
538        command: &str,
539        cwd: &Path,
540        timeout_ms: Option<u64>,
541        cancel: &CancellationToken,
542    ) -> RuntimeResult<ShellResult> {
543        // The cwd is opened fd-anchored (`openat(O_DIRECTORY|O_NOFOLLOW)` per
544        // component from the held root fd), so a symlinked cwd dir is rejected
545        // outright. The child then chdirs to the *inode's* real path — derived
546        // from the open fd via `fd_real_path`, not from the input string — so a
547        // symlink swap on the cwd path between open and spawn can't redirect it.
548        // (`/dev/fd/N` would be the pure-inode handle, but macOS fdescfs rejects
549        // `chdir` to it; the inode path is the portable form.) `cwd_fd` is held
550        // in scope through `spawn()` so the inode it names stays valid.
551        let cwd_fd = self.open_anchored_dir(cwd)?;
552        let cwd_path = Self::fd_real_path(cwd_fd.as_fd())?;
553
554        // `kill_on_drop(true)`: on timeout/cancel the in-flight `wait_with_output`
555        // future (which owns the child) is dropped, and its `Drop` sends SIGKILL —
556        // so a still-running child is never orphaned.
557        //
558        // `env_clear()` + allowlist: model-run shells must NOT inherit the
559        // parent's full environment — that includes provider API keys
560        // (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `FLUERS_SERVER_TOKEN`, …),
561        // which a prompt-injected model could exfiltrate via `env | grep KEY`.
562        // Only a safe, minimal allowlist (needed for commands to function) is
563        // re-set.
564        // Wrap the `sh -c <command>` argv through the process-sandbox backend
565        // (if attached). With no backend this is a no-op (byte-identical to
566        // pre-WP-2). With a backend the shell runs *inside* the boundary.
567        let wrapped = self.wrap_shell_argv(
568            vec!["sh".into(), "-c".into(), command.into()],
569            Some(cwd_path.clone()),
570        )?;
571        let mut cmd = Command::new(&wrapped.argv[0]);
572        cmd.args(&wrapped.argv[1..])
573            .current_dir(&cwd_path)
574            .env_clear()
575            .envs(safe_exec_env())
576            // Backend-required env additions survive env_clear (C1).
577            .envs(wrapped.env)
578            .stdout(std::process::Stdio::piped())
579            .stderr(std::process::Stdio::piped())
580            .kill_on_drop(true);
581        let child = cmd.spawn().map_err(RuntimeError::Io)?;
582        // `cwd_fd` stays live until end of scope (spawn has run by now).
583
584        let timeout_fut = match timeout_ms {
585            Some(ms) => Box::pin(tokio::time::sleep(std::time::Duration::from_millis(ms)))
586                as std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send>>,
587            None => Box::pin(std::future::pending()),
588        };
589        let cancel_fut = cancel.cancelled();
590
591        // `wait_with_output` drains stdout AND stderr concurrently while it waits.
592        // The old `child.wait()` did not read the pipes, so a child emitting more
593        // than the OS pipe buffer (~64 KB) blocked on a full pipe while `wait()`
594        // blocked on the child — a deadlock that only broke on timeout (output
595        // lost, misreported as a 124), or hung forever with no timeout set.
596        tokio::select! {
597            _ = timeout_fut => {
598                // `child` (moved into the dropped `wait_with_output` future) is
599                // SIGKILLed via `kill_on_drop`. Return the 124-shaped result.
600                Ok(ShellResult {
601                    exit_code: 124,
602                    stdout: String::new(),
603                    stderr: format!("command timed out after {}ms", timeout_ms.unwrap_or(0)),
604                })
605            }
606            _ = cancel_fut => {
607                Err(RuntimeError::Sandbox("command cancelled".into()))
608            }
609            output = child.wait_with_output() => {
610                let output = output.map_err(RuntimeError::Io)?;
611                Ok(ShellResult {
612                    exit_code: output.status.code().unwrap_or(-1),
613                    stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
614                    stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
615                })
616            }
617        }
618    }
619
620    async fn glob(&self, pattern: &str, limit: usize) -> RuntimeResult<Vec<String>> {
621        // Containment: reject absolute patterns and `..` so the model can't
622        // list files outside the root (e.g. `../../*` or `/etc/*`).
623        validate_search_pattern(pattern)?;
624        // Split into a base dir (must exist) + a single-segment filename pattern.
625        // As in the original matcher, the filename pattern is applied at every
626        // depth under the base (the descent is what changed: it is now
627        // fd-anchored and never enters a symlinked directory).
628        let pat_path = Path::new(pattern);
629        let base_rel = pat_path.parent().unwrap_or_else(|| Path::new(""));
630        let fname = pat_path.file_name().and_then(|s| s.to_str()).unwrap_or("*");
631        // Results are reported relative to the ROOT, but the walk starts at the
632        // base dir — so seed the descent with the base's own path relative to
633        // root (e.g. `sub/*.txt` → base prefix `sub`, so `sub/nested.txt` is
634        // reported, not `nested.txt`).
635        let base_prefix = self
636            .normal_components(base_rel)?
637            .iter()
638            .map(|s| s.to_string_lossy().into_owned())
639            .collect::<Vec<_>>()
640            .join("/");
641        // A missing/symlinked base yields no matches (preserves the original
642        // "no results" behavior for non-existent bases after validation).
643        let base_fd = match self.open_anchored_dir(base_rel) {
644            Ok(fd) => fd,
645            Err(_) => return Ok(Vec::new()),
646        };
647        let dir = match Dir::new(base_fd) {
648            Ok(d) => d,
649            Err(_) => return Ok(Vec::new()),
650        };
651        let mut results: Vec<String> = Vec::new();
652        walk_glob_fd(dir, fname, &base_prefix, &mut results, limit)?;
653        results.sort();
654        // De-dup (a `**`/depth-recursion can surface the same relative path).
655        results.dedup();
656        Ok(results)
657    }
658
659    async fn grep(
660        &self,
661        pattern: &str,
662        paths: &[&str],
663        max_matches: usize,
664    ) -> RuntimeResult<Vec<String>> {
665        // Containment: validate each search path's SHAPE (reject absolute/`..`
666        // so the model can't reach outside the root), then resolve it fd-anchored
667        // to its real INODE path. This is essential: `rg --no-follow` still
668        // follows a symlinked dir passed EXPLICITLY as a search path, so passing
669        // the input string would leak through `linkdir -> outside`. Resolving to
670        // the inode path (and rejecting symlinks outright at `openat(NO_FOLLOW)`)
671        // closes that — the search runs against the real confined dir/file.
672        let root_path = Self::fd_real_path(self.root_fd.as_fd())?;
673        let mut validated: Vec<String> = Vec::new();
674        if paths.is_empty() {
675            validated.push(shell_quote(&root_path.to_string_lossy()));
676        } else {
677            for p in paths {
678                validate_search_pattern(p)?;
679                let inode = self.search_path_inode(p)?;
680                validated.push(shell_quote(&inode.to_string_lossy()));
681            }
682        }
683        let search = validated.join(" ");
684        // The process cwd is the root's inode path too (belt-and-suspenders);
685        // `rg --no-follow` / the `find -P` fallback never follow symlinks.
686        // `kill_on_drop(true)` + a bounded timeout ensure a grep against a hung
687        // filesystem (stuck NFS, adversarial tree) can neither block this future
688        // forever nor orphan the child. The trait gives no cancel token/timeout,
689        // so a fixed ceiling is applied here.
690        const GREP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
691        // Build the grep shell argv, then wrap it through the backend (if any).
692        // With no backend this is a no-op and the spawn is byte-identical to
693        // pre-WP-2 (grep intentionally does NOT env_clear — it inherits the
694        // parent env). With a backend the shell runs inside the boundary and
695        // backend env additions are applied.
696        let grep_command = format!(
697            "rg -n --no-follow -- {pat} {search} 2>/dev/null \
698             || find -P {search} -type f -exec grep -Hn -- {pat} {{}} + 2>/dev/null",
699            pat = shell_quote(pattern),
700        );
701        let wrapped = self.wrap_shell_argv(vec!["sh".into(), "-c".into(), grep_command], None)?;
702        let mut cmd = Command::new(&wrapped.argv[0]);
703        cmd.args(&wrapped.argv[1..])
704            .current_dir(&root_path)
705            .stdout(std::process::Stdio::piped())
706            .stderr(std::process::Stdio::piped())
707            .kill_on_drop(true);
708        // Only apply env additions when a backend is present; with no backend
709        // wrapped.env is empty and `.envs` of an empty map is a no-op, keeping
710        // the no-backend spawn byte-identical to pre-WP-2.
711        cmd.envs(wrapped.env);
712        let child = cmd.spawn().map_err(RuntimeError::Io)?;
713        let rg = match tokio::time::timeout(GREP_TIMEOUT, child.wait_with_output()).await {
714            Ok(res) => res.map_err(RuntimeError::Io)?,
715            // On timeout the `wait_with_output` future is dropped; `kill_on_drop`
716            // SIGKILLs the child. Surface an empty result rather than hanging.
717            Err(_) => return Ok(Vec::new()),
718        };
719        let out = String::from_utf8_lossy(&rg.stdout);
720        // Search paths are absolute inode paths (see above), so `rg`/`grep` emit
721        // absolute paths — strip the root's inode prefix so results stay
722        // root-relative (as they did pre-fd-anchoring) and don't leak the host
723        // temp/root path to the model.
724        let root_prefix = format!("{}/", root_path.to_string_lossy());
725        Ok(out
726            .lines()
727            .map(|l| {
728                l.strip_prefix(root_prefix.as_str())
729                    .unwrap_or(l)
730                    .to_string()
731            })
732            .take(max_matches)
733            .collect())
734    }
735}
736
/// Truncate `raw` to `max_lines` and `max_bytes`, whichever binds first.
///
/// Lines are counted with their trailing `\n` included. When a limit binds,
/// the kept prefix is followed by a `[... truncated at N lines|bytes ...]`
/// marker on its own line; when neither limit binds, `raw` is returned
/// unchanged (no copy).
///
/// The byte cap is a hard ceiling on the kept content: the cut is placed at
/// the largest UTF-8 char boundary that does not exceed the remaining budget,
/// so the content before the marker is never longer than `max_bytes` and a
/// multibyte character is never split.
fn apply_read_limits(raw: String, max_lines: usize, max_bytes: usize) -> String {
    let mut bytes_left = max_bytes;
    let mut out = String::new();
    for (i, line) in raw.split_inclusive('\n').enumerate() {
        if i >= max_lines {
            out.push_str(&format!("\n[... truncated at {max_lines} lines ...]"));
            return out;
        }
        if bytes_left < line.len() {
            // Back off from the remaining budget to the nearest char boundary
            // at or below it. Offset 0 is always a boundary, so this
            // terminates without underflow, and `take <= bytes_left` holds.
            let mut take = bytes_left;
            while !line.is_char_boundary(take) {
                take -= 1;
            }
            out.push_str(&line[..take]);
            out.push_str(&format!("\n[... truncated at {max_bytes} bytes ...]"));
            return out;
        }
        out.push_str(line);
        bytes_left -= line.len();
    }
    // Nothing was cut: hand back the original allocation untouched.
    raw
}
769
770/// Read at most `max_bytes` bytes from `file` into a `String`, trimming any
771/// partial UTF-8 char at the read boundary.
772///
773/// Bounds memory at `max_bytes` so a multi-GB file cannot OOM the truncating
774/// `read_file` path — the output is already capped at `max_bytes` by
775/// [`apply_read_limits`], so reading more than that is pure waste. If the
776/// `max_bytes` boundary splits a multibyte char, the partial trailing bytes are
777/// trimmed to the last valid char boundary. A genuinely invalid-UTF-8 file that
778/// fits within `max_bytes` still errors (mirrors the prior `read_to_string`).
779///
780/// Returns the decoded prefix and a flag set when the file was larger than
781/// `max_bytes` (i.e. the read hit the cap) so the caller can surface a
782/// truncation marker.
783async fn read_bounded_string(
784    file: std::fs::File,
785    max_bytes: usize,
786) -> RuntimeResult<(String, bool)> {
787    let file = tokio::fs::File::from_std(file);
788    let mut buf: Vec<u8> = Vec::with_capacity(max_bytes.min(8 * 1024));
789    file.take(max_bytes as u64)
790        .read_to_end(&mut buf)
791        .await
792        .map_err(RuntimeError::Io)?;
793    // `read_full`: we read the whole file (didn't hit the cap) → any UTF-8
794    // error is genuine and should surface, not be silently trimmed.
795    let read_full = buf.len() < max_bytes;
796    let truncated_at_cap = !read_full;
797    match std::str::from_utf8(&buf) {
798        Ok(s) => Ok((s.to_string(), truncated_at_cap)),
799        Err(e) => {
800            let vu = e.valid_up_to();
801            if read_full {
802                Err(RuntimeError::Io(std::io::Error::new(
803                    std::io::ErrorKind::InvalidData,
804                    "stream did not contain valid UTF-8",
805                )))
806            } else {
807                // Hit the cap: trim the truncated multibyte suffix. `vu` is, by
808                // definition, a valid char boundary, so `&buf[..vu]` is valid.
809                Ok((
810                    std::str::from_utf8(&buf[..vu])
811                        .map(str::to_string)
812                        .unwrap_or_default(),
813                    truncated_at_cap,
814                ))
815            }
816        }
817    }
818}
819
820/// fd-anchored recursive glob descent. `dir` is an already-opened directory
821/// (opened `O_NOFOLLOW` by the caller). The single-segment filename pattern
822/// `fname_pat` (supporting `*`/`?`) is matched against every entry at every
823/// depth under `dir`. Recursion into a subdirectory happens ONLY via
824/// `openat(O_DIRECTORY | O_NOFOLLOW)` — that gate authoritatively refuses a
825/// symlinked directory, so a symlink can never lead the walk out of the root.
826/// `rel_prefix` is the path of `dir` relative to the session root ("" at the
827/// base); results are accumulated as root-relative strings.
828fn walk_glob_fd(
829    mut dir: Dir,
830    fname_pat: &str,
831    rel_prefix: &str,
832    out: &mut Vec<String>,
833    limit: usize,
834) -> RuntimeResult<()> {
835    // Phase 1: drain entries into an owned vec. This ends the mutable borrow of
836    // `dir` so phase 2 can take an immutable borrow for `dir.fd()` (needed to
837    // openat children). `.`/`..` are skipped.
838    let mut entries: Vec<(String, FileType)> = Vec::new();
839    for res in &mut dir {
840        match res {
841            Ok(e) => {
842                let name = e.file_name().to_string_lossy().into_owned();
843                if name == "." || name == ".." {
844                    continue;
845                }
846                entries.push((name, e.file_type()));
847            }
848            Err(e) => return Err(RuntimeError::Io(std::io::Error::from(e))),
849        }
850    }
851    if out.len() >= limit {
852        return Ok(());
853    }
854    // The parent fd for recursion (immutable borrow — no conflict with the
855    // finished iterator).
856    let parent_fd = dir
857        .fd()
858        .map_err(|e| RuntimeError::Io(std::io::Error::from(e)))?;
859    for (name, ftype) in entries {
860        if out.len() >= limit {
861            return Ok(());
862        }
863        let rel = if rel_prefix.is_empty() {
864            name.clone()
865        } else {
866            format!("{rel_prefix}/{name}")
867        };
868        if matches_glob(&name, fname_pat) {
869            out.push(rel.clone());
870        }
871        // `is_dir()` is only a *hint* to attempt recursion; the authoritative
872        // gate is the `openat(O_DIRECTORY | O_NOFOLLOW)` below — even if d_type
873        // lies, a symlinked dir cannot be entered.
874        if ftype.is_dir() {
875            if let Ok(child_fd) = openat(
876                parent_fd,
877                name.as_str(),
878                OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC,
879                Mode::empty(),
880            ) {
881                if let Ok(child_dir) = Dir::new(child_fd) {
882                    walk_glob_fd(child_dir, fname_pat, &rel, out, limit)?;
883                }
884            }
885            // openat/Dir failure (symlink, ENOTDIR, race, …) → skip, don't error.
886        }
887    }
888    Ok(())
889}
890
/// Single-segment glob (`*`/`?`) matcher. `**` is treated as `*` here.
///
/// `*` matches any run of characters (including none) and `?` matches exactly
/// one character — a whole UTF-8 character, not a single byte, so `?` matches
/// `é` just as it matches `a`. Every other pattern byte must match literally.
fn matches_glob(name: &str, pat: &str) -> bool {
    matches_at(name.as_bytes(), pat.as_bytes(), 0, 0)
}

/// Iterative wildcard match of `n[ni..]` against `p[pi..]` with single-star
/// backtracking.
///
/// `star` remembers the most recent `*` in the pattern together with the name
/// offset it has been stretched to; on a mismatch the star absorbs one more
/// byte and matching resumes just after it. Both inputs are expected to be
/// UTF-8 (they come from `&str` in [`matches_glob`]).
fn matches_at(n: &[u8], p: &[u8], mut ni: usize, mut pi: usize) -> bool {
    let mut star: Option<(usize, usize)> = None;
    while ni < n.len() {
        match p.get(pi).copied() {
            Some(b'*') => {
                // Record the star; initially it matches the empty string.
                star = Some((pi, ni));
                pi += 1;
            }
            Some(b'?') => {
                // Consume ONE character: the current byte plus any UTF-8
                // continuation bytes (0b10xx_xxxx) that belong to it.
                pi += 1;
                ni += 1;
                while ni < n.len() && (n[ni] & 0xC0) == 0x80 {
                    ni += 1;
                }
            }
            Some(c) if c == n[ni] => {
                pi += 1;
                ni += 1;
            }
            _ => match star {
                // Mismatch: let the last star swallow one more byte and retry.
                Some((sp, sn)) => {
                    pi = sp + 1;
                    ni = sn + 1;
                    star = Some((sp, sn + 1));
                }
                None => return false,
            },
        }
    }
    // The name is exhausted; only trailing stars may remain in the pattern.
    while pi < p.len() && p[pi] == b'*' {
        pi += 1;
    }
    pi == p.len()
}
925
926/// Validate a glob/grep search pattern/path is contained: reject absolute
927/// paths and `..` components so the model can't reach outside the root.
928///
929/// Patterns may legitimately contain `*`/`?` (glob) — only path-structure
930/// escapes are rejected.
931fn validate_search_pattern(input: &str) -> RuntimeResult<()> {
932    // Reject absolute paths.
933    if input.starts_with('/') || input.starts_with('\\') {
934        return Err(RuntimeError::Sandbox(format!(
935            "absolute paths are not allowed: `{input}`"
936        )));
937    }
938    // Reject any `..` path component. Walk segments, ignoring glob wildcards.
939    for seg in input.split('/') {
940        if seg == ".." {
941            return Err(RuntimeError::Sandbox(format!(
942                "`..` is not allowed in search paths: `{input}`"
943            )));
944        }
945    }
946    Ok(())
947}
948
/// Wrap `s` in single quotes so it is passed verbatim inside a `sh -c` command.
///
/// An embedded single quote cannot appear inside a single-quoted word, so
/// each one is emitted as `'\''`: close the quote, add an escaped quote,
/// reopen the quote.
fn shell_quote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
953
/// Build the allowlisted environment handed to a model-run shell once the
/// inherited environment has been cleared.
///
/// Only two groups of variables are copied from the parent process:
/// * the essentials a command needs to run and locate binaries
///   (`PATH`, `HOME`, `USER`, `LOGNAME`, `SHELL`, `TMPDIR`), when set;
/// * locale/timezone settings: exactly `TZ`, `LANG`, `LANGUAGE`, plus any
///   variable whose name starts with `LC_`.
///
/// Locale names are matched exactly rather than by a `LANG` prefix, because a
/// prefix match would also let through secrets such as `LANGCHAIN_API_KEY`.
/// Keys are returned as owned `String`s.
fn safe_exec_env() -> Vec<(String, std::ffi::OsString)> {
    const ESSENTIALS: [&str; 6] = ["PATH", "HOME", "USER", "LOGNAME", "SHELL", "TMPDIR"];
    const LOCALE_EXACT: [&str; 3] = ["TZ", "LANG", "LANGUAGE"];

    // Essentials first, in their fixed order, skipping any that are unset.
    let mut allowed: Vec<(String, std::ffi::OsString)> = ESSENTIALS
        .iter()
        .filter_map(|&name| std::env::var_os(name).map(|value| (name.to_string(), value)))
        .collect();

    // Then the formatting-only locale/timezone variables.
    allowed.extend(std::env::vars_os().filter_map(|(raw_key, value)| {
        let key = raw_key.to_string_lossy().into_owned();
        if LOCALE_EXACT.contains(&key.as_str()) || key.starts_with("LC_") {
            Some((key, value))
        } else {
            None
        }
    }));

    allowed
}
979
980#[cfg(test)]
981mod tests {
982    //! Local sandbox path-containment and tool tests against a temp dir.
983
984    use super::*;
985
986    #[tokio::test]
987    async fn read_file_within_root_works() {
988        let dir = tempfile::tempdir().unwrap();
989        let env = LocalSessionEnv::new(dir.path(), Limits::default())
990            .await
991            .unwrap();
992        tokio::fs::write(dir.path().join("hello.txt"), "hi there\n")
993            .await
994            .unwrap();
995        let got = env
996            .read_file(Path::new("hello.txt"), 100, 1024)
997            .await
998            .unwrap();
999        assert_eq!(got, "hi there\n");
1000    }
1001
1002    #[tokio::test]
1003    async fn read_file_rejects_absolute_path() {
1004        let dir = tempfile::tempdir().unwrap();
1005        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1006            .await
1007            .unwrap();
1008        let res = env.read_file(Path::new("/etc/passwd"), 100, 1024).await;
1009        assert!(res.is_err(), "absolute paths must be rejected");
1010    }
1011
1012    #[tokio::test]
1013    async fn read_file_rejects_parent_dir() {
1014        let dir = tempfile::tempdir().unwrap();
1015        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1016            .await
1017            .unwrap();
1018        let res = env.read_file(Path::new("../escape.txt"), 100, 1024).await;
1019        assert!(res.is_err(), "`..` must be rejected");
1020    }
1021
1022    #[tokio::test]
1023    async fn read_file_full_returns_complete_content_without_truncation() {
1024        let dir = tempfile::tempdir().unwrap();
1025        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1026            .await
1027            .unwrap();
        // 10 lines of 38 bytes each = 380 bytes, well under the 1024-byte cap
        // passed below, yet multi-line so the *truncating* read's line/byte
        // interplay would have something to act on. Ensure the full-read path
        // returns the whole file verbatim, with no marker.
1031        let body = (0..10)
1032            .map(|i| format!("line number {i:02} with some padding text\n"))
1033            .collect::<String>();
1034        tokio::fs::write(dir.path().join("big.txt"), &body)
1035            .await
1036            .unwrap();
1037        let got = env
1038            .read_file_full(Path::new("big.txt"), 1024)
1039            .await
1040            .unwrap();
1041        assert_eq!(got, body);
1042        assert!(!got.contains("[... truncated"));
1043    }
1044
1045    #[tokio::test]
1046    async fn read_file_full_rejects_absolute_path() {
1047        let dir = tempfile::tempdir().unwrap();
1048        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1049            .await
1050            .unwrap();
1051        let res = env.read_file_full(Path::new("/etc/passwd"), 1024).await;
1052        assert!(res.is_err(), "absolute paths must be rejected");
1053    }
1054
1055    #[tokio::test]
1056    async fn read_file_full_rejects_parent_dir() {
1057        let dir = tempfile::tempdir().unwrap();
1058        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1059            .await
1060            .unwrap();
1061        let res = env.read_file_full(Path::new("../escape.txt"), 1024).await;
1062        assert!(res.is_err(), "`..` must be rejected");
1063    }
1064
1065    #[tokio::test]
1066    async fn read_file_full_errors_when_too_large_not_truncated() {
1067        let dir = tempfile::tempdir().unwrap();
1068        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1069            .await
1070            .unwrap();
1071        // 100 bytes, cap at 50 -> must ERROR (FileTooLarge), never return a
1072        // truncated prefix (the whole point vs `read_file`).
1073        tokio::fs::write(dir.path().join("over.txt"), &"a".repeat(100))
1074            .await
1075            .unwrap();
1076        let res = env.read_file_full(Path::new("over.txt"), 50).await;
1077        assert!(res.is_err(), "oversized file must error, not truncate");
1078        match res {
1079            Err(RuntimeError::FileTooLarge { size, max, .. }) => {
1080                assert_eq!(size, 100);
1081                assert_eq!(max, 50);
1082            }
1083            other => panic!("expected FileTooLarge, got {other:?}"),
1084        }
1085    }
1086
1087    #[tokio::test]
1088    async fn write_then_read_roundtrips() {
1089        let dir = tempfile::tempdir().unwrap();
1090        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1091            .await
1092            .unwrap();
1093        env.write_file(Path::new("sub/nested/file.txt"), "deep content")
1094            .await
1095            .unwrap();
1096        let got = env
1097            .read_file(Path::new("sub/nested/file.txt"), 100, 1024)
1098            .await
1099            .unwrap();
1100        assert_eq!(got, "deep content");
1101    }
1102
1103    #[tokio::test]
1104    async fn read_file_bounded_read_does_not_oom_on_large_file() {
1105        // Regression for 0.5.2 bounded read: a file far larger than `max_bytes`
1106        // must be read bounded (not fully buffered) and truncated, without
1107        // erroring or OOMing. The old `read_to_string` of the whole file would
1108        // allocate the entire multi-MB body.
1109        let dir = tempfile::tempdir().unwrap();
1110        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1111            .await
1112            .unwrap();
1113        // 100 KB of ASCII, capped at 64 bytes. Only the first ~64 bytes are
1114        // returned (plus a truncation marker); nothing else is held in memory.
1115        let body = "a".repeat(100 * 1024);
1116        tokio::fs::write(dir.path().join("big.txt"), &body)
1117            .await
1118            .unwrap();
1119        let got = env
1120            .read_file(Path::new("big.txt"), 10_000, 64)
1121            .await
1122            .unwrap();
1123        assert!(
1124            got.contains("[... truncated at 64 bytes"),
1125            "expected a byte-cap truncation marker: {got:?}"
1126        );
1127        assert!(got.len() < 128, "output must be bounded near max_bytes");
1128    }
1129
1130    #[tokio::test]
1131    async fn read_file_bounded_read_trims_multibyte_boundary() {
1132        // A multibyte char straddling the `max_bytes` cut must be trimmed to a
1133        // valid char boundary — no panic, no invalid UTF-8 in the output.
1134        let dir = tempfile::tempdir().unwrap();
1135        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1136            .await
1137            .unwrap();
1138        // Each `é` is 2 bytes (U+00E9, UTF-8 C3 A9). 10 of them = 20 bytes.
1139        // Capping at 11 bytes splits the 6th char; the trim drops its trailing
1140        // byte so the result is 5 chars (10 bytes).
1141        let body = "é".repeat(10);
1142        tokio::fs::write(dir.path().join("accent.txt"), body.as_bytes())
1143            .await
1144            .unwrap();
1145        let got = env
1146            .read_file(Path::new("accent.txt"), 10_000, 11)
1147            .await
1148            .unwrap();
1149        // The bounded prefix must be valid UTF-8 and contain only whole chars.
1150        assert!(
1151            got.starts_with("ééééé"),
1152            "trimmed prefix should be whole chars"
1153        );
1154    }
1155
1156    #[tokio::test]
1157    async fn exec_runs_shell_command() {
1158        let dir = tempfile::tempdir().unwrap();
1159        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1160            .await
1161            .unwrap();
1162        let res = env
1163            .exec(
1164                "echo hello",
1165                Path::new("."),
1166                None,
1167                &CancellationToken::new(),
1168            )
1169            .await
1170            .unwrap();
1171        assert_eq!(res.exit_code, 0);
1172        assert_eq!(res.stdout.trim(), "hello");
1173    }
1174
1175    #[tokio::test]
1176    async fn exec_does_not_leak_parent_env_secrets() {
1177        // The model-run shell must NOT inherit provider keys / tokens from the
1178        // parent. We set a distinctive secret in the parent env, run `env` in the
1179        // child, and assert the secret is absent (env_clear + allowlist).
1180        let dir = tempfile::tempdir().unwrap();
1181        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1182            .await
1183            .unwrap();
1184        std::env::set_var("FLUERS_TEST_SECRET", "leak-me-if-you-can");
1185        let res = env
1186            .exec("env", Path::new("."), None, &CancellationToken::new())
1187            .await
1188            .unwrap();
1189        assert_eq!(res.exit_code, 0, "env should run");
1190        assert!(
1191            !res.stdout.contains("FLUERS_TEST_SECRET"),
1192            "parent env secret must not leak into the model-run shell"
1193        );
1194        assert!(
1195            !res.stdout.contains("leak-me-if-you-can"),
1196            "the secret value must not appear in the child env"
1197        );
1198        std::env::remove_var("FLUERS_TEST_SECRET");
1199    }
1200
1201    #[tokio::test]
1202    async fn exec_does_not_leak_lang_prefixed_secrets() {
1203        // Regression: the locale allowlist once used `starts_with("LANG")`, which
1204        // also passed secrets like `LANGCHAIN_API_KEY` into the child. The
1205        // allowlist must match locale names exactly, not by `LANG` prefix.
1206        let dir = tempfile::tempdir().unwrap();
1207        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1208            .await
1209            .unwrap();
1210        std::env::set_var("LANGCHAIN_API_KEY", "lang-prefixed-secret");
1211        let res = env
1212            .exec("env", Path::new("."), None, &CancellationToken::new())
1213            .await
1214            .unwrap();
1215        std::env::remove_var("LANGCHAIN_API_KEY");
1216        assert!(
1217            !res.stdout.contains("LANGCHAIN_API_KEY"),
1218            "a LANG-prefixed secret must not leak into the model-run shell"
1219        );
1220        assert!(
1221            !res.stdout.contains("lang-prefixed-secret"),
1222            "the LANG-prefixed secret value must not appear in the child env"
1223        );
1224    }
1225
1226    #[tokio::test]
1227    async fn exec_timeout_returns_124() {
1228        let dir = tempfile::tempdir().unwrap();
1229        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1230            .await
1231            .unwrap();
1232        let res = env
1233            .exec(
1234                "sleep 5",
1235                Path::new("."),
1236                Some(200),
1237                &CancellationToken::new(),
1238            )
1239            .await
1240            .unwrap();
1241        assert_eq!(res.exit_code, 124, "timeout must yield exit 124");
1242    }
1243
1244    #[test]
1245    fn glob_matcher_basics() {
1246        assert!(matches_glob("foo.txt", "*.txt"));
1247        assert!(matches_glob("foo.txt", "foo.*"));
1248        assert!(!matches_glob("foo.txt", "*.md"));
1249        assert!(matches_glob("a", "?"));
1250    }
1251
1252    #[test]
1253    fn read_limit_truncates() {
1254        let got = apply_read_limits("a\nb\nc\nd\n".into(), 2, 1024);
1255        assert!(got.contains("a"));
1256        assert!(got.contains("b"));
1257        assert!(got.contains("truncated"));
1258    }
1259
1260    #[tokio::test]
1261    async fn glob_rejects_absolute_pattern() {
1262        let dir = tempfile::tempdir().unwrap();
1263        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1264            .await
1265            .unwrap();
1266        let res = env.glob("/etc/*", 10).await;
1267        assert!(res.is_err(), "absolute glob patterns must be rejected");
1268    }
1269
1270    #[tokio::test]
1271    async fn glob_rejects_parent_dir_pattern() {
1272        let dir = tempfile::tempdir().unwrap();
1273        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1274            .await
1275            .unwrap();
1276        let res = env.glob("../**/*", 10).await;
1277        assert!(res.is_err(), "`..` in glob patterns must be rejected");
1278    }
1279
1280    #[tokio::test]
1281    async fn grep_rejects_absolute_path() {
1282        let dir = tempfile::tempdir().unwrap();
1283        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1284            .await
1285            .unwrap();
1286        let res = env.grep("foo", &["/etc/passwd"], 10).await;
1287        assert!(res.is_err(), "absolute grep paths must be rejected");
1288    }
1289
1290    #[tokio::test]
1291    async fn grep_rejects_parent_dir_path() {
1292        let dir = tempfile::tempdir().unwrap();
1293        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1294            .await
1295            .unwrap();
1296        let res = env.grep("foo", &["../.env"], 10).await;
1297        assert!(res.is_err(), "`..` grep paths must be rejected");
1298    }
1299
1300    // ── B-Swift Phase C1a / #4: fd-anchored read TOCTOU / hardlink coverage ──
1301    // These prove the fix: the OLD path-based `read_to_string(resolved)` followed
1302    // symlinks (leaking the target) and ignored `st_nlink`, so each of these
1303    // would have SUCCEEDED (exfiltrated the secret) before the fix.
1304
1305    /// Write a secret to a file OUTSIDE the env root (a sibling temp dir) and
1306    /// return both the held `TempDir` (keep alive for the test) and its path.
1307    #[cfg(unix)]
1308    fn outside_secret(body: &str) -> (tempfile::TempDir, PathBuf) {
1309        use std::io::Write;
1310        let dir = tempfile::tempdir().unwrap();
1311        let path = dir.path().join("secret.txt");
1312        let mut f = std::fs::File::create(&path).unwrap();
1313        f.write_all(body.as_bytes()).unwrap();
1314        (dir, path)
1315    }
1316
1317    #[cfg(unix)]
1318    #[tokio::test]
1319    async fn read_file_rejects_symlink_leaf_even_when_target_inside_root() {
1320        use std::os::unix::fs::symlink;
1321        let dir = tempfile::tempdir().unwrap();
1322        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1323            .await
1324            .unwrap();
1325        tokio::fs::write(dir.path().join("inside.txt"), "ok\n")
1326            .await
1327            .unwrap();
1328        symlink("inside.txt", dir.path().join("link.txt")).unwrap();
1329        let res = env.read_file(Path::new("link.txt"), 100, 1024).await;
1330        assert!(
1331            res.is_err(),
1332            "a symlink leaf must be rejected even if its target is inside the root"
1333        );
1334    }
1335
1336    #[cfg(unix)]
1337    #[tokio::test]
1338    async fn read_file_rejects_symlink_leaf_to_outside_root() {
1339        // Exfil via symlink: link.txt -> /outside/secret. The OLD read followed
1340        // it and leaked "TOPSECRET"; the anchored `openat(O_NOFOLLOW)` rejects
1341        // the symlink leaf outright.
1342        use std::os::unix::fs::symlink;
1343        let dir = tempfile::tempdir().unwrap();
1344        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1345            .await
1346            .unwrap();
1347        let (_outside, secret) = outside_secret("TOPSECRET");
1348        symlink(&secret, dir.path().join("link.txt")).unwrap();
1349        let res = env.read_file(Path::new("link.txt"), 100, 1024).await;
1350        assert!(
1351            res.is_err(),
1352            "a symlink to outside the root must be rejected"
1353        );
1354        if let Ok(s) = res {
1355            assert!(!s.contains("TOPSECRET"), "the secret must not leak");
1356        }
1357    }
1358
1359    #[cfg(unix)]
1360    #[tokio::test]
1361    async fn read_file_rejects_intermediate_symlink_dir() {
1362        // Exfil via a symlinked intermediate dir: linkdir -> realdir; reading
1363        // `linkdir/file.txt` must reject at the `linkdir` component (per-component
1364        // `openat(O_NOFOLLOW)`).
1365        use std::os::unix::fs::symlink;
1366        let dir = tempfile::tempdir().unwrap();
1367        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1368            .await
1369            .unwrap();
1370        tokio::fs::create_dir_all(dir.path().join("realdir"))
1371            .await
1372            .unwrap();
1373        tokio::fs::write(dir.path().join("realdir/file.txt"), "ok\n")
1374            .await
1375            .unwrap();
1376        symlink("realdir", dir.path().join("linkdir")).unwrap();
1377        let res = env
1378            .read_file(Path::new("linkdir/file.txt"), 100, 1024)
1379            .await;
1380        assert!(
1381            res.is_err(),
1382            "a symlinked intermediate dir must be rejected"
1383        );
1384    }
1385
1386    #[cfg(unix)]
1387    #[tokio::test]
1388    async fn read_file_rejects_hardlink_to_outside_secret() {
1389        // Hardlink exfil: `ln /outside/secret root/link.txt`. The file is regular
1390        // and inside the root, but `st_nlink > 1` → reject (mirrors the Swift
1391        // C2/#3 decision; authoritative here via post-open `fstat`).
1392        let dir = tempfile::tempdir().unwrap();
1393        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1394            .await
1395            .unwrap();
1396        let (_outside, secret) = outside_secret("TOPSECRET");
1397        std::fs::hard_link(&secret, dir.path().join("link.txt")).unwrap();
1398        let res = env.read_file(Path::new("link.txt"), 100, 1024).await;
1399        assert!(res.is_err(), "a hardlink (st_nlink > 1) must be rejected");
1400        if let Ok(s) = res {
1401            assert!(!s.contains("TOPSECRET"), "the secret must not leak");
1402        }
1403    }
1404
1405    #[cfg(unix)]
1406    #[tokio::test]
1407    async fn read_file_full_rejects_symlink_leaf() {
1408        use std::os::unix::fs::symlink;
1409        let dir = tempfile::tempdir().unwrap();
1410        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1411            .await
1412            .unwrap();
1413        let (_outside, secret) = outside_secret("TOPSECRET");
1414        symlink(&secret, dir.path().join("link.txt")).unwrap();
1415        let res = env.read_file_full(Path::new("link.txt"), 1024).await;
1416        assert!(res.is_err(), "read_file_full must reject a symlink leaf");
1417        if let Ok(s) = res {
1418            assert!(!s.contains("TOPSECRET"));
1419        }
1420    }
1421
1422    #[cfg(unix)]
1423    #[tokio::test]
1424    async fn read_file_full_rejects_hardlink() {
1425        let dir = tempfile::tempdir().unwrap();
1426        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1427            .await
1428            .unwrap();
1429        let (_outside, secret) = outside_secret("TOPSECRET");
1430        std::fs::hard_link(&secret, dir.path().join("link.txt")).unwrap();
1431        let res = env.read_file_full(Path::new("link.txt"), 1024).await;
1432        assert!(
1433            res.is_err(),
1434            "read_file_full must reject a hardlink (st_nlink > 1)"
1435        );
1436    }
1437
1438    #[cfg(unix)]
1439    #[tokio::test]
1440    async fn read_anchored_nested_relative_path_still_works() {
1441        // Regression guard: the anchored walk must still read a real nested
1442        // file (intermediate dirs are opened `O_NOFOLLOW` + read off the leaf fd).
1443        let dir = tempfile::tempdir().unwrap();
1444        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1445            .await
1446            .unwrap();
1447        tokio::fs::create_dir_all(dir.path().join("a/b"))
1448            .await
1449            .unwrap();
1450        tokio::fs::write(dir.path().join("a/b/c.txt"), "deep\n")
1451            .await
1452            .unwrap();
1453        let got = env
1454            .read_file(Path::new("a/b/c.txt"), 100, 1024)
1455            .await
1456            .unwrap();
1457        assert_eq!(got, "deep\n");
1458    }
1459
// ── B-Swift Phase C1b: fd-anchored write / exec / glob / grep TOCTOU ──
// Each of these FAILED (or leaked) on the old path-based `resolve()` and
// passes on the fd-anchored walk. The inside-target symlink cases are the
// real TOCTOU proof: the OLD `resolve()` canonicalized a symlink whose
// target was inside the root → passed containment → the subsequent
// path-based op followed it. The fd-anchored walk rejects the symlink at
// `openat(O_NOFOLLOW)`.
1466
1467    #[cfg(unix)]
1468    #[tokio::test]
1469    async fn write_file_rejects_symlink_leaf_pointing_inside() {
1470        // OLD: resolve() canonicalized `link.txt` → inside `target.txt`
1471        // (contained) → `tokio::fs::write` followed the symlink and overwrote
1472        // the target. NEW: `openat(O_NOFOLLOW)` rejects the symlink leaf; the
1473        // inside target is untouched.
1474        use std::os::unix::fs::symlink;
1475        let dir = tempfile::tempdir().unwrap();
1476        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1477            .await
1478            .unwrap();
1479        tokio::fs::write(dir.path().join("target.txt"), "ORIGINAL")
1480            .await
1481            .unwrap();
1482        symlink("target.txt", dir.path().join("link.txt")).unwrap();
1483        let res = env.write_file(Path::new("link.txt"), "OVERWRITE").await;
1484        assert!(
1485            res.is_err(),
1486            "writing through a symlink leaf must be rejected"
1487        );
1488        let got = tokio::fs::read_to_string(dir.path().join("target.txt"))
1489            .await
1490            .unwrap();
1491        assert_eq!(
1492            got, "ORIGINAL",
1493            "the symlink target must not be overwritten"
1494        );
1495    }
1496
1497    #[cfg(unix)]
1498    #[tokio::test]
1499    async fn write_file_rejects_symlinked_intermediate_dir() {
1500        // OLD: resolve() canonicalized `linkdir/file.txt` through the symlink
1501        // (contained) → wrote through it. NEW: the mkdirat/openat walk rejects
1502        // the symlinked `linkdir` component.
1503        use std::os::unix::fs::symlink;
1504        let dir = tempfile::tempdir().unwrap();
1505        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1506            .await
1507            .unwrap();
1508        tokio::fs::create_dir_all(dir.path().join("realdir"))
1509            .await
1510            .unwrap();
1511        symlink("realdir", dir.path().join("linkdir")).unwrap();
1512        let res = env.write_file(Path::new("linkdir/file.txt"), "data").await;
1513        assert!(
1514            res.is_err(),
1515            "writing through a symlinked intermediate dir must be rejected"
1516        );
1517    }
1518
1519    #[cfg(unix)]
1520    #[tokio::test]
1521    async fn write_file_rejects_hardlink_to_outside_secret() {
1522        // OLD: resolve() canonicalized the inside link (contained) →
1523        // `tokio::fs::write` wrote through the shared inode → corrupted
1524        // /outside/secret. NEW: fstat off the open fd sees `st_nlink > 1` →
1525        // reject; the outside file is unchanged.
1526        let dir = tempfile::tempdir().unwrap();
1527        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1528            .await
1529            .unwrap();
1530        let (_outside, secret) = outside_secret("ORIGINAL-SECRET");
1531        std::fs::hard_link(&secret, dir.path().join("link.txt")).unwrap();
1532        let res = env.write_file(Path::new("link.txt"), "CORRUPTED").await;
1533        assert!(
1534            res.is_err(),
1535            "writing a hardlink (st_nlink > 1) must be rejected"
1536        );
1537        let got = std::fs::read_to_string(&secret).unwrap();
1538        assert_eq!(
1539            got, "ORIGINAL-SECRET",
1540            "the outside secret must not be corrupted"
1541        );
1542    }
1543
1544    #[tokio::test]
1545    async fn write_file_creates_new_nested_path() {
1546        // Regression: the mkdirat walk + leaf open must still create brand-new
1547        // nested files (the happy path must not regress).
1548        let dir = tempfile::tempdir().unwrap();
1549        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1550            .await
1551            .unwrap();
1552        env.write_file(Path::new("a/b/c/new.txt"), "deep")
1553            .await
1554            .unwrap();
1555        let got = env
1556            .read_file(Path::new("a/b/c/new.txt"), 100, 1024)
1557            .await
1558            .unwrap();
1559        assert_eq!(got, "deep");
1560    }
1561
1562    #[cfg(unix)]
1563    #[tokio::test]
1564    async fn exec_rejects_symlinked_cwd_pointing_inside() {
1565        // OLD: resolve() canonicalized the symlinked cwd → inside dir
1566        // (contained) → the child ran there. NEW: open_anchored_dir rejects the
1567        // symlink at the openat component.
1568        use std::os::unix::fs::symlink;
1569        let dir = tempfile::tempdir().unwrap();
1570        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1571            .await
1572            .unwrap();
1573        tokio::fs::create_dir_all(dir.path().join("realcwd"))
1574            .await
1575            .unwrap();
1576        symlink("realcwd", dir.path().join("linkcwd")).unwrap();
1577        let res = env
1578            .exec(
1579                "echo hi",
1580                Path::new("linkcwd"),
1581                None,
1582                &CancellationToken::new(),
1583            )
1584            .await;
1585        assert!(res.is_err(), "a symlinked cwd must be rejected");
1586    }
1587
1588    #[tokio::test]
1589    async fn exec_large_stdout_does_not_deadlock() {
1590        // Regression: `exec` used to `child.wait()` WITHOUT draining the stdout
1591        // pipe, so a child emitting more than the OS pipe buffer (~64 KB) blocked
1592        // on a full pipe while `wait()` blocked on the child — a deadlock. With no
1593        // timeout set (as here) the old code hung forever; `wait_with_output` now
1594        // drains both pipes concurrently, so the full output returns intact.
1595        let dir = tempfile::tempdir().unwrap();
1596        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1597            .await
1598            .unwrap();
1599        let res = env
1600            .exec(
1601                "yes a | head -c 200000",
1602                Path::new("."),
1603                None,
1604                &CancellationToken::new(),
1605            )
1606            .await
1607            .unwrap();
1608        assert_eq!(res.exit_code, 0);
1609        assert_eq!(
1610            res.stdout.len(),
1611            200_000,
1612            "full >64 KB stdout must survive without deadlock"
1613        );
1614    }
1615
1616    #[tokio::test]
1617    async fn glob_returns_matching_files() {
1618        // Regression for the fd-anchored rewrite: it must still surface real
1619        // files at the base and nested under real subdirectories.
1620        let dir = tempfile::tempdir().unwrap();
1621        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1622            .await
1623            .unwrap();
1624        tokio::fs::write(dir.path().join("top.txt"), "x")
1625            .await
1626            .unwrap();
1627        tokio::fs::create_dir_all(dir.path().join("sub"))
1628            .await
1629            .unwrap();
1630        tokio::fs::write(dir.path().join("sub/nested.txt"), "x")
1631            .await
1632            .unwrap();
1633        let matched = env.glob("*.txt", 100).await.unwrap();
1634        assert!(
1635            matched.iter().any(|m| m == "top.txt"),
1636            "base file should match: {matched:?}"
1637        );
1638        assert!(
1639            matched.iter().any(|m| m == "sub/nested.txt"),
1640            "nested file should match: {matched:?}"
1641        );
1642    }
1643
1644    #[tokio::test]
1645    async fn glob_subdir_pattern_reports_root_relative_paths() {
1646        // Regression for the base-prefix bug: a pattern with a subdir base must
1647        // report paths relative to the ROOT, not relative to the base.
1648        let dir = tempfile::tempdir().unwrap();
1649        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1650            .await
1651            .unwrap();
1652        tokio::fs::create_dir_all(dir.path().join("sub"))
1653            .await
1654            .unwrap();
1655        tokio::fs::write(dir.path().join("sub/nested.txt"), "x")
1656            .await
1657            .unwrap();
1658        let matched = env.glob("sub/*.txt", 100).await.unwrap();
1659        assert!(
1660            matched.iter().any(|m| m == "sub/nested.txt"),
1661            "must be root-relative (`sub/nested.txt`), not base-relative: {matched:?}"
1662        );
1663        assert!(
1664            !matched.iter().any(|m| m == "nested.txt"),
1665            "base-relative leak must not happen: {matched:?}"
1666        );
1667    }
1668
1669    #[cfg(unix)]
1670    #[tokio::test]
1671    async fn glob_does_not_traverse_symlinked_dir_to_outside() {
1672        // OLD glob's `path.is_dir()` FOLLOWED the symlink → recursed into the
1673        // outside dir → leaked its `.txt`. NEW: descent is via
1674        // `openat(O_DIRECTORY|O_NOFOLLOW)` → the symlinked dir is never entered.
1675        use std::os::unix::fs::symlink;
1676        let dir = tempfile::tempdir().unwrap();
1677        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1678            .await
1679            .unwrap();
1680        tokio::fs::write(dir.path().join("inside.txt"), "ok")
1681            .await
1682            .unwrap();
1683        tokio::fs::create_dir_all(dir.path().join("realdir"))
1684            .await
1685            .unwrap();
1686        tokio::fs::write(dir.path().join("realdir/nested.txt"), "ok")
1687            .await
1688            .unwrap();
1689        // A symlinked dir pointing at the outside temp dir (which holds
1690        // `secret.txt`).
1691        let (_outside, secret) = outside_secret("OUTSIDE-SECRET");
1692        let outside_dir = secret.parent().unwrap();
1693        symlink(outside_dir, dir.path().join("linkdir")).unwrap();
1694        let matched = env.glob("*.txt", 100).await.unwrap();
1695        assert!(
1696            matched.iter().any(|m| m == "inside.txt"),
1697            "inside file should match: {matched:?}"
1698        );
1699        assert!(
1700            matched.iter().any(|m| m == "realdir/nested.txt"),
1701            "real nested file should match: {matched:?}"
1702        );
1703        assert!(
1704            !matched.iter().any(|m| m.starts_with("linkdir")),
1705            "symlinked dir must not be traversed: {matched:?}"
1706        );
1707        for m in &matched {
1708            assert!(
1709                !m.contains("secret.txt") && !m.contains("OUTSIDE-SECRET"),
1710                "outside file must not leak: {m}"
1711            );
1712        }
1713    }
1714
1715    #[tokio::test]
1716    async fn grep_returns_matches() {
1717        // Regression for the inode-anchored search: it must still surface real
1718        // matches inside the root, AND the output must be root-relative (not the
1719        // absolute host temp/root path, which the inode-path search would
1720        // otherwise leak).
1721        let dir = tempfile::tempdir().unwrap();
1722        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1723            .await
1724            .unwrap();
1725        tokio::fs::write(dir.path().join("note.md"), "findme here\n")
1726            .await
1727            .unwrap();
1728        let matched = env.grep("findme", &["."], 100).await.unwrap();
1729        assert!(
1730            matched.iter().any(|m| m.contains("findme")),
1731            "expected a match: {matched:?}"
1732        );
1733        // Output paths are root-relative...
1734        assert!(
1735            matched.iter().any(|m| m.starts_with("note.md:")),
1736            "expected a root-relative `note.md:` line: {matched:?}"
1737        );
1738        // ...and must NOT leak the host temp/root path.
1739        let root_str = dir.path().to_string_lossy().into_owned();
1740        for m in &matched {
1741            assert!(
1742                !m.contains(&root_str),
1743                "grep output must not leak the absolute root path: {m}"
1744            );
1745        }
1746    }
1747
1748    #[cfg(unix)]
1749    #[tokio::test]
1750    async fn grep_rejects_symlinked_search_path() {
1751        // `rg --no-follow` still follows a symlinked dir passed EXPLICITLY as a
1752        // search path, so the path is resolved fd-anchored to its inode and a
1753        // symlink is rejected outright (no leak via `linkdir -> outside`).
1754        use std::os::unix::fs::symlink;
1755        let dir = tempfile::tempdir().unwrap();
1756        let env = LocalSessionEnv::new(dir.path(), Limits::default())
1757            .await
1758            .unwrap();
1759        let (_outside, secret) = outside_secret("GREP-LEAK");
1760        let outside_dir = secret.parent().unwrap();
1761        symlink(outside_dir, dir.path().join("linkdir")).unwrap();
1762        // Explicit symlinked path → rejected (Err), never searched.
1763        let res = env.grep("GREP-LEAK", &["linkdir"], 100).await;
1764        assert!(
1765            res.is_err(),
1766            "an explicit symlinked search path must be rejected"
1767        );
1768        // And a `.` search must not traverse the symlinked dir either.
1769        let matched = env.grep("GREP-LEAK", &["."], 100).await.unwrap();
1770        assert!(
1771            matched.is_empty(),
1772            "the symlinked dir must not be traversed: {matched:?}"
1773        );
1774    }
1775
1776    #[cfg(unix)]
1777    #[tokio::test]
1778    async fn grep_anchors_to_root_fd_not_root_path() {
1779        // TOCTOU for grep: after the env is built, move the real root aside and
1780        // replace the root *path* with a symlink to an outside dir holding a
1781        // secret. OLD grep used `current_dir(self.root)` (the path) → would
1782        // chdir through the symlink and surface the secret. NEW grep anchors to
1783        // `/dev/fd/{root_fd}` → chdir to the real (moved) root → no leak.
1784        use std::os::unix::fs::symlink;
1785        // A parent dir we fully control (manual, not TempDir, so the swap + the
1786        // symlink-over-root don't confuse Drop cleanup).
1787        let nonce = std::time::SystemTime::now()
1788            .duration_since(std::time::UNIX_EPOCH)
1789            .map(|d| d.as_nanos())
1790            .unwrap_or(0);
1791        let parent = std::env::temp_dir().join(format!("fluers-grep-swap-{nonce}"));
1792        std::fs::create_dir_all(&parent).unwrap();
1793        let root_path = parent.join("root");
1794        std::fs::create_dir_all(&root_path).unwrap();
1795        let env = LocalSessionEnv::new(&root_path, Limits::default())
1796            .await
1797            .unwrap();
1798
1799        let outside = parent.join("outside");
1800        std::fs::create_dir_all(&outside).unwrap();
1801        std::fs::write(outside.join("leak.txt"), "PATHSWAP-SECRET\n").unwrap();
1802
1803        // Swap: move the real root aside (sibling), then symlink the root path
1804        // → outside.
1805        let moved = parent.join("moved-real-root");
1806        std::fs::rename(&root_path, &moved).unwrap();
1807        symlink(&outside, &root_path).unwrap();
1808
1809        let matched = env.grep("PATHSWAP-SECRET", &["."], 100).await.unwrap();
1810        assert!(
1811            matched.is_empty(),
1812            "root-fd anchoring must not follow the swapped root path: {matched:?}"
1813        );
1814
1815        // We own `parent` fully — clean up everything under it.
1816        let _ = std::fs::remove_dir_all(&parent);
1817    }
1818
1819    // ── WP-2: ProcessSandbox backend slot ───────────────────────────────────
1820    //
1821    // These tests exercise the *shape* only: a mock `ProcessSandbox` proves
1822    // wrap is applied at BOTH spawn sites, fail-closed fires on Refuse+Partial,
1823    // and backend-injected env survives into the child. No real backend ships.
1824
1825    use crate::process_sandbox::{
1826        Enforcement, ExecSandboxContext, OnUnavailable, ProcessSandbox, SandboxPolicy,
1827        SandboxProfile, WrappedCommand,
1828    };
1829    use crate::sandbox::Sandbox;
1830    use std::collections::BTreeMap;
1831    use std::sync::Mutex;
1832
1833    /// A mock `ProcessSandbox` that records every wrapped argv, optionally
1834    /// injects an env var, and reports a configurable `Enforcement`.
1835    struct MockProcessSandbox {
1836        enforcement: Enforcement,
1837        /// Every argv handed to `wrap`, recorded in order.
1838        wraps: Arc<Mutex<Vec<Vec<String>>>>,
1839        /// Env additions to inject on every wrap (proves env survives env_clear).
1840        inject_env: BTreeMap<String, String>,
1841        prepare_calls: Arc<Mutex<u32>>,
1842        /// The canonical workspace_path `prepare` saw (proves the backend gets
1843        /// the same absolute root LocalSessionEnv anchors on).
1844        prepared_workspace: Arc<Mutex<Option<PathBuf>>>,
1845    }
1846
1847    impl MockProcessSandbox {
1848        fn new(enforcement: Enforcement) -> Self {
1849            Self {
1850                enforcement,
1851                wraps: Arc::new(Mutex::new(Vec::new())),
1852                inject_env: BTreeMap::new(),
1853                prepare_calls: Arc::new(Mutex::new(0)),
1854                prepared_workspace: Arc::new(Mutex::new(None)),
1855            }
1856        }
1857
1858        fn with_env(mut self, env: BTreeMap<String, String>) -> Self {
1859            self.inject_env = env;
1860            self
1861        }
1862    }
1863
1864    #[async_trait::async_trait]
1865    impl ProcessSandbox for MockProcessSandbox {
1866        async fn prepare(&self, ctx: &ExecSandboxContext) -> RuntimeResult<()> {
1867            *self.prepare_calls.lock().unwrap() += 1;
1868            *self.prepared_workspace.lock().unwrap() = Some(ctx.workspace_path.clone());
1869            Ok(())
1870        }
1871        fn wrap(
1872            &self,
1873            argv: &[String],
1874            _ctx: &ExecSandboxContext,
1875        ) -> RuntimeResult<WrappedCommand> {
1876            self.wraps.lock().unwrap().push(argv.to_vec());
1877            Ok(WrappedCommand {
1878                argv: argv.to_vec(),
1879                env: self.inject_env.clone(),
1880            })
1881        }
1882        async fn probe(&self, _profile: &SandboxProfile) -> RuntimeResult<Enforcement> {
1883            Ok(self.enforcement)
1884        }
1885        async fn shutdown(&self) -> RuntimeResult<()> {
1886            Ok(())
1887        }
1888    }
1889
1890    #[tokio::test]
1891    async fn wrap_is_applied_at_both_spawn_sites() {
1892        // A backend that reports FullDev-FullyEnforced; record every wrap.
1893        let dir = tempfile::tempdir().unwrap();
1894        let mock = Arc::new(MockProcessSandbox::new(Enforcement::FullyEnforced));
1895        let wraps = Arc::clone(&mock.wraps);
1896        let sandbox = crate::LocalSandbox::new(dir.path().to_path_buf()).with_exec_sandbox(
1897            mock,
1898            SandboxPolicy {
1899                profile: SandboxProfile::FullDev,
1900                egress: Vec::new(),
1901                on_unavailable: OnUnavailable::Degrade,
1902            },
1903        );
1904        let env = sandbox.env_for(dir.path()).await.unwrap();
1905
1906        // Trigger the exec spawn site.
1907        env.exec(
1908            "true",
1909            Path::new("."),
1910            None,
1911            &tokio_util::sync::CancellationToken::new(),
1912        )
1913        .await
1914        .unwrap();
1915        // Trigger the grep spawn site (needs a file to exist).
1916        tokio::fs::write(dir.path().join("needle.txt"), "secret\n")
1917            .await
1918            .unwrap();
1919        env.grep("secret", &["."], 10).await.unwrap();
1920
1921        let recorded = wraps.lock().unwrap().clone();
1922        assert_eq!(
1923            recorded.len(),
1924            2,
1925            "wrap must be called at BOTH spawn sites; got {recorded:?}"
1926        );
1927        // Each wrap target is `sh -c <...>`.
1928        for argv in &recorded {
1929            assert_eq!(argv.first().map(String::as_str), Some("sh"));
1930            assert_eq!(argv.get(1).map(String::as_str), Some("-c"));
1931        }
1932    }
1933
1934    #[tokio::test]
1935    async fn refuse_policy_with_partial_probe_errors_at_session_construction() {
1936        // Backend reports Partial; policy is Refuse ⇒ env_for must fail loud,
1937        // before any command runs.
1938        let dir = tempfile::tempdir().unwrap();
1939        let mock = Arc::new(MockProcessSandbox::new(Enforcement::Partial));
1940        let sandbox = crate::LocalSandbox::new(dir.path().to_path_buf()).with_exec_sandbox(
1941            mock,
1942            SandboxPolicy {
1943                profile: SandboxProfile::ReadOnly,
1944                egress: Vec::new(),
1945                on_unavailable: OnUnavailable::Refuse,
1946            },
1947        );
1948        let result = sandbox.env_for(dir.path()).await;
1949        let msg = match result {
1950            Ok(_) => panic!("Refuse + Partial must fail-closed at env_for; got Ok"),
1951            Err(e) => format!("{e:?}"),
1952        };
1953        assert!(
1954            msg.contains("Refuse"),
1955            "error must explain the refuse decision: {msg}"
1956        );
1957    }
1958
1959    #[tokio::test]
1960    async fn wrap_injected_env_survives_into_child() {
1961        // The mock injects FLUERS_SANDBOX_MARKER=ok. After env_clear + the safe
1962        // allowlist, the child must still see it (proving C1: wrap env survives).
1963        let dir = tempfile::tempdir().unwrap();
1964        let mut env = BTreeMap::new();
1965        env.insert("FLUERS_SANDBOX_MARKER".into(), "ok".into());
1966        let mock = Arc::new(MockProcessSandbox::new(Enforcement::FullyEnforced).with_env(env));
1967        let sandbox = crate::LocalSandbox::new(dir.path().to_path_buf()).with_exec_sandbox(
1968            mock,
1969            SandboxPolicy {
1970                profile: SandboxProfile::FullDev,
1971                egress: Vec::new(),
1972                on_unavailable: OnUnavailable::Degrade,
1973            },
1974        );
1975        let env = sandbox.env_for(dir.path()).await.unwrap();
1976        let res = env
1977            .exec(
1978                "printf '%s' \"$FLUERS_SANDBOX_MARKER\"",
1979                Path::new("."),
1980                None,
1981                &tokio_util::sync::CancellationToken::new(),
1982            )
1983            .await
1984            .unwrap();
1985        assert_eq!(
1986            res.exit_code, 0,
1987            "marker print must succeed; stderr:\n{}",
1988            res.stderr
1989        );
1990        assert_eq!(
1991            res.stdout, "ok",
1992            "wrap-injected env must survive env_clear into the child; got stdout:\n{}",
1993            res.stdout
1994        );
1995    }
1996
1997    #[tokio::test]
1998    async fn degrade_with_unavailable_drops_the_backend() {
1999        // HIGH-1 (red-team): Unavailable + Degrade must DROP the backend and
2000        // fall back to fd-anchored containment (no prepare, no wrap calls).
2001        // The mock records wraps; none must be recorded after env_for + an exec.
2002        let dir = tempfile::tempdir().unwrap();
2003        let mock = Arc::new(MockProcessSandbox::new(Enforcement::Unavailable));
2004        let wraps = Arc::clone(&mock.wraps);
2005        let prepare_calls = Arc::clone(&mock.prepare_calls);
2006        let sandbox = crate::LocalSandbox::new(dir.path().to_path_buf()).with_exec_sandbox(
2007            mock,
2008            SandboxPolicy {
2009                profile: SandboxProfile::ReadOnly,
2010                egress: Vec::new(),
2011                on_unavailable: OnUnavailable::Degrade,
2012            },
2013        );
2014        let env = sandbox.env_for(dir.path()).await.unwrap();
2015        // The session built (Degrade accepted the unavailable backend by
2016        // dropping it), so a subsequent exec must NOT go through wrap.
2017        env.exec(
2018            "true",
2019            Path::new("."),
2020            None,
2021            &tokio_util::sync::CancellationToken::new(),
2022        )
2023        .await
2024        .unwrap();
2025        assert_eq!(
2026            *prepare_calls.lock().unwrap(),
2027            0,
2028            "Unavailable+Degrade must not call prepare (backend dropped)"
2029        );
2030        assert!(
2031            wraps.lock().unwrap().is_empty(),
2032            "Unavailable+Degrade must not wrap any command (backend dropped)"
2033        );
2034    }
2035
2036    #[tokio::test]
2037    async fn active_backend_creates_missing_root_and_passes_canonical_path() {
2038        // Regression (advisor): the active-backend path canonicalizes the root
2039        // before building the env. `LocalSessionEnv::new` creates a missing
2040        // root; the backend path MUST match that parity (create-then-canonicalize)
2041        // so a non-existent root doesn't fail env_for. And `prepare` must see the
2042        // SAME canonical absolute path the env anchors on.
2043        let dir = tempfile::tempdir().unwrap();
2044        let missing_root = dir.path().join("does-not-exist-yet");
2045        let mock = Arc::new(MockProcessSandbox::new(Enforcement::FullyEnforced));
2046        let prepared = Arc::clone(&mock.prepared_workspace);
2047        let sandbox = crate::LocalSandbox::new(missing_root.clone()).with_exec_sandbox(
2048            mock,
2049            SandboxPolicy {
2050                profile: SandboxProfile::FullDev,
2051                egress: Vec::new(),
2052                on_unavailable: OnUnavailable::Degrade,
2053            },
2054        );
2055        // Must succeed (root created), not error on a missing root.
2056        let env = sandbox.env_for(&missing_root).await.unwrap();
2057        let prepared_path = prepared.lock().unwrap().clone();
2058        assert!(
2059            prepared_path.is_some(),
2060            "prepare must have been called with a workspace_path"
2061        );
2062        let prepared_path = prepared_path.unwrap();
2063        assert!(
2064            prepared_path.is_absolute(),
2065            "prepare must see a canonical ABSOLUTE workspace path; got {prepared_path:?}"
2066        );
2067        // The root now exists on disk (created by env_for).
2068        assert!(
2069            missing_root.exists(),
2070            "active-backend path must create the missing root (parity with new)"
2071        );
2072        // A command runs (the env is usable).
2073        env.exec(
2074            "true",
2075            Path::new("."),
2076            None,
2077            &tokio_util::sync::CancellationToken::new(),
2078        )
2079        .await
2080        .unwrap();
2081    }
2082
2083    #[tokio::test]
2084    async fn wrap_injected_env_survives_into_grep_child() {
2085        // MEDIUM-2 (red-team): prove backend-injected env reaches the GREP
2086        // child specifically (grep has no env_clear, unlike exec). The previous
2087        // version of this test was a false positive (it materialized the marker
2088        // via exec then grepped a literal). Instead: a mock that REWRITES argv
2089        // to `test "$VAR" = VAL && exec "$@" <original argv...>` so the child
2090        // verifies its own env before running the real grep. If the env var is
2091        // absent, the `test` fails, the child exits before grep, and the result
2092        // is empty.
2093        let dir = tempfile::tempdir().unwrap();
2094        tokio::fs::write(dir.path().join("needle.txt"), "findme\n")
2095            .await
2096            .unwrap();
2097        let mock = Arc::new(CheckingEnvMock::new("FLUERS_GREP_MARKER", "GREP_OK"));
2098        let sandbox = crate::LocalSandbox::new(dir.path().to_path_buf()).with_exec_sandbox(
2099            mock,
2100            SandboxPolicy {
2101                profile: SandboxProfile::FullDev,
2102                egress: Vec::new(),
2103                on_unavailable: OnUnavailable::Degrade,
2104            },
2105        );
2106        let env = sandbox.env_for(dir.path()).await.unwrap();
2107        let matches = env.grep("findme", &["needle.txt"], 10).await.unwrap();
2108        assert!(
2109            !matches.is_empty(),
2110            "grep child must see the backend-injected marker env (the \
2111             `test` guard would have exited before grep otherwise); got {matches:?}"
2112        );
2113    }
2114
2115    /// A `ProcessSandbox` that asserts a specific env var is present in every
2116    /// wrapped child before execing the original argv. Used by the grep
2117    /// env-propagation test: the wrap rewrites argv to
2118    /// `sh -c 'test "$VAR" = VAL && exec "$@"' -- <original argv...>`, so the
2119    /// child checks its OWN env; if the backend-injected var didn't survive to
2120    /// the spawn, `test` fails and the original command never runs.
2121    struct CheckingEnvMock {
2122        var: String,
2123        val: String,
2124        prepare_calls: Arc<Mutex<u32>>,
2125    }
2126
2127    impl CheckingEnvMock {
2128        fn new(var: &str, val: &str) -> Self {
2129            Self {
2130                var: var.into(),
2131                val: val.into(),
2132                prepare_calls: Arc::new(Mutex::new(0)),
2133            }
2134        }
2135    }
2136
2137    #[async_trait::async_trait]
2138    impl ProcessSandbox for CheckingEnvMock {
2139        async fn prepare(&self, _ctx: &ExecSandboxContext) -> RuntimeResult<()> {
2140            *self.prepare_calls.lock().unwrap() += 1;
2141            Ok(())
2142        }
2143        fn wrap(
2144            &self,
2145            argv: &[String],
2146            _ctx: &ExecSandboxContext,
2147        ) -> RuntimeResult<WrappedCommand> {
2148            // Rewrite to: sh -c 'test "$VAR" = VAL && exec "$@"' -- <argv...>
2149            let guard = format!("test \"${}\" = {} && exec \"$@\"", self.var, self.val);
2150            let mut wrapped = vec!["sh".to_string(), "-c".to_string(), guard, "--".to_string()];
2151            wrapped.extend(argv.iter().cloned());
2152            let mut env = BTreeMap::new();
2153            env.insert(self.var.clone(), self.val.clone());
2154            Ok(WrappedCommand { argv: wrapped, env })
2155        }
2156        async fn probe(&self, _profile: &SandboxProfile) -> RuntimeResult<Enforcement> {
2157            Ok(Enforcement::FullyEnforced)
2158        }
2159        async fn shutdown(&self) -> RuntimeResult<()> {
2160            Ok(())
2161        }
2162    }
2163}