Skip to main content

agnt_tools/
builtins.rs

1//! Built-in tools for agnt agents.
2//!
3//! All filesystem tools optionally hold an `Arc<FilesystemRoot>`; when set,
4//! every user-supplied path is resolved through the sandbox before touching
5//! `std::fs`. Default (unsandboxed) constructors are still provided for
6//! development / REPL use, but their rustdoc carries an explicit warning.
7//!
8//! The [`Shell`] tool is gated behind the `shell` cargo feature and is
9//! documented as CVE-class dangerous. See [`Shell::new_sandboxed`].
10
11use agnt_core::Tool;
12use serde_json::{json, Value};
13use std::fs;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use crate::sandbox::FilesystemRoot;
18
19// ------------------------------------------------------------------------------------------------
20// ReadFile
21// ------------------------------------------------------------------------------------------------
22
23const READ_FILE_MAX: usize = 256 * 1024;
24
25/// Read a UTF-8 text file.
26///
27/// **Unsandboxed by default.** Without [`ReadFile::with_sandbox`] this tool
28/// can read any file the process has access to. Pair with a
29/// [`FilesystemRoot`] when exposing to untrusted LLM output.
30pub struct ReadFile {
31    sandbox: Option<Arc<FilesystemRoot>>,
32}
33
34impl Default for ReadFile {
35    fn default() -> Self { Self::new() }
36}
37
38impl ReadFile {
39    /// Unsandboxed constructor — full-host read access. Use only in trusted
40    /// contexts.
41    pub fn new() -> Self { Self { sandbox: None } }
42
43    /// Sandboxed constructor — paths are resolved against `sandbox` and
44    /// rejected if they escape the root.
45    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
46        Self { sandbox: Some(sandbox) }
47    }
48}
49
50fn resolve_path(sandbox: &Option<Arc<FilesystemRoot>>, input: &str) -> Result<PathBuf, String> {
51    match sandbox {
52        Some(s) => s.resolve(input),
53        None => Ok(PathBuf::from(input)),
54    }
55}
56
57impl Tool for ReadFile {
58    fn name(&self) -> &str { "read_file" }
59    fn description(&self) -> &str {
60        "Read a UTF-8 text file and return its contents. Truncated at 256KB. Prefer this over 'shell cat' — it is deterministic and cheaper."
61    }
62    fn schema(&self) -> Value {
63        json!({
64            "type": "object",
65            "properties": {
66                "path": { "type": "string", "description": "file path (must be under the agent sandbox root if one is configured)" }
67            },
68            "required": ["path"]
69        })
70    }
71    fn call(&self, args: Value) -> Result<String, String> {
72        let path = args["path"].as_str().ok_or("missing path")?;
73        let resolved = resolve_path(&self.sandbox, path)?;
74        let content = fs::read_to_string(&resolved)
75            .map_err(|e| format!("read {}: {}", resolved.display(), e))?;
76        if content.len() <= READ_FILE_MAX {
77            return Ok(content);
78        }
79        let mut cut = READ_FILE_MAX;
80        while cut > 0 && !content.is_char_boundary(cut) {
81            cut -= 1;
82        }
83        let mut out = content[..cut].to_string();
84        out.push_str(&format!(
85            "\n...(truncated at {} bytes; file is {} bytes total)",
86            cut,
87            content.len()
88        ));
89        Ok(out)
90    }
91}
92
93// ------------------------------------------------------------------------------------------------
94// EditFile — atomic (S6)
95// ------------------------------------------------------------------------------------------------
96
97/// Targeted file edit. Locks the file, re-reads under lock, verifies the
98/// unique-match invariant, writes to a temp sibling, and atomically renames
99/// into place — fixing the v0.1 TOCTOU race between read and write.
100///
101/// **Unsandboxed by default.** Use [`EditFile::with_sandbox`] when exposed to
102/// hostile LLM output.
103///
104/// ## Lockfile name is predictable
105///
106/// The sidecar lock lives at `.<filename>.agnt-edit.lock` in the same
107/// directory as the target. The name is deterministic by design — it
108/// has to be, so two agnt processes editing the same file on the same
109/// host coordinate correctly. The tradeoff is that a *different* local
110/// process on the same machine can pre-create the lockfile and hold
111/// the exclusive lock, causing every `EditFile` call on that target
112/// to block or fail. That is a local-user DoS, not a sandbox escape:
113/// it requires write access to the target's parent directory, which
114/// is already out of the agent's threat model (v0.2 Threat Model §
115/// "local untrusted users"). If you need multi-tenant isolation, put
116/// each agent in its own bwrap/container/landlock view — the lockfile
117/// pattern is designed for the single-tenant case.
118pub struct EditFile {
119    sandbox: Option<Arc<FilesystemRoot>>,
120}
121
122impl Default for EditFile {
123    fn default() -> Self { Self::new() }
124}
125
126impl EditFile {
127    pub fn new() -> Self { Self { sandbox: None } }
128    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
129        Self { sandbox: Some(sandbox) }
130    }
131}
132
133impl Tool for EditFile {
134    fn name(&self) -> &str { "edit_file" }
135    fn description(&self) -> &str {
136        "Targeted file edit. Replaces one exact occurrence of 'old' with 'new' in the file. Fails if 'old' is not found or appears more than once — in that case pass more surrounding context in 'old' to make it unique. Prefer this over write_file when changing a small part of an existing file."
137    }
138    fn schema(&self) -> Value {
139        json!({
140            "type": "object",
141            "properties": {
142                "path": { "type": "string" },
143                "old":  { "type": "string", "description": "exact text to find (must be unique in the file)" },
144                "new":  { "type": "string", "description": "replacement text" }
145            },
146            "required": ["path", "old", "new"]
147        })
148    }
149    fn call(&self, args: Value) -> Result<String, String> {
150        use fs2::FileExt;
151        use std::io::Write;
152
153        let path = args["path"].as_str().ok_or("missing path")?;
154        let old = args["old"].as_str().ok_or("missing old")?;
155        let new_s = args["new"].as_str().ok_or("missing new")?;
156        if old.is_empty() {
157            return Err("'old' must not be empty".into());
158        }
159
160        let resolved = resolve_path(&self.sandbox, path)?;
161
162        // Lock a stable sibling lockfile. Locking the target file directly
163        // does not work because atomic-rename swaps the inode — other waiters
164        // would hold locks on the orphaned pre-rename file descriptor and
165        // race past each other, clobbering the winner. The lockfile path is
166        // derived from the target filename and stays put across renames.
167        let lock_name = format!(
168            ".{}.agnt-edit.lock",
169            resolved
170                .file_name()
171                .and_then(|s| s.to_str())
172                .unwrap_or("edit")
173        );
174        let lock_path = resolved
175            .parent()
176            .map(|p| p.join(&lock_name))
177            .unwrap_or_else(|| PathBuf::from(&lock_name));
178
179        let lock_file = std::fs::OpenOptions::new()
180            .create(true)
181            .read(true)
182            .write(true)
183            .open(&lock_path)
184            .map_err(|e| format!("lock open {}: {}", lock_path.display(), e))?;
185
186        lock_file
187            .lock_exclusive()
188            .map_err(|e| format!("lock {}: {}", lock_path.display(), e))?;
189
190        // Re-read the target under the lock. Any other writer that held the
191        // lock before us has already renamed their result into place; we now
192        // see their updated bytes.
193        let perform = || -> Result<(String, String), String> {
194            let content = std::fs::read_to_string(&resolved)
195                .map_err(|e| format!("read {}: {}", resolved.display(), e))?;
196            let count = content.matches(old).count();
197            if count == 0 {
198                return Err(format!("'old' not found in {}", resolved.display()));
199            }
200            if count > 1 {
201                return Err(format!(
202                    "'old' appears {} times in {}; pass more surrounding context to make it unique",
203                    count,
204                    resolved.display()
205                ));
206            }
207            let updated = content.replacen(old, new_s, 1);
208
209            // Write to sibling .tmp and atomically rename.
210            let mut tmp = resolved.clone();
211            let tmp_name = format!(
212                "{}.agnt-edit-tmp.{}.{:?}",
213                resolved
214                    .file_name()
215                    .and_then(|s| s.to_str())
216                    .unwrap_or("edit"),
217                std::process::id(),
218                std::thread::current().id()
219            );
220            tmp.set_file_name(tmp_name);
221            {
222                let mut tmpf = std::fs::OpenOptions::new()
223                    .write(true)
224                    .create(true)
225                    .truncate(true)
226                    .open(&tmp)
227                    .map_err(|e| format!("tmp open {}: {}", tmp.display(), e))?;
228                tmpf.write_all(updated.as_bytes())
229                    .map_err(|e| format!("tmp write: {}", e))?;
230                tmpf.sync_all().map_err(|e| format!("tmp sync: {}", e))?;
231            }
232            std::fs::rename(&tmp, &resolved)
233                .map_err(|e| format!("rename {} -> {}: {}", tmp.display(), resolved.display(), e))?;
234
235            Ok((content, updated))
236        };
237
238        let res = perform();
239        // Release lock before dropping file (drop would also release, but be explicit).
240        let _ = lock_file.unlock();
241        drop(lock_file);
242
243        let (before, after) = res?;
244        Ok(format!(
245            "edited {} ({} bytes → {} bytes)",
246            resolved.display(),
247            before.len(),
248            after.len()
249        ))
250    }
251}
252
253// ------------------------------------------------------------------------------------------------
254// WriteFile
255// ------------------------------------------------------------------------------------------------
256
257pub struct WriteFile {
258    sandbox: Option<Arc<FilesystemRoot>>,
259}
260
261impl Default for WriteFile {
262    fn default() -> Self { Self::new() }
263}
264
265impl WriteFile {
266    pub fn new() -> Self { Self { sandbox: None } }
267    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
268        Self { sandbox: Some(sandbox) }
269    }
270}
271
272impl Tool for WriteFile {
273    fn name(&self) -> &str { "write_file" }
274    fn description(&self) -> &str { "Write UTF-8 content to a file, creating or overwriting it." }
275    fn schema(&self) -> Value {
276        json!({
277            "type": "object",
278            "properties": {
279                "path": { "type": "string" },
280                "content": { "type": "string" }
281            },
282            "required": ["path", "content"]
283        })
284    }
285    fn call(&self, args: Value) -> Result<String, String> {
286        let path = args["path"].as_str().ok_or("missing path")?;
287        let content = args["content"].as_str().ok_or("missing content")?;
288        let resolved = resolve_path(&self.sandbox, path)?;
289        fs::write(&resolved, content)
290            .map_err(|e| format!("write {}: {}", resolved.display(), e))?;
291        Ok(format!("wrote {} bytes to {}", content.len(), resolved.display()))
292    }
293}
294
295// ------------------------------------------------------------------------------------------------
296// ListDir
297// ------------------------------------------------------------------------------------------------
298
299pub struct ListDir {
300    sandbox: Option<Arc<FilesystemRoot>>,
301}
302
303impl Default for ListDir {
304    fn default() -> Self { Self::new() }
305}
306
307impl ListDir {
308    pub fn new() -> Self { Self { sandbox: None } }
309    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
310        Self { sandbox: Some(sandbox) }
311    }
312}
313
314impl Tool for ListDir {
315    fn name(&self) -> &str { "list_dir" }
316    fn description(&self) -> &str {
317        "List a directory. One entry per line as 'TYPE NAME' where TYPE is F (file), D (dir), or L (symlink)."
318    }
319    fn schema(&self) -> Value {
320        json!({
321            "type": "object",
322            "properties": {
323                "path": { "type": "string" }
324            },
325            "required": ["path"]
326        })
327    }
328    fn call(&self, args: Value) -> Result<String, String> {
329        let path = args["path"].as_str().ok_or("missing path")?;
330        let resolved = resolve_path(&self.sandbox, path)?;
331        let mut out = String::new();
332        for entry in fs::read_dir(&resolved)
333            .map_err(|e| format!("read_dir {}: {}", resolved.display(), e))?
334        {
335            let e = entry.map_err(|e| e.to_string())?;
336            let ft = e.file_type().map_err(|e| e.to_string())?;
337            let tag = if ft.is_dir() { 'D' } else if ft.is_symlink() { 'L' } else { 'F' };
338            out.push_str(&format!("{} {}\n", tag, e.file_name().to_string_lossy()));
339        }
340        Ok(out)
341    }
342}
343
344// ------------------------------------------------------------------------------------------------
345// Shell (feature = "shell") — CVE-class dangerous, opt-in only
346// ------------------------------------------------------------------------------------------------
347
/// Run a shell-style command line **without** going through `sh -c`.
///
/// # !!! CVE-class dangerous !!!
///
/// This tool executes whatever command the LLM picks. It is CVE-class
/// dangerous and must be paired with OS-level isolation (containers,
/// seccomp, bubblewrap, unshare, VMs — whatever suits the host). The
/// argv[0] allowlist implemented here is defense-in-depth, **not** a
/// primary security boundary.
///
/// There is deliberately no `Default` constructor, because there is no safe
/// default: build the tool with [`Shell::new_sandboxed`] and pass the
/// allowlist and working directory explicitly.
///
/// ## What this tool guarantees
///
/// - `cmd` is split into words with `shell-words` (POSIX word splitting).
/// - `argv[0]` must be present in the caller-supplied `allowed_argv0` list.
/// - Any token containing `$`, `` ` ``, `|`, `;`, `&`, `>`, `<`, `(`, `)`, or
///   a newline is rejected (defense-in-depth against shell-ish
///   metacharacters that `shell-words` passes through as literal tokens).
/// - The process is started with
///   `std::process::Command::new(argv[0]).args(&argv[1..])` — **no `sh -c`**,
///   so this process offers no command-substitution, glob-expansion, or
///   env-expansion surface.
/// - The working directory is pinned via `current_dir(&self.cwd)`.
///
/// ## What this tool does NOT guarantee
///
/// - The executed binary may itself be dangerous (e.g. `git clean -fdx`).
/// - The binary may spawn subprocesses or shells of its own.
/// - File-descriptor inheritance, environment variables, and kernel syscalls
///   are unrestricted. Pair with OS-level isolation.
#[cfg(feature = "shell")]
pub struct Shell {
    /// Program names accepted as `argv[0]`, compared as literal strings.
    allowed_argv0: Vec<String>,
    /// Working directory for every spawned process.
    cwd: PathBuf,
    /// v0.3 C1: optional bubblewrap configuration. When set, the spawned
    /// command is wrapped in `bwrap` with a read-only rootfs and a scoped
    /// bind mount of `cwd`. Linux only.
    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    bwrap: Option<BwrapConfig>,
}
391
/// Bubblewrap settings for the Shell tool (v0.3 C1).
///
/// The allowed command is run under `bwrap` with these arguments:
/// - `--ro-bind /usr /usr` and `--ro-bind /bin /bin`
/// - `--ro-bind-try` for `/lib`, `/lib64`, and `/etc` (bound when present)
/// - `--bind <cwd> <cwd>` (read-write bind of the sandboxed working dir)
/// - `--tmpfs /tmp`, `--proc /proc`, `--dev /dev`
/// - `--unshare-all`, followed by `--share-net` when `share_net` is set
/// - `--die-with-parent`
/// - `--chdir <cwd>`
#[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
#[derive(Debug, Clone)]
pub struct BwrapConfig {
    /// Share the host's network namespace with the sandboxed process.
    /// With `false`, network access is denied entirely, so every
    /// network-reaching command (`curl`, `git fetch`, etc.) fails inside
    /// the sandbox.
    pub share_net: bool,
}
411
#[cfg(feature = "shell")]
impl Shell {
    /// Build a Shell tool restricted by an argv[0] allowlist.
    ///
    /// - `allowed_argv0`: the exact program names permitted as `argv[0]`.
    ///   Entries are compared as literal strings with no path resolution,
    ///   so list full paths to pin a binary such as `/usr/bin/git`.
    /// - `cwd`: the working directory every spawned process runs in. The
    ///   tool does not honour relative-path arguments from the model.
    ///
    /// # Safety
    ///
    /// Not `unsafe` in the Rust sense, but the CVE-class warning on the
    /// struct applies: do not call this without OS-level isolation in place.
    pub fn new_sandboxed(allowed_argv0: Vec<String>, cwd: PathBuf) -> Self {
        Self {
            allowed_argv0,
            cwd,
            #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
            bwrap: None,
        }
    }

    /// v0.3 C1: build a Shell whose commands run inside bubblewrap, as
    /// OS-level defense-in-depth on top of the argv allowlist.
    ///
    /// Probes for `bwrap --version` and returns an error if `bwrap` cannot
    /// be started or exits unsuccessfully. Only compiled when the
    /// `bwrap-shell` feature is enabled AND the target is Linux — `bwrap`
    /// is a Linux-only tool.
    ///
    /// The sandbox bind-mounts `cwd` read-write; `/usr`, `/bin`, `/lib`,
    /// `/lib64`, and `/etc` are read-only, and `/tmp` is a tmpfs. There is
    /// no `/home`, `/var`, or `/root` — point `cwd` at a scratch directory,
    /// and extend [`BwrapConfig`] in your fork if more paths are needed.
    ///
    /// This is CVE-class and must be paired with the v0.2 argv allowlist.
    /// The sandbox is defense in depth, NOT a primary boundary.
    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    pub fn new_bwrap(
        allowed_argv0: Vec<String>,
        cwd: PathBuf,
        share_net: bool,
    ) -> Result<Self, String> {
        // Probe for bwrap on PATH before committing to the configuration.
        match std::process::Command::new("bwrap").arg("--version").output() {
            Err(e) => Err(format!("bwrap not available: {}", e)),
            Ok(probe) if !probe.status.success() => Err(format!(
                "bwrap --version exited {}",
                probe.status.code().unwrap_or(-1)
            )),
            Ok(_) => Ok(Self {
                allowed_argv0,
                cwd,
                bwrap: Some(BwrapConfig { share_net }),
            }),
        }
    }

    /// Non-Linux stand-in for `new_bwrap`. The method stays available so
    /// callers compile on every target, but it always returns an error at
    /// run time because bubblewrap only exists on Linux. Only compiled when
    /// the `bwrap-shell` feature is enabled on a non-Linux target.
    #[cfg(all(feature = "bwrap-shell", not(target_os = "linux")))]
    pub fn new_bwrap(
        _allowed_argv0: Vec<String>,
        _cwd: PathBuf,
        _share_net: bool,
    ) -> Result<Self, String> {
        Err("bwrap sandbox is Linux-only".into())
    }

    /// Assemble the argument vector passed to `bwrap` in order to run `argv`
    /// under `cfg` with `cwd` as the writable working directory. Pure
    /// function, no I/O — used by both `call()` and the unit tests.
    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    fn build_bwrap_argv(
        cfg: &BwrapConfig,
        cwd: &std::path::Path,
        argv: &[String],
    ) -> Vec<String> {
        let cwd_str = cwd.to_string_lossy().into_owned();
        let mut out: Vec<String> = Vec::new();

        // Read-only system directories: the first two are mandatory, the
        // rest use the `-try` variant.
        for dir in ["/usr", "/bin"] {
            out.extend(["--ro-bind", dir, dir].map(String::from));
        }
        for dir in ["/lib", "/lib64", "/etc"] {
            out.extend(["--ro-bind-try", dir, dir].map(String::from));
        }

        // Writable working directory plus fresh /tmp, /proc, /dev.
        out.extend(["--bind".to_string(), cwd_str.clone(), cwd_str.clone()]);
        out.extend(
            [
                "--tmpfs", "/tmp",
                "--proc", "/proc",
                "--dev", "/dev",
                "--unshare-all",
            ]
            .map(String::from),
        );
        if cfg.share_net {
            out.push("--share-net".to_string());
        }
        out.extend([
            "--die-with-parent".to_string(),
            "--chdir".to_string(),
            cwd_str,
            "--".to_string(),
        ]);
        out.extend_from_slice(argv);
        out
    }
}
520
/// Characters that cause a command token to be rejected outright.
#[cfg(feature = "shell")]
const SHELL_FORBIDDEN_CHARS: &[char] = &['$', '`', '|', ';', '&', '>', '<', '(', ')', '\n'];

#[cfg(feature = "shell")]
impl Tool for Shell {
    fn name(&self) -> &str {
        "shell"
    }

    fn description(&self) -> &str {
        "Run a program with arguments. The command is parsed with shell-words; argv[0] must be in the caller's allowlist; no sh -c, no command substitution, no pipes. Prefer specialized tools (read_file, grep, glob, fetch) over this."
    }

    fn schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "cmd": { "type": "string", "description": "command line (e.g. 'git status' or 'cargo build --release')" }
            },
            "required": ["cmd"]
        })
    }

    /// Parse `args["cmd"]`, validate it, run it, and report the outcome.
    ///
    /// Validation order: word-splitting, non-empty check, forbidden
    /// character scan over every token, then the argv[0] allowlist. The
    /// result string carries the exit code (or `signal` when there is none)
    /// followed by the captured stdout and stderr.
    fn call(&self, args: Value) -> Result<String, String> {
        let cmd = args["cmd"].as_str().ok_or("missing cmd")?;
        let argv = shell_words::split(cmd).map_err(|e| format!("shell parse: {}", e))?;
        let Some((argv0, rest)) = argv.split_first() else {
            return Err("empty command".into());
        };

        // First offending character in the first offending token, if any.
        let offending = argv.iter().find_map(|tok| {
            tok.chars()
                .find(|c| SHELL_FORBIDDEN_CHARS.contains(c))
                .map(|bad| (bad, tok))
        });
        if let Some((bad, tok)) = offending {
            return Err(format!(
                "token contains forbidden character {:?}: {}",
                bad, tok
            ));
        }

        if !self.allowed_argv0.contains(argv0) {
            return Err(format!(
                "argv[0] {:?} not in allowlist {:?}",
                argv0, self.allowed_argv0
            ));
        }

        // Plain spawn, constrained only by the allowlist and pinned cwd.
        let spawn_direct = || {
            std::process::Command::new(argv0)
                .args(rest)
                .current_dir(&self.cwd)
                .output()
                .map_err(|e| format!("spawn: {}", e))
        };

        // v0.3 C1: when bwrap is configured, wrap the command in it.
        #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
        let output = match &self.bwrap {
            Some(cfg) => std::process::Command::new("bwrap")
                .args(Self::build_bwrap_argv(cfg, &self.cwd, &argv))
                .output()
                .map_err(|e| format!("bwrap spawn: {}", e))?,
            None => spawn_direct()?,
        };

        #[cfg(not(all(feature = "bwrap-shell", target_os = "linux")))]
        let output = spawn_direct()?;

        let status = match output.status.code() {
            Some(code) => code.to_string(),
            None => "signal".to_string(),
        };
        Ok(format!(
            "exit: {}\n--- stdout ---\n{}--- stderr ---\n{}",
            status,
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        ))
    }
}
599
600// ------------------------------------------------------------------------------------------------
601// Glob
602// ------------------------------------------------------------------------------------------------
603
604pub struct Glob {
605    sandbox: Option<Arc<FilesystemRoot>>,
606}
607
608impl Default for Glob {
609    fn default() -> Self { Self::new() }
610}
611
612impl Glob {
613    pub fn new() -> Self { Self { sandbox: None } }
614    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
615        Self { sandbox: Some(sandbox) }
616    }
617}
618
619impl Tool for Glob {
620    fn name(&self) -> &str { "glob" }
621    fn description(&self) -> &str {
622        "Find files matching a shell-style glob pattern (e.g. 'src/**/*.rs', '**/Cargo.toml'). Returns one path per line. Prefer this over 'shell find' — it is faster, portable across OSes, and has no command-injection surface."
623    }
624    fn schema(&self) -> Value {
625        json!({
626            "type": "object",
627            "properties": {
628                "pattern": { "type": "string", "description": "glob pattern (must be relative to the sandbox root when sandboxed)" }
629            },
630            "required": ["pattern"]
631        })
632    }
633    fn call(&self, args: Value) -> Result<String, String> {
634        let pattern = args["pattern"].as_str().ok_or("missing pattern")?;
635
636        // When sandboxed, require the pattern to be a relative path under the
637        // root. We anchor by joining the pattern onto the root and checking
638        // that it stays under the root (which also rejects `..`).
639        let (effective_pattern, root_strip): (String, Option<PathBuf>) = match &self.sandbox {
640            Some(s) => {
641                if Path::new(pattern).is_absolute() {
642                    return Err(format!(
643                        "glob pattern must be relative when sandboxed: {}",
644                        pattern
645                    ));
646                }
647                if pattern.split('/').any(|seg| seg == "..") {
648                    return Err(format!("glob pattern contains '..': {}", pattern));
649                }
650                let joined = s.root().join(pattern);
651                let eff = joined.to_string_lossy().into_owned();
652                (eff, Some(s.root().to_path_buf()))
653            }
654            None => (pattern.to_string(), None),
655        };
656
657        let mut out = String::new();
658        let mut count = 0usize;
659        for entry in glob::glob(&effective_pattern).map_err(|e| format!("glob: {}", e))? {
660            let p = match entry {
661                Ok(p) => p,
662                Err(_) => continue,
663            };
664            // Double-check sandbox containment post-expansion (defensive —
665            // glob should never escape, but symlinks could surface).
666            if let Some(root) = &root_strip {
667                if let Ok(canonical) = std::fs::canonicalize(&p) {
668                    if !canonical.starts_with(root) {
669                        continue;
670                    }
671                }
672            }
673            out.push_str(&p.to_string_lossy());
674            out.push('\n');
675            count += 1;
676            if count >= 2000 {
677                out.push_str("(truncated at 2000)\n");
678                break;
679            }
680        }
681        if out.is_empty() {
682            Ok("(no matches)".into())
683        } else {
684            Ok(out)
685        }
686    }
687}
688
689// ------------------------------------------------------------------------------------------------
690// Grep
691// ------------------------------------------------------------------------------------------------
692
693pub struct Grep {
694    sandbox: Option<Arc<FilesystemRoot>>,
695}
696
697impl Default for Grep {
698    fn default() -> Self { Self::new() }
699}
700
701impl Grep {
702    pub fn new() -> Self { Self { sandbox: None } }
703    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
704        Self { sandbox: Some(sandbox) }
705    }
706}
707
708impl Tool for Grep {
709    fn name(&self) -> &str { "grep" }
710    fn description(&self) -> &str {
711        "Search text files under a directory for a regex pattern. Returns 'path:line:text' per match. Optional 'ext' filter (e.g. 'rs', 'md'). Prefer this over 'shell grep' — it is native, typically under 1ms for a source tree, and avoids quoting pitfalls."
712    }
713    fn schema(&self) -> Value {
714        json!({
715            "type": "object",
716            "properties": {
717                "pattern": { "type": "string", "description": "regex pattern" },
718                "path":    { "type": "string", "description": "root directory to walk" },
719                "ext":     { "type": "string", "description": "optional file extension filter without dot" }
720            },
721            "required": ["pattern", "path"]
722        })
723    }
724    fn call(&self, args: Value) -> Result<String, String> {
725        let pattern = args["pattern"].as_str().ok_or("missing pattern")?;
726        let path = args["path"].as_str().ok_or("missing path")?;
727        let ext = args["ext"].as_str();
728        let resolved = resolve_path(&self.sandbox, path)?;
729        let re = regex::Regex::new(pattern).map_err(|e| format!("regex: {}", e))?;
730        let mut out = String::new();
731        let mut count = 0usize;
732        for entry in walkdir::WalkDir::new(&resolved)
733            .into_iter()
734            .filter_map(|e| e.ok())
735        {
736            if !entry.file_type().is_file() { continue; }
737            if let Some(e) = ext {
738                if entry.path().extension().and_then(|s| s.to_str()) != Some(e) { continue; }
739            }
740            // When sandboxed, skip any file whose canonical path escapes the root.
741            if let Some(sbx) = &self.sandbox {
742                if let Ok(canonical) = std::fs::canonicalize(entry.path()) {
743                    if !canonical.starts_with(sbx.root()) {
744                        continue;
745                    }
746                }
747            }
748            let content = match fs::read_to_string(entry.path()) {
749                Ok(c) => c,
750                Err(_) => continue,
751            };
752            for (i, line) in content.lines().enumerate() {
753                if re.is_match(line) {
754                    out.push_str(&format!("{}:{}:{}\n", entry.path().display(), i + 1, line));
755                    count += 1;
756                    if count >= 500 {
757                        out.push_str("(truncated at 500 matches)\n");
758                        return Ok(out);
759                    }
760                }
761            }
762        }
763        if out.is_empty() {
764            Ok("(no matches)".into())
765        } else {
766            Ok(out)
767        }
768    }
769}
770
771// ------------------------------------------------------------------------------------------------
772// Fetch — SSRF guarded (S3)
773// ------------------------------------------------------------------------------------------------
774
775/// HTTP GET a URL with an atomic SSRF guard.
776///
777/// v0.3.1 closes the v0.2/v0.3 two-phase TOCTOU by installing a custom
778/// [`ureq::Resolver`] ([`crate::ssrf::SsrfResolver`]) on the underlying
779/// agent. ureq calls the resolver exactly once per connection, uses the
780/// exact addresses it returns, and never performs a second DNS lookup.
781/// That removes the DNS-rebinding window a short-TTL authority could
782/// previously use to flip a public check-time IP to a private
783/// request-time IP.
784///
785/// Each `Fetch` instance lazily builds its own `ureq::Agent` on first
786/// call, so a per-instance `allow_hosts` allowlist composes cleanly.
787/// Redirects are disabled (`redirects(0)`) so a `302 Location:` hop
788/// cannot bypass the resolver.
789///
790/// URL-shape validation (scheme allowlist, parsing) still happens
791/// up-front in `Fetch::call` because the resolver only sees the
792/// `host:port` netloc, not the scheme.
793pub struct Fetch {
794    allow_hosts: Option<Vec<String>>,
795    max_bytes: usize,
796    // Lazily initialised so the tool is cheap to construct and so the
797    // agent's resolver captures the final allow_hosts configured via the
798    // builder-style setters.
799    agent: std::sync::OnceLock<ureq::Agent>,
800}
801
/// Default cap on response bytes read by `Fetch`: 64 KiB.
const FETCH_DEFAULT_MAX: usize = 64 * 1024;
803
804impl Default for Fetch {
805    fn default() -> Self { Self::new() }
806}
807
808impl Fetch {
809    pub fn new() -> Self {
810        Self {
811            allow_hosts: None,
812            max_bytes: FETCH_DEFAULT_MAX,
813            agent: std::sync::OnceLock::new(),
814        }
815    }
816
817    /// Restrict fetches to an explicit host allowlist. Case-insensitive.
818    ///
819    /// The allowlist is enforced inside the custom resolver before any
820    /// DNS query is issued, so a rejected host never triggers a lookup.
821    pub fn with_allow_hosts(mut self, hosts: Vec<String>) -> Self {
822        self.allow_hosts = Some(hosts.into_iter().map(|h| h.to_lowercase()).collect());
823        self
824    }
825
826    /// Set the maximum number of response bytes to read. Defaults to 64KB.
827    pub fn with_max_bytes(mut self, n: usize) -> Self {
828        self.max_bytes = n;
829        self
830    }
831
832    /// Build the ureq agent for this Fetch instance. Installs
833    /// [`SsrfResolver`] so DNS resolution is atomic with validation.
834    ///
835    /// [`SsrfResolver`]: crate::ssrf::SsrfResolver
836    fn agent(&self) -> &ureq::Agent {
837        self.agent.get_or_init(|| {
838            let resolver = match &self.allow_hosts {
839                Some(list) => crate::ssrf::SsrfResolver::with_allow_hosts(list.clone()),
840                None => crate::ssrf::SsrfResolver::new(),
841            };
842            let builder = ureq::AgentBuilder::new()
843                .resolver(resolver)
844                .redirects(0);
845            match native_tls::TlsConnector::new() {
846                Ok(connector) => builder.tls_connector(Arc::new(connector)).build(),
847                Err(_) => builder.build(),
848            }
849        })
850    }
851}
852
853/// Upfront URL-shape validation. The atomic IP / host check lives in
854/// [`crate::ssrf::SsrfResolver`]; this function only catches things the
855/// resolver cannot see from a netloc alone — primarily the scheme and
856/// malformed URLs.
857fn fetch_url_shape_check(url: &str) -> Result<(), String> {
858    let parsed = url::Url::parse(url).map_err(|e| format!("url parse: {}", e))?;
859    let scheme = parsed.scheme();
860    if scheme != "http" && scheme != "https" {
861        return Err(format!("rejected scheme: {}", scheme));
862    }
863    if parsed.host_str().is_none() {
864        return Err("url has no host".to_string());
865    }
866    Ok(())
867}
868
869impl Tool for Fetch {
870    fn name(&self) -> &str { "fetch" }
871    fn description(&self) -> &str {
872        "HTTP GET a URL and return the response body (first 64KB by default). Rejects loopback / private / link-local / metadata hosts atomically via a custom DNS resolver."
873    }
874    fn schema(&self) -> Value {
875        json!({
876            "type": "object",
877            "properties": {
878                "url": { "type": "string" }
879            },
880            "required": ["url"]
881        })
882    }
883    fn call(&self, args: Value) -> Result<String, String> {
884        use std::io::Read;
885        let url = args["url"].as_str().ok_or("missing url")?;
886        fetch_url_shape_check(url)?;
887        let resp = self
888            .agent()
889            .get(url)
890            .call()
891            .map_err(|e| format!("fetch: {}", e))?;
892        let status = resp.status();
893        let mut body = String::new();
894        resp.into_reader()
895            .take(self.max_bytes as u64)
896            .read_to_string(&mut body)
897            .map_err(|e| format!("read: {}", e))?;
898        Ok(format!("HTTP {}\n{}", status, body))
899    }
900}
901
902// ------------------------------------------------------------------------------------------------
903// Tests
904// ------------------------------------------------------------------------------------------------
905
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Create a scratch directory under the system temp dir whose name
    /// combines `tag`, the process id and the current time in nanoseconds.
    fn tmpdir(tag: &str) -> PathBuf {
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|elapsed| elapsed.as_nanos())
            .unwrap_or(0);
        let name = format!("agnt-tools-{}-{}-{}", tag, std::process::id(), nanos);
        let dir = std::env::temp_dir().join(name);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Build a sandbox rooted at a fresh scratch directory.
    fn sandbox(tag: &str) -> Arc<FilesystemRoot> {
        let dir = tmpdir(tag);
        Arc::new(FilesystemRoot::new(&dir).unwrap())
    }

    /// Run a default `Fetch` against `url` and return the error it must produce.
    fn fetch_err(url: &str) -> String {
        Fetch::new().call(json!({ "url": url })).unwrap_err()
    }

    // ---- S2: sandbox enforcement ----------------------------------------------------------

    #[test]
    fn sandbox_blocks_read_of_etc_shadow() {
        let reader = ReadFile::with_sandbox(sandbox("sbx-read"));
        let outcome = reader.call(json!({"path":"/etc/shadow"}));
        assert!(outcome.is_err(), "expected sandbox rejection");
    }

    #[test]
    fn sandbox_blocks_write_outside_root() {
        let writer = WriteFile::with_sandbox(sandbox("sbx-write"));
        let outcome = writer.call(json!({"path":"../escape.txt","content":"x"}));
        assert!(outcome.is_err());
    }

    #[test]
    fn sandbox_allows_read_under_root() {
        // The file is written before the sandbox is created.
        let dir = tmpdir("sbx-ok");
        fs::write(dir.join("hello.txt"), "world").unwrap();
        let root = Arc::new(FilesystemRoot::new(&dir).unwrap());
        let reader = ReadFile::with_sandbox(root);
        let text = reader.call(json!({"path":"hello.txt"})).unwrap();
        assert_eq!(text, "world");
    }

    #[test]
    fn sandbox_blocks_listdir_of_root() {
        let lister = ListDir::with_sandbox(sandbox("sbx-ls"));
        assert!(lister.call(json!({"path":"/"})).is_err());
    }

    #[test]
    fn sandbox_blocks_glob_absolute() {
        let globber = Glob::with_sandbox(sandbox("sbx-glob"));
        assert!(globber.call(json!({"pattern":"/etc/*"})).is_err());
    }

    #[test]
    fn sandbox_blocks_glob_parent_traversal() {
        let globber = Glob::with_sandbox(sandbox("sbx-glob2"));
        assert!(globber.call(json!({"pattern":"../*"})).is_err());
    }

    #[test]
    fn sandbox_blocks_grep_root() {
        let grepper = Grep::with_sandbox(sandbox("sbx-grep"));
        assert!(grepper.call(json!({"pattern":"root:","path":"/etc"})).is_err());
    }

    // ---- S3: Fetch SSRF guard -------------------------------------------------------------

    #[test]
    fn fetch_rejects_aws_metadata_ip() {
        let msg = fetch_err("http://169.254.169.254/latest/meta-data/");
        assert!(msg.contains("metadata") || msg.contains("link") || msg.contains("169.254"));
    }

    #[test]
    fn fetch_rejects_gcp_metadata_name() {
        let msg = fetch_err("http://metadata.google.internal/");
        assert!(msg.contains("metadata"));
    }

    #[test]
    fn fetch_rejects_loopback() {
        let msg = fetch_err("http://127.0.0.1:11434/");
        assert!(msg.contains("IP") || msg.contains("loopback") || msg.contains("127"));
    }

    #[test]
    fn fetch_rejects_private_ipv4() {
        let msg = fetch_err("http://192.168.1.1/");
        assert!(msg.contains("IPv4") || msg.contains("192.168") || msg.contains("private"));
    }

    #[test]
    fn fetch_rejects_file_scheme() {
        let msg = fetch_err("file:///etc/passwd");
        assert!(msg.contains("scheme"));
    }

    #[test]
    fn fetch_rejects_localhost_name() {
        let msg = fetch_err("http://localhost:6379/");
        assert!(msg.contains("IP") || msg.contains("loopback") || msg.contains("127"));
    }

    #[test]
    fn fetch_allowlist_blocks_non_matching_host_before_dns() {
        // The explicit metadata block fires first, even with an allowlist.
        let first = Fetch::new().with_allow_hosts(vec!["example.com".into()]);
        let metadata_msg = first
            .call(json!({"url":"http://metadata.google.internal/"}))
            .unwrap_err();
        assert!(metadata_msg.contains("metadata"));
        // A benign host that is simply not listed is rejected by the allowlist.
        let second = Fetch::new().with_allow_hosts(vec!["example.com".into()]);
        let unlisted_msg = second
            .call(json!({"url":"http://not-on-list.invalid/"}))
            .unwrap_err();
        assert!(unlisted_msg.contains("allowlist") || unlisted_msg.contains("not-on-list"));
    }

    #[test]
    fn fetch_uses_ssrf_resolver_atomically() {
        // v0.3.1: the custom ureq Resolver is the ONLY DNS path used by
        // the agent. The check here is that a private IP is rejected by
        // the wired-up agent with the resolver's own message, not by the
        // old pre-check.
        //
        // A raw IP (10.0.0.1) is used so that ToSocketAddrs skips DNS and
        // goes straight to validate_addrs, which shows the resolver is on
        // the code path.
        let msg = fetch_err("http://10.0.0.1/");
        assert!(
            msg.contains("IPv4") || msg.contains("10.0.0.1") || msg.contains("private"),
            "error should come from SsrfResolver: {}",
            msg
        );
    }

    #[test]
    fn fetch_ipv6_literal_loopback_rejected() {
        let msg = fetch_err("http://[::1]/");
        assert!(msg.contains("loopback") || msg.contains("::1"), "got: {}", msg);
    }

    #[test]
    fn fetch_ipv6_literal_ula_rejected() {
        let msg = fetch_err("http://[fc00::1]/");
        assert!(msg.contains("IPv6") || msg.contains("fc00"), "got: {}", msg);
    }

    // ---- S6: EditFile atomicity ----------------------------------------------------------

    #[test]
    fn edit_file_unique_match() {
        let target = tmpdir("edit-unique").join("f.txt");
        fs::write(&target, "hello world").unwrap();
        let editor = EditFile::new();
        editor
            .call(json!({"path": target.to_str().unwrap(), "old":"world", "new":"agnt"}))
            .unwrap();
        assert_eq!(fs::read_to_string(&target).unwrap(), "hello agnt");
    }

    #[test]
    fn edit_file_concurrent_stress() {
        use std::sync::atomic::{AtomicUsize, Ordering};
        use std::thread;

        let path = tmpdir("edit-stress").join("race.txt");
        // 100 rounds; in each, 4 threads race to replace the same unique
        // marker with their own value. With the lock + atomic-rename
        // semantics exactly one of them is expected to succeed per round.
        for round in 0..100 {
            fs::write(&path, format!("start-{}-MARK-end", round)).unwrap();
            let winners = Arc::new(AtomicUsize::new(0));
            thread::scope(|scope| {
                for tid in 0..4 {
                    let target = path.clone();
                    let tally = Arc::clone(&winners);
                    scope.spawn(move || {
                        let editor = EditFile::new();
                        let outcome = editor.call(json!({
                            "path": target.to_str().unwrap(),
                            "old": "MARK",
                            "new": format!("T{}", tid),
                        }));
                        if outcome.is_ok() {
                            tally.fetch_add(1, Ordering::SeqCst);
                        }
                    });
                }
            });
            assert_eq!(
                winners.load(Ordering::SeqCst),
                1,
                "expected exactly one winner per round, got {} on round {}",
                winners.load(Ordering::SeqCst),
                round
            );
            let after = fs::read_to_string(&path).unwrap();
            assert!(!after.contains("MARK"), "marker should be replaced");
        }
    }

    // ---- S1: Shell feature gate (only runs when shell feature enabled) ------------------

    #[cfg(feature = "shell")]
    #[test]
    fn shell_rejects_unknown_argv0() {
        let shell = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
        assert!(shell.call(json!({"cmd":"rm -rf /"})).is_err());
    }

    #[cfg(feature = "shell")]
    #[test]
    fn shell_rejects_command_substitution() {
        let shell = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
        // shell-words keeps $(...) as one token; the char filter rejects `$`.
        let msg = shell.call(json!({"cmd":"echo $(whoami)"})).unwrap_err();
        assert!(msg.contains("forbidden"));
    }

    #[cfg(feature = "shell")]
    #[test]
    fn shell_rejects_pipe() {
        let shell = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
        let msg = shell.call(json!({"cmd":"echo hi | cat"})).unwrap_err();
        assert!(msg.contains("forbidden") || msg.contains("allowlist"));
    }

    #[cfg(feature = "shell")]
    #[test]
    fn shell_allowlisted_echo_runs() {
        let shell = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
        let stdout = shell.call(json!({"cmd":"echo hello"})).unwrap();
        assert!(stdout.contains("hello"));
    }

    // ---- C1: bubblewrap sandbox ---------------------------------------------------------

    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    #[test]
    fn bwrap_argv_contains_core_ro_binds_and_unshare() {
        let cfg = BwrapConfig { share_net: false };
        let cwd = PathBuf::from("/tmp/workdir-xyz");
        let wrapped = vec!["echo".to_string(), "hi".to_string()];
        let out = Shell::build_bwrap_argv(&cfg, &cwd, &wrapped);
        // Read-only binds for the core system directories.
        assert!(out.windows(3).any(|w| w == ["--ro-bind", "/usr", "/usr"]));
        assert!(out.windows(3).any(|w| w == ["--ro-bind", "/bin", "/bin"]));
        // Isolation flags.
        assert!(out.iter().any(|s| s == "--unshare-all"));
        assert!(out.iter().any(|s| s == "--die-with-parent"));
        assert!(out.iter().any(|s| s == "--tmpfs"));
        // The working directory is bound and then entered.
        assert!(out.windows(3).any(|w| w[0] == "--bind" && w[1] == "/tmp/workdir-xyz"));
        let chdir_at = out.iter().position(|s| s == "--chdir").expect("chdir");
        assert_eq!(out[chdir_at + 1], "/tmp/workdir-xyz");
        // After the final `--` comes the wrapped argv, unchanged.
        let sep_at = out.iter().rposition(|s| s == "--").expect("-- sep");
        assert_eq!(&out[sep_at + 1..], &["echo".to_string(), "hi".to_string()][..]);
    }

    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    #[test]
    fn bwrap_share_net_flag_toggles() {
        let cwd = PathBuf::from("/tmp/nw");
        let wrapped = vec!["echo".to_string()];
        let without_net = Shell::build_bwrap_argv(&BwrapConfig { share_net: false }, &cwd, &wrapped);
        assert!(!without_net.iter().any(|s| s == "--share-net"));
        let with_net = Shell::build_bwrap_argv(&BwrapConfig { share_net: true }, &cwd, &wrapped);
        assert!(with_net.iter().any(|s| s == "--share-net"));
    }

    #[cfg(all(feature = "bwrap-shell", target_os = "linux"))]
    #[test]
    #[ignore = "requires bwrap installed locally"]
    fn bwrap_echo_runs_under_sandbox() {
        let shell = Shell::new_bwrap(vec!["echo".into()], std::env::temp_dir(), false)
            .expect("bwrap must be installed to run this test");
        let stdout = shell.call(json!({"cmd":"echo sandboxed"})).unwrap();
        assert!(stdout.contains("sandboxed"));
    }
}