Skip to main content

agnt_tools/
builtins.rs

1//! Built-in tools for agnt agents.
2//!
3//! All filesystem tools optionally hold an `Arc<FilesystemRoot>`; when set,
4//! every user-supplied path is resolved through the sandbox before touching
5//! `std::fs`. Default (unsandboxed) constructors are still provided for
6//! development / REPL use, but their rustdoc carries an explicit warning.
7//!
8//! The [`Shell`] tool is gated behind the `shell` cargo feature and is
9//! documented as CVE-class dangerous. See [`Shell::new_sandboxed`].
10
11use agnt_core::Tool;
12use serde_json::{json, Value};
13use std::fs;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use crate::sandbox::FilesystemRoot;
18
19// ------------------------------------------------------------------------------------------------
20// ReadFile
21// ------------------------------------------------------------------------------------------------
22
/// Byte cap on the file content returned by the `read_file` tool (256 KiB).
/// Longer files are cut at, or just below, this many bytes so the cut lands
/// on a UTF-8 character boundary, and a truncation notice is appended.
const READ_FILE_MAX: usize = 256 * 1024;
24
25/// Read a UTF-8 text file.
26///
27/// **Unsandboxed by default.** Without [`ReadFile::with_sandbox`] this tool
28/// can read any file the process has access to. Pair with a
29/// [`FilesystemRoot`] when exposing to untrusted LLM output.
30pub struct ReadFile {
31    sandbox: Option<Arc<FilesystemRoot>>,
32}
33
34impl Default for ReadFile {
35    fn default() -> Self { Self::new() }
36}
37
38impl ReadFile {
39    /// Unsandboxed constructor — full-host read access. Use only in trusted
40    /// contexts.
41    pub fn new() -> Self { Self { sandbox: None } }
42
43    /// Sandboxed constructor — paths are resolved against `sandbox` and
44    /// rejected if they escape the root.
45    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
46        Self { sandbox: Some(sandbox) }
47    }
48}
49
50fn resolve_path(sandbox: &Option<Arc<FilesystemRoot>>, input: &str) -> Result<PathBuf, String> {
51    match sandbox {
52        Some(s) => s.resolve(input),
53        None => Ok(PathBuf::from(input)),
54    }
55}
56
57impl Tool for ReadFile {
58    fn name(&self) -> &str { "read_file" }
59    fn description(&self) -> &str {
60        "Read a UTF-8 text file and return its contents. Truncated at 256KB. Prefer this over 'shell cat' — it is deterministic and cheaper."
61    }
62    fn schema(&self) -> Value {
63        json!({
64            "type": "object",
65            "properties": {
66                "path": { "type": "string", "description": "file path (must be under the agent sandbox root if one is configured)" }
67            },
68            "required": ["path"]
69        })
70    }
71    fn call(&self, args: Value) -> Result<String, String> {
72        let path = args["path"].as_str().ok_or("missing path")?;
73        let resolved = resolve_path(&self.sandbox, path)?;
74        let content = fs::read_to_string(&resolved)
75            .map_err(|e| format!("read {}: {}", resolved.display(), e))?;
76        if content.len() <= READ_FILE_MAX {
77            return Ok(content);
78        }
79        let mut cut = READ_FILE_MAX;
80        while cut > 0 && !content.is_char_boundary(cut) {
81            cut -= 1;
82        }
83        let mut out = content[..cut].to_string();
84        out.push_str(&format!(
85            "\n...(truncated at {} bytes; file is {} bytes total)",
86            cut,
87            content.len()
88        ));
89        Ok(out)
90    }
91}
92
93// ------------------------------------------------------------------------------------------------
94// EditFile — atomic (S6)
95// ------------------------------------------------------------------------------------------------
96
97/// Targeted file edit. Locks the file, re-reads under lock, verifies the
98/// unique-match invariant, writes to a temp sibling, and atomically renames
99/// into place — fixing the v0.1 TOCTOU race between read and write.
100///
101/// **Unsandboxed by default.** Use [`EditFile::with_sandbox`] when exposed to
102/// hostile LLM output.
103pub struct EditFile {
104    sandbox: Option<Arc<FilesystemRoot>>,
105}
106
107impl Default for EditFile {
108    fn default() -> Self { Self::new() }
109}
110
111impl EditFile {
112    pub fn new() -> Self { Self { sandbox: None } }
113    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
114        Self { sandbox: Some(sandbox) }
115    }
116}
117
118impl Tool for EditFile {
119    fn name(&self) -> &str { "edit_file" }
120    fn description(&self) -> &str {
121        "Targeted file edit. Replaces one exact occurrence of 'old' with 'new' in the file. Fails if 'old' is not found or appears more than once — in that case pass more surrounding context in 'old' to make it unique. Prefer this over write_file when changing a small part of an existing file."
122    }
123    fn schema(&self) -> Value {
124        json!({
125            "type": "object",
126            "properties": {
127                "path": { "type": "string" },
128                "old":  { "type": "string", "description": "exact text to find (must be unique in the file)" },
129                "new":  { "type": "string", "description": "replacement text" }
130            },
131            "required": ["path", "old", "new"]
132        })
133    }
134    fn call(&self, args: Value) -> Result<String, String> {
135        use fs2::FileExt;
136        use std::io::Write;
137
138        let path = args["path"].as_str().ok_or("missing path")?;
139        let old = args["old"].as_str().ok_or("missing old")?;
140        let new_s = args["new"].as_str().ok_or("missing new")?;
141        if old.is_empty() {
142            return Err("'old' must not be empty".into());
143        }
144
145        let resolved = resolve_path(&self.sandbox, path)?;
146
147        // Lock a stable sibling lockfile. Locking the target file directly
148        // does not work because atomic-rename swaps the inode — other waiters
149        // would hold locks on the orphaned pre-rename file descriptor and
150        // race past each other, clobbering the winner. The lockfile path is
151        // derived from the target filename and stays put across renames.
152        let lock_name = format!(
153            ".{}.agnt-edit.lock",
154            resolved
155                .file_name()
156                .and_then(|s| s.to_str())
157                .unwrap_or("edit")
158        );
159        let lock_path = resolved
160            .parent()
161            .map(|p| p.join(&lock_name))
162            .unwrap_or_else(|| PathBuf::from(&lock_name));
163
164        let lock_file = std::fs::OpenOptions::new()
165            .create(true)
166            .read(true)
167            .write(true)
168            .open(&lock_path)
169            .map_err(|e| format!("lock open {}: {}", lock_path.display(), e))?;
170
171        lock_file
172            .lock_exclusive()
173            .map_err(|e| format!("lock {}: {}", lock_path.display(), e))?;
174
175        // Re-read the target under the lock. Any other writer that held the
176        // lock before us has already renamed their result into place; we now
177        // see their updated bytes.
178        let perform = || -> Result<(String, String), String> {
179            let content = std::fs::read_to_string(&resolved)
180                .map_err(|e| format!("read {}: {}", resolved.display(), e))?;
181            let count = content.matches(old).count();
182            if count == 0 {
183                return Err(format!("'old' not found in {}", resolved.display()));
184            }
185            if count > 1 {
186                return Err(format!(
187                    "'old' appears {} times in {}; pass more surrounding context to make it unique",
188                    count,
189                    resolved.display()
190                ));
191            }
192            let updated = content.replacen(old, new_s, 1);
193
194            // Write to sibling .tmp and atomically rename.
195            let mut tmp = resolved.clone();
196            let tmp_name = format!(
197                "{}.agnt-edit-tmp.{}.{:?}",
198                resolved
199                    .file_name()
200                    .and_then(|s| s.to_str())
201                    .unwrap_or("edit"),
202                std::process::id(),
203                std::thread::current().id()
204            );
205            tmp.set_file_name(tmp_name);
206            {
207                let mut tmpf = std::fs::OpenOptions::new()
208                    .write(true)
209                    .create(true)
210                    .truncate(true)
211                    .open(&tmp)
212                    .map_err(|e| format!("tmp open {}: {}", tmp.display(), e))?;
213                tmpf.write_all(updated.as_bytes())
214                    .map_err(|e| format!("tmp write: {}", e))?;
215                tmpf.sync_all().map_err(|e| format!("tmp sync: {}", e))?;
216            }
217            std::fs::rename(&tmp, &resolved)
218                .map_err(|e| format!("rename {} -> {}: {}", tmp.display(), resolved.display(), e))?;
219
220            Ok((content, updated))
221        };
222
223        let res = perform();
224        // Release lock before dropping file (drop would also release, but be explicit).
225        let _ = lock_file.unlock();
226        drop(lock_file);
227
228        let (before, after) = res?;
229        Ok(format!(
230            "edited {} ({} bytes → {} bytes)",
231            resolved.display(),
232            before.len(),
233            after.len()
234        ))
235    }
236}
237
238// ------------------------------------------------------------------------------------------------
239// WriteFile
240// ------------------------------------------------------------------------------------------------
241
242pub struct WriteFile {
243    sandbox: Option<Arc<FilesystemRoot>>,
244}
245
246impl Default for WriteFile {
247    fn default() -> Self { Self::new() }
248}
249
250impl WriteFile {
251    pub fn new() -> Self { Self { sandbox: None } }
252    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
253        Self { sandbox: Some(sandbox) }
254    }
255}
256
257impl Tool for WriteFile {
258    fn name(&self) -> &str { "write_file" }
259    fn description(&self) -> &str { "Write UTF-8 content to a file, creating or overwriting it." }
260    fn schema(&self) -> Value {
261        json!({
262            "type": "object",
263            "properties": {
264                "path": { "type": "string" },
265                "content": { "type": "string" }
266            },
267            "required": ["path", "content"]
268        })
269    }
270    fn call(&self, args: Value) -> Result<String, String> {
271        let path = args["path"].as_str().ok_or("missing path")?;
272        let content = args["content"].as_str().ok_or("missing content")?;
273        let resolved = resolve_path(&self.sandbox, path)?;
274        fs::write(&resolved, content)
275            .map_err(|e| format!("write {}: {}", resolved.display(), e))?;
276        Ok(format!("wrote {} bytes to {}", content.len(), resolved.display()))
277    }
278}
279
280// ------------------------------------------------------------------------------------------------
281// ListDir
282// ------------------------------------------------------------------------------------------------
283
284pub struct ListDir {
285    sandbox: Option<Arc<FilesystemRoot>>,
286}
287
288impl Default for ListDir {
289    fn default() -> Self { Self::new() }
290}
291
292impl ListDir {
293    pub fn new() -> Self { Self { sandbox: None } }
294    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
295        Self { sandbox: Some(sandbox) }
296    }
297}
298
299impl Tool for ListDir {
300    fn name(&self) -> &str { "list_dir" }
301    fn description(&self) -> &str {
302        "List a directory. One entry per line as 'TYPE NAME' where TYPE is F (file), D (dir), or L (symlink)."
303    }
304    fn schema(&self) -> Value {
305        json!({
306            "type": "object",
307            "properties": {
308                "path": { "type": "string" }
309            },
310            "required": ["path"]
311        })
312    }
313    fn call(&self, args: Value) -> Result<String, String> {
314        let path = args["path"].as_str().ok_or("missing path")?;
315        let resolved = resolve_path(&self.sandbox, path)?;
316        let mut out = String::new();
317        for entry in fs::read_dir(&resolved)
318            .map_err(|e| format!("read_dir {}: {}", resolved.display(), e))?
319        {
320            let e = entry.map_err(|e| e.to_string())?;
321            let ft = e.file_type().map_err(|e| e.to_string())?;
322            let tag = if ft.is_dir() { 'D' } else if ft.is_symlink() { 'L' } else { 'F' };
323            out.push_str(&format!("{} {}\n", tag, e.file_name().to_string_lossy()));
324        }
325        Ok(out)
326    }
327}
328
329// ------------------------------------------------------------------------------------------------
330// Shell (feature = "shell") — CVE-class dangerous, opt-in only
331// ------------------------------------------------------------------------------------------------
332
/// Runs a program chosen by the model **without** going through `sh -c`.
///
/// # !!! CVE-class dangerous !!!
///
/// The LLM decides which command line is executed. Treat this tool as
/// CVE-class dangerous and always combine it with OS-level isolation
/// (containers, seccomp, bubblewrap, unshare, VMs — whatever fits the host).
/// The `argv[0]` allowlist enforced here is defense-in-depth, **not** the
/// primary security boundary.
///
/// The only way to build one is [`Shell::new_sandboxed`]. There is no
/// default constructor because no default would be safe: both the allowlist
/// and the working directory have to be spelled out.
///
/// ## What this tool guarantees
///
/// - `cmd` is split into words with `shell-words` (POSIX word splitting).
/// - `argv[0]` has to be one of the entries of the caller-supplied
///   `allowed_argv0` list.
/// - A token containing `$`, `` ` ``, `|`, `;`, `&`, `>`, `<`, `(`, `)` or a
///   newline is refused. `shell-words` would hand such characters through as
///   literal text; rejecting them is an extra safety net.
/// - The process is started with
///   `std::process::Command::new(argv[0]).args(&argv[1..])` — **no `sh -c`**,
///   so this process performs no command substitution, glob expansion or
///   environment expansion.
/// - The working directory is fixed with `current_dir(&self.cwd)`.
///
/// ## What this tool does NOT guarantee
///
/// - The program itself may be destructive (e.g. `git clean -fdx`).
/// - The program may start subprocesses or shells of its own.
/// - Inherited file descriptors, environment variables and kernel syscalls
///   are not restricted in any way. Pair with OS-level isolation.
#[cfg(feature = "shell")]
pub struct Shell {
    allowed_argv0: Vec<String>,
    cwd: PathBuf,
}

#[cfg(feature = "shell")]
impl Shell {
    /// Builds a Shell tool with an explicit allowlist and working directory.
    ///
    /// - `allowed_argv0`: the exact program names accepted as `argv[0]`.
    ///   Entries are compared as literal strings with no path resolution, so
    ///   list full paths to pin a binary such as `/usr/bin/git`.
    /// - `cwd`: the directory every spawned process is started in. The model
    ///   has no argument with which to change it.
    ///
    /// # Safety
    ///
    /// Not `unsafe` in the Rust sense, but the CVE-class warning on the
    /// struct applies: do not call this unless OS-level isolation is already
    /// in place.
    pub fn new_sandboxed(allowed_argv0: Vec<String>, cwd: PathBuf) -> Self {
        Shell { cwd, allowed_argv0 }
    }
}
391
/// Characters that cause the `shell` tool to reject a command when they
/// appear in any parsed token: shell metacharacters plus the newline.
#[cfg(feature = "shell")]
const SHELL_FORBIDDEN_CHARS: &[char] =
    &['$', '`', '|', ';', '&', '>', '<', '(', ')', '\n'];
395
#[cfg(feature = "shell")]
impl Tool for Shell {
    /// Tool identifier exposed to the model.
    fn name(&self) -> &str {
        "shell"
    }

    /// Human/LLM-facing summary of what the tool does.
    fn description(&self) -> &str {
        "Run a program with arguments. The command is parsed with shell-words; argv[0] must be in the caller's allowlist; no sh -c, no command substitution, no pipes. Prefer specialized tools (read_file, grep, glob, fetch) over this."
    }

    /// JSON schema of the accepted arguments: a single required `cmd`.
    fn schema(&self) -> Value {
        let properties = json!({
            "cmd": { "type": "string", "description": "command line (e.g. 'git status' or 'cargo build --release')" }
        });
        json!({
            "type": "object",
            "properties": properties,
            "required": ["cmd"]
        })
    }

    /// Parses `args["cmd"]`, validates it and runs it in the configured
    /// working directory, returning exit status, stdout and stderr as text.
    ///
    /// Checks run in this order: the command must parse, must not be empty,
    /// no token may contain a forbidden character, and the program name must
    /// be on the allowlist.
    ///
    /// # Errors
    ///
    /// Returns an error string when `cmd` is missing, when any of the checks
    /// above fails, or when the process cannot be spawned. A non-zero exit
    /// status is not an error; it is reported in the output.
    fn call(&self, args: Value) -> Result<String, String> {
        let cmd = match args["cmd"].as_str() {
            Some(c) => c,
            None => return Err("missing cmd".to_string()),
        };
        let argv = match shell_words::split(cmd) {
            Ok(words) => words,
            Err(e) => return Err(format!("shell parse: {}", e)),
        };
        let (program, rest) = match argv.split_first() {
            Some(parts) => parts,
            None => return Err("empty command".to_string()),
        };

        // Every token is screened, the program name included.
        for tok in &argv {
            for ch in tok.chars() {
                if SHELL_FORBIDDEN_CHARS.contains(&ch) {
                    return Err(format!(
                        "token contains forbidden character {:?}: {}",
                        ch, tok
                    ));
                }
            }
        }

        if !self.allowed_argv0.contains(program) {
            return Err(format!(
                "argv[0] {:?} not in allowlist {:?}",
                program, self.allowed_argv0
            ));
        }

        let mut command = std::process::Command::new(program);
        command.args(rest).current_dir(&self.cwd);
        let finished = match command.output() {
            Ok(o) => o,
            Err(e) => return Err(format!("spawn: {}", e)),
        };

        // No exit code means the process was terminated by a signal.
        let status = match finished.status.code() {
            Some(code) => code.to_string(),
            None => "signal".to_string(),
        };
        let stdout = String::from_utf8_lossy(&finished.stdout);
        let stderr = String::from_utf8_lossy(&finished.stderr);
        Ok(format!(
            "exit: {}\n--- stdout ---\n{}--- stderr ---\n{}",
            status, stdout, stderr,
        ))
    }
}
452
453// ------------------------------------------------------------------------------------------------
454// Glob
455// ------------------------------------------------------------------------------------------------
456
457pub struct Glob {
458    sandbox: Option<Arc<FilesystemRoot>>,
459}
460
461impl Default for Glob {
462    fn default() -> Self { Self::new() }
463}
464
465impl Glob {
466    pub fn new() -> Self { Self { sandbox: None } }
467    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
468        Self { sandbox: Some(sandbox) }
469    }
470}
471
472impl Tool for Glob {
473    fn name(&self) -> &str { "glob" }
474    fn description(&self) -> &str {
475        "Find files matching a shell-style glob pattern (e.g. 'src/**/*.rs', '**/Cargo.toml'). Returns one path per line. Prefer this over 'shell find' — it is faster, portable across OSes, and has no command-injection surface."
476    }
477    fn schema(&self) -> Value {
478        json!({
479            "type": "object",
480            "properties": {
481                "pattern": { "type": "string", "description": "glob pattern (must be relative to the sandbox root when sandboxed)" }
482            },
483            "required": ["pattern"]
484        })
485    }
486    fn call(&self, args: Value) -> Result<String, String> {
487        let pattern = args["pattern"].as_str().ok_or("missing pattern")?;
488
489        // When sandboxed, require the pattern to be a relative path under the
490        // root. We anchor by joining the pattern onto the root and checking
491        // that it stays under the root (which also rejects `..`).
492        let (effective_pattern, root_strip): (String, Option<PathBuf>) = match &self.sandbox {
493            Some(s) => {
494                if Path::new(pattern).is_absolute() {
495                    return Err(format!(
496                        "glob pattern must be relative when sandboxed: {}",
497                        pattern
498                    ));
499                }
500                if pattern.split('/').any(|seg| seg == "..") {
501                    return Err(format!("glob pattern contains '..': {}", pattern));
502                }
503                let joined = s.root().join(pattern);
504                let eff = joined.to_string_lossy().into_owned();
505                (eff, Some(s.root().to_path_buf()))
506            }
507            None => (pattern.to_string(), None),
508        };
509
510        let mut out = String::new();
511        let mut count = 0usize;
512        for entry in glob::glob(&effective_pattern).map_err(|e| format!("glob: {}", e))? {
513            let p = match entry {
514                Ok(p) => p,
515                Err(_) => continue,
516            };
517            // Double-check sandbox containment post-expansion (defensive —
518            // glob should never escape, but symlinks could surface).
519            if let Some(root) = &root_strip {
520                if let Ok(canonical) = std::fs::canonicalize(&p) {
521                    if !canonical.starts_with(root) {
522                        continue;
523                    }
524                }
525            }
526            out.push_str(&p.to_string_lossy());
527            out.push('\n');
528            count += 1;
529            if count >= 2000 {
530                out.push_str("(truncated at 2000)\n");
531                break;
532            }
533        }
534        if out.is_empty() {
535            Ok("(no matches)".into())
536        } else {
537            Ok(out)
538        }
539    }
540}
541
542// ------------------------------------------------------------------------------------------------
543// Grep
544// ------------------------------------------------------------------------------------------------
545
546pub struct Grep {
547    sandbox: Option<Arc<FilesystemRoot>>,
548}
549
550impl Default for Grep {
551    fn default() -> Self { Self::new() }
552}
553
554impl Grep {
555    pub fn new() -> Self { Self { sandbox: None } }
556    pub fn with_sandbox(sandbox: Arc<FilesystemRoot>) -> Self {
557        Self { sandbox: Some(sandbox) }
558    }
559}
560
561impl Tool for Grep {
562    fn name(&self) -> &str { "grep" }
563    fn description(&self) -> &str {
564        "Search text files under a directory for a regex pattern. Returns 'path:line:text' per match. Optional 'ext' filter (e.g. 'rs', 'md'). Prefer this over 'shell grep' — it is native, typically under 1ms for a source tree, and avoids quoting pitfalls."
565    }
566    fn schema(&self) -> Value {
567        json!({
568            "type": "object",
569            "properties": {
570                "pattern": { "type": "string", "description": "regex pattern" },
571                "path":    { "type": "string", "description": "root directory to walk" },
572                "ext":     { "type": "string", "description": "optional file extension filter without dot" }
573            },
574            "required": ["pattern", "path"]
575        })
576    }
577    fn call(&self, args: Value) -> Result<String, String> {
578        let pattern = args["pattern"].as_str().ok_or("missing pattern")?;
579        let path = args["path"].as_str().ok_or("missing path")?;
580        let ext = args["ext"].as_str();
581        let resolved = resolve_path(&self.sandbox, path)?;
582        let re = regex::Regex::new(pattern).map_err(|e| format!("regex: {}", e))?;
583        let mut out = String::new();
584        let mut count = 0usize;
585        for entry in walkdir::WalkDir::new(&resolved)
586            .into_iter()
587            .filter_map(|e| e.ok())
588        {
589            if !entry.file_type().is_file() { continue; }
590            if let Some(e) = ext {
591                if entry.path().extension().and_then(|s| s.to_str()) != Some(e) { continue; }
592            }
593            // When sandboxed, skip any file whose canonical path escapes the root.
594            if let Some(sbx) = &self.sandbox {
595                if let Ok(canonical) = std::fs::canonicalize(entry.path()) {
596                    if !canonical.starts_with(sbx.root()) {
597                        continue;
598                    }
599                }
600            }
601            let content = match fs::read_to_string(entry.path()) {
602                Ok(c) => c,
603                Err(_) => continue,
604            };
605            for (i, line) in content.lines().enumerate() {
606                if re.is_match(line) {
607                    out.push_str(&format!("{}:{}:{}\n", entry.path().display(), i + 1, line));
608                    count += 1;
609                    if count >= 500 {
610                        out.push_str("(truncated at 500 matches)\n");
611                        return Ok(out);
612                    }
613                }
614            }
615        }
616        if out.is_empty() {
617            Ok("(no matches)".into())
618        } else {
619            Ok(out)
620        }
621    }
622}
623
624// ------------------------------------------------------------------------------------------------
625// Fetch — SSRF guarded (S3)
626// ------------------------------------------------------------------------------------------------
627
/// Tool that performs an HTTP GET behind an SSRF guard.
///
/// Before any request is sent the URL goes through `ssrf_check`: only
/// `http`/`https` are accepted, the cloud metadata hosts are blocklisted by
/// name, and the fetch is refused if DNS resolution yields any loopback,
/// private, link-local, unspecified or multicast address.
///
/// The design also relies on the shared agent from `crate::http::agent()`
/// having redirects disabled, so a `302 Location: http://169.254.169.254/…`
/// cannot sidestep the resolved-IP check. That configuration lives outside
/// this file.
pub struct Fetch {
    allow_hosts: Option<Vec<String>>,
    max_bytes: usize,
}

/// Response-body byte limit used unless [`Fetch::with_max_bytes`] overrides
/// it (64 KiB).
const FETCH_DEFAULT_MAX: usize = 64 * 1024;

impl Default for Fetch {
    /// Same as [`Fetch::new`].
    fn default() -> Self {
        Fetch::new()
    }
}

impl Fetch {
    /// Builds a fetcher with no host allowlist and the default body limit.
    pub fn new() -> Self {
        Fetch {
            max_bytes: FETCH_DEFAULT_MAX,
            allow_hosts: None,
        }
    }

    /// Restricts fetches to the given hosts. The list is stored lowercased,
    /// and a URL whose (lowercased) host is not on it is refused before any
    /// DNS resolution takes place.
    pub fn with_allow_hosts(self, hosts: Vec<String>) -> Self {
        let lowered: Vec<String> = hosts.iter().map(|h| h.to_lowercase()).collect();
        Fetch {
            allow_hosts: Some(lowered),
            ..self
        }
    }

    /// Sets how many response bytes are read at most. Defaults to 64KB.
    pub fn with_max_bytes(self, n: usize) -> Self {
        Fetch {
            max_bytes: n,
            ..self
        }
    }
}
665
666fn ssrf_check(url: &str, allow_hosts: &Option<Vec<String>>) -> Result<(), String> {
667    use std::net::ToSocketAddrs;
668
669    let parsed = url::Url::parse(url).map_err(|e| format!("url parse: {}", e))?;
670    let scheme = parsed.scheme();
671    if scheme != "http" && scheme != "https" {
672        return Err(format!("rejected scheme: {}", scheme));
673    }
674    let host = parsed
675        .host_str()
676        .ok_or_else(|| "url has no host".to_string())?
677        .to_lowercase();
678
679    // Explicit metadata blocklist (covers the name-based GCP endpoint that
680    // would otherwise resolve to a non-private IP that happens to be routable
681    // only from inside the VM).
682    if host == "metadata.google.internal" || host == "169.254.169.254" {
683        return Err(format!("rejected metadata host: {}", host));
684    }
685
686    if let Some(allow) = allow_hosts {
687        if !allow.iter().any(|h| h == &host) {
688            return Err(format!("host {} not in allowlist", host));
689        }
690    }
691
692    let port = parsed.port_or_known_default().unwrap_or(80);
693    let addrs = (host.as_str(), port)
694        .to_socket_addrs()
695        .map_err(|e| format!("resolve {}: {}", host, e))?;
696
697    let mut any = false;
698    for sa in addrs {
699        any = true;
700        let ip = sa.ip();
701        if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
702            return Err(format!("rejected IP {} for {}", ip, host));
703        }
704        match ip {
705            std::net::IpAddr::V4(v4) => {
706                if v4.is_private() || v4.is_link_local() || v4.is_broadcast() {
707                    return Err(format!("rejected IPv4 {} for {}", v4, host));
708                }
709                // 169.254.169.254 is already is_link_local; explicit belt-and-suspenders:
710                if v4.octets() == [169, 254, 169, 254] {
711                    return Err(format!("rejected AWS metadata IP for {}", host));
712                }
713            }
714            std::net::IpAddr::V6(v6) => {
715                // No stable is_private / is_unique_local on stable std yet.
716                // Reject ULA (fc00::/7) and link-local (fe80::/10) by prefix.
717                let seg0 = v6.segments()[0];
718                if (seg0 & 0xfe00) == 0xfc00 || (seg0 & 0xffc0) == 0xfe80 {
719                    return Err(format!("rejected IPv6 {} for {}", v6, host));
720                }
721            }
722        }
723    }
724    if !any {
725        return Err(format!("no addresses for {}", host));
726    }
727    Ok(())
728}
729
730impl Tool for Fetch {
731    fn name(&self) -> &str { "fetch" }
732    fn description(&self) -> &str {
733        "HTTP GET a URL and return the response body (first 64KB by default). Rejects loopback / private / link-local / metadata hosts."
734    }
735    fn schema(&self) -> Value {
736        json!({
737            "type": "object",
738            "properties": {
739                "url": { "type": "string" }
740            },
741            "required": ["url"]
742        })
743    }
744    fn call(&self, args: Value) -> Result<String, String> {
745        use std::io::Read;
746        let url = args["url"].as_str().ok_or("missing url")?;
747        ssrf_check(url, &self.allow_hosts)?;
748        let resp = crate::http::agent()
749            .get(url)
750            .call()
751            .map_err(|e| format!("fetch: {}", e))?;
752        let status = resp.status();
753        let mut body = String::new();
754        resp.into_reader()
755            .take(self.max_bytes as u64)
756            .read_to_string(&mut body)
757            .map_err(|e| format!("read: {}", e))?;
758        Ok(format!("HTTP {}\n{}", status, body))
759    }
760}
761
762// ------------------------------------------------------------------------------------------------
763// Tests
764// ------------------------------------------------------------------------------------------------
765
766#[cfg(test)]
767mod tests {
768    use super::*;
769    use std::fs;
770
771    fn tmpdir(tag: &str) -> PathBuf {
772        let d = std::env::temp_dir().join(format!(
773            "agnt-tools-{}-{}-{}",
774            tag,
775            std::process::id(),
776            std::time::SystemTime::now()
777                .duration_since(std::time::UNIX_EPOCH)
778                .map(|d| d.as_nanos())
779                .unwrap_or(0)
780        ));
781        fs::create_dir_all(&d).unwrap();
782        d
783    }
784
785    // ---- S2: sandbox enforcement ----------------------------------------------------------
786
787    #[test]
788    fn sandbox_blocks_read_of_etc_shadow() {
789        let dir = tmpdir("sbx-read");
790        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
791        let tool = ReadFile::with_sandbox(sbx);
792        let res = tool.call(json!({"path":"/etc/shadow"}));
793        assert!(res.is_err(), "expected sandbox rejection");
794    }
795
796    #[test]
797    fn sandbox_blocks_write_outside_root() {
798        let dir = tmpdir("sbx-write");
799        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
800        let tool = WriteFile::with_sandbox(sbx);
801        let res = tool.call(json!({"path":"../escape.txt","content":"x"}));
802        assert!(res.is_err());
803    }
804
805    #[test]
806    fn sandbox_allows_read_under_root() {
807        let dir = tmpdir("sbx-ok");
808        fs::write(dir.join("hello.txt"), "world").unwrap();
809        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
810        let tool = ReadFile::with_sandbox(sbx);
811        let out = tool.call(json!({"path":"hello.txt"})).unwrap();
812        assert_eq!(out, "world");
813    }
814
815    #[test]
816    fn sandbox_blocks_listdir_of_root() {
817        let dir = tmpdir("sbx-ls");
818        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
819        let tool = ListDir::with_sandbox(sbx);
820        assert!(tool.call(json!({"path":"/"})).is_err());
821    }
822
823    #[test]
824    fn sandbox_blocks_glob_absolute() {
825        let dir = tmpdir("sbx-glob");
826        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
827        let tool = Glob::with_sandbox(sbx);
828        assert!(tool.call(json!({"pattern":"/etc/*"})).is_err());
829    }
830
831    #[test]
832    fn sandbox_blocks_glob_parent_traversal() {
833        let dir = tmpdir("sbx-glob2");
834        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
835        let tool = Glob::with_sandbox(sbx);
836        assert!(tool.call(json!({"pattern":"../*"})).is_err());
837    }
838
839    #[test]
840    fn sandbox_blocks_grep_root() {
841        let dir = tmpdir("sbx-grep");
842        let sbx = Arc::new(FilesystemRoot::new(&dir).unwrap());
843        let tool = Grep::with_sandbox(sbx);
844        assert!(tool.call(json!({"pattern":"root:","path":"/etc"})).is_err());
845    }
846
847    // ---- S3: Fetch SSRF guard -------------------------------------------------------------
848
849    #[test]
850    fn fetch_rejects_aws_metadata_ip() {
851        let tool = Fetch::new();
852        let err = tool
853            .call(json!({"url":"http://169.254.169.254/latest/meta-data/"}))
854            .unwrap_err();
855        assert!(err.contains("metadata") || err.contains("link") || err.contains("169.254"));
856    }
857
858    #[test]
859    fn fetch_rejects_gcp_metadata_name() {
860        let tool = Fetch::new();
861        let err = tool
862            .call(json!({"url":"http://metadata.google.internal/"}))
863            .unwrap_err();
864        assert!(err.contains("metadata"));
865    }
866
867    #[test]
868    fn fetch_rejects_loopback() {
869        let tool = Fetch::new();
870        let err = tool.call(json!({"url":"http://127.0.0.1:11434/"})).unwrap_err();
871        assert!(err.contains("IP") || err.contains("loopback") || err.contains("127"));
872    }
873
874    #[test]
875    fn fetch_rejects_private_ipv4() {
876        let tool = Fetch::new();
877        let err = tool.call(json!({"url":"http://192.168.1.1/"})).unwrap_err();
878        assert!(err.contains("IPv4") || err.contains("192.168") || err.contains("private"));
879    }
880
881    #[test]
882    fn fetch_rejects_file_scheme() {
883        let tool = Fetch::new();
884        let err = tool.call(json!({"url":"file:///etc/passwd"})).unwrap_err();
885        assert!(err.contains("scheme"));
886    }
887
888    #[test]
889    fn fetch_rejects_localhost_name() {
890        let tool = Fetch::new();
891        let err = tool.call(json!({"url":"http://localhost:6379/"})).unwrap_err();
892        assert!(err.contains("IP") || err.contains("loopback") || err.contains("127"));
893    }
894
895    #[test]
896    fn fetch_allowlist_blocks_non_matching_host_before_dns() {
897        let tool = Fetch::new().with_allow_hosts(vec!["example.com".into()]);
898        let err = tool.call(json!({"url":"http://metadata.google.internal/"})).unwrap_err();
899        // metadata is blocked by explicit list first; check allowlist on benign host
900        assert!(err.contains("metadata"));
901        let tool2 = Fetch::new().with_allow_hosts(vec!["example.com".into()]);
902        let err2 = tool2.call(json!({"url":"http://not-on-list.invalid/"})).unwrap_err();
903        assert!(err2.contains("allowlist") || err2.contains("not-on-list"));
904    }
905
906    // ---- S6: EditFile atomicity ----------------------------------------------------------
907
908    #[test]
909    fn edit_file_unique_match() {
910        let dir = tmpdir("edit-unique");
911        let p = dir.join("f.txt");
912        fs::write(&p, "hello world").unwrap();
913        let tool = EditFile::new();
914        tool.call(json!({"path": p.to_str().unwrap(), "old":"world", "new":"agnt"})).unwrap();
915        assert_eq!(fs::read_to_string(&p).unwrap(), "hello agnt");
916    }
917
918    #[test]
919    fn edit_file_concurrent_stress() {
920        use std::sync::atomic::{AtomicUsize, Ordering};
921        use std::thread;
922
923        let dir = tmpdir("edit-stress");
924        let path = dir.join("race.txt");
925        // Run 100 race-rounds, 4 threads each trying to replace the same
926        // unique marker with their own value. Under the lock + atomic-rename
927        // semantics, exactly one winner should be observed per round.
928        for round in 0..100 {
929            fs::write(&path, format!("start-{}-MARK-end", round)).unwrap();
930            let winners = Arc::new(AtomicUsize::new(0));
931            thread::scope(|s| {
932                for tid in 0..4 {
933                    let path = path.clone();
934                    let winners = winners.clone();
935                    s.spawn(move || {
936                        let tool = EditFile::new();
937                        let res = tool.call(json!({
938                            "path": path.to_str().unwrap(),
939                            "old": "MARK",
940                            "new": format!("T{}", tid),
941                        }));
942                        if res.is_ok() {
943                            winners.fetch_add(1, Ordering::SeqCst);
944                        }
945                    });
946                }
947            });
948            assert_eq!(
949                winners.load(Ordering::SeqCst),
950                1,
951                "expected exactly one winner per round, got {} on round {}",
952                winners.load(Ordering::SeqCst),
953                round
954            );
955            let final_content = fs::read_to_string(&path).unwrap();
956            assert!(!final_content.contains("MARK"), "marker should be replaced");
957        }
958    }
959
960    // ---- S1: Shell feature gate (only runs when shell feature enabled) ------------------
961
962    #[cfg(feature = "shell")]
963    #[test]
964    fn shell_rejects_unknown_argv0() {
965        let s = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
966        assert!(s.call(json!({"cmd":"rm -rf /"})).is_err());
967    }
968
969    #[cfg(feature = "shell")]
970    #[test]
971    fn shell_rejects_command_substitution() {
972        let s = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
973        // shell-words will keep $(...) as a single token; our char filter rejects $
974        let err = s.call(json!({"cmd":"echo $(whoami)"})).unwrap_err();
975        assert!(err.contains("forbidden"));
976    }
977
978    #[cfg(feature = "shell")]
979    #[test]
980    fn shell_rejects_pipe() {
981        let s = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
982        let err = s.call(json!({"cmd":"echo hi | cat"})).unwrap_err();
983        assert!(err.contains("forbidden") || err.contains("allowlist"));
984    }
985
986    #[cfg(feature = "shell")]
987    #[test]
988    fn shell_allowlisted_echo_runs() {
989        let s = Shell::new_sandboxed(vec!["echo".into()], std::env::temp_dir());
990        let out = s.call(json!({"cmd":"echo hello"})).unwrap();
991        assert!(out.contains("hello"));
992    }
993}