Skip to main content

agent_sdk_tools/
environment.rs

1use anyhow::{Context, Result, ensure};
2use async_trait::async_trait;
3use serde::{Deserialize, Serialize};
4use std::ffi::OsString;
5use std::path::{Component, Path, PathBuf};
6
7/// Entry in a directory listing
8#[derive(Clone, Debug, Serialize, Deserialize)]
9pub struct FileEntry {
10    pub name: String,
11    pub path: String,
12    pub is_dir: bool,
13    pub size: Option<u64>,
14}
15
16/// Match result from grep operation
17#[derive(Clone, Debug, Serialize, Deserialize)]
18pub struct GrepMatch {
19    pub path: String,
20    pub line_number: usize,
21    pub line_content: String,
22    pub match_start: usize,
23    pub match_end: usize,
24}
25
26/// Result from command execution
27#[derive(Clone, Debug, Serialize, Deserialize)]
28pub struct ExecResult {
29    pub stdout: String,
30    pub stderr: String,
31    pub exit_code: i32,
32}
33
34impl ExecResult {
35    #[must_use]
36    pub const fn success(&self) -> bool {
37        self.exit_code == 0
38    }
39}
40
41/// Environment abstraction for file and command operations.
42///
43/// The SDK's primitive tools (Read, Write, Grep, Glob, Bash) use this trait
44/// to interact with the underlying filesystem or storage backend.
45///
46/// Implementations:
47/// - `LocalFileSystem` - Standard filesystem (provided by SDK)
48/// - `InMemoryFileSystem` - For testing (provided by SDK)
49/// - Custom backends (S3, Git, iCloud, etc.)
50#[async_trait]
51pub trait Environment: Send + Sync {
52    /// Read file contents as UTF-8 string
53    ///
54    /// # Errors
55    /// Returns an error if the file cannot be read.
56    async fn read_file(&self, path: &str) -> Result<String>;
57
58    /// Read file contents as raw bytes
59    ///
60    /// # Errors
61    /// Returns an error if the file cannot be read.
62    async fn read_file_bytes(&self, path: &str) -> Result<Vec<u8>>;
63
64    /// Write string content to file (creates or overwrites)
65    ///
66    /// # Errors
67    /// Returns an error if the file cannot be written.
68    async fn write_file(&self, path: &str, content: &str) -> Result<()>;
69
70    /// Write raw bytes to file
71    ///
72    /// # Errors
73    /// Returns an error if the file cannot be written.
74    async fn write_file_bytes(&self, path: &str, content: &[u8]) -> Result<()>;
75
76    /// List directory contents
77    ///
78    /// # Errors
79    /// Returns an error if the directory cannot be read.
80    async fn list_dir(&self, path: &str) -> Result<Vec<FileEntry>>;
81
82    /// Check if path exists
83    ///
84    /// # Errors
85    /// Returns an error if existence cannot be determined.
86    async fn exists(&self, path: &str) -> Result<bool>;
87
88    /// Check if path is a directory
89    ///
90    /// # Errors
91    /// Returns an error if the check fails.
92    async fn is_dir(&self, path: &str) -> Result<bool>;
93
94    /// Check if path is a file
95    ///
96    /// # Errors
97    /// Returns an error if the check fails.
98    async fn is_file(&self, path: &str) -> Result<bool>;
99
100    /// Create directory (including parents)
101    ///
102    /// # Errors
103    /// Returns an error if the directory cannot be created.
104    async fn create_dir(&self, path: &str) -> Result<()>;
105
106    /// Delete file
107    ///
108    /// # Errors
109    /// Returns an error if the file cannot be deleted.
110    async fn delete_file(&self, path: &str) -> Result<()>;
111
112    /// Delete directory (must be empty unless recursive)
113    ///
114    /// # Errors
115    /// Returns an error if the directory cannot be deleted.
116    async fn delete_dir(&self, path: &str, recursive: bool) -> Result<()>;
117
118    /// Search for pattern in files (like ripgrep)
119    ///
120    /// # Errors
121    /// Returns an error if the search fails.
122    async fn grep(&self, pattern: &str, path: &str, recursive: bool) -> Result<Vec<GrepMatch>>;
123
124    /// Find files matching glob pattern
125    ///
126    /// # Errors
127    /// Returns an error if the glob operation fails.
128    async fn glob(&self, pattern: &str) -> Result<Vec<String>>;
129
130    /// Execute a shell command
131    ///
132    /// Not all environments support this. Default implementation returns an error.
133    ///
134    /// # Errors
135    /// Returns an error if command execution is not supported or fails.
136    async fn exec(&self, _command: &str, _timeout_ms: Option<u64>) -> Result<ExecResult> {
137        anyhow::bail!("Command execution not supported in this environment")
138    }
139
140    /// Get the root/working directory for this environment
141    fn root(&self) -> &str;
142
143    /// Resolve an input path to an absolute path **clamped to [`root`](Environment::root).**
144    ///
145    /// This is the path-policy boundary for primitive tools: it interprets the
146    /// model-supplied `path` against the environment root and guarantees the
147    /// result can never lexically escape that root. Relative inputs are joined
148    /// to the root; absolute inputs are accepted only when they already fall
149    /// inside the root, otherwise they (and any `..` traversal) are clamped
150    /// back inside it. So with `root = /workspace`, both `../../etc/passwd`
151    /// and `/etc/passwd` resolve to a path under `/workspace`, never to the
152    /// host's `/etc/passwd`.
153    ///
154    /// # Security
155    ///
156    /// The clamp is **lexical** (it does not touch the filesystem) so it cannot
157    /// detect a symlink *inside* the root that points outside it. Callers that
158    /// resolve against a real filesystem and need a symlink-proof boundary must
159    /// use [`resolve_within_root_secure`], which canonicalizes the path and
160    /// verifies containment after following links.
161    fn resolve_path(&self, path: &str) -> String {
162        resolve_within_root(self.root(), path)
163    }
164}
165
166/// Resolve `path` against `root`, clamping the result so it can never lexically
167/// escape `root`.
168///
169/// Relative inputs are joined to `root`. Absolute inputs are kept as-is **only**
170/// when they already resolve inside `root`; any input that would escape (via a
171/// leading `/` outside the root, or `..` traversal) is re-interpreted as
172/// root-relative so the result stays within `root`. The returned string is
173/// lexically normalized (`.` / `..` resolved).
174///
175/// This is a lexical boundary and does **not** follow symlinks — see
176/// [`resolve_within_root_secure`] for a symlink-proof check against a real
177/// filesystem.
178#[must_use]
179pub fn resolve_within_root(root: &str, path: &str) -> String {
180    let root_norm = normalize_path_buf(Path::new(root));
181    let joined = if path.starts_with('/') {
182        PathBuf::from(path)
183    } else {
184        root_norm.join(path)
185    };
186    let normalized = normalize_path_buf(&joined);
187    if normalized == root_norm || normalized.starts_with(&root_norm) {
188        normalized.to_string_lossy().into_owned()
189    } else {
190        // The input escaped the root. Clamp it back inside by treating it as
191        // strictly root-relative: leading separators and any `..` that would
192        // climb above the root are dropped.
193        clamp_to_root(&root_norm, path)
194            .to_string_lossy()
195            .into_owned()
196    }
197}
198
/// Build a path under `root_norm` from `path`, treating `path` as strictly
/// root-relative.
///
/// Leading separators (and Windows prefixes) in `path` are ignored, `.` is
/// skipped, and `..` only undoes segments that `path` itself contributed — it
/// can never remove a component of `root_norm`.
fn clamp_to_root(root_norm: &Path, path: &str) -> PathBuf {
    let mut confined: PathBuf = root_norm.components().collect();
    // Number of segments appended on top of the root so far; `..` may only
    // consume these.
    let mut depth_below_root = 0_usize;

    for part in Path::new(path).components() {
        match part {
            Component::Normal(segment) => {
                confined.push(segment);
                depth_below_root += 1;
            }
            Component::ParentDir => {
                if depth_below_root > 0 {
                    confined.pop();
                    depth_below_root -= 1;
                }
            }
            // The input is always read relative to the root, so an anchor such
            // as a leading `/` or a drive prefix never restarts resolution.
            Component::Prefix(_) | Component::RootDir | Component::CurDir => {}
        }
    }

    confined
}
219
220/// Lexically normalize a path by resolving `.` and `..` components without
221/// hitting the filesystem.
222///
223/// This collapses `.` / `..` segments but only clamps at the **filesystem
224/// root** (`/`); it is unaware of any environment root, so on its own it does
225/// **not** confine a path to an allowed directory — `/workspace/../../etc` still
226/// normalizes to `/etc`. Use [`resolve_within_root`] to clamp to an environment
227/// root, and [`resolve_within_root_secure`] when symlinks must also be defeated.
228/// Unlike [`std::fs::canonicalize`], this does not require the path to exist and
229/// does not follow symlinks.
230#[must_use]
231pub fn normalize_path(path: &Path) -> String {
232    normalize_path_buf(path).to_string_lossy().into_owned()
233}
234
/// Resolve `.` and `..` segments of `path` lexically and return a `PathBuf`.
///
/// `.` segments are dropped. A `..` removes the preceding normal segment when
/// there is one and is otherwise discarded, so it can never climb above the
/// filesystem root (and a leading `..` on a relative path simply disappears).
/// When nothing is left, `/` is returned.
///
/// See [`normalize_path`] for the security caveats: this is a lexical helper
/// that clamps only at the filesystem root and does not follow symlinks.
#[must_use]
pub fn normalize_path_buf(path: &Path) -> PathBuf {
    let kept = path
        .components()
        .fold(Vec::new(), |mut kept: Vec<Component<'_>>, part| {
            match part {
                // `.` contributes nothing.
                Component::CurDir => {}
                Component::ParentDir => {
                    // Step back only over a normal segment; a root or prefix
                    // anchor is never removed.
                    if let Some(Component::Normal(_)) = kept.last() {
                        kept.pop();
                    }
                }
                anchor_or_segment => kept.push(anchor_or_segment),
            }
            kept
        });

    if kept.is_empty() {
        return PathBuf::from("/");
    }
    kept.into_iter().collect()
}
260
261/// Resolve `path` against `root` on a real filesystem, following symlinks and
262/// verifying the final target is contained within `root`.
263///
264/// Unlike [`resolve_within_root`] (a purely lexical clamp), this canonicalizes
265/// the deepest existing ancestor of the target — resolving any symlinks along
266/// the way — and rejects the path if the resolved location escapes the
267/// canonicalized `root`. This is the symlink-proof check that
268/// `LocalFileSystem`-backed tools should use before reading or writing, so a
269/// link such as `workspace/evil -> /etc` cannot be used to step outside the
270/// sandbox.
271///
272/// # Errors
273/// Returns an error if `root` cannot be canonicalized, if an existing ancestor
274/// cannot be canonicalized, or if the resolved path escapes `root`.
275pub fn resolve_within_root_secure(root: &Path, path: &str) -> Result<PathBuf> {
276    let canonical_root = std::fs::canonicalize(root)
277        .with_context(|| format!("failed to canonicalize environment root {}", root.display()))?;
278    let clamped = clamp_to_root(&normalize_path_buf(root), path);
279    let resolved = canonicalize_deepest_existing(&clamped)?;
280    ensure!(
281        resolved == canonical_root || resolved.starts_with(&canonical_root),
282        "path {} escapes the environment root {} after resolving symlinks",
283        resolved.display(),
284        canonical_root.display(),
285    );
286    Ok(resolved)
287}
288
289/// Canonicalize the deepest existing ancestor of `path` (resolving symlinks)
290/// and re-append the non-existent tail, so a not-yet-created file still has its
291/// real parent resolved.
292fn canonicalize_deepest_existing(path: &Path) -> Result<PathBuf> {
293    let mut existing = path.to_path_buf();
294    let mut tail: Vec<OsString> = Vec::new();
295    while !existing.exists() {
296        let Some(name) = existing.file_name().map(ToOwned::to_owned) else {
297            break;
298        };
299        tail.push(name);
300        if !existing.pop() {
301            break;
302        }
303    }
304    let mut resolved = if existing.as_os_str().is_empty() {
305        PathBuf::from("/")
306    } else {
307        std::fs::canonicalize(&existing)
308            .with_context(|| format!("failed to canonicalize {}", existing.display()))?
309    };
310    for name in tail.into_iter().rev() {
311        resolved.push(name);
312    }
313    Ok(resolved)
314}
315
316/// A null environment that rejects all operations.
317/// Useful as a default when no environment is configured.
318pub struct NullEnvironment;
319
320#[async_trait]
321impl Environment for NullEnvironment {
322    async fn read_file(&self, _path: &str) -> Result<String> {
323        anyhow::bail!("No environment configured")
324    }
325
326    async fn read_file_bytes(&self, _path: &str) -> Result<Vec<u8>> {
327        anyhow::bail!("No environment configured")
328    }
329
330    async fn write_file(&self, _path: &str, _content: &str) -> Result<()> {
331        anyhow::bail!("No environment configured")
332    }
333
334    async fn write_file_bytes(&self, _path: &str, _content: &[u8]) -> Result<()> {
335        anyhow::bail!("No environment configured")
336    }
337
338    async fn list_dir(&self, _path: &str) -> Result<Vec<FileEntry>> {
339        anyhow::bail!("No environment configured")
340    }
341
342    async fn exists(&self, _path: &str) -> Result<bool> {
343        anyhow::bail!("No environment configured")
344    }
345
346    async fn is_dir(&self, _path: &str) -> Result<bool> {
347        anyhow::bail!("No environment configured")
348    }
349
350    async fn is_file(&self, _path: &str) -> Result<bool> {
351        anyhow::bail!("No environment configured")
352    }
353
354    async fn create_dir(&self, _path: &str) -> Result<()> {
355        anyhow::bail!("No environment configured")
356    }
357
358    async fn delete_file(&self, _path: &str) -> Result<()> {
359        anyhow::bail!("No environment configured")
360    }
361
362    async fn delete_dir(&self, _path: &str, _recursive: bool) -> Result<()> {
363        anyhow::bail!("No environment configured")
364    }
365
366    async fn grep(&self, _pattern: &str, _path: &str, _recursive: bool) -> Result<Vec<GrepMatch>> {
367        anyhow::bail!("No environment configured")
368    }
369
370    async fn glob(&self, _pattern: &str) -> Result<Vec<String>> {
371        anyhow::bail!("No environment configured")
372    }
373
374    fn root(&self) -> &'static str {
375        "/"
376    }
377}
378
#[cfg(test)]
mod tests {
    use super::*;

    // --- normalize_path: lexical normalization only ---------------------

    #[test]
    fn test_normalize_path_resolves_parent_dir() {
        let normalized = normalize_path(Path::new("/workspace/src/../../etc/passwd"));
        assert_eq!(normalized, "/etc/passwd");
    }

    #[test]
    fn test_normalize_path_resolves_current_dir() {
        let normalized = normalize_path(Path::new("/workspace/./src/./file.rs"));
        assert_eq!(normalized, "/workspace/src/file.rs");
    }

    #[test]
    fn test_normalize_path_lexical_clamps_only_at_filesystem_root() {
        // `normalize_path` knows about the filesystem root (`/`) and nothing
        // else. It is deliberately unaware of any environment root, so the
        // result is NOT kept under `/workspace`; confinement belongs to
        // `resolve_within_root` / `resolve_path`.
        let normalized = normalize_path(Path::new("/workspace/../../../etc/shadow"));
        assert_eq!(normalized, "/etc/shadow");
    }

    #[test]
    fn test_normalize_path_identity() {
        let normalized = normalize_path(Path::new("/workspace/src/main.rs"));
        assert_eq!(normalized, "/workspace/src/main.rs");
    }

    #[test]
    fn test_normalize_path_clamps_at_root() {
        // Surplus `..` segments stop at `/` instead of going above it.
        let normalized = normalize_path(Path::new("/a/../../../../z"));
        assert_eq!(normalized, "/z");
    }

    // --- Environment::resolve_path with a `/` root ----------------------

    #[test]
    fn test_resolve_path_normalizes_traversal() {
        // `NullEnvironment` is rooted at "/", so relative inputs hang off "/".
        let resolved = NullEnvironment.resolve_path("src/../../etc/passwd");
        assert_eq!(resolved, "/etc/passwd");
    }

    #[test]
    fn test_resolve_path_absolute_normalized() {
        let resolved = NullEnvironment.resolve_path("/workspace/src/../../../etc/passwd");
        assert_eq!(resolved, "/etc/passwd");
    }

    // --- resolve_within_root: lexical confinement -----------------------

    #[test]
    fn resolve_within_root_keeps_paths_already_inside_root() {
        // Absolute input that already lives under the root: passed through
        // (after lexical normalization).
        let from_absolute = resolve_within_root("/workspace", "/workspace/src/main.rs");
        assert_eq!(from_absolute, "/workspace/src/main.rs");

        // Relative input: joined to the root.
        let from_relative = resolve_within_root("/workspace", "src/main.rs");
        assert_eq!(from_relative, "/workspace/src/main.rs");
    }

    #[test]
    fn resolve_within_root_clamps_parent_traversal() {
        // `..` cannot climb above the root; the path is pulled back inside.
        let leading_dots = resolve_within_root("/workspace", "../../etc/passwd");
        assert_eq!(leading_dots, "/workspace/etc/passwd");

        let nested_dots = resolve_within_root("/workspace", "src/../../../../etc/passwd");
        assert_eq!(nested_dots, "/workspace/etc/passwd");
    }

    #[test]
    fn resolve_within_root_clamps_absolute_escape() {
        // An absolute path outside the root is re-rooted, never let out.
        let rerooted = resolve_within_root("/workspace", "/etc/passwd");
        assert_eq!(rerooted, "/workspace/etc/passwd");
    }

    #[test]
    fn resolve_within_root_does_not_confuse_sibling_prefixes() {
        // `/workspace-evil` shares a string prefix with `/workspace` but is a
        // different directory and must not count as inside it.
        let rerooted = resolve_within_root("/workspace", "/workspace-evil/secret");
        assert_eq!(rerooted, "/workspace/workspace-evil/secret");
    }

    // --- resolve_within_root_secure: symlink-aware confinement ----------

    #[cfg(unix)]
    #[test]
    fn resolve_within_root_secure_rejects_symlink_escape() -> Result<()> {
        use std::os::unix::fs::symlink;

        // Scratch layout: <tmp>/<unique>/{workspace,outside}
        let nanos = time::OffsetDateTime::now_utc().unix_timestamp_nanos();
        let scratch_name = format!("agent-sdk-secpath-{}-{nanos}", std::process::id());
        let base = std::env::temp_dir().join(scratch_name);
        let root = base.join("workspace");
        let outside = base.join("outside");
        for dir in [&root, &outside] {
            std::fs::create_dir_all(dir)?;
        }
        std::fs::write(outside.join("secret.txt"), b"top secret")?;

        // Plant a link inside the root whose target is outside it. The
        // lexical `resolve_within_root` would treat `link/secret.txt` as
        // contained; the secure resolver has to refuse it.
        symlink(&outside, root.join("link"))?;

        let escape = resolve_within_root_secure(&root, "link/secret.txt");
        assert!(
            escape.is_err(),
            "symlink escape must be rejected, got {escape:?}"
        );

        // An existing file that really is inside the root resolves fine.
        std::fs::write(root.join("inside.txt"), b"ok")?;
        let inside = resolve_within_root_secure(&root, "inside.txt")?;
        assert!(inside.starts_with(std::fs::canonicalize(&root)?));

        // So does a file that does not exist yet (its parent is resolved).
        let new_file = resolve_within_root_secure(&root, "subdir/new.txt")?;
        assert!(new_file.starts_with(std::fs::canonicalize(&root)?));

        // Best-effort cleanup of the scratch directory.
        let _ = std::fs::remove_dir_all(&base);
        Ok(())
    }
}