Skip to main content

mixtape_tools/filesystem/
mod.rs

1//! Filesystem tools with path traversal protection.
2//!
3//! All tools in this module operate within a configured `base_path` directory,
4//! preventing access to files outside this boundary. This security model protects
5//! against directory traversal attacks where malicious input like `../../../etc/passwd`
6//! attempts to escape the intended directory.
7//!
8//! # Security Model
9//!
10//! Every file operation validates paths using [`validate_path`] before execution:
11//!
12//! - Paths are resolved relative to `base_path` (or used directly if absolute)
13//! - The resolved path is canonicalized to eliminate `..`, `.`, and symlinks
14//! - The canonical path must start with the canonical `base_path`
15//! - For non-existent paths, the nearest existing ancestor is validated instead
16//!
17//! This means symlinks that point outside `base_path` are rejected, and crafted
18//! paths like `subdir/../../../etc/passwd` are caught after canonicalization.
19//!
20//! # Defense in Depth
21//!
22//! Path validation provides **guardrails for AI agents**, not a complete security
23//! boundary. Error messages intentionally include path details to help agents
24//! understand and correct invalid requests.
25//!
26//! For production deployments with untrusted input, use defense in depth:
27//!
28//! - **Docker isolation**: Run tools in containers with only necessary directories mounted
29//! - **OS-level permissions**: Use a dedicated user with minimal filesystem access
30//! - **Network isolation**: Restrict container network access where possible
31//!
32//! These tools are one layer in a security stack, not a standalone sandbox.
33//!
34//! # Available Tools
35//!
36//! | Tool | Description |
37//! |------|-------------|
38//! | [`ReadFileTool`] | Read file contents with optional offset/limit |
39//! | [`ReadMultipleFilesTool`] | Read multiple files concurrently |
40//! | [`WriteFileTool`] | Write or append to files |
41//! | [`CreateDirectoryTool`] | Create directories (including parents) |
42//! | [`ListDirectoryTool`] | List directory contents recursively |
43//! | [`MoveFileTool`] | Move or rename files and directories |
44//! | [`FileInfoTool`] | Get file metadata (size, timestamps, type) |
45//!
46//! # Building Custom Tools
47//!
48//! Use [`validate_path`] when building your own filesystem tools:
49//!
50//! ```
51//! use mixtape_tools::filesystem::validate_path;
52//! use std::path::Path;
53//!
54//! let base = Path::new("/app/data");
55//! let user_input = Path::new("../etc/passwd");
56//!
57//! // This will return an error because the path escapes base
58//! assert!(validate_path(base, user_input).is_err());
59//! ```
60
61mod create_directory;
62mod file_info;
63mod list_directory;
64mod move_file;
65mod read_file;
66mod read_multiple_files;
67mod write_file;
68
69pub use create_directory::CreateDirectoryTool;
70pub use file_info::FileInfoTool;
71pub use list_directory::ListDirectoryTool;
72pub use move_file::MoveFileTool;
73pub use read_file::ReadFileTool;
74pub use read_multiple_files::ReadMultipleFilesTool;
75pub use write_file::WriteFileTool;
76
77use mixtape_core::tool::{box_tool, DynTool};
78use mixtape_core::ToolError;
79use std::path::{Path, PathBuf};
80
81/// Validates that a path is within the base directory, preventing directory traversal attacks.
82///
83/// This function is the security foundation for all filesystem tools. It ensures that
84/// user-provided paths cannot escape the configured base directory, even when using
85/// tricks like `..` components, absolute paths, or symlinks.
86///
87/// # Arguments
88///
89/// * `base_path` - The root directory that all paths must stay within
90/// * `target_path` - The user-provided path to validate (relative or absolute)
91///
92/// # Returns
93///
94/// * `Ok(PathBuf)` - The validated path, canonicalized if the file exists
95/// * `Err(ToolError::PathValidation)` - If the path escapes the base directory
96///
97/// # Security Properties
98///
99/// - **Symlink resolution**: Symlinks are resolved via canonicalization, so a symlink
100///   pointing outside `base_path` will be rejected
101/// - **Parent traversal**: Paths like `foo/../../../etc` are caught after canonicalization
102/// - **Absolute paths**: Absolute paths outside `base_path` are rejected
103/// - **Non-existent paths**: For paths that don't exist yet (e.g., for write operations),
104///   the nearest existing ancestor is validated instead
105///
106/// # Example
107///
108/// ```
109/// use mixtape_tools::filesystem::validate_path;
110/// use std::path::Path;
111///
112/// let base = Path::new("/home/user/documents");
113///
114/// // Relative path within base - OK
115/// let result = validate_path(base, Path::new("report.txt"));
116/// // Returns Ok with resolved path
117///
118/// // Traversal attempt - REJECTED
119/// let result = validate_path(base, Path::new("../../../etc/passwd"));
120/// assert!(result.is_err());
121///
122/// // Absolute path outside base - REJECTED
123/// let result = validate_path(base, Path::new("/etc/passwd"));
124/// assert!(result.is_err());
125/// ```
126pub fn validate_path(base_path: &Path, target_path: &Path) -> Result<PathBuf, ToolError> {
127    let full_path = if target_path.is_absolute() {
128        target_path.to_path_buf()
129    } else {
130        base_path.join(target_path)
131    };
132
133    // Try to canonicalize if the file exists
134    if full_path.exists() {
135        let canonical = full_path.canonicalize().map_err(|e| {
136            ToolError::PathValidation(format!(
137                "Failed to canonicalize '{}': {}",
138                full_path.display(),
139                e
140            ))
141        })?;
142
143        // Canonicalize base path for comparison
144        let canonical_base = base_path.canonicalize().map_err(|e| {
145            ToolError::PathValidation(format!(
146                "Failed to canonicalize base path '{}': {}",
147                base_path.display(),
148                e
149            ))
150        })?;
151
152        if !canonical.starts_with(&canonical_base) {
153            return Err(ToolError::PathValidation(format!(
154                "Path '{}' escapes base directory '{}' (resolved to '{}')",
155                target_path.display(),
156                canonical_base.display(),
157                canonical.display()
158            )));
159        }
160
161        Ok(canonical)
162    } else {
163        // For non-existent paths, verify the parent is within base
164        let mut check_path = full_path.clone();
165
166        // Find the first existing ancestor
167        while !check_path.exists() {
168            match check_path.parent() {
169                Some(parent) => check_path = parent.to_path_buf(),
170                None => {
171                    return Err(ToolError::PathValidation(format!(
172                        "Invalid path '{}': no valid parent directory exists",
173                        target_path.display()
174                    )))
175                }
176            }
177        }
178
179        // Canonicalize the existing ancestor and verify it's within base
180        let canonical_ancestor = check_path.canonicalize().map_err(|e| {
181            ToolError::PathValidation(format!(
182                "Failed to canonicalize ancestor '{}': {}",
183                check_path.display(),
184                e
185            ))
186        })?;
187
188        let canonical_base = base_path.canonicalize().map_err(|e| {
189            ToolError::PathValidation(format!(
190                "Failed to canonicalize base path '{}': {}",
191                base_path.display(),
192                e
193            ))
194        })?;
195
196        if !canonical_ancestor.starts_with(&canonical_base) {
197            return Err(ToolError::PathValidation(format!(
198                "Path '{}' escapes base directory '{}' (nearest ancestor '{}' is outside)",
199                target_path.display(),
200                canonical_base.display(),
201                canonical_ancestor.display()
202            )));
203        }
204
205        Ok(full_path)
206    }
207}
208
209/// Returns all read-only filesystem tools
210///
211/// These tools can read and inspect files but cannot modify the filesystem.
212pub fn read_only_tools() -> Vec<Box<dyn DynTool>> {
213    vec![
214        box_tool(ReadFileTool::default()),
215        box_tool(ReadMultipleFilesTool::default()),
216        box_tool(ListDirectoryTool::default()),
217        box_tool(FileInfoTool::default()),
218    ]
219}
220
221/// Returns all mutative filesystem tools
222///
223/// These tools can modify the filesystem by writing, creating, or moving files.
224pub fn mutative_tools() -> Vec<Box<dyn DynTool>> {
225    vec![
226        box_tool(WriteFileTool::default()),
227        box_tool(CreateDirectoryTool::default()),
228        box_tool(MoveFileTool::default()),
229    ]
230}
231
232/// Returns all filesystem tools
233pub fn all_tools() -> Vec<Box<dyn DynTool>> {
234    let mut tools = read_only_tools();
235    tools.extend(mutative_tools());
236    tools
237}
238
/// Unit tests for [`validate_path`], run against throwaway temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn test_validate_path_accepts_relative_path_to_existing_file() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("test.txt"), "content").unwrap();

        let path = validate_path(temp_dir.path(), Path::new("test.txt"))
            .expect("existing file inside base should validate");
        assert!(path.ends_with("test.txt"));
    }

    #[test]
    fn test_validate_path_accepts_relative_path_to_nonexistent_file() {
        let temp_dir = TempDir::new().unwrap();

        let path = validate_path(temp_dir.path(), Path::new("new_file.txt"))
            .expect("new file directly under base should validate");
        assert!(path.ends_with("new_file.txt"));
    }

    #[test]
    fn test_validate_path_accepts_nested_nonexistent_path() {
        let temp_dir = TempDir::new().unwrap();
        fs::create_dir(temp_dir.path().join("subdir")).unwrap();

        let result = validate_path(temp_dir.path(), Path::new("subdir/new_file.txt"));
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_path_rejects_traversal_existing_file() {
        let temp_dir = TempDir::new().unwrap();
        let sibling_dir = TempDir::new().unwrap();
        fs::write(sibling_dir.path().join("secret.txt"), "secret").unwrap();

        // Try to escape via `..` into the sibling temporary directory
        let evil_path = format!(
            "../{}/secret.txt",
            sibling_dir.path().file_name().unwrap().to_str().unwrap()
        );
        let result = validate_path(temp_dir.path(), Path::new(&evil_path));

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.to_string().contains("escapes") || err.to_string().contains("Invalid"),
            "Error should mention path escape: {}",
            err
        );
    }

    #[test]
    fn test_validate_path_rejects_absolute_path_outside_base() {
        let temp_dir = TempDir::new().unwrap();
        let other_dir = TempDir::new().unwrap();
        fs::write(other_dir.path().join("file.txt"), "content").unwrap();

        let result = validate_path(temp_dir.path(), other_dir.path().join("file.txt").as_path());

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("escapes"));
    }

    #[test]
    fn test_validate_path_accepts_absolute_path_inside_base() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("file.txt"), "content").unwrap();

        let absolute_path = temp_dir.path().join("file.txt");
        let result = validate_path(temp_dir.path(), &absolute_path);

        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_path_rejects_nonexistent_with_traversal() {
        let temp_dir = TempDir::new().unwrap();

        // Path tries to climb out of the base via `..`
        let result = validate_path(temp_dir.path(), Path::new("../../../etc/shadow"));

        assert!(result.is_err());
    }

    // Symlink creation uses the Unix-only std API, so the whole test is gated;
    // on other targets it is skipped instead of passing without checking anything.
    #[cfg(unix)]
    #[test]
    fn test_validate_path_handles_symlink_inside_base() {
        let temp_dir = TempDir::new().unwrap();
        let real_file = temp_dir.path().join("real.txt");
        let symlink = temp_dir.path().join("link.txt");

        fs::write(&real_file, "content").unwrap();
        std::os::unix::fs::symlink(&real_file, &symlink).unwrap();

        let result = validate_path(temp_dir.path(), Path::new("link.txt"));
        assert!(result.is_ok(), "Symlink within base should be allowed");
    }

    // Gated as a whole for the same reason as the symlink test above.
    #[cfg(unix)]
    #[test]
    fn test_validate_path_rejects_symlink_escaping_base() {
        let temp_dir = TempDir::new().unwrap();
        let outside_dir = TempDir::new().unwrap();
        let outside_file = outside_dir.path().join("secret.txt");
        fs::write(&outside_file, "secret").unwrap();

        let symlink = temp_dir.path().join("escape_link.txt");
        std::os::unix::fs::symlink(&outside_file, &symlink).unwrap();

        let result = validate_path(temp_dir.path(), Path::new("escape_link.txt"));
        // After canonicalization, the symlink resolves outside base
        assert!(result.is_err(), "Symlink escaping base should be rejected");
    }

    #[test]
    fn test_validate_path_deep_nesting() {
        let temp_dir = TempDir::new().unwrap();
        fs::create_dir_all(temp_dir.path().join("a/b/c/d/e")).unwrap();
        fs::write(temp_dir.path().join("a/b/c/d/e/deep.txt"), "deep").unwrap();

        let result = validate_path(temp_dir.path(), Path::new("a/b/c/d/e/deep.txt"));
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_path_dot_components() {
        let temp_dir = TempDir::new().unwrap();
        fs::create_dir(temp_dir.path().join("subdir")).unwrap();
        fs::write(temp_dir.path().join("subdir/file.txt"), "content").unwrap();

        // Path with `.` components
        let result = validate_path(temp_dir.path(), Path::new("./subdir/./file.txt"));
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_path_nonexistent_with_ancestor_escaping_base() {
        // Exercises the non-existent-path branch of `validate_path` in the case
        // where the nearest existing ancestor lies outside the base directory.
        let base_dir = TempDir::new().unwrap();
        let outside_dir = TempDir::new().unwrap();

        // Create a subdirectory outside base that will be our existing ancestor
        fs::create_dir(outside_dir.path().join("existing_subdir")).unwrap();

        // Absolute path to a file that does not exist; its ancestor
        // (outside_dir/existing_subdir) does exist and is outside base_dir
        let nonexistent_file = outside_dir.path().join("existing_subdir/new_file.txt");

        let result = validate_path(base_dir.path(), &nonexistent_file);

        assert!(
            result.is_err(),
            "Non-existent path with ancestor outside base should be rejected"
        );
        assert!(
            result.unwrap_err().to_string().contains("escapes"),
            "Error should mention path escape"
        );
    }

    #[test]
    fn test_validate_path_deeply_nested_nonexistent() {
        // Only the base exists, so validation has to walk up several levels
        // before it finds an existing ancestor.
        let temp_dir = TempDir::new().unwrap();

        let path = validate_path(temp_dir.path(), Path::new("a/b/c/d/e/f/g/new_file.txt"))
            .expect("ancestor (temp_dir) is within base, so this should validate");
        assert!(path.ends_with("a/b/c/d/e/f/g/new_file.txt"));
    }

    #[test]
    fn test_validate_path_nonexistent_relative_traversal_to_outside() {
        // Traversal whose nearest existing ancestor ends up outside base
        let base_dir = TempDir::new().unwrap();
        let sibling_dir = TempDir::new().unwrap();

        // Create a subdir in sibling so it's the ancestor found
        fs::create_dir(sibling_dir.path().join("subdir")).unwrap();

        // Try: ../sibling_temp_name/subdir/nonexistent.txt
        // The existing ancestor will be sibling_dir/subdir which is outside base
        let evil_path = format!(
            "../{}/subdir/nonexistent.txt",
            sibling_dir.path().file_name().unwrap().to_str().unwrap()
        );

        let result = validate_path(base_dir.path(), Path::new(&evil_path));

        assert!(
            result.is_err(),
            "Traversal to outside ancestor should be rejected"
        );
    }

    #[test]
    fn test_validate_path_error_includes_path_details() {
        // Verify error messages include actionable details for debugging
        let temp_dir = TempDir::new().unwrap();
        let other_dir = TempDir::new().unwrap();
        fs::write(other_dir.path().join("file.txt"), "content").unwrap();

        let result = validate_path(temp_dir.path(), other_dir.path().join("file.txt").as_path());

        let err = result.unwrap_err();
        let err_msg = err.to_string();

        // Error should mention the attempted path
        assert!(
            err_msg.contains("file.txt"),
            "Error should include the target path: {}",
            err_msg
        );

        // Error should mention escaping
        assert!(
            err_msg.contains("escapes"),
            "Error should mention 'escapes': {}",
            err_msg
        );

        // Error should include "resolved to" showing the canonical path
        assert!(
            err_msg.contains("resolved to"),
            "Error should show resolved path: {}",
            err_msg
        );
    }
}