Skip to main content

matrixcode_core/
path_validator.rs

//! Path validation for file operations.
//!
//! This module provides security checks for file paths to prevent:
//! - Path traversal attacks (e.g., `../../../etc/passwd`)
//! - Accessing files outside the project directory
//! - Writing to critical system files
7
8use anyhow::{Result, Context};
9use std::path::{Path, PathBuf};
10
/// Largest content size, in bytes, accepted for a single file write (10 MiB).
pub const MAX_FILE_SIZE: usize = 10 * (1 << 20);

/// Longest path string accepted, measured in bytes of the UTF-8 input.
pub const MAX_PATH_LENGTH: usize = 1 << 10;
16
17/// Validate a file path for security.
18/// 
19/// **Checks performed**:
20/// 1. Path length must not exceed MAX_PATH_LENGTH
21/// 2. No path traversal patterns (..)
22/// 3. Path must be within project base directory (if specified)
23/// 4. Cannot write to critical system files (if writing)
24/// 
25/// # Arguments
26/// * `path_str` - User-provided path string
27/// * `base_dir` - Project base directory (optional)
28/// * `is_write` - Whether this is a write operation (more strict)
29/// 
30/// # Returns
31/// * `Ok(PathBuf)` - Validated canonical path
32/// * `Err(...)` - Validation failure with descriptive error
33pub fn validate_path(
34    path_str: &str,
35    base_dir: Option<&Path>,
36    is_write: bool
37) -> Result<PathBuf> {
38    // 1. Check path length
39    if path_str.len() > MAX_PATH_LENGTH {
40        return Err(anyhow::anyhow!(
41            "Path too long: {} characters (max: {})",
42            path_str.len(),
43            MAX_PATH_LENGTH
44        ));
45    }
46    
47    // 2. Check for path traversal
48    if path_str.contains("..") {
49        return Err(anyhow::anyhow!(
50            "Path traversal detected: '{}'. Paths cannot contain '..' for security",
51            path_str
52        ));
53    }
54    
55    // 3. Check for empty path
56    if path_str.trim().is_empty() {
57        return Err(anyhow::anyhow!("Path cannot be empty"));
58    }
59    
60    // 4. Create PathBuf and resolve
61    let path = PathBuf::from(path_str);
62    let is_relative = path.is_relative();  // Check before potential move
63    
64    // 5. Check for critical system files (for write operations)
65    if is_write {
66        check_critical_system_files(&path)?;
67    }
68    
69    // 6. Resolve against base directory
70    let resolved_path = if let Some(base) = base_dir {
71        // If path is absolute, check if it's within base
72        if path.is_absolute() {
73            // For absolute paths, we allow them but warn in docs
74            // Users can configure whether to allow absolute paths
75            path
76        } else {
77            // Relative path: resolve against base
78            base.join(&path)
79        }
80    } else {
81        // No base directory specified
82        if path.is_absolute() {
83            path
84        } else {
85            // Relative to current directory
86            std::env::current_dir()
87                .context("Cannot get current directory")?
88                .join(&path)
89        }
90    };
91    
92    // 7. Try to canonicalize (for existing paths)
93    // For non-existing paths (write operations), we do a best-effort resolution
94    let canonical = if resolved_path.exists() {
95        resolved_path.canonicalize()
96            .with_context(|| format!("Cannot resolve path: {}", resolved_path.display()))?
97    } else {
98        // Path doesn't exist yet (write operation)
99        // We can't canonicalize, but we can still check security
100        resolved_path.clone()
101    };
102    
103    // 8. Check if path is within base directory (if specified)
104    // For non-existing paths, we check the resolved path (before canonicalize)
105    if let Some(base) = base_dir {
106        let base_canonical = if base.exists() {
107            base.canonicalize()
108                .with_context(|| format!("Cannot resolve base directory: {}", base.display()))?
109        } else {
110            base.to_path_buf()
111        };
112        
113        // Check if resolved/canonical path is within base
114        // For relative paths that don't have traversal, they're considered safe
115        let is_within_base = if is_relative && !path_str.contains("..") {
116            // Relative path without traversal is always safe
117            true
118        } else {
119            // For absolute paths or paths with potential traversal, check strictly
120            resolved_path.starts_with(&base_canonical)
121                || canonical.starts_with(&base_canonical)
122        };
123        
124        if !is_within_base {
125            return Err(anyhow::anyhow!(
126                "Path escapes project directory: '{}'. Resolved path '{}' appears outside '{}'",
127                path_str,
128                resolved_path.display(),
129                base_canonical.display()
130            ));
131        }
132    }
133    
134    Ok(canonical)
135}
136
137/// Check if path targets critical system files.
138fn check_critical_system_files(path: &Path) -> Result<()> {
139    // Critical system files that should never be written
140    const CRITICAL_FILES: &[&str] = &[
141        "/etc/passwd",
142        "/etc/shadow",
143        "/etc/sudoers",
144        "/etc/ssh/sshd_config",
145        "/etc/hosts",
146        "/etc/fstab",
147        "/boot/",
148        "/dev/sda",
149        "/dev/hda",
150        "/proc/",
151        "/sys/",
152    ];
153    
154    let path_str = path.to_string_lossy();
155    
156    for critical in CRITICAL_FILES {
157        if path_str.starts_with(critical) || path_str == *critical {
158            return Err(anyhow::anyhow!(
159                "Cannot write to critical system file: '{}'. This is blocked for security",
160                path.display()
161            ));
162        }
163    }
164    
165    Ok(())
166}
167
168/// Validate content size for file writes.
169pub fn validate_content_size(content: &str) -> Result<()> {
170    if content.len() > MAX_FILE_SIZE {
171        return Err(anyhow::anyhow!(
172            "Content too large: {} bytes (max: {} bytes = {} MB). \
173             Split into smaller files or use streaming",
174            content.len(),
175            MAX_FILE_SIZE,
176            MAX_FILE_SIZE / 1_000_000
177        ));
178    }
179    
180    Ok(())
181}
182
#[cfg(test)]
mod tests {
    //! Unit tests for path and content-size validation.

    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_path_traversal_blocked() {
        let base = TempDir::new().unwrap();

        // Path traversal should be blocked
        assert!(validate_path("../../../etc/passwd", Some(base.path()), false).is_err());
        assert!(validate_path("..\\..\\..\\windows\\system32", Some(base.path()), false).is_err());
        assert!(validate_path("/tmp/../etc/passwd", Some(base.path()), false).is_err());
    }

    #[test]
    fn test_safe_relative_paths_allowed() {
        let base = TempDir::new().unwrap();

        // Safe relative paths should be allowed (even if they don't exist yet)
        // These are typical write operations creating new files
        let result1 = validate_path("src/main.rs", Some(base.path()), true); // write
        let result2 = validate_path("./build/output.txt", Some(base.path()), true); // write
        let result3 = validate_path("config.json", Some(base.path()), true); // write

        // For write operations, relative paths should be allowed
        assert!(result1.is_ok(), "Relative path 'src/main.rs' should be allowed for write");
        assert!(result2.is_ok(), "Relative path './build/output.txt' should be allowed for write");
        assert!(result3.is_ok(), "Relative path 'config.json' should be allowed for write");

        // Validation only blocks dangerous paths; whether the file exists is
        // the caller's concern, so a safe relative path passes for reads too.
        let result4 = validate_path("newfile.txt", Some(base.path()), false);
        assert!(result4.is_ok(), "Safe relative path should be allowed even for read");
    }

    #[test]
    fn test_absolute_paths_handling() {
        let base = TempDir::new().unwrap();

        // Create an actual file in temp dir
        let temp_file = base.path().join("test.txt");
        std::fs::write(&temp_file, "test content").unwrap();

        // Absolute path within temp dir should be allowed for existing file
        assert!(validate_path(temp_file.to_str().unwrap(), Some(base.path()), false).is_ok(),
            "Absolute path within base should be allowed for existing files");

        // Critical system files should always be blocked for writes (even without base)
        assert!(validate_path("/etc/passwd", None, true).is_err(),
            "Critical system files should be blocked for writes even without base dir");

        // Test absolute path outside base directory (platform-specific)
        #[cfg(unix)]
        {
            // "/var/outside.txt" is not beneath the freshly created temp directory
            let outside_path = "/var/outside.txt";
            let result = validate_path(outside_path, Some(base.path()), true);
            assert!(result.is_err(),
                "Absolute path '{}' outside base should be rejected for write", outside_path);
        }

        #[cfg(windows)]
        {
            // On Windows, test with Windows-specific path
            let outside_path = "C:\\Windows\\outside.txt";
            let result = validate_path(outside_path, Some(base.path()), true);
            assert!(result.is_err(),
                "Absolute path '{}' outside base should be rejected for write", outside_path);
        }
    }

    #[test]
    fn test_critical_system_files_blocked() {
        // Critical system files should be blocked for writes (even without base dir)
        assert!(validate_path("/etc/passwd", None, true).is_err(),
            "Should block /etc/passwd for write");
        assert!(validate_path("/etc/shadow", None, true).is_err(),
            "Should block /etc/shadow for write");
        assert!(validate_path("/etc/sudoers", None, true).is_err(),
            "Should block /etc/sudoers for write");

        // For reads, system files should be allowed (user's responsibility)
        // We document this in security guidelines
        assert!(validate_path("/etc/passwd", None, false).is_ok(),
            "Reading /etc/passwd should be allowed (documented risk)");
        assert!(validate_path("/etc/hosts", None, false).is_ok(),
            "Reading /etc/hosts should be allowed");
    }

    #[test]
    fn test_path_length_limit() {
        // Very long path should be rejected
        let long_path = "a".repeat(MAX_PATH_LENGTH + 1);
        assert!(validate_path(&long_path, None, false).is_err(),
            "Path exceeding MAX_PATH_LENGTH should be rejected");

        // Normal length should be fine (even if relative)
        let normal_path = "src/main.rs";
        assert!(validate_path(normal_path, None, false).is_ok(),
            "Normal length relative path should be allowed");

        // Absolute normal length path should also be fine
        let abs_path = "/tmp/test.txt";
        assert!(validate_path(abs_path, None, false).is_ok(),
            "Normal length absolute path should be allowed for read");
    }

    #[test]
    fn test_content_size_validation() {
        // Small content should be fine
        let small = "Hello, world!";
        assert!(validate_content_size(small).is_ok());

        // Large content should be rejected
        let large = "x".repeat(MAX_FILE_SIZE + 1);
        assert!(validate_content_size(&large).is_err());

        // Exactly at limit should be fine
        let exact = "x".repeat(MAX_FILE_SIZE);
        assert!(validate_content_size(&exact).is_ok());
    }

    #[test]
    fn test_empty_path_blocked() {
        let base = TempDir::new().unwrap();

        // Empty path should be rejected
        assert!(validate_path("", Some(base.path()), false).is_err());
        assert!(validate_path("   ", Some(base.path()), false).is_err());
    }

    #[test]
    fn test_symlink_escape_blocked() {
        let base = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        // Create a symlink inside base that points at a directory outside it.
        // Creation may fail (e.g. missing privilege on Windows); the check
        // below is skipped in that case.
        let link = base.path().join("escape_link");
        #[cfg(unix)]
        std::os::unix::fs::symlink(outside.path(), &link).ok();
        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(outside.path(), &link).ok();

        if link.exists() {
            // The temp directory itself may sit behind a symlink, so compare
            // against its canonical form rather than the raw path.
            let base_canonical = base.path().canonicalize().unwrap();

            // Access through the symlink must either be rejected or resolve
            // to a location that is still inside the base directory.
            match validate_path("escape_link", Some(base.path()), true) {
                Err(_) => {}
                Ok(resolved) => assert!(
                    resolved.starts_with(&base_canonical),
                    "Symlink resolved to '{}', which is outside '{}'",
                    resolved.display(),
                    base_canonical.display()
                ),
            }
        }
    }
}
337}