Skip to main content

matrixcode_core/
path_validator.rs

1//! Path validation for file operations.
2//!
3//! This module provides security checks for file paths to prevent:
4//! - Path traversal attacks (e.g., `../../../etc/passwd`)
5//! - Accessing files outside project directory
6//! - Writing to critical system files
7
8use anyhow::{Context, Result};
9use std::path::{Path, PathBuf};
10
/// Maximum allowed file size (10MB)
///
/// Expressed in bytes: `validate_content_size` compares it against
/// `content.len()`, which is the UTF-8 byte length of the content.
pub const MAX_FILE_SIZE: usize = 10 * 1024 * 1024;

/// Maximum allowed path length
///
/// Expressed in bytes: `validate_path` compares it against `path_str.len()`,
/// so a path containing multi-byte characters reaches the limit with fewer
/// than 1024 characters.
pub const MAX_PATH_LENGTH: usize = 1024;
16
17/// Validate a file path for security.
18///
19/// **Checks performed**:
20/// 1. Path length must not exceed MAX_PATH_LENGTH
21/// 2. No path traversal patterns (..)
22/// 3. Path must be within project base directory (if specified)
23/// 4. Cannot write to critical system files (if writing)
24///
25/// # Arguments
26/// * `path_str` - User-provided path string
27/// * `base_dir` - Project base directory (optional)
28/// * `is_write` - Whether this is a write operation (more strict)
29///
30/// # Returns
31/// * `Ok(PathBuf)` - Validated canonical path
32/// * `Err(...)` - Validation failure with descriptive error
33pub fn validate_path(path_str: &str, base_dir: Option<&Path>, is_write: bool) -> Result<PathBuf> {
34    // 1. Check path length
35    if path_str.len() > MAX_PATH_LENGTH {
36        return Err(anyhow::anyhow!(
37            "Path too long: {} characters (max: {})",
38            path_str.len(),
39            MAX_PATH_LENGTH
40        ));
41    }
42
43    // 2. Check for path traversal
44    if path_str.contains("..") {
45        return Err(anyhow::anyhow!(
46            "Path traversal detected: '{}'. Paths cannot contain '..' for security",
47            path_str
48        ));
49    }
50
51    // 3. Check for empty path
52    if path_str.trim().is_empty() {
53        return Err(anyhow::anyhow!("Path cannot be empty"));
54    }
55
56    // 4. Create PathBuf and resolve
57    let path = PathBuf::from(path_str);
58    let is_relative = path.is_relative(); // Check before potential move
59
60    // 5. Check for critical system files (for write operations)
61    if is_write {
62        check_critical_system_files(&path)?;
63    }
64
65    // 6. Resolve against base directory
66    let resolved_path = if let Some(base) = base_dir {
67        // If path is absolute, check if it's within base
68        if path.is_absolute() {
69            // For absolute paths, we allow them but warn in docs
70            // Users can configure whether to allow absolute paths
71            path
72        } else {
73            // Relative path: resolve against base
74            base.join(&path)
75        }
76    } else {
77        // No base directory specified
78        if path.is_absolute() {
79            path
80        } else {
81            // Relative to current directory
82            std::env::current_dir()
83                .context("Cannot get current directory")?
84                .join(&path)
85        }
86    };
87
88    // 7. Try to canonicalize (for existing paths)
89    // For non-existing paths (write operations), we do a best-effort resolution
90    let canonical = if resolved_path.exists() {
91        resolved_path
92            .canonicalize()
93            .with_context(|| format!("Cannot resolve path: {}", resolved_path.display()))?
94    } else {
95        // Path doesn't exist yet (write operation)
96        // We can't canonicalize, but we can still check security
97        resolved_path.clone()
98    };
99
100    // 8. Check if path is within base directory (if specified)
101    // For non-existing paths, we check the resolved path (before canonicalize)
102    if let Some(base) = base_dir {
103        let base_canonical = if base.exists() {
104            base.canonicalize()
105                .with_context(|| format!("Cannot resolve base directory: {}", base.display()))?
106        } else {
107            base.to_path_buf()
108        };
109
110        // Check if resolved/canonical path is within base
111        // For relative paths that don't have traversal, they're considered safe
112        let is_within_base = if is_relative && !path_str.contains("..") {
113            // Relative path without traversal is always safe
114            true
115        } else {
116            // For absolute paths or paths with potential traversal, check strictly
117            resolved_path.starts_with(&base_canonical) || canonical.starts_with(&base_canonical)
118        };
119
120        if !is_within_base {
121            return Err(anyhow::anyhow!(
122                "Path escapes project directory: '{}'. Resolved path '{}' appears outside '{}'",
123                path_str,
124                resolved_path.display(),
125                base_canonical.display()
126            ));
127        }
128    }
129
130    Ok(canonical)
131}
132
133/// Check if path targets critical system files.
134fn check_critical_system_files(path: &Path) -> Result<()> {
135    // Critical system files that should never be written
136    const CRITICAL_FILES: &[&str] = &[
137        "/etc/passwd",
138        "/etc/shadow",
139        "/etc/sudoers",
140        "/etc/ssh/sshd_config",
141        "/etc/hosts",
142        "/etc/fstab",
143        "/boot/",
144        "/dev/sda",
145        "/dev/hda",
146        "/proc/",
147        "/sys/",
148    ];
149
150    let path_str = path.to_string_lossy();
151
152    for critical in CRITICAL_FILES {
153        if path_str.starts_with(critical) || path_str == *critical {
154            return Err(anyhow::anyhow!(
155                "Cannot write to critical system file: '{}'. This is blocked for security",
156                path.display()
157            ));
158        }
159    }
160
161    Ok(())
162}
163
164/// Validate content size for file writes.
165pub fn validate_content_size(content: &str) -> Result<()> {
166    if content.len() > MAX_FILE_SIZE {
167        return Err(anyhow::anyhow!(
168            "Content too large: {} bytes (max: {} bytes = {} MB). \
169             Split into smaller files or use streaming",
170            content.len(),
171            MAX_FILE_SIZE,
172            MAX_FILE_SIZE / 1_000_000
173        ));
174    }
175
176    Ok(())
177}
178
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Paths containing a `..` segment are rejected, whichever separator
    /// style is used and wherever the segment appears.
    #[test]
    fn test_path_traversal_blocked() {
        let base = TempDir::new().unwrap();

        let hostile_inputs = [
            "../../../etc/passwd",
            "..\\..\\..\\windows\\system32",
            "/tmp/../etc/passwd",
        ];
        for hostile in hostile_inputs.iter() {
            assert!(
                validate_path(hostile, Some(base.path()), false).is_err(),
                "Traversal path '{}' should be rejected",
                hostile
            );
        }
    }

    /// Plain relative paths under the base are accepted even when the target
    /// does not exist yet, for both writes and reads.
    #[test]
    fn test_safe_relative_paths_allowed() {
        let base = TempDir::new().unwrap();

        // Typical write operations creating new files.
        let new_files = ["src/main.rs", "./build/output.txt", "config.json"];
        for relative in new_files.iter() {
            assert!(
                validate_path(relative, Some(base.path()), true).is_ok(),
                "Relative path '{}' should be allowed for write",
                relative
            );
        }

        // Validation only blocks dangerous paths; whether a file to be read
        // actually exists is left to the caller.
        assert!(
            validate_path("newfile.txt", Some(base.path()), false).is_ok(),
            "Safe relative path should be allowed even for read"
        );
    }

    /// Absolute paths are accepted inside the base directory and rejected
    /// outside of it.
    #[test]
    fn test_absolute_paths_handling() {
        let base = TempDir::new().unwrap();

        // Create an actual file in the temp dir.
        let temp_file = base.path().join("test.txt");
        std::fs::write(&temp_file, "test content").unwrap();

        assert!(
            validate_path(temp_file.to_str().unwrap(), Some(base.path()), false).is_ok(),
            "Absolute path within base should be allowed for existing files"
        );

        assert!(
            validate_path("/etc/passwd", None, true).is_err(),
            "Critical system files should be blocked for writes even without base dir"
        );

        // Absolute path outside the base directory (platform-specific).
        #[cfg(unix)]
        {
            // "/var/outside.txt" sits directly under /var, which is never the
            // freshly created temporary base directory.
            let outside_path = "/var/outside.txt";
            let result = validate_path(outside_path, Some(base.path()), true);
            assert!(
                result.is_err(),
                "Absolute path '{}' outside base should be rejected for write",
                outside_path
            );
        }

        #[cfg(windows)]
        {
            let outside_path = "C:\\Windows\\outside.txt";
            let result = validate_path(outside_path, Some(base.path()), true);
            assert!(
                result.is_err(),
                "Absolute path '{}' outside base should be rejected for write",
                outside_path
            );
        }
    }

    /// Writes to deny-listed system files are rejected; reads are permitted.
    #[test]
    fn test_critical_system_files_blocked() {
        let protected = ["/etc/passwd", "/etc/shadow", "/etc/sudoers"];
        for target in protected.iter() {
            assert!(
                validate_path(target, None, true).is_err(),
                "Should block {} for write",
                target
            );
        }

        // Reads of system files are allowed (documented risk, caller's
        // responsibility).
        assert!(
            validate_path("/etc/passwd", None, false).is_ok(),
            "Reading /etc/passwd should be allowed (documented risk)"
        );
        assert!(
            validate_path("/etc/hosts", None, false).is_ok(),
            "Reading /etc/hosts should be allowed"
        );
    }

    /// Paths longer than `MAX_PATH_LENGTH` are rejected; ordinary ones pass.
    #[test]
    fn test_path_length_limit() {
        let long_path = "a".repeat(MAX_PATH_LENGTH + 1);
        assert!(
            validate_path(&long_path, None, false).is_err(),
            "Path exceeding MAX_PATH_LENGTH should be rejected"
        );

        assert!(
            validate_path("src/main.rs", None, false).is_ok(),
            "Normal length relative path should be allowed"
        );

        assert!(
            validate_path("/tmp/test.txt", None, false).is_ok(),
            "Normal length absolute path should be allowed for read"
        );
    }

    /// Content is accepted up to and including `MAX_FILE_SIZE` bytes.
    #[test]
    fn test_content_size_validation() {
        assert!(validate_content_size("Hello, world!").is_ok());

        // One byte over the limit is rejected.
        let large = "x".repeat(MAX_FILE_SIZE + 1);
        assert!(validate_content_size(&large).is_err());

        // Exactly at the limit is fine.
        let exact = "x".repeat(MAX_FILE_SIZE);
        assert!(validate_content_size(&exact).is_ok());
    }

    /// Empty and whitespace-only paths are rejected.
    #[test]
    fn test_empty_path_blocked() {
        let base = TempDir::new().unwrap();

        assert!(validate_path("", Some(base.path()), false).is_err());
        assert!(validate_path("   ", Some(base.path()), false).is_err());
    }

    /// A symlink inside the base that points to a directory outside of it
    /// must not be usable to leave the base.
    #[test]
    fn test_symlink_escape_blocked() {
        let base = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();

        // The link target is a directory, so Windows needs `symlink_dir`.
        let link = base.path().join("escape_link");
        #[cfg(unix)]
        let created = std::os::unix::fs::symlink(outside.path(), &link).is_ok();
        #[cfg(windows)]
        let created = std::os::windows::fs::symlink_dir(outside.path(), &link).is_ok();
        #[cfg(not(any(unix, windows)))]
        let created = false;

        // Creating symlinks can require privileges (notably on Windows);
        // without the link there is nothing to verify.
        if !created {
            return;
        }

        // The link resolves outside the base, so rejection is the only
        // acceptable outcome.
        assert!(
            validate_path("escape_link", Some(base.path()), true).is_err(),
            "Symlink pointing outside the base directory should be rejected"
        );
    }
}