ruvector_security/
path.rs

1//! Path validation utilities
2//!
3//! Provides protection against path traversal attacks by validating
4//! that paths stay within allowed directories.
5
6use crate::error::{SecurityError, SecurityResult};
7use std::path::{Path, PathBuf};
8
/// Guards file-system access by only accepting paths that resolve inside a
/// configured set of directories.
///
/// # Examples
///
/// ```rust,no_run
/// use ruvector_security::PathValidator;
/// use std::path::PathBuf;
///
/// // Restrict access to two directory trees
/// let validator = PathValidator::new(vec![PathBuf::from("/data"), PathBuf::from("/tmp")]);
///
/// // A path below an allowed directory is accepted
/// // (the path has to exist for validate() to succeed)
/// // validator.validate("/data/vectors.db");
///
/// // Anything outside the allowed directories is refused
/// assert!(validator.validate("/etc/passwd").is_err());
///
/// // So is an attempt to climb out with `..`
/// assert!(validator.validate("/data/../etc/passwd").is_err());
/// ```
#[derive(Debug, Clone)]
pub struct PathValidator {
    /// Directory roots a validated path must fall under; the constructor
    /// canonicalizes them where possible.
    allowed_dirs: Vec<PathBuf>,
    /// When `false`, a path that is itself a symbolic link is rejected.
    allow_symlinks: bool,
    /// Upper bound on the length of a path handed to the validator.
    max_path_length: usize,
}
39
40impl PathValidator {
41    /// Maximum allowed path length (default 4096)
42    pub const DEFAULT_MAX_PATH_LENGTH: usize = 4096;
43
44    /// Create a new path validator with allowed directories
45    pub fn new(allowed_dirs: Vec<PathBuf>) -> Self {
46        // Canonicalize allowed directories where possible
47        let allowed_dirs = allowed_dirs
48            .into_iter()
49            .filter_map(|p| {
50                p.canonicalize().ok().or_else(|| {
51                    // If canonicalization fails, try to resolve relative to cwd
52                    std::env::current_dir()
53                        .ok()
54                        .map(|cwd| cwd.join(&p))
55                        .and_then(|p| p.canonicalize().ok())
56                        .or(Some(p))
57                })
58            })
59            .collect();
60
61        Self {
62            allowed_dirs,
63            allow_symlinks: false,
64            max_path_length: Self::DEFAULT_MAX_PATH_LENGTH,
65        }
66    }
67
68    /// Set whether symlinks are allowed
69    pub fn allow_symlinks(mut self, allow: bool) -> Self {
70        self.allow_symlinks = allow;
71        self
72    }
73
74    /// Set maximum path length
75    pub fn max_path_length(mut self, length: usize) -> Self {
76        self.max_path_length = length;
77        self
78    }
79
80    /// Validate a path and return the canonical path if valid
81    ///
82    /// # Errors
83    ///
84    /// Returns an error if:
85    /// - The path contains path traversal sequences (`..`)
86    /// - The path is outside all allowed directories
87    /// - The path is a symlink (unless allowed)
88    /// - The path exceeds maximum length
89    pub fn validate<P: AsRef<Path>>(&self, path: P) -> SecurityResult<PathBuf> {
90        let path = path.as_ref();
91        let path_str = path.to_string_lossy();
92
93        // Check path length
94        if path_str.len() > self.max_path_length {
95            return Err(SecurityError::InvalidPath(path.to_path_buf()));
96        }
97
98        // Check for null bytes (security risk)
99        if path_str.contains('\0') {
100            return Err(SecurityError::InvalidPathCharacters(path.to_path_buf()));
101        }
102
103        // Check for explicit path traversal in string form
104        // This catches attempts like "foo/../../etc/passwd"
105        if path_str.contains("..") {
106            // Additional check: see if normalized path still contains traversal
107            let normalized = self.normalize_path(path)?;
108            // If after normalization it differs significantly, it's suspicious
109            if !self.is_path_safe(&normalized)? {
110                return Err(SecurityError::PathTraversal(path.to_path_buf()));
111            }
112        }
113
114        // Resolve to canonical path
115        let canonical = self.resolve_canonical(path)?;
116
117        // Check symlink if not allowed
118        if !self.allow_symlinks {
119            if let Ok(metadata) = std::fs::symlink_metadata(&canonical) {
120                if metadata.file_type().is_symlink() {
121                    return Err(SecurityError::SymlinkDetected(path.to_path_buf()));
122                }
123            }
124            // Also check the original path for symlinks
125            if let Ok(metadata) = std::fs::symlink_metadata(path) {
126                if metadata.file_type().is_symlink() {
127                    return Err(SecurityError::SymlinkDetected(path.to_path_buf()));
128                }
129            }
130        }
131
132        // Verify path is within allowed directories
133        self.check_allowed(&canonical)?;
134
135        Ok(canonical)
136    }
137
138    /// Validate a path for a new file (may not exist yet)
139    ///
140    /// This validates the parent directory exists and is within allowed paths,
141    /// and that the filename is safe.
142    pub fn validate_new_file<P: AsRef<Path>>(&self, path: P) -> SecurityResult<PathBuf> {
143        let path = path.as_ref();
144        let path_str = path.to_string_lossy();
145
146        // Check path length
147        if path_str.len() > self.max_path_length {
148            return Err(SecurityError::InvalidPath(path.to_path_buf()));
149        }
150
151        // Check for null bytes
152        if path_str.contains('\0') {
153            return Err(SecurityError::InvalidPathCharacters(path.to_path_buf()));
154        }
155
156        // Check for path traversal
157        if path_str.contains("..") {
158            return Err(SecurityError::PathTraversal(path.to_path_buf()));
159        }
160
161        // Get and validate parent directory
162        let parent = path.parent().ok_or_else(|| SecurityError::InvalidPath(path.to_path_buf()))?;
163
164        let canonical_parent = if parent.exists() {
165            parent.canonicalize().map_err(|_| SecurityError::InvalidPath(parent.to_path_buf()))?
166        } else {
167            // For new directories, resolve as much as possible
168            self.resolve_existing_ancestor(parent)?
169        };
170
171        // Verify parent is within allowed directories
172        self.check_allowed(&canonical_parent)?;
173
174        // Get filename and validate it
175        let filename = path
176            .file_name()
177            .ok_or_else(|| SecurityError::InvalidPath(path.to_path_buf()))?;
178
179        let filename_str = filename.to_string_lossy();
180
181        // Check for hidden files starting with dots (optional, can be removed if needed)
182        // Check for dangerous characters in filename
183        if filename_str.contains('/') || filename_str.contains('\\') || filename_str.contains('\0')
184        {
185            return Err(SecurityError::InvalidPathCharacters(path.to_path_buf()));
186        }
187
188        Ok(canonical_parent.join(filename))
189    }
190
191    /// Normalize a path by resolving `.` and `..` components
192    fn normalize_path(&self, path: &Path) -> SecurityResult<PathBuf> {
193        let mut normalized = PathBuf::new();
194
195        for component in path.components() {
196            match component {
197                std::path::Component::ParentDir => {
198                    if !normalized.pop() {
199                        // Can't go above root
200                        return Err(SecurityError::PathTraversal(path.to_path_buf()));
201                    }
202                }
203                std::path::Component::CurDir => {
204                    // Skip `.`
205                }
206                c => {
207                    normalized.push(c);
208                }
209            }
210        }
211
212        Ok(normalized)
213    }
214
215    /// Check if a normalized path is safe (no traversal above allowed roots)
216    fn is_path_safe(&self, normalized: &Path) -> SecurityResult<bool> {
217        // If path exists, canonicalize and check
218        if normalized.exists() {
219            let canonical = normalized
220                .canonicalize()
221                .map_err(|_| SecurityError::InvalidPath(normalized.to_path_buf()))?;
222            return Ok(self.allowed_dirs.iter().any(|dir| canonical.starts_with(dir)));
223        }
224
225        // For non-existent paths, find the nearest existing ancestor
226        let mut current = normalized.to_path_buf();
227        while !current.exists() {
228            if !current.pop() {
229                return Ok(false);
230            }
231        }
232
233        if current.as_os_str().is_empty() {
234            current = std::env::current_dir().map_err(SecurityError::Io)?;
235        }
236
237        let canonical = current
238            .canonicalize()
239            .map_err(|_| SecurityError::InvalidPath(normalized.to_path_buf()))?;
240
241        Ok(self.allowed_dirs.iter().any(|dir| canonical.starts_with(dir)))
242    }
243
244    /// Resolve to canonical path, handling non-existent files
245    fn resolve_canonical(&self, path: &Path) -> SecurityResult<PathBuf> {
246        if path.exists() {
247            path.canonicalize()
248                .map_err(|_| SecurityError::InvalidPath(path.to_path_buf()))
249        } else {
250            // For non-existent paths, canonicalize the parent
251            self.validate_new_file(path)
252        }
253    }
254
255    /// Find the nearest existing ancestor and canonicalize it
256    fn resolve_existing_ancestor(&self, path: &Path) -> SecurityResult<PathBuf> {
257        let mut current = path.to_path_buf();
258
259        while !current.exists() {
260            if !current.pop() {
261                // Reached root without finding existing path
262                return std::env::current_dir().map_err(SecurityError::Io);
263            }
264        }
265
266        if current.as_os_str().is_empty() {
267            current = std::env::current_dir().map_err(SecurityError::Io)?;
268        }
269
270        current
271            .canonicalize()
272            .map_err(|_| SecurityError::InvalidPath(path.to_path_buf()))
273    }
274
275    /// Check if path is within allowed directories
276    fn check_allowed(&self, canonical: &Path) -> SecurityResult<()> {
277        let allowed = self
278            .allowed_dirs
279            .iter()
280            .any(|dir| canonical.starts_with(dir));
281
282        if !allowed {
283            return Err(SecurityError::PathOutsideAllowed {
284                path: canonical.to_path_buf(),
285                allowed: self.allowed_dirs.clone(),
286            });
287        }
288
289        Ok(())
290    }
291}
292
293impl Default for PathValidator {
294    fn default() -> Self {
295        // Default to current working directory
296        let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
297        Self::new(vec![cwd])
298    }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a scratch directory together with a validator restricted to it.
    /// The returned `TempDir` is handed back so the test keeps it alive.
    fn sandbox() -> (TempDir, PathValidator) {
        let dir = TempDir::new().unwrap();
        let validator = PathValidator::new(vec![dir.path().to_path_buf()]);
        (dir, validator)
    }

    #[test]
    fn test_path_traversal_blocked() {
        let (dir, validator) = sandbox();

        // Climbing out of the sandbox with `..` must be refused
        let escaping = dir.path().join("../../../etc/passwd");
        let outcome = validator.validate(&escaping);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_valid_path_allowed() {
        let (dir, validator) = sandbox();

        // An existing file inside the sandbox is accepted
        let inside = dir.path().join("test.db");
        std::fs::write(&inside, "test").unwrap();
        let outcome = validator.validate(&inside);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_outside_allowed_blocked() {
        let (_dir, validator) = sandbox();

        // An absolute path that is not below the sandbox is refused
        let foreign = PathBuf::from("/etc/passwd");
        let outcome = validator.validate(&foreign);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_null_bytes_blocked() {
        let (dir, validator) = sandbox();

        // An embedded NUL byte makes the path invalid
        let with_nul = dir.path().join("test\0.db");
        let outcome = validator.validate(&with_nul);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_new_file_validation() {
        let (dir, validator) = sandbox();

        // A not-yet-existing file directly inside the sandbox is fine
        let fresh = dir.path().join("new_vectors.db");
        let accepted = validator.validate_new_file(&fresh);
        assert!(accepted.is_ok());

        // A new file reached through `..` is not
        let escaping = dir.path().join("../evil.db");
        let refused = validator.validate_new_file(&escaping);
        assert!(refused.is_err());
    }
}