path_security/
validation.rs

1//! Main validation functions for path security
2
3use anyhow::{bail, Context, Result};
4use std::path::{Path, PathBuf};
5
6use crate::attacks::*;
7use crate::constants::*;
8use crate::encoding::*;
9
10/// Validate and sanitize a file path to prevent path traversal attacks
11/// 
12/// This function ensures that:
13/// 1. The path doesn't contain ".." sequences (directory traversal)
14/// 2. The path is not absolute (must be relative)
15/// 3. The path resolves to a location within the specified base directory
16/// 4. The path doesn't contain suspicious patterns
17///
18/// # Arguments
19///
20/// * `path` - The relative path to validate
21/// * `base_dir` - The base directory that the path must resolve within
22///
23/// # Returns
24///
25/// Returns the canonical absolute path if validation succeeds, or an error if validation fails.
26///
27/// # Examples
28///
29/// ```rust
30/// use path_security::validate_path;
31/// use std::path::Path;
32/// # use std::fs;
33/// # use tempfile::TempDir;
34///
35/// # fn main() -> anyhow::Result<()> {
36/// # let temp_dir = TempDir::new()?;
37/// # let base_dir = temp_dir.path();
38/// # fs::create_dir(base_dir.join("safe"))?;
39/// // Safe path - allowed
40/// let safe_path = validate_path(Path::new("safe/file.txt"), base_dir)?;
41///
42/// // Dangerous path - rejected
43/// let result = validate_path(Path::new("../etc/passwd"), base_dir);
44/// assert!(result.is_err());
45/// # Ok(())
46/// # }
47/// ```
48pub fn validate_path(path: &Path, base_dir: &Path) -> Result<PathBuf> {
49    // Convert to string for analysis
50    let path_str = path.to_string_lossy().to_string();
51    
52    // ========================================================================
53    // PHASE 1: Pre-processing and normalization checks
54    // ========================================================================
55    
56    // Normalize and check for whitespace tricks
57    let normalized_path = normalize_and_check(&path_str)?;
58    
59    // ========================================================================
60    // PHASE 2: Protocol and URL scheme detection
61    // ========================================================================
62    
63    // Check for file:// and other protocol schemes (SSRF prevention)
64    detect_protocol_schemes(&normalized_path)?;
65    
66    // ========================================================================
67    // PHASE 3: Encoding attack detection
68    // ========================================================================
69    
70    // Detect URL encoding attacks (must come before other checks)
71    detect_url_encoding(&normalized_path)?;
72    
73    // Detect UTF-8 overlong encoding
74    detect_overlong_utf8(&normalized_path)?;
75    
76    // Detect Unicode encoding tricks
77    detect_unicode_encoding(&normalized_path)?;
78    
79    // Detect dangerous Unicode characters
80    detect_dangerous_unicode(&normalized_path)?;
81    
82    // ========================================================================
83    // PHASE 4: Path structure validation
84    // ========================================================================
85    
86    // Check for absolute paths
87    if path.is_absolute() {
88        bail!("Absolute paths are not allowed: {}", path_str);
89    }
90    
91    // Detect path separator manipulation
92    detect_separator_manipulation(&normalized_path)?;
93    
94    // Detect advanced traversal patterns
95    detect_advanced_traversal(&normalized_path)?;
96    
97    // ========================================================================
98    // PHASE 5: Windows-specific attack detection
99    // ========================================================================
100    
101    detect_windows_attacks(&normalized_path)?;
102    
103    // ========================================================================
104    // PHASE 6: Basic suspicious pattern checks
105    // ========================================================================
106    
107    // Check for suspicious patterns (legacy checks, kept for defense in depth)
108    detect_suspicious_patterns(&normalized_path)?;
109    
110    // ========================================================================
111    // PHASE 7: Special path validation
112    // ========================================================================
113    
114    // Check against special/sensitive system paths
115    validate_special_paths(&normalized_path)?;
116    
117    // ========================================================================
118    // PHASE 8: TOCTOU Prevention and Canonicalization
119    // ========================================================================
120    
121    // TOCTOU Prevention: Use atomic operations to prevent race conditions
122    let canonical_path = validate_path_atomic(path, base_dir)?;
123    
124    Ok(canonical_path)
125}
126
127/// TOCTOU-safe path validation with atomic operations
128/// 
129/// This function prevents Time-of-Check-Time-of-Use race conditions by:
130/// 1. Using atomic filesystem operations
131/// 2. Detecting recursive symlinks
132/// 3. Validating path length limits
133/// 4. Enhanced null byte detection
134/// 5. Mixed encoding detection
135fn validate_path_atomic(path: &Path, base_dir: &Path) -> Result<PathBuf> {
136    // ========================================================================
137    // TOCTOU Prevention: Atomic operations
138    // ========================================================================
139    
140    // Construct full path by joining with base directory
141    let full_path = base_dir.join(path);
142    
143    // Canonicalize base directory first (atomic operation)
144    let canonical_base = base_dir.canonicalize()
145        .context("Failed to canonicalize base directory")?;
146    
147    // ========================================================================
148    // Recursive Symlink Detection
149    // ========================================================================
150    
151    // Check for recursive symlinks by following the chain
152    let mut visited = std::collections::HashSet::new();
153    let mut current_path = full_path.clone();
154    
155    while current_path.is_symlink() {
156        if visited.contains(&current_path) {
157            bail!("Recursive symlink detected: {}", current_path.display());
158        }
159        visited.insert(current_path.clone());
160        
161        current_path = current_path.read_link()
162            .context("Failed to read symlink")?;
163        
164        // Prevent infinite loops
165        if visited.len() > MAX_SYMLINK_CHAIN_LENGTH {
166            bail!("Symlink chain too long, possible recursive symlink");
167        }
168    }
169    
170    // ========================================================================
171    // Path Length Attack Prevention
172    // ========================================================================
173    
174    // Check for extremely long paths that could cause buffer overflows
175    if full_path.to_string_lossy().len() > MAX_PATH_LENGTH {
176        bail!("Path too long: {} characters (max: {})", 
177              full_path.to_string_lossy().len(), MAX_PATH_LENGTH);
178    }
179    
180    // ========================================================================
181    // Enhanced Null Byte Detection
182    // ========================================================================
183    
184    // Check for null bytes anywhere in the path (not just contains)
185    let path_string = full_path.to_string_lossy();
186    let path_bytes = path_string.as_bytes();
187    for (i, &byte) in path_bytes.iter().enumerate() {
188        if byte == 0 {
189            bail!("Null byte detected at position {} in path: {}", i, full_path.display());
190        }
191    }
192    
193    // ========================================================================
194    // Mixed Encoding Detection
195    // ========================================================================
196    
197    // Detect mixed UTF-8 and UTF-16 encoding attacks
198    let path_str = full_path.to_string_lossy();
199    if detect_mixed_encoding(&path_str) {
200        bail!("Mixed encoding attack detected in path: {}", path_str);
201    }
202    
203    // ========================================================================
204    // Atomic Canonicalization
205    // ========================================================================
206    
207    // Use atomic canonicalization to prevent TOCTOU
208    let canonical_path = if full_path.exists() {
209        // File exists, canonicalize atomically
210        full_path.canonicalize()
211            .context("Failed to canonicalize existing path")?
212    } else {
213        // File doesn't exist, validate parent directory atomically
214        if let Some(parent) = full_path.parent() {
215            let canonical_parent = parent.canonicalize()
216                .or_else(|_| {
217                    // Parent might not exist, construct it from base
218                    if let Ok(rel_parent) = parent.strip_prefix(base_dir) {
219                        canonical_base.join(rel_parent).canonicalize()
220                    } else {
221                        Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid parent path"))
222                    }
223                })
224                .context("Failed to validate parent directory")?;
225            
226            // Construct canonical path from validated parent
227            canonical_parent.join(full_path.file_name().unwrap())
228        } else {
229            bail!("Path has no parent directory: {}", full_path.display());
230        }
231    };
232    
233    // ========================================================================
234    // Final Security Check
235    // ========================================================================
236    
237    // Verify the canonical path is still within the base directory
238    if !canonical_path.starts_with(&canonical_base) {
239        bail!(
240            "Path traversal detected: '{}' resolves outside base directory '{}'",
241            full_path.display(),
242            canonical_base.display()
243        );
244    }
245    
246    Ok(canonical_path)
247}
248
249/// Validate a project name for use in file paths
250/// 
251/// Project names must:
252/// 1. Only contain alphanumeric characters, hyphens, and underscores
253/// 2. Not start or end with a hyphen or underscore
254/// 3. Be between 1 and 64 characters long
255/// 4. Not contain any path separators or special characters
256/// 5. Not be a reserved system name (Windows: CON, PRN, AUX, etc.)
257///
258/// # Arguments
259///
260/// * `name` - The project name to validate
261///
262/// # Returns
263///
264/// Returns the validated name if validation succeeds, or an error if validation fails.
265///
266/// # Examples
267///
268/// ```rust
269/// use path_security::validate_project_name;
270///
271/// # fn main() -> anyhow::Result<()> {
272/// // Valid names
273/// let name = validate_project_name("my-project")?;
274/// let name = validate_project_name("project_123")?;
275///
276/// // Invalid names
277/// assert!(validate_project_name("../etc").is_err());
278/// assert!(validate_project_name("-invalid").is_err());
279/// assert!(validate_project_name("CON").is_err()); // Windows reserved
280/// # Ok(())
281/// # }
282/// ```
283pub fn validate_project_name(name: &str) -> Result<String> {
284    // Check length
285    if name.is_empty() {
286        bail!("Project name cannot be empty");
287    }
288    
289    if name.len() > MAX_PROJECT_NAME_LENGTH {
290        bail!("Project name too long: {} characters (max {})", name.len(), MAX_PROJECT_NAME_LENGTH);
291    }
292    
293    // Check for valid characters (alphanumeric, hyphen, underscore)
294    if !name.chars().all(|c| c.is_alphanumeric() || c == '-' || c == '_') {
295        bail!("Project name contains invalid characters: {}", name);
296    }
297    
298    // Check it doesn't start or end with hyphen/underscore
299    if name.starts_with('-') || name.starts_with('_') || name.ends_with('-') || name.ends_with('_') {
300        bail!("Project name cannot start or end with '-' or '_': {}", name);
301    }
302    
303    // Check for reserved names (OS-specific)
304    let name_upper = name.to_uppercase();
305    if WINDOWS_RESERVED_NAMES.contains(&name_upper.as_str()) {
306        bail!("Project name is a reserved system name: {}", name);
307    }
308    
309    Ok(name.to_string())
310}
311
312/// Validate a file name for safety
313/// 
314/// File names must:
315/// 1. Not contain path separators (/ or \)
316/// 2. Not be "." or ".."
317/// 3. Not contain null bytes or other control characters
318/// 4. Be reasonable length (< 255 characters)
319///
320/// # Arguments
321///
322/// * `filename` - The filename to validate
323///
324/// # Returns
325///
326/// Returns the validated filename if validation succeeds, or an error if validation fails.
327///
328/// # Examples
329///
330/// ```rust
331/// use path_security::validate_filename;
332///
333/// # fn main() -> anyhow::Result<()> {
334/// // Valid filenames
335/// let name = validate_filename("document.pdf")?;
336/// let name = validate_filename("report-2024.xlsx")?;
337///
338/// // Invalid filenames
339/// assert!(validate_filename("../etc/passwd").is_err());
340/// assert!(validate_filename(".").is_err());
341/// assert!(validate_filename("..").is_err());
342/// # Ok(())
343/// # }
344/// ```
345pub fn validate_filename(filename: &str) -> Result<String> {
346    if filename.is_empty() {
347        bail!("Filename cannot be empty");
348    }
349    
350    if filename.len() > MAX_FILENAME_LENGTH {
351        bail!("Filename too long: {} characters", filename.len());
352    }
353    
354    // Run all encoding and attack detection checks
355    normalize_and_check(filename)?;
356    detect_url_encoding(filename)?;
357    detect_overlong_utf8(filename)?;
358    detect_unicode_encoding(filename)?;
359    detect_dangerous_unicode(filename)?;
360    detect_windows_attacks(filename)?;
361    
362    // Check for path separators
363    if filename.contains('/') || filename.contains('\\') {
364        bail!("Filename cannot contain path separators: {}", filename);
365    }
366    
367    // Check for current/parent directory references
368    if filename == "." || filename == ".." {
369        bail!("Invalid filename: {}", filename);
370    }
371    
372    // Check for null bytes
373    if filename.contains('\0') {
374        bail!("Filename contains null byte");
375    }
376    
377    // Check for control characters
378    if filename.chars().any(|c| c.is_control()) {
379        bail!("Filename contains control characters: {}", filename);
380    }
381    
382    // Check for trailing dots or spaces (Windows exploit)
383    if filename.ends_with('.') || filename.ends_with(' ') {
384        bail!("Filename cannot end with dot or space: {}", filename);
385    }
386    
387    // Check for NTFS streams in filename
388    if filename.contains(':') {
389        bail!("Filename cannot contain colon (NTFS stream syntax): {}", filename);
390    }
391    
392    Ok(filename.to_string())
393}