path_security/validation.rs
1//! Main validation functions for path security
2
3use anyhow::{bail, Context, Result};
4use std::path::{Path, PathBuf};
5
6use crate::attacks::*;
7use crate::constants::*;
8use crate::encoding::*;
9
10/// Validate and sanitize a file path to prevent path traversal attacks
11///
12/// This function ensures that:
13/// 1. The path doesn't contain ".." sequences (directory traversal)
14/// 2. The path is not absolute (must be relative)
15/// 3. The path resolves to a location within the specified base directory
16/// 4. The path doesn't contain suspicious patterns
17///
18/// # Arguments
19///
20/// * `path` - The relative path to validate
21/// * `base_dir` - The base directory that the path must resolve within
22///
23/// # Returns
24///
25/// Returns the canonical absolute path if validation succeeds, or an error if validation fails.
26///
27/// # Examples
28///
29/// ```rust
30/// use path_security::validate_path;
31/// use std::path::Path;
32/// # use std::fs;
33/// # use tempfile::TempDir;
34///
35/// # fn main() -> anyhow::Result<()> {
36/// # let temp_dir = TempDir::new()?;
37/// # let base_dir = temp_dir.path();
38/// # fs::create_dir(base_dir.join("safe"))?;
39/// // Safe path - allowed
40/// let safe_path = validate_path(Path::new("safe/file.txt"), base_dir)?;
41///
42/// // Dangerous path - rejected
43/// let result = validate_path(Path::new("../etc/passwd"), base_dir);
44/// assert!(result.is_err());
45/// # Ok(())
46/// # }
47/// ```
48pub fn validate_path(path: &Path, base_dir: &Path) -> Result<PathBuf> {
49 // Convert to string for analysis
50 let path_str = path.to_string_lossy().to_string();
51
52 // ========================================================================
53 // PHASE 1: Pre-processing and normalization checks
54 // ========================================================================
55
56 // Normalize and check for whitespace tricks
57 let normalized_path = normalize_and_check(&path_str)?;
58
59 // ========================================================================
60 // PHASE 2: Protocol and URL scheme detection
61 // ========================================================================
62
63 // Check for file:// and other protocol schemes (SSRF prevention)
64 detect_protocol_schemes(&normalized_path)?;
65
66 // ========================================================================
67 // PHASE 3: Encoding attack detection
68 // ========================================================================
69
70 // Detect URL encoding attacks (must come before other checks)
71 detect_url_encoding(&normalized_path)?;
72
73 // Detect UTF-8 overlong encoding
74 detect_overlong_utf8(&normalized_path)?;
75
76 // Detect Unicode encoding tricks
77 detect_unicode_encoding(&normalized_path)?;
78
79 // Detect dangerous Unicode characters
80 detect_dangerous_unicode(&normalized_path)?;
81
82 // ========================================================================
83 // PHASE 4: Path structure validation
84 // ========================================================================
85
86 // Check for absolute paths
87 if path.is_absolute() {
88 bail!("Absolute paths are not allowed: {}", path_str);
89 }
90
91 // Detect path separator manipulation
92 detect_separator_manipulation(&normalized_path)?;
93
94 // Detect advanced traversal patterns
95 detect_advanced_traversal(&normalized_path)?;
96
97 // ========================================================================
98 // PHASE 5: Windows-specific attack detection
99 // ========================================================================
100
101 detect_windows_attacks(&normalized_path)?;
102
103 // ========================================================================
104 // PHASE 6: Basic suspicious pattern checks
105 // ========================================================================
106
107 // Check for suspicious patterns (legacy checks, kept for defense in depth)
108 detect_suspicious_patterns(&normalized_path)?;
109
110 // ========================================================================
111 // PHASE 7: Special path validation
112 // ========================================================================
113
114 // Check against special/sensitive system paths
115 validate_special_paths(&normalized_path)?;
116
117 // ========================================================================
118 // PHASE 8: TOCTOU Prevention and Canonicalization
119 // ========================================================================
120
121 // TOCTOU Prevention: Use atomic operations to prevent race conditions
122 let canonical_path = validate_path_atomic(path, base_dir)?;
123
124 Ok(canonical_path)
125}
126
127/// TOCTOU-safe path validation with atomic operations
128///
129/// This function prevents Time-of-Check-Time-of-Use race conditions by:
130/// 1. Using atomic filesystem operations
131/// 2. Detecting recursive symlinks
132/// 3. Validating path length limits
133/// 4. Enhanced null byte detection
134/// 5. Mixed encoding detection
135fn validate_path_atomic(path: &Path, base_dir: &Path) -> Result<PathBuf> {
136 // ========================================================================
137 // TOCTOU Prevention: Atomic operations
138 // ========================================================================
139
140 // Construct full path by joining with base directory
141 let full_path = base_dir.join(path);
142
143 // Canonicalize base directory first (atomic operation)
144 let canonical_base = base_dir.canonicalize()
145 .context("Failed to canonicalize base directory")?;
146
147 // ========================================================================
148 // Recursive Symlink Detection
149 // ========================================================================
150
151 // Check for recursive symlinks by following the chain
152 let mut visited = std::collections::HashSet::new();
153 let mut current_path = full_path.clone();
154
155 while current_path.is_symlink() {
156 if visited.contains(¤t_path) {
157 bail!("Recursive symlink detected: {}", current_path.display());
158 }
159 visited.insert(current_path.clone());
160
161 current_path = current_path.read_link()
162 .context("Failed to read symlink")?;
163
164 // Prevent infinite loops
165 if visited.len() > MAX_SYMLINK_CHAIN_LENGTH {
166 bail!("Symlink chain too long, possible recursive symlink");
167 }
168 }
169
170 // ========================================================================
171 // Path Length Attack Prevention
172 // ========================================================================
173
174 // Check for extremely long paths that could cause buffer overflows
175 if full_path.to_string_lossy().len() > MAX_PATH_LENGTH {
176 bail!("Path too long: {} characters (max: {})",
177 full_path.to_string_lossy().len(), MAX_PATH_LENGTH);
178 }
179
180 // ========================================================================
181 // Enhanced Null Byte Detection
182 // ========================================================================
183
184 // Check for null bytes anywhere in the path (not just contains)
185 let path_string = full_path.to_string_lossy();
186 let path_bytes = path_string.as_bytes();
187 for (i, &byte) in path_bytes.iter().enumerate() {
188 if byte == 0 {
189 bail!("Null byte detected at position {} in path: {}", i, full_path.display());
190 }
191 }
192
193 // ========================================================================
194 // Mixed Encoding Detection
195 // ========================================================================
196
197 // Detect mixed UTF-8 and UTF-16 encoding attacks
198 let path_str = full_path.to_string_lossy();
199 if detect_mixed_encoding(&path_str) {
200 bail!("Mixed encoding attack detected in path: {}", path_str);
201 }
202
203 // ========================================================================
204 // Atomic Canonicalization
205 // ========================================================================
206
207 // Use atomic canonicalization to prevent TOCTOU
208 let canonical_path = if full_path.exists() {
209 // File exists, canonicalize atomically
210 full_path.canonicalize()
211 .context("Failed to canonicalize existing path")?
212 } else {
213 // File doesn't exist, validate parent directory atomically
214 if let Some(parent) = full_path.parent() {
215 let canonical_parent = parent.canonicalize()
216 .or_else(|_| {
217 // Parent might not exist, construct it from base
218 if let Ok(rel_parent) = parent.strip_prefix(base_dir) {
219 canonical_base.join(rel_parent).canonicalize()
220 } else {
221 Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid parent path"))
222 }
223 })
224 .context("Failed to validate parent directory")?;
225
226 // Construct canonical path from validated parent
227 canonical_parent.join(full_path.file_name().unwrap())
228 } else {
229 bail!("Path has no parent directory: {}", full_path.display());
230 }
231 };
232
233 // ========================================================================
234 // Final Security Check
235 // ========================================================================
236
237 // Verify the canonical path is still within the base directory
238 if !canonical_path.starts_with(&canonical_base) {
239 bail!(
240 "Path traversal detected: '{}' resolves outside base directory '{}'",
241 full_path.display(),
242 canonical_base.display()
243 );
244 }
245
246 Ok(canonical_path)
247}
248
249/// Validate a project name for use in file paths
250///
251/// Project names must:
252/// 1. Only contain alphanumeric characters, hyphens, and underscores
253/// 2. Not start or end with a hyphen or underscore
254/// 3. Be between 1 and 64 characters long
255/// 4. Not contain any path separators or special characters
256/// 5. Not be a reserved system name (Windows: CON, PRN, AUX, etc.)
257///
258/// # Arguments
259///
260/// * `name` - The project name to validate
261///
262/// # Returns
263///
264/// Returns the validated name if validation succeeds, or an error if validation fails.
265///
266/// # Examples
267///
268/// ```rust
269/// use path_security::validate_project_name;
270///
271/// # fn main() -> anyhow::Result<()> {
272/// // Valid names
273/// let name = validate_project_name("my-project")?;
274/// let name = validate_project_name("project_123")?;
275///
276/// // Invalid names
277/// assert!(validate_project_name("../etc").is_err());
278/// assert!(validate_project_name("-invalid").is_err());
279/// assert!(validate_project_name("CON").is_err()); // Windows reserved
280/// # Ok(())
281/// # }
282/// ```
283pub fn validate_project_name(name: &str) -> Result<String> {
284 // Check length
285 if name.is_empty() {
286 bail!("Project name cannot be empty");
287 }
288
289 if name.len() > MAX_PROJECT_NAME_LENGTH {
290 bail!("Project name too long: {} characters (max {})", name.len(), MAX_PROJECT_NAME_LENGTH);
291 }
292
293 // Check for valid characters (alphanumeric, hyphen, underscore)
294 if !name.chars().all(|c| c.is_alphanumeric() || c == '-' || c == '_') {
295 bail!("Project name contains invalid characters: {}", name);
296 }
297
298 // Check it doesn't start or end with hyphen/underscore
299 if name.starts_with('-') || name.starts_with('_') || name.ends_with('-') || name.ends_with('_') {
300 bail!("Project name cannot start or end with '-' or '_': {}", name);
301 }
302
303 // Check for reserved names (OS-specific)
304 let name_upper = name.to_uppercase();
305 if WINDOWS_RESERVED_NAMES.contains(&name_upper.as_str()) {
306 bail!("Project name is a reserved system name: {}", name);
307 }
308
309 Ok(name.to_string())
310}
311
312/// Validate a file name for safety
313///
314/// File names must:
315/// 1. Not contain path separators (/ or \)
316/// 2. Not be "." or ".."
317/// 3. Not contain null bytes or other control characters
318/// 4. Be reasonable length (< 255 characters)
319///
320/// # Arguments
321///
322/// * `filename` - The filename to validate
323///
324/// # Returns
325///
326/// Returns the validated filename if validation succeeds, or an error if validation fails.
327///
328/// # Examples
329///
330/// ```rust
331/// use path_security::validate_filename;
332///
333/// # fn main() -> anyhow::Result<()> {
334/// // Valid filenames
335/// let name = validate_filename("document.pdf")?;
336/// let name = validate_filename("report-2024.xlsx")?;
337///
338/// // Invalid filenames
339/// assert!(validate_filename("../etc/passwd").is_err());
340/// assert!(validate_filename(".").is_err());
341/// assert!(validate_filename("..").is_err());
342/// # Ok(())
343/// # }
344/// ```
345pub fn validate_filename(filename: &str) -> Result<String> {
346 if filename.is_empty() {
347 bail!("Filename cannot be empty");
348 }
349
350 if filename.len() > MAX_FILENAME_LENGTH {
351 bail!("Filename too long: {} characters", filename.len());
352 }
353
354 // Run all encoding and attack detection checks
355 normalize_and_check(filename)?;
356 detect_url_encoding(filename)?;
357 detect_overlong_utf8(filename)?;
358 detect_unicode_encoding(filename)?;
359 detect_dangerous_unicode(filename)?;
360 detect_windows_attacks(filename)?;
361
362 // Check for path separators
363 if filename.contains('/') || filename.contains('\\') {
364 bail!("Filename cannot contain path separators: {}", filename);
365 }
366
367 // Check for current/parent directory references
368 if filename == "." || filename == ".." {
369 bail!("Invalid filename: {}", filename);
370 }
371
372 // Check for null bytes
373 if filename.contains('\0') {
374 bail!("Filename contains null byte");
375 }
376
377 // Check for control characters
378 if filename.chars().any(|c| c.is_control()) {
379 bail!("Filename contains control characters: {}", filename);
380 }
381
382 // Check for trailing dots or spaces (Windows exploit)
383 if filename.ends_with('.') || filename.ends_with(' ') {
384 bail!("Filename cannot end with dot or space: {}", filename);
385 }
386
387 // Check for NTFS streams in filename
388 if filename.contains(':') {
389 bail!("Filename cannot contain colon (NTFS stream syntax): {}", filename);
390 }
391
392 Ok(filename.to_string())
393}