ddex_builder/security/
path_validator.rs

1//! Cross-Platform Path Validation Module
2//!
3//! This module provides robust, platform-agnostic path validation that prevents:
4//! - Directory traversal attacks (../, ..\, encoded variants)
5//! - Absolute path access (/etc/passwd, C:\Windows\System32)
6//! - UNC path attacks (\\server\share)
7//! - URL-encoded traversal (%2e%2e/, %252e%252e/)
8//! - Null byte injection (file.xml%00.txt)
9//! - Symlink attacks and canonicalization issues
10//! - Unicode normalization attacks
11//!
12//! The validator works identically across Windows, Linux, and macOS by:
13//! - Normalizing all paths to forward slashes internally
14//! - Using platform-specific canonicalization when available
15//! - Implementing whitelist-based validation
16//! - Resolving symlinks and checking final destinations
17
18use crate::error::BuildError;
19use once_cell::sync::Lazy;
20use regex::Regex;
21use std::collections::HashSet;
22use std::fs;
23use std::path::{Component, Path, PathBuf};
24
/// Default upper bound on the length of a sanitized path, in bytes.
///
/// 260 mirrors the classic Windows `MAX_PATH` limit, which keeps accepted paths
/// portable across platforms.
const MAX_PATH_LENGTH: usize = 260;

/// Default upper bound on the number of components in a normalized path.
const MAX_PATH_DEPTH: usize = 32;
30
31/// Directory traversal pattern detection
32static DIRECTORY_TRAVERSAL: Lazy<Regex> =
33    Lazy::new(|| Regex::new(r"(?i)(\.\./|\.\.\x5c|/\.\./|\x5c\.\.\x5c)").unwrap());
34
35/// URL-encoded traversal pattern detection
36static ENCODED_TRAVERSAL: Lazy<Regex> =
37    Lazy::new(|| Regex::new(r"(?i)(%2e%2e%2f|%2e%2e%5c|%252e%252e%252f|%252e%252e%255c)").unwrap());
38
39/// Absolute path detection (Unix/Windows)
40static ABSOLUTE_PATH: Lazy<Regex> =
41    Lazy::new(|| Regex::new(r"(?i)(^[a-zA-Z]:\x5c|^/|^\x5c\x5c)").unwrap());
42
43/// Null byte and dangerous characters
44static DANGEROUS_CHARS: Lazy<Regex> =
45    Lazy::new(|| Regex::new(r"[\x00-\x1F\x7F-\x9F]|%00").unwrap());
46
47/// Windows reserved filenames
48static WINDOWS_RESERVED: Lazy<Regex> =
49    Lazy::new(|| Regex::new(r"(?i)^(con|prn|aux|nul|com[1-9]|lpt[1-9])(\.|$)").unwrap());
50
51/// Suspicious file extensions
52static SUSPICIOUS_EXTENSIONS: Lazy<Regex> =
53    Lazy::new(|| Regex::new(r"(?i)\.(exe|bat|cmd|com|scr|pif|vbs|js|jar|dll|sys)$").unwrap());
54
55/// Windows reserved device names
56static WINDOWS_RESERVED_NAMES: Lazy<HashSet<&str>> = Lazy::new(|| {
57    let mut set = HashSet::new();
58    set.insert("CON");
59    set.insert("PRN");
60    set.insert("AUX");
61    set.insert("NUL");
62    set.insert("COM1");
63    set.insert("COM2");
64    set.insert("COM3");
65    set.insert("COM4");
66    set.insert("COM5");
67    set.insert("COM6");
68    set.insert("COM7");
69    set.insert("COM8");
70    set.insert("COM9");
71    set.insert("LPT1");
72    set.insert("LPT2");
73    set.insert("LPT3");
74    set.insert("LPT4");
75    set.insert("LPT5");
76    set.insert("LPT6");
77    set.insert("LPT7");
78    set.insert("LPT8");
79    set.insert("LPT9");
80    set
81});
82
/// Tunable policy for [`PathValidator`].
///
/// Every field is public so callers can start from `PathValidationConfig::default()`
/// and override only what they need.
#[derive(Debug, Clone)]
pub struct PathValidationConfig {
    /// Longest accepted path, measured in bytes of the sanitized input.
    pub max_path_length: usize,
    /// Largest accepted number of path components.
    pub max_path_depth: usize,
    /// Whitelist of base directories a path must live under.
    pub allowed_base_dirs: Vec<PathBuf>,
    /// When `true`, any relative path is accepted even if it is outside the whitelist.
    pub allow_relative_outside_base: bool,
    /// When `true`, symlinks are resolved and their destinations validated.
    pub validate_symlinks: bool,
    /// When `true`, validation fails for paths that do not exist.
    pub check_existence: bool,
    /// Extensions (lower-case, without the dot) that do not trigger an
    /// "unusual extension" warning.
    pub allowed_extensions: HashSet<String>,
    /// When `true`, components starting with `.` (hidden files/directories) are accepted.
    pub allow_hidden: bool,
}
103
104impl Default for PathValidationConfig {
105    fn default() -> Self {
106        let mut allowed_extensions = HashSet::new();
107        allowed_extensions.insert("xml".to_string());
108        allowed_extensions.insert("json".to_string());
109        allowed_extensions.insert("txt".to_string());
110        allowed_extensions.insert("csv".to_string());
111
112        Self {
113            max_path_length: MAX_PATH_LENGTH,
114            max_path_depth: MAX_PATH_DEPTH,
115            allowed_base_dirs: vec![
116                PathBuf::from("data"),
117                PathBuf::from("input"),
118                PathBuf::from("output"),
119                PathBuf::from("temp"),
120                PathBuf::from("."),
121            ],
122            allow_relative_outside_base: false,
123            validate_symlinks: true,
124            check_existence: false,
125            allowed_extensions,
126            allow_hidden: false,
127        }
128    }
129}
130
/// Outcome of a successful [`PathValidator::validate`] call.
#[derive(Debug, Clone)]
pub struct ValidatedPath {
    /// The path string exactly as the caller supplied it.
    pub original: String,
    /// The sanitized path with unified separators and no empty or `.` components.
    pub normalized: PathBuf,
    /// The canonicalized path, when canonicalization succeeded.
    pub canonical: Option<PathBuf>,
    /// Whether the path existed at validation time.
    pub exists: bool,
    /// Non-fatal findings about the path (suspicious but not rejected).
    pub warnings: Vec<String>,
}
145
146/// Cross-platform path validator
147#[derive(Debug, Clone)]
148pub struct PathValidator {
149    config: PathValidationConfig,
150}
151
152impl PathValidator {
153    /// Create a new path validator with default configuration
154    pub fn new() -> Self {
155        Self {
156            config: PathValidationConfig::default(),
157        }
158    }
159
160    /// Create a new path validator with custom configuration
161    pub fn with_config(config: PathValidationConfig) -> Self {
162        Self { config }
163    }
164
165    /// Validate a path string for security issues
166    pub fn validate(&self, path_str: &str) -> Result<ValidatedPath, BuildError> {
167        // Input sanitization and initial checks
168        let sanitized_input = self.sanitize_input(path_str)?;
169
170        // Length check
171        if sanitized_input.len() > self.config.max_path_length {
172            return Err(BuildError::InputSanitization(format!(
173                "Path too long: {} > {}",
174                sanitized_input.len(),
175                self.config.max_path_length
176            )));
177        }
178
179        // Detect dangerous patterns
180        self.detect_dangerous_patterns(&sanitized_input)?;
181
182        // Normalize the path
183        let normalized = self.normalize_path(&sanitized_input)?;
184
185        // Validate path components
186        self.validate_components(&normalized)?;
187
188        // Check against whitelist
189        self.validate_against_whitelist(&normalized)?;
190
191        // Handle canonicalization (platform-aware)
192        let (canonical, exists) = self.safe_canonicalize(&normalized);
193
194        // Validate symlinks if enabled
195        if self.config.validate_symlinks {
196            self.validate_symlinks(&normalized, &canonical)?;
197        }
198
199        // Check file existence if required
200        if self.config.check_existence && !exists {
201            return Err(BuildError::InputSanitization(
202                "File does not exist".to_string(),
203            ));
204        }
205
206        let warnings = self.collect_warnings(&sanitized_input, &normalized);
207
208        Ok(ValidatedPath {
209            original: path_str.to_string(),
210            normalized,
211            canonical,
212            exists,
213            warnings,
214        })
215    }
216
217    /// Sanitize input string and detect encoding attacks
218    fn sanitize_input(&self, input: &str) -> Result<String, BuildError> {
219        // Check for null bytes
220        if input.contains('\0') {
221            return Err(BuildError::InputSanitization(
222                "Null byte detected in path".to_string(),
223            ));
224        }
225
226        // Decode URL encoding (but detect double-encoding attacks)
227        let decoded = self.safe_url_decode(input)?;
228
229        // Check for control characters
230        if decoded
231            .chars()
232            .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t')
233        {
234            return Err(BuildError::InputSanitization(
235                "Control characters detected in path".to_string(),
236            ));
237        }
238
239        // Normalize Unicode (detect normalization attacks)
240        let normalized = self.normalize_unicode(&decoded)?;
241
242        Ok(normalized)
243    }
244
245    /// Safe URL decoding that detects double-encoding attacks
246    fn safe_url_decode(&self, input: &str) -> Result<String, BuildError> {
247        let first_decode = urlencoding::decode(input)
248            .map_err(|e| BuildError::InputSanitization(format!("URL decode error: {}", e)))?;
249
250        // Check for double-encoding by attempting to decode again
251        let second_decode = urlencoding::decode(&first_decode);
252        if second_decode.is_ok() && second_decode.as_ref().unwrap() != &first_decode {
253            return Err(BuildError::InputSanitization(
254                "Double URL encoding detected (potential attack)".to_string(),
255            ));
256        }
257
258        Ok(first_decode.into_owned())
259    }
260
261    /// Normalize Unicode and detect normalization attacks
262    fn normalize_unicode(&self, input: &str) -> Result<String, BuildError> {
263        use unicode_normalization::UnicodeNormalization;
264
265        let nfc = input.nfc().collect::<String>();
266        let nfd = input.nfd().collect::<String>();
267        let nfkc = input.nfkc().collect::<String>();
268        let nfkd = input.nfkd().collect::<String>();
269
270        // Check if normalization forms are different AND contain dangerous patterns
271        // Only flag as normalization attack if forms differ significantly
272        let forms_identical = nfc == nfd && nfd == nfkc && nfkc == nfkd;
273
274        // If all forms are the same, this is not a normalization attack
275        // Let the normal path validation handle it
276        if !forms_identical {
277            let forms = [&nfc, &nfd, &nfkc, &nfkd];
278            let mut dangerous_forms = Vec::new();
279
280            for (i, form) in forms.iter().enumerate() {
281                if DIRECTORY_TRAVERSAL.is_match(form)
282                    || ENCODED_TRAVERSAL.is_match(form)
283                    || ABSOLUTE_PATH.is_match(form)
284                    || DANGEROUS_CHARS.is_match(form)
285                {
286                    dangerous_forms.push(match i {
287                        0 => "NFC",
288                        1 => "NFD",
289                        2 => "NFKC",
290                        3 => "NFKD",
291                        _ => unreachable!(),
292                    });
293                }
294            }
295
296            if !dangerous_forms.is_empty() {
297                return Err(BuildError::InputSanitization(format!(
298                    "Unicode normalization attack detected in forms: {:?}",
299                    dangerous_forms
300                )));
301            }
302        }
303
304        // Use NFC normalization
305        Ok(nfc)
306    }
307
308    /// Detect dangerous patterns in the path
309    fn detect_dangerous_patterns(&self, path: &str) -> Result<(), BuildError> {
310        // Check directory traversal patterns
311        if DIRECTORY_TRAVERSAL.is_match(path) {
312            return Err(BuildError::InputSanitization(
313                "Directory traversal pattern detected".to_string(),
314            ));
315        }
316
317        // Check encoded traversal patterns
318        if ENCODED_TRAVERSAL.is_match(path) {
319            return Err(BuildError::InputSanitization(
320                "Encoded path traversal detected".to_string(),
321            ));
322        }
323
324        // Check for absolute paths
325        if ABSOLUTE_PATH.is_match(path) {
326            return Err(BuildError::InputSanitization(
327                "Absolute path not allowed".to_string(),
328            ));
329        }
330
331        // Check for dangerous characters
332        if DANGEROUS_CHARS.is_match(path) {
333            return Err(BuildError::InputSanitization(
334                "Dangerous characters detected".to_string(),
335            ));
336        }
337
338        // Check for suspicious filenames
339        if let Some(filename) = Path::new(path).file_name().and_then(|s| s.to_str()) {
340            if WINDOWS_RESERVED.is_match(filename) {
341                return Err(BuildError::InputSanitization(
342                    "Windows reserved filename detected".to_string(),
343                ));
344            }
345
346            // Check Windows reserved names
347            let filename_upper = filename.to_uppercase();
348            let base_name = filename_upper.split('.').next().unwrap_or(&filename_upper);
349            if WINDOWS_RESERVED_NAMES.contains(base_name) {
350                return Err(BuildError::InputSanitization(
351                    "Windows reserved filename detected".to_string(),
352                ));
353            }
354        }
355
356        Ok(())
357    }
358
359    /// Normalize path to use forward slashes and remove redundant components
360    fn normalize_path(&self, path: &str) -> Result<PathBuf, BuildError> {
361        // Convert all separators to forward slashes for consistent processing
362        let normalized_str = path.replace('\\', "/");
363
364        // Split into components and filter out empty and current directory references
365        let components: Vec<&str> = normalized_str
366            .split('/')
367            .filter(|c| !c.is_empty() && *c != ".")
368            .collect();
369
370        // Check depth
371        if components.len() > self.config.max_path_depth {
372            return Err(BuildError::InputSanitization(format!(
373                "Path too deep: {} > {}",
374                components.len(),
375                self.config.max_path_depth
376            )));
377        }
378
379        // Build normalized path
380        let mut normalized = PathBuf::new();
381        for component in components {
382            // Reject parent directory references
383            if component == ".." {
384                return Err(BuildError::InputSanitization(
385                    "Path traversal (..) detected".to_string(),
386                ));
387            }
388
389            normalized.push(component);
390        }
391
392        Ok(normalized)
393    }
394
395    /// Validate individual path components
396    fn validate_components(&self, path: &Path) -> Result<(), BuildError> {
397        for component in path.components() {
398            match component {
399                Component::Normal(name) => {
400                    let name_str = name.to_string_lossy();
401
402                    // Check for hidden files/directories
403                    // Allow current directory reference (.) but not other hidden files
404                    if !self.config.allow_hidden && name_str.starts_with('.') && name_str != "." {
405                        return Err(BuildError::InputSanitization(
406                            "Hidden files/directories not allowed".to_string(),
407                        ));
408                    }
409
410                    // Check component length
411                    if name_str.len() > 255 {
412                        return Err(BuildError::InputSanitization(
413                            "Path component too long".to_string(),
414                        ));
415                    }
416
417                    // Check for dangerous characters in component
418                    if name_str.chars().any(|c| r#"<>:"|?*"#.contains(c)) {
419                        return Err(BuildError::InputSanitization(
420                            "Dangerous characters in path component".to_string(),
421                        ));
422                    }
423                }
424                Component::ParentDir => {
425                    return Err(BuildError::InputSanitization(
426                        "Parent directory traversal detected".to_string(),
427                    ));
428                }
429                Component::RootDir => {
430                    return Err(BuildError::InputSanitization(
431                        "Root directory access not allowed".to_string(),
432                    ));
433                }
434                Component::Prefix(_) => {
435                    return Err(BuildError::InputSanitization(
436                        "Windows path prefix not allowed".to_string(),
437                    ));
438                }
439                Component::CurDir => {
440                    // Already filtered out in normalize_path
441                }
442            }
443        }
444
445        Ok(())
446    }
447
448    /// Validate path against whitelist of allowed base directories
449    fn validate_against_whitelist(&self, path: &Path) -> Result<(), BuildError> {
450        if self.config.allow_relative_outside_base && path.is_relative() {
451            return Ok(()); // Allow any relative path
452        }
453
454        // Check if path starts with any allowed base directory
455        for base_dir in &self.config.allowed_base_dirs {
456            if path.starts_with(base_dir) || path == base_dir {
457                return Ok(());
458            }
459
460            // Special case: if the base directory is "." and the path is a relative file
461            // without a directory component, consider it as being in the current directory
462            if base_dir == Path::new(".")
463                && (path.parent().is_none() || path.parent() == Some(Path::new("")))
464            {
465                return Ok(());
466            }
467
468            // Also check if the path is within the base directory when normalized
469            if let Ok(canonical_base) = base_dir.canonicalize() {
470                if let Ok(canonical_path) = path.canonicalize() {
471                    if canonical_path.starts_with(canonical_base) {
472                        return Ok(());
473                    }
474                }
475            }
476        }
477
478        Err(BuildError::InputSanitization(
479            "Path not within allowed directories".to_string(),
480        ))
481    }
482
483    /// Safely canonicalize path (handle platform differences)
484    fn safe_canonicalize(&self, path: &Path) -> (Option<PathBuf>, bool) {
485        let exists = path.exists();
486
487        // Try to canonicalize if the path exists
488        if exists {
489            match path.canonicalize() {
490                Ok(canonical) => (Some(canonical), true),
491                Err(_) => (None, exists),
492            }
493        } else {
494            // For non-existent paths, try to canonicalize the parent directory
495            if let Some(parent) = path.parent() {
496                if parent.exists() {
497                    match parent.canonicalize() {
498                        Ok(canonical_parent) => {
499                            if let Some(filename) = path.file_name() {
500                                let canonical = canonical_parent.join(filename);
501                                (Some(canonical), false)
502                            } else {
503                                (None, false)
504                            }
505                        }
506                        Err(_) => (None, false),
507                    }
508                } else {
509                    (None, false)
510                }
511            } else {
512                (None, false)
513            }
514        }
515    }
516
517    /// Validate symlinks to prevent symlink attacks
518    fn validate_symlinks(
519        &self,
520        normalized: &Path,
521        canonical: &Option<PathBuf>,
522    ) -> Result<(), BuildError> {
523        if let Some(canonical_path) = canonical {
524            // Check if the canonical path is different from the normalized path
525            // This indicates the presence of symlinks
526            if normalized != canonical_path {
527                // Verify the canonical path is still within allowed directories
528                self.validate_against_whitelist(canonical_path)?;
529
530                // Check if the symlink target contains dangerous patterns
531                if let Some(target_str) = canonical_path.to_str() {
532                    if DIRECTORY_TRAVERSAL.is_match(target_str)
533                        || ENCODED_TRAVERSAL.is_match(target_str)
534                        || ABSOLUTE_PATH.is_match(target_str)
535                        || DANGEROUS_CHARS.is_match(target_str)
536                    {
537                        return Err(BuildError::InputSanitization(
538                            "Symlink target contains dangerous patterns".to_string(),
539                        ));
540                    }
541                }
542
543                // Check for symlink loops (basic detection)
544                if let Ok(metadata) = fs::symlink_metadata(normalized) {
545                    if metadata.file_type().is_symlink() {
546                        // This is a symlink, let's check for potential loops
547                        let mut visited = HashSet::new();
548                        let mut current = normalized.to_path_buf();
549
550                        while current.is_symlink() && visited.len() < 32 {
551                            if visited.contains(&current) {
552                                return Err(BuildError::InputSanitization(
553                                    "Symlink loop detected".to_string(),
554                                ));
555                            }
556                            visited.insert(current.clone());
557
558                            match fs::read_link(&current) {
559                                Ok(target) => {
560                                    current = if target.is_absolute() {
561                                        target
562                                    } else {
563                                        current
564                                            .parent()
565                                            .unwrap_or_else(|| Path::new("."))
566                                            .join(target)
567                                    };
568                                }
569                                Err(_) => break,
570                            }
571                        }
572
573                        if visited.len() >= 32 {
574                            return Err(BuildError::InputSanitization(
575                                "Symlink chain too long (potential loop)".to_string(),
576                            ));
577                        }
578                    }
579                }
580            }
581        }
582
583        Ok(())
584    }
585
586    /// Collect warnings about potentially suspicious but not necessarily dangerous patterns
587    fn collect_warnings(&self, input: &str, normalized: &Path) -> Vec<String> {
588        let mut warnings = Vec::new();
589
590        // Warn about unusual characters
591        if input.chars().any(|c| !c.is_ascii()) {
592            warnings.push("Path contains non-ASCII characters".to_string());
593        }
594
595        // Warn about very long filenames
596        if let Some(filename) = normalized.file_name().and_then(|s| s.to_str()) {
597            if filename.len() > 100 {
598                warnings.push("Very long filename".to_string());
599            }
600        }
601
602        // Warn about deeply nested paths
603        if normalized.components().count() > 8 {
604            warnings.push("Deeply nested path".to_string());
605        }
606
607        // Warn about unusual extensions
608        if let Some(extension) = normalized.extension().and_then(|s| s.to_str()) {
609            if !self
610                .config
611                .allowed_extensions
612                .contains(&extension.to_lowercase())
613            {
614                warnings.push(format!("Unusual file extension: {}", extension));
615            }
616        }
617
618        // Warn about suspicious extensions
619        if let Some(filename) = normalized.file_name().and_then(|s| s.to_str()) {
620            if SUSPICIOUS_EXTENSIONS.is_match(filename) {
621                warnings.push("Suspicious file extension detected".to_string());
622            }
623        }
624
625        warnings
626    }
627
628    /// Get the current configuration
629    pub fn config(&self) -> &PathValidationConfig {
630        &self.config
631    }
632
633    /// Update the configuration
634    pub fn update_config(&mut self, config: PathValidationConfig) {
635        self.config = config;
636    }
637}
638
639impl Default for PathValidator {
640    fn default() -> Self {
641        Self::new()
642    }
643}
644
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Plain relative paths under whitelisted directories pass; traversal and
    /// absolute paths do not.
    #[test]
    fn test_basic_path_validation() {
        let validator = PathValidator::new();

        for &accepted in &["data/file.xml", "input/subdir/file.json", "./file.txt"] {
            assert!(validator.validate(accepted).is_ok());
        }

        for &rejected in &["../etc/passwd", "/etc/passwd", "C:\\Windows\\System32"] {
            assert!(validator.validate(rejected).is_err());
        }
    }

    /// A representative set of hostile inputs is rejected.
    #[test]
    fn test_dangerous_patterns() {
        let validator = PathValidator::new();

        let dangerous_paths: &[&str] = &[
            "../../../etc/passwd",
            "..\\..\\..\\windows\\system32\\config\\sam",
            "/etc/passwd",
            "/proc/self/environ",
            "C:\\Windows\\System32",
            "\\\\server\\share",
            "file%00.txt",
            "%2e%2e%2fpasswd",
            "%252e%252e%252fpasswd",
        ];

        for &path in dangerous_paths {
            assert!(
                validator.validate(path).is_err(),
                "Should reject dangerous path: {}",
                path
            );
        }
    }

    /// URL-encoded traversal and null-byte payloads are rejected.
    #[test]
    fn test_url_encoding_attacks() {
        let validator = PathValidator::new();

        let encoded_attacks: &[&str] = &[
            "%2e%2e%2f",       // ../
            "%2e%2e%5c",       // ..\
            "%252e%252e%252f", // Double-encoded ../
            "..%2f",           // ../ mixed
            "..%00",           // Null byte
        ];

        for &attack in encoded_attacks {
            let outcome = validator.validate(attack);
            assert!(outcome.is_err(), "Should block encoded attack: {}", attack);
        }
    }

    /// Windows device names are rejected with or without an extension.
    #[test]
    fn test_windows_reserved_names() {
        let validator = PathValidator::new();

        let reserved_names: &[&str] = &[
            "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "LPT1", "LPT2", "con.txt", "prn.xml",
            "aux.json",
        ];

        for &name in reserved_names {
            let outcome = validator.validate(name);
            assert!(outcome.is_err(), "Should block reserved name: {}", name);
        }
    }

    /// Duplicate separators, backslashes and `.` components are normalized away.
    #[test]
    fn test_path_normalization() {
        let validator = PathValidator::new();

        let cases: &[(&str, &str)] = &[
            ("data//file.xml", "data/file.xml"),
            ("data\\subdir\\file.json", "data/subdir/file.json"),
            ("./data/./file.txt", "data/file.txt"),
        ];

        for &(input, expected) in cases {
            let result = validator.validate(input).unwrap();
            assert_eq!(result.normalized, Path::new(expected));
        }
    }

    /// Only paths under the configured base directory are accepted.
    #[test]
    fn test_whitelist_validation() {
        let config = PathValidationConfig {
            allowed_base_dirs: vec![PathBuf::from("allowed")],
            allow_relative_outside_base: false,
            ..PathValidationConfig::default()
        };
        let validator = PathValidator::with_config(config);

        assert!(validator.validate("allowed/file.xml").is_ok());
        assert!(validator.validate("disallowed/file.xml").is_err());
    }

    /// Ordinary accented characters are accepted.
    #[test]
    fn test_unicode_normalization() {
        let validator = PathValidator::new();

        // Basic coverage only; targeted Unicode attacks would need dedicated cases.
        assert!(validator.validate("data/résumé.txt").is_ok());
    }

    /// Custom length and depth limits are enforced.
    #[test]
    fn test_length_limits() {
        let config = PathValidationConfig {
            max_path_length: 50,
            max_path_depth: 3,
            ..PathValidationConfig::default()
        };
        let validator = PathValidator::with_config(config);

        // Too long
        let long_path = "a/".repeat(30);
        assert!(validator.validate(&long_path).is_err());

        // Too deep
        let deep_path = "a/b/c/d/e/f/g.txt";
        assert!(validator.validate(deep_path).is_err());
    }

    /// Expected extensions produce no warnings; others are flagged.
    #[test]
    fn test_file_extensions() {
        let config = PathValidationConfig {
            allowed_extensions: ["xml", "json"]
                .iter()
                .map(|extension| extension.to_string())
                .collect(),
            ..PathValidationConfig::default()
        };
        let validator = PathValidator::with_config(config);

        let expected_extension = validator.validate("data/file.xml").unwrap();
        assert!(expected_extension.warnings.is_empty());

        let unexpected_extension = validator.validate("data/file.exe").unwrap();
        assert!(unexpected_extension
            .warnings
            .iter()
            .any(|w| w.contains("extension")));
    }
}
797}