elif_storage/
validation.rs

1//! File validation utilities
2
3use crate::{StorageResult, StorageError};
4use std::collections::HashSet;
5
/// Rules a [`FileValidator`] enforces when vetting a file.
///
/// Every `Option` field switches its check off when it is `None`.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Largest accepted size in bytes; a file of exactly this size passes.
    pub max_file_size: Option<u64>,

    /// Smallest accepted size in bytes; a file of exactly this size passes.
    pub min_file_size: Option<u64>,

    /// MIME type prefixes that are accepted (e.g. `"image/"`).
    /// When set, a type matching none of the prefixes is rejected.
    pub allowed_mime_types: Option<HashSet<String>>,

    /// MIME type prefixes that are always rejected.
    /// Consulted before `allowed_mime_types`.
    pub blocked_mime_types: Option<HashSet<String>>,

    /// File extensions that are accepted, written without the leading dot.
    /// The validator compares them against the lower-cased extension.
    pub allowed_extensions: Option<HashSet<String>>,

    /// File extensions that are always rejected, written without the
    /// leading dot. Consulted before `allowed_extensions`.
    pub blocked_extensions: Option<HashSet<String>>,

    /// Whether the file bytes are inspected (magic numbers and a scan for
    /// dangerous patterns) in addition to the metadata checks.
    pub validate_content: bool,

    /// Longest accepted filename, measured with `str::len` (bytes).
    pub max_filename_length: Option<usize>,

    /// When `false`, filenames containing non-ASCII characters are rejected.
    pub allow_unicode_filenames: bool,
}
36
37impl Default for ValidationConfig {
38    fn default() -> Self {
39        Self {
40            max_file_size: Some(100 * 1024 * 1024), // 100MB
41            min_file_size: Some(1), // At least 1 byte
42            allowed_mime_types: None,
43            blocked_mime_types: Some({
44                let mut blocked = HashSet::new();
45                // Block potentially dangerous file types
46                blocked.insert("application/x-executable".to_string());
47                blocked.insert("application/x-msdownload".to_string());
48                blocked.insert("application/x-dosexec".to_string());
49                blocked
50            }),
51            allowed_extensions: None,
52            blocked_extensions: Some({
53                let mut blocked = HashSet::new();
54                // Block dangerous extensions
55                blocked.insert("exe".to_string());
56                blocked.insert("bat".to_string());
57                blocked.insert("cmd".to_string());
58                blocked.insert("com".to_string());
59                blocked.insert("scr".to_string());
60                blocked.insert("pif".to_string());
61                blocked
62            }),
63            validate_content: true,
64            max_filename_length: Some(255),
65            allow_unicode_filenames: true,
66        }
67    }
68}
69
70impl ValidationConfig {
71    /// Create a new validation configuration
72    pub fn new() -> Self {
73        Self::default()
74    }
75    
76    /// Set maximum file size
77    pub fn max_size(mut self, size: u64) -> Self {
78        self.max_file_size = Some(size);
79        self
80    }
81    
82    /// Remove maximum file size limit
83    pub fn unlimited_size(mut self) -> Self {
84        self.max_file_size = None;
85        self
86    }
87    
88    /// Set minimum file size
89    pub fn min_size(mut self, size: u64) -> Self {
90        self.min_file_size = Some(size);
91        self
92    }
93    
94    /// Allow specific MIME types only
95    pub fn allow_mime_types<I>(mut self, types: I) -> Self
96    where
97        I: IntoIterator<Item = String>,
98    {
99        self.allowed_mime_types = Some(types.into_iter().collect());
100        self
101    }
102    
103    /// Block specific MIME types
104    pub fn block_mime_types<I>(mut self, types: I) -> Self
105    where
106        I: IntoIterator<Item = String>,
107    {
108        self.blocked_mime_types = Some(types.into_iter().collect());
109        self
110    }
111    
112    /// Allow specific file extensions only
113    pub fn allow_extensions<I>(mut self, extensions: I) -> Self
114    where
115        I: IntoIterator<Item = String>,
116    {
117        self.allowed_extensions = Some(extensions.into_iter().collect());
118        self
119    }
120    
121    /// Block specific file extensions
122    pub fn block_extensions<I>(mut self, extensions: I) -> Self
123    where
124        I: IntoIterator<Item = String>,
125    {
126        self.blocked_extensions = Some(extensions.into_iter().collect());
127        self
128    }
129    
130    /// Enable content validation
131    pub fn validate_content(mut self) -> Self {
132        self.validate_content = true;
133        self
134    }
135    
136    /// Disable content validation
137    pub fn skip_content_validation(mut self) -> Self {
138        self.validate_content = false;
139        self
140    }
141    
142    /// Set maximum filename length
143    pub fn max_filename_length(mut self, length: usize) -> Self {
144        self.max_filename_length = Some(length);
145        self
146    }
147    
148    /// Allow unicode characters in filenames
149    pub fn allow_unicode_filenames(mut self) -> Self {
150        self.allow_unicode_filenames = true;
151        self
152    }
153    
154    /// Disallow unicode characters in filenames
155    pub fn ascii_filenames_only(mut self) -> Self {
156        self.allow_unicode_filenames = false;
157        self
158    }
159}
160
161/// File validator
162#[derive(Debug)]
163pub struct FileValidator {
164    config: ValidationConfig,
165}
166
167impl FileValidator {
168    /// Create a new file validator
169    pub fn new(config: ValidationConfig) -> Self {
170        Self { config }
171    }
172    
173    /// Validate file size
174    pub fn validate_size(&self, size: u64) -> StorageResult<()> {
175        if let Some(max_size) = self.config.max_file_size {
176            if size > max_size {
177                return Err(StorageError::FileTooLarge(size, max_size));
178            }
179        }
180        
181        if let Some(min_size) = self.config.min_file_size {
182            if size < min_size {
183                return Err(StorageError::Validation(format!(
184                    "File too small: {} bytes, minimum required: {} bytes",
185                    size, min_size
186                )));
187            }
188        }
189        
190        Ok(())
191    }
192    
193    /// Validate MIME type
194    pub fn validate_mime_type(&self, mime_type: &str) -> StorageResult<()> {
195        // Check blocked types first
196        if let Some(blocked) = &self.config.blocked_mime_types {
197            for blocked_type in blocked {
198                if mime_type.starts_with(blocked_type) {
199                    return Err(StorageError::UnsupportedFileType(format!(
200                        "File type '{}' is blocked", mime_type
201                    )));
202                }
203            }
204        }
205        
206        // Check allowed types
207        if let Some(allowed) = &self.config.allowed_mime_types {
208            let is_allowed = allowed.iter().any(|allowed_type| mime_type.starts_with(allowed_type));
209            if !is_allowed {
210                return Err(StorageError::UnsupportedFileType(format!(
211                    "File type '{}' is not allowed", mime_type
212                )));
213            }
214        }
215        
216        Ok(())
217    }
218    
219    /// Validate file extension
220    pub fn validate_extension(&self, filename: &str) -> StorageResult<()> {
221        let extension = std::path::Path::new(filename)
222            .extension()
223            .and_then(|e| e.to_str())
224            .map(|e| e.to_lowercase())
225            .unwrap_or_default();
226        
227        // Check blocked extensions first
228        if let Some(blocked) = &self.config.blocked_extensions {
229            if blocked.contains(&extension) {
230                return Err(StorageError::UnsupportedFileType(format!(
231                    "File extension '{}' is blocked", extension
232                )));
233            }
234        }
235        
236        // Check allowed extensions
237        if let Some(allowed) = &self.config.allowed_extensions {
238            if !allowed.contains(&extension) {
239                return Err(StorageError::UnsupportedFileType(format!(
240                    "File extension '{}' is not allowed", extension
241                )));
242            }
243        }
244        
245        Ok(())
246    }
247    
248    /// Validate filename
249    pub fn validate_filename(&self, filename: &str) -> StorageResult<()> {
250        // Check filename length
251        if let Some(max_length) = self.config.max_filename_length {
252            if filename.len() > max_length {
253                return Err(StorageError::Validation(format!(
254                    "Filename too long: {} characters, maximum allowed: {}",
255                    filename.len(), max_length
256                )));
257            }
258        }
259        
260        // Check for empty filename
261        if filename.trim().is_empty() {
262            return Err(StorageError::Validation("Filename cannot be empty".to_string()));
263        }
264        
265        // Check for dangerous characters
266        let dangerous_chars = ['<', '>', ':', '"', '|', '?', '*', '\0'];
267        if filename.chars().any(|c| dangerous_chars.contains(&c)) {
268            return Err(StorageError::Validation(format!(
269                "Filename contains dangerous characters: '{}'", filename
270            )));
271        }
272        
273        // Check for unicode if not allowed
274        if !self.config.allow_unicode_filenames && !filename.is_ascii() {
275            return Err(StorageError::Validation(
276                "Unicode characters not allowed in filename".to_string()
277            ));
278        }
279        
280        // Check for reserved names (Windows)
281        let reserved_names = [
282            "CON", "PRN", "AUX", "NUL",
283            "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
284            "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
285        ];
286        
287        let name_part = std::path::Path::new(filename)
288            .file_stem()
289            .and_then(|s| s.to_str())
290            .unwrap_or(filename)
291            .to_uppercase();
292        
293        if reserved_names.contains(&name_part.as_str()) {
294            return Err(StorageError::Validation(format!(
295                "Filename '{}' is reserved", filename
296            )));
297        }
298        
299        Ok(())
300    }
301    
302    /// Validate file content (magic number checking)
303    pub fn validate_content(&self, filename: &str, content: &[u8], declared_mime: &str) -> StorageResult<()> {
304        if !self.config.validate_content {
305            return Ok(());
306        }
307        
308        let detected_mime = detect_mime_from_content(content);
309        
310        // If we can detect the MIME type, ensure it matches the declared type
311        if let Some(detected) = detected_mime {
312            // Allow some flexibility - just check the major type matches
313            let declared_major = declared_mime.split('/').next().unwrap_or(declared_mime);
314            let detected_major = detected.split('/').next().unwrap_or(&detected);
315            
316            if declared_major != "application" && declared_major != detected_major {
317                return Err(StorageError::Validation(format!(
318                    "File content does not match declared type. Declared: '{}', Detected: '{}'",
319                    declared_mime, detected
320                )));
321            }
322        }
323        
324        // Check for dangerous content patterns
325        self.scan_for_dangerous_content(filename, content)?;
326        
327        Ok(())
328    }
329    
330    /// Scan content for dangerous patterns
331    fn scan_for_dangerous_content(&self, filename: &str, content: &[u8]) -> StorageResult<()> {
332        // Check for executable signatures
333        if content.len() >= 2 {
334            match &content[0..2] {
335                [0x4D, 0x5A] => { // PE executable (Windows)
336                    return Err(StorageError::Validation(
337                        "File appears to be a Windows executable".to_string()
338                    ));
339                }
340                [0x7F, 0x45] if content.len() >= 4 && &content[2..4] == [0x4C, 0x46] => { // ELF
341                    return Err(StorageError::Validation(
342                        "File appears to be a Linux executable".to_string()
343                    ));
344                }
345                _ => {}
346            }
347        }
348        
349        // Check for Mach-O executables (macOS)
350        if content.len() >= 4 {
351            match &content[0..4] {
352                [0xFE, 0xED, 0xFA, 0xCE] | 
353                [0xFE, 0xED, 0xFA, 0xCF] | 
354                [0xCE, 0xFA, 0xED, 0xFE] | 
355                [0xCF, 0xFA, 0xED, 0xFE] => {
356                    return Err(StorageError::Validation(
357                        "File appears to be a macOS executable".to_string()
358                    ));
359                }
360                _ => {}
361            }
362        }
363        
364        // Check for script files that might be dangerous
365        if let Ok(text) = std::str::from_utf8(content) {
366            let text_lower = text.to_lowercase();
367            
368            // Check for script shebangs
369            if text.starts_with("#!") {
370                let extension = std::path::Path::new(filename)
371                    .extension()
372                    .and_then(|e| e.to_str())
373                    .unwrap_or("")
374                    .to_lowercase();
375                
376                // Allow some safe script types
377                let safe_scripts = ["sh", "bash", "py", "rb", "js", "pl"];
378                if !safe_scripts.contains(&extension.as_str()) {
379                    return Err(StorageError::Validation(
380                        "Executable script files are not allowed".to_string()
381                    ));
382                }
383            }
384            
385            // Check for dangerous PowerShell or batch commands
386            let dangerous_patterns = [
387                "invoke-expression", "iex", "invoke-webrequest", "iwr",
388                "start-process", "downloadstring", "downloadfile",
389                "@echo off", "cmd.exe", "powershell.exe",
390            ];
391            
392            for pattern in &dangerous_patterns {
393                if text_lower.contains(pattern) {
394                    return Err(StorageError::Validation(format!(
395                        "File contains potentially dangerous content: '{}'", pattern
396                    )));
397                }
398            }
399        }
400        
401        Ok(())
402    }
403    
404    /// Validate complete file
405    pub fn validate_file(&self, filename: &str, content: &[u8], mime_type: &str) -> StorageResult<()> {
406        self.validate_filename(filename)?;
407        self.validate_size(content.len() as u64)?;
408        self.validate_extension(filename)?;
409        self.validate_mime_type(mime_type)?;
410        self.validate_content(filename, content, mime_type)?;
411        
412        Ok(())
413    }
414}
415
/// Guess a MIME type from the leading bytes of `content`.
///
/// Inputs shorter than four bytes are never classified. Recognizes JPEG,
/// PNG, GIF, PDF and ZIP signatures; anything else is reported as
/// `text/plain` when the first 1024 bytes are all printable ASCII, tab,
/// line feed or carriage return, and as `None` otherwise.
fn detect_mime_from_content(content: &[u8]) -> Option<String> {
    if content.len() < 4 {
        return None;
    }

    let head = &content[..4];
    let is_zip = [[0x03u8, 0x04], [0x05, 0x06], [0x07, 0x08]]
        .iter()
        .any(|tail| head[..2] == [0x50, 0x4B] && head[2..] == tail[..]);

    let mime = if head.starts_with(&[0xFF, 0xD8, 0xFF]) {
        "image/jpeg"
    } else if head.starts_with(&[0x89, 0x50, 0x4E, 0x47]) {
        "image/png"
    } else if head.starts_with(&[0x47, 0x49, 0x46, 0x38]) {
        "image/gif"
    } else if head.starts_with(&[0x25, 0x50, 0x44, 0x46]) {
        "application/pdf"
    } else if is_zip {
        "application/zip"
    } else {
        // No known signature: fall back to a plain-text heuristic.
        let looks_like_text = content
            .iter()
            .take(1024)
            .all(|&byte| byte.is_ascii() && (byte >= 32 || byte == 9 || byte == 10 || byte == 13));
        if !looks_like_text {
            return None;
        }
        "text/plain"
    };

    Some(mime.to_string())
}
440
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned `String`s the builders expect.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_validation_config() {
        let limit = 50 * 1024 * 1024;
        let config = ValidationConfig::new()
            .max_size(limit)
            .allow_mime_types(owned(&["image/", "text/"]))
            .block_extensions(owned(&["exe", "bat"]))
            .ascii_filenames_only();

        assert_eq!(config.max_file_size, Some(limit));

        let allowed_mimes = config.allowed_mime_types.as_ref().unwrap();
        assert!(allowed_mimes.contains("image/"));

        let blocked_exts = config.blocked_extensions.as_ref().unwrap();
        assert!(blocked_exts.contains("exe"));

        assert!(!config.allow_unicode_filenames);
    }

    #[test]
    fn test_file_validator_size() {
        let validator = FileValidator::new(ValidationConfig::new().max_size(1000).min_size(10));

        // Within bounds, including the upper bound itself.
        for &size in &[500u64, 1000] {
            assert!(validator.validate_size(size).is_ok());
        }
        // Just above the maximum and below the minimum.
        for &size in &[1001u64, 5] {
            assert!(validator.validate_size(size).is_err());
        }
    }

    #[test]
    fn test_file_validator_mime_type() {
        let validator = FileValidator::new(
            ValidationConfig::new().allow_mime_types(owned(&["image/", "text/plain"])),
        );

        for mime in &["image/jpeg", "image/png", "text/plain"] {
            assert!(validator.validate_mime_type(mime).is_ok());
        }
        for mime in &["application/pdf", "text/html"] {
            assert!(validator.validate_mime_type(mime).is_err());
        }
    }

    #[test]
    fn test_file_validator_extension() {
        let validator = FileValidator::new(
            ValidationConfig::new().block_extensions(owned(&["exe", "bat"])),
        );

        for name in &["document.pdf", "image.jpg"] {
            assert!(validator.validate_extension(name).is_ok());
        }
        // The last entry checks that matching is case insensitive.
        for name in &["script.exe", "script.bat", "Script.EXE"] {
            assert!(validator.validate_extension(name).is_err());
        }
    }

    #[test]
    fn test_file_validator_filename() {
        let validator = FileValidator::new(
            ValidationConfig::new()
                .max_filename_length(20)
                .ascii_filenames_only(),
        );

        assert!(validator.validate_filename("document.pdf").is_ok());

        let rejected = [
            "very_long_filename_that_exceeds_limit.txt", // too long
            "",                                          // empty
            "file<script>.txt",                          // dangerous char
            "测试.txt",                                  // unicode not allowed
            "CON.txt",                                   // reserved name
        ];
        for name in &rejected {
            assert!(validator.validate_filename(name).is_err());
        }
    }

    #[test]
    fn test_detect_mime_from_content() {
        let detect = |bytes: &[u8]| detect_mime_from_content(bytes);

        // JPEG
        assert_eq!(
            detect(&[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10]),
            Some("image/jpeg".to_string())
        );

        // PNG
        assert_eq!(
            detect(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]),
            Some("image/png".to_string())
        );

        // Text
        assert_eq!(detect(b"Hello, World!"), Some("text/plain".to_string()));

        // Unknown binary
        assert_eq!(detect(&[0x00, 0x01, 0x02, 0x03]), None);
    }

    #[test]
    fn test_dangerous_content_detection() {
        let validator = FileValidator::new(ValidationConfig::new());
        let scan = |bytes: &[u8]| validator.scan_for_dangerous_content("test.txt", bytes);

        // PE executable (MZ header)
        assert!(scan(&[0x4D, 0x5A, 0x90, 0x00]).is_err());

        // ELF executable
        assert!(scan(&[0x7F, 0x45, 0x4C, 0x46]).is_err());

        // Safe text
        assert!(scan(b"This is just normal text content.").is_ok());

        // Dangerous script content
        assert!(scan(b"powershell.exe -Command Invoke-Expression").is_err());
    }
}