1use crate::{StorageResult, StorageError};
4use std::collections::HashSet;
5
/// Rules a [`FileValidator`] applies when vetting a file.
///
/// Every `Option` field switches its check off when set to `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationConfig {
    /// Largest accepted file size in bytes; `None` removes the upper bound.
    pub max_file_size: Option<u64>,

    /// Smallest accepted file size in bytes; `None` removes the lower bound.
    pub min_file_size: Option<u64>,

    /// MIME type prefixes that are accepted (for example `"image/"`). When
    /// set, a type that starts with none of the entries is rejected.
    pub allowed_mime_types: Option<HashSet<String>>,

    /// MIME type prefixes that are rejected. The block-list is consulted
    /// before the allow-list.
    pub blocked_mime_types: Option<HashSet<String>>,

    /// File extensions that are accepted. Entries are compared against the
    /// lowercased extension of the filename, without the leading dot.
    pub allowed_extensions: Option<HashSet<String>>,

    /// File extensions that are rejected. The block-list is consulted before
    /// the allow-list.
    pub blocked_extensions: Option<HashSet<String>>,

    /// Whether the file bytes are inspected (type sniffing and the scan for
    /// dangerous content). When `false`, content validation always succeeds.
    pub validate_content: bool,

    /// Upper bound on the length of the filename as reported by `str::len`;
    /// `None` disables the check.
    pub max_filename_length: Option<usize>,

    /// When `false`, filenames containing non-ASCII characters are rejected.
    pub allow_unicode_filenames: bool,
}
36
37impl Default for ValidationConfig {
38 fn default() -> Self {
39 Self {
40 max_file_size: Some(100 * 1024 * 1024), min_file_size: Some(1), allowed_mime_types: None,
43 blocked_mime_types: Some({
44 let mut blocked = HashSet::new();
45 blocked.insert("application/x-executable".to_string());
47 blocked.insert("application/x-msdownload".to_string());
48 blocked.insert("application/x-dosexec".to_string());
49 blocked
50 }),
51 allowed_extensions: None,
52 blocked_extensions: Some({
53 let mut blocked = HashSet::new();
54 blocked.insert("exe".to_string());
56 blocked.insert("bat".to_string());
57 blocked.insert("cmd".to_string());
58 blocked.insert("com".to_string());
59 blocked.insert("scr".to_string());
60 blocked.insert("pif".to_string());
61 blocked
62 }),
63 validate_content: true,
64 max_filename_length: Some(255),
65 allow_unicode_filenames: true,
66 }
67 }
68}
69
70impl ValidationConfig {
71 pub fn new() -> Self {
73 Self::default()
74 }
75
76 pub fn max_size(mut self, size: u64) -> Self {
78 self.max_file_size = Some(size);
79 self
80 }
81
82 pub fn unlimited_size(mut self) -> Self {
84 self.max_file_size = None;
85 self
86 }
87
88 pub fn min_size(mut self, size: u64) -> Self {
90 self.min_file_size = Some(size);
91 self
92 }
93
94 pub fn allow_mime_types<I>(mut self, types: I) -> Self
96 where
97 I: IntoIterator<Item = String>,
98 {
99 self.allowed_mime_types = Some(types.into_iter().collect());
100 self
101 }
102
103 pub fn block_mime_types<I>(mut self, types: I) -> Self
105 where
106 I: IntoIterator<Item = String>,
107 {
108 self.blocked_mime_types = Some(types.into_iter().collect());
109 self
110 }
111
112 pub fn allow_extensions<I>(mut self, extensions: I) -> Self
114 where
115 I: IntoIterator<Item = String>,
116 {
117 self.allowed_extensions = Some(extensions.into_iter().collect());
118 self
119 }
120
121 pub fn block_extensions<I>(mut self, extensions: I) -> Self
123 where
124 I: IntoIterator<Item = String>,
125 {
126 self.blocked_extensions = Some(extensions.into_iter().collect());
127 self
128 }
129
130 pub fn validate_content(mut self) -> Self {
132 self.validate_content = true;
133 self
134 }
135
136 pub fn skip_content_validation(mut self) -> Self {
138 self.validate_content = false;
139 self
140 }
141
142 pub fn max_filename_length(mut self, length: usize) -> Self {
144 self.max_filename_length = Some(length);
145 self
146 }
147
148 pub fn allow_unicode_filenames(mut self) -> Self {
150 self.allow_unicode_filenames = true;
151 self
152 }
153
154 pub fn ascii_filenames_only(mut self) -> Self {
156 self.allow_unicode_filenames = false;
157 self
158 }
159}
160
161#[derive(Debug)]
163pub struct FileValidator {
164 config: ValidationConfig,
165}
166
167impl FileValidator {
168 pub fn new(config: ValidationConfig) -> Self {
170 Self { config }
171 }
172
173 pub fn validate_size(&self, size: u64) -> StorageResult<()> {
175 if let Some(max_size) = self.config.max_file_size {
176 if size > max_size {
177 return Err(StorageError::FileTooLarge(size, max_size));
178 }
179 }
180
181 if let Some(min_size) = self.config.min_file_size {
182 if size < min_size {
183 return Err(StorageError::Validation(format!(
184 "File too small: {} bytes, minimum required: {} bytes",
185 size, min_size
186 )));
187 }
188 }
189
190 Ok(())
191 }
192
193 pub fn validate_mime_type(&self, mime_type: &str) -> StorageResult<()> {
195 if let Some(blocked) = &self.config.blocked_mime_types {
197 for blocked_type in blocked {
198 if mime_type.starts_with(blocked_type) {
199 return Err(StorageError::UnsupportedFileType(format!(
200 "File type '{}' is blocked", mime_type
201 )));
202 }
203 }
204 }
205
206 if let Some(allowed) = &self.config.allowed_mime_types {
208 let is_allowed = allowed.iter().any(|allowed_type| mime_type.starts_with(allowed_type));
209 if !is_allowed {
210 return Err(StorageError::UnsupportedFileType(format!(
211 "File type '{}' is not allowed", mime_type
212 )));
213 }
214 }
215
216 Ok(())
217 }
218
219 pub fn validate_extension(&self, filename: &str) -> StorageResult<()> {
221 let extension = std::path::Path::new(filename)
222 .extension()
223 .and_then(|e| e.to_str())
224 .map(|e| e.to_lowercase())
225 .unwrap_or_default();
226
227 if let Some(blocked) = &self.config.blocked_extensions {
229 if blocked.contains(&extension) {
230 return Err(StorageError::UnsupportedFileType(format!(
231 "File extension '{}' is blocked", extension
232 )));
233 }
234 }
235
236 if let Some(allowed) = &self.config.allowed_extensions {
238 if !allowed.contains(&extension) {
239 return Err(StorageError::UnsupportedFileType(format!(
240 "File extension '{}' is not allowed", extension
241 )));
242 }
243 }
244
245 Ok(())
246 }
247
248 pub fn validate_filename(&self, filename: &str) -> StorageResult<()> {
250 if let Some(max_length) = self.config.max_filename_length {
252 if filename.len() > max_length {
253 return Err(StorageError::Validation(format!(
254 "Filename too long: {} characters, maximum allowed: {}",
255 filename.len(), max_length
256 )));
257 }
258 }
259
260 if filename.trim().is_empty() {
262 return Err(StorageError::Validation("Filename cannot be empty".to_string()));
263 }
264
265 let dangerous_chars = ['<', '>', ':', '"', '|', '?', '*', '\0'];
267 if filename.chars().any(|c| dangerous_chars.contains(&c)) {
268 return Err(StorageError::Validation(format!(
269 "Filename contains dangerous characters: '{}'", filename
270 )));
271 }
272
273 if !self.config.allow_unicode_filenames && !filename.is_ascii() {
275 return Err(StorageError::Validation(
276 "Unicode characters not allowed in filename".to_string()
277 ));
278 }
279
280 let reserved_names = [
282 "CON", "PRN", "AUX", "NUL",
283 "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
284 "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
285 ];
286
287 let name_part = std::path::Path::new(filename)
288 .file_stem()
289 .and_then(|s| s.to_str())
290 .unwrap_or(filename)
291 .to_uppercase();
292
293 if reserved_names.contains(&name_part.as_str()) {
294 return Err(StorageError::Validation(format!(
295 "Filename '{}' is reserved", filename
296 )));
297 }
298
299 Ok(())
300 }
301
302 pub fn validate_content(&self, filename: &str, content: &[u8], declared_mime: &str) -> StorageResult<()> {
304 if !self.config.validate_content {
305 return Ok(());
306 }
307
308 let detected_mime = detect_mime_from_content(content);
309
310 if let Some(detected) = detected_mime {
312 let declared_major = declared_mime.split('/').next().unwrap_or(declared_mime);
314 let detected_major = detected.split('/').next().unwrap_or(&detected);
315
316 if declared_major != "application" && declared_major != detected_major {
317 return Err(StorageError::Validation(format!(
318 "File content does not match declared type. Declared: '{}', Detected: '{}'",
319 declared_mime, detected
320 )));
321 }
322 }
323
324 self.scan_for_dangerous_content(filename, content)?;
326
327 Ok(())
328 }
329
330 fn scan_for_dangerous_content(&self, filename: &str, content: &[u8]) -> StorageResult<()> {
332 if content.len() >= 2 {
334 match &content[0..2] {
335 [0x4D, 0x5A] => { return Err(StorageError::Validation(
337 "File appears to be a Windows executable".to_string()
338 ));
339 }
340 [0x7F, 0x45] if content.len() >= 4 && &content[2..4] == [0x4C, 0x46] => { return Err(StorageError::Validation(
342 "File appears to be a Linux executable".to_string()
343 ));
344 }
345 _ => {}
346 }
347 }
348
349 if content.len() >= 4 {
351 match &content[0..4] {
352 [0xFE, 0xED, 0xFA, 0xCE] |
353 [0xFE, 0xED, 0xFA, 0xCF] |
354 [0xCE, 0xFA, 0xED, 0xFE] |
355 [0xCF, 0xFA, 0xED, 0xFE] => {
356 return Err(StorageError::Validation(
357 "File appears to be a macOS executable".to_string()
358 ));
359 }
360 _ => {}
361 }
362 }
363
364 if let Ok(text) = std::str::from_utf8(content) {
366 let text_lower = text.to_lowercase();
367
368 if text.starts_with("#!") {
370 let extension = std::path::Path::new(filename)
371 .extension()
372 .and_then(|e| e.to_str())
373 .unwrap_or("")
374 .to_lowercase();
375
376 let safe_scripts = ["sh", "bash", "py", "rb", "js", "pl"];
378 if !safe_scripts.contains(&extension.as_str()) {
379 return Err(StorageError::Validation(
380 "Executable script files are not allowed".to_string()
381 ));
382 }
383 }
384
385 let dangerous_patterns = [
387 "invoke-expression", "iex", "invoke-webrequest", "iwr",
388 "start-process", "downloadstring", "downloadfile",
389 "@echo off", "cmd.exe", "powershell.exe",
390 ];
391
392 for pattern in &dangerous_patterns {
393 if text_lower.contains(pattern) {
394 return Err(StorageError::Validation(format!(
395 "File contains potentially dangerous content: '{}'", pattern
396 )));
397 }
398 }
399 }
400
401 Ok(())
402 }
403
404 pub fn validate_file(&self, filename: &str, content: &[u8], mime_type: &str) -> StorageResult<()> {
406 self.validate_filename(filename)?;
407 self.validate_size(content.len() as u64)?;
408 self.validate_extension(filename)?;
409 self.validate_mime_type(mime_type)?;
410 self.validate_content(filename, content, mime_type)?;
411
412 Ok(())
413 }
414}
415
/// Guesses a MIME type from the leading bytes of `content`.
///
/// Inputs shorter than four bytes are never classified. Known signatures
/// (JPEG, PNG, GIF, PDF, ZIP) are tried first. Otherwise the first 1024 bytes
/// are examined and the content is reported as `text/plain` when they are all
/// ASCII and contain no control bytes other than tab, line feed and carriage
/// return. Returns `None` when nothing matches.
fn detect_mime_from_content(content: &[u8]) -> Option<String> {
    // Bytes accepted by the plain-text heuristic.
    fn is_plain_text_byte(byte: u8) -> bool {
        match byte {
            9 | 10 | 13 | 32..=127 => true,
            _ => false,
        }
    }

    if content.len() < 4 {
        return None;
    }

    let begins_with = |signature: &[u8]| content.starts_with(signature);

    let sniffed = if begins_with(&[0xFF, 0xD8, 0xFF]) {
        "image/jpeg"
    } else if begins_with(b"\x89PNG") {
        "image/png"
    } else if begins_with(b"GIF8") {
        "image/gif"
    } else if begins_with(b"%PDF") {
        "application/pdf"
    } else if begins_with(b"PK\x03\x04") || begins_with(b"PK\x05\x06") || begins_with(b"PK\x07\x08")
    {
        "application/zip"
    } else if content.iter().take(1024).all(|&byte| is_plain_text_byte(byte)) {
        "text/plain"
    } else {
        return None;
    };

    Some(sniffed.to_string())
}
440
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned `String`s the builders take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_validation_config() {
        let config = ValidationConfig::new()
            .max_size(50 * 1024 * 1024)
            .allow_mime_types(owned(&["image/", "text/"]))
            .block_extensions(owned(&["exe", "bat"]))
            .ascii_filenames_only();

        assert_eq!(config.max_file_size, Some(50 * 1024 * 1024));

        let allowed_mimes = config.allowed_mime_types.as_ref().unwrap();
        assert!(allowed_mimes.contains("image/"));

        let blocked_extensions = config.blocked_extensions.as_ref().unwrap();
        assert!(blocked_extensions.contains("exe"));

        assert!(!config.allow_unicode_filenames);
    }

    #[test]
    fn test_file_validator_size() {
        let validator = FileValidator::new(ValidationConfig::new().max_size(1000).min_size(10));

        // Inside the bounds, including the upper limit itself.
        for &size in [500u64, 1000].iter() {
            assert!(validator.validate_size(size).is_ok());
        }

        // Just above the maximum and below the minimum.
        for &size in [1001u64, 5].iter() {
            assert!(validator.validate_size(size).is_err());
        }
    }

    #[test]
    fn test_file_validator_mime_type() {
        let validator = FileValidator::new(
            ValidationConfig::new().allow_mime_types(owned(&["image/", "text/plain"])),
        );

        for accepted in ["image/jpeg", "image/png", "text/plain"].iter() {
            assert!(validator.validate_mime_type(accepted).is_ok());
        }

        for rejected in ["application/pdf", "text/html"].iter() {
            assert!(validator.validate_mime_type(rejected).is_err());
        }
    }

    #[test]
    fn test_file_validator_extension() {
        let validator =
            FileValidator::new(ValidationConfig::new().block_extensions(owned(&["exe", "bat"])));

        for accepted in ["document.pdf", "image.jpg"].iter() {
            assert!(validator.validate_extension(accepted).is_ok());
        }

        // The last entry checks that the comparison ignores case.
        for rejected in ["script.exe", "script.bat", "Script.EXE"].iter() {
            assert!(validator.validate_extension(rejected).is_err());
        }
    }

    #[test]
    fn test_file_validator_filename() {
        let validator = FileValidator::new(
            ValidationConfig::new()
                .max_filename_length(20)
                .ascii_filenames_only(),
        );

        assert!(validator.validate_filename("document.pdf").is_ok());

        let rejected = [
            // Longer than the configured limit.
            "very_long_filename_that_exceeds_limit.txt",
            // Empty name.
            "",
            // Contains dangerous characters.
            "file<script>.txt",
            // Non-ASCII while only ASCII is permitted.
            "测试.txt",
            // Reserved device name.
            "CON.txt",
        ];
        for name in rejected.iter() {
            assert!(validator.validate_filename(name).is_err());
        }
    }

    #[test]
    fn test_detect_mime_from_content() {
        let jpeg_data = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        let png_data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        let text_data = b"Hello, World!";
        let binary_data = [0x00, 0x01, 0x02, 0x03];

        assert_eq!(
            detect_mime_from_content(&jpeg_data),
            Some("image/jpeg".to_string())
        );
        assert_eq!(
            detect_mime_from_content(&png_data),
            Some("image/png".to_string())
        );
        assert_eq!(
            detect_mime_from_content(text_data),
            Some("text/plain".to_string())
        );
        assert_eq!(detect_mime_from_content(&binary_data), None);
    }

    #[test]
    fn test_dangerous_content_detection() {
        let validator = FileValidator::new(ValidationConfig::new());

        // PE header ("MZ").
        let pe_data = [0x4D, 0x5A, 0x90, 0x00];
        assert!(validator
            .scan_for_dangerous_content("test.txt", &pe_data)
            .is_err());

        // ELF header.
        let elf_data = [0x7F, 0x45, 0x4C, 0x46];
        assert!(validator
            .scan_for_dangerous_content("test.txt", &elf_data)
            .is_err());

        let safe_text = b"This is just normal text content.";
        assert!(validator
            .scan_for_dangerous_content("test.txt", safe_text)
            .is_ok());

        let dangerous_script = b"powershell.exe -Command Invoke-Expression";
        assert!(validator
            .scan_for_dangerous_content("test.txt", dangerous_script)
            .is_err());
    }
}