Skip to main content

fraiseql_server/files/
validation.rs

1//! File validation with magic bytes detection
2
3use bytes::Bytes;
4
5use crate::files::{
6    config::{FileConfig, parse_size},
7    error::FileError,
8    traits::{FileValidator, ValidatedFile},
9};
10
11/// Default file validator implementation
12pub struct DefaultFileValidator;
13
14impl FileValidator for DefaultFileValidator {
15    fn validate(
16        &self,
17        data: &Bytes,
18        declared_type: &str,
19        filename: &str,
20        config: &FileConfig,
21    ) -> Result<ValidatedFile, FileError> {
22        validate_file(data, declared_type, filename, config)
23    }
24}
25
26/// Validate uploaded file
27pub fn validate_file(
28    data: &Bytes,
29    declared_type: &str,
30    filename: &str,
31    config: &FileConfig,
32) -> Result<ValidatedFile, FileError> {
33    // Check size
34    let max_size = parse_size(&config.max_size).unwrap_or(10 * 1024 * 1024);
35
36    if data.len() > max_size {
37        return Err(FileError::TooLarge {
38            size: data.len(),
39            max:  max_size,
40        });
41    }
42
43    // Check MIME type is allowed
44    if !config.allowed_types.iter().any(|t| t == declared_type || t == "*/*") {
45        return Err(FileError::InvalidType {
46            got:     declared_type.to_string(),
47            allowed: config.allowed_types.clone(),
48        });
49    }
50
51    // Sanitize filename
52    let sanitized = sanitize_filename(filename)?;
53
54    // Detect content type if magic bytes validation is enabled
55    let detected_type = if config.validate_magic_bytes {
56        let detected = detect_content_type(data);
57        validate_magic_bytes(&detected, declared_type)?;
58        Some(detected)
59    } else {
60        None
61    };
62
63    Ok(ValidatedFile {
64        content_type: declared_type.to_string(),
65        sanitized_filename: sanitized,
66        size: data.len(),
67        detected_type,
68    })
69}
70
71/// Detect content type from magic bytes
72pub fn detect_content_type(data: &Bytes) -> String {
73    infer::get(data)
74        .map(|t| t.mime_type().to_string())
75        .unwrap_or_else(|| "application/octet-stream".to_string())
76}
77
78/// Validate file content matches declared MIME type
79fn validate_magic_bytes(detected: &str, declared: &str) -> Result<(), FileError> {
80    // Allow some flexibility in MIME type matching
81    if !mime_types_compatible(detected, declared) {
82        return Err(FileError::MimeMismatch {
83            declared: declared.to_string(),
84            detected: detected.to_string(),
85        });
86    }
87
88    Ok(())
89}
90
/// Decide whether a sniffed MIME type is an acceptable match for a declared one.
///
/// Only the `type/subtype` part is compared: anything from the first `;`
/// onwards (parameters such as `charset=utf-8`) and surrounding whitespace is
/// ignored, and the comparison is ASCII case-insensitive because media type
/// names are case-insensitive.
///
/// Two types are compatible when any of the following holds:
///
/// * they are equal,
/// * they form one of the known equivalent pairs (`image/jpeg` / `image/jpg`,
///   `text/plain` / `application/octet-stream`), in either order,
/// * both have the top-level type `image`.
fn mime_types_compatible(detected: &str, declared: &str) -> bool {
    // Pairs treated as interchangeable regardless of which side is which.
    const EQUIVALENTS: [(&str, &str); 2] = [
        ("image/jpeg", "image/jpg"),
        ("text/plain", "application/octet-stream"),
    ];

    // `type/subtype` without parameters or padding.
    fn essence(mime: &str) -> &str {
        mime.split(';').next().unwrap_or(mime).trim()
    }

    // Top-level type, i.e. everything before the first `/`.
    fn major(mime: &str) -> &str {
        mime.split('/').next().unwrap_or("")
    }

    let detected = essence(detected);
    let declared = essence(declared);

    if detected.eq_ignore_ascii_case(declared) {
        return true;
    }

    let is_equivalent_pair = EQUIVALENTS.iter().any(|&(a, b)| {
        (detected.eq_ignore_ascii_case(a) && declared.eq_ignore_ascii_case(b))
            || (detected.eq_ignore_ascii_case(b) && declared.eq_ignore_ascii_case(a))
    });
    if is_equivalent_pair {
        return true;
    }

    // Any image subtype is accepted for any other declared image subtype.
    major(detected).eq_ignore_ascii_case("image") && major(declared).eq_ignore_ascii_case("image")
}
120
121/// Sanitize filename to prevent path traversal and other attacks
122pub fn sanitize_filename(filename: &str) -> Result<String, FileError> {
123    // Remove path components (prevent ../../../etc/passwd)
124    let filename = filename.rsplit(['/', '\\']).next().unwrap_or(filename);
125
126    // Empty filename after removing path
127    if filename.is_empty() || filename == "." || filename == ".." {
128        return Err(FileError::InvalidFilename {
129            reason: "Filename cannot be empty or path component".into(),
130        });
131    }
132
133    // Remove null bytes (C string terminator attack)
134    let filename = filename.replace('\0', "");
135
136    // Limit length
137    if filename.len() > 255 {
138        return Err(FileError::InvalidFilename {
139            reason: "Filename too long (max 255 characters)".into(),
140        });
141    }
142
143    // Replace dangerous characters but preserve extension
144    let sanitized: String = filename
145        .chars()
146        .enumerate()
147        .map(|(i, c)| {
148            match c {
149                // Allow alphanumeric
150                'a'..='z' | 'A'..='Z' | '0'..='9' => c,
151                // Allow dot (for extension) but not as first character
152                '.' if i > 0 => c,
153                // Allow hyphen and underscore
154                '-' | '_' => c,
155                // Replace everything else with underscore
156                _ => '_',
157            }
158        })
159        .collect();
160
161    // Ensure we have a valid filename
162    if sanitized.is_empty() || sanitized.chars().all(|c| c == '_') {
163        return Err(FileError::InvalidFilename {
164            reason: "Filename contains no valid characters".into(),
165        });
166    }
167
168    Ok(sanitized)
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// Exact matches, the listed equivalent pairs (both directions) and the
    /// image-family rule are accepted; unrelated types are not.
    #[test]
    fn test_mime_compatibility() {
        assert!(mime_types_compatible("image/jpeg", "image/jpeg"));
        assert!(mime_types_compatible("image/jpeg", "image/jpg"));
        assert!(mime_types_compatible("image/jpg", "image/jpeg"));
        assert!(mime_types_compatible("text/plain", "application/octet-stream"));
        assert!(mime_types_compatible("application/octet-stream", "text/plain"));
        assert!(mime_types_compatible("image/png", "image/webp")); // Same major
        assert!(!mime_types_compatible("image/jpeg", "application/pdf"));
        assert!(!mime_types_compatible("application/zip", "application/pdf"));
    }

    /// Safe names pass through, directories are dropped and unsafe
    /// characters become underscores.
    #[test]
    fn test_sanitize_filename() {
        assert_eq!(sanitize_filename("photo.jpg").unwrap(), "photo.jpg");
        assert_eq!(sanitize_filename("my-file_2024.pdf").unwrap(), "my-file_2024.pdf");

        // Path traversal
        let result = sanitize_filename("../../../etc/passwd").unwrap();
        assert!(!result.contains(".."));
        assert_eq!(result, "passwd");

        // Windows-style separators are stripped as well
        assert_eq!(sanitize_filename("C:\\temp\\report.pdf").unwrap(), "report.pdf");

        // Dangerous characters: each one is replaced by a single underscore
        let result = sanitize_filename("file<>:\"|?*.jpg").unwrap();
        for unsafe_char in ['<', '>', ':', '"', '|', '?', '*'] {
            assert!(!result.contains(unsafe_char));
        }
        assert_eq!(result, "file_______.jpg");

        // Whitespace and a leading dot are replaced too
        assert_eq!(sanitize_filename("my file.txt").unwrap(), "my_file.txt");
        assert_eq!(sanitize_filename(".htaccess").unwrap(), "_htaccess");
    }

    /// Names with nothing usable in them are rejected.
    #[test]
    fn test_sanitize_filename_rejects_unusable_names() {
        assert!(sanitize_filename("").is_err());
        assert!(sanitize_filename(".").is_err());
        assert!(sanitize_filename("..").is_err());
        assert!(sanitize_filename("uploads/").is_err());
        assert!(sanitize_filename("???").is_err());
    }

    /// The 255 limit is inclusive.
    #[test]
    fn test_sanitize_filename_length_limit() {
        assert!(sanitize_filename(&"a".repeat(255)).is_ok());
        assert!(sanitize_filename(&"a".repeat(256)).is_err());
    }

    /// NUL characters are removed rather than replaced.
    #[test]
    fn test_null_byte_removal() {
        let result = sanitize_filename("image.jpg\0.exe").unwrap();
        assert!(!result.contains('\0'));
        assert_eq!(result, "image.jpg.exe");
    }
}