acton_htmx/storage/
validation.rs

1//! MIME type validation with magic number checking
2//!
3//! This module provides secure MIME type validation that goes beyond trusting
4//! the Content-Type header. It uses magic number detection to verify file types
5//! based on actual file content.
6//!
7//! # Security
8//!
9//! **Never trust client-provided Content-Type headers alone!** Attackers can easily
10//! forge headers to bypass simple MIME type checks. This module uses the `infer` crate
11//! to examine file signatures (magic numbers) to determine the actual file type.
12//!
13//! # Examples
14//!
15//! ```rust
16//! use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
17//!
18//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
19//! let file = UploadedFile::new(
20//!     "image.jpg",
21//!     "image/jpeg", // Client-provided (could be forged!)
22//!     vec![0xFF, 0xD8, 0xFF], // JPEG magic bytes
23//! );
24//!
25//! let validator = MimeValidator::new();
26//!
27//! // Verify the file is actually a JPEG based on content
28//! validator.validate_against_magic(&file, &["image/jpeg"])?;
29//!
30//! // This would fail even if Content-Type says "image/jpeg"
31//! // because the magic bytes don't match
32//! let fake = UploadedFile::new(
33//!     "fake.jpg",
34//!     "image/jpeg", // Lies!
35//!     b"not actually a jpeg".to_vec(),
36//! );
37//! assert!(validator.validate_against_magic(&fake, &["image/jpeg"]).is_err());
38//! # Ok(())
39//! # }
40//! ```
41
42use super::types::{StorageError, StorageResult, UploadedFile};
43
/// MIME type validator backed by magic-number detection.
///
/// Rather than trusting the client-supplied Content-Type header, the validator
/// inspects the file signature (magic number) at the start of the upload to
/// work out what the payload really is.
///
/// The validator operates in one of two modes, selected by its `strict` flag:
///
/// - **strict** — content whose type cannot be detected is treated as invalid.
/// - **permissive** — undetectable content is tolerated by the validation
///   methods, which is less secure.
///
/// [`Default`] yields a strict validator.
#[derive(Debug, Clone)]
pub struct MimeValidator {
    /// When `true`, content whose type cannot be detected is treated as invalid.
    strict: bool,
}

impl Default for MimeValidator {
    /// Returns a validator in strict mode.
    ///
    /// This is written by hand on purpose: a derived `Default` would set
    /// `strict` to `false` and silently hand out the less secure permissive
    /// mode to anyone calling `MimeValidator::default()`.
    fn default() -> Self {
        Self { strict: true }
    }
}
53
54impl MimeValidator {
55    /// Creates a new MIME validator
56    ///
57    /// # Examples
58    ///
59    /// ```rust
60    /// use acton_htmx::storage::validation::MimeValidator;
61    ///
62    /// let validator = MimeValidator::new();
63    /// ```
64    #[must_use]
65    pub const fn new() -> Self {
66        Self { strict: true }
67    }
68
69    /// Creates a validator in permissive mode
70    ///
71    /// In permissive mode, if the magic number cannot be detected,
72    /// the validator falls back to checking the Content-Type header.
73    /// This is useful for file types without clear magic numbers.
74    ///
75    /// **Warning**: Permissive mode is less secure. Use only when necessary.
76    ///
77    /// # Examples
78    ///
79    /// ```rust
80    /// use acton_htmx::storage::validation::MimeValidator;
81    ///
82    /// let validator = MimeValidator::permissive();
83    /// ```
84    #[must_use]
85    pub const fn permissive() -> Self {
86        Self { strict: false }
87    }
88
89    /// Detects the actual MIME type from file content
90    ///
91    /// Uses magic number detection to determine the file type.
92    /// Returns `None` if the file type cannot be determined.
93    ///
94    /// # Examples
95    ///
96    /// ```rust
97    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
98    ///
99    /// let file = UploadedFile::new(
100    ///     "test.jpg",
101    ///     "application/octet-stream",
102    ///     vec![0xFF, 0xD8, 0xFF], // JPEG magic bytes
103    /// );
104    ///
105    /// let validator = MimeValidator::new();
106    /// let detected = validator.detect_mime(&file);
107    /// assert_eq!(detected, Some("image/jpeg"));
108    /// ```
109    #[must_use]
110    pub fn detect_mime(&self, file: &UploadedFile) -> Option<&'static str> {
111        infer::get(&file.data).map(|kind| kind.mime_type())
112    }
113
114    /// Validates file against allowed MIME types using magic number detection
115    ///
116    /// This is the most secure validation method as it checks the actual file content
117    /// rather than trusting the Content-Type header.
118    ///
119    /// # Errors
120    ///
121    /// Returns `StorageError::InvalidMimeType` if:
122    /// - The detected type is not in `allowed_types`
123    /// - In strict mode: The file type cannot be detected
124    ///
125    /// # Examples
126    ///
127    /// ```rust
128    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
129    ///
130    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
131    /// let file = UploadedFile::new(
132    ///     "photo.png",
133    ///     "image/png",
134    ///     vec![0x89, 0x50, 0x4E, 0x47], // PNG magic bytes
135    /// );
136    ///
137    /// let validator = MimeValidator::new();
138    /// validator.validate_against_magic(&file, &["image/png", "image/jpeg"])?;
139    /// # Ok(())
140    /// # }
141    /// ```
142    pub fn validate_against_magic(
143        &self,
144        file: &UploadedFile,
145        allowed_types: &[&str],
146    ) -> StorageResult<()> {
147        match self.detect_mime(file) {
148            Some(detected_type) => {
149                if !allowed_types.contains(&detected_type) {
150                    return Err(StorageError::InvalidMimeType {
151                        expected: allowed_types.iter().map(|s| (*s).to_string()).collect(),
152                        actual: detected_type.to_string(),
153                    });
154                }
155                Ok(())
156            }
157            None => {
158                if self.strict {
159                    // In strict mode, inability to detect is an error
160                    Err(StorageError::InvalidMimeType {
161                        expected: allowed_types.iter().map(|s| (*s).to_string()).collect(),
162                        actual: "unknown (could not detect from content)".to_string(),
163                    })
164                } else {
165                    // In permissive mode, fall back to Content-Type header
166                    file.validate_mime(allowed_types)
167                }
168            }
169        }
170    }
171
172    /// Validates that the Content-Type header matches the detected type
173    ///
174    /// This ensures that the client-provided Content-Type header is accurate.
175    /// Useful for detecting mismatches that might indicate malicious uploads.
176    ///
177    /// # Errors
178    ///
179    /// Returns `StorageError::InvalidMimeType` if the header doesn't match detected type
180    ///
181    /// # Examples
182    ///
183    /// ```rust
184    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
185    ///
186    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
187    /// // Honest upload - header matches content
188    /// let honest = UploadedFile::new(
189    ///     "photo.jpg",
190    ///     "image/jpeg",
191    ///     vec![0xFF, 0xD8, 0xFF], // JPEG magic bytes
192    /// );
193    ///
194    /// let validator = MimeValidator::new();
195    /// validator.validate_header_matches_content(&honest)?;
196    ///
197    /// // Dishonest upload - header lies about content
198    /// let dishonest = UploadedFile::new(
199    ///     "malware.jpg", // Claims to be JPEG
200    ///     "image/jpeg",
201    ///     b"#!/bin/sh\nrm -rf /".to_vec(), // But it's a shell script!
202    /// );
203    ///
204    /// assert!(validator.validate_header_matches_content(&dishonest).is_err());
205    /// # Ok(())
206    /// # }
207    /// ```
208    pub fn validate_header_matches_content(&self, file: &UploadedFile) -> StorageResult<()> {
209        match self.detect_mime(file) {
210            Some(detected_type) => {
211                if detected_type != file.content_type {
212                    return Err(StorageError::InvalidMimeType {
213                        expected: vec![file.content_type.clone()],
214                        actual: detected_type.to_string(),
215                    });
216                }
217                Ok(())
218            }
219            None => {
220                // If we can't detect, we can't verify
221                if self.strict {
222                    Err(StorageError::InvalidMimeType {
223                        expected: vec![file.content_type.clone()],
224                        actual: "unknown (could not detect from content)".to_string(),
225                    })
226                } else {
227                    Ok(())
228                }
229            }
230        }
231    }
232
233    /// Checks if the file is an image
234    ///
235    /// # Examples
236    ///
237    /// ```rust
238    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
239    ///
240    /// let image = UploadedFile::new(
241    ///     "photo.png",
242    ///     "image/png",
243    ///     vec![0x89, 0x50, 0x4E, 0x47], // PNG magic bytes
244    /// );
245    ///
246    /// let validator = MimeValidator::new();
247    /// assert!(validator.is_image(&image));
248    ///
249    /// let text = UploadedFile::new(
250    ///     "doc.txt",
251    ///     "text/plain",
252    ///     b"Hello, world!".to_vec(),
253    /// );
254    /// assert!(!validator.is_image(&text));
255    /// ```
256    #[must_use]
257    pub fn is_image(&self, file: &UploadedFile) -> bool {
258        self.detect_mime(file)
259            .is_some_and(|mime| mime.starts_with("image/"))
260    }
261
262    /// Checks if the file is a video
263    ///
264    /// # Examples
265    ///
266    /// ```rust
267    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
268    ///
269    /// let video = UploadedFile::new(
270    ///     "clip.mp4",
271    ///     "video/mp4",
272    ///     vec![0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70], // MP4 magic
273    /// );
274    ///
275    /// let validator = MimeValidator::new();
276    /// assert!(validator.is_video(&video));
277    /// ```
278    #[must_use]
279    pub fn is_video(&self, file: &UploadedFile) -> bool {
280        self.detect_mime(file)
281            .is_some_and(|mime| mime.starts_with("video/"))
282    }
283
284    /// Checks if the file is a document (PDF, Office, etc.)
285    ///
286    /// # Examples
287    ///
288    /// ```rust
289    /// use acton_htmx::storage::{UploadedFile, validation::MimeValidator};
290    ///
291    /// let pdf = UploadedFile::new(
292    ///     "doc.pdf",
293    ///     "application/pdf",
294    ///     vec![0x25, 0x50, 0x44, 0x46], // PDF magic bytes
295    /// );
296    ///
297    /// let validator = MimeValidator::new();
298    /// assert!(validator.is_document(&pdf));
299    /// ```
300    #[must_use]
301    pub fn is_document(&self, file: &UploadedFile) -> bool {
302        const DOCUMENT_TYPES: &[&str] = &[
303            "application/pdf",
304            "application/msword",
305            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
306            "application/vnd.ms-excel",
307            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
308            "application/vnd.ms-powerpoint",
309            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
310        ];
311
312        self.detect_mime(file)
313            .is_some_and(|mime| DOCUMENT_TYPES.contains(&mime))
314    }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    // Leading signature bytes of a few well-known formats, used as fixtures.
    const JPEG_MAGIC: &[u8] = &[0xFF, 0xD8, 0xFF];
    const PNG_MAGIC: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
    const GIF_MAGIC: &[u8] = b"GIF89a";
    const PDF_MAGIC: &[u8] = b"%PDF-1.4";
    const ZIP_MAGIC: &[u8] = &[0x50, 0x4B, 0x03, 0x04];

    /// Builds an upload fixture from a file name, a claimed Content-Type and raw bytes.
    fn upload(name: &'static str, content_type: &'static str, bytes: &[u8]) -> UploadedFile {
        UploadedFile::new(name, content_type, bytes.to_vec())
    }

    #[test]
    fn test_detect_jpeg() {
        let sample = upload("test.jpg", "image/jpeg", JPEG_MAGIC);
        assert_eq!(MimeValidator::new().detect_mime(&sample), Some("image/jpeg"));
    }

    #[test]
    fn test_detect_png() {
        let sample = upload("test.png", "image/png", PNG_MAGIC);
        assert_eq!(MimeValidator::new().detect_mime(&sample), Some("image/png"));
    }

    #[test]
    fn test_detect_gif() {
        let sample = upload("test.gif", "image/gif", GIF_MAGIC);
        assert_eq!(MimeValidator::new().detect_mime(&sample), Some("image/gif"));
    }

    #[test]
    fn test_detect_pdf() {
        let sample = upload("test.pdf", "application/pdf", PDF_MAGIC);
        assert_eq!(
            MimeValidator::new().detect_mime(&sample),
            Some("application/pdf")
        );
    }

    #[test]
    fn test_detect_unknown() {
        // Plain text carries no magic number, so nothing can be detected.
        let sample = upload("test.txt", "text/plain", b"hello");
        assert_eq!(MimeValidator::new().detect_mime(&sample), None);
    }

    #[test]
    fn test_validate_against_magic_success() {
        let sample = upload("photo.jpg", "image/jpeg", JPEG_MAGIC);
        let outcome =
            MimeValidator::new().validate_against_magic(&sample, &["image/jpeg", "image/png"]);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_against_magic_failure() {
        // A real JPEG checked against an allow-list that does not include JPEG.
        let sample = upload("photo.jpg", "image/jpeg", JPEG_MAGIC);
        let outcome =
            MimeValidator::new().validate_against_magic(&sample, &["image/png", "image/gif"]);
        assert!(matches!(
            outcome,
            Err(StorageError::InvalidMimeType { .. })
        ));
    }

    #[test]
    fn test_validate_against_magic_strict_unknown() {
        // `new()` is strict: undetectable content must be rejected.
        let sample = upload("test.txt", "text/plain", b"hello");
        let outcome = MimeValidator::new().validate_against_magic(&sample, &["text/plain"]);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_against_magic_permissive_unknown() {
        // Permissive mode defers to the Content-Type header when detection fails.
        let sample = upload("test.txt", "text/plain", b"hello");
        let outcome = MimeValidator::permissive().validate_against_magic(&sample, &["text/plain"]);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_header_matches_content_honest() {
        let sample = upload("photo.png", "image/png", PNG_MAGIC);
        let outcome = MimeValidator::new().validate_header_matches_content(&sample);
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_header_matches_content_dishonest() {
        // Header claims JPEG while the bytes are a PNG.
        let sample = upload("fake.jpg", "image/jpeg", PNG_MAGIC);
        let outcome = MimeValidator::new().validate_header_matches_content(&sample);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_header_matches_content_malicious() {
        // Header claims an image while the payload is a shell script.
        let sample = upload("malware.jpg", "image/jpeg", b"#!/bin/sh\nrm -rf /");
        let outcome = MimeValidator::new().validate_header_matches_content(&sample);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_is_image() {
        let validator = MimeValidator::new();

        // Both raster formats are images...
        for (name, content_type, bytes) in [
            ("photo.jpg", "image/jpeg", JPEG_MAGIC),
            ("photo.png", "image/png", PNG_MAGIC),
        ] {
            assert!(validator.is_image(&upload(name, content_type, bytes)));
        }

        // ...while a PDF is not.
        let pdf = upload("doc.pdf", "application/pdf", PDF_MAGIC);
        assert!(!validator.is_image(&pdf));
    }

    #[test]
    fn test_is_document() {
        let validator = MimeValidator::new();

        let pdf = upload("doc.pdf", "application/pdf", PDF_MAGIC);
        let jpeg = upload("photo.jpg", "image/jpeg", JPEG_MAGIC);

        assert!(validator.is_document(&pdf));
        assert!(!validator.is_document(&jpeg));
    }

    #[test]
    fn test_forged_extension() {
        // An attacker gives a non-image payload a `.jpg` name and a JPEG
        // Content-Type; the ZIP signature stands in for that payload.
        let sample = upload("malware.jpg", "image/jpeg", ZIP_MAGIC);

        // Detection looks at the bytes, so the claimed type must not help.
        let outcome = MimeValidator::new().validate_against_magic(&sample, &["image/jpeg"]);
        assert!(outcome.is_err());
    }
}
469            .is_err());
470    }
471}