nameback_core/
detector.rs

1use anyhow::Result;
2use std::fs::File;
3use std::io::Read;
4use std::path::Path;
5
/// Broad category assigned to a file by type detection.
///
/// The enum is a plain set of unit variants, so besides `PartialEq` it also
/// derives `Eq` and `Hash`, which lets categories be used as keys in
/// `HashMap`/`HashSet` (for example to group or count files per category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileCategory {
    /// Picture formats (for example `jpg`, `png`, `gif`, `heic`).
    Image,
    /// Textual and office documents (for example `pdf`, `docx`, `txt`, `json`).
    Document,
    /// Sound recordings (for example `mp3`, `wav`, `flac`).
    Audio,
    /// Moving-picture formats (for example `mp4`, `avi`, `mkv`).
    Video,
    /// Stored e-mail messages (for example `eml`, `msg`).
    Email,
    /// Web pages (for example `html`, `htm`, `mhtml`).
    Web,
    /// Compressed or bundled archives (for example `zip`, `tar`, `gz`).
    Archive,
    /// Program source files (for example `py`, `js`, `rs`).
    SourceCode,
    /// Anything that could not be classified.
    Unknown,
}
19
20/// Detects the file type using the infer library (pure Rust, cross-platform)
21pub fn detect_file_type(path: &Path) -> Result<FileCategory> {
22    // Read the first 8192 bytes for file type detection
23    let mut file = File::open(path)?;
24    let mut buffer = vec![0u8; 8192];
25    let bytes_read = file.read(&mut buffer)?;
26    buffer.truncate(bytes_read);
27
28    // Use infer to detect file type from magic bytes
29    let category = if let Some(kind) = infer::get(&buffer) {
30        let mime_type = kind.mime_type();
31
32        match mime_type {
33            // Image types
34            s if s.starts_with("image/") => FileCategory::Image,
35
36            // Document types
37            "application/pdf" => FileCategory::Document,
38            s if s.starts_with("application/vnd.openxmlformats-officedocument") => {
39                FileCategory::Document
40            }
41            s if s.starts_with("application/vnd.ms-") => FileCategory::Document,
42            s if s.starts_with("application/vnd.oasis.opendocument") => FileCategory::Document,
43            "application/rtf" => FileCategory::Document,
44            "application/msword" => FileCategory::Document,
45            s if s.starts_with("text/") => FileCategory::Document,
46
47            // Audio types
48            s if s.starts_with("audio/") => FileCategory::Audio,
49
50            // Video types
51            s if s.starts_with("video/") => FileCategory::Video,
52
53            _ => FileCategory::Unknown,
54        }
55    } else {
56        // Fallback to extension-based detection if magic bytes don't match
57        detect_by_extension(path)
58    };
59
60    Ok(category)
61}
62
63/// Fallback file type detection based on extension
64fn detect_by_extension(path: &Path) -> FileCategory {
65    path.extension()
66        .and_then(|ext| ext.to_str())
67        .map(|ext| {
68            let ext_lower = ext.to_lowercase();
69            match ext_lower.as_str() {
70                // Images
71                "jpg" | "jpeg" | "png" | "gif" | "bmp" | "tiff" | "tif" | "webp" | "heic"
72                | "heif" | "ico" | "svg" => FileCategory::Image,
73                // Documents
74                "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "odt" | "ods"
75                | "odp" | "rtf" | "txt" | "md" | "markdown" | "csv" => FileCategory::Document,
76                // Email
77                "eml" | "msg" => FileCategory::Email,
78                // Web
79                "html" | "htm" | "mhtml" => FileCategory::Web,
80                // Archive
81                "zip" | "tar" | "gz" | "tgz" | "bz2" | "xz" | "7z" | "rar" => FileCategory::Archive,
82                // Source Code (non-text mime types)
83                "py" | "js" | "ts" | "rs" | "java" | "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => FileCategory::SourceCode,
84                // Config files as documents
85                "json" | "yaml" | "yml" => FileCategory::Document,
86                // Audio
87                "mp3" | "wav" | "flac" | "aac" | "ogg" | "m4a" | "wma" | "opus" => {
88                    FileCategory::Audio
89                }
90                // Video
91                "mp4" | "avi" | "mkv" | "mov" | "wmv" | "flv" | "webm" | "m4v" | "mpg" | "mpeg" => {
92                    FileCategory::Video
93                }
94                _ => FileCategory::Unknown,
95            }
96        })
97        .unwrap_or(FileCategory::Unknown)
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Asserts that `file_name` is classified as `expected` from its extension alone.
    ///
    /// `#[track_caller]` makes a failure point at the calling line, which
    /// identifies the offending file name.
    #[track_caller]
    fn expect_by_extension(file_name: &str, expected: FileCategory) {
        assert_eq!(detect_by_extension(&PathBuf::from(file_name)), expected);
    }

    /// Writes `contents` to `file_name` inside a fresh temporary directory and
    /// returns the category reported by `detect_file_type` for that file.
    fn detect_written_file(file_name: &str, contents: &[u8]) -> FileCategory {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join(file_name);
        fs::write(&file_path, contents).unwrap();
        detect_file_type(&file_path).unwrap()
    }

    #[test]
    fn test_detect_by_extension_images() {
        expect_by_extension("photo.jpg", FileCategory::Image);
        expect_by_extension("image.png", FileCategory::Image);
        expect_by_extension("graphic.gif", FileCategory::Image);
        expect_by_extension("photo.HEIC", FileCategory::Image);
    }

    #[test]
    fn test_detect_by_extension_documents() {
        expect_by_extension("report.pdf", FileCategory::Document);
        expect_by_extension("doc.docx", FileCategory::Document);
        expect_by_extension("sheet.xlsx", FileCategory::Document);
        expect_by_extension("notes.txt", FileCategory::Document);
        expect_by_extension("config.json", FileCategory::Document);
    }

    #[test]
    fn test_detect_by_extension_audio() {
        expect_by_extension("song.mp3", FileCategory::Audio);
        expect_by_extension("audio.wav", FileCategory::Audio);
        expect_by_extension("track.flac", FileCategory::Audio);
    }

    #[test]
    fn test_detect_by_extension_video() {
        expect_by_extension("movie.mp4", FileCategory::Video);
        expect_by_extension("clip.avi", FileCategory::Video);
        expect_by_extension("video.mkv", FileCategory::Video);
    }

    #[test]
    fn test_detect_by_extension_email() {
        expect_by_extension("message.eml", FileCategory::Email);
        expect_by_extension("email.msg", FileCategory::Email);
    }

    #[test]
    fn test_detect_by_extension_web() {
        expect_by_extension("page.html", FileCategory::Web);
        expect_by_extension("site.htm", FileCategory::Web);
        expect_by_extension("archive.mhtml", FileCategory::Web);
    }

    #[test]
    fn test_detect_by_extension_archive() {
        expect_by_extension("files.zip", FileCategory::Archive);
        expect_by_extension("backup.tar", FileCategory::Archive);
        expect_by_extension("archive.gz", FileCategory::Archive);
    }

    #[test]
    fn test_detect_by_extension_source_code() {
        expect_by_extension("script.py", FileCategory::SourceCode);
        expect_by_extension("app.js", FileCategory::SourceCode);
        expect_by_extension("main.rs", FileCategory::SourceCode);
    }

    #[test]
    fn test_detect_by_extension_unknown() {
        expect_by_extension("unknown.xyz", FileCategory::Unknown);
        expect_by_extension("noextension", FileCategory::Unknown);
    }

    #[test]
    fn test_detect_by_extension_case_insensitive() {
        expect_by_extension("IMAGE.JPG", FileCategory::Image);
        expect_by_extension("Document.PDF", FileCategory::Document);
    }

    #[test]
    fn test_detect_file_type_with_temp_file() {
        // PNG signature bytes
        let png_magic = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert_eq!(
            detect_written_file("test.png", &png_magic),
            FileCategory::Image
        );
    }

    #[test]
    fn test_detect_file_type_jpeg() {
        // JPEG signature bytes: FF D8 FF
        let jpeg_magic = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46];
        assert_eq!(
            detect_written_file("test.jpg", &jpeg_magic),
            FileCategory::Image
        );
    }

    #[test]
    fn test_detect_file_type_pdf() {
        // PDF signature: %PDF
        assert_eq!(
            detect_written_file("test.pdf", b"%PDF-1.4\n"),
            FileCategory::Document
        );
    }

    #[test]
    fn test_detect_file_type_falls_back_to_extension() {
        // Plain text carries no magic bytes, so the extension must decide.
        assert_eq!(
            detect_written_file("test.txt", b"Hello, world!"),
            FileCategory::Document
        );
    }
}