nameback_core/
detector.rs

1use anyhow::Result;
2use std::fs::File;
3use std::io::Read;
4use std::path::Path;
5
/// Broad category a file is sorted into by the detectors in this module.
///
/// The enum carries no data, so it derives `Copy`, `Eq` and `Hash` in addition
/// to `Debug`/`Clone`/`PartialEq`; values can be passed by value and used as
/// keys in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileCategory {
    /// Pictures, e.g. `image/*` content or extensions such as `jpg`, `png`, `svg`.
    Image,
    /// Documents: PDF, office formats, RTF, plain text and similar.
    Document,
    /// Sound files, e.g. `audio/*` content or extensions such as `mp3`, `flac`.
    Audio,
    /// Moving pictures, e.g. `video/*` content or extensions such as `mp4`, `mkv`.
    Video,
    /// Stored e-mail messages (`eml`, `msg`).
    Email,
    /// Saved web pages (`html`, `htm`, `mhtml`).
    Web,
    /// Archives and compressed containers (`zip`, `tar`, `7z`, ...).
    Archive,
    /// Program source files (`rs`, `py`, `c`, ...).
    SourceCode,
    /// Nothing matched; the file type could not be determined.
    Unknown,
}
19
20/// Detects the file type using the infer library (pure Rust, cross-platform)
21pub fn detect_file_type(path: &Path) -> Result<FileCategory> {
22    // Read the first 8192 bytes for file type detection
23    let mut file = File::open(path)?;
24    let mut buffer = vec![0u8; 8192];
25    let bytes_read = file.read(&mut buffer)?;
26    buffer.truncate(bytes_read);
27
28    // Use infer to detect file type from magic bytes
29    let category = if let Some(kind) = infer::get(&buffer) {
30        let mime_type = kind.mime_type();
31
32        match mime_type {
33            // Image types
34            s if s.starts_with("image/") => FileCategory::Image,
35
36            // Document types
37            "application/pdf" => FileCategory::Document,
38            s if s.starts_with("application/vnd.openxmlformats-officedocument") => {
39                FileCategory::Document
40            }
41            s if s.starts_with("application/vnd.ms-") => FileCategory::Document,
42            s if s.starts_with("application/vnd.oasis.opendocument") => FileCategory::Document,
43            "application/rtf" => FileCategory::Document,
44            "application/msword" => FileCategory::Document,
45            s if s.starts_with("text/") => FileCategory::Document,
46
47            // Audio types
48            s if s.starts_with("audio/") => FileCategory::Audio,
49
50            // Video types
51            s if s.starts_with("video/") => FileCategory::Video,
52
53            _ => FileCategory::Unknown,
54        }
55    } else {
56        // Fallback to extension-based detection if magic bytes don't match
57        detect_by_extension(path)
58    };
59
60    Ok(category)
61}
62
63/// Fallback file type detection based on extension
64fn detect_by_extension(path: &Path) -> FileCategory {
65    path.extension()
66        .and_then(|ext| ext.to_str())
67        .map(|ext| {
68            let ext_lower = ext.to_lowercase();
69            match ext_lower.as_str() {
70                // Images
71                "jpg" | "jpeg" | "png" | "gif" | "bmp" | "tiff" | "tif" | "webp" | "heic"
72                | "heif" | "ico" | "svg" => FileCategory::Image,
73                // Documents
74                "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "odt" | "ods"
75                | "odp" | "rtf" | "txt" | "md" | "markdown" | "csv" => FileCategory::Document,
76                // Email
77                "eml" | "msg" => FileCategory::Email,
78                // Web
79                "html" | "htm" | "mhtml" => FileCategory::Web,
80                // Archive
81                "zip" | "tar" | "gz" | "tgz" | "bz2" | "xz" | "7z" | "rar" => FileCategory::Archive,
82                // Source Code (non-text mime types)
83                "py" | "js" | "ts" | "rs" | "java" | "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => FileCategory::SourceCode,
84                // Config files as documents
85                "json" | "yaml" | "yml" => FileCategory::Document,
86                // Audio
87                "mp3" | "wav" | "flac" | "aac" | "ogg" | "m4a" | "wma" | "opus" => {
88                    FileCategory::Audio
89                }
90                // Video
91                "mp4" | "avi" | "mkv" | "mov" | "wmv" | "flv" | "webm" | "m4v" | "mpg" | "mpeg" => {
92                    FileCategory::Video
93                }
94                _ => FileCategory::Unknown,
95            }
96        })
97        .unwrap_or(FileCategory::Unknown)
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// `/dev/null` reads as an empty file without an extension, so neither the
    /// magic-byte check nor the extension check can classify it.
    /// Gated on Unix because the device does not exist elsewhere.
    #[cfg(unix)]
    #[test]
    fn test_detect_file_type_exists() {
        let test_path = PathBuf::from("/dev/null");
        let category = detect_file_type(&test_path).expect("/dev/null should be readable");
        assert_eq!(category, FileCategory::Unknown);
    }

    /// A path that cannot be opened is reported as an error, not as `Unknown`.
    #[test]
    fn test_detect_file_type_missing_file_is_error() {
        let missing = PathBuf::from("this/path/should/not/exist/nameback-detector-test");
        assert!(detect_file_type(&missing).is_err());
    }

    /// Recognised content takes precedence over a misleading extension.
    #[test]
    fn test_detect_file_type_prefers_magic_bytes() {
        let path = std::env::temp_dir().join(format!(
            "nameback_detector_magic_{}.txt",
            std::process::id()
        ));
        // PNG signature followed by the start of an IHDR chunk.
        let png_header: [u8; 16] = [
            0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0, 0, 0, 0x0D, b'I', b'H', b'D', b'R',
        ];
        std::fs::write(&path, png_header).expect("temp file should be writable");

        let detected = detect_file_type(&path);
        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&path);

        assert_eq!(
            detected.expect("temp file should be readable"),
            FileCategory::Image
        );
    }

    /// Extension lookup is case-insensitive and covers every category.
    #[test]
    fn test_detect_by_extension_categories() {
        let cases = [
            ("photo.JPG", FileCategory::Image),
            ("report.pdf", FileCategory::Document),
            ("settings.yml", FileCategory::Document),
            ("mail.eml", FileCategory::Email),
            ("page.html", FileCategory::Web),
            ("backup.tar.gz", FileCategory::Archive),
            ("main.rs", FileCategory::SourceCode),
            ("song.mp3", FileCategory::Audio),
            ("clip.mkv", FileCategory::Video),
            ("mystery.xyz", FileCategory::Unknown),
            ("no_extension", FileCategory::Unknown),
        ];
        for (name, expected) in cases.iter() {
            assert_eq!(
                &detect_by_extension(Path::new(name)),
                expected,
                "path: {}",
                name
            );
        }
    }
}