nameback_core/
detector.rs1use anyhow::Result;
2use std::fs::File;
3use std::io::Read;
4use std::path::Path;
5
/// Broad category that a file is sorted into by the detection functions in
/// this module.
///
/// The enum carries no data, so it also derives `Eq` and `Hash`; this lets
/// callers use a category as a `HashMap`/`HashSet` key or in exhaustive
/// equality checks.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileCategory {
    /// Pictures (for example `.jpg`, `.png`, or any `image/*` MIME type).
    Image,
    /// Documents and text-like data (for example `.pdf`, `.docx`, `.txt`, `.json`).
    Document,
    /// Sound files (for example `.mp3`, `.flac`, or any `audio/*` MIME type).
    Audio,
    /// Moving pictures (for example `.mp4`, `.mkv`, or any `video/*` MIME type).
    Video,
    /// Stored e-mail messages (for example `.eml`, `.msg`).
    Email,
    /// Saved web pages (for example `.html`, `.mhtml`).
    Web,
    /// Compressed or bundled files (for example `.zip`, `.tar`, `.7z`).
    Archive,
    /// Program source files (for example `.rs`, `.py`, `.c`).
    SourceCode,
    /// Anything that could not be assigned to one of the categories above.
    Unknown,
}
19
20pub fn detect_file_type(path: &Path) -> Result<FileCategory> {
22 let mut file = File::open(path)?;
24 let mut buffer = vec![0u8; 8192];
25 let bytes_read = file.read(&mut buffer)?;
26 buffer.truncate(bytes_read);
27
28 let category = if let Some(kind) = infer::get(&buffer) {
30 let mime_type = kind.mime_type();
31
32 match mime_type {
33 s if s.starts_with("image/") => FileCategory::Image,
35
36 "application/pdf" => FileCategory::Document,
38 s if s.starts_with("application/vnd.openxmlformats-officedocument") => {
39 FileCategory::Document
40 }
41 s if s.starts_with("application/vnd.ms-") => FileCategory::Document,
42 s if s.starts_with("application/vnd.oasis.opendocument") => FileCategory::Document,
43 "application/rtf" => FileCategory::Document,
44 "application/msword" => FileCategory::Document,
45 s if s.starts_with("text/") => FileCategory::Document,
46
47 s if s.starts_with("audio/") => FileCategory::Audio,
49
50 s if s.starts_with("video/") => FileCategory::Video,
52
53 _ => FileCategory::Unknown,
54 }
55 } else {
56 detect_by_extension(path)
58 };
59
60 Ok(category)
61}
62
63fn detect_by_extension(path: &Path) -> FileCategory {
65 path.extension()
66 .and_then(|ext| ext.to_str())
67 .map(|ext| {
68 let ext_lower = ext.to_lowercase();
69 match ext_lower.as_str() {
70 "jpg" | "jpeg" | "png" | "gif" | "bmp" | "tiff" | "tif" | "webp" | "heic"
72 | "heif" | "ico" | "svg" => FileCategory::Image,
73 "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "odt" | "ods"
75 | "odp" | "rtf" | "txt" | "md" | "markdown" | "csv" => FileCategory::Document,
76 "eml" | "msg" => FileCategory::Email,
78 "html" | "htm" | "mhtml" => FileCategory::Web,
80 "zip" | "tar" | "gz" | "tgz" | "bz2" | "xz" | "7z" | "rar" => FileCategory::Archive,
82 "py" | "js" | "ts" | "rs" | "java" | "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => FileCategory::SourceCode,
84 "json" | "yaml" | "yml" => FileCategory::Document,
86 "mp3" | "wav" | "flac" | "aac" | "ogg" | "m4a" | "wma" | "opus" => {
88 FileCategory::Audio
89 }
90 "mp4" | "avi" | "mkv" | "mov" | "wmv" | "flv" | "webm" | "m4v" | "mpg" | "mpeg" => {
92 FileCategory::Video
93 }
94 _ => FileCategory::Unknown,
95 }
96 })
97 .unwrap_or(FileCategory::Unknown)
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Asserts that each name in `file_names` is classified as `expected`
    /// when only its extension is considered.
    fn assert_extension_category(file_names: &[&str], expected: FileCategory) {
        for &file_name in file_names {
            assert_eq!(
                detect_by_extension(&PathBuf::from(file_name)),
                expected,
                "unexpected category for {}",
                file_name
            );
        }
    }

    /// Writes `contents` to `file_name` inside a fresh temporary directory and
    /// returns what `detect_file_type` reports for that file.
    fn detect_written_file(file_name: &str, contents: &[u8]) -> FileCategory {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join(file_name);
        fs::write(&target, contents).unwrap();
        detect_file_type(&target).unwrap()
    }

    #[test]
    fn test_detect_by_extension_images() {
        assert_extension_category(
            &["photo.jpg", "image.png", "graphic.gif", "photo.HEIC"],
            FileCategory::Image,
        );
    }

    #[test]
    fn test_detect_by_extension_documents() {
        assert_extension_category(
            &[
                "report.pdf",
                "doc.docx",
                "sheet.xlsx",
                "notes.txt",
                "config.json",
            ],
            FileCategory::Document,
        );
    }

    #[test]
    fn test_detect_by_extension_audio() {
        assert_extension_category(
            &["song.mp3", "audio.wav", "track.flac"],
            FileCategory::Audio,
        );
    }

    #[test]
    fn test_detect_by_extension_video() {
        assert_extension_category(
            &["movie.mp4", "clip.avi", "video.mkv"],
            FileCategory::Video,
        );
    }

    #[test]
    fn test_detect_by_extension_email() {
        assert_extension_category(&["message.eml", "email.msg"], FileCategory::Email);
    }

    #[test]
    fn test_detect_by_extension_web() {
        assert_extension_category(
            &["page.html", "site.htm", "archive.mhtml"],
            FileCategory::Web,
        );
    }

    #[test]
    fn test_detect_by_extension_archive() {
        assert_extension_category(
            &["files.zip", "backup.tar", "archive.gz"],
            FileCategory::Archive,
        );
    }

    #[test]
    fn test_detect_by_extension_source_code() {
        assert_extension_category(
            &["script.py", "app.js", "main.rs"],
            FileCategory::SourceCode,
        );
    }

    #[test]
    fn test_detect_by_extension_unknown() {
        assert_extension_category(&["unknown.xyz", "noextension"], FileCategory::Unknown);
    }

    #[test]
    fn test_detect_by_extension_case_insensitive() {
        assert_extension_category(&["IMAGE.JPG"], FileCategory::Image);
        assert_extension_category(&["Document.PDF"], FileCategory::Document);
    }

    #[test]
    fn test_detect_file_type_with_temp_file() {
        // PNG signature.
        let png_magic = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert_eq!(
            detect_written_file("test.png", &png_magic),
            FileCategory::Image
        );
    }

    #[test]
    fn test_detect_file_type_jpeg() {
        // Leading bytes of a JFIF-style JPEG header.
        let jpeg_magic = [0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46];
        assert_eq!(
            detect_written_file("test.jpg", &jpeg_magic),
            FileCategory::Image
        );
    }

    #[test]
    fn test_detect_file_type_pdf() {
        assert_eq!(
            detect_written_file("test.pdf", b"%PDF-1.4\n"),
            FileCategory::Document
        );
    }

    #[test]
    fn test_detect_file_type_falls_back_to_extension() {
        // Plain text has no signature to sniff, so the `.txt` extension decides.
        assert_eq!(
            detect_written_file("test.txt", b"Hello, world!"),
            FileCategory::Document
        );
    }
}