tika_magic/
lib.rs

//! Detects MIME types from the contents of byte slices and files.
//!
//! # Example
2//! ```rust
3//! // Load a GIF file
4//! let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
5//!
6//! // Check if the MIME and the file are a match
7//! let result = tika_magic::match_u8("image/gif", input);
8//! assert_eq!(result, true);
9//! ```
10
11mod magic;
12
13use crate::magic::{MIME_MAP, MIME_TYPES, PRIORITY_MIME_TYPES};
14use std::fs::File;
15use std::io::Read;
16use std::path::Path;
17
/// A MIME type name, represented as a static string such as `"image/gif"`.
pub type Mime = &'static str;
19
20/// Checks if the given bytestream matches the given MIME type.
21///
22/// Returns true or false if it matches or not. If the given MIME type is not known,
23/// the function will always return false.
24/// If mimetype is an alias of a known MIME, the file will be checked against that MIME.
25///
26/// # Examples
27/// ```rust
28/// // Load a GIF file
29/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
30///
31/// // Check if the MIME and the file are a match
32/// let result = tika_magic::match_u8("image/gif", input);
33/// assert_eq!(result, true);
34/// ```
35pub fn match_u8(mimetype: &str, bytes: &[u8]) -> bool {
36    if handle_special_files(bytes).is_some() {
37        return true;
38    }
39
40    let Some(mm) = MIME_MAP.get(mimetype) else {
41        return false;
42    };
43
44    for m in mm.iter() {
45        if m.check(bytes) {
46            return true;
47        }
48    }
49
50    false
51}
52
53fn check_recursive(checker: &'static dyn magic::MimeTypeChecker, bytes: &[u8]) -> Option<Mime> {
54    let matches = checker.check(bytes);
55    if matches || checker.is_virtual() {
56        let children = checker.get_children();
57        for child in children {
58            if let Some(mime) = check_recursive(*child, bytes) {
59                return Some(mime);
60            }
61        }
62
63        if matches {
64            return Some(checker.get_mime());
65        }
66    }
67
68    None
69}
70
71#[cfg(feature = "open_zips")]
72fn maybe_open_zip(bytes: &[u8]) -> Option<Mime> {
73    crate::magic::ZipSpecialHandler.check(bytes)
74}
75#[cfg(not(feature = "open_zips"))]
76fn maybe_open_zip(_bytes: &[u8]) -> Option<Mime> {
77    None
78}
79
80#[cfg(feature = "open_ole")]
81fn maybe_open_ole(bytes: &[u8]) -> Option<Mime> {
82    crate::magic::OleSpecialHandler.check(bytes)
83}
84#[cfg(not(feature = "open_ole"))]
85fn maybe_open_ole(_bytes: &[u8]) -> Option<Mime> {
86    None
87}
88
89fn handle_special_files(bytes: &[u8]) -> Option<Mime> {
90    if let Some(mime) = maybe_open_zip(bytes) {
91        return Some(mime);
92    }
93
94    if let Some(mime) = maybe_open_ole(bytes) {
95        return Some(mime);
96    }
97
98    None
99}
100
101/// Gets the MIME from a byte stream.
102///
103/// Returns MIME as string.
104///
105/// # Examples
106/// ```rust
107/// // Load a GIF file
108/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
109///
110/// // Find the MIME type of the GIF
111/// let result = tika_magic::from_u8(input);
112/// assert_eq!(result, "image/gif");
113/// ```
114pub fn from_u8(bytes: &[u8]) -> Mime {
115    if let Some(mime) = handle_special_files(bytes) {
116        return mime;
117    }
118
119    for m in PRIORITY_MIME_TYPES {
120        if let Some(mime) = check_recursive(*m, bytes) {
121            return mime;
122        }
123    }
124
125    for m in MIME_TYPES {
126        if let Some(mime) = check_recursive(*m, bytes) {
127            return mime;
128        }
129    }
130
131    "application/octet-stream"
132}
133
134/// Gets the MIME types that match a byte stream.
135///
136/// Returns a vector of MIMEs.
137///
138/// # Examples
139/// ```rust
140/// // Load a MP4 file
141/// let input: &[u8] = include_bytes!("../tests/inputs/video/mp4/mp4.mp4");
142///
143/// // Find the MIME type of the MP4
144/// let result = tika_magic::from_u8_exhaustive(input);
145/// assert_eq!(result, vec!["video/mp4", "video/quicktime"]);
146/// ```
147pub fn from_u8_exhaustive(bytes: &[u8]) -> Vec<Mime> {
148    MIME_TYPES
149        .iter()
150        .filter_map(|m| match m.check(bytes) {
151            true => Some(m.get_mime()),
152            false => None,
153        })
154        .collect()
155}
156
157/// Check if the given file matches the given MIME type.
158///
159/// # Examples
160/// ```rust
161/// use std::fs::File;
162///
163/// // Get path to a GIF file
164/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
165///
166/// // Check if the MIME and the file are a match
167/// let result = tika_magic::match_file("image/gif", &file);
168/// assert_eq!(result, true);
169/// ```
170pub fn match_file(mimetype: &str, file: &File) -> bool {
171    let mut buf = [0u8; 0x20000];
172    match file.take(buf.len() as u64).read(&mut buf) {
173        Ok(0) => return false,
174        Err(_) => return false,
175        _ => (),
176    }
177
178    match_u8(mimetype, &buf)
179}
180
181/// Check if the file at the given path matches the given MIME type.
182///
183/// Returns false if the file could not be read or the given MIME type is not known.
184///
185/// # Examples
186/// ```rust
187/// use std::path::Path;
188///
189/// // Get path to a GIF file
190/// let path: &Path = Path::new("./tests/inputs/image/gif/gif.gif");
191///
192/// // Check if the MIME and the file are a match
193/// let result = tika_magic::match_filepath("image/gif", path);
194/// assert_eq!(result, true);
195/// ```
196pub fn match_filepath(mimetype: &str, path: &Path) -> bool {
197    match File::open(path) {
198        Ok(file) => match_file(mimetype, &file),
199        Err(_) => false,
200    }
201}
202
203/// Gets the MIME type for a file.
204///
205/// Does not look at file name or extension, just the contents.
206///
207/// # Examples
208/// ```rust
209/// use std::fs::File;
210///
211/// // Get path to a GIF file
212/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
213///
214/// // Find the MIME type of the GIF
215/// let result = tika_magic::from_file(&file);
216/// assert_eq!(result, Some("image/gif"));
217/// ```
218pub fn from_file(file: &File) -> Option<Mime> {
219    let mut buf = [0u8; 0x20000];
220
221    match file.take(buf.len() as u64).read(&mut buf) {
222        Ok(0) => return None,
223        Err(_) => return None,
224        _ => (),
225    }
226
227    Some(from_u8(&buf))
228}
229
230/// Gets all the MIME types that match a file.
231///
232/// # Examples
233/// ```rust
234/// use std::fs::File;
235///
236/// // Get path to a MP4 file
237/// let file = File::open("./tests/inputs/video/mp4/mp4.mp4").unwrap();
238///
239/// // Find the MIME type of the MP4
240/// let result = tika_magic::from_file_exhaustive(&file);
241/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
242/// ```
243pub fn from_file_exhaustive(file: &File) -> Option<Vec<Mime>> {
244    let mut buf = [0u8; 0x20000];
245
246    match file.take(buf.len() as u64).read(&mut buf) {
247        Ok(0) => return None,
248        Err(_) => return None,
249        _ => (),
250    }
251
252    Some(from_u8_exhaustive(&buf))
253}
254
255/// Gets the MIME type for a path
256///
257/// Returns None if the file cannot be opened
258/// or if no matching MIME type is found.
259///
260/// # Examples
261/// ```rust
262/// use std::path::Path;
263///
264/// // Get path to a GIF file
265/// let path = Path::new("./tests/inputs/image/gif/gif.gif");
266///
267/// // Find the MIME type of the GIF
268/// let result = tika_magic::from_filepath(path);
269/// assert_eq!(result, Some("image/gif"));
270/// ```
271pub fn from_filepath(path: &Path) -> Option<Mime> {
272    match File::open(path) {
273        Ok(file) => from_file(&file),
274        Err(_) => None,
275    }
276}
277
278/// Gets all the MIME types that match for a path.
279///
280/// # Examples
281/// ```rust
282/// use std::path::Path;
283///
284/// // Get path to a MP4 file
285/// let path = Path::new("./tests/inputs/video/mp4/mp4.mp4");
286///
287/// // Find the MIME types of the MP4
288/// let result = tika_magic::from_filepath_exhaustive(path);
289/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
290/// ```
291pub fn from_filepath_exhaustive(path: &Path) -> Option<Vec<Mime>> {
292    match File::open(path) {
293        Ok(file) => from_file_exhaustive(&file),
294        Err(_) => None,
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// A PPTX fixture is detected as an OOXML presentation and matches that MIME.
    #[rstest]
    fn test_ooxml_file() {
        let expected = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
        let fixture = Path::new(
            "./tests/inputs/application/vnd.openxmlformats-officedocument.presentationml.presentation/vnd.openxmlformats-officedocument.presentationml.presentation.pptx",
        );
        assert!(fixture.exists());

        assert_eq!(from_filepath(fixture).unwrap(), expected);
        assert!(match_filepath(expected, fixture));
    }

    /// An ODS fixture matches the OpenDocument spreadsheet MIME and is detected as such.
    #[rstest]
    fn test_ods_file() {
        let expected = "application/vnd.oasis.opendocument.spreadsheet";
        let fixture = Path::new(
            "./tests/inputs/application/vnd.oasis.opendocument.spreadsheet/vnd.oasis.opendocument.spreadsheet.ods",
        );
        assert!(fixture.exists());

        assert!(match_filepath(expected, fixture));
        assert_eq!(from_filepath(fixture).unwrap(), expected);
    }

    /// A uuencoded fixture is detected from its bytes and matches its MIME.
    #[rstest]
    fn test_uue_file() {
        let expected = "text/x-uuencode";
        let contents = include_bytes!("../tests/inputs/text/x-uuencode/sample-data-csv.uue");

        assert_eq!(from_u8(contents), expected);
        assert!(match_u8(expected, contents));
    }
}