tika_magic/
lib.rs

1//! # Example
2//! ```rust
3//! // Load a GIF file
4//! let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
5//!
6//! // Check if the MIME and the file are a match
7//! let result = tika_magic::match_u8("image/gif", input);
8//! assert_eq!(result, true);
9//! ```
10
11mod magic;
12
13use crate::magic::{MIME_MAP, MIME_TYPES};
14use std::fs::File;
15use std::io::Read;
16use std::path::Path;
17
/// A MIME type name (for example `"image/gif"`), represented as a `'static` string slice.
pub type Mime = &'static str;
19
20/// Checks if the given bytestream matches the given MIME type.
21///
22/// Returns true or false if it matches or not. If the given MIME type is not known,
23/// the function will always return false.
24/// If mimetype is an alias of a known MIME, the file will be checked against that MIME.
25///
26/// # Examples
27/// ```rust
28/// // Load a GIF file
29/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
30///
31/// // Check if the MIME and the file are a match
32/// let result = tika_magic::match_u8("image/gif", input);
33/// assert_eq!(result, true);
34/// ```
35pub fn match_u8(mimetype: &str, bytes: &[u8]) -> bool {
36    let Some(mm) = MIME_MAP.get(mimetype) else {
37        return false;
38    };
39
40    for m in mm.iter() {
41        if m.check(bytes) {
42            return true;
43        }
44    }
45
46    false
47}
48
49fn check_recursive(checker: &'static dyn magic::MimeTypeChecker, bytes: &[u8]) -> Option<Mime> {
50    if checker.check(bytes) {
51        let children = checker.get_children();
52        for child in children {
53            if let Some(mime) = check_recursive(*child, bytes) {
54                return Some(mime);
55            }
56        }
57
58        return Some(checker.get_mime());
59    }
60
61    None
62}
63
64/// Gets the MIME from a byte stream.
65///
66/// Returns MIME as string.
67///
68/// # Examples
69/// ```rust
70/// // Load a GIF file
71/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
72///
73/// // Find the MIME type of the GIF
74/// let result = tika_magic::from_u8(input);
75/// assert_eq!(result, "image/gif");
76/// ```
77pub fn from_u8(bytes: &[u8]) -> Mime {
78    for m in MIME_TYPES {
79        if let Some(mime) = check_recursive(*m, bytes) {
80            return mime;
81        }
82    }
83
84    "application/octet-stream"
85}
86
87/// Gets the MIME types that match a byte stream.
88///
89/// Returns a vector of MIMEs.
90///
91/// # Examples
92/// ```rust
93/// // Load a MP4 file
94/// let input: &[u8] = include_bytes!("../tests/inputs/video/mp4/mp4.mp4");
95///
96/// // Find the MIME type of the MP4
97/// let result = tika_magic::from_u8_exhaustive(input);
98/// assert_eq!(result, vec!["video/mp4", "video/quicktime"]);
99/// ```
100pub fn from_u8_exhaustive(bytes: &[u8]) -> Vec<Mime> {
101    MIME_TYPES
102        .iter()
103        .filter_map(|m| match m.check(bytes) {
104            true => Some(m.get_mime()),
105            false => None,
106        })
107        .collect()
108}
109
110/// Check if the given file matches the given MIME type.
111///
112/// # Examples
113/// ```rust
114/// use std::fs::File;
115///
116/// // Get path to a GIF file
117/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
118///
119/// // Check if the MIME and the file are a match
120/// let result = tika_magic::match_file("image/gif", &file);
121/// assert_eq!(result, true);
122/// ```
123pub fn match_file(mimetype: &str, file: &File) -> bool {
124    let mut buf = [0u8; 0x20000];
125    match file.take(buf.len() as u64).read(&mut buf) {
126        Ok(0) => return false,
127        Err(_) => return false,
128        _ => (),
129    }
130
131    match_u8(mimetype, &buf)
132}
133
134/// Check if the file at the given path matches the given MIME type.
135///
136/// Returns false if the file could not be read or the given MIME type is not known.
137///
138/// # Examples
139/// ```rust
140/// use std::path::Path;
141///
142/// // Get path to a GIF file
143/// let path: &Path = Path::new("./tests/inputs/image/gif/gif.gif");
144///
145/// // Check if the MIME and the file are a match
146/// let result = tika_magic::match_filepath("image/gif", path);
147/// assert_eq!(result, true);
148/// ```
149pub fn match_filepath(mimetype: &str, path: &Path) -> bool {
150    match File::open(path) {
151        Ok(mut file) => match_file(mimetype, &mut file),
152        Err(_) => false,
153    }
154}
155
156/// Gets the MIME type for a file.
157///
158/// Does not look at file name or extension, just the contents.
159///
160/// # Examples
161/// ```rust
162/// use std::fs::File;
163///
164/// // Get path to a GIF file
165/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
166///
167/// // Find the MIME type of the GIF
168/// let result = tika_magic::from_file(&file);
169/// assert_eq!(result, Some("image/gif"));
170/// ```
171pub fn from_file(file: &File) -> Option<Mime> {
172    let mut buf = [0u8; 0x20000];
173
174    match file.take(buf.len() as u64).read(&mut buf) {
175        Ok(0) => return None,
176        Err(_) => return None,
177        _ => (),
178    }
179
180    Some(from_u8(&buf))
181}
182
183/// Gets all the MIME types that match a file.
184///
185/// # Examples
186/// ```rust
187/// use std::fs::File;
188///
189/// // Get path to a MP4 file
190/// let file = File::open("./tests/inputs/video/mp4/mp4.mp4").unwrap();
191///
192/// // Find the MIME type of the MP4
193/// let result = tika_magic::from_file_exhaustive(&file);
194/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
195/// ```
196pub fn from_file_exhaustive(file: &File) -> Option<Vec<Mime>> {
197    let mut buf = [0u8; 0x20000];
198
199    match file.take(buf.len() as u64).read(&mut buf) {
200        Ok(0) => return None,
201        Err(_) => return None,
202        _ => (),
203    }
204
205    Some(from_u8_exhaustive(&buf))
206}
207
208/// Gets the MIME type for a path
209///
210/// Returns None if the file cannot be opened
211/// or if no matching MIME type is found.
212///
213/// # Examples
214/// ```rust
215/// use std::path::Path;
216///
217/// // Get path to a GIF file
218/// let path = Path::new("./tests/inputs/image/gif/gif.gif");
219///
220/// // Find the MIME type of the GIF
221/// let result = tika_magic::from_filepath(path);
222/// assert_eq!(result, Some("image/gif"));
223/// ```
224pub fn from_filepath(path: &Path) -> Option<Mime> {
225    match File::open(path) {
226        Ok(file) => from_file(&file),
227        Err(_) => None,
228    }
229}
230
231/// Gets all the MIME types that match for a path.
232///
233/// # Examples
234/// ```rust
235/// use std::path::Path;
236///
237/// // Get path to a MP4 file
238/// let path = Path::new("./tests/inputs/video/mp4/mp4.mp4");
239///
240/// // Find the MIME types of the MP4
241/// let result = tika_magic::from_filepath_exhaustive(path);
242/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
243/// ```
244pub fn from_filepath_exhaustive(path: &Path) -> Option<Vec<Mime>> {
245    match File::open(path) {
246        Ok(file) => from_file_exhaustive(&file),
247        Err(_) => None,
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// A PPTX sample is detected as, and matches, the PresentationML MIME type.
    #[rstest]
    fn test_ooxml_file() {
        let expected = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
        let sample = Path::new(
            "./tests/inputs/application/vnd.openxmlformats-officedocument.presentationml.presentation/vnd.openxmlformats-officedocument.presentationml.presentation.pptx",
        );
        assert!(sample.exists());

        assert_eq!(from_filepath(sample), Some(expected));
        assert!(match_filepath(expected, sample));
    }

    /// An ODS sample matches, and is detected as, the OpenDocument spreadsheet MIME type.
    #[rstest]
    fn test_ods_file() {
        let expected = "application/vnd.oasis.opendocument.spreadsheet";
        let sample = Path::new(
            "./tests/inputs/application/vnd.oasis.opendocument.spreadsheet/vnd.oasis.opendocument.spreadsheet.ods",
        );
        assert!(sample.exists());

        assert!(match_filepath(expected, sample));
        assert_eq!(from_filepath(sample), Some(expected));
    }

    /// A DOCX sample matches the generic `application/x-tika-ooxml` type, while
    /// detection reports the WordprocessingML MIME type.
    #[rstest]
    fn test_tika_ooxml_rules() {
        let generic = "application/x-tika-ooxml";
        let specific = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        let sample = Path::new(
            "./tests/inputs/application/vnd.openxmlformats-officedocument.wordprocessingml.document/converted_from_google_doc.docx",
        );
        assert!(sample.exists());

        assert!(match_filepath(generic, sample));
        assert_eq!(from_filepath(sample), Some(specific));
    }
}