tika_magic/lib.rs
1//! # Example
2//! ```rust
3//! // Load a GIF file
4//! let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
5//!
6//! // Check if the MIME and the file are a match
7//! let result = tika_magic::match_u8("image/gif", input);
8//! assert_eq!(result, true);
9//! ```
10
11mod magic;
12
13use crate::magic::{MIME_MAP, MIME_TYPES};
14use std::fs::File;
15use std::io::Read;
16use std::path::Path;
17
/// A MIME type name (for example `"image/gif"`), represented as a `'static` string slice.
pub type Mime = &'static str;
19
20/// Checks if the given bytestream matches the given MIME type.
21///
22/// Returns true or false if it matches or not. If the given MIME type is not known,
23/// the function will always return false.
24/// If mimetype is an alias of a known MIME, the file will be checked against that MIME.
25///
26/// # Examples
27/// ```rust
28/// // Load a GIF file
29/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
30///
31/// // Check if the MIME and the file are a match
32/// let result = tika_magic::match_u8("image/gif", input);
33/// assert_eq!(result, true);
34/// ```
35pub fn match_u8(mimetype: &str, bytes: &[u8]) -> bool {
36 let Some(mm) = MIME_MAP.get(mimetype) else {
37 return false;
38 };
39
40 for m in mm.iter() {
41 if m.check(bytes) {
42 return true;
43 }
44 }
45
46 false
47}
48
49fn check_recursive(checker: &'static dyn magic::MimeTypeChecker, bytes: &[u8]) -> Option<Mime> {
50 if checker.check(bytes) {
51 let children = checker.get_children();
52 for child in children {
53 if let Some(mime) = check_recursive(*child, bytes) {
54 return Some(mime);
55 }
56 }
57
58 return Some(checker.get_mime());
59 }
60
61 None
62}
63
64/// Gets the MIME from a byte stream.
65///
66/// Returns MIME as string.
67///
68/// # Examples
69/// ```rust
70/// // Load a GIF file
71/// let input: &[u8] = include_bytes!("../tests/inputs/image/gif/gif.gif");
72///
73/// // Find the MIME type of the GIF
74/// let result = tika_magic::from_u8(input);
75/// assert_eq!(result, "image/gif");
76/// ```
77pub fn from_u8(bytes: &[u8]) -> Mime {
78 for m in MIME_TYPES {
79 if let Some(mime) = check_recursive(*m, bytes) {
80 return mime;
81 }
82 }
83
84 "application/octet-stream"
85}
86
87/// Gets the MIME types that match a byte stream.
88///
89/// Returns a vector of MIMEs.
90///
91/// # Examples
92/// ```rust
93/// // Load a MP4 file
94/// let input: &[u8] = include_bytes!("../tests/inputs/video/mp4/mp4.mp4");
95///
96/// // Find the MIME type of the MP4
97/// let result = tika_magic::from_u8_exhaustive(input);
98/// assert_eq!(result, vec!["video/mp4", "video/quicktime"]);
99/// ```
100pub fn from_u8_exhaustive(bytes: &[u8]) -> Vec<Mime> {
101 MIME_TYPES
102 .iter()
103 .filter_map(|m| match m.check(bytes) {
104 true => Some(m.get_mime()),
105 false => None,
106 })
107 .collect()
108}
109
110/// Check if the given file matches the given MIME type.
111///
112/// # Examples
113/// ```rust
114/// use std::fs::File;
115///
116/// // Get path to a GIF file
117/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
118///
119/// // Check if the MIME and the file are a match
120/// let result = tika_magic::match_file("image/gif", &file);
121/// assert_eq!(result, true);
122/// ```
123pub fn match_file(mimetype: &str, file: &File) -> bool {
124 let mut buf = [0u8; 0x20000];
125 match file.take(buf.len() as u64).read(&mut buf) {
126 Ok(0) => return false,
127 Err(_) => return false,
128 _ => (),
129 }
130
131 match_u8(mimetype, &buf)
132}
133
134/// Check if the file at the given path matches the given MIME type.
135///
136/// Returns false if the file could not be read or the given MIME type is not known.
137///
138/// # Examples
139/// ```rust
140/// use std::path::Path;
141///
142/// // Get path to a GIF file
143/// let path: &Path = Path::new("./tests/inputs/image/gif/gif.gif");
144///
145/// // Check if the MIME and the file are a match
146/// let result = tika_magic::match_filepath("image/gif", path);
147/// assert_eq!(result, true);
148/// ```
149pub fn match_filepath(mimetype: &str, path: &Path) -> bool {
150 match File::open(path) {
151 Ok(mut file) => match_file(mimetype, &mut file),
152 Err(_) => false,
153 }
154}
155
156/// Gets the MIME type for a file.
157///
158/// Does not look at file name or extension, just the contents.
159///
160/// # Examples
161/// ```rust
162/// use std::fs::File;
163///
164/// // Get path to a GIF file
165/// let file = File::open("./tests/inputs/image/gif/gif.gif").unwrap();
166///
167/// // Find the MIME type of the GIF
168/// let result = tika_magic::from_file(&file);
169/// assert_eq!(result, Some("image/gif"));
170/// ```
171pub fn from_file(file: &File) -> Option<Mime> {
172 let mut buf = [0u8; 0x20000];
173
174 match file.take(buf.len() as u64).read(&mut buf) {
175 Ok(0) => return None,
176 Err(_) => return None,
177 _ => (),
178 }
179
180 Some(from_u8(&buf))
181}
182
183/// Gets all the MIME types that match a file.
184///
185/// # Examples
186/// ```rust
187/// use std::fs::File;
188///
189/// // Get path to a MP4 file
190/// let file = File::open("./tests/inputs/video/mp4/mp4.mp4").unwrap();
191///
192/// // Find the MIME type of the MP4
193/// let result = tika_magic::from_file_exhaustive(&file);
194/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
195/// ```
196pub fn from_file_exhaustive(file: &File) -> Option<Vec<Mime>> {
197 let mut buf = [0u8; 0x20000];
198
199 match file.take(buf.len() as u64).read(&mut buf) {
200 Ok(0) => return None,
201 Err(_) => return None,
202 _ => (),
203 }
204
205 Some(from_u8_exhaustive(&buf))
206}
207
208/// Gets the MIME type for a path
209///
210/// Returns None if the file cannot be opened
211/// or if no matching MIME type is found.
212///
213/// # Examples
214/// ```rust
215/// use std::path::Path;
216///
217/// // Get path to a GIF file
218/// let path = Path::new("./tests/inputs/image/gif/gif.gif");
219///
220/// // Find the MIME type of the GIF
221/// let result = tika_magic::from_filepath(path);
222/// assert_eq!(result, Some("image/gif"));
223/// ```
224pub fn from_filepath(path: &Path) -> Option<Mime> {
225 match File::open(path) {
226 Ok(file) => from_file(&file),
227 Err(_) => None,
228 }
229}
230
231/// Gets all the MIME types that match for a path.
232///
233/// # Examples
234/// ```rust
235/// use std::path::Path;
236///
237/// // Get path to a MP4 file
238/// let path = Path::new("./tests/inputs/video/mp4/mp4.mp4");
239///
240/// // Find the MIME types of the MP4
241/// let result = tika_magic::from_filepath_exhaustive(path);
242/// assert_eq!(result, Some(vec!["video/mp4", "video/quicktime"]));
243/// ```
244pub fn from_filepath_exhaustive(path: &Path) -> Option<Vec<Mime>> {
245 match File::open(path) {
246 Ok(file) => from_file_exhaustive(&file),
247 Err(_) => None,
248 }
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// A PPTX sample is detected as, and matches, the presentationml MIME type.
    #[rstest]
    fn test_ooxml_file() {
        let expected = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
        let path = Path::new(
            "./tests/inputs/application/vnd.openxmlformats-officedocument.presentationml.presentation/vnd.openxmlformats-officedocument.presentationml.presentation.pptx",
        );
        assert!(path.exists());

        let detected = from_filepath(path).unwrap();
        assert_eq!(detected, expected);
        assert!(match_filepath(expected, path));
    }

    /// An ODS sample matches, and is detected as, the OpenDocument spreadsheet MIME type.
    #[rstest]
    fn test_ods_file() {
        let expected = "application/vnd.oasis.opendocument.spreadsheet";
        let path = Path::new(
            "./tests/inputs/application/vnd.oasis.opendocument.spreadsheet/vnd.oasis.opendocument.spreadsheet.ods",
        );
        assert!(path.exists());
        assert!(match_filepath(expected, path));

        let detected = from_filepath(path).unwrap();
        assert_eq!(detected, expected);
    }

    /// A DOCX sample matches the generic `application/x-tika-ooxml` type while
    /// detection still reports the more specific wordprocessingml MIME type.
    #[rstest]
    fn test_tika_ooxml_rules() {
        let path = Path::new(
            "./tests/inputs/application/vnd.openxmlformats-officedocument.wordprocessingml.document/converted_from_google_doc.docx",
        );
        assert!(path.exists());
        assert!(match_filepath("application/x-tika-ooxml", path));

        let detected = from_filepath(path).unwrap();
        assert_eq!(
            detected,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        );
    }
}
301}