nom_exif/
file.rs

1use nom::{bytes::complete, multi::many0, FindSubstring};
2use std::{
3    fmt::Display,
4    io::{Cursor, Read},
5};
6
7use crate::{
8    bbox::{travel_header, BoxHolder},
9    ebml::element::parse_ebml_doc_type,
10    error::{ParsedError, ParsingError},
11    exif::TiffHeader,
12    jpeg::check_jpeg,
13    loader::Load,
14    raf::RafInfo,
15    slice::SubsliceRange,
16};
17
/// `ftyp` brand codes that mark a file as HEIF/HEIC.
///
/// The brand-detection functions in this file compare the major brand against
/// this table first and, failing that, look for these codes among the
/// compatible brands.
const HEIF_HEIC_BRAND_NAMES: &[&[u8]] = &[
    b"heic", // the usual HEIF images
    b"heix", // 10bit images, or anything that uses h265 with range extension
    // 'hevc', 'hevx': brands for image sequences.
    // NOTE(review): only 'hevc' is listed here; confirm whether 'hevx' was
    // left out on purpose.
    b"hevc",
    b"heim", // multiview
    b"heis", // scalable
    b"hevm", // multiview sequence
    b"hevs", // scalable sequence
    b"mif1", b"MiHE", b"miaf", b"MiHB", // HEIC file's compatible brands
];
28
/// The entries of `HEIF_HEIC_BRAND_NAMES` for which `parse_bmff_mime` reports
/// `MimeImage::Heic` instead of `MimeImage::Heif` when they are the major brand.
const HEIC_BRAND_NAMES: &[&[u8]] = &[b"heic", b"heix", b"heim", b"heis"];
30
/// `ftyp` brand codes treated as MP4-family video.
///
/// `parse_bmff_mime` reports a major brand that starts with `3gp` as 3GPP;
/// any other match against this table is reported as MP4.
// TODO: Refer to the information on the website https://www.ftyps.com to add
// other less common MP4 brands.
const MP4_BRAND_NAMES: &[&str] = &[
    "3g2a", "3g2b", "3g2c", "3ge6", "3ge7", "3gg6", "3gp4", "3gp5", "3gp6", "3gs7", "avc1", "mp41",
    "mp42", "iso2", "isom", "vfj1",
];
37
/// `ftyp` brand codes treated as QuickTime.
///
/// Each entry is four bytes long; the trailing spaces are part of the code and
/// must be kept.
const QT_BRAND_NAMES: &[&str] = &["qt  ", "mqt "];
39
40#[derive(Debug, Clone, PartialEq, Eq, Copy)]
41pub(crate) enum Mime {
42    Image(MimeImage),
43    Video(MimeVideo),
44}
45
46impl Mime {
47    pub fn unwrap_image(self) -> MimeImage {
48        match self {
49            Mime::Image(val) => val,
50            Mime::Video(_) => panic!("called `Mime::unwrap_image()` on an `Mime::Video`"),
51        }
52    }
53    pub fn unwrap_video(self) -> MimeVideo {
54        match self {
55            Mime::Image(_) => panic!("called `Mime::unwrap_video()` on an `Mime::Image`"),
56            Mime::Video(val) => val,
57        }
58    }
59}
60
/// Still-image formats that the detection code in this file can report.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum MimeImage {
    /// JPEG image.
    Jpeg,
    /// HEIC image.
    Heic,
    /// HEIF image.
    Heif,
    /// TIFF image.
    Tiff,
    /// Fujifilm RAW, image/x-fuji-raf
    Raf,
}
69
/// Video / container formats that the detection code in this file can report.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum MimeVideo {
    /// QuickTime movie.
    QuickTime,
    /// MP4 video.
    Mp4,
    /// WebM video.
    Webm,
    /// Matroska container.
    Matroska,
    /// 3GPP video.
    _3gpp,
}
78
79impl TryFrom<&[u8]> for Mime {
80    type Error = crate::Error;
81    fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
82        let mime = if let Ok(x) = parse_bmff_mime(input) {
83            x
84        } else if let Ok(x) = get_ebml_doc_type(input) {
85            if x == "webm" {
86                Mime::Video(MimeVideo::Webm)
87            } else {
88                Mime::Video(MimeVideo::Matroska)
89            }
90        } else if TiffHeader::parse(input).is_ok() {
91            Mime::Image(MimeImage::Tiff)
92        } else if check_jpeg(input).is_ok() {
93            Mime::Image(MimeImage::Jpeg)
94        } else if RafInfo::check(input).is_ok() {
95            Mime::Image(MimeImage::Raf)
96        } else {
97            return Err(crate::Error::UnrecognizedFileFormat);
98        };
99
100        Ok(mime)
101    }
102}
103
/// *Deprecated*: Please use [`MediaSource`] instead.
#[allow(unused)]
#[deprecated(since = "2.0.0")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FileFormat {
    /// jpeg
    Jpeg,

    /// heic, heif
    Heif,

    // QuickTime and MP4 currently go through the same metadata parsing
    // mechanism, so there is little difference between the two.
    //
    // The one distinction: when a file is detected as MP4, the
    // `moov/udta/©xyz` atom is checked as well and GPS information is read
    // from it if possible, because Android phones store GPS information in
    // that atom.
    /// mov
    QuickTime,

    /// mp4
    MP4,

    /// webm, mkv, mka, mk3d
    Ebml,
}
127
128// Parse the input buffer and detect its file type
129#[allow(deprecated)]
130impl TryFrom<&[u8]> for FileFormat {
131    type Error = crate::Error;
132
133    fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
134        if let Ok(ff) = check_bmff(input) {
135            Ok(ff)
136        } else if get_ebml_doc_type(input).is_ok() {
137            Ok(Self::Ebml)
138        } else if check_jpeg(input).is_ok() {
139            Ok(Self::Jpeg)
140        } else {
141            Err(crate::Error::UnrecognizedFileFormat)
142        }
143    }
144}
145
146#[allow(deprecated)]
147impl FileFormat {
148    pub fn try_from_read<T: Read>(reader: T) -> crate::Result<Self> {
149        const BUF_SIZE: usize = 4096;
150        let mut buf = Vec::with_capacity(BUF_SIZE);
151        let n = reader.take(BUF_SIZE as u64).read_to_end(buf.as_mut())?;
152        if n == 0 {
153            Err("file is empty")?;
154        }
155
156        buf.as_slice().try_into()
157    }
158
159    pub(crate) fn try_from_load<T: Load>(loader: &mut T) -> Result<Self, ParsedError> {
160        loader.load_and_parse(|x| {
161            x.try_into()
162                .map_err(|_| ParsingError::Failed("unrecognized file format".to_string()))
163        })
164    }
165}
166
167#[allow(deprecated)]
168impl Display for FileFormat {
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        match self {
171            Self::Jpeg => "JPEG".fmt(f),
172            Self::Heif => "HEIF/HEIC".fmt(f),
173            Self::QuickTime => "QuickTime".fmt(f),
174            Self::MP4 => "MP4".fmt(f),
175            Self::Ebml => "EBML".fmt(f),
176        }
177    }
178}
179
180fn get_ebml_doc_type(input: &[u8]) -> crate::Result<String> {
181    let mut cursor = Cursor::new(input);
182    let doc = parse_ebml_doc_type(&mut cursor)?;
183    Ok(doc)
184}
185
186#[tracing::instrument(skip_all)]
187fn parse_bmff_mime(input: &[u8]) -> crate::Result<Mime> {
188    let (ftyp, Some(major_brand)) =
189        get_ftyp_and_major_brand(input).map_err(|_| crate::Error::UnrecognizedFileFormat)?
190    else {
191        if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
192            // ftyp is None, mdat box is found, assume it's a MOV file extracted from HEIC
193            return Ok(Mime::Video(MimeVideo::QuickTime));
194        }
195
196        return Err(crate::Error::UnrecognizedFileFormat);
197    };
198
199    tracing::debug!(?ftyp);
200
201    // Check if it is a QuickTime file
202    if QT_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
203        return Ok(Mime::Video(MimeVideo::QuickTime));
204    }
205
206    // Check if it is a HEIF file
207    if HEIF_HEIC_BRAND_NAMES.contains(&major_brand) {
208        if HEIC_BRAND_NAMES.contains(&major_brand) {
209            return Ok(Mime::Image(MimeImage::Heic));
210        }
211        return Ok(Mime::Image(MimeImage::Heif));
212    }
213
214    // Check if it is a MP4 file
215    if MP4_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
216        if major_brand.starts_with(b"3gp") {
217            return Ok(Mime::Video(MimeVideo::_3gpp));
218        }
219        return Ok(Mime::Video(MimeVideo::Mp4));
220    }
221
222    // Check compatible brands
223    let compatible_brands = ftyp.body_data();
224
225    if QT_BRAND_NAMES
226        .iter()
227        .any(|v| compatible_brands.find_substring(v.as_bytes()).is_some())
228    {
229        return Ok(Mime::Video(MimeVideo::QuickTime));
230    }
231
232    if HEIF_HEIC_BRAND_NAMES
233        .iter()
234        .any(|x| compatible_brands.find_substring(*x).is_some())
235    {
236        if HEIC_BRAND_NAMES.contains(&major_brand) {
237            return Ok(Mime::Image(MimeImage::Heic));
238        }
239        return Ok(Mime::Image(MimeImage::Heif));
240    }
241
242    if MP4_BRAND_NAMES
243        .iter()
244        .any(|v| compatible_brands.subslice_in_range(v.as_bytes()).is_some())
245    {
246        if major_brand.starts_with(b"3gp") {
247            return Ok(Mime::Video(MimeVideo::_3gpp));
248        }
249        return Ok(Mime::Video(MimeVideo::Mp4));
250    }
251
252    tracing::warn!(
253        marjor_brand = major_brand.iter().map(|b| *b as char).collect::<String>(),
254        "unknown major brand",
255    );
256
257    if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
258        // mdat box found, assume it's a mp4 file
259        return Ok(Mime::Video(MimeVideo::Mp4));
260    }
261
262    Err(crate::Error::UnrecognizedFileFormat)
263}
264
265#[allow(deprecated)]
266fn check_bmff(input: &[u8]) -> crate::Result<FileFormat> {
267    let (ftyp, Some(major_brand)) = get_ftyp_and_major_brand(input)? else {
268        if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
269            // ftyp is None, mdat box is found, assume it's a MOV file extracted from HEIC
270            return Ok(FileFormat::QuickTime);
271        }
272
273        return Err(crate::Error::UnrecognizedFileFormat);
274    };
275
276    // Check if it is a QuickTime file
277    if QT_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
278        return Ok(FileFormat::QuickTime);
279    }
280
281    // Check if it is a HEIF file
282    if HEIF_HEIC_BRAND_NAMES.contains(&major_brand) {
283        return Ok(FileFormat::Heif);
284    }
285
286    // Check if it is a MP4 file
287    if MP4_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
288        return Ok(FileFormat::MP4);
289    }
290
291    // Check compatible brands
292    let compatible_brands = get_compatible_brands(ftyp.body_data())?;
293
294    if QT_BRAND_NAMES
295        .iter()
296        .any(|v| compatible_brands.iter().any(|x| v.as_bytes() == *x))
297    {
298        return Ok(FileFormat::QuickTime);
299    }
300
301    if HEIF_HEIC_BRAND_NAMES
302        .iter()
303        .any(|x| compatible_brands.contains(x))
304    {
305        return Ok(FileFormat::Heif);
306    }
307
308    if MP4_BRAND_NAMES
309        .iter()
310        .any(|v| compatible_brands.iter().any(|x| v.as_bytes() == *x))
311    {
312        return Ok(FileFormat::MP4);
313    }
314
315    tracing::warn!(
316        marjor_brand = major_brand.iter().map(|b| *b as char).collect::<String>(),
317        "unknown major brand",
318    );
319
320    if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
321        // find mdat box, assume it's a mp4 file
322        return Ok(FileFormat::MP4);
323    }
324
325    Err(crate::Error::UnrecognizedFileFormat)
326}
327
328fn get_ftyp_and_major_brand(input: &[u8]) -> crate::Result<(BoxHolder, Option<&[u8]>)> {
329    let (_, bbox) = BoxHolder::parse(input).map_err(|e| format!("parse ftyp failed: {e}"))?;
330
331    if bbox.box_type() == "ftyp" {
332        if bbox.body_data().len() < 4 {
333            return Err(format!(
334                "parse ftyp failed; body size should greater than 4, got {}",
335                bbox.body_data().len()
336            )
337            .into());
338        }
339        let (_, ftyp) = complete::take(4_usize)(bbox.body_data())?;
340        Ok((bbox, Some(ftyp)))
341    } else if bbox.box_type() == "wide" {
342        // MOV files that extracted from HEIC starts with `wide` & `mdat` atoms
343        Ok((bbox, None))
344    } else {
345        Err(format!("parse ftyp failed; first box type is: {}", bbox.box_type()).into())
346    }
347}
348
349fn get_compatible_brands(body: &[u8]) -> crate::Result<Vec<&[u8]>> {
350    let Ok((_, brands)) = many0(complete::take::<usize, &[u8], nom::error::Error<&[u8]>>(
351        4_usize,
352    ))(body) else {
353        return Err("get compatible brands failed".into());
354    };
355    Ok(brands)
356}
357
// Sample-driven tests for file type detection. Each `test_case` attribute
// names a sample file and, where applicable, the type expected for it.
#[allow(deprecated)]
#[cfg(test)]
mod tests {
    use std::ops::Deref;

    use super::*;
    use test_case::test_case;
    use Mime::*;
    use MimeImage::*;
    use MimeVideo::*;

    use crate::testkit::{open_sample, read_sample};

    /// `Mime` detection on the bytes returned by `read_sample`.
    #[test_case("exif.heic", Image(Heic))]
    #[test_case("exif.jpg", Image(Jpeg))]
    #[test_case("fujifilm_x_t1_01.raf.meta", Image(Raf))]
    #[test_case("meta.mp4", Video(Mp4))]
    #[test_case("meta.mov", Video(QuickTime))]
    #[test_case("embedded-in-heic.mov", Video(QuickTime))]
    #[test_case("compatible-brands.mov", Video(QuickTime))]
    #[test_case("webm_480.webm", Video(Webm))]
    #[test_case("mkv_640x360.mkv", Video(Matroska))]
    #[test_case("mka.mka", Video(Matroska))]
    #[test_case("3gp_640x360.3gp", Video(_3gpp))]
    fn mime(path: &str, mime: Mime) {
        let data = read_sample(path).unwrap();
        let m: Mime = data.deref().try_into().unwrap();
        assert_eq!(m, mime);
    }

    /// Deprecated `FileFormat` detection through `FileFormat::try_from_read`.
    #[test_case("exif.heic", FileFormat::Heif)]
    #[test_case("exif.jpg", FileFormat::Jpeg)]
    #[test_case("meta.mov", FileFormat::QuickTime)]
    #[test_case("meta.mp4", FileFormat::MP4)]
    #[test_case("embedded-in-heic.mov", FileFormat::QuickTime)]
    #[test_case("compatible-brands.mov", FileFormat::QuickTime)]
    fn file_format(path: &str, expect: FileFormat) {
        let f = open_sample(path).unwrap();
        let ff = FileFormat::try_from_read(f).unwrap();
        assert_eq!(ff, expect);
    }

    /// Samples for which `FileFormat::try_from_read` must return an error.
    #[test_case("compatible-brands-fail.mov")]
    fn file_format_error(path: &str) {
        let f = open_sample(path).unwrap();
        FileFormat::try_from_read(f).unwrap_err();
    }
}