nom_exif/
file.rs

1use nom::{bytes::complete, multi::many0, FindSubstring};
2use std::{
3    fmt::Display,
4    io::{Cursor, Read},
5};
6
7use crate::{
8    bbox::{travel_header, BoxHolder},
9    ebml::element::parse_ebml_doc_type,
10    error::{ParsedError, ParsingError},
11    exif::TiffHeader,
12    jpeg::check_jpeg,
13    loader::Load,
14    raf::RafInfo,
15    slice::SubsliceRange,
16};
17
/// `ftyp` brands that identify a HEIF-family image (HEIC included).
const HEIF_HEIC_BRAND_NAMES: &[&[u8]] = &[
    b"heic", // the usual HEIF images
    b"heix", // 10bit images, or anything that uses h265 with range extension
    b"hevc", // image sequences
    b"hevx", // image sequences (counterpart of `hevc`)
    b"heim", // multiview
    b"heis", // scalable
    b"hevm", // multiview sequence
    b"hevs", // scalable sequence
    b"mif1", b"MiHE", b"miaf", b"MiHB", // HEIC file's compatible brands
];

/// Subset of [`HEIF_HEIC_BRAND_NAMES`] that is reported as HEIC rather than
/// generic HEIF.
const HEIC_BRAND_NAMES: &[&[u8]] = &[b"heic", b"heix", b"heim", b"heis"];

// TODO: Refer to the information on the website https://www.ftyps.com to add
// other less common MP4 brands.
/// `ftyp` brands that identify an MP4-family video (3GPP brands included).
const MP4_BRAND_NAMES: &[&str] = &[
    "3g2a", "3g2b", "3g2c", "3ge6", "3ge7", "3gg6", "3gp4", "3gp5", "3gp6", "3gs7", "avc1", "mp41",
    "mp42", "iso2", "isom", "vfj1",
];

/// `ftyp` brands that identify a QuickTime movie. Brands are always four
/// bytes, hence the space padding.
const QT_BRAND_NAMES: &[&str] = &["qt  ", "mqt "];

/// `ftyp` brands that identify a Canon CR3 raw image.
const CR3_BRAND_NAMES: &[&str] = &["crx "];
41
/// Detected media type: either a still image or a video container.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum Mime {
    Image(MimeImage),
    Video(MimeVideo),
}

impl Mime {
    /// Returns the wrapped [`MimeImage`].
    ///
    /// # Panics
    ///
    /// Panics when `self` is a [`Mime::Video`].
    pub fn unwrap_image(self) -> MimeImage {
        let Mime::Image(image) = self else {
            panic!("called `Mime::unwrap_image()` on an `Mime::Video`");
        };
        image
    }

    /// Returns the wrapped [`MimeVideo`].
    ///
    /// # Panics
    ///
    /// Panics when `self` is a [`Mime::Image`].
    pub fn unwrap_video(self) -> MimeVideo {
        let Mime::Video(video) = self else {
            panic!("called `Mime::unwrap_video()` on an `Mime::Image`");
        };
        video
    }
}

/// Still-image formats that can be reported by detection.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum MimeImage {
    Jpeg,
    Heic,
    Heif,
    Tiff,
    /// Fujifilm RAW, `image/x-fuji-raf`.
    Raf,
    /// Canon RAW, `image/x-canon-cr3`.
    Cr3,
}

/// Video container formats that can be reported by detection.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum MimeVideo {
    QuickTime,
    Mp4,
    Webm,
    Matroska,
    _3gpp,
}
81
82impl TryFrom<&[u8]> for Mime {
83    type Error = crate::Error;
84    fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
85        let mime = if let Ok(x) = parse_bmff_mime(input) {
86            x
87        } else if let Ok(x) = get_ebml_doc_type(input) {
88            if x == "webm" {
89                Mime::Video(MimeVideo::Webm)
90            } else {
91                Mime::Video(MimeVideo::Matroska)
92            }
93        } else if TiffHeader::parse(input).is_ok() {
94            Mime::Image(MimeImage::Tiff)
95        } else if check_jpeg(input).is_ok() {
96            Mime::Image(MimeImage::Jpeg)
97        } else if RafInfo::check(input).is_ok() {
98            Mime::Image(MimeImage::Raf)
99        } else {
100            return Err(crate::Error::UnrecognizedFileFormat);
101        };
102
103        Ok(mime)
104    }
105}
106
/// *Deprecated*: Please use [`crate::MediaSource`] instead.
///
/// Coarse file format classification kept for backward compatibility.
#[deprecated(since = "2.0.0")]
#[allow(unused)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileFormat {
    /// jpg, jpeg
    Jpeg,

    /// heic, heif
    Heif,

    // QuickTime and MP4 currently go through the same metadata parsing
    // mechanism, so there is little difference between the two variants.
    //
    // The one distinction: for a file detected as MP4, the `moov/udta/©xyz`
    // atom is checked in addition and GPS information is read from it when
    // possible, because Android phones store GPS information in that atom.
    /// mov
    QuickTime,

    /// mp4
    MP4,

    /// webm, mkv, mka, mk3d
    Ebml,
}
130
131// Parse the input buffer and detect its file type
132#[allow(deprecated)]
133impl TryFrom<&[u8]> for FileFormat {
134    type Error = crate::Error;
135
136    fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
137        if let Ok(ff) = check_bmff(input) {
138            Ok(ff)
139        } else if get_ebml_doc_type(input).is_ok() {
140            Ok(Self::Ebml)
141        } else if check_jpeg(input).is_ok() {
142            Ok(Self::Jpeg)
143        } else {
144            Err(crate::Error::UnrecognizedFileFormat)
145        }
146    }
147}
148
149#[allow(deprecated)]
150impl FileFormat {
151    pub fn try_from_read<T: Read>(reader: T) -> crate::Result<Self> {
152        const BUF_SIZE: usize = 4096;
153        let mut buf = Vec::with_capacity(BUF_SIZE);
154        let n = reader.take(BUF_SIZE as u64).read_to_end(buf.as_mut())?;
155        if n == 0 {
156            Err("file is empty")?;
157        }
158
159        buf.as_slice().try_into()
160    }
161
162    pub(crate) fn try_from_load<T: Load>(loader: &mut T) -> Result<Self, ParsedError> {
163        loader.load_and_parse(|x| {
164            x.try_into()
165                .map_err(|_| ParsingError::Failed("unrecognized file format".to_string()))
166        })
167    }
168}
169
170#[allow(deprecated)]
171impl Display for FileFormat {
172    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173        match self {
174            Self::Jpeg => "JPEG".fmt(f),
175            Self::Heif => "HEIF/HEIC".fmt(f),
176            Self::QuickTime => "QuickTime".fmt(f),
177            Self::MP4 => "MP4".fmt(f),
178            Self::Ebml => "EBML".fmt(f),
179        }
180    }
181}
182
183fn get_ebml_doc_type(input: &[u8]) -> crate::Result<String> {
184    let mut cursor = Cursor::new(input);
185    let doc = parse_ebml_doc_type(&mut cursor)?;
186    Ok(doc)
187}
188
189#[tracing::instrument(skip_all)]
190fn parse_bmff_mime(input: &[u8]) -> crate::Result<Mime> {
191    let (ftyp, Some(major_brand)) =
192        get_ftyp_and_major_brand(input).map_err(|_| crate::Error::UnrecognizedFileFormat)?
193    else {
194        if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
195            // ftyp is None, mdat box is found, assume it's a MOV file extracted from HEIC
196            return Ok(Mime::Video(MimeVideo::QuickTime));
197        }
198
199        return Err(crate::Error::UnrecognizedFileFormat);
200    };
201
202    tracing::debug!(?ftyp);
203
204    // Check if it is a QuickTime file
205    if QT_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
206        return Ok(Mime::Video(MimeVideo::QuickTime));
207    }
208
209    // Check if it is a HEIF file
210    if HEIF_HEIC_BRAND_NAMES.contains(&major_brand) {
211        if HEIC_BRAND_NAMES.contains(&major_brand) {
212            return Ok(Mime::Image(MimeImage::Heic));
213        }
214        return Ok(Mime::Image(MimeImage::Heif));
215    }
216
217    // Check if it is a MP4 file
218    if MP4_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
219        if major_brand.starts_with(b"3gp") {
220            return Ok(Mime::Video(MimeVideo::_3gpp));
221        }
222        return Ok(Mime::Video(MimeVideo::Mp4));
223    }
224
225    // Check if it is a CR3 file
226    if CR3_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
227        return Ok(Mime::Image(MimeImage::Cr3));
228    }
229
230    // Check compatible brands
231    let compatible_brands = ftyp.body_data();
232
233    if QT_BRAND_NAMES
234        .iter()
235        .any(|v| compatible_brands.find_substring(v.as_bytes()).is_some())
236    {
237        return Ok(Mime::Video(MimeVideo::QuickTime));
238    }
239
240    if HEIF_HEIC_BRAND_NAMES
241        .iter()
242        .any(|x| compatible_brands.find_substring(*x).is_some())
243    {
244        if HEIC_BRAND_NAMES.contains(&major_brand) {
245            return Ok(Mime::Image(MimeImage::Heic));
246        }
247        return Ok(Mime::Image(MimeImage::Heif));
248    }
249
250    if MP4_BRAND_NAMES
251        .iter()
252        .any(|v| compatible_brands.subslice_in_range(v.as_bytes()).is_some())
253    {
254        if major_brand.starts_with(b"3gp") {
255            return Ok(Mime::Video(MimeVideo::_3gpp));
256        }
257        return Ok(Mime::Video(MimeVideo::Mp4));
258    }
259
260    tracing::warn!(
261        marjor_brand = major_brand.iter().map(|b| *b as char).collect::<String>(),
262        "unknown major brand",
263    );
264
265    if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
266        // mdat box found, assume it's a mp4 file
267        return Ok(Mime::Video(MimeVideo::Mp4));
268    }
269
270    Err(crate::Error::UnrecognizedFileFormat)
271}
272
273#[allow(deprecated)]
274fn check_bmff(input: &[u8]) -> crate::Result<FileFormat> {
275    let (ftyp, Some(major_brand)) = get_ftyp_and_major_brand(input)? else {
276        if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
277            // ftyp is None, mdat box is found, assume it's a MOV file extracted from HEIC
278            return Ok(FileFormat::QuickTime);
279        }
280
281        return Err(crate::Error::UnrecognizedFileFormat);
282    };
283
284    // Check if it is a QuickTime file
285    if QT_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
286        return Ok(FileFormat::QuickTime);
287    }
288
289    // Check if it is a HEIF file
290    if HEIF_HEIC_BRAND_NAMES.contains(&major_brand) {
291        return Ok(FileFormat::Heif);
292    }
293
294    // Check if it is a MP4 file
295    if MP4_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
296        return Ok(FileFormat::MP4);
297    }
298
299    // Check compatible brands
300    let compatible_brands = get_compatible_brands(ftyp.body_data())?;
301
302    if QT_BRAND_NAMES
303        .iter()
304        .any(|v| compatible_brands.contains(&v.as_bytes()))
305    {
306        return Ok(FileFormat::QuickTime);
307    }
308
309    if HEIF_HEIC_BRAND_NAMES
310        .iter()
311        .any(|x| compatible_brands.contains(x))
312    {
313        return Ok(FileFormat::Heif);
314    }
315
316    if MP4_BRAND_NAMES
317        .iter()
318        .any(|v| compatible_brands.contains(&v.as_bytes()))
319    {
320        return Ok(FileFormat::MP4);
321    }
322
323    tracing::warn!(
324        marjor_brand = major_brand.iter().map(|b| *b as char).collect::<String>(),
325        "unknown major brand",
326    );
327
328    if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
329        // find mdat box, assume it's a mp4 file
330        return Ok(FileFormat::MP4);
331    }
332
333    Err(crate::Error::UnrecognizedFileFormat)
334}
335
336fn get_ftyp_and_major_brand(input: &[u8]) -> crate::Result<(BoxHolder<'_>, Option<&[u8]>)> {
337    let (_, bbox) = BoxHolder::parse(input).map_err(|e| format!("parse ftyp failed: {e}"))?;
338
339    if bbox.box_type() == "ftyp" {
340        if bbox.body_data().len() < 4 {
341            return Err(format!(
342                "parse ftyp failed; body size should greater than 4, got {}",
343                bbox.body_data().len()
344            )
345            .into());
346        }
347        let (_, ftyp) = complete::take(4_usize)(bbox.body_data())?;
348        Ok((bbox, Some(ftyp)))
349    } else if bbox.box_type() == "wide" {
350        // MOV files that extracted from HEIC starts with `wide` & `mdat` atoms
351        Ok((bbox, None))
352    } else {
353        Err(format!("parse ftyp failed; first box type is: {}", bbox.box_type()).into())
354    }
355}
356
357fn get_compatible_brands(body: &[u8]) -> crate::Result<Vec<&[u8]>> {
358    let Ok((_, brands)) = many0(complete::take::<usize, &[u8], nom::error::Error<&[u8]>>(
359        4_usize,
360    ))(body) else {
361        return Err("get compatible brands failed".into());
362    };
363    Ok(brands)
364}
365
#[allow(deprecated)]
#[cfg(test)]
mod tests {
    use std::ops::Deref;

    use super::*;
    use test_case::test_case;
    use Mime::*;
    use MimeImage::*;
    use MimeVideo::*;

    use crate::testkit::{open_sample, read_sample};

    /// Each sample file is detected as the expected [`Mime`].
    #[test_case("exif.heic", Image(Heic))]
    #[test_case("exif.jpg", Image(Jpeg))]
    #[test_case("fujifilm_x_t1_01.raf.meta", Image(Raf))]
    #[test_case("meta.mp4", Video(Mp4))]
    #[test_case("meta.mov", Video(QuickTime))]
    #[test_case("embedded-in-heic.mov", Video(QuickTime))]
    #[test_case("compatible-brands.mov", Video(QuickTime))]
    #[test_case("webm_480.webm", Video(Webm))]
    #[test_case("mkv_640x360.mkv", Video(Matroska))]
    #[test_case("mka.mka", Video(Matroska))]
    #[test_case("3gp_640x360.3gp", Video(_3gpp))]
    fn mime(path: &str, mime: Mime) {
        let bytes = read_sample(path).unwrap();
        let detected = Mime::try_from(bytes.deref()).unwrap();
        assert_eq!(detected, mime);
    }

    /// Each sample file is detected as the expected deprecated [`FileFormat`].
    #[test_case("exif.heic", FileFormat::Heif)]
    #[test_case("exif.jpg", FileFormat::Jpeg)]
    #[test_case("meta.mov", FileFormat::QuickTime)]
    #[test_case("meta.mp4", FileFormat::MP4)]
    #[test_case("embedded-in-heic.mov", FileFormat::QuickTime)]
    #[test_case("compatible-brands.mov", FileFormat::QuickTime)]
    fn file_format(path: &str, expect: FileFormat) {
        let sample = open_sample(path).unwrap();
        let detected = FileFormat::try_from_read(sample).unwrap();
        assert_eq!(detected, expect);
    }

    /// Detection fails on a sample that must not be recognized.
    #[test_case("compatible-brands-fail.mov")]
    fn file_format_error(path: &str) {
        let sample = open_sample(path).unwrap();
        assert!(FileFormat::try_from_read(sample).is_err());
    }
}
413}