Skip to main content

nom_exif/
mov.rs

1use std::{
2    collections::BTreeMap,
3    io::{Read, Seek},
4    ops::Range,
5};
6
7use chrono::DateTime;
8use nom::{bytes::streaming, IResult};
9
10use crate::{bbox::to_boxes, values::filter_zero};
11#[allow(deprecated)]
12use crate::{
13    bbox::{
14        find_box, parse_video_tkhd_in_moov, travel_header, IlstBox, KeysBox, MvhdBox, ParseBox,
15    },
16    error::ParsingError,
17    loader::{BufLoader, Load},
18    partial_vec::PartialVec,
19    skip::Seekable,
20    video::TrackInfoTag,
21    EntryValue, FileFormat,
22};
23
24/// *Deprecated*: Please use [`crate::MediaParser`] instead.
25///
26/// Analyze the byte stream in the `reader` as a MOV/MP4 file, attempting to
27/// extract any possible metadata it may contain, and return it in the form of
28/// key-value pairs.
29///
30/// Please note that the parsing routine itself provides a buffer, so the
31/// `reader` may not need to be wrapped with `BufRead`.
32///
33/// # Usage
34///
35/// ```rust
36/// use nom_exif::*;
37///
38/// use std::fs::File;
39/// use std::path::Path;
40///
41/// let f = File::open(Path::new("./testdata/meta.mov")).unwrap();
42/// let entries = parse_metadata(f).unwrap();
43///
44/// assert_eq!(
45///     entries
46///         .iter()
47///         .map(|x| format!("{x:?}"))
48///         .collect::<Vec<_>>()
49///         .join("\n"),
50///     r#"("com.apple.quicktime.make", Text("Apple"))
51/// ("com.apple.quicktime.model", Text("iPhone X"))
52/// ("com.apple.quicktime.software", Text("12.1.2"))
53/// ("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))
54/// ("com.apple.quicktime.creationdate", Time(2019-02-12T15:27:12+08:00))
55/// ("duration", U32(500))
56/// ("width", U32(720))
57/// ("height", U32(1280))"#,
58/// );
59/// ```
60#[deprecated(since = "2.0.0")]
61#[tracing::instrument(skip_all)]
62#[allow(deprecated)]
63pub fn parse_metadata<R: Read + Seek>(reader: R) -> crate::Result<Vec<(String, EntryValue)>> {
64    let mut loader = BufLoader::<Seekable, _>::new(reader);
65    let ff = FileFormat::try_from_load(&mut loader)?;
66    match ff {
67        FileFormat::Jpeg | FileFormat::Heif => {
68            return Err(crate::error::Error::ParseFailed(
69                "can not parse metadata from an image".into(),
70            ));
71        }
72        FileFormat::QuickTime | FileFormat::MP4 => (),
73        FileFormat::Ebml => {
74            return Err(crate::error::Error::ParseFailed(
75                "please use MediaParser to parse *.webm, *.mkv files".into(),
76            ))
77        }
78    };
79
80    let moov_body = extract_moov_body(loader)?;
81
82    let (_, mut entries) = match parse_moov_body(&moov_body) {
83        Ok((remain, Some(entries))) => (remain, entries),
84        Ok((remain, None)) => (remain, Vec::new()),
85        Err(_) => {
86            return Err("invalid moov body".into());
87        }
88    };
89
90    let map: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries.clone());
91    let mut extras = parse_mvhd_tkhd(&moov_body);
92
93    const CREATIONDATE_KEY: &str = "com.apple.quicktime.creationdate";
94    if map.contains_key(&TrackInfoTag::CreateDate) {
95        extras.remove(&TrackInfoTag::CreateDate);
96        let date = map.get(&TrackInfoTag::CreateDate);
97        if let Some(pos) = entries.iter().position(|x| x.0 == CREATIONDATE_KEY) {
98            if let Some(date) = date {
99                entries[pos] = (CREATIONDATE_KEY.to_string(), date.clone());
100            } else {
101                entries.remove(pos);
102            }
103        }
104    }
105
106    entries.extend(extras.into_iter().map(|(k, v)| match k {
107        TrackInfoTag::ImageWidth => ("width".to_string(), v),
108        TrackInfoTag::ImageHeight => ("height".to_string(), v),
109        TrackInfoTag::DurationMs => (
110            "duration".to_string(),
111            // For compatibility with older versions, convert to u32
112            EntryValue::U32(v.as_u64().unwrap() as u32),
113        ),
114        TrackInfoTag::CreateDate => (CREATIONDATE_KEY.to_string(), v),
115        _ => unreachable!(),
116    }));
117
118    if map.contains_key(&TrackInfoTag::GpsIso6709) {
119        const LOCATION_KEY: &str = "com.apple.quicktime.location.ISO6709";
120        if let Some(idx) = entries.iter().position(|(k, _)| k == "udta.©xyz") {
121            entries.remove(idx);
122            entries.push((
123                LOCATION_KEY.to_string(),
124                map.get(&TrackInfoTag::GpsIso6709).unwrap().to_owned(),
125            ));
126        }
127    }
128
129    Ok(entries)
130}
131
132#[tracing::instrument(skip_all)]
133pub(crate) fn parse_isobmff(
134    moov_body: &[u8],
135) -> Result<BTreeMap<TrackInfoTag, EntryValue>, ParsingError> {
136    let (_, entries) = match parse_moov_body(moov_body) {
137        Ok((remain, Some(entries))) => (remain, entries),
138        Ok((remain, None)) => (remain, Vec::new()),
139        Err(_) => {
140            return Err("invalid moov body".into());
141        }
142    };
143
144    let mut entries: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries);
145    let mut extras = parse_mvhd_tkhd(moov_body);
146    if entries.contains_key(&TrackInfoTag::CreateDate) {
147        extras.remove(&TrackInfoTag::CreateDate);
148    }
149    entries.extend(extras);
150
151    Ok(entries)
152}
153
154fn parse_mvhd_tkhd(moov_body: &[u8]) -> BTreeMap<TrackInfoTag, EntryValue> {
155    let mut entries = BTreeMap::new();
156    if let Ok((_, Some(bbox))) = find_box(moov_body, "mvhd") {
157        if let Ok((_, mvhd)) = MvhdBox::parse_box(bbox.data) {
158            entries.insert(TrackInfoTag::DurationMs, mvhd.duration_ms().into());
159
160            entries.insert(
161                TrackInfoTag::CreateDate,
162                EntryValue::Time(mvhd.creation_time()),
163            );
164        }
165    }
166
167    if let Ok(Some(tkhd)) = parse_video_tkhd_in_moov(moov_body) {
168        entries.insert(TrackInfoTag::ImageWidth, tkhd.width.into());
169        entries.insert(TrackInfoTag::ImageHeight, tkhd.height.into());
170    }
171
172    entries
173}
174
175fn convert_video_tags(entries: Vec<(String, EntryValue)>) -> BTreeMap<TrackInfoTag, EntryValue> {
176    entries
177        .into_iter()
178        .filter_map(|(k, v)| {
179            if k == "com.apple.quicktime.creationdate" {
180                v.as_str()
181                    .and_then(|s| DateTime::parse_from_str(s, "%+").ok())
182                    .map(|t| (TrackInfoTag::CreateDate, EntryValue::Time(t)))
183            } else if k == "com.apple.quicktime.make" {
184                Some((TrackInfoTag::Make, v))
185            } else if k == "com.apple.quicktime.model" {
186                Some((TrackInfoTag::Model, v))
187            } else if k == "com.apple.quicktime.software" {
188                Some((TrackInfoTag::Software, v))
189            } else if k == "com.apple.quicktime.author" {
190                Some((TrackInfoTag::Author, v))
191            } else if k == "com.apple.quicktime.location.ISO6709" {
192                Some((TrackInfoTag::GpsIso6709, v))
193            } else if k == "udta.©xyz" {
194                // For mp4 files, Android phones store GPS info in that box.
195                v.as_u8array()
196                    .and_then(parse_udta_gps)
197                    .map(|v| (TrackInfoTag::GpsIso6709, EntryValue::Text(v)))
198            } else if k == "udta.auth" {
199                v.as_u8array()
200                    .and_then(parse_udta_auth)
201                    .map(|v| (TrackInfoTag::Author, EntryValue::Text(v)))
202            } else if k.starts_with("udta.") {
203                let tag = TryInto::<TrackInfoTag>::try_into(k.as_str()).ok();
204                tag.map(|t| (t, v))
205            } else {
206                None
207            }
208        })
209        .collect()
210}
211
212/// Try to find GPS info from box `moov/udta/©xyz`. For mp4 files, Android
213/// phones store GPS info in that box.
214// fn parse_mp4_gps(moov_body: &[u8]) -> Option<String> {
215//     let bbox = match find_box(moov_body, "udta/©xyz") {
216//         Ok((_, b)) => b,
217//         Err(_) => None,
218//     };
219//     if let Some(bbox) = bbox {
220//         return parse_udta_gps(bbox.body_data());
221//     }
222//     None
223// }
224fn parse_udta_gps(data: &[u8]) -> Option<String> {
225    if data.len() <= 4 {
226        tracing::warn!("moov/udta/©xyz body is too small");
227        None
228    } else {
229        // The first 4 bytes is zero, skip them
230        let location = data[4..] // Safe-slice
231            .iter()
232            .map(|b| *b as char)
233            .collect::<String>();
234        Some(location)
235    }
236}
237
238const ISO_639_2_UND: [u8; 2] = [0x55, 0xc4];
239
240fn parse_udta_auth(data: &[u8]) -> Option<String> {
241    // Skip leading zero bytes
242    let data = filter_zero(data);
243
244    // Skip leading language flags.
245    // Refer to: https://exiftool.org/forum/index.php?topic=11498.0
246    if data.starts_with(&ISO_639_2_UND) {
247        String::from_utf8(data.into_iter().skip(2).collect()).ok()
248    } else {
249        String::from_utf8(data).ok()
250    }
251}
252
253/// *Deprecated*: Please use [`crate::MediaParser`] instead.
254///
255/// Analyze the byte stream in the `reader` as a MOV file, attempting to extract
256/// any possible metadata it may contain, and return it in the form of key-value
257/// pairs.
258///
259/// Please note that the parsing routine itself provides a buffer, so the
260/// `reader` may not need to be wrapped with `BufRead`.
261///
262/// # Usage
263///
264/// ```rust
265/// use nom_exif::*;
266///
267/// use std::fs::File;
268/// use std::path::Path;
269///
270/// let f = File::open(Path::new("./testdata/meta.mov")).unwrap();
271/// let entries = parse_mov_metadata(f).unwrap();
272///
273/// assert_eq!(
274///     entries
275///         .iter()
276///         .map(|x| format!("{x:?}"))
277///         .collect::<Vec<_>>()
278///         .join("\n"),
279///     r#"("com.apple.quicktime.make", Text("Apple"))
280/// ("com.apple.quicktime.model", Text("iPhone X"))
281/// ("com.apple.quicktime.software", Text("12.1.2"))
282/// ("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))
283/// ("com.apple.quicktime.creationdate", Time(2019-02-12T15:27:12+08:00))
284/// ("duration", U32(500))
285/// ("width", U32(720))
286/// ("height", U32(1280))"#,
287/// );
288/// ```
289#[deprecated(since = "2.0.0")]
290pub fn parse_mov_metadata<R: Read + Seek>(reader: R) -> crate::Result<Vec<(String, EntryValue)>> {
291    #[allow(deprecated)]
292    parse_metadata(reader)
293}
294
295#[tracing::instrument(skip_all)]
296fn extract_moov_body<L: Load>(mut loader: L) -> Result<PartialVec, crate::Error> {
297    let moov_body_range = loader.load_and_parse(extract_moov_body_from_buf)?;
298
299    tracing::debug!(?moov_body_range);
300    Ok(PartialVec::from_vec_range(
301        loader.into_vec(),
302        moov_body_range,
303    ))
304}
305
306/// Parse the byte data of an ISOBMFF file and return the potential body data of
307/// moov atom it may contain.
308///
309/// Regarding error handling, please refer to [Error] for more information.
310#[tracing::instrument(skip_all)]
311pub(crate) fn extract_moov_body_from_buf(input: &[u8]) -> Result<Range<usize>, ParsingError> {
312    // parse metadata from moov/meta/keys & moov/meta/ilst
313    let remain = input;
314
315    let convert_error = |e: nom::Err<_>, msg: &str| match e {
316        nom::Err::Incomplete(needed) => match needed {
317            nom::Needed::Unknown => ParsingError::Need(1),
318            nom::Needed::Size(n) => ParsingError::Need(n.get()),
319        },
320        nom::Err::Failure(_) | nom::Err::Error(_) => ParsingError::Failed(msg.to_string()),
321    };
322
323    let mut to_skip = 0;
324    let mut skipped = 0;
325    let (remain, header) = travel_header(remain, |h, remain| {
326        tracing::debug!(?h.box_type, ?h.box_size, "Got");
327        if h.box_type == "moov" {
328            // stop travelling
329            skipped += h.header_size;
330            false
331        } else if (remain.len() as u64) < h.body_size() {
332            // stop travelling & skip unused box data
333            to_skip = h.body_size() as usize - remain.len();
334            false
335        } else {
336            // body has been read, so just consume it
337            skipped += h.box_size as usize;
338            true
339        }
340    })
341    .map_err(|e| convert_error(e, "search atom moov failed"))?;
342
343    if to_skip > 0 {
344        return Err(ParsingError::ClearAndSkip(
345            to_skip
346                .checked_add(input.len())
347                .ok_or_else(|| ParsingError::Failed("to_skip is too big".into()))?,
348        ));
349    }
350
351    let size: usize = header.body_size().try_into().expect("must fit");
352    let (_, body) =
353        streaming::take(size)(remain).map_err(|e| convert_error(e, "moov is too small"))?;
354
355    Ok(skipped..skipped + body.len())
356}
357
358type EntriesResult<'a> = IResult<&'a [u8], Option<Vec<(String, EntryValue)>>>;
359
360#[tracing::instrument(skip(input))]
361fn parse_moov_body(input: &[u8]) -> EntriesResult<'_> {
362    tracing::debug!("parse_moov_body");
363
364    let mut entries = parse_meta(input).unwrap_or_default();
365
366    if let Ok((_, Some(udta))) = find_box(input, "udta") {
367        tracing::debug!("udta");
368        if let Ok(boxes) = to_boxes(udta.body_data()) {
369            for entry in boxes.iter() {
370                tracing::debug!(?entry, "udta entry");
371                entries.push((
372                    format!("udta.{}", entry.box_type()),
373                    EntryValue::U8Array(Vec::from(entry.body_data())),
374                ));
375            }
376        }
377    }
378
379    Ok((input, Some(entries)))
380}
381
382fn parse_meta(input: &[u8]) -> Option<Vec<(String, EntryValue)>> {
383    let (_, Some(meta)) = find_box(input, "meta").ok()? else {
384        return None;
385    };
386
387    let (_, Some(keys)) = find_box(meta.body_data(), "keys").ok()? else {
388        return None;
389    };
390
391    let (_, Some(ilst)) = find_box(meta.body_data(), "ilst").ok()? else {
392        return None;
393    };
394
395    let (_, keys) = KeysBox::parse_box(keys.data).ok()?;
396    let (_, ilst) = IlstBox::parse_box(ilst.data).ok()?;
397
398    let entries = keys
399        .entries
400        .into_iter()
401        .map(|k| k.key)
402        .zip(ilst.items.into_iter().map(|v| v.value))
403        .collect::<Vec<_>>();
404
405    Some(entries)
406}
407
/// Change timezone format from iso 8601 to rfc3339, e.g.:
///
/// - `2023-11-02T19:58:34+08` -> `2023-11-02T19:58:34+08:00`
/// - `2023-11-02T19:58:34+0800` -> `2023-11-02T19:58:34+08:00`
///
/// The input is trimmed before inspection. If it ends in a sign followed by
/// exactly two or four ASCII digits, that suffix is rewritten as `±hh:mm`
/// (with `00` minutes when only the hours are present). Any other input is
/// returned unchanged, including its surrounding whitespace.
///
/// Text in front of the offset is copied as-is, so non-ASCII characters are
/// preserved.
#[allow(dead_code)]
fn tz_iso_8601_to_rfc3339(s: String) -> String {
    let trimmed = s.trim();
    let bytes = trimmed.as_bytes();

    // Try the `±hhmm` suffix first, then the shorter `±hh` form. The two can
    // never both match, because the sign position of one is a digit position
    // of the other.
    for digits in [4usize, 2] {
        let Some(sign_at) = bytes.len().checked_sub(digits + 1) else {
            continue;
        };
        let has_sign = matches!(bytes[sign_at], b'+' | b'-');
        if has_sign && bytes[sign_at + 1..].iter().all(u8::is_ascii_digit) {
            // `sign_at` points at an ASCII byte and everything after it is
            // ASCII too, so both slice positions are valid char boundaries.
            let (hours, minutes) = trimmed[sign_at..].split_at(3);
            let minutes = if minutes.is_empty() { "00" } else { minutes };
            return format!("{}{}:{}", &trimmed[..sign_at], hours, minutes);
        }
    }

    s
}
438
#[cfg(test)]
#[allow(deprecated)]
mod tests {
    use super::*;
    use crate::testkit::*;
    use test_case::test_case;

    /// Renders entries one per line using their `Debug` representation.
    fn render(entries: &[(String, EntryValue)]) -> String {
        entries
            .iter()
            .map(|x| format!("{x:?}"))
            .collect::<Vec<_>>()
            .join("\n")
    }

    /// Full metadata of a MOV sample via the deprecated entry point.
    #[test_case("meta.mov")]
    fn mov_parse(path: &str) {
        let reader = open_sample(path).unwrap();
        let entries = parse_metadata(reader).unwrap();

        let expected = [
            r#"("com.apple.quicktime.make", Text("Apple"))"#,
            r#"("com.apple.quicktime.model", Text("iPhone X"))"#,
            r#"("com.apple.quicktime.software", Text("12.1.2"))"#,
            r#"("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))"#,
            r#"("com.apple.quicktime.creationdate", Time(2019-02-12T15:27:12+08:00))"#,
            r#"("duration", U32(500))"#,
            r#"("width", U32(720))"#,
            r#"("height", U32(1280))"#,
        ]
        .join("\n");
        assert_eq!(render(&entries), expected);
    }

    /// Raw entries of the moov body: the creation date is still plain text.
    #[test_case("meta.mov")]
    fn mov_extract_mov(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let buf = read_sample(path).unwrap();
        tracing::info!(bytes = buf.len(), "File size.");
        let range = extract_moov_body_from_buf(&buf).unwrap();
        let (_, entries) = parse_moov_body(&buf[range]).unwrap();

        let expected = [
            r#"("com.apple.quicktime.make", Text("Apple"))"#,
            r#"("com.apple.quicktime.model", Text("iPhone X"))"#,
            r#"("com.apple.quicktime.software", Text("12.1.2"))"#,
            r#"("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))"#,
            r#"("com.apple.quicktime.creationdate", Text("2019-02-12T15:27:12+08:00"))"#,
        ]
        .join("\n");
        assert_eq!(render(&entries.unwrap()), expected);
    }

    /// MP4 sample whose location comes from the `udta.©xyz` box.
    #[test_case("meta.mp4")]
    fn parse_mp4(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let entries = parse_metadata(open_sample(path).unwrap()).unwrap();

        let expected = [
            r#"("com.apple.quicktime.creationdate", Time(2024-02-03T07:05:38+00:00))"#,
            r#"("duration", U32(1063))"#,
            r#"("width", U32(1920))"#,
            r#"("height", U32(1080))"#,
            r#"("com.apple.quicktime.location.ISO6709", Text("+27.2939+112.6932/"))"#,
        ]
        .join("\n");
        assert_eq!(render(&entries), expected);
    }

    /// MOV sample that was embedded in a HEIC live photo.
    #[test_case("embedded-in-heic.mov")]
    fn parse_embedded_mov(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let entries = parse_mov_metadata(open_sample(path).unwrap()).unwrap();

        let expected = [
            r#"("com.apple.quicktime.location.accuracy.horizontal", Text("14.235563"))"#,
            r#"("com.apple.quicktime.live-photo.auto", U8(1))"#,
            r#"("com.apple.quicktime.content.identifier", Text("DA1A7EE8-0925-4C9F-9266-DDA3F0BB80F0"))"#,
            r#"("com.apple.quicktime.live-photo.vitality-score", F32(0.93884003))"#,
            r#"("com.apple.quicktime.live-photo.vitality-scoring-version", I64(4))"#,
            r#"("com.apple.quicktime.location.ISO6709", Text("+22.5797+113.9380+028.396/"))"#,
            r#"("com.apple.quicktime.make", Text("Apple"))"#,
            r#"("com.apple.quicktime.model", Text("iPhone 15 Pro"))"#,
            r#"("com.apple.quicktime.software", Text("17.1"))"#,
            r#"("com.apple.quicktime.creationdate", Time(2023-11-02T19:58:34+08:00))"#,
            r#"("duration", U32(2795))"#,
            r#"("width", U32(1920))"#,
            r#"("height", U32(1440))"#,
        ]
        .join("\n");
        assert_eq!(render(&entries), expected);
    }

    /// Offset rewriting: `(input, expected output)` pairs.
    #[test]
    fn test_iso_8601_tz_to_rfc3339() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let cases = [
            ("2023-11-02T19:58:34+08", "2023-11-02T19:58:34+08:00"),
            ("2023-11-02T19:58:34+0800", "2023-11-02T19:58:34+08:00"),
            ("2023-11-02T19:58:34+08:00", "2023-11-02T19:58:34+08:00"),
            ("2023-11-02T19:58:34Z", "2023-11-02T19:58:34Z"),
            ("2023-11-02T19:58:34", "2023-11-02T19:58:34"),
        ];
        for (input, expected) in cases {
            assert_eq!(tz_iso_8601_to_rfc3339(input.to_string()), expected);
        }
    }
}