nom_exif/
mov.rs

1use std::{
2    collections::BTreeMap,
3    io::{Read, Seek},
4    ops::Range,
5};
6
7use chrono::DateTime;
8use nom::{bytes::streaming, IResult};
9
10use crate::{bbox::to_boxes, values::filter_zero};
11#[allow(deprecated)]
12use crate::{
13    bbox::{
14        find_box, parse_video_tkhd_in_moov, travel_header, IlstBox, KeysBox, MvhdBox, ParseBox,
15    },
16    error::ParsingError,
17    loader::{BufLoader, Load},
18    partial_vec::PartialVec,
19    skip::Seekable,
20    video::TrackInfoTag,
21    EntryValue, FileFormat,
22};
23
24/// *Deprecated*: Please use [`crate::MediaParser`] instead.
25///
26/// Analyze the byte stream in the `reader` as a MOV/MP4 file, attempting to
27/// extract any possible metadata it may contain, and return it in the form of
28/// key-value pairs.
29///
30/// Please note that the parsing routine itself provides a buffer, so the
31/// `reader` may not need to be wrapped with `BufRead`.
32///
33/// # Usage
34///
35/// ```rust
36/// use nom_exif::*;
37///
38/// use std::fs::File;
39/// use std::path::Path;
40///
41/// let f = File::open(Path::new("./testdata/meta.mov")).unwrap();
42/// let entries = parse_metadata(f).unwrap();
43///
44/// assert_eq!(
45///     entries
46///         .iter()
47///         .map(|x| format!("{x:?}"))
48///         .collect::<Vec<_>>()
49///         .join("\n"),
50///     r#"("com.apple.quicktime.make", Text("Apple"))
51/// ("com.apple.quicktime.model", Text("iPhone X"))
52/// ("com.apple.quicktime.software", Text("12.1.2"))
53/// ("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))
54/// ("com.apple.quicktime.creationdate", Time(2019-02-12T15:27:12+08:00))
55/// ("duration", U32(500))
56/// ("width", U32(720))
57/// ("height", U32(1280))"#,
58/// );
59/// ```
60#[deprecated(since = "2.0.0")]
61#[tracing::instrument(skip_all)]
62#[allow(deprecated)]
63pub fn parse_metadata<R: Read + Seek>(reader: R) -> crate::Result<Vec<(String, EntryValue)>> {
64    let mut loader = BufLoader::<Seekable, _>::new(reader);
65    let ff = FileFormat::try_from_load(&mut loader)?;
66    match ff {
67        FileFormat::Jpeg | FileFormat::Heif => {
68            return Err(crate::error::Error::ParseFailed(
69                "can not parse metadata from an image".into(),
70            ));
71        }
72        FileFormat::QuickTime | FileFormat::MP4 => (),
73        FileFormat::Ebml => {
74            return Err(crate::error::Error::ParseFailed(
75                "please use MediaParser to parse *.webm, *.mkv files".into(),
76            ))
77        }
78    };
79
80    let moov_body = extract_moov_body(loader)?;
81
82    let (_, mut entries) = match parse_moov_body(&moov_body) {
83        Ok((remain, Some(entries))) => (remain, entries),
84        Ok((remain, None)) => (remain, Vec::new()),
85        Err(_) => {
86            return Err("invalid moov body".into());
87        }
88    };
89
90    let map: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries.clone());
91    let mut extras = parse_mvhd_tkhd(&moov_body);
92
93    const CREATIONDATE_KEY: &str = "com.apple.quicktime.creationdate";
94    if map.contains_key(&TrackInfoTag::CreateDate) {
95        extras.remove(&TrackInfoTag::CreateDate);
96        let date = map.get(&TrackInfoTag::CreateDate);
97        if let Some(pos) = entries.iter().position(|x| x.0 == CREATIONDATE_KEY) {
98            if let Some(date) = date {
99                entries[pos] = (CREATIONDATE_KEY.to_string(), date.clone());
100            } else {
101                entries.remove(pos);
102            }
103        }
104    }
105
106    entries.extend(extras.into_iter().map(|(k, v)| match k {
107        TrackInfoTag::ImageWidth => ("width".to_string(), v),
108        TrackInfoTag::ImageHeight => ("height".to_string(), v),
109        TrackInfoTag::DurationMs => (
110            "duration".to_string(),
111            // For compatibility with older versions, convert to u32
112            EntryValue::U32(v.as_u64().unwrap() as u32),
113        ),
114        TrackInfoTag::CreateDate => (CREATIONDATE_KEY.to_string(), v),
115        _ => unreachable!(),
116    }));
117
118    if map.contains_key(&TrackInfoTag::GpsIso6709) {
119        const LOCATION_KEY: &str = "com.apple.quicktime.location.ISO6709";
120        if let Some(idx) = entries.iter().position(|(k, _)| k == "udta.©xyz") {
121            entries.remove(idx);
122            entries.push((
123                LOCATION_KEY.to_string(),
124                map.get(&TrackInfoTag::GpsIso6709).unwrap().to_owned(),
125            ));
126        }
127    }
128
129    Ok(entries)
130}
131
132#[tracing::instrument(skip_all)]
133pub(crate) fn parse_qt(
134    moov_body: &[u8],
135) -> Result<BTreeMap<TrackInfoTag, EntryValue>, ParsingError> {
136    let (_, entries) = match parse_moov_body(moov_body) {
137        Ok((remain, Some(entries))) => (remain, entries),
138        Ok((remain, None)) => (remain, Vec::new()),
139        Err(_) => {
140            return Err("invalid moov body".into());
141        }
142    };
143
144    let mut entries: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries);
145    let extras = parse_mvhd_tkhd(moov_body);
146    if entries.contains_key(&TrackInfoTag::CreateDate) {
147        entries.remove(&TrackInfoTag::CreateDate);
148    }
149    entries.extend(extras);
150
151    Ok(entries)
152}
153
154#[tracing::instrument(skip_all)]
155pub(crate) fn parse_mp4(
156    moov_body: &[u8],
157) -> Result<BTreeMap<TrackInfoTag, EntryValue>, ParsingError> {
158    let (_, entries) = match parse_moov_body(moov_body) {
159        Ok((remain, Some(entries))) => (remain, entries),
160        Ok((remain, None)) => (remain, Vec::new()),
161        Err(_) => {
162            return Err("invalid moov body".into());
163        }
164    };
165
166    let mut entries: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries);
167    let extras = parse_mvhd_tkhd(moov_body);
168    entries.extend(extras);
169
170    Ok(entries)
171}
172
173fn parse_mvhd_tkhd(moov_body: &[u8]) -> BTreeMap<TrackInfoTag, EntryValue> {
174    let mut entries = BTreeMap::new();
175    if let Ok((_, Some(bbox))) = find_box(moov_body, "mvhd") {
176        if let Ok((_, mvhd)) = MvhdBox::parse_box(bbox.data) {
177            entries.insert(TrackInfoTag::DurationMs, mvhd.duration_ms().into());
178
179            entries.insert(
180                TrackInfoTag::CreateDate,
181                EntryValue::Time(mvhd.creation_time()),
182            );
183        }
184    }
185
186    if let Ok(Some(tkhd)) = parse_video_tkhd_in_moov(moov_body) {
187        entries.insert(TrackInfoTag::ImageWidth, tkhd.width.into());
188        entries.insert(TrackInfoTag::ImageHeight, tkhd.height.into());
189    }
190
191    entries
192}
193
194fn convert_video_tags(entries: Vec<(String, EntryValue)>) -> BTreeMap<TrackInfoTag, EntryValue> {
195    entries
196        .into_iter()
197        .filter_map(|(k, v)| {
198            if k == "com.apple.quicktime.creationdate" {
199                v.as_str()
200                    .and_then(|s| DateTime::parse_from_str(s, "%+").ok())
201                    .map(|t| (TrackInfoTag::CreateDate, EntryValue::Time(t)))
202            } else if k == "com.apple.quicktime.make" {
203                Some((TrackInfoTag::Make, v))
204            } else if k == "com.apple.quicktime.model" {
205                Some((TrackInfoTag::Model, v))
206            } else if k == "com.apple.quicktime.software" {
207                Some((TrackInfoTag::Software, v))
208            } else if k == "com.apple.quicktime.author" {
209                Some((TrackInfoTag::Author, v))
210            } else if k == "com.apple.quicktime.location.ISO6709" {
211                Some((TrackInfoTag::GpsIso6709, v))
212            } else if k == "udta.©xyz" {
213                // For mp4 files, Android phones store GPS info in that box.
214                v.as_u8array()
215                    .and_then(parse_udta_gps)
216                    .map(|v| (TrackInfoTag::GpsIso6709, EntryValue::Text(v)))
217            } else if k == "udta.auth" {
218                v.as_u8array()
219                    .and_then(parse_udta_auth)
220                    .map(|v| (TrackInfoTag::Author, EntryValue::Text(v)))
221            } else if k.starts_with("udta.") {
222                let tag = TryInto::<TrackInfoTag>::try_into(k.as_str()).ok();
223                tag.map(|t| (t, v))
224            } else {
225                None
226            }
227        })
228        .collect()
229}
230
231/// Try to find GPS info from box `moov/udta/©xyz`. For mp4 files, Android
232/// phones store GPS info in that box.
233// fn parse_mp4_gps(moov_body: &[u8]) -> Option<String> {
234//     let bbox = match find_box(moov_body, "udta/©xyz") {
235//         Ok((_, b)) => b,
236//         Err(_) => None,
237//     };
238//     if let Some(bbox) = bbox {
239//         return parse_udta_gps(bbox.body_data());
240//     }
241//     None
242// }
243fn parse_udta_gps(data: &[u8]) -> Option<String> {
244    if data.len() <= 4 {
245        tracing::warn!("moov/udta/©xyz body is too small");
246        None
247    } else {
248        // The first 4 bytes is zero, skip them
249        let location = data[4..] // Safe-slice
250            .iter()
251            .map(|b| *b as char)
252            .collect::<String>();
253        Some(location)
254    }
255}
256
257const ISO_639_2_UND: [u8; 2] = [0x55, 0xc4];
258
259fn parse_udta_auth(data: &[u8]) -> Option<String> {
260    // Skip leading zero bytes
261    let data = filter_zero(data);
262
263    // Skip leading language flags.
264    // Refer to: https://exiftool.org/forum/index.php?topic=11498.0
265    if data.starts_with(&ISO_639_2_UND) {
266        String::from_utf8(data.into_iter().skip(2).collect()).ok()
267    } else {
268        String::from_utf8(data).ok()
269    }
270}
271
272/// *Deprecated*: Please use [`crate::MediaParser`] instead.
273///
274/// Analyze the byte stream in the `reader` as a MOV file, attempting to extract
275/// any possible metadata it may contain, and return it in the form of key-value
276/// pairs.
277///
278/// Please note that the parsing routine itself provides a buffer, so the
279/// `reader` may not need to be wrapped with `BufRead`.
280///
281/// # Usage
282///
283/// ```rust
284/// use nom_exif::*;
285///
286/// use std::fs::File;
287/// use std::path::Path;
288///
289/// let f = File::open(Path::new("./testdata/meta.mov")).unwrap();
290/// let entries = parse_mov_metadata(f).unwrap();
291///
292/// assert_eq!(
293///     entries
294///         .iter()
295///         .map(|x| format!("{x:?}"))
296///         .collect::<Vec<_>>()
297///         .join("\n"),
298///     r#"("com.apple.quicktime.make", Text("Apple"))
299/// ("com.apple.quicktime.model", Text("iPhone X"))
300/// ("com.apple.quicktime.software", Text("12.1.2"))
301/// ("com.apple.quicktime.location.ISO6709", Text("+27.1281+100.2508+000.000/"))
302/// ("com.apple.quicktime.creationdate", Time(2019-02-12T15:27:12+08:00))
303/// ("duration", U32(500))
304/// ("width", U32(720))
305/// ("height", U32(1280))"#,
306/// );
307/// ```
308#[deprecated(since = "2.0.0")]
309pub fn parse_mov_metadata<R: Read + Seek>(reader: R) -> crate::Result<Vec<(String, EntryValue)>> {
310    #[allow(deprecated)]
311    parse_metadata(reader)
312}
313
314#[tracing::instrument(skip_all)]
315fn extract_moov_body<L: Load>(mut loader: L) -> Result<PartialVec, crate::Error> {
316    let moov_body_range = loader.load_and_parse(extract_moov_body_from_buf)?;
317
318    tracing::debug!(?moov_body_range);
319    Ok(PartialVec::from_vec_range(
320        loader.into_vec(),
321        moov_body_range,
322    ))
323}
324
325/// Parse the byte data of an ISOBMFF file and return the potential body data of
326/// moov atom it may contain.
327///
328/// Regarding error handling, please refer to [Error] for more information.
329#[tracing::instrument(skip_all)]
330pub(crate) fn extract_moov_body_from_buf(input: &[u8]) -> Result<Range<usize>, ParsingError> {
331    // parse metadata from moov/meta/keys & moov/meta/ilst
332    let remain = input;
333
334    let convert_error = |e: nom::Err<_>, msg: &str| match e {
335        nom::Err::Incomplete(needed) => match needed {
336            nom::Needed::Unknown => ParsingError::Need(1),
337            nom::Needed::Size(n) => ParsingError::Need(n.get()),
338        },
339        nom::Err::Failure(_) | nom::Err::Error(_) => ParsingError::Failed(msg.to_string()),
340    };
341
342    let mut to_skip = 0;
343    let mut skipped = 0;
344    let (remain, header) = travel_header(remain, |h, remain| {
345        tracing::debug!(?h.box_type, ?h.box_size, "Got");
346        if h.box_type == "moov" {
347            // stop travelling
348            skipped += h.header_size;
349            false
350        } else if (remain.len() as u64) < h.body_size() {
351            // stop travelling & skip unused box data
352            to_skip = h.body_size() as usize - remain.len();
353            false
354        } else {
355            // body has been read, so just consume it
356            skipped += h.box_size as usize;
357            true
358        }
359    })
360    .map_err(|e| convert_error(e, "search atom moov failed"))?;
361
362    if to_skip > 0 {
363        return Err(ParsingError::ClearAndSkip(
364            to_skip
365                .checked_add(input.len())
366                .ok_or_else(|| ParsingError::Failed("to_skip is too big".into()))?,
367        ));
368    }
369
370    let size: usize = header.body_size().try_into().expect("must fit");
371    let (_, body) =
372        streaming::take(size)(remain).map_err(|e| convert_error(e, "moov is too small"))?;
373
374    Ok(skipped..skipped + body.len())
375}
376
377type EntriesResult<'a> = IResult<&'a [u8], Option<Vec<(String, EntryValue)>>>;
378
379#[tracing::instrument(skip(input))]
380fn parse_moov_body(input: &[u8]) -> EntriesResult<'_> {
381    tracing::debug!("parse_moov_body");
382
383    let mut entries = parse_meta(input).unwrap_or_default();
384
385    if let Ok((_, Some(udta))) = find_box(input, "udta") {
386        tracing::debug!("udta");
387        if let Ok(boxes) = to_boxes(udta.body_data()) {
388            for entry in boxes.iter() {
389                tracing::debug!(?entry, "udta entry");
390                entries.push((
391                    format!("udta.{}", entry.box_type()),
392                    EntryValue::U8Array(Vec::from(entry.body_data())),
393                ));
394            }
395        }
396    }
397
398    Ok((input, Some(entries)))
399}
400
401fn parse_meta(input: &[u8]) -> Option<Vec<(String, EntryValue)>> {
402    let (_, Some(meta)) = find_box(input, "meta").ok()? else {
403        return None;
404    };
405
406    let (_, Some(keys)) = find_box(meta.body_data(), "keys").ok()? else {
407        return None;
408    };
409
410    let (_, Some(ilst)) = find_box(meta.body_data(), "ilst").ok()? else {
411        return None;
412    };
413
414    let (_, keys) = KeysBox::parse_box(keys.data).ok()?;
415    let (_, ilst) = IlstBox::parse_box(ilst.data).ok()?;
416
417    let entries = keys
418        .entries
419        .into_iter()
420        .map(|k| k.key)
421        .zip(ilst.items.into_iter().map(|v| v.value))
422        .collect::<Vec<_>>();
423
424    Some(entries)
425}
426
/// Change timezone format from iso 8601 to rfc3339, e.g.:
///
/// - `2023-11-02T19:58:34+08` -> `2023-11-02T19:58:34+08:00`
/// - `2023-11-02T19:58:34+0800` -> `2023-11-02T19:58:34+08:00`
///
/// The input is trimmed before the trailing offset is looked for. When the
/// trimmed text does not end with a sign followed by exactly two or four
/// ASCII digits, `s` is returned unchanged (and untrimmed).
#[allow(dead_code)]
fn tz_iso_8601_to_rfc3339(s: String) -> String {
    let trimmed = s.trim();
    let bytes = trimmed.as_bytes();

    // True when the text ends with `+`/`-` followed by `digits` ASCII digits.
    let ends_with_offset = |digits: usize| -> bool {
        let len = digits + 1;
        if bytes.len() < len {
            return false;
        }
        let tail = &bytes[bytes.len() - len..];
        matches!(tail[0], b'+' | b'-') && tail[1..].iter().all(u8::is_ascii_digit)
    };

    let offset_len = if ends_with_offset(4) {
        5
    } else if ends_with_offset(2) {
        3
    } else {
        return s;
    };

    // The split point is an ASCII sign, so it is always a char boundary and
    // whatever precedes it (including non-ASCII text) is copied untouched.
    let (head, offset) = trimmed.split_at(trimmed.len() - offset_len);
    let hours = &offset[..3];
    let minutes = if offset_len == 5 { &offset[3..] } else { "00" };

    format!("{head}{hours}:{minutes}")
}
457
// Tests for the deprecated MOV/MP4 entry points and their helpers. The sample
// files are opened through the `testkit` helpers.
#[cfg(test)]
#[allow(deprecated)]
mod tests {
    use super::*;
    use crate::testkit::*;
    use test_case::test_case;

    // Full pipeline on a MOV sample: metadata entries first, followed by the
    // duration/width/height entries appended from mvhd/tkhd.
    #[test_case("meta.mov")]
    fn mov_parse(path: &str) {
        let reader = open_sample(path).unwrap();
        let entries = parse_metadata(reader).unwrap();
        assert_eq!(
            entries
                .iter()
                .map(|x| format!("{x:?}"))
                .collect::<Vec<_>>()
                .join("\n"),
            "(\"com.apple.quicktime.make\", Text(\"Apple\"))
(\"com.apple.quicktime.model\", Text(\"iPhone X\"))
(\"com.apple.quicktime.software\", Text(\"12.1.2\"))
(\"com.apple.quicktime.location.ISO6709\", Text(\"+27.1281+100.2508+000.000/\"))
(\"com.apple.quicktime.creationdate\", Time(2019-02-12T15:27:12+08:00))
(\"duration\", U32(500))
(\"width\", U32(720))
(\"height\", U32(1280))"
        );
    }

    // Locates the moov body in an in-memory buffer and checks the raw entries
    // returned by `parse_moov_body`; the creation date is still plain text at
    // this stage.
    #[test_case("meta.mov")]
    fn mov_extract_mov(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let buf = read_sample(path).unwrap();
        tracing::info!(bytes = buf.len(), "File size.");
        let range = extract_moov_body_from_buf(&buf).unwrap();
        let (_, entries) = parse_moov_body(&buf[range]).unwrap();
        assert_eq!(
            entries
                .unwrap()
                .iter()
                .map(|x| format!("{x:?}"))
                .collect::<Vec<_>>()
                .join("\n"),
            "(\"com.apple.quicktime.make\", Text(\"Apple\"))
(\"com.apple.quicktime.model\", Text(\"iPhone X\"))
(\"com.apple.quicktime.software\", Text(\"12.1.2\"))
(\"com.apple.quicktime.location.ISO6709\", Text(\"+27.1281+100.2508+000.000/\"))
(\"com.apple.quicktime.creationdate\", Text(\"2019-02-12T15:27:12+08:00\"))"
        );
    }

    // Full pipeline on an MP4 sample: the location entry is expected last,
    // after the duration/width/height entries.
    #[test_case("meta.mp4")]
    fn parse_mp4(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let entries = parse_metadata(open_sample(path).unwrap()).unwrap();
        assert_eq!(
            entries
                .iter()
                .map(|x| format!("{x:?}"))
                .collect::<Vec<_>>()
                .join("\n"),
            "(\"com.apple.quicktime.creationdate\", Time(2024-02-03T07:05:38+00:00))
(\"duration\", U32(1063))
(\"width\", U32(1920))
(\"height\", U32(1080))
(\"com.apple.quicktime.location.ISO6709\", Text(\"+27.2939+112.6932/\"))"
        );
    }

    // Same pipeline through the `parse_mov_metadata` alias, on a sample that
    // (going by its file name) is a MOV embedded in a HEIC — TODO confirm.
    #[test_case("embedded-in-heic.mov")]
    fn parse_embedded_mov(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let entries = parse_mov_metadata(open_sample(path).unwrap()).unwrap();
        assert_eq!(
            entries
                .iter()
                .map(|x| format!("{x:?}"))
                .collect::<Vec<_>>()
                .join("\n"),
            "(\"com.apple.quicktime.location.accuracy.horizontal\", Text(\"14.235563\"))
(\"com.apple.quicktime.live-photo.auto\", U8(1))
(\"com.apple.quicktime.content.identifier\", Text(\"DA1A7EE8-0925-4C9F-9266-DDA3F0BB80F0\"))
(\"com.apple.quicktime.live-photo.vitality-score\", F32(0.93884003))
(\"com.apple.quicktime.live-photo.vitality-scoring-version\", I64(4))
(\"com.apple.quicktime.location.ISO6709\", Text(\"+22.5797+113.9380+028.396/\"))
(\"com.apple.quicktime.make\", Text(\"Apple\"))
(\"com.apple.quicktime.model\", Text(\"iPhone 15 Pro\"))
(\"com.apple.quicktime.software\", Text(\"17.1\"))
(\"com.apple.quicktime.creationdate\", Time(2023-11-02T19:58:34+08:00))
(\"duration\", U32(2795))
(\"width\", U32(1920))
(\"height\", U32(1440))"
        );
    }

    // Offset rewriting: `+HH` and `+HHMM` gain a colon, while inputs that
    // already carry `+HH:MM`, `Z`, or no offset are returned unchanged.
    #[test]
    fn test_iso_8601_tz_to_rfc3339() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let s = "2023-11-02T19:58:34+08".to_string();
        assert_eq!(tz_iso_8601_to_rfc3339(s), "2023-11-02T19:58:34+08:00");

        let s = "2023-11-02T19:58:34+0800".to_string();
        assert_eq!(tz_iso_8601_to_rfc3339(s), "2023-11-02T19:58:34+08:00");

        let s = "2023-11-02T19:58:34+08:00".to_string();
        assert_eq!(tz_iso_8601_to_rfc3339(s), "2023-11-02T19:58:34+08:00");

        let s = "2023-11-02T19:58:34Z".to_string();
        assert_eq!(tz_iso_8601_to_rfc3339(s), "2023-11-02T19:58:34Z");

        let s = "2023-11-02T19:58:34".to_string();
        assert_eq!(tz_iso_8601_to_rfc3339(s), "2023-11-02T19:58:34");
    }
}