Skip to main content

nom_exif/
bbox.rs

1use std::fmt::Debug;
2
3use nom::{
4    bytes::streaming,
5    combinator::{fail, map_res},
6    error::context,
7    number, AsChar, IResult, Needed,
8};
9
10pub mod cr3_moov;
11mod idat;
12mod iinf;
13mod iloc;
14mod ilst;
15mod keys;
16mod meta;
17mod mvhd;
18mod tkhd;
19mod uuid;
20pub use cr3_moov::Cr3MoovBox;
21pub use ilst::IlstBox;
22pub use keys::KeysBox;
23pub use meta::MetaBox;
24pub use mvhd::MvhdBox;
25pub use tkhd::parse_video_tkhd_in_moov;
26
/// Largest box body, in bytes, that `ParseBox::parse_box` is willing to read
/// (2000 MiB). Boxes with a larger body are rejected with a parse failure.
const MAX_BODY_LEN: usize = 2000 * 1024 * 1024;
28
/// Representing an ISO base media file format box header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BoxHeader {
    /// Total size of the box in bytes, header included. Taken from the 32-bit
    /// size field, or from the 64-bit field that follows the type when the
    /// 32-bit field equals 1.
    pub box_size: u64,
    /// The four type bytes, each converted to one `char`.
    pub box_type: String,
    /// Number of bytes the header occupies: 8, or 16 when the 64-bit size
    /// field is present.
    pub header_size: usize, // include size, type
}
36
37impl BoxHeader {
38    pub fn parse<'a>(input: &'a [u8]) -> IResult<&'a [u8], BoxHeader> {
39        let (remain, size) = number::streaming::be_u32(input)?;
40
41        let (remain, box_type) = map_res(streaming::take(4_usize), |res: &'a [u8]| {
42            // String::from_utf8 will fail on "©xyz"
43            Ok::<String, ()>(res.iter().map(|b| b.as_char()).collect::<String>())
44            // String::from_utf8(res.to_vec()).map_err(|error| {
45            //     tracing::error!(?error, ?res, "Failed to construct string");
46            //     error
47            // })
48        })(remain)?;
49
50        let (remain, box_size) = if size == 1 {
51            number::streaming::be_u64(remain)?
52        } else if size < 8 {
53            context("invalid box header: box_size is too small", fail)(remain)?
54        } else {
55            (remain, size as u64)
56        };
57
58        let header_size = input.len() - remain.len();
59        assert!(header_size == 8 || header_size == 16);
60
61        if box_size < header_size as u64 {
62            return fail(remain);
63        }
64
65        Ok((
66            remain,
67            BoxHeader {
68                box_size,
69                box_type,
70                header_size,
71            },
72        ))
73    }
74
75    pub fn body_size(&self) -> u64 {
76        self.box_size - self.header_size as u64
77    }
78}
79
/// Representing an ISO base media file format full box header.
///
/// On top of the basic header fields this carries the one-byte `version` and
/// the 24-bit `flags` that are read right after the basic header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FullBoxHeader {
    /// Total size of the box in bytes, header included.
    pub box_size: u64,
    /// The four type bytes, each converted to one `char`.
    pub box_type: String,
    /// Number of bytes the full header occupies: 12, or 20 when the 64-bit
    /// size field is present.
    pub header_size: usize, // include size, type, version, flags

    version: u8, // 8 bits
    flags: u32,  // 24 bits
}
90
91impl FullBoxHeader {
92    fn parse(input: &[u8]) -> IResult<&[u8], FullBoxHeader> {
93        let (remain, header) = BoxHeader::parse(input)?;
94
95        let (remain, version) = number::streaming::u8(remain)?;
96        let (remain, flags) = number::streaming::be_u24(remain)?;
97
98        let header_size = input.len() - remain.len();
99        assert!(header_size == 12 || header_size == 20);
100
101        if header.box_size < header_size as u64 {
102            return fail(remain);
103        }
104
105        Ok((
106            remain,
107            FullBoxHeader {
108                box_type: header.box_type,
109                box_size: header.box_size,
110                header_size,
111                version,
112                flags,
113            },
114        ))
115    }
116
117    pub fn body_size(&self) -> u64 {
118        self.box_size - self.header_size as u64
119    }
120}
121
/// Representing a generic ISO base media file format box.
///
/// The box does not own its bytes; it borrows them from the parsed input.
#[derive(Clone, PartialEq, Eq)]
pub struct BoxHolder<'a> {
    /// Parsed header of this box.
    pub header: BoxHeader,
    // Including header
    /// Raw bytes of the whole box: the header first, the body after it.
    pub data: &'a [u8],
}
129
130impl Debug for BoxHolder<'_> {
131    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132        f.debug_struct("BoxHolder")
133            .field("header", &self.header)
134            .field("body_size", &self.body_data().len())
135            .field(
136                "data",
137                &(self
138                    .body_data()
139                    .iter()
140                    .take(64)
141                    .map(|x| x.as_char())
142                    .collect::<String>()),
143            )
144            .finish()
145    }
146}
147
148impl<'a> BoxHolder<'a> {
149    #[tracing::instrument(skip_all)]
150    pub fn parse(input: &'a [u8]) -> IResult<&'a [u8], BoxHolder<'a>> {
151        let (_, header) = BoxHeader::parse(input)?;
152        tracing::debug!(box_type = header.box_type, ?header, "Got");
153
154        let box_size = usize::try_from(header.box_size)
155            .expect("header box size should always fit into a `usize`.");
156
157        let (remain, data) = streaming::take(box_size)(input)?;
158
159        Ok((remain, BoxHolder { header, data }))
160    }
161
162    #[allow(unused)]
163    pub fn box_size(&self) -> u64 {
164        self.header.box_size
165    }
166
167    pub fn box_type(&self) -> &str {
168        &self.header.box_type
169    }
170
171    pub fn header_size(&self) -> usize {
172        self.header.header_size
173    }
174
175    pub fn body_data(&self) -> &'a [u8] {
176        &self.data[self.header_size()..] // Safe-slice
177    }
178}
179
/// Result of a box traversal: the remaining input plus the box the traversal
/// stopped at, or `None` when the input was exhausted first.
type BoxResult<'a> = IResult<&'a [u8], Option<BoxHolder<'a>>>;
181
182pub fn to_boxes(input: &[u8]) -> crate::Result<Vec<BoxHolder<'_>>> {
183    let mut res = Vec::new();
184    let mut remain = input;
185    loop {
186        if remain.is_empty() {
187            break;
188        }
189
190        let (rem, bbox) = BoxHolder::parse(remain)?;
191        res.push(bbox);
192        // Sanity check, to avoid infinite loops caused by unexpected errors.
193        assert!(rem.len() < remain.len());
194        remain = rem;
195    }
196    Ok(res)
197}
198
199/// Parses every top level box while `predicate` returns true, then returns the
200/// last parsed box.
201pub fn travel_while<'a, F>(input: &'a [u8], mut predicate: F) -> BoxResult<'a>
202where
203    F: FnMut(&BoxHolder<'a>) -> bool,
204{
205    let mut remain = input;
206    loop {
207        if remain.is_empty() {
208            return Ok((remain, None));
209        }
210
211        let (rem, bbox) = BoxHolder::parse(remain)?;
212        // Sanity check, to avoid infinite loops caused by unexpected errors.
213        assert!(rem.len() < remain.len());
214        remain = rem;
215
216        if !predicate(&bbox) {
217            return Ok((remain, Some(bbox)));
218        }
219    }
220}
221
222pub fn travel_header<'a, F>(input: &'a [u8], mut predicate: F) -> IResult<&'a [u8], BoxHeader>
223where
224    F: FnMut(&BoxHeader, &'a [u8]) -> bool,
225{
226    let mut remain = input;
227    loop {
228        let (rem, header) = BoxHeader::parse(remain)?;
229        // Sanity check, to avoid infinite loops caused by unexpected errors.
230        assert!(rem.len() < remain.len());
231        remain = rem;
232
233        if !predicate(&header, rem) {
234            break Ok((rem, header));
235        }
236
237        if remain.len() < header.body_size() as usize {
238            return Err(nom::Err::Incomplete(Needed::new(
239                header.body_size() as usize - remain.len(),
240            )));
241        }
242
243        // skip box body
244        remain = &remain[header.body_size() as usize..]; // Safe-slice
245    }
246}
247
248#[allow(unused)]
249/// Find a box by atom `path`, which is separated by '/', e.g.: "meta/iloc".
250pub fn find_box<'a>(input: &'a [u8], path: &str) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
251    if path.is_empty() {
252        return Ok((input, None));
253    }
254
255    let mut bbox = None;
256    let mut remain = input;
257    let mut data = input;
258
259    for box_type in path.split('/').filter(|x| !x.is_empty()) {
260        assert!(!box_type.is_empty());
261
262        let (rem, b) = find_box_by_type(data, box_type)?;
263        let Some(b) = b else {
264            return Ok((rem, None));
265        };
266
267        data = b.body_data();
268        (remain, bbox) = (rem, Some(b));
269    }
270
271    Ok((remain, bbox))
272}
273
274fn find_box_by_type<'a>(
275    input: &'a [u8],
276    box_type: &str,
277) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
278    let mut remain = input;
279    loop {
280        if remain.is_empty() {
281            return Ok((remain, None));
282        }
283
284        let (rem, bbox) = BoxHolder::parse(remain)?;
285        // Sanity check, to avoid infinite loops caused by unexpected errors.
286        assert!(rem.len() < remain.len());
287        remain = rem;
288
289        if bbox.box_type() == box_type {
290            return Ok((rem, Some(bbox)));
291        }
292    }
293}
294
/// Parsing of a full box body whose header has already been consumed.
///
/// Implementing this trait is enough to get `ParseBox::parse_box` through the
/// blanket implementation in this module.
trait ParseBody<O> {
    /// Parses `body`, the bytes of the box that follow the full box header
    /// described by `header`. Returns the unparsed tail of `body` together
    /// with the parsed value.
    fn parse_body(body: &[u8], header: FullBoxHeader) -> IResult<&[u8], O>;
}
298
/// Parsing of a complete box, header and body, from raw input.
pub trait ParseBox<O> {
    /// Parses one box from the start of `input` and returns the input that
    /// remains together with the parsed value.
    fn parse_box(input: &[u8]) -> IResult<&[u8], O>;
}
302
303/// auto implements parse_box for each Box which implements ParseBody
304impl<O, T: ParseBody<O>> ParseBox<O> for T {
305    #[tracing::instrument(skip_all)]
306    fn parse_box(input: &[u8]) -> IResult<&[u8], O> {
307        let (remain, header) = FullBoxHeader::parse(input)?;
308        assert_eq!(input.len(), header.header_size + remain.len());
309        assert!(
310            header.box_size >= header.header_size as u64,
311            "box_size = {}, header_size = {}",
312            header.box_size,
313            header.header_size
314        );
315
316        // limit parsing size
317        let box_size = header.body_size() as usize;
318        if box_size > MAX_BODY_LEN {
319            tracing::error!(?header.box_type, ?box_size, "Box is too big");
320            return fail(remain);
321        }
322        let (remain, data) = streaming::take(box_size)(remain)?;
323        assert_eq!(input.len(), header.header_size + data.len() + remain.len());
324
325        let (rem, bbox) = Self::parse_body(data, header)?;
326
327        if !rem.is_empty() {
328            // TODO: Body data is not exhausted, should report this error with
329            // tracing
330        }
331
332        Ok((remain, bbox))
333    }
334}
335
#[cfg(test)]
mod tests {
    use crate::testkit::read_sample;

    use super::*;
    use nom::error::make_error;
    use test_case::test_case;

    /// Installs the test tracing subscriber. The result of `try_init` is
    /// discarded, so calling this from several tests is fine.
    fn init_tracing() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();
    }

    /// Walks the sibling boxes in `input` with `travel_while` until a box of
    /// type `stop` has been visited.
    ///
    /// Returns the remaining input, the box that stopped the walk (`None` if
    /// the input ran out first) and every visited box in order.
    fn travel_until<'a>(
        input: &'a [u8],
        stop: &str,
    ) -> (&'a [u8], Option<BoxHolder<'a>>, Vec<BoxHolder<'a>>) {
        let mut visited = Vec::new();
        let (remain, last) = travel_while(input, |bbox| {
            tracing::info!(bbox.header.box_type, "Got");
            visited.push(bbox.clone());
            bbox.box_type() != stop
        })
        .unwrap();
        (remain, last, visited)
    }

    /// The types of `boxes`, in order.
    fn types_of<'b>(boxes: &'b [BoxHolder<'_>]) -> Vec<&'b str> {
        boxes.iter().map(|bbox| bbox.box_type()).collect()
    }

    #[test_case("exif.heic")]
    fn travel_heic(path: &str) {
        init_tracing();

        let buf = read_sample(path).unwrap();

        let (remain, last, top) = travel_until(&buf, "mdat");
        let last = last.unwrap();
        assert_eq!(last.header.box_type, "mdat");
        assert_eq!(remain, b"");

        // top level boxes
        assert_eq!(types_of(&top), ["ftyp", "meta", "mdat"]);

        let meta = &top[1];
        assert_eq!(meta.box_type(), "meta");

        let (remain, last, children) = travel_until(
            &meta.body_data()[4..], // Safe-slice in test_case
            "iloc",
        );
        let last = last.unwrap();
        assert_eq!(last.box_type(), "iloc");
        assert_eq!(remain, b"");

        // sub-boxes in meta
        assert_eq!(
            types_of(&children),
            ["hdlr", "dinf", "pitm", "iinf", "iref", "iprp", "idat", "iloc"]
        );
    }

    #[test_case("meta.mov")]
    fn travel_mov(path: &str) {
        init_tracing();

        let buf = read_sample(path).unwrap();

        let (remain, last, top) = travel_until(&buf, "moov");
        let last = last.unwrap();
        assert_eq!(last.header.box_type, "moov");
        assert_eq!(remain, b"");

        // top level boxes
        assert_eq!(types_of(&top), ["ftyp", "wide", "mdat", "moov"]);

        let moov = top.last().unwrap();
        assert_eq!(moov.box_type(), "moov");

        let (remain, last, children) = travel_until(moov.body_data(), "meta");
        let meta = last.unwrap();
        assert_eq!(meta.box_type(), "meta");
        assert_eq!(remain, b"");

        // sub-boxes in moov
        assert_eq!(
            types_of(&children),
            ["mvhd", "trak", "trak", "trak", "trak", "meta"]
        );

        let (remain, _, children) = travel_until(meta.body_data(), "ilst");
        assert_eq!(remain, b"");

        // sub-boxes in meta
        assert_eq!(types_of(&children), ["hdlr", "keys", "ilst"]);
    }

    #[test_case("meta.mp4")]
    fn travel_mp4(path: &str) {
        init_tracing();

        let buf = read_sample(path).unwrap();

        let (remain, last, top) = travel_until(&buf, "moov");
        let last = last.unwrap();
        assert_eq!(last.header.box_type, "moov");
        assert_eq!(remain, b"");

        // top level boxes
        assert_eq!(types_of(&top), ["ftyp", "mdat", "moov"]);

        let moov = top.last().unwrap();
        assert_eq!(moov.box_type(), "moov");

        let (remain, last, moov_children) = travel_until(moov.body_data(), "udta");
        let udta = last.unwrap();
        assert_eq!(udta.box_type(), "udta");
        assert_eq!(remain, b"");

        // sub-boxes in moov
        assert_eq!(types_of(&moov_children), ["mvhd", "trak", "trak", "udta"]);

        // The first of the two `trak` boxes is inspected further below.
        let trak = moov_children
            .iter()
            .find(|bbox| bbox.box_type() == "trak")
            .unwrap();

        let (remain, _, children) = travel_until(udta.body_data(), "©xyz");
        assert_eq!(remain, b"");

        // sub-boxes in udta
        assert_eq!(types_of(&children), ["©xyz"]);

        let (remain, last, children) = travel_until(trak.body_data(), "mdia");
        assert_eq!(remain, b"");

        // sub-boxes in trak
        assert_eq!(types_of(&children), ["tkhd", "edts", "mdia"]);

        let mdia = last.unwrap();
        let (remain, _, children) = travel_until(mdia.body_data(), "minf");
        assert_eq!(remain, b"");

        // sub-boxes in mdia
        assert_eq!(types_of(&children), ["mdhd", "hdlr", "minf"]);
    }

    // For mp4 files, Android phones store GPS info in the `moov/udta/©xyz`
    // atom.
    #[test_case("meta.mp4")]
    fn find_android_gps_box(path: &str) {
        init_tracing();

        let buf = read_sample(path).unwrap();
        let (_, found) = find_box(&buf, "moov/udta/©xyz").unwrap();
        let bbox = found.unwrap();
        tracing::info!(?bbox.header, "bbox");

        // gps info
        let gps = std::str::from_utf8(&bbox.body_data()[4..]).unwrap(); // Safe-slice in test_case
        assert_eq!("+27.2939+112.6932/", gps);
    }

    #[test]
    fn box_header() {
        init_tracing();

        // Full box header with a 32-bit size.
        let data = [
            0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74, 0x61, 0x02, 0x04, 0x04, 0x00,
        ];
        let (remain, header) = FullBoxHeader::parse(&data).unwrap();
        assert_eq!(header.box_type, "meta");
        assert_eq!(header.box_size, 0x01dd);
        assert_eq!(header.version, 0x2);
        assert_eq!(header.flags, 0x40400);
        assert_eq!(header.header_size, 12);
        assert_eq!(remain, b"");

        // Full box header whose size field is 1, so a 64-bit size follows
        // the type.
        let data = [
            0x00, 0x00, 0x00, 0x01, 0x6d, 0x64, 0x61, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
            0xfa, 0x74, 0x01, 0x04, 0x04, 0x00,
        ];
        let (remain, header) = FullBoxHeader::parse(&data).unwrap();
        assert_eq!(header.box_type, "mdat");
        assert_eq!(header.box_size, 0xefa74);
        assert_eq!(header.version, 0x1);
        assert_eq!(header.flags, 0x40400);
        assert_eq!(header.header_size, 20);
        assert_eq!(remain, b"");

        // One byte short of a basic header.
        let data = [0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74];
        let err = BoxHeader::parse(&data).unwrap_err();
        assert!(err.is_incomplete());

        // A declared size of 7 is smaller than the basic header.
        let data = [0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00];
        let err = BoxHeader::parse(&data).unwrap_err();
        assert_eq!(
            err,
            nom::Err::Error(make_error(&[] as &[u8], nom::error::ErrorKind::Fail))
        );
    }
}