nom_exif/
bbox.rs

1use std::fmt::{Debug, Display};
2
3use nom::{
4    bytes::streaming,
5    combinator::{fail, map_res},
6    error::context,
7    number, AsChar, IResult, Needed,
8};
9
10mod idat;
11mod iinf;
12mod iloc;
13mod ilst;
14mod keys;
15mod meta;
16mod mvhd;
17mod tkhd;
18pub use ilst::IlstBox;
19pub use keys::KeysBox;
20pub use meta::MetaBox;
21pub use mvhd::MvhdBox;
22pub use tkhd::parse_video_tkhd_in_moov;
23
/// Largest box body, in bytes, that [`ParseBox::parse_box`] agrees to parse.
const MAX_BODY_LEN: usize = 2_000 * 1024 * 1024;
25
/// Errors defined by this module.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// A construction method that is not supported; carries the offending
    /// value.
    UnsupportedConstructionMethod(u8),
}

impl std::error::Error for Error {}

impl Display for Error {
    /// Writes the plain, human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `write!` emits the text as-is. Routing the message through
            // `Debug::fmt` would wrap it in quotes and escape its contents,
            // which is not what a `Display` consumer expects.
            Error::UnsupportedConstructionMethod(x) => {
                write!(f, "unsupported construction method ({x})")
            }
        }
    }
}
42
43/// Representing an ISO base media file format box header.
44#[derive(Debug, Clone, PartialEq, Eq)]
45pub struct BoxHeader {
46    pub box_size: u64,
47    pub box_type: String,
48    pub header_size: usize, // include size, type
49}
50
51impl BoxHeader {
52    pub fn parse<'a>(input: &'a [u8]) -> IResult<&'a [u8], BoxHeader> {
53        let (remain, size) = number::streaming::be_u32(input)?;
54
55        let (remain, box_type) = map_res(streaming::take(4_usize), |res: &'a [u8]| {
56            // String::from_utf8 will fail on "©xyz"
57            Ok::<String, ()>(res.iter().map(|b| b.as_char()).collect::<String>())
58            // String::from_utf8(res.to_vec()).map_err(|error| {
59            //     tracing::error!(?error, ?res, "Failed to construct string");
60            //     error
61            // })
62        })(remain)?;
63
64        let (remain, box_size) = if size == 1 {
65            number::streaming::be_u64(remain)?
66        } else if size < 8 {
67            context("invalid box header: box_size is too small", fail)(remain)?
68        } else {
69            (remain, size as u64)
70        };
71
72        let header_size = input.len() - remain.len();
73        assert!(header_size == 8 || header_size == 16);
74
75        if box_size < header_size as u64 {
76            return fail(remain);
77        }
78
79        Ok((
80            remain,
81            BoxHeader {
82                box_size,
83                box_type,
84                header_size,
85            },
86        ))
87    }
88
89    pub fn body_size(&self) -> u64 {
90        self.box_size - self.header_size as u64
91    }
92}
93
94/// Representing an ISO base media file format full box header.
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub struct FullBoxHeader {
97    pub box_size: u64,
98    pub box_type: String,
99    pub header_size: usize, // include size, type, version, flags
100
101    version: u8, // 8 bits
102    flags: u32,  // 24 bits
103}
104
105impl FullBoxHeader {
106    fn parse(input: &[u8]) -> IResult<&[u8], FullBoxHeader> {
107        let (remain, header) = BoxHeader::parse(input)?;
108
109        let (remain, version) = number::streaming::u8(remain)?;
110        let (remain, flags) = number::streaming::be_u24(remain)?;
111
112        let header_size = input.len() - remain.len();
113        assert!(header_size == 12 || header_size == 20);
114
115        if header.box_size < header_size as u64 {
116            return fail(remain);
117        }
118
119        Ok((
120            remain,
121            FullBoxHeader {
122                box_type: header.box_type,
123                box_size: header.box_size,
124                header_size,
125                version,
126                flags,
127            },
128        ))
129    }
130
131    pub fn body_size(&self) -> u64 {
132        self.box_size - self.header_size as u64
133    }
134}
135
136/// Representing a generic ISO base media file format box.
137#[derive(Clone, PartialEq, Eq)]
138pub struct BoxHolder<'a> {
139    pub header: BoxHeader,
140    // Including header
141    pub data: &'a [u8],
142}
143
144impl Debug for BoxHolder<'_> {
145    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146        f.debug_struct("BoxHolder")
147            .field("header", &self.header)
148            .field("body_size", &self.body_data().len())
149            .field(
150                "data",
151                &(self
152                    .body_data()
153                    .iter()
154                    .take(64)
155                    .map(|x| x.as_char())
156                    .collect::<String>()),
157            )
158            .finish()
159    }
160}
161
162impl<'a> BoxHolder<'a> {
163    #[tracing::instrument(skip_all)]
164    pub fn parse(input: &'a [u8]) -> IResult<&'a [u8], BoxHolder<'a>> {
165        let (_, header) = BoxHeader::parse(input)?;
166        tracing::debug!(box_type = header.box_type, ?header, "Got");
167
168        let box_size = usize::try_from(header.box_size)
169            .expect("header box size should always fit into a `usize`.");
170
171        let (remain, data) = streaming::take(box_size)(input)?;
172
173        Ok((remain, BoxHolder { header, data }))
174    }
175
176    #[allow(unused)]
177    pub fn box_size(&self) -> u64 {
178        self.header.box_size
179    }
180
181    pub fn box_type(&self) -> &str {
182        &self.header.box_type
183    }
184
185    pub fn header_size(&self) -> usize {
186        self.header.header_size
187    }
188
189    pub fn body_data(&self) -> &'a [u8] {
190        &self.data[self.header_size()..] // Safe-slice
191    }
192}
193
194type BoxResult<'a> = IResult<&'a [u8], Option<BoxHolder<'a>>>;
195
196pub fn to_boxes(input: &[u8]) -> crate::Result<Vec<BoxHolder<'_>>> {
197    let mut res = Vec::new();
198    let mut remain = input;
199    loop {
200        if remain.is_empty() {
201            break;
202        }
203
204        let (rem, bbox) = BoxHolder::parse(remain)?;
205        res.push(bbox);
206        // Sanity check, to avoid infinite loops caused by unexpected errors.
207        assert!(rem.len() < remain.len());
208        remain = rem;
209    }
210    Ok(res)
211}
212
213/// Parses every top level box while `predicate` returns true, then returns the
214/// last parsed box.
215pub fn travel_while<'a, F>(input: &'a [u8], mut predicate: F) -> BoxResult<'a>
216where
217    F: FnMut(&BoxHolder<'a>) -> bool,
218{
219    let mut remain = input;
220    loop {
221        if remain.is_empty() {
222            return Ok((remain, None));
223        }
224
225        let (rem, bbox) = BoxHolder::parse(remain)?;
226        // Sanity check, to avoid infinite loops caused by unexpected errors.
227        assert!(rem.len() < remain.len());
228        remain = rem;
229
230        if !predicate(&bbox) {
231            return Ok((remain, Some(bbox)));
232        }
233    }
234}
235
236pub fn travel_header<'a, F>(input: &'a [u8], mut predicate: F) -> IResult<&'a [u8], BoxHeader>
237where
238    F: FnMut(&BoxHeader, &'a [u8]) -> bool,
239{
240    let mut remain = input;
241    loop {
242        let (rem, header) = BoxHeader::parse(remain)?;
243        // Sanity check, to avoid infinite loops caused by unexpected errors.
244        assert!(rem.len() < remain.len());
245        remain = rem;
246
247        if !predicate(&header, rem) {
248            break Ok((rem, header));
249        }
250
251        if remain.len() < header.body_size() as usize {
252            return Err(nom::Err::Incomplete(Needed::new(
253                header.body_size() as usize - remain.len(),
254            )));
255        }
256
257        // skip box body
258        remain = &remain[header.body_size() as usize..]; // Safe-slice
259    }
260}
261
262#[allow(unused)]
263/// Find a box by atom `path`, which is separated by '/', e.g.: "meta/iloc".
264pub fn find_box<'a>(input: &'a [u8], path: &str) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
265    if path.is_empty() {
266        return Ok((input, None));
267    }
268
269    let mut bbox = None;
270    let mut remain = input;
271    let mut data = input;
272
273    for box_type in path.split('/').filter(|x| !x.is_empty()) {
274        assert!(!box_type.is_empty());
275
276        let (rem, b) = find_box_by_type(data, box_type)?;
277        let Some(b) = b else {
278            return Ok((rem, None));
279        };
280
281        data = b.body_data();
282        (remain, bbox) = (rem, Some(b));
283    }
284
285    Ok((remain, bbox))
286}
287
288fn find_box_by_type<'a>(
289    input: &'a [u8],
290    box_type: &str,
291) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
292    let mut remain = input;
293    loop {
294        if remain.is_empty() {
295            return Ok((remain, None));
296        }
297
298        let (rem, bbox) = BoxHolder::parse(remain)?;
299        // Sanity check, to avoid infinite loops caused by unexpected errors.
300        assert!(rem.len() < remain.len());
301        remain = rem;
302
303        if bbox.box_type() == box_type {
304            return Ok((rem, Some(bbox)));
305        }
306    }
307}
308
309trait ParseBody<O> {
310    fn parse_body(body: &[u8], header: FullBoxHeader) -> IResult<&[u8], O>;
311}
312
313pub trait ParseBox<O> {
314    fn parse_box(input: &[u8]) -> IResult<&[u8], O>;
315}
316
317/// auto implements parse_box for each Box which implements ParseBody
318impl<O, T: ParseBody<O>> ParseBox<O> for T {
319    #[tracing::instrument(skip_all)]
320    fn parse_box(input: &[u8]) -> IResult<&[u8], O> {
321        let (remain, header) = FullBoxHeader::parse(input)?;
322        assert_eq!(input.len(), header.header_size + remain.len());
323        assert!(
324            header.box_size >= header.header_size as u64,
325            "box_size = {}, header_size = {}",
326            header.box_size,
327            header.header_size
328        );
329
330        // limit parsing size
331        let box_size = header.body_size() as usize;
332        if box_size > MAX_BODY_LEN {
333            tracing::error!(?header.box_type, ?box_size, "Box is too big");
334            return fail(remain);
335        }
336        let (remain, data) = streaming::take(box_size)(remain)?;
337        assert_eq!(input.len(), header.header_size + data.len() + remain.len());
338
339        let (rem, bbox) = Self::parse_body(data, header)?;
340
341        if !rem.is_empty() {
342            // TODO: Body data is not exhausted, should report this error with
343            // tracing
344        }
345
346        Ok((remain, bbox))
347    }
348}
349
#[cfg(test)]
mod tests {
    use crate::testkit::read_sample;

    use super::*;
    use nom::error::make_error;
    use test_case::test_case;

    /// Installs a tracing subscriber writing to the test output. The result
    /// of `try_init` is ignored, so calling this from several tests is fine.
    fn init_tracing() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();
    }

    /// Walks the boxes in `data` with [`travel_while`] until a box of type
    /// `stop` has been parsed.
    ///
    /// Returns every visited box in order, the box `travel_while` stopped on
    /// (if any) and the input left after it.
    fn walk_until<'a>(
        data: &'a [u8],
        stop: &str,
    ) -> (Vec<BoxHolder<'a>>, Option<BoxHolder<'a>>, &'a [u8]) {
        let mut visited = Vec::new();
        let (remain, last) = travel_while(data, |bbox| {
            tracing::info!(bbox.header.box_type, "Got");
            visited.push(bbox.clone());
            bbox.box_type() != stop
        })
        .unwrap();
        (visited, last, remain)
    }

    /// Types of `boxes`, in order.
    fn box_types<'b>(boxes: &'b [BoxHolder<'_>]) -> Vec<&'b str> {
        boxes.iter().map(|bbox| bbox.box_type()).collect()
    }

    #[test_case("exif.heic")]
    fn travel_heic(path: &str) {
        init_tracing();
        let buf = read_sample(path).unwrap();

        // top level boxes
        let (mut top, last, remain) = walk_until(&buf, "mdat");
        assert_eq!(last.unwrap().header.box_type, "mdat");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&top), ["ftyp", "meta", "mdat"]);

        let meta = top.remove(1);
        assert_eq!(meta.box_type(), "meta");

        // sub-boxes in meta
        let meta_body = &meta.body_data()[4..]; // Safe-slice in test_case
        let (children, last, remain) = walk_until(meta_body, "iloc");
        assert_eq!(last.unwrap().box_type(), "iloc");
        assert_eq!(remain, b"");
        assert_eq!(
            box_types(&children),
            ["hdlr", "dinf", "pitm", "iinf", "iref", "iprp", "idat", "iloc"],
        );
    }

    #[test_case("meta.mov")]
    fn travel_mov(path: &str) {
        init_tracing();
        let buf = read_sample(path).unwrap();

        // top level boxes
        let (mut top, last, remain) = walk_until(&buf, "moov");
        assert_eq!(last.unwrap().header.box_type, "moov");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&top), ["ftyp", "wide", "mdat", "moov"]);

        let moov = top.pop().unwrap();
        assert_eq!(moov.box_type(), "moov");

        // sub-boxes in moov
        let (children, last, remain) = walk_until(moov.body_data(), "meta");
        let meta = last.unwrap();
        assert_eq!(meta.box_type(), "meta");
        assert_eq!(remain, b"");
        assert_eq!(
            box_types(&children),
            ["mvhd", "trak", "trak", "trak", "trak", "meta"],
        );

        // sub-boxes in meta
        let (children, _, remain) = walk_until(meta.body_data(), "ilst");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&children), ["hdlr", "keys", "ilst"]);
    }

    #[test_case("meta.mp4")]
    fn travel_mp4(path: &str) {
        init_tracing();
        let buf = read_sample(path).unwrap();

        // top level boxes
        let (mut top, last, remain) = walk_until(&buf, "moov");
        assert_eq!(last.unwrap().header.box_type, "moov");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&top), ["ftyp", "mdat", "moov"]);

        let moov = top.pop().unwrap();
        assert_eq!(moov.box_type(), "moov");

        // sub-boxes in moov
        let (moov_children, last, remain) = walk_until(moov.body_data(), "udta");
        let udta = last.unwrap();
        assert_eq!(udta.box_type(), "udta");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&moov_children), ["mvhd", "trak", "trak", "udta"]);

        let trak = moov_children
            .iter()
            .find(|bbox| bbox.box_type() == "trak")
            .unwrap();

        // sub-boxes in udta
        let (children, _, remain) = walk_until(udta.body_data(), "©xyz");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&children), ["©xyz"]);

        // sub-boxes in trak
        let (children, last, remain) = walk_until(trak.body_data(), "mdia");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&children), ["tkhd", "edts", "mdia"]);

        // sub-boxes in mdia
        let mdia = last.unwrap();
        let (children, _, remain) = walk_until(mdia.body_data(), "minf");
        assert_eq!(remain, b"");
        assert_eq!(box_types(&children), ["mdhd", "hdlr", "minf"]);
    }

    // For mp4 files, Android phones store GPS info in the `moov/udta/©xyz`
    // atom.
    #[test_case("meta.mp4")]
    fn find_android_gps_box(path: &str) {
        init_tracing();

        let buf = read_sample(path).unwrap();
        let (_, bbox) = find_box(&buf, "moov/udta/©xyz").unwrap();
        let bbox = bbox.unwrap();
        tracing::info!(?bbox.header, "bbox");

        // gps info
        let gps = std::str::from_utf8(&bbox.body_data()[4..]).unwrap(); // Safe-slice in test_case
        assert_eq!("+27.2939+112.6932/", gps);
    }

    #[test]
    fn box_header() {
        init_tracing();

        // Compact header: 32-bit size, type, version and flags.
        let compact = [
            0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74, 0x61, 0x02, 0x04, 0x04, 0x00,
        ];
        let (remain, header) = FullBoxHeader::parse(&compact).unwrap();
        assert_eq!(header.box_type, "meta");
        assert_eq!(header.box_size, 0x01dd);
        assert_eq!(header.version, 0x2);
        assert_eq!(header.flags, 0x40400);
        assert_eq!(header.header_size, 12);
        assert_eq!(remain, b"");

        // Size field of 1: the real size follows the type as 64 bits.
        let extended = [
            0x00, 0x00, 0x00, 0x01, 0x6d, 0x64, 0x61, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
            0xfa, 0x74, 0x01, 0x04, 0x04, 0x00,
        ];
        let (remain, header) = FullBoxHeader::parse(&extended).unwrap();
        assert_eq!(header.box_type, "mdat");
        assert_eq!(header.box_size, 0xefa74);
        assert_eq!(header.version, 0x1);
        assert_eq!(header.flags, 0x40400);
        assert_eq!(header.header_size, 20);
        assert_eq!(remain, b"");

        // Input ends in the middle of the type field.
        let truncated = [0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74];
        let err = BoxHeader::parse(&truncated).unwrap_err();
        assert!(err.is_incomplete());

        // Declared size is smaller than the header itself.
        let too_small = [0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00];
        let err = BoxHeader::parse(&too_small).unwrap_err();
        assert_eq!(
            err,
            nom::Err::Error(make_error(&[] as &[u8], nom::error::ErrorKind::Fail))
        );
    }
}