/home/min/dev/nom-exif/src/bbox.rs:
1| |use std::fmt::Debug;
2| |
3| |use nom::{
4| | bytes::streaming,
5| | combinator::{fail, map_res},
6| | error::context,
7| | number, AsChar, IResult, Needed, Parser,
8| |};
9| |
10| |pub mod cr3_moov;
11| |mod iinf;
12| |mod iloc;
13| |mod ilst;
14| |mod keys;
15| |mod meta;
16| |mod mvhd;
17| |mod tkhd;
18| |mod uuid;
19| |pub use cr3_moov::Cr3MoovBox;
20| |pub use ilst::IlstBox;
21| |pub use keys::KeysBox;
22| |pub use meta::MetaBox;
23| |pub use mvhd::MvhdBox;
24| |pub use tkhd::parse_video_tkhd_in_moov;
25| |
26| |const MAX_BODY_LEN: usize = 2000 * 1024 * 1024;
27| |
28| |/// Representing an ISO base media file format box header.
29| |#[derive(Debug, Clone, PartialEq, Eq)]
30| |pub struct BoxHeader {
31| | pub box_size: u64,
32| | pub box_type: String,
33| | pub header_size: usize, // include size, type
34| |}
35| |
36| |impl BoxHeader {
37| 4.54k| pub fn parse<'a>(input: &'a [u8]) -> IResult<&'a [u8], BoxHeader> {
38| 4.54k| let (remain, size) = number::streaming::be_u32(input)?;
^4.54k ^4.54k ^3
39| |
40| 4.54k| let (remain, box_type) = map_res(streaming::take(4_usize), |res: &'a [u8]| {
^4.53k ^4.53k ^4.53k
41| | // String::from_utf8 will fail on "©xyz"
42| 18.1k| Ok::<String, ()>(res.iter().map(|b| b.as_char()).collect::<String>())
^4.53k^4.53k ^4.53k ^4.53k
43| | // String::from_utf8(res.to_vec()).map_err(|error| {
44| | // tracing::error!(?error, ?res, "Failed to construct string");
45| | // error
46| | // })
47| 4.53k| })
48| 4.54k| .parse(remain)?;
^6
49| |
50| 4.53k| let (remain, box_size) = if size == 1 {
^4.53k ^4.53k
51| 47| number::streaming::be_u64(remain)?
^0
52| 4.49k| } else if size < 8 {
53| 2| context("invalid box header: box_size is too small", fail()).parse(remain)?
54| | } else {
55| 4.49k| (remain, size as u64)
56| | };
57| |
58| 4.53k| let header_size = input.len() - remain.len();
59| 4.53k| assert!(header_size == 8 || header_size == 16);
^47
60| |
61| 4.53k| if box_size < header_size as u64 {
62| 0| return fail().parse(remain);
63| 4.53k| }
64| |
65| 4.53k| Ok((
66| 4.53k| remain,
67| 4.53k| BoxHeader {
68| 4.53k| box_size,
69| 4.53k| box_type,
70| 4.53k| header_size,
71| 4.53k| },
72| 4.53k| ))
73| 4.54k| }
74| |
75| 775| pub fn body_size(&self) -> u64 {
76| 775| self.box_size - self.header_size as u64
77| 775| }
78| |}
79| |
80| |/// Representing an ISO base media file format full box header.
81| |#[derive(Debug, Clone, PartialEq, Eq)]
82| |pub struct FullBoxHeader {
83| | pub box_size: u64,
84| | pub box_type: String,
85| | pub header_size: usize, // include size, type, version, flags
86| |
87| | version: u8, // 8 bits
88| | flags: u32, // 24 bits
89| |}
90| |
91| |impl FullBoxHeader {
92| 1.27k| fn parse(input: &[u8]) -> IResult<&[u8], FullBoxHeader> {
93| 1.27k| let (remain, header) = BoxHeader::parse(input)?;
^0
94| |
95| 1.27k| let (remain, version) = number::streaming::u8(remain)?;
^0
96| 1.27k| let (remain, flags) = number::streaming::be_u24(remain)?;
^0
97| |
98| 1.27k| let header_size = input.len() - remain.len();
99| 1.27k| assert!(header_size == 12 || header_size == 20);
^1
100| |
101| 1.27k| if header.box_size < header_size as u64 {
102| 0| return fail().parse(remain);
103| 1.27k| }
104| |
105| 1.27k| Ok((
106| 1.27k| remain,
107| 1.27k| FullBoxHeader {
108| 1.27k| box_type: header.box_type,
109| 1.27k| box_size: header.box_size,
110| 1.27k| header_size,
111| 1.27k| version,
112| 1.27k| flags,
113| 1.27k| },
114| 1.27k| ))
115| 1.27k| }
116| |
117| 1.26k| pub fn body_size(&self) -> u64 {
118| 1.26k| self.box_size - self.header_size as u64
119| 1.26k| }
120| |}
121| |
122| |/// Representing a generic ISO base media file format box.
123| |#[derive(Clone, PartialEq, Eq)]
124| |pub struct BoxHolder<'a> {
125| | pub header: BoxHeader,
126| | // Including header
127| | pub data: &'a [u8],
128| |}
129| |
130| |impl Debug for BoxHolder<'_> {
131| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132| 0| f.debug_struct("BoxHolder")
133| 0| .field("header", &self.header)
134| 0| .field("body_size", &self.body_data().len())
135| 0| .field(
136| 0| "data",
137| 0| &(self
138| 0| .body_data()
139| 0| .iter()
140| 0| .take(64)
141| 0| .map(|x| x.as_char())
142| 0| .collect::<String>()),
143| | )
144| 0| .finish()
145| 0| }
146| |}
147| |
148| |impl<'a> BoxHolder<'a> {
149| | #[tracing::instrument(skip_all)]
150| 2.86k| pub fn parse(input: &'a [u8]) -> IResult<&'a [u8], BoxHolder<'a>> {
151| 2.86k| let (_, header) = BoxHeader::parse(input)?;
^2.86k ^3
152| 2.86k| tracing::debug!(box_type = header.box_type, ?header, "Got");
153| |
154| 2.86k| let box_size = usize::try_from(header.box_size)
155| 2.86k| .expect("header box size should always fit into a `usize`.");
156| |
157| 2.86k| let (remain, data) = streaming::take(box_size)(input)?;
^2.70k ^2.70k ^162
158| |
159| 2.70k| Ok((remain, BoxHolder { header, data }))
160| 2.86k| }
161| |
162| | #[allow(unused)]
163| 7| pub fn box_size(&self) -> u64 {
164| 7| self.header.box_size
165| 7| }
166| |
167| 2.53k| pub fn box_type(&self) -> &str {
168| 2.53k| &self.header.box_type
169| 2.53k| }
170| |
171| 1.51k| pub fn header_size(&self) -> usize {
172| 1.51k| self.header.header_size
173| 1.51k| }
174| |
175| 1.49k| pub fn body_data(&self) -> &'a [u8] {
176| 1.49k| &self.data[self.header_size()..] // Safe-slice
177| 1.49k| }
178| |}
179| |
180| |type BoxResult<'a> = IResult<&'a [u8], Option<BoxHolder<'a>>>;
181| |
182| 39|pub fn to_boxes(input: &[u8]) -> crate::Result<Vec<BoxHolder<'_>>> {
183| 39| let mut res = Vec::new();
184| 39| let mut remain = input;
185| | loop {
186| 78| if remain.is_empty() {
187| 39| break;
188| 39| }
189| |
190| 39| let (rem, bbox) = BoxHolder::parse(remain).map_err(|e| {
^0
191| 0| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::IsoBmffBox)
192| 0| })?;
193| 39| res.push(bbox);
194| | // Sanity check, to avoid infinite loops caused by unexpected errors.
195| 39| assert!(rem.len() < remain.len());
196| 39| remain = rem;
197| | }
198| 39| Ok(res)
199| 39|}
200| |
201| |/// Parses every top level box while `predicate` returns true, then returns the
202| |/// last parsed box.
203| 126|pub fn travel_while<'a, F>(input: &'a [u8], mut predicate: F) -> BoxResult<'a>
204| 126|where
205| 126| F: FnMut(&BoxHolder<'a>) -> bool,
206| |{
207| 126| let mut remain = input;
208| | loop {
209| 313| if remain.is_empty() {
210| 3| return Ok((remain, None));
211| 310| }
212| |
213| 310| let (rem, bbox) = BoxHolder::parse(remain)?;
^0
214| | // Sanity check, to avoid infinite loops caused by unexpected errors.
215| 310| assert!(rem.len() < remain.len());
216| 310| remain = rem;
217| |
218| 310| if !predicate(&bbox) {
219| 123| return Ok((remain, Some(bbox)));
220| 187| }
221| | }
222| 126|}
223| |
224| 185|pub fn travel_header<'a, F>(input: &'a [u8], mut predicate: F) -> IResult<&'a [u8], BoxHeader>
225| 185|where
226| 185| F: FnMut(&BoxHeader, &'a [u8]) -> bool,
227| |{
228| 185| let mut remain = input;
229| | loop {
230| 365| let (rem, header) = BoxHeader::parse(remain)?;
^359 ^359 ^6
231| | // Sanity check, to avoid infinite loops caused by unexpected errors.
232| 359| assert!(rem.len() < remain.len());
233| 359| remain = rem;
234| |
235| 359| if !predicate(&header, rem) {
236| 179| break Ok((rem, header));
237| 180| }
238| |
239| 180| if remain.len() < header.body_size() as usize {
240| 0| return Err(nom::Err::Incomplete(Needed::new(
241| 0| header.body_size() as usize - remain.len(),
242| 0| )));
243| 180| }
244| |
245| | // skip box body
246| 180| remain = &remain[header.body_size() as usize..]; // Safe-slice
247| | }
248| 185|}
249| |
250| |#[allow(unused)]
251| |/// Find a box by atom `path`, which is separated by '/', e.g.: "meta/iloc".
252| 598|pub fn find_box<'a>(input: &'a [u8], path: &str) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
253| 598| if path.is_empty() {
254| 0| return Ok((input, None));
255| 598| }
256| |
257| 598| let mut bbox = None;
258| 598| let mut remain = input;
259| 598| let mut data = input;
260| |
261| 701| for box_type in path.split('/').filter(|x| !x.is_empty()) {
^598 ^598 ^598
262| 701| assert!(!box_type.is_empty());
263| |
264| 701| let (rem, b) = find_box_by_type(data, box_type)?;
^699 ^699 ^2
265| 699| let Some(b) = b else {
^590
266| 109| return Ok((rem, None));
267| | };
268| |
269| 590| data = b.body_data();
270| 590| (remain, bbox) = (rem, Some(b));
271| | }
272| |
273| 487| Ok((remain, bbox))
274| 598|}
275| |
276| 701|fn find_box_by_type<'a>(
277| 701| input: &'a [u8],
278| 701| box_type: &str,
279| 701|) -> IResult<&'a [u8], Option<BoxHolder<'a>>> {
280| 701| let mut remain = input;
281| | loop {
282| 2.02k| if remain.is_empty() {
283| 109| return Ok((remain, None));
284| 1.91k| }
285| |
286| 1.91k| let (rem, bbox) = BoxHolder::parse(remain)?;
^1.91k^1.91k ^2
287| | // Sanity check, to avoid infinite loops caused by unexpected errors.
288| 1.91k| assert!(rem.len() < remain.len());
289| 1.91k| remain = rem;
290| |
291| 1.91k| if bbox.box_type() == box_type {
292| 590| return Ok((rem, Some(bbox)));
293| 1.32k| }
294| | }
295| 701|}
296| |
297| |trait ParseBody<O> {
298| | fn parse_body(body: &[u8], header: FullBoxHeader) -> IResult<&[u8], O>;
299| |}
300| |
301| |pub trait ParseBox<O> {
302| | fn parse_box(input: &[u8]) -> IResult<&[u8], O>;
303| |}
304| |
305| |/// auto implements parse_box for each Box which implements ParseBody
306| |impl<O, T: ParseBody<O>> ParseBox<O> for T {
307| | #[tracing::instrument(skip_all)]
308| 1.26k| fn parse_box(input: &[u8]) -> IResult<&[u8], O> {
309| 1.26k| let (remain, header) = FullBoxHeader::parse(input)?;
^0
310| 1.26k| assert_eq!(input.len(), header.header_size + remain.len());
311| 1.26k| assert!(
312| 1.26k| header.box_size >= header.header_size as u64,
313| | "box_size = {}, header_size = {}",
314| | header.box_size,
315| | header.header_size
316| | );
317| |
318| | // limit parsing size
319| 1.26k| let box_size = header.body_size() as usize;
320| 1.26k| if box_size > MAX_BODY_LEN {
321| 0| tracing::error!(?header.box_type, ?box_size, "Box is too big");
322| 0| return fail().parse(remain);
323| 1.26k| }
324| 1.26k| let (remain, data) = streaming::take(box_size).parse(remain)?;
^0
325| 1.26k| assert_eq!(input.len(), header.header_size + data.len() + remain.len());
326| |
327| 1.26k| let (rem, bbox) = Self::parse_body(data, header)?;
^1.26k^1.26k ^4
328| |
329| 1.26k| if !rem.is_empty() {
330| 0| // TODO: Body data is not exhausted, should report this error with
331| 0| // tracing
332| 1.26k| }
333| |
334| 1.26k| Ok((remain, bbox))
335| 1.26k| }
336| |}
337| |
338| |#[cfg(test)]
339| |mod tests {
340| | use crate::testkit::read_sample;
341| |
342| | use super::*;
343| | use nom::error::make_error;
344| | use test_case::test_case;
345| |
346| | #[test_case("exif.heic")]
347| 1| fn travel_heic(path: &str) {
348| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
349| |
350| 1| let buf = read_sample(path).unwrap();
351| 1| let mut boxes = Vec::new();
352| |
353| 3| let (remain, bbox) = travel_while(&buf, |bbox| {
^1 ^1 ^1 ^1
354| 3| tracing::info!(bbox.header.box_type, "Got");
355| 3| boxes.push((bbox.header.box_type.to_owned(), bbox.to_owned()));
356| 3| bbox.box_type() != "mdat"
357| 3| })
358| 1| .unwrap();
359| 1| let bbox = bbox.unwrap();
360| |
361| 1| assert_eq!(bbox.header.box_type, "mdat");
362| 1| assert_eq!(remain, b"");
363| |
364| 1| let (types, _): (Vec<_>, Vec<_>) = boxes.iter().cloned().unzip();
365| |
366| | // top level boxes
367| 1| assert_eq!(types, ["ftyp", "meta", "mdat"],);
368| |
369| 1| let (_, meta) = boxes.remove(1);
370| 1| assert_eq!(meta.box_type(), "meta");
371| |
372| 1| let mut boxes = Vec::new();
373| 1| let (remain, bbox) = travel_while(
374| 1| &meta.body_data()[4..], // Safe-slice in test_case
375| 8| |bbox| {
376| 8| tracing::info!(bbox.header.box_type, "Got");
377| 8| boxes.push(bbox.header.box_type.to_owned());
378| 8| bbox.box_type() != "iloc"
379| 8| },
380| | )
381| 1| .unwrap();
382| 1| let bbox = bbox.unwrap();
383| 1| assert_eq!(bbox.box_type(), "iloc");
384| 1| assert_eq!(remain, b"");
385| |
386| | // sub-boxes in meta
387| 1| assert_eq!(
388| | boxes,
389| | ["hdlr", "dinf", "pitm", "iinf", "iref", "iprp", "idat", "iloc"],
390| | );
391| 1| }
392| |
393| | #[test_case("meta.mov")]
394| 1| fn travel_mov(path: &str) {
395| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
396| |
397| 1| let buf = read_sample(path).unwrap();
398| 1| let mut boxes = Vec::new();
399| |
400| 4| let (remain, bbox) = travel_while(&buf, |bbox| {
^1 ^1 ^1 ^1
401| 4| tracing::info!(bbox.header.box_type, "Got");
402| 4| boxes.push((bbox.header.box_type.to_owned(), bbox.to_owned()));
403| 4| bbox.box_type() != "moov"
404| 4| })
405| 1| .unwrap();
406| 1| let bbox = bbox.unwrap();
407| |
408| 1| assert_eq!(bbox.header.box_type, "moov");
409| 1| assert_eq!(remain, b"");
410| |
411| 1| let (types, _): (Vec<_>, Vec<_>) = boxes.iter().cloned().unzip();
412| |
413| | // top level boxes
414| 1| assert_eq!(types, ["ftyp", "wide", "mdat", "moov"],);
415| |
416| 1| let (_, moov) = boxes.pop().unwrap();
417| 1| assert_eq!(moov.box_type(), "moov");
418| |
419| 1| let mut boxes = Vec::new();
420| 6| let (remain, bbox) = travel_while(moov.body_data(), |bbox| {
^1 ^1 ^1 ^1
421| 6| tracing::info!(bbox.header.box_type, "Got");
422| 6| boxes.push(bbox.header.box_type.to_owned());
423| 6| bbox.box_type() != "meta"
424| 6| })
425| 1| .unwrap();
426| 1| let bbox = bbox.unwrap();
427| |
428| 1| assert_eq!(bbox.box_type(), "meta");
429| 1| assert_eq!(remain, b"");
430| |
431| | // sub-boxes in moov
432| 1| assert_eq!(boxes, ["mvhd", "trak", "trak", "trak", "trak", "meta"],);
433| |
434| 1| let meta = bbox;
435| 1| let mut boxes = Vec::new();
436| 3| let (remain, _) = travel_while(meta.body_data(), |bbox| {
^1 ^1 ^1
437| 3| tracing::info!(bbox.header.box_type, "Got");
438| 3| boxes.push(bbox.header.box_type.to_owned());
439| 3| bbox.box_type() != "ilst"
440| 3| })
441| 1| .unwrap();
442| 1| assert_eq!(remain, b"");
443| |
444| | // sub-boxes in meta
445| 1| assert_eq!(boxes, ["hdlr", "keys", "ilst"],);
446| 1| }
447| |
448| | #[test_case("meta.mp4")]
449| 1| fn travel_mp4(path: &str) {
450| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
451| |
452| 1| let buf = read_sample(path).unwrap();
453| 1| let mut boxes = Vec::new();
454| |
455| 3| let (remain, bbox) = travel_while(&buf, |bbox| {
^1 ^1 ^1 ^1
456| 3| tracing::info!(bbox.header.box_type, "Got");
457| 3| boxes.push((bbox.header.box_type.to_owned(), bbox.to_owned()));
458| 3| bbox.box_type() != "moov"
459| 3| })
460| 1| .unwrap();
461| 1| let bbox = bbox.unwrap();
462| |
463| 1| assert_eq!(bbox.header.box_type, "moov");
464| 1| assert_eq!(remain, b"");
465| |
466| 1| let (types, _): (Vec<_>, Vec<_>) = boxes.iter().cloned().unzip();
467| |
468| | // top level boxes
469| 1| assert_eq!(types, ["ftyp", "mdat", "moov"],);
470| |
471| 1| let (_, moov) = boxes.pop().unwrap();
472| 1| assert_eq!(moov.box_type(), "moov");
473| |
474| 1| let mut boxes = Vec::new();
475| 4| let (remain, bbox) = travel_while(moov.body_data(), |bbox| {
^1 ^1 ^1 ^1
476| 4| tracing::info!(bbox.header.box_type, "Got");
477| 4| boxes.push((bbox.header.box_type.to_owned(), bbox.to_owned()));
478| 4| bbox.box_type() != "udta"
479| 4| })
480| 1| .unwrap();
481| 1| let bbox = bbox.unwrap();
482| |
483| 1| assert_eq!(bbox.box_type(), "udta");
484| 1| assert_eq!(remain, b"");
485| |
486| | // sub-boxes in moov
487| 1| assert_eq!(
488| 4| boxes.iter().map(|x| x.0.to_owned()).collect::<Vec<_>>(),
^1 ^1 ^1
489| | ["mvhd", "trak", "trak", "udta"],
490| | );
491| |
492| 2| let (_, trak) = boxes.iter().find(|x| x.0 == "trak").unwrap();
^1 ^1 ^1 ^1
493| |
494| 1| let meta = bbox;
495| 1| let mut boxes = Vec::new();
496| 1| let (remain, _) = travel_while(meta.body_data(), |bbox| {
497| 1| tracing::info!(bbox.header.box_type, "Got");
498| 1| boxes.push(bbox.header.box_type.to_owned());
499| 1| bbox.box_type() != "©xyz"
500| 1| })
501| 1| .unwrap();
502| 1| assert_eq!(remain, b"");
503| |
504| | // sub-boxes in udta
505| 1| assert_eq!(boxes, ["©xyz"],);
506| |
507| 1| let mut boxes = Vec::new();
508| 3| let (remain, bbox) = travel_while(trak.body_data(), |bbox| {
^1 ^1 ^1 ^1
509| 3| tracing::info!(bbox.header.box_type, "Got");
510| 3| boxes.push(bbox.header.box_type.to_owned());
511| 3| bbox.box_type() != "mdia"
512| 3| })
513| 1| .unwrap();
514| 1| assert_eq!(remain, b"");
515| |
516| | // sub-boxes in trak
517| 1| assert_eq!(boxes, ["tkhd", "edts", "mdia"],);
518| |
519| 1| let mdia = bbox.unwrap();
520| 1| let mut boxes = Vec::new();
521| 3| let (remain, _) = travel_while(mdia.body_data(), |bbox| {
^1 ^1 ^1
522| 3| tracing::info!(bbox.header.box_type, "Got");
523| 3| boxes.push(bbox.header.box_type.to_owned());
524| 3| bbox.box_type() != "minf"
525| 3| })
526| 1| .unwrap();
527| 1| assert_eq!(remain, b"");
528| |
529| | // sub-boxes in mdia
530| 1| assert_eq!(boxes, ["mdhd", "hdlr", "minf"],);
531| 1| }
532| |
533| | // For mp4 files, Android phones store GPS info in the `moov/udta/©xyz`
534| | // atom.
535| | #[test_case("meta.mp4")]
536| 1| fn find_android_gps_box(path: &str) {
537| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
538| |
539| 1| let buf = read_sample(path).unwrap();
540| 1| let (_, bbox) = find_box(&buf, "moov/udta/©xyz").unwrap();
541| 1| let bbox = bbox.unwrap();
542| 1| tracing::info!(?bbox.header, "bbox");
543| |
544| | // gps info
545| 1| assert_eq!(
546| | "+27.2939+112.6932/",
547| 1| std::str::from_utf8(&bbox.body_data()[4..]).unwrap() // Safe-slice in test_case
548| | );
549| 1| }
550| |
551| | #[test]
552| 1| fn box_header() {
553| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
554| |
555| 1| let data = [
556| 1| 0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74, 0x61, 0x02, 0x04, 0x04, 0x00,
557| 1| ];
558| 1| let (remain, header) = FullBoxHeader::parse(&data).unwrap();
559| 1| assert_eq!(header.box_type, "meta");
560| 1| assert_eq!(header.box_size, 0x01dd);
561| 1| assert_eq!(header.version, 0x2);
562| 1| assert_eq!(header.flags, 0x40400,);
563| 1| assert_eq!(header.header_size, 12);
564| 1| assert_eq!(remain, b"");
565| |
566| 1| let data = [
567| 1| 0x00, 0x00, 0x00, 0x01, 0x6d, 0x64, 0x61, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
568| 1| 0xfa, 0x74, 0x01, 0x04, 0x04, 0x00,
569| 1| ];
570| 1| let (remain, header) = FullBoxHeader::parse(&data).unwrap();
571| 1| assert_eq!(header.box_type, "mdat");
572| 1| assert_eq!(header.box_size, 0xefa74);
573| 1| assert_eq!(header.version, 0x1);
574| 1| assert_eq!(header.flags, 0x40400,);
575| 1| assert_eq!(header.header_size, 20);
576| 1| assert_eq!(remain, b"");
577| |
578| 1| let data = [0x00, 0x00, 0x01, 0xdd, 0x6d, 0x65, 0x74];
579| 1| let err = BoxHeader::parse(&data).unwrap_err();
580| 1| assert!(err.is_incomplete());
581| |
582| 1| let data = [0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00];
583| 1| let err = BoxHeader::parse(&data).unwrap_err();
584| 1| assert_eq!(
585| | err,
586| 1| nom::Err::Error(make_error(&[] as &[u8], nom::error::ErrorKind::Fail))
587| | );
588| 1| }
589| |}
/home/min/dev/nom-exif/src/bbox/cr3_moov.rs:
1| |use std::ops::Range;
2| |
3| |use nom::{combinator::fail, IResult, Parser};
4| |
5| |use super::{
6| | uuid::{CanonUuidBox, CANON_UUID, UUID_SIZE},
7| | BoxHolder,
8| |};
9| |
10| |const MIN_CR3_INPUT_SIZE: usize = 8;
11| |
12| |const MIN_FTYP_BODY_SIZE: usize = 4;
13| |
14| |/// Represents the parsed moov box structure for Canon CR3 files.
15| |///
16| |/// Canon CR3 files are based on the ISO Base Media File Format (similar to MP4/MOV)
17| |/// but contain Canon-specific metadata in a UUID box within the moov container.
18| |/// This struct provides access to the Canon UUID box containing EXIF metadata.
19| |///
20| |/// # CR3 File Structure
21| |/// CR3 File
22| |/// +-- ftyp (file type box)
23| |/// +-- moov (movie box)
24| |/// | +-- uuid (Canon UUID box)
25| |/// | +-- CMT1 (main EXIF data)
26| |/// | +-- CMT2 (ExifIFD data)
27| |/// | +-- CMT3 (MakerNotes data)
28| |/// +-- mdat (media data)
29| |#[derive(Debug, Clone, PartialEq, Eq)]
30| |pub struct Cr3MoovBox {
31| | /// Canon's UUID box containing CMT metadata, if present
32| | uuid_canon_box: Option<CanonUuidBox>,
33| |}
34| |
35| |impl Cr3MoovBox {
36| 13| pub fn parse(input: &[u8]) -> IResult<&[u8], Option<Cr3MoovBox>> {
37| | // Validate minimum input size
38| 13| if input.len() < MIN_CR3_INPUT_SIZE {
39| 1| tracing::warn!(
40| | "Input too small for CR3 parsing: {} bytes, expected at least {}",
41| 0| input.len(),
42| | MIN_CR3_INPUT_SIZE
43| | );
44| 1| return fail().parse(input);
45| 12| }
46| |
47| 12| let remain = input;
48| 12| let (remain, bbox) = BoxHolder::parse(remain)?;
^0
49| |
50| | // Verify this is a valid file format by checking for ftyp box
51| 12| if bbox.box_type() != "ftyp" {
52| 1| tracing::warn!("Expected ftyp box, found: {}", bbox.box_type());
^0 ^0
53| 1| return fail().parse(input);
54| 11| }
55| |
56| | // Validate ftyp box has minimum required size
57| 11| if bbox.body_data().len() < MIN_FTYP_BODY_SIZE {
58| 1| tracing::warn!(
59| | "ftyp box too small: {} bytes, expected at least {}",
60| 0| bbox.body_data().len(),
61| | MIN_FTYP_BODY_SIZE
62| | );
63| 1| return fail().parse(input);
64| 10| }
65| |
66| | // Find the moov box containing the metadata
67| 10| let (remain, Some(moov_bbox)) = super::find_box(remain, "moov")? else {
^7 ^7 ^2
68| 1| tracing::debug!("moov box not found in CR3 file");
69| 1| return Ok((remain, None));
70| | };
71| |
72| 7| tracing::debug!(
73| 0| box_type = moov_bbox.box_type(),
74| | size = moov_bbox.header.box_size,
75| | "Found moov box in CR3 file"
76| | );
77| |
78| | // Parse the moov box contents to find Canon UUID box
79| 7| let (_, moov_box) = Self::parse_moov_content(moov_bbox.body_data(), input)?;
^0
80| 7| tracing::debug!(?moov_box, "Successfully parsed CR3 moov box");
81| |
82| 7| Ok((remain, Some(moov_box)))
83| 13| }
84| |
85| 7| fn parse_moov_content<'a>(
86| 7| moov_data: &'a [u8],
87| 7| full_input: &'a [u8],
88| 7| ) -> IResult<&'a [u8], Cr3MoovBox> {
89| 7| let mut remain = moov_data;
90| 7| let mut uuid_canon_box = None;
91| |
92| | // Iterate through all boxes within the moov box to find Canon's UUID box
93| 7| while !remain.is_empty() {
94| 7| let (new_remain, bbox) = match BoxHolder::parse(remain) {
95| 7| Ok(result) => result,
96| 0| Err(e) => {
97| 0| tracing::warn!(
98| | "Failed to parse box in moov content, continuing with partial data: {:?}",
99| | e
100| | );
101| 0| break; // Stop parsing but return what we found so far
102| | }
103| | };
104| |
105| 7| if bbox.box_type() == "uuid" {
106| 7| let body_data = bbox.body_data();
107| |
108| | // Validate UUID box has minimum required size
109| 7| if body_data.len() < UUID_SIZE {
110| 0| tracing::debug!("UUID box too small: {} bytes", body_data.len());
111| 0| remain = new_remain;
112| 0| continue;
113| 7| }
114| |
115| 7| let uuid_bytes = &body_data[0..UUID_SIZE];
116| |
117| 7| if uuid_bytes == CANON_UUID {
118| 7| tracing::debug!(
119| | "Found Canon UUID box with {} bytes of data",
120| 0| body_data.len()
121| | );
122| 7| let (_, canon_box) = CanonUuidBox::parse(body_data, full_input)?;
^0
123| 7| uuid_canon_box = Some(canon_box);
124| 7| break;
125| | } else {
126| 0| tracing::debug!("Found non-Canon UUID box");
127| | }
128| 0| }
129| |
130| 0| remain = new_remain;
131| | }
132| |
133| 7| Ok((remain, Cr3MoovBox { uuid_canon_box }))
134| 7| }
135| |
136| | #[allow(dead_code)] // API method for tests
137| 2| pub fn uuid_canon_box(&self) -> Option<&CanonUuidBox> {
138| 2| self.uuid_canon_box.as_ref()
139| 2| }
140| |
141| 3| pub fn exif_data_offset(&self) -> Option<Range<usize>> {
142| | // For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
143| 3| self.uuid_canon_box.as_ref()?.exif_data_offset().cloned()
^0
144| 3| }
145| |
146| | /// Returns offset ranges for all CMT boxes (CMT1, CMT2, CMT3).
147| | /// CMT1 is the primary EXIF data, CMT2 is ExifIFD data, CMT3 is MakerNotes.
148| 4| pub fn all_cmt_data_offsets(&self) -> Vec<(&'static str, Range<usize>)> {
149| 4| let Some(uuid_box) = self.uuid_canon_box.as_ref() else {
150| 0| return Vec::new();
151| | };
152| |
153| 4| let mut offsets = Vec::with_capacity(3);
154| 4| if let Some(range) = uuid_box.exif_data_offset() {
155| 4| offsets.push(("CMT1", range.clone()));
156| 4| }
^0
157| 4| if let Some(range) = uuid_box.cmt2_data_offset() {
158| 4| offsets.push(("CMT2", range.clone()));
159| 4| }
^0
160| 4| if let Some(range) = uuid_box.cmt3_data_offset() {
161| 4| offsets.push(("CMT3", range.clone()));
162| 4| }
^0
163| 4| offsets
164| 4| }
165| |}
166| |
167| |#[cfg(test)]
168| |mod tests {
169| | use super::*;
170| | use crate::testkit::read_sample;
171| |
172| | #[test]
173| 1| fn parse_rejects_too_small_input() {
174| | // Covers lines 38-44.
175| 1| let result = Cr3MoovBox::parse(&[0u8; 4]);
176| 1| assert!(result.is_err());
177| 1| }
178| |
179| | #[test]
180| 1| fn parse_rejects_non_ftyp_first_box() {
181| | // 8-byte box where the type is not "ftyp" (covers lines 51-54).
182| 1| let mut buf = Vec::new();
183| 1| buf.extend_from_slice(&16u32.to_be_bytes()); // box size
184| 1| buf.extend_from_slice(b"mdat"); // not ftyp
185| 1| buf.extend_from_slice(&[0u8; 8]); // body to satisfy take(16)
186| 1| let result = Cr3MoovBox::parse(&buf);
187| 1| assert!(result.is_err());
188| 1| }
189| |
190| | #[test]
191| 1| fn parse_rejects_ftyp_too_small_body() {
192| | // ftyp present but body < MIN_FTYP_BODY_SIZE (covers lines 57-63).
193| 1| let mut buf = Vec::new();
194| 1| buf.extend_from_slice(&10u32.to_be_bytes()); // total 10
195| 1| buf.extend_from_slice(b"ftyp");
196| 1| buf.extend_from_slice(&[0u8, 0u8]); // 2-byte body, below the 4-byte minimum
197| 1| buf.extend_from_slice(&[0u8; 16]); // padding for MIN_CR3_INPUT_SIZE
198| 1| let result = Cr3MoovBox::parse(&buf);
199| 1| assert!(result.is_err());
200| 1| }
201| |
202| | #[test]
203| 1| fn parse_ftyp_without_moov_returns_none() {
204| | // ftyp present, no moov — covers lines 67-70.
205| 1| let mut buf = Vec::new();
206| 1| buf.extend_from_slice(&24u32.to_be_bytes());
207| 1| buf.extend_from_slice(b"ftyp");
208| 1| buf.extend_from_slice(b"crx ");
209| 1| buf.extend_from_slice(&[0u8; 12]);
210| | // No moov follows. find_box returns Ok((_, None)) on empty remainder,
211| | // so the Ok arm is what's actually exercised.
212| 1| if let Ok((_, moov)) = Cr3MoovBox::parse(&buf) {
213| 1| assert!(moov.is_none());
214| 0| }
215| 1| }
216| |
217| | #[test]
218| 1| fn parse_real_canon_r6() {
219| | // Happy path through parse_moov_content (lines 85-134).
220| 1| let buf = read_sample("canon-r6.cr3").unwrap();
221| 1| let (_, moov) = Cr3MoovBox::parse(&buf).unwrap();
222| 1| let moov = moov.unwrap();
223| 1| assert!(moov.uuid_canon_box().is_some());
224| 1| assert!(moov.exif_data_offset().is_some());
225| 1| let all = moov.all_cmt_data_offsets();
226| 1| assert!(all.iter().any(|(id, _)| *id == "CMT1"));
227| 1| }
228| |}
/home/min/dev/nom-exif/src/bbox/iinf.rs:
1| |use std::collections::HashMap;
2| |
3| |use nom::{
4| | bytes::streaming,
5| | combinator::{cond, fail, map_res},
6| | error::context,
7| | multi::many_m_n,
8| | number::streaming::{be_u16, be_u32},
9| | IResult, Parser,
10| |};
11| |
12| |use crate::{bbox::FullBoxHeader, utils::parse_cstr};
13| |
14| |use super::{ParseBody, ParseBox};
15| |
16| |#[derive(Debug, Clone, PartialEq, Eq)]
17| |pub struct IinfBox {
18| | pub(crate) header: FullBoxHeader,
19| | pub(crate) entries: HashMap<String, InfeBox>,
20| |}
21| |
22| |impl ParseBody<IinfBox> for IinfBox {
23| 21| fn parse_body(remain: &[u8], header: FullBoxHeader) -> IResult<&[u8], IinfBox> {
24| 21| let version = header.version;
25| |
26| 21| let (remain, item_count) = if version > 0 {
27| 0| be_u32(remain)?
28| | } else {
29| 21| map_res(be_u16, |x| Ok::<u32, ()>(x as u32)).parse(remain)?
^0
30| | };
31| |
32| 21| let (remain, entries) =
33| 21| many_m_n(item_count as usize, item_count as usize, InfeBox::parse_box).parse(remain)?;
^0
34| |
35| 21| let entries = entries
36| 21| .into_iter()
37| 969| .map(|e| (e.key().to_owned(), e))
^21
38| 21| .collect::<HashMap<_, _>>();
39| |
40| 21| Ok((remain, IinfBox { header, entries }))
41| 21| }
42| |}
43| |
44| |impl IinfBox {
45| 21| pub fn get_infe(&self, item_type: &'static str) -> Option<&InfeBox> {
46| 21| self.entries.get(item_type)
47| 21| }
48| |}
49| |
50| |/// Info entry box
51| |#[derive(Debug, Clone, PartialEq, Eq)]
52| |pub(crate) struct InfeBox {
53| | pub header: FullBoxHeader,
54| | pub id: u32,
55| | pub protection_index: u16,
56| | pub item_type: Option<String>, // version >= 2
57| | pub item_name: String,
58| | content_type: Option<String>,
59| | content_encoding: Option<String>,
60| | uri_type: Option<String>,
61| |}
62| |
63| |impl ParseBody<InfeBox> for InfeBox {
64| | #[tracing::instrument(skip_all)]
65| 969| fn parse_body<'a>(remain: &'a [u8], header: FullBoxHeader) -> IResult<&'a [u8], InfeBox> {
66| 969| let version = header.version;
67| |
68| 969| let (remain, id) = if version > 2 {
69| 0| be_u32(remain)?
70| | } else {
71| 969| map_res(be_u16, |x| Ok::<u32, ()>(x as u32)).parse(remain)?
^0
72| | };
73| |
74| 969| let (remain, protection_index) = be_u16(remain)?;
^0
75| |
76| 969| let (remain, item_type) = cond(
77| 969| version >= 2,
78| 969| map_res(streaming::take(4_usize), |res: &'a [u8]| {
79| 969| String::from_utf8(res.to_vec())
80| 969| }),
81| | )
82| 969| .parse(remain)?;
^0
83| |
84| | // tracing::debug!(?header.box_type, ?item_type, ?version, "Got");
85| |
86| 969| let (remain, item_name) = parse_cstr(remain).map_err(|e| {
^0
87| 0| if e.is_incomplete() {
88| 0| context("no enough bytes for infe item name", fail::<_, (), _>())
89| 0| .parse(remain)
90| 0| .unwrap_err()
91| | } else {
92| 0| e
93| | }
94| 0| })?;
95| |
96| 969| let (remain, content_type, content_encoding) = if version <= 1
97| 969| || (version >= 2 && item_type.as_ref().unwrap() == "mime")
98| | {
99| 32| let (remain, content_type) = parse_cstr(remain)?;
^0
100| 32| let (remain, content_encoding) = cond(!remain.is_empty(), parse_cstr).parse(remain)?;
^0
101| 32| (remain, Some(content_type), content_encoding)
102| | } else {
103| 937| (remain, None, None)
104| | };
105| |
106| 969| let (remain, uri_type) = if version >= 2 && item_type.as_ref().unwrap() == "uri" {
107| 0| let (remain, uri_type) = parse_cstr(remain)?;
108| 0| (remain, Some(uri_type))
109| | } else {
110| 969| (remain, None)
111| | };
112| |
113| 969| Ok((
114| 969| remain,
115| 969| InfeBox {
116| 969| header,
117| 969| id,
118| 969| protection_index,
119| 969| item_type,
120| 969| item_name,
121| 969| content_type,
122| 969| content_encoding,
123| 969| uri_type,
124| 969| },
125| 969| ))
126| 969| }
127| |}
128| |
129| |impl InfeBox {
130| 969| fn key(&self) -> &String {
131| 969| self.item_type.as_ref().unwrap_or(&self.item_name)
132| 969| }
133| |}
/home/min/dev/nom-exif/src/bbox/iloc.rs:
1| |use std::collections::HashMap;
2| |
3| |use nom::{
4| | combinator::{cond, fail, map_res},
5| | error::context,
6| | multi::many_m_n,
7| | number::streaming::{be_u16, be_u32, be_u64, be_u8},
8| | IResult, Parser,
9| |};
10| |
11| |use crate::bbox::FullBoxHeader;
12| |
13| |use super::ParseBody;
14| |
15| |#[derive(Debug, Clone, PartialEq, Eq)]
16| |pub struct IlocBox {
17| | header: FullBoxHeader,
18| | offset_size: u8, // 4 bits
19| | length_size: u8, // 4 bits
20| | base_offset_size: u8, // 4 bits
21| | index_size: u8, // 4 bits, version 1/2, reserved in version 0
22| | pub(crate) items: HashMap<u32, ItemLocation>,
23| |}
24| |
25| |const MAX_ILOC_EXTENTS_PER_ITEM: u16 = 32;
26| |
27| |impl ParseBody<IlocBox> for IlocBox {
28| | #[tracing::instrument(skip_all)]
29| 21| fn parse_body(remain: &[u8], header: FullBoxHeader) -> IResult<&[u8], IlocBox> {
30| 21| let version = header.version;
31| |
32| 21| let (remain, (offset_size, length_size)) =
33| 21| map_res(be_u8, |res| Ok::<(u8, u8), ()>((res >> 4, res & 0xF))).parse(remain)?;
^0
34| |
35| 21| let (remain, (base_offset_size, index_size)) =
36| 21| map_res(be_u8, |res| Ok::<(u8, u8), ()>((res >> 4, res & 0xF))).parse(remain)?;
^0
37| |
38| 21| let (remain, item_count) = if version < 2 {
39| 21| map_res(be_u16, |x| Ok::<u32, ()>(x as u32)).parse(remain)?
^0
40| | } else {
41| 0| be_u32(remain)?
42| | };
43| |
44| 969| let (remain, items) = many_m_n(item_count as usize, item_count as usize, |remain| {
^21 ^21 ^21 ^21 ^21
45| 969| let (remain, item_id) = if version < 2 {
46| 969| map_res(be_u16, |x| Ok::<u32, ()>(x as u32)).parse(remain)?
^0
47| | } else {
48| 0| be_u32(remain)?
49| | };
50| |
51| 969| let (remain, construction_method_raw) = cond(
52| 969| version >= 1,
53| 969| map_res(be_u16, |res| Ok::<u8, ()>((res & 0xF) as u8)),
^963
54| | )
55| 969| .parse(remain)?;
^0
56| 969| let construction_method =
57| 969| ConstructionMethod::from(construction_method_raw.unwrap_or(0));
58| |
59| 969| let (remain, data_ref_index) = be_u16(remain)?;
^0
60| |
61| 969| let (remain, base_offset) =
62| 969| parse_base_offset(base_offset_size, remain, "base_offset_size is not 4 or 8")?;
^0
63| |
64| 969| let (remain, extent_count) = be_u16(remain)?;
^0
65| 969| if extent_count > MAX_ILOC_EXTENTS_PER_ITEM {
66| 0| tracing::debug!(?extent_count, "extent_count");
67| 0| context("extent_count > 32", fail::<_, (), _>()).parse(remain)?;
68| 969| }
69| |
70| 969| let (remain, extents) =
71| 969| many_m_n(extent_count as usize, extent_count as usize, |remain| {
72| 969| let (remain, index) =
73| 969| parse_base_offset(index_size, remain, "index_size is not 4 or 8")?;
^0
74| 969| let (remain, offset) =
75| 969| parse_base_offset(offset_size, remain, "offset_size is not 4 or 8")?;
^0
76| 969| let (remain, length) =
77| 969| parse_base_offset(length_size, remain, "length_size is not 4 or 8")?;
^0
78| |
79| 969| Ok((
80| 969| remain,
81| 969| ItemLocationExtent {
82| 969| index,
83| 969| offset,
84| 969| length,
85| 969| },
86| 969| ))
87| 969| })
88| 969| .parse(remain)?;
^0
89| |
90| 969| Ok((
91| 969| remain,
92| 969| ItemLocation {
93| 969| extents,
94| 969| id: item_id,
95| 969| construction_method,
96| 969| base_offset,
97| 969| data_ref_index,
98| 969| },
99| 969| ))
100| 969| })
101| 21| .parse(remain)?;
^0
102| |
103| | Ok((
104| 21| remain,
105| | IlocBox {
106| 21| header,
107| 21| offset_size,
108| 21| length_size,
109| 21| base_offset_size,
110| 21| index_size,
111| 969| items: items.into_iter().map(|x| (x.id, x)).collect(),
^21 ^21 ^21 ^21
112| | },
113| | ))
114| 21| }
115| |}
116| |
117| |impl IlocBox {
118| 21| pub fn item_offset_len(&self, id: u32) -> Option<(ConstructionMethod, u64, u64)> {
119| 21| self.items.get(&id).and_then(|item| {
120| 21| let extent = item.extents.first()?;
^0
121| 21| Some((
122| 21| item.construction_method,
123| 21| item.base_offset + extent.offset,
124| 21| extent.length,
125| 21| ))
126| 21| })
127| 21| }
128| |}
129| |
130| |#[derive(Debug, Clone, PartialEq, Eq)]
131| |struct ItemLocationExtent {
132| | index: u64,
133| | offset: u64,
134| | length: u64,
135| |}
136| |
137| 3.87k|fn parse_base_offset<'a>(size: u8, remain: &'a [u8], msg: &'static str) -> IResult<&'a [u8], u64> {
138| 3.87k| Ok(if size == 4 {
139| 1.94k| map_res(be_u32, |x| Ok::<u64, ()>(x as u64)).parse(remain)?
^0
140| 1.93k| } else if size == 8 {
141| 0| be_u64(remain)?
142| 1.93k| } else if size == 0 {
143| 1.93k| (remain, 0)
144| | } else {
145| 0| context(msg, fail()).parse(remain)?
146| | })
147| 3.87k|}
148| |
149| |#[derive(Debug, Clone, PartialEq, Eq)]
150| |pub struct ItemLocation {
151| | id: u32,
152| | construction_method: ConstructionMethod,
153| | data_ref_index: u16,
154| | base_offset: u64,
155| | extents: Vec<ItemLocationExtent>,
156| |}
157| |
158| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
159| |#[allow(clippy::enum_variant_names)]
160| |pub enum ConstructionMethod {
161| | FileOffset = 0,
162| | IdatOffset = 1,
163| | ItemOffset = 2,
164| |}
165| |
166| |impl From<u8> for ConstructionMethod {
167| 969| fn from(value: u8) -> Self {
168| 969| match value {
169| 950| 0 => Self::FileOffset,
170| 19| 1 => Self::IdatOffset,
171| 0| 2 => Self::ItemOffset,
172| 0| other => {
173| 0| tracing::warn!(
174| | other,
175| | "Unknown construction method, defaulting to FileOffset"
176| | );
177| 0| Self::FileOffset
178| | }
179| | }
180| 969| }
181| |}
/home/min/dev/nom-exif/src/bbox/ilst.rs:
1| |use nom::bytes::complete::{tag, take};
2| |use nom::combinator::{fail, map_res};
3| |use nom::error::context;
4| |use nom::multi::many0;
5| |use nom::number::complete::{
6| | be_f32, be_f64, be_i16, be_i24, be_i32, be_i64, be_u16, be_u24, be_u32, be_u64, u8,
7| |};
8| |use nom::Parser;
9| |
10| |use crate::error::{nom_err_to_malformed, MalformedKind};
11| |use crate::EntryValue;
12| |
13| |use super::BoxHeader;
14| |
15| |/// Represents an [item list atom][1].
16| |///
17| |/// ilst is not a fullbox, it doesn't have version & flags.
18| |///
19| |/// atom-path: moov/meta/ilst
20| |///
21| |/// [1]: https://developer.apple.com/documentation/quicktime-file-format/metadata_item_list_atom
22| |#[derive(Debug, Clone, PartialEq)]
23| |pub struct IlstBox {
24| | header: BoxHeader,
25| | pub items: Vec<IlstItem>,
26| |}
27| |
28| |impl IlstBox {
29| 45| pub fn parse_box(input: &[u8]) -> nom::IResult<&[u8], IlstBox> {
30| 45| let (remain, header) = BoxHeader::parse(input)?;
^0
31| 45| let (remain, items) = many0(IlstItem::parse).parse(remain)?;
^0
32| |
33| 45| Ok((remain, IlstBox { header, items }))
34| 45| }
35| |}
36| |
37| |#[derive(Debug, Clone, PartialEq)]
38| |pub struct IlstItem {
39| | size: u32,
40| | index: u32, // 1-based index (start from 1)
41| | data_len: u32, // including self size
42| |
43| | /// Type indicator, see [type
44| | /// indicator](https://developer.apple.com/documentation/quicktime-file-format/type_indicator)
45| | type_set: u8,
46| | type_code: u32, // 24-bits
47| |
48| | local: u32,
49| | pub value: EntryValue, // len: data_len - 16
50| |}
51| |
52| |impl IlstItem {
53| 282| fn parse<'a>(input: &'a [u8]) -> nom::IResult<&'a [u8], IlstItem> {
54| 237| let (remain, (size, index, data_len, _, type_set, type_code, local)) =
55| 282| (be_u32, be_u32, be_u32, tag("data"), u8, be_u24, be_u32).parse(input)?;
^45
56| |
57| 237| if size < 24 || data_len < 16 {
58| 0| context("invalid ilst item", fail::<_, (), _>()).parse(remain)?;
59| 237| }
60| |
61| | // assert_eq!(size - 24, data_len - 16);
62| 237| if size - 24 != data_len - 16 {
63| 0| context("invalid ilst item", fail::<_, (), _>()).parse(remain)?;
64| 237| }
65| |
66| 237| let (remain, value) = map_res(take(data_len - 16), |bs: &'a [u8]| {
67| 237| parse_value(type_code, bs)
68| 237| })
69| 237| .parse(remain)?;
^0
70| |
71| 237| Ok((
72| 237| remain,
73| 237| IlstItem {
74| 237| size,
75| 237| index,
76| 237| data_len,
77| 237| type_set,
78| 237| type_code,
79| 237| local,
80| 237| value,
81| 237| },
82| 237| ))
83| 282| }
84| |}
85| |
86| |/// Parse ilst item data to value, see [Well-known
87| |/// types](https://developer.apple.com/documentation/quicktime-file-format/well-known_types)
88| |#[tracing::instrument(skip(data))]
89| 237|fn parse_value(type_code: u32, data: &[u8]) -> crate::Result<EntryValue> {
90| | use EntryValue::*;
91| 237| let v = match type_code {
92| | 1 => {
93| 225| let s = String::from_utf8(data.to_vec()).map_err(|e| crate::Error::Malformed {
94| 0| kind: MalformedKind::IsoBmffBox,
95| 0| message: format!("ilst text item is not valid utf-8: {e}"),
96| 0| })?;
97| 225| Text(s)
98| | }
99| 4| 21 => match data.len() {
100| 0| 1 => data[0].into(),
101| 0| 2 => be_i16(data)
102| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
103| | .1
104| 0| .into(),
105| 0| 3 => be_i24(data)
106| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
107| | .1
108| 0| .into(),
109| 0| 4 => be_i32(data)
110| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
111| | .1
112| 0| .into(),
113| 4| 8 => be_i64(data)
114| 4| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
^0 ^0 ^0 ^0
115| | .1
116| 4| .into(),
117| 0| data_len => {
118| 0| let data_type = "BE Signed Integer";
119| 0| tracing::warn!(data_type, data_len, "Invalid ilst item data.");
120| 0| let msg = format!(
121| | "Invalid ilst item data; \
122| | data type is {data_type} while data len is : {data_len}",
123| | );
124| 0| return Err(crate::Error::Malformed {
125| 0| kind: MalformedKind::IsoBmffBox,
126| 0| message: msg,
127| 0| });
128| | }
129| | },
130| 4| 22 => match data.len() {
131| 4| 1 => data[0].into(),
132| 0| 2 => be_u16(data)
133| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
134| | .1
135| 0| .into(),
136| 0| 3 => be_u24(data)
137| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
138| | .1
139| 0| .into(),
140| 0| 4 => be_u32(data)
141| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
142| | .1
143| 0| .into(),
144| 0| 8 => be_u64(data)
145| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
146| | .1
147| 0| .into(),
148| 0| data_len => {
149| 0| let data_type = "BE Unsigned Integer";
150| 0| tracing::warn!(data_type, data_len, "Invalid ilst item data.");
151| 0| let msg = format!(
152| | "Invalid ilst item data; \
153| | data type is {data_type} while data len is : {data_len}",
154| | );
155| 0| return Err(crate::Error::Malformed {
156| 0| kind: MalformedKind::IsoBmffBox,
157| 0| message: msg,
158| 0| });
159| | }
160| | },
161| 4| 23 => be_f32(data)
162| 4| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
^0 ^0 ^0 ^0
163| | .1
164| 4| .into(),
165| 0| 24 => be_f64(data)
166| 0| .map_err(|e| nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?
167| | .1
168| 0| .into(),
169| 0| data_type => {
170| 0| let msg = "Unsupported ilst item data type";
171| 0| tracing::warn!(data_type, "{}.", msg);
172| 0| return Err(crate::Error::Malformed {
173| 0| kind: MalformedKind::IsoBmffBox,
174| 0| message: format!("{}: {data_type}", msg),
175| 0| });
176| | }
177| | };
178| 237| Ok(v)
179| 237|}
180| |
181| |#[cfg(test)]
182| |mod tests {
183| | use crate::{bbox::travel_while, testkit::read_sample};
184| |
185| | use super::*;
186| | use test_case::test_case;
187| |
188| | #[test_case("meta.mov")]
189| 1| fn ilst_box(path: &str) {
190| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
191| |
192| 1| let buf = read_sample(path).unwrap();
193| 4| let (_, bbox) = travel_while(&buf, |b| b.box_type() != "moov").unwrap();
^1 ^1 ^1 ^1
194| 1| let bbox = bbox.unwrap();
195| 6| let (_, bbox) = travel_while(bbox.body_data(), |b| b.box_type() != "meta").unwrap();
^1 ^1 ^1 ^1
196| 1| let bbox = bbox.unwrap();
197| 3| let (_, bbox) = travel_while(bbox.body_data(), |b| b.box_type() != "ilst").unwrap();
^1 ^1 ^1 ^1
198| 1| let bbox = bbox.unwrap();
199| |
200| 1| let (rem, ilst) = IlstBox::parse_box(bbox.data).unwrap();
201| 1| tracing::info!(?ilst, "ilst");
202| 1| assert_eq!(rem, b"");
203| |
204| 1| assert_eq!(
205| 1| ilst.items
206| 1| .iter()
207| 5| .map(|x| format!("{x:?}"))
^1
208| 1| .collect::<Vec<_>>(),
209| |[
210| |"IlstItem { size: 29, index: 1, data_len: 21, type_set: 0, type_code: 1, local: 0, value: Text(\"Apple\") }",
211| |"IlstItem { size: 32, index: 2, data_len: 24, type_set: 0, type_code: 1, local: 0, value: Text(\"iPhone X\") }",
212| |"IlstItem { size: 30, index: 3, data_len: 22, type_set: 0, type_code: 1, local: 0, value: Text(\"12.1.2\") }",
213| |"IlstItem { size: 50, index: 4, data_len: 42, type_set: 0, type_code: 1, local: 0, value: Text(\"+27.1281+100.2508+000.000/\") }",
214| |"IlstItem { size: 49, index: 5, data_len: 41, type_set: 0, type_code: 1, local: 0, value: Text(\"2019-02-12T15:27:12+08:00\") }"
215| |],
216| | );
217| 1| }
218| |
219| | #[test_case("embedded-in-heic.mov")]
220| 1| fn heic_mov_ilst(path: &str) {
221| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
222| |
223| 1| let buf = read_sample(path).unwrap();
224| 3| let (_, moov) = travel_while(&buf, |b| b.box_type() != "moov").unwrap();
^1 ^1 ^1 ^1
225| 1| let moov = moov.unwrap();
226| 7| let (_, meta) = travel_while(moov.body_data(), |b| b.box_type() != "meta").unwrap();
^1 ^1 ^1 ^1
227| 1| let meta = meta.unwrap();
228| 3| let (_, ilst) = travel_while(meta.body_data(), |b| b.box_type() != "ilst").unwrap();
^1 ^1 ^1 ^1
229| 1| let ilst = ilst.unwrap();
230| |
231| 1| let (rem, ilst) = IlstBox::parse_box(ilst.data).unwrap();
232| 1| assert_eq!(rem.len(), 0);
233| |
234| 1| let mut s = ilst
235| 1| .items
236| 1| .iter()
237| 10| .map(|x| format!("{x:?}"))
^1
238| 1| .collect::<Vec<_>>()
239| 1| .join("\n");
240| 1| s.insert(0, '\n');
241| |
242| 1| assert_eq!(
243| | s,
244| |"
245| |IlstItem { size: 33, index: 1, data_len: 25, type_set: 0, type_code: 1, local: 0, value: Text(\"14.235563\") }
246| |IlstItem { size: 25, index: 2, data_len: 17, type_set: 0, type_code: 22, local: 0, value: U8(1) }
247| |IlstItem { size: 60, index: 3, data_len: 52, type_set: 0, type_code: 1, local: 0, value: Text(\"DA1A7EE8-0925-4C9F-9266-DDA3F0BB80F0\") }
248| |IlstItem { size: 28, index: 4, data_len: 20, type_set: 0, type_code: 23, local: 0, value: F32(0.93884003) }
249| |IlstItem { size: 32, index: 5, data_len: 24, type_set: 0, type_code: 21, local: 0, value: I64(4) }
250| |IlstItem { size: 50, index: 6, data_len: 42, type_set: 0, type_code: 1, local: 0, value: Text(\"+22.5797+113.9380+028.396/\") }
251| |IlstItem { size: 29, index: 7, data_len: 21, type_set: 0, type_code: 1, local: 0, value: Text(\"Apple\") }
252| |IlstItem { size: 37, index: 8, data_len: 29, type_set: 0, type_code: 1, local: 0, value: Text(\"iPhone 15 Pro\") }
253| |IlstItem { size: 28, index: 9, data_len: 20, type_set: 0, type_code: 1, local: 0, value: Text(\"17.1\") }
254| |IlstItem { size: 48, index: 10, data_len: 40, type_set: 0, type_code: 1, local: 0, value: Text(\"2023-11-02T19:58:34+0800\") }"
255| | );
256| 1| }
257| |}
/home/min/dev/nom-exif/src/bbox/keys.rs:
1| |use nom::bytes::complete::take;
2| |use nom::combinator::{flat_map, map_res};
3| |use nom::multi::many_m_n;
4| |use nom::number::complete::be_u32;
5| |use nom::Parser;
6| |
7| |use crate::bbox::{FullBoxHeader, ParseBody};
8| |
9| |/// Represents a [keys atom][1].
10| |///
11| |/// `keys` is a fullbox which contains version & flags.
12| |///
13| |/// atom-path: moov/meta/keys
14| |///
15| |/// [1]: https://developer.apple.com/documentation/quicktime-file-format/metadata_item_keys_atom
16| |#[derive(Debug, Clone, PartialEq, Eq)]
17| |pub struct KeysBox {
18| | header: FullBoxHeader,
19| | entry_count: u32,
20| | pub entries: Vec<KeyEntry>,
21| |}
22| |
23| |impl ParseBody<KeysBox> for KeysBox {
24| 45| fn parse_body(body: &[u8], header: FullBoxHeader) -> nom::IResult<&[u8], KeysBox> {
25| 45| let (remain, entry_count) = be_u32(body)?;
^0
26| 45| let (remain, entries) =
27| 45| many_m_n(entry_count as usize, entry_count as usize, KeyEntry::parse).parse(remain)?;
^0
28| |
29| 45| Ok((
30| 45| remain,
31| 45| KeysBox {
32| 45| header,
33| 45| entry_count,
34| 45| entries,
35| 45| },
36| 45| ))
37| 45| }
38| |}
39| |
40| |#[derive(Debug, Clone, PartialEq, Eq, Hash)]
41| |pub struct KeyEntry {
42| | size: u32,
43| | pub namespace: String, // 4 bytes
44| | pub key: String, // len: size - 8
45| |}
46| |
47| |impl KeyEntry {
48| 237| fn parse<'a>(input: &'a [u8]) -> nom::IResult<&'a [u8], KeyEntry> {
49| 237| let (remain, s) = map_res(
50| 237| flat_map(
51| 237| map_res(be_u32, |len| {
52| 237| len.checked_sub(4).ok_or("invalid KeyEntry header")
53| 237| }),
54| | take,
55| | ),
56| 237| |bs: &'a [u8]| String::from_utf8(bs.to_vec()),
57| | )
58| 237| .parse(input)?;
^0
59| |
60| 237| Ok((
61| 237| remain,
62| 237| KeyEntry {
63| 237| size: (s.len() + 4) as u32,
64| 237| namespace: s.chars().take(4).collect(),
65| 237| key: s.chars().skip(4).collect(),
66| 237| },
67| 237| ))
68| 237| }
69| |}
70| |
71| |#[cfg(test)]
72| |mod tests {
73| | use crate::{
74| | bbox::{travel_while, ParseBox},
75| | testkit::read_sample,
76| | };
77| |
78| | use super::*;
79| | use test_case::test_case;
80| |
81| | #[test_case("meta.mov", 4133, 0x01b9, 0xc9)]
82| 1| fn keys_box(path: &str, moov_size: u64, meta_size: u64, keys_size: u64) {
83| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
84| |
85| 1| let buf = read_sample(path).unwrap();
86| 4| let (_, moov) = travel_while(&buf, |b| b.box_type() != "moov").unwrap();
^1 ^1 ^1 ^1
87| 1| let moov = moov.unwrap();
88| 6| let (_, meta) = travel_while(moov.body_data(), |b| b.box_type() != "meta").unwrap();
^1 ^1 ^1 ^1
89| 1| let meta = meta.unwrap();
90| 2| let (_, keys) = travel_while(meta.body_data(), |b| b.box_type() != "keys").unwrap();
^1 ^1 ^1 ^1
91| 1| let keys = keys.unwrap();
92| |
93| 1| assert_eq!(moov.box_size(), moov_size);
94| 1| assert_eq!(meta.box_size(), meta_size);
95| 1| assert_eq!(keys.box_size(), keys_size);
96| |
97| 1| let (rem, keys) = KeysBox::parse_box(keys.data).unwrap();
98| 1| assert!(rem.is_empty());
99| |
100| 1| assert_eq!(
101| | keys.entries,
102| 1| vec![
103| 1| KeyEntry {
104| 1| size: 32,
105| 1| namespace: "mdta".to_owned(),
106| 1| key: "com.apple.quicktime.make".to_owned()
107| 1| },
108| 1| KeyEntry {
109| 1| size: 33,
110| 1| namespace: "mdta".to_owned(),
111| 1| key: "com.apple.quicktime.model".to_owned()
112| 1| },
113| 1| KeyEntry {
114| 1| size: 36,
115| 1| namespace: "mdta".to_owned(),
116| 1| key: "com.apple.quicktime.software".to_owned()
117| 1| },
118| 1| KeyEntry {
119| 1| size: 44,
120| 1| namespace: "mdta".to_owned(),
121| 1| key: "com.apple.quicktime.location.ISO6709".to_owned()
122| 1| },
123| 1| KeyEntry {
124| 1| size: 40,
125| 1| namespace: "mdta".to_owned(),
126| 1| key: "com.apple.quicktime.creationdate".to_owned()
127| 1| }
128| | ]
129| | );
130| 1| }
131| |
132| | #[test_case("embedded-in-heic.mov", 0x1790, 0x0372, 0x1ce)]
133| 1| fn heic_mov_keys(path: &str, moov_size: u64, meta_size: u64, keys_size: u64) {
134| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
135| |
136| 1| let buf = read_sample(path).unwrap();
137| 3| let (_, moov) = travel_while(&buf, |b| b.box_type() != "moov").unwrap();
^1 ^1 ^1 ^1
138| 1| let moov = moov.unwrap();
139| 7| let (_, meta) = travel_while(moov.body_data(), |b| b.box_type() != "meta").unwrap();
^1 ^1 ^1 ^1
140| 1| let meta = meta.unwrap();
141| 2| let (_, keys) = travel_while(meta.body_data(), |b| b.box_type() != "keys").unwrap();
^1 ^1 ^1 ^1
142| 1| let keys = keys.unwrap();
143| |
144| 1| assert_eq!(moov.box_size(), moov_size);
145| 1| assert_eq!(meta.box_size(), meta_size);
146| 1| assert_eq!(keys.box_size(), keys_size);
147| |
148| 1| let (rem, keys) = KeysBox::parse_box(keys.data).unwrap();
149| 1| assert!(rem.is_empty());
150| |
151| 1| let mut s = keys
152| 1| .entries
153| 1| .iter()
154| 10| .map(|x| format!("{x:?}"))
^1
155| 1| .collect::<Vec<_>>()
156| 1| .join("\n");
157| 1| s.insert(0, '\n');
158| |
159| 1| assert_eq!(
160| | s,
161| | r#"
162| |KeyEntry { size: 56, namespace: "mdta", key: "com.apple.quicktime.location.accuracy.horizontal" }
163| |KeyEntry { size: 43, namespace: "mdta", key: "com.apple.quicktime.live-photo.auto" }
164| |KeyEntry { size: 46, namespace: "mdta", key: "com.apple.quicktime.content.identifier" }
165| |KeyEntry { size: 53, namespace: "mdta", key: "com.apple.quicktime.live-photo.vitality-score" }
166| |KeyEntry { size: 63, namespace: "mdta", key: "com.apple.quicktime.live-photo.vitality-scoring-version" }
167| |KeyEntry { size: 44, namespace: "mdta", key: "com.apple.quicktime.location.ISO6709" }
168| |KeyEntry { size: 32, namespace: "mdta", key: "com.apple.quicktime.make" }
169| |KeyEntry { size: 33, namespace: "mdta", key: "com.apple.quicktime.model" }
170| |KeyEntry { size: 36, namespace: "mdta", key: "com.apple.quicktime.software" }
171| |KeyEntry { size: 40, namespace: "mdta", key: "com.apple.quicktime.creationdate" }"#,
172| | );
173| 1| }
174| |}
/home/min/dev/nom-exif/src/bbox/meta.rs:
1| |use std::{collections::HashMap, fmt::Debug, ops::Range};
2| |
3| |use nom::{combinator::fail, multi::many0, IResult, Needed, Parser};
4| |
5| |use crate::bbox::FullBoxHeader;
6| |
7| |use super::{
8| | iinf::IinfBox,
9| | iloc::{ConstructionMethod, IlocBox},
10| | BoxHolder, ParseBody, ParseBox,
11| |};
12| |
13| |/// Representing the `meta` box in a HEIF/HEIC file.
14| |#[derive(Clone, PartialEq, Eq)]
15| |pub struct MetaBox {
16| | header: FullBoxHeader,
17| | iinf: Option<IinfBox>,
18| | iloc: Option<IlocBox>,
19| |}
20| |
21| |impl Debug for MetaBox {
22| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23| 0| f.debug_struct("MetaBox")
24| 0| .field("header", &self.header)
25| 0| .field(
26| 0| "iinf entries num",
27| 0| &self.iinf.as_ref().map(|x| x.entries.len()),
28| | )
29| 0| .field("iloc items num", &self.iloc.as_ref().map(|x| x.items.len()))
30| 0| .finish()
31| 0| }
32| |}
33| |
34| |impl ParseBody<MetaBox> for MetaBox {
35| 25| fn parse_body<'a>(remain: &'a [u8], header: FullBoxHeader) -> IResult<&'a [u8], MetaBox> {
36| 205| let (remain, boxes) = many0(|remain: &'a [u8]| {
^21 ^21 ^25
37| 205| if remain.is_empty() {
38| | // stop many0 parsing to prevent Incomplete error
39| 21| fail::<_, (), _>().parse(remain)?;
40| 184| }
41| 184| let (remain, bbox) = BoxHolder::parse(remain)?;
^180 ^180 ^4
42| 180| Ok((remain, bbox))
43| 205| })
44| 25| .parse(remain)?;
^4
45| |
46| 21| let boxes = boxes
47| 21| .into_iter()
48| 164| .map(|b| (b.header.box_type.to_owned(), b))
^21
49| 21| .collect::<HashMap<_, _>>();
50| |
51| | // parse iinf box
52| 21| let iinf = boxes
53| 21| .get("iinf")
54| 21| .map(|iinf| IinfBox::parse_box(iinf.data))
55| 21| .transpose()?
^0
56| 21| .map(|x| x.1);
57| |
58| | // parse iloc box
59| 21| let iloc = boxes
60| 21| .get("iloc")
61| 21| .map(|iloc| IlocBox::parse_box(iloc.data))
62| 21| .transpose()?
^0
63| 21| .map(|x| x.1);
64| |
65| 21| Ok((remain, MetaBox { header, iinf, iloc }))
66| 25| }
67| |}
68| |
69| |impl MetaBox {
70| | #[allow(dead_code)]
71| | #[tracing::instrument(skip_all)]
72| 1| pub fn exif_data<'a>(&self, input: &'a [u8]) -> IResult<&'a [u8], Option<&'a [u8]>> {
73| 1| self.iinf
74| 1| .as_ref()
75| 1| .and_then(|iinf| iinf.get_infe("Exif"))
76| 1| .and_then(|exif_infe| {
77| 1| self.iloc
78| 1| .as_ref()
79| 1| .and_then(|iloc| iloc.item_offset_len(exif_infe.id))
80| 1| })
81| 1| .map(|(construction_method, offset, length)| {
82| 1| let start = offset as usize;
83| 1| let end = (offset + length) as usize;
84| 1| match construction_method {
85| | ConstructionMethod::FileOffset => {
86| 1| if end > input.len() {
87| 0| Err(nom::Err::Incomplete(Needed::new(end - input.len())))
88| | } else {
89| 1| Ok((&input[end..], Some(&input[start..end]))) // Safe-slice
90| | }
91| | }
92| | ConstructionMethod::IdatOffset => {
93| 0| tracing::debug!("idat offset construction method is not supported yet");
94| 0| fail().parse(input)
95| | }
96| | ConstructionMethod::ItemOffset => {
97| 0| tracing::debug!("item offset construction method is not supported yet");
98| 0| fail().parse(input)
99| | }
100| | }
101| 1| })
102| 1| .unwrap_or(Ok((input, None)))
103| 1| }
104| |
105| | #[tracing::instrument(skip_all)]
106| 20| pub fn exif_data_offset(&self) -> Option<Range<usize>> {
107| 20| self.iinf
108| 20| .as_ref()
109| 20| .and_then(|iinf| iinf.get_infe("Exif"))
110| 20| .and_then(|exif_infe| {
111| 20| self.iloc
112| 20| .as_ref()
113| 20| .and_then(|iloc| iloc.item_offset_len(exif_infe.id))
114| 20| })
115| 20| .and_then(|(construction_method, offset, length)| {
116| 20| let start = offset as usize;
117| 20| let end = (offset + length) as usize;
118| 20| match construction_method {
119| 20| ConstructionMethod::FileOffset => Some(start..end),
120| | ConstructionMethod::IdatOffset => {
121| 0| tracing::debug!("idat offset construction method is not supported yet");
122| 0| None
123| | }
124| | ConstructionMethod::ItemOffset => {
125| 0| tracing::debug!("item offset construction method is not supported yet");
126| 0| None
127| | }
128| | }
129| 20| })
130| 20| }
131| |}
132| |
133| |#[cfg(test)]
134| |mod tests {
135| | use crate::{bbox::travel_while, testkit::read_sample};
136| |
137| | use super::*;
138| | use test_case::test_case;
139| |
140| | #[test_case("exif.heic", 2618)]
141| 1| fn meta(path: &str, meta_size: usize) {
142| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
143| |
144| 1| let buf = read_sample(path).unwrap();
145| 2| let (_, bbox) = travel_while(&buf, |bbox| {
^1 ^1 ^1
146| 2| tracing::info!(bbox.header.box_type, "Got");
147| 2| bbox.box_type() != "meta"
148| 2| })
149| 1| .unwrap();
150| 1| let bbox = bbox.unwrap();
151| |
152| 1| assert_eq!(bbox.data.len() as u64, bbox.box_size());
153| 1| let (remain, meta) = MetaBox::parse_box(bbox.data).unwrap();
154| 1| assert_eq!(remain, b"");
155| 1| assert_eq!(meta.header.box_type, "meta");
156| 1| assert_eq!(meta.exif_data(&buf).unwrap().1.unwrap().len(), meta_size);
157| 1| }
158| |}
/home/min/dev/nom-exif/src/bbox/mvhd.rs:
1| |use chrono::{DateTime, Duration, FixedOffset, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
2| |use nom::{bytes::complete::take, number::complete::be_u32, Parser};
3| |
4| |use super::{FullBoxHeader, ParseBody};
5| |
6| |/// Represents a [movie header atom][1].
7| |///
8| |/// mvhd is a fullbox which contains version & flags.
9| |///
10| |/// atom-path: moov/mvhd
11| |///
12| |/// [1]: https://developer.apple.com/documentation/quicktime-file-format/movie_header_atom
13| |#[derive(Debug, Clone, PartialEq, Eq)]
14| |pub struct MvhdBox {
15| | header: FullBoxHeader,
16| |
17| | /// seconds since midnight, January 1, 1904
18| | creation_time: u32,
19| |
20| | /// seconds since midnight, January 1, 1904
21| | modification_time: u32,
22| |
23| | /// The number of time units that pass per second in its time coordinate
24| | /// system.
25| | time_scale: u32,
26| |
27| | /// Indicates the duration of the movie in time scale units.
28| | ///
29| | /// # convert to seconds
30| | ///
31| | /// seconds = duration / time_scale
32| | duration: u32,
33| | // omit 76 bytes...
34| | next_track_id: u32,
35| |}
36| |
37| |impl MvhdBox {
38| 95| pub fn duration_ms(&self) -> u64 {
39| 95| ((self.duration as f64) / (self.time_scale as f64) * 1000_f64) as u64
40| 95| }
41| |
42| 97| fn creation_time_naive(&self) -> NaiveDateTime {
43| 97| NaiveDate::from_ymd_opt(1904, 1, 1)
44| 97| .unwrap()
45| 97| .and_hms_opt(0, 0, 0)
46| 97| .unwrap()
47| 97| + Duration::seconds(self.creation_time as i64)
48| 97| }
49| |
50| 95| pub fn creation_time(&self) -> DateTime<FixedOffset> {
51| 95| self.creation_time_utc().fixed_offset()
52| 95| }
53| |
54| | #[allow(dead_code)]
55| 0| pub fn creation_time_local(&self) -> DateTime<Local> {
56| 0| Local.from_utc_datetime(&self.creation_time_naive())
57| 0| }
58| |
59| 97| pub fn creation_time_utc(&self) -> DateTime<Utc> {
60| 97| self.creation_time_naive().and_utc()
61| 97| }
62| |}
63| |
64| |impl ParseBody<MvhdBox> for MvhdBox {
65| 95| fn parse_body(body: &[u8], header: FullBoxHeader) -> nom::IResult<&[u8], MvhdBox> {
66| 95| let (remain, (creation_time, modification_time, time_scale, duration, _, next_track_id)) =
67| 95| (be_u32, be_u32, be_u32, be_u32, take(76usize), be_u32).parse(body)?;
^0
68| |
69| 95| Ok((
70| 95| remain,
71| 95| MvhdBox {
72| 95| header,
73| 95| creation_time,
74| 95| modification_time,
75| 95| time_scale,
76| 95| duration,
77| 95| next_track_id,
78| 95| },
79| 95| ))
80| 95| }
81| |}
82| |
83| |#[cfg(test)]
84| |mod tests {
85| | use crate::{
86| | bbox::{travel_while, ParseBox},
87| | testkit::read_sample,
88| | };
89| |
90| | use super::*;
91| | use chrono::FixedOffset;
92| | use test_case::test_case;
93| |
94| | #[test_case(
95| | "meta.mov",
96| | "2024-02-02T08:09:57.000000Z",
97| | "2024-02-02T16:09:57+08:00",
98| | 500
99| | )]
100| | #[test_case(
101| | "meta.mp4",
102| | "2024-02-03T07:05:38.000000Z",
103| | "2024-02-03T15:05:38+08:00",
104| | 1063
105| | )]
106| 2| fn mvhd_box(path: &str, time_utc: &str, time_east8: &str, milliseconds: u64) {
107| 2| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
108| |
109| 2| let buf = read_sample(path).unwrap();
110| |
111| 7| let (_, bbox) = travel_while(&buf, |b| b.box_type() != "moov").unwrap();
^2 ^2 ^2 ^2
112| 2| let bbox = bbox.unwrap();
113| 2| let (_, bbox) = travel_while(bbox.body_data(), |b| b.box_type() != "mvhd").unwrap();
114| 2| let bbox = bbox.unwrap();
115| 2| let (_, mvhd) = MvhdBox::parse_box(bbox.data).unwrap();
116| |
117| 2| assert_eq!(mvhd.duration_ms(), milliseconds);
118| |
119| | // time is represented in seconds since midnight, January 1, 1904,
120| | // preferably using coordinated universal time (UTC).
121| 2| let created = mvhd.creation_time_utc();
122| 2| assert_eq!(created, mvhd.creation_time());
123| 2| assert_eq!(
124| 2| created.to_rfc3339_opts(chrono::SecondsFormat::Micros, true),
125| | time_utc
126| | );
127| 2| assert_eq!(
128| 2| created
129| 2| .with_timezone(&FixedOffset::east_opt(8 * 3600).unwrap())
130| 2| .to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
131| | time_east8
132| | );
133| 2| }
134| |}
/home/min/dev/nom-exif/src/bbox/tkhd.rs:
1| |use nom::{
2| | bytes::complete::take,
3| | number::complete::{be_u16, be_u32, be_u64},
4| | Parser,
5| |};
6| |
7| |use crate::error::MalformedKind;
8| |
9| |use super::{find_box, travel_while, BoxHolder, FullBoxHeader, ParseBody, ParseBox};
10| |
11| |/// Represents a [movie header atom][1].
12| |///
13| |/// tkhd is a fullbox which contains version & flags.
14| |///
15| |/// atom-path: moov/trak/tkhd
16| |///
17| |/// [1]: https://developer.apple.com/documentation/quicktime-file-format/track_header_atom
18| |#[derive(Debug, Clone, PartialEq, Eq)]
19| |pub struct TkhdBox {
20| | header: FullBoxHeader,
21| |
22| | /// seconds since midnight, January 1, 1904
23| | creation_time: u32,
24| |
25| | /// seconds since midnight, January 1, 1904
26| | modification_time: u32,
27| |
28| | track_id: u32,
29| | // reserved: u32,
30| | duration: u32,
31| | // reserved2: u64,
32| | layer: u16,
33| | alt_group: u16,
34| | volume: u16,
35| | // reserved3: u16,
36| |
37| | // matrix: [u8; 36],
38| | pub width: u32,
39| | pub height: u32,
40| |}
41| |
42| |impl ParseBody<TkhdBox> for TkhdBox {
43| 93| fn parse_body(body: &[u8], header: FullBoxHeader) -> nom::IResult<&[u8], TkhdBox> {
44| | let (
45| 93| remain,
46| | (
47| 93| creation_time,
48| 93| modification_time,
49| 93| track_id,
50| | _,
51| 93| duration,
52| | _,
53| 93| layer,
54| 93| alt_group,
55| 93| volume,
56| | _,
57| | _,
58| 93| width,
59| | _,
60| 93| height,
61| | _,
62| | ),
63| 93| ) = (
64| 93| be_u32,
65| 93| be_u32,
66| 93| be_u32,
67| 93| be_u32,
68| 93| be_u32,
69| 93| be_u64,
70| 93| be_u16,
71| 93| be_u16,
72| 93| be_u16,
73| 93| be_u16,
74| 93| take(36usize),
75| 93| be_u16,
76| 93| be_u16,
77| 93| be_u16,
78| 93| be_u16,
79| 93| )
80| 93| .parse(body)?;
^0
81| |
82| 93| Ok((
83| 93| remain,
84| 93| TkhdBox {
85| 93| header,
86| 93| creation_time,
87| 93| modification_time,
88| 93| track_id,
89| 93| duration,
90| 93| layer,
91| 93| alt_group,
92| 93| volume,
93| 93| width: width as u32,
94| 93| height: height as u32,
95| 93| },
96| 93| ))
97| 93| }
98| |}
99| |
100| |/// Try to find a video track's tkhd in moov body. atom-path: "moov/trak/tkhd".
101| 96|pub fn parse_video_tkhd_in_moov(input: &[u8]) -> crate::Result<Option<TkhdBox>> {
102| 96| let Some(bbox) = find_video_track(input)? else {
^93 ^0
103| 3| return Ok(None);
104| | };
105| 93| let (_, Some(bbox)) = find_box(bbox.body_data(), "tkhd").map_err(|e| {
^0
106| 0| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::IsoBmffBox)
107| 0| })?
108| | else {
109| 0| return Ok(None);
110| | };
111| 93| let (_, tkhd) = TkhdBox::parse_box(bbox.data).map_err(|_| crate::Error::Malformed {
112| 0| kind: MalformedKind::IsoBmffBox,
113| 0| message: "parse tkhd failed".to_string(),
114| 0| })?;
115| 93| Ok(Some(tkhd))
116| 96|}
117| |
118| 96|fn find_video_track(input: &[u8]) -> crate::Result<Option<BoxHolder<'_>>> {
119| 200| let (_, bbox) = travel_while(input, |b| {
^96 ^96 ^96
120| | // find video track
121| 200| if b.box_type() != "trak" {
122| 99| true
123| | } else {
124| | // got a 'trak', to check if it's a 'vide' trak
125| |
126| 101| let found = find_box(b.body_data(), "mdia/hdlr");
127| 101| let Ok(bbox) = found else {
128| 0| return true;
129| | };
130| 101| let Some(hdlr) = bbox.1 else {
131| 0| return true;
132| | };
133| |
134| | // component subtype
135| 101| if hdlr.body_data().len() < 12 {
136| 3| return true;
137| 98| }
138| 98| let subtype = &hdlr.body_data()[8..12]; // Safe-slice
139| 98| if subtype == b"vide" {
140| | // found it!
141| 93| false
142| | } else {
143| 5| true
144| | }
145| | }
146| 200| })
147| 96| .map_err(|e| crate::Error::Malformed {
148| 0| kind: MalformedKind::IsoBmffBox,
149| 0| message: format!("find vide trak failed: {e:?}"),
150| 0| })?;
151| |
152| 96| Ok(bbox)
153| 96|}
154| |
#[cfg(test)]
mod tests {
    use crate::{bbox::travel_while, testkit::read_sample};

    use super::*;
    use test_case::test_case;

    /// The video track header of each sample reports the expected frame size.
    #[test_case("meta.mov", 720, 1280)]
    #[test_case("meta.mp4", 1920, 1080)]
    fn tkhd_box(path: &str, width: u32, height: u32) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let sample = read_sample(path).unwrap();

        // Locate the top-level moov box, then look for the video tkhd in it.
        let (_, moov) = travel_while(&sample, |b| b.box_type() != "moov").unwrap();
        let moov = moov.unwrap();
        let tkhd = parse_video_tkhd_in_moov(moov.body_data())
            .unwrap()
            .unwrap();

        assert_eq!(tkhd.width, width);
        assert_eq!(tkhd.height, height);
    }

    /// Regression sample: parsing must return (with any result) rather than panic.
    #[test_case("crash_moov-trak")]
    fn tkhd_crash(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let sample = read_sample(path).unwrap();

        let (_, moov) = travel_while(&sample, |b| b.box_type() != "moov").unwrap();
        let moov = moov.unwrap();
        let _ = parse_video_tkhd_in_moov(moov.body_data());
    }
}
/home/min/dev/nom-exif/src/bbox/uuid.rs:
1| |use std::ops::Range;
2| |
3| |use nom::{IResult, Parser};
4| |
5| |use super::BoxHolder;
6| |use crate::exif::TiffHeader;
7| |
/// Number of bytes occupied by a UUID.
pub const UUID_SIZE: usize = 16;

/// Box types recognised as Canon CMT boxes.
const CMT_BOX_TYPES: &[&str] = &["CMT1", "CMT2", "CMT3"];

/// UUID of Canon's box in CR3 files: 85c0b687-820f-11e0-8111-f4ce462b6a48
pub const CANON_UUID: [u8; UUID_SIZE] = [
    0x85, 0xc0, 0xb6, 0x87, 0x82, 0x0f, 0x11, 0xe0, // 85c0b687-820f-11e0
    0x81, 0x11, 0xf4, 0xce, 0x46, 0x2b, 0x6a, 0x48, // 8111-f4ce462b6a48
];
18| |
/// Canon's UUID box, reduced to the locations of its CMT (Canon Metadata)
/// sub-boxes.
///
/// Canon CR3 files keep their EXIF metadata in a proprietary UUID box that
/// holds three CMT sub-boxes:
/// - CMT1: main EXIF IFD0 data (camera settings, basic metadata)
/// - CMT2: ExifIFD data (detailed EXIF information)
/// - CMT3: MakerNotes data (Canon-specific metadata)
///
/// Each field is the byte range of the corresponding box body, or `None`
/// when that box was not recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CanonUuidBox {
    /// Body range of CMT1 (main EXIF IFD0 data, the primary metadata).
    cmt1_offset: Option<Range<usize>>,
    /// Body range of CMT2 (ExifIFD data).
    cmt2_offset: Option<Range<usize>>,
    /// Body range of CMT3 (MakerNotes data).
    cmt3_offset: Option<Range<usize>>,
}
35| |
36| |impl CanonUuidBox {
37| | /// Returns the offset range for the primary EXIF data (CMT1).
38| 9| pub fn exif_data_offset(&self) -> Option<&Range<usize>> {
39| | // For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
40| 9| self.cmt1_offset.as_ref()
41| 9| }
42| |
43| | /// Returns the offset range for the ExifIFD data (CMT2).
44| | #[allow(dead_code)] // API method for future use
45| 5| pub fn cmt2_data_offset(&self) -> Option<&Range<usize>> {
46| 5| self.cmt2_offset.as_ref()
47| 5| }
48| |
49| | /// Returns the offset range for the MakerNotes data (CMT3).
50| | #[allow(dead_code)] // API method for future use
51| 5| pub fn cmt3_data_offset(&self) -> Option<&Range<usize>> {
52| 5| self.cmt3_offset.as_ref()
53| 5| }
54| |
55| | /// Parses Canon's UUID box to extract CMT (Canon Metadata) box offsets.
56| 7| pub fn parse<'a>(uuid_data: &'a [u8], full_input: &'a [u8]) -> IResult<&'a [u8], CanonUuidBox> {
57| | // Validate input sizes
58| 7| if uuid_data.len() < UUID_SIZE {
59| 0| tracing::error!(
60| | "Canon UUID box data too small: {} bytes, expected at least {}",
61| 0| uuid_data.len(),
62| | UUID_SIZE
63| | );
64| 0| return nom::combinator::fail().parse(uuid_data);
65| 7| }
66| |
67| 7| if full_input.is_empty() {
68| 0| tracing::error!("Full input is empty for Canon UUID box parsing");
69| 0| return nom::combinator::fail().parse(uuid_data);
70| 7| }
71| |
72| | // Skip the UUID header
73| 7| let mut remain = &uuid_data[UUID_SIZE..];
74| 7| let mut cmt1_offset = None;
75| 7| let mut cmt2_offset = None;
76| 7| let mut cmt3_offset = None;
77| |
78| 7| tracing::debug!(
79| | "Parsing Canon UUID box with {} bytes of CMT data",
80| 0| remain.len()
81| | );
82| |
83| | // Parse CMT boxes within the Canon UUID box
84| 70| while !remain.is_empty() {
85| 63| let (new_remain, bbox) = match BoxHolder::parse(remain) {
86| 63| Ok(result) => result,
87| 0| Err(e) => {
88| 0| tracing::warn!(
89| | "Failed to parse CMT box, continuing with partial data: {:?}",
90| | e
91| | );
92| 0| break; // Stop parsing but return what we found so far
93| | }
94| | };
95| |
96| 63| let box_type = bbox.box_type();
97| 63| if CMT_BOX_TYPES.contains(&box_type) {
98| | // Calculate offset safely using slice bounds checking
99| 21| let data_start = bbox.data.as_ptr() as usize;
100| 21| let input_start = full_input.as_ptr() as usize;
101| |
102| | // Ensure the data pointer is within the input bounds
103| 21| if data_start < input_start || data_start >= input_start + full_input.len() {
104| 0| tracing::warn!("CMT box data pointer outside input bounds");
105| 0| remain = new_remain;
106| 0| continue;
107| 21| }
108| |
109| 21| let start_offset = data_start - input_start;
110| 21| let body_start = start_offset + bbox.header_size();
111| 21| let body_end = start_offset + bbox.data.len();
112| |
113| | // Validate offset ranges are within bounds
114| 21| if body_end > full_input.len() {
115| 0| tracing::warn!(
116| | "CMT box body extends beyond input bounds: {}..{} > {}",
117| | body_start,
118| | body_end,
119| 0| full_input.len()
120| | );
121| 0| remain = new_remain;
122| 0| continue;
123| 21| }
124| |
125| 21| let offset_range = body_start..body_end;
126| |
127| | // Validate CMT box data has minimum size and reasonable content
128| 21| let cmt_data = &full_input[offset_range.clone()];
129| 21| if !Self::validate_cmt_data(box_type, cmt_data) {
130| 0| tracing::warn!("CMT box {} failed validation, skipping", box_type);
131| 0| remain = new_remain;
132| 0| continue;
133| 21| }
134| |
135| 21| match box_type {
136| 21| "CMT1" => {
137| 7| cmt1_offset = Some(offset_range);
138| 7| tracing::debug!("Found CMT1 (IFD0) at offset {}..{}", body_start, body_end);
139| | }
140| 14| "CMT2" => {
141| 7| cmt2_offset = Some(offset_range);
142| 7| tracing::debug!(
143| | "Found CMT2 (ExifIFD) at offset {}..{}",
144| | body_start,
145| | body_end
146| | );
147| | }
148| 7| "CMT3" => {
149| 7| cmt3_offset = Some(offset_range);
150| 7| tracing::debug!(
151| | "Found CMT3 (MakerNotes) at offset {}..{}",
152| | body_start,
153| | body_end
154| | );
155| | }
156| 0| _ => unreachable!("box_type should be one of CMT1, CMT2, or CMT3"),
157| | }
158| | } else {
159| | // Skip unknown boxes within Canon UUID
160| 42| tracing::debug!("Skipping unknown box type: {}", box_type);
161| | }
162| |
163| 63| remain = new_remain;
164| | }
165| |
166| 7| Ok((
167| 7| remain,
168| 7| CanonUuidBox {
169| 7| cmt1_offset,
170| 7| cmt2_offset,
171| 7| cmt3_offset,
172| 7| },
173| 7| ))
174| 7| }
175| |
176| | /// Validates CMT box data for basic integrity.
177| 21| fn validate_cmt_data(box_type: &str, data: &[u8]) -> bool {
178| | // Minimum size check - CMT boxes should have at least 8 bytes
179| 21| if data.len() < 8 {
180| 0| tracing::warn!("CMT box {} too small: {} bytes", box_type, data.len());
181| 0| return false;
182| 21| }
183| |
184| 21| match box_type {
185| 21| "CMT1" => {
186| | // CMT1 should start with TIFF header - validate using TiffHeader::parse
187| 7| if TiffHeader::parse(data).is_ok() {
188| 7| tracing::debug!("CMT1 has valid TIFF header");
189| 7| true
190| | } else {
191| 0| tracing::warn!("CMT1 does not have valid TIFF header");
192| 0| false
193| | }
194| | }
195| 14| "CMT2" | "CMT3" => {
^7
196| | // CMT2 and CMT3 should also be TIFF format, but we're more lenient
197| | // since they might have different internal structures
198| 14| if data.len() >= 8 {
199| 14| tracing::debug!("CMT box {} has sufficient size", box_type);
200| 14| true
201| | } else {
202| 0| tracing::warn!("CMT box {} too small for valid data", box_type);
203| 0| false
204| | }
205| | }
206| | _ => {
207| 0| tracing::warn!("Unknown CMT box type: {}", box_type);
208| 0| false
209| | }
210| | }
211| 21| }
212| |}
/home/min/dev/nom-exif/src/cr3.rs:
1| |use std::ops::Range;
2| |
3| |use nom::IResult;
4| |
5| |use crate::{
6| | bbox::Cr3MoovBox,
7| | error::{
8| | nom_error_to_parsing_error_with_state, MalformedKind, ParsingError, ParsingErrorState,
9| | },
10| | exif::{check_exif_header2, TiffHeader},
11| | parser::ParsingState,
12| |};
13| |
14| 6|pub(crate) fn parse_moov_box(input: &[u8]) -> IResult<&[u8], Option<Cr3MoovBox>> {
15| 6| Cr3MoovBox::parse(input)
16| 6|}
17| |
/// The CMT data ranges collected from a CR3 file.
#[derive(Debug, Clone)]
pub(crate) struct Cr3CmtRanges {
    /// One `(block_id, data_range)` entry per CMT block.
    pub ranges: Vec<(&'static str, Range<usize>)>,
}
25| |
26| |/// Extract all CMT data ranges from a CR3 file buffer.
27| |/// Returns the moov box and all CMT ranges if available.
28| 4|pub(crate) fn extract_all_cmt_ranges(
29| 4| buf: &[u8],
30| 4|) -> Result<Option<Cr3CmtRanges>, ParsingErrorState> {
31| 4| let (_, moov) = parse_moov_box(buf)
^3
32| 4| .map_err(|e| nom_error_to_parsing_error_with_state(e, MalformedKind::IsoBmffBox, None))?;
^1 ^1 ^1 ^1 ^1
33| |
34| 3| let Some(moov) = moov else {
35| 0| return Ok(None);
36| | };
37| |
38| 3| let ranges = moov.all_cmt_data_offsets();
39| 3| if ranges.is_empty() {
40| 0| return Err(ParsingErrorState::new(
41| 0| ParsingError::Failed {
42| 0| kind: MalformedKind::IsoBmffBox,
43| 0| message:
44| 0| "CR3 file contains no EXIF data: Canon UUID box found but no CMT offsets available"
45| 0| .into(),
46| 0| },
47| 0| None,
48| 0| ));
49| 3| }
50| |
51| | // Enforce the invariant that parse_moov_box's streaming::take guarantees:
52| | // when we reach here all CMT blocks are fully loaded. parse_cr3_exif_iter
53| | // (which slices each range out of the shared buffer upfront) relies on
54| | // this; better to surface a structured error here than to let Bytes::slice
55| | // panic later.
56| 9| for (block_id, range) in &ranges {
^3
57| 9| if range.end > buf.len() {
58| 0| tracing::error!(
59| | block_id,
60| | range_end = range.end,
61| 0| buf_len = buf.len(),
62| | "CMT range extends beyond loaded buffer (parse_moov_box invariant violated)"
63| | );
64| 0| return Err(ParsingErrorState::new(
65| 0| ParsingError::Failed {
66| 0| kind: MalformedKind::IsoBmffBox,
67| 0| message: format!(
68| 0| "CR3 CMT block {block_id} range {range:?} extends past loaded buffer ({} bytes)",
69| 0| buf.len()
70| 0| ),
71| 0| },
72| 0| None,
73| 0| ));
74| 9| }
75| | }
76| |
77| 3| Ok(Some(Cr3CmtRanges { ranges }))
78| 4|}
79| |
80| 3|pub(crate) fn extract_exif_data(
81| 3| state: Option<ParsingState>,
82| 3| buf: &[u8],
83| 3|) -> Result<(Option<&[u8]>, Option<ParsingState>), ParsingErrorState> {
84| 2| let (data, state) = match state {
^1
85| 1| Some(ParsingState::Cr3ExifSize(size)) => {
86| 1| let (_, data) = nom::bytes::streaming::take(size)(buf).map_err(|e| {
^0
87| 0| nom_error_to_parsing_error_with_state(e, MalformedKind::IsoBmffBox, state.clone())
88| 0| })?;
89| 1| (Some(data), state)
90| | }
91| | None => {
92| 2| let (_, moov) = parse_moov_box(buf).map_err(|e| {
^1 ^1
93| 1| nom_error_to_parsing_error_with_state(e, MalformedKind::IsoBmffBox, state)
94| 1| })?;
95| |
96| 1| if let Some(moov) = moov {
97| 1| if let Some(range) = moov.exif_data_offset() {
98| 1| if range.end > buf.len() {
99| 0| let state = ParsingState::Cr3ExifSize(range.len());
100| 0| let clear_and_skip = ParsingError::ClearAndSkip(range.start);
101| 0| return Err(ParsingErrorState::new(clear_and_skip, Some(state)));
102| | } else {
103| 1| (Some(&buf[range]), None)
104| | }
105| | } else {
106| 0| return Err(ParsingErrorState::new(
107| 0| ParsingError::Failed {
108| 0| kind: MalformedKind::IsoBmffBox,
109| 0| message:
110| 0| "CR3 file contains no EXIF data: Canon UUID box found but no CMT1 offset available"
111| 0| .into(),
112| 0| },
113| 0| None,
114| 0| ));
115| | }
116| | } else {
117| 0| (None, None)
118| | }
119| | }
120| 0| _ => unreachable!(),
121| | };
122| |
123| | // For CR3 files, the CMT1 data already contains TIFF header, so we don't need to check for EXIF header
124| 2| let data = data.and_then(|x| {
125| 2| if TiffHeader::parse(x).is_ok() {
126| 2| Some(x)
127| | } else {
128| | // Try to find TIFF header if not at the beginning
129| 0| check_exif_header2(x).map(|x| x.0).ok()
130| | }
131| 2| });
132| |
133| 2| Ok((data, state))
134| 3|}
135| |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bbox::Cr3MoovBox;
    use crate::testkit::*;
    use crate::{MediaParser, MediaSource};
    use std::io::Read;
    use test_case::test_case;

    /// End-to-end: the EXIF entries parsed from the sample match the
    /// recorded, sorted expectation file.
    #[test_case("canon-r6.cr3")]
    fn cr3_parse_with_media_parser(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let mut parser = MediaParser::new();
        let ms = MediaSource::open(format!("testdata/{}", path)).unwrap();
        assert_eq!(ms.kind(), crate::MediaKind::Image);

        let iter: crate::ExifIter = parser.parse_exif(ms).unwrap();
        let exif: crate::Exif = iter.into();

        let mut expect = String::new();
        let mut expect_file = open_sample(&format!("{path}.sorted.txt")).unwrap();
        expect_file.read_to_string(&mut expect).unwrap();

        assert_eq!(sorted_exif_entries(&exif).join("\n"), expect.trim());
    }

    /// The moov box exposes a Canon UUID box with all three CMT ranges.
    #[test_case("canon-r6.cr3")]
    fn cr3_moov_box_parsing(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let buf = read_sample(path).unwrap();
        let (_, moov_box) = Cr3MoovBox::parse(&buf[..]).unwrap();

        assert!(moov_box.is_some(), "Moov box should be found");
        let moov_box = moov_box.unwrap();

        let canon_box = moov_box.uuid_canon_box().unwrap();

        assert!(
            canon_box.exif_data_offset().is_some(),
            "CMT1 box should be found"
        );
        assert!(
            canon_box.cmt2_data_offset().is_some(),
            "CMT2 box should be found"
        );
        assert!(
            canon_box.cmt3_data_offset().is_some(),
            "CMT3 box should be found"
        );

        // The CMT1 range must be non-empty and lie inside the file.
        let cmt1 = canon_box.exif_data_offset().unwrap();
        assert!(cmt1.start < cmt1.end, "CMT1 offset range should be valid");
        assert!(
            cmt1.end <= buf.len(),
            "CMT1 offset should be within file bounds"
        );
    }

    /// CMT1 is reachable through the moov box accessor.
    #[test_case("canon-r6.cr3")]
    fn test_cmt_api_access(path: &str) {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let buf = read_sample(path).unwrap();
        let (_, moov_box) = Cr3MoovBox::parse(&buf[..]).unwrap();
        let moov_box = moov_box.expect("Should have moov box");

        assert!(
            moov_box.exif_data_offset().is_some(),
            "Should have CMT1 data"
        );
    }

    /// A file cut off early must produce an error, not a panic.
    #[test_case("canon-r6.cr3")]
    fn cr3_truncated_before_moov(path: &str) {
        let buf = read_sample(path).unwrap();
        let truncated = &buf[..64];
        assert!(extract_exif_data(None, truncated).is_err());
    }

    /// The complete file yields EXIF data on the first pass.
    #[test_case("canon-r6.cr3")]
    fn cr3_extract_exif_happy_path(path: &str) {
        let buf = read_sample(path).unwrap();
        let (data, _) = extract_exif_data(None, &buf).unwrap();
        assert!(data.is_some());
    }

    /// All reported ranges carry a known block id and stay inside the file.
    #[test_case("canon-r6.cr3")]
    fn cr3_extract_all_cmt_ranges(path: &str) {
        let buf = read_sample(path).unwrap();
        let ranges = extract_all_cmt_ranges(&buf).unwrap();
        let ranges = ranges.expect("Canon CR3 must have CMT ranges");
        assert!(!ranges.ranges.is_empty());
        for (id, r) in &ranges.ranges {
            assert!(*id == "CMT1" || *id == "CMT2" || *id == "CMT3");
            assert!(r.end <= buf.len());
        }
    }

    /// Resuming with a `Cr3ExifSize` state takes the payload straight from
    /// the start of the buffer.
    #[test_case("canon-r6.cr3")]
    fn cr3_second_pass_with_state(path: &str) {
        let buf = read_sample(path).unwrap();
        let ranges = extract_all_cmt_ranges(&buf).unwrap().unwrap();
        let (_, cmt1) = ranges
            .ranges
            .iter()
            .find(|(id, _)| *id == "CMT1")
            .expect("Canon CR3 must have CMT1");
        let exif_bytes = &buf[cmt1.start..cmt1.end];
        let resume = Some(ParsingState::Cr3ExifSize(exif_bytes.len()));
        let (data, _) = extract_exif_data(resume, exif_bytes).unwrap();
        // CR3 CMT1 starts with TIFF header — should pass through.
        assert!(data.is_some());
    }
}
/home/min/dev/nom-exif/src/ebml/element.rs:
1| |use std::{
2| | fmt::Debug,
3| | io::{BufRead, Cursor, Read},
4| |};
5| |
6| |use bytes::Buf;
7| |use thiserror::Error;
8| |
9| |use crate::ebml::vint::VInt;
10| |
11| |use super::vint::ParseVIntFailed;
12| |
13| |#[derive(Debug, Error)]
14| |pub enum ParseEBMLFailed {
15| | #[error("need more bytes: {0}")]
16| | Need(usize),
17| |
18| | #[error("not an EBML file")]
19| | NotEBMLFile,
20| |
21| | #[error("invalid EBML file: {0}")]
22| | InvalidEBMLFile(Box<dyn std::error::Error + Send + Sync>),
23| |}
24| |
25| |impl From<ParseEBMLFailed> for crate::Error {
26| 128| fn from(e: ParseEBMLFailed) -> Self {
27| 128| match e {
28| 2| ParseEBMLFailed::Need(_) => Self::UnexpectedEof {
29| 2| context: "ebml element",
30| 2| },
31| 116| ParseEBMLFailed::NotEBMLFile => Self::Malformed {
32| 116| kind: crate::error::MalformedKind::EbmlElement,
33| 116| message: e.to_string(),
34| 116| },
35| 10| ParseEBMLFailed::InvalidEBMLFile(e) => Self::Malformed {
36| 10| kind: crate::error::MalformedKind::EbmlElement,
37| 10| message: e.to_string(),
38| 10| },
39| | }
40| 128| }
41| |}
42| |
43| |impl From<ParseVIntFailed> for ParseEBMLFailed {
44| 13| fn from(value: ParseVIntFailed) -> Self {
45| 13| match value {
46| 10| ParseVIntFailed::InvalidVInt(e) => ParseEBMLFailed::InvalidEBMLFile(e.into()),
47| 3| ParseVIntFailed::Need(i) => ParseEBMLFailed::Need(i),
48| | }
49| 13| }
50| |}
51| |
/// Element ID value that is treated as invalid.
// NOTE(review): presumably used by callers as a sentinel — confirm at the use sites.
pub(crate) const INVALID_ELEMENT_ID: u8 = 0xFF;
53| |
/// IDs of the top-level elements this parser looks for; the discriminant of
/// each variant is the element ID itself.
#[derive(Debug, Clone, Copy)]
pub(crate) enum TopElementId {
    /// The EBML header element.
    Ebml = 0x1A45DFA3,
    /// The Segment element.
    Segment = 0x18538067,
}
59| |
60| |impl TopElementId {
61| 17| fn code(self) -> u32 {
62| 17| self as u32
63| 17| }
64| |}
65| |
66| |#[derive(Debug, Error)]
67| |#[error("unknown ebml ID: {0}")]
68| |pub struct UnknowEbmlIDError(pub u64);
69| |
70| |impl TryFrom<u64> for TopElementId {
71| | type Error = UnknowEbmlIDError;
72| 9| fn try_from(v: u64) -> Result<Self, Self::Error> {
73| 9| let id = match v {
^1
74| 9| x if x == TopElementId::Ebml.code() as u64 => TopElementId::Ebml,
^1 ^1
75| 8| x if x == TopElementId::Segment.code() as u64 => TopElementId::Segment,
^0 ^0
76| 8| o => return Err(UnknowEbmlIDError(o)),
77| | };
78| 1| Ok(id)
79| 9| }
80| |}
81| |
/// Element IDs that are looked up inside the EBML header; the discriminant of
/// each variant is the element ID itself.
#[allow(unused)] // most variants are listed for completeness only
#[derive(Debug, Clone, Copy)]
enum EBMLHeaderId {
    Version = 0x4286,
    ReadVersion = 0x42F7,
    MaxIdlength = 0x42F2,
    MaxSizeLength = 0x42F3,
    /// The only ID this file's header parsing searches for.
    DocType = 0x4282,
    DocTypeVersion = 0x4287,
    DocTypeReadVersion = 0x4285,
    DocTypeExtension = 0x4281,
    DocTypeExtensionName = 0x4283,
    DocTypeExtensionVersion = 0x4284,
}
96| |
/// IDs of the global elements. These elements apply only to the EBML Body,
/// not to the EBML Header.
pub(crate) enum EBMLGlobalId {
    Crc32 = 0xBF,
    Void = 0xEC,
}
102| |
103| |/// Refer to [EBML header
104| |/// elements](https://github.com/ietf-wg-cellar/ebml-specification/blob/master/specification.markdown#ebml-header-elements)
105| 211|pub(crate) fn parse_ebml_doc_type(cursor: &mut Cursor<&[u8]>) -> Result<String, ParseEBMLFailed> {
106| 211| let header = next_element_header(cursor)?;
^199 ^12
107| 199| tracing::debug!(ebml_header = ?header);
108| |
109| 199| if header.id != TopElementId::Ebml as u64 {
110| 117| return Err(ParseEBMLFailed::NotEBMLFile);
111| 82| }
112| |
113| 82| if cursor.remaining() < header.data_size {
114| 0| return Err(ParseEBMLFailed::Need(header.data_size - cursor.remaining()));
115| 82| }
116| |
117| 82| let pos = cursor.position() as usize;
118| | // consume all header data
119| 82| cursor.consume(header.data_size);
120| |
121| | // get doc type
122| 82| match parse_ebml_head_data(&cursor.get_ref()[pos..pos + header.data_size]) {
123| 82| Ok(x) => Ok(x),
124| | // Don't bubble Need error to caller here
125| 0| Err(ParseEBMLFailed::Need(_)) => Err(ParseEBMLFailed::NotEBMLFile),
126| 0| Err(e) => Err(e),
127| | }
128| 211|}
129| |
130| 82|fn parse_ebml_head_data(input: &[u8]) -> Result<String, ParseEBMLFailed> {
131| 82| let mut cur = Cursor::new(input);
132| 410| while cur.has_remaining() {
133| 410| let h = next_element_header(&mut cur)?;
^0
134| |
135| 410| if h.id == EBMLHeaderId::DocType as u64 {
136| 82| let s = get_cstr(&mut cur, h.data_size)
137| 82| .ok_or_else(|| ParseEBMLFailed::Need(h.data_size - cur.remaining()))?;
^0 ^0
138| 82| return Ok(s);
139| 328| }
140| 328| if cur.remaining() < h.data_size {
141| 0| return Err(ParseEBMLFailed::Need(h.data_size - cur.remaining()));
142| 328| }
143| 328| cur.consume(h.data_size);
144| | }
145| 0| Err(ParseEBMLFailed::NotEBMLFile)
146| 82|}
147| |
148| 44|pub(crate) fn find_element_by_id(
149| 44| cursor: &mut Cursor<&[u8]>,
150| 44| target_id: u64,
151| 44|) -> Result<ElementHeader, ParseEBMLFailed> {
152| 44| while cursor.has_remaining() {
153| 44| let header = next_element_header(cursor)?;
^0
154| 44| if header.id == target_id {
155| 44| return Ok(header);
156| 0| }
157| 0| if cursor.remaining() < header.data_size {
158| 0| return Err(ParseEBMLFailed::Need(header.data_size - cursor.remaining()));
159| 0| }
160| |
161| 0| cursor.consume(header.data_size);
162| | }
163| 0| Err(ParseEBMLFailed::Need(1))
164| 44|}
165| |
166| 77|pub(crate) fn travel_while<F>(
167| 77| cursor: &mut Cursor<&[u8]>,
168| 77| mut predict: F,
169| 77|) -> Result<ElementHeader, ParseEBMLFailed>
170| 77|where
171| 77| F: FnMut(&ElementHeader) -> bool,
172| |{
173| 125| while cursor.has_remaining() {
174| 125| let header = next_element_header(cursor)?;
^0
175| 125| if !predict(&header) {
176| 76| return Ok(header);
177| 49| }
178| 49| if cursor.remaining() < header.data_size {
179| 1| return Err(ParseEBMLFailed::Need(header.data_size - cursor.remaining()));
180| 48| }
181| |
182| 48| cursor.consume(header.data_size);
183| | }
184| 0| Err(ParseEBMLFailed::Need(1))
185| 77|}
186| |
/// The header of one EBML element, as read by `next_element_header`.
#[derive(Clone)]
pub(crate) struct ElementHeader {
    /// Element ID, read with its length marker kept.
    pub id: u64,
    /// Declared size of the element's data in bytes.
    pub data_size: usize,
    /// Number of bytes the ID and size fields occupied.
    pub header_size: usize,
}
193| |
194| 1.65k|pub(crate) fn next_element_header(
195| 1.65k| cursor: &mut Cursor<&[u8]>,
196| 1.65k|) -> Result<ElementHeader, ParseEBMLFailed> {
197| 1.65k| let pos = cursor.position() as usize;
198| 1.65k| let id = VInt::as_u64_with_marker(cursor)?;
^1.64k ^9
199| 1.64k| let data_size = VInt::as_usize(cursor)?;
^1.63k ^4
200| 1.63k| let header_size = cursor.position() as usize - pos;
201| |
202| 1.63k| Ok(ElementHeader {
203| 1.63k| id,
204| 1.63k| data_size,
205| 1.63k| header_size,
206| 1.63k| })
207| 1.65k|}
208| |
/// Reads a `size`-byte string field at the cursor.
///
/// The text ends at the first NUL byte (or at the end of the field), and each
/// byte is converted to one `char`. The cursor always advances by the full
/// `size`. Returns `None`, without moving the cursor, when fewer than `size`
/// bytes are left.
fn get_cstr(cursor: &mut Cursor<&[u8]>, size: usize) -> Option<String> {
    let data: &[u8] = cursor.get_ref();
    // Clamp so that a position past the end simply means "nothing left".
    let start = cursor.position().min(data.len() as u64) as usize;
    let field = data[start..].get(..size)?;

    let text = field
        .iter()
        .take_while(|&&byte| byte != 0)
        .map(|&byte| byte as char)
        .collect::<String>();

    cursor.consume(size);
    Some(text)
}
221| |
/// Reads a big-endian unsigned integer of `size` bytes at the cursor.
///
/// Widths of 1 to 8 bytes are supported and advance the cursor by `size`.
/// Returns `None`, without moving the cursor, when fewer than `size` bytes
/// are left or when `size` is 0 or larger than 8.
pub(crate) fn get_as_u64(cursor: &mut Cursor<&[u8]>, size: usize) -> Option<u64> {
    let unread = (cursor.get_ref().len() as u64).saturating_sub(cursor.position());
    if unread < size as u64 {
        return None;
    }
    if !(1..=8).contains(&size) {
        return None;
    }

    // Right-align the bytes in an 8-byte buffer so that every supported
    // width decodes as one big-endian u64.
    let mut be_bytes = [0u8; 8];
    cursor.read_exact(&mut be_bytes[8 - size..]).ok()?;
    Some(u64::from_be_bytes(be_bytes))
}
245| |
/// Reads a big-endian floating point value of `size` bytes at the cursor.
///
/// * `size == 4` - the bytes are decoded as an `f32` and widened to `f64`.
/// * `size` in `5..=8` - the bytes are right-aligned in an 8-byte buffer and
///   decoded as an `f64`.
///   NOTE(review): only the 8-byte width is a meaningful `f64` encoding; the
///   handling of 5 to 7 bytes is kept as it was — confirm whether it is needed.
///
/// On success the cursor advances by `size`. Returns `None`, without moving
/// the cursor, when fewer than `size` bytes are left or when `size` is not
/// one of the widths above.
pub(crate) fn get_as_f64(cursor: &mut Cursor<&[u8]>, size: usize) -> Option<f64> {
    let unread = (cursor.get_ref().len() as u64).saturating_sub(cursor.position());
    if unread < size as u64 {
        return None;
    }

    let n = match size {
        4 => {
            // The four bytes have to be read from the cursor; decoding a
            // zeroed buffer would always yield 0.0 and leave the cursor
            // pointing at the unread value.
            let mut buf = [0u8; 4];
            cursor.read_exact(&mut buf).ok()?;
            f64::from(f32::from_be_bytes(buf))
        }
        5..=8 => {
            let mut buf = [0u8; 8];
            cursor.read_exact(&mut buf[8 - size..]).ok()?;
            f64::from_be_bytes(buf)
        }
        _ => return None,
    };

    Some(n)
}
/home/min/dev/nom-exif/src/ebml/vint.rs:
1| |use std::io::Cursor;
2| |
3| |use bytes::Buf;
4| |use thiserror::Error;
5| |
/// Namespace type for the variable-length integer decoding functions.
#[derive(Debug)]
pub(crate) struct VInt;
8| |
9| |#[derive(Debug, Error)]
10| |pub(crate) enum ParseVIntFailed {
11| | #[error("invalid VInt: {0}")]
12| | InvalidVInt(&'static str),
13| |
14| | #[error("need more bytes: {0}")]
15| | Need(usize),
16| |}
17| |
18| |impl VInt {
19| 2.37k| pub fn as_u64_with_marker(data: &mut Cursor<&[u8]>) -> Result<u64, ParseVIntFailed> {
20| 2.37k| let (remain, v) = Self::parse_unsigned(&data.get_ref()[data.position() as usize..], true)?;
^2.36k ^2.36k ^9
21| 2.36k| data.set_position(data.position() + (data.remaining() - remain.len()) as u64);
22| 2.36k| Ok(v)
23| 2.37k| }
24| |
25| 2.18k| pub fn as_usize(data: &mut Cursor<&[u8]>) -> Result<usize, ParseVIntFailed> {
26| 2.18k| let (remain, v) = Self::parse_unsigned(&data.get_ref()[data.position() as usize..], false)
^2.18k ^2.18k
27| 2.18k| .map(|(d, v)| (d, v as usize))?;
^2.18k^2.18k ^4
28| 2.18k| data.set_position(data.position() + (data.remaining() - remain.len()) as u64);
29| 2.18k| Ok(v)
30| 2.18k| }
31| |
32| 4.56k| pub(crate) fn parse_unsigned(
33| 4.56k| data: &[u8],
34| 4.56k| reserve_marker: bool,
35| 4.56k| ) -> Result<(&[u8], u64), ParseVIntFailed> {
36| 4.56k| if data.is_empty() {
37| 0| return Err(ParseVIntFailed::Need(1));
38| 4.56k| }
39| |
40| 4.56k| let n = data[0].leading_zeros() as usize + 1;
41| 4.56k| if n > data.len() {
42| 4| return Err(ParseVIntFailed::Need(n - data.len()));
43| 4.56k| }
44| 4.56k| if n > 8 {
45| 10| return Err(ParseVIntFailed::InvalidVInt("size > 8 is not supported"));
46| 4.55k| }
47| | // println!("n: {n}");
48| |
49| 4.55k| let mut octets = [0u8; 8];
50| 4.55k| let start = 8 - n;
51| 4.55k| octets[start..].copy_from_slice(&data[..n]);
52| |
53| | // remove the marker
54| 4.55k| if !reserve_marker {
55| 2.19k| if n == 8 {
56| 265| octets[0] = 0;
57| 1.92k| } else {
58| 1.92k| // println!("first byte: {:08b}", data[0]);
59| 1.92k| let first = data[0] & (0xFF >> n);
60| 1.92k| // println!("first byte: {:08b}", first);
61| 1.92k| octets[start] = first;
62| 1.92k| }
63| 2.36k| }
64| |
65| 4.55k| let v = u64::from_be_bytes(octets);
66| |
67| 4.55k| Ok((&data[n..], v))
68| 4.56k| }
69| |}
70| |
#[cfg(test)]
mod tests {
    use super::*;
    use test_case::test_case;

    /// Decodes with the marker removed; `None` means the input must be rejected.
    #[test_case(&[0b1000_0010], Some((&[], 2)))]
    #[test_case(&[0b0100_0000, 0b0000_0010], Some((&[], 2)))]
    #[test_case(&[0b0010_0000, 0b0000_0000, 0b0000_0010], Some((&[], 2)))]
    #[test_case(&[0b0001_0000, 0b0000_0000, 0b0000_0000, 0b0000_0010], Some((&[], 2)))]
    #[test_case(&[0b0001_0000, 0b0000_0000, 0b1000_0000, 0b0000_0000, 0xFF], Some((&[0xFF], 0x8000)))]
    #[test_case(&[0b0000_0001, 0b1000_0000, 0b1000_0000, 0b0000_0001], None)]
    #[test_case(&[0b0000_0010, 0b1000_1000, 0b1000_1000, 0b0000_0000, 0, 0, 0x80, 0x08], Some((&[0x08], 0x0000_8888_0000_0080)))]
    #[test_case(&[0b0000_0001, 0b1000_1000, 0b1000_1000, 0b0000_0000, 0, 0, 0x80, 0x08], Some((&[], 0x0088_8800_0000_8008)))]
    #[test_case(&[0b0000_0001, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], Some((&[], 0x00ff_ffff_ffff_ffff)))]
    fn vint_parse_u(data: &[u8], expect: Option<(&[u8], u64)>) {
        let actual = VInt::parse_unsigned(data, false);
        match expect {
            Some(expect) => assert_eq!(actual.unwrap(), expect),
            None => {
                actual.unwrap_err();
            }
        }
    }
}
/home/min/dev/nom-exif/src/ebml/webm.rs:
1| |use std::{
2| | collections::HashMap,
3| | fmt::Debug,
4| | io::{BufRead, Cursor},
5| |};
6| |
7| |use bytes::Buf;
8| |use chrono::{DateTime, NaiveDate, Utc};
9| |use nom::{error::ErrorKind, multi::many_till, Parser};
10| |use thiserror::Error;
11| |
12| |use crate::{
13| | ebml::element::{
14| | find_element_by_id, get_as_f64, get_as_u64, next_element_header, parse_ebml_doc_type,
15| | EBMLGlobalId, TopElementId,
16| | },
17| | error::{MalformedKind, ParsingError},
18| | video::{TrackInfo, TrackInfoTag},
19| | EntryValue,
20| |};
21| |
22| |use super::{
23| | element::{
24| | travel_while, ElementHeader, ParseEBMLFailed, UnknowEbmlIDError, INVALID_ELEMENT_ID,
25| | },
26| | vint::{ParseVIntFailed, VInt},
27| |};
28| |
29| |#[derive(Debug, Clone, Default)]
30| |pub struct EbmlFileInfo {
31| | #[allow(unused)]
32| | doc_type: String,
33| | segment_info: SegmentInfo,
34| | tracks_info: TracksInfo,
35| |}
36| |
37| |impl From<EbmlFileInfo> for TrackInfo {
38| 34| fn from(value: EbmlFileInfo) -> Self {
39| 34| let mut info = TrackInfo::default();
40| 34| if let Some(date) = value.segment_info.date {
^31
41| 31| info.put(
42| 31| TrackInfoTag::CreateDate,
43| 31| EntryValue::DateTime(date.fixed_offset()),
44| 31| );
45| 31| }
^3
46| 34| info.put(
47| 34| TrackInfoTag::DurationMs,
48| 34| ((value.segment_info.duration / 1000.0 / 1000.0) as u64).into(),
49| | );
50| 34| info.put(TrackInfoTag::Width, value.tracks_info.width.into());
51| 34| info.put(TrackInfoTag::Height, value.tracks_info.height.into());
52| 34| info
53| 34| }
54| |}
55| |
56| |#[derive(Debug, Error)]
57| |pub enum ParseWebmFailed {
58| | #[error("need more bytes: {0}")]
59| | Need(usize),
60| |
61| | #[error("not an WEBM file")]
62| | NotWebmFile,
63| |
64| | #[error("invalid WEBM file: {0}")]
65| | InvalidWebmFile(Box<dyn std::error::Error>),
66| |
67| | #[error("invalid seek entry")]
68| | InvalidSeekEntry,
69| |}
70| |
71| |/// Parse EBML based files, e.g.: `.webm`, `.mkv`, etc.
72| |///
73| |/// Refer to:
74| |/// - [Matroska Elements](https://www.matroska.org/technical/elements.html)
75| |/// - [EBML Specification](https://github.com/ietf-wg-cellar/ebml-specification/blob/master/specification.markdown)
76| |#[tracing::instrument(skip_all)]
77| 46|pub(crate) fn parse_webm(input: &[u8]) -> Result<EbmlFileInfo, ParsingError> {
78| 45| let (doc_type, pos) = {
79| 46| let mut cursor = Cursor::new(input);
80| 46| let doc_type = parse_ebml_doc_type(&mut cursor)?;
^45 ^1
81| 45| (doc_type, cursor.position() as usize)
82| | };
83| |
84| 45| tracing::debug!(doc_type, pos);
85| |
86| 44| let pos = {
87| 45| let mut cursor = Cursor::new(&input[pos..]);
88| 45| let header = next_element_header(&mut cursor)?;
^0
89| 45| tracing::debug!(segment_header = ?header);
90| 45| if header.id != TopElementId::Segment as u64 {
91| 1| return Err(ParseWebmFailed::NotWebmFile.into());
92| 44| }
93| 44| pos + cursor.position() as usize
94| | };
95| |
96| 44| let mut file_info = EbmlFileInfo {
97| 44| doc_type,
98| 44| ..Default::default()
99| 44| };
100| |
101| 44| let mut info_set = false;
102| 44| let mut tracks_set = false;
103| |
104| 44| if let Ok(seeks) = parse_seeks(input, pos) {
^43
105| 43| let info_seek = seeks.get(&(SegmentId::Info as u32)).cloned();
106| 43| let tracks_seek = seeks.get(&(SegmentId::Tracks as u32)).cloned();
107| 43| if let Some(pos) = info_seek {
108| 43| let info = parse_segment_info(input, pos as usize)?;
^42 ^1
109| 42| tracing::debug!(?info);
110| 42| if let Some(info) = info {
111| 42| info_set = true;
112| 42| file_info.segment_info = info;
113| 42| }
^0
114| 0| }
115| 42| if let Some(pos) = tracks_seek {
116| 42| let tracks = parse_tracks_info(input, pos as usize)?;
^40 ^2
117| 40| tracing::debug!(?tracks);
118| 40| if let Some(info) = tracks {
^36
119| 36| tracks_set = true;
120| 36| file_info.tracks_info = info;
121| 36| }
^4
122| 0| }
123| 1| }
124| |
125| 41| if !info_set {
126| | // According to the specification, The first Info Element SHOULD occur
127| | // before the first Tracks Element
128| 0| let info: Option<SegmentInfo> = {
129| 1| let mut cursor = Cursor::new(&input[pos..]);
130| 1| let header = travel_while(&mut cursor, |h| h.id != SegmentId::Info as u64)?;
^0
131| 0| parse_segment_info(
132| 0| &input[pos + cursor.position() as usize - header.header_size..],
133| | 0,
134| | )
135| 0| }?;
136| 0| tracing::debug!(?info);
137| 0| if let Some(info) = info {
138| 0| file_info.segment_info = info;
139| 0| }
140| 40| }
141| |
142| 40| if !tracks_set {
143| 4| let track = {
144| 4| let mut cursor = Cursor::new(&input[pos..]);
145| 16| let header = travel_while(&mut cursor, |h| h.id != SegmentId::Tracks as u64)?;
^4 ^4 ^4 ^0
146| 4| parse_tracks_info(
147| 4| &input[pos + cursor.position() as usize - header.header_size..],
148| | 0,
149| 0| )?
150| | };
151| 4| tracing::debug!(?track);
152| 4| if let Some(info) = track {
^0
153| 0| file_info.tracks_info = info;
154| 4| }
155| 36| }
156| |
157| 40| Ok(file_info)
158| 46|}
159| |
160| |#[derive(Debug, Clone, Default)]
161| |struct TracksInfo {
162| | width: u32,
163| | height: u32,
164| |}
165| |
166| |#[tracing::instrument(skip(input))]
167| 46|fn parse_tracks_info(input: &[u8], pos: usize) -> Result<Option<TracksInfo>, ParseWebmFailed> {
168| 46| if pos >= input.len() {
169| 0| return Err(ParseWebmFailed::Need(pos - input.len() + 1));
170| 46| }
171| 46| let mut cursor = Cursor::new(&input[pos..]);
172| 46| let header = next_element_header(&mut cursor)?;
^0
173| 46| tracing::debug!(tracks_info_header = ?header);
174| |
175| 46| if cursor.remaining() < header.data_size {
176| 2| return Err(ParseWebmFailed::Need(header.data_size - cursor.remaining()));
177| 44| }
178| |
179| | const Z: &[u8] = &[];
180| 44| let start = pos + cursor.position() as usize;
181| 44| let data = &input[start..start + header.data_size];
182| |
183| 44| if let Ok((_, (_, track))) = many_till(
184| 32| |data| {
185| 32| let mut cursor = Cursor::new(data);
186| 32| let header = next_element_header(&mut cursor)?;
^0
187| 32| cursor.consume(std::cmp::min(cursor.remaining(), header.data_size));
188| 32| Ok((&data[cursor.position() as usize..], ()))
189| 32| },
190| 76| |data| {
191| 76| let mut cursor = Cursor::new(data);
192| 76| let header = next_element_header(&mut cursor)?;
^0
193| 76| tracing::debug!(tracks_sub_track_entry = ?header);
194| 76| if header.id != TracksId::TrackEntry as u64 {
195| 32| return Err(nom::Err::Error((Z, ErrorKind::Fail)));
196| 44| };
197| |
198| 44| if cursor.remaining() < header.data_size {
199| 0| return Err(nom::Err::Error((Z, ErrorKind::Fail)));
200| 44| }
201| |
202| 44| let track = parse_track(&cursor.chunk()[..header.data_size]).map(|x| {
203| 44| x.map(|x| TracksInfo {
204| 36| width: x.width,
205| 36| height: x.height,
206| 36| })
207| 44| })?;
^0
208| |
209| 44| Ok((Z, track))
210| 76| },
211| | )
212| 44| .parse(data)
213| | {
214| 44| Ok(track)
215| | } else {
216| 0| Ok(None)
217| | }
218| |
219| | // let mut cursor = Cursor::new(&cursor.chunk()[..header.data_size]);
220| | // let header = match travel_while(&mut cursor, |h| h.id != TracksId::VideoTrack as u64) {
221| | // Ok(x) => x,
222| | // // Don't bubble Need error to caller here
223| | // Err(ParseEBMLFailed::Need(_)) => return Ok(None),
224| | // Err(e) => return Err(e.into()),
225| | // };
226| | // tracing::debug!(?header, "video track");
227| |
228| | // if cursor.remaining() < header.data_size {
229| | // return Err(ParseWebmFailed::Need(header.data_size - cursor.remaining()));
230| | // }
231| |
232| | // match parse_track(&cursor.chunk()[..header.data_size]).map(|x| {
233| | // x.map(|x| TracksInfo {
234| | // width: x.width,
235| | // height: x.height,
236| | // })
237| | // }) {
238| | // Ok(x) => Ok(x),
239| | // // Don't bubble Need error to caller here
240| | // Err(ParseWebmFailed::Need(_)) => Ok(None),
241| | // Err(e) => Err(e),
242| | // }
243| 46|}
244| |
245| 44|fn parse_track(input: &[u8]) -> Result<Option<VideoTrackInfo>, ParseWebmFailed> {
246| 44| let mut cursor = Cursor::new(input);
247| |
248| 352| while cursor.has_remaining() {
249| 344| let header = next_element_header(&mut cursor)?;
^0
250| 344| tracing::debug!(?header, "track sub-element");
251| |
252| 344| let id = TryInto::<TracksId>::try_into(header.id);
253| 344| let pos = cursor.position() as usize;
254| 344| cursor.consume(header.data_size);
255| |
256| 344| let Ok(id) = id else {
^80
257| 264| continue;
258| | };
259| |
260| 80| if id == TracksId::VideoTrack {
261| 36| let end = pos + header.data_size;
262| 36| if end > input.len() {
263| 0| tracing::warn!(
264| | ?pos,
265| 0| end = pos + header.data_size,
266| 0| input_len = input.len(),
267| | "invalid track sub-element"
268| | );
269| 0| continue;
270| 36| }
271| | // Safe-slice
272| 36| return parse_video_track(&input[pos..pos + header.data_size]);
273| 44| }
274| | }
275| 8| Ok(None)
276| 44|}
277| |
278| 36|fn parse_video_track(input: &[u8]) -> Result<Option<VideoTrackInfo>, ParseWebmFailed> {
279| 36| let mut cursor = Cursor::new(input);
280| 36| let mut info = VideoTrackInfo::default();
281| |
282| 36| let header = travel_while(&mut cursor, |h| h.id != TracksId::PixelWidth as u64)?;
^0
283| 36| tracing::debug!(?header, "video track width element");
284| 36| if let Some(v) = get_as_u64(&mut cursor, header.data_size) {
285| 36| info.width = v as u32;
286| 36| }
^0
287| |
288| | // search from beginning
289| 36| cursor.set_position(0);
290| 72| let header = travel_while(&mut cursor, |h| h.id != TracksId::PixelHeight as u64)?;
^36 ^36 ^36 ^0
291| 36| tracing::debug!(?header, "video track height element");
292| 36| if let Some(v) = get_as_u64(&mut cursor, header.data_size) {
293| 36| info.height = v as u32;
294| 36| }
^0
295| |
296| 36| if info == VideoTrackInfo::default() {
297| 0| Ok(None)
298| | } else {
299| 36| Ok(Some(info))
300| | }
301| 36|}
302| |
303| |#[derive(Debug, Clone, Default, PartialEq, Eq)]
304| |struct VideoTrackInfo {
305| | width: u32,
306| | height: u32,
307| |}
308| |
309| |#[derive(Debug, Clone, Default)]
310| |struct SegmentInfo {
311| | // in nano seconds
312| | duration: f64,
313| | date: Option<DateTime<Utc>>,
314| |}
315| |
316| |#[tracing::instrument(skip(input))]
317| 43|fn parse_segment_info(input: &[u8], pos: usize) -> Result<Option<SegmentInfo>, ParsingError> {
318| 43| if pos >= input.len() {
319| 0| return Err(ParsingError::Need(pos - input.len() + 1));
320| 43| }
321| 43| let mut cursor = Cursor::new(&input[pos..]);
322| 43| let header = next_element_header(&mut cursor)?;
^42 ^1
323| 42| tracing::debug!(segment_info_header = ?header);
324| |
325| 42| if cursor.remaining() < header.data_size {
326| 0| return Err(ParsingError::Need(header.data_size - cursor.remaining()));
327| 42| }
328| |
329| 42| let mut cursor = Cursor::new(&cursor.chunk()[..header.data_size]);
330| 42| match parse_segment_info_body(&mut cursor) {
331| 42| Ok(x) => Ok(Some(x)),
332| | // Don't bubble Need error to caller here
333| 0| Err(ParsingError::Need(_)) => Ok(None),
334| 0| Err(e) => Err(e),
335| | }
336| 43|}
337| |
338| 42|fn parse_segment_info_body(cursor: &mut Cursor<&[u8]>) -> Result<SegmentInfo, ParsingError> {
339| | // timestamp in nanosecond = element value * TimestampScale
340| | // By default, one segment tick represents one millisecond
341| 42| let mut time_scale = 1_000_000;
342| 42| let mut info = SegmentInfo::default();
343| |
344| 316| while cursor.has_remaining() {
345| 274| let header = next_element_header(cursor)?;
^0
346| 274| let id = TryInto::<InfoId>::try_into(header.id);
347| 274| tracing::debug!(?header, "segment info sub-element");
348| |
349| 274| if let Ok(id) = id {
^122
350| 122| match id {
351| | InfoId::TimestampScale => {
352| 42| if let Some(v) = get_as_u64(cursor, header.data_size) {
353| 42| time_scale = v;
354| 42| }
^0
355| | }
356| | InfoId::Duration => {
357| 42| if let Some(v) = get_as_f64(cursor, header.data_size) {
358| 42| info.duration = v * time_scale as f64;
359| 42| }
^0
360| | }
361| | InfoId::Date => {
362| 38| if let Some(v) = get_as_u64(cursor, header.data_size) {
363| 38| // webm date is a 2001 based timestamp
364| 38| let dt = NaiveDate::from_ymd_opt(2001, 1, 1)
365| 38| .unwrap()
366| 38| .and_hms_opt(0, 0, 0)
367| 38| .unwrap()
368| 38| .and_utc();
369| 38| let diff = dt - DateTime::from_timestamp_nanos(0);
370| 38| info.date = Some(DateTime::from_timestamp_nanos(v as i64) + diff);
371| 38| }
^0
372| | }
373| | }
374| 152| } else {
375| 152| cursor.consume(header.data_size);
376| 152| }
377| | }
378| |
379| 42| Ok(info)
380| 42|}
381| |
382| 44|fn parse_seeks(input: &[u8], pos: usize) -> Result<HashMap<u32, u64>, ParsingError> {
383| 44| let mut cursor = Cursor::new(&input[pos..]);
384| | // find SeekHead element
385| 44| let header = find_element_by_id(&mut cursor, SegmentId::SeekHead as u64)?;
^0
386| 44| tracing::debug!(segment_header = ?header);
387| 44| if cursor.remaining() < header.data_size {
388| 1| return Err(ParsingError::Need(header.data_size - cursor.remaining()));
389| 43| }
390| |
391| 43| let header_pos = pos + cursor.position() as usize - header.header_size;
392| 43| let mut cur = Cursor::new(&cursor.chunk()[..header.data_size]);
393| 43| let mut seeks = parse_seek_head(&mut cur)?;
^0
394| 172| for (_, pos) in seeks.iter_mut() {
^43 ^43
395| 172| *pos += header_pos as u64;
396| 172| }
397| 43| Ok(seeks)
398| 44|}
399| |
400| |#[derive(Clone)]
401| |struct SeekEntry {
402| | seek_id: u32,
403| | seek_pos: u64,
404| |}
405| |
406| |impl Debug for SeekEntry {
407| 2| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
408| 2| let id = self.seek_id as u64;
409| 2| let s = TryInto::<TopElementId>::try_into(id)
410| 2| .map(|x| format!("{x:?}"))
^0
411| 2| .or_else(|_| TryInto::<SegmentId>::try_into(id).map(|x| format!("{x:?}")))
^1
412| 2| .unwrap_or_else(|_| format!("0x{:04x}", id));
^1
413| 2| f.debug_struct("SeekEntry")
414| 2| .field("seekId", &s)
415| 2| .field("seekPosition", &self.seek_pos.to_string())
416| 2| .finish()
417| 2| }
418| |}
419| |
420| |#[tracing::instrument(skip_all)]
421| 43|fn parse_seek_head(input: &mut Cursor<&[u8]>) -> Result<HashMap<u32, u64>, ParseWebmFailed> {
422| 43| let mut entries = HashMap::new();
423| 247| while input.has_remaining() {
424| 204| match parse_seek_entry(input) {
425| 172| Ok(Some(entry)) => {
426| 172| tracing::debug!(seek_entry=?entry);
427| 172| entries.insert(entry.seek_id, entry.seek_pos);
428| | }
429| 32| Ok(None) => {
430| 32| // tracing::debug!("Void or Crc32 Element");
431| 32| }
432| | Err(ParseWebmFailed::InvalidSeekEntry) => {
433| 0| tracing::debug!("ignore invalid seek entry");
434| | }
435| 0| Err(e) => return Err(e),
436| | };
437| | }
438| 43| Ok(entries)
439| 43|}
440| |
441| 204|fn parse_seek_entry(input: &mut Cursor<&[u8]>) -> Result<Option<SeekEntry>, ParseWebmFailed> {
442| | // 0xFF is an invalid ID
443| 204| let mut seek_id = INVALID_ELEMENT_ID as u32;
444| 204| let mut seek_pos = 0u64;
445| |
446| 204| let id = VInt::as_u64_with_marker(input)?;
^0
447| 204| let data_size = VInt::as_usize(input)?;
^0
448| 204| if input.remaining() < data_size {
449| 0| return Err(ParseWebmFailed::Need(data_size - input.remaining()));
450| 204| }
451| |
452| 204| if id != SeekHeadId::Seek as u64 {
453| 32| input.consume(data_size);
454| 32| if id == EBMLGlobalId::Crc32 as u64 || id == EBMLGlobalId::Void as u64 {
^0
455| 32| return Ok(None);
456| 0| }
457| 0| tracing::debug!(
458| 0| id = format!("0x{id:x}"),
459| | "invalid seek entry: id != 0x{:x}",
460| 0| SeekHeadId::Seek as u32
461| | );
462| 0| return Err(ParseWebmFailed::InvalidSeekEntry);
463| 172| }
464| |
465| 172| let pos = input.position() as usize;
466| 172| input.consume(data_size);
467| 172| let mut buf = Cursor::new(&input.get_ref()[pos..pos + data_size]);
468| |
469| 344| while buf.has_remaining() {
470| 344| let id = VInt::as_u64_with_marker(&mut buf)?;
^0
471| 344| let size = VInt::as_usize(&mut buf)?;
^0
472| |
473| 344| match id {
474| 344| x if x == SeekHeadId::SeekId as u64 => {
^172
475| 172| seek_id = VInt::as_u64_with_marker(&mut buf)? as u32;
^0
476| | }
477| 172| x if x == SeekHeadId::SeekPosition as u64 => {
478| | seek_pos =
479| 172| get_as_u64(&mut buf, size).ok_or_else(|| ParseWebmFailed::InvalidSeekEntry)?;
^0
480| | }
481| | _ => {
482| 0| tracing::debug!(id = format!("0x{id:x}"), "invalid seek entry");
483| 0| return Err(ParseWebmFailed::InvalidSeekEntry);
484| | }
485| | }
486| |
487| 344| if seek_id != INVALID_ELEMENT_ID as u32 && seek_pos != 0 {
488| 172| break;
489| 172| }
490| | }
491| |
492| 172| if seek_id == INVALID_ELEMENT_ID as u32 || seek_pos == 0 {
493| 0| return Err(ParseWebmFailed::InvalidSeekEntry);
494| 172| }
495| |
496| 172| Ok(Some(SeekEntry { seek_id, seek_pos }))
497| 204|}
498| |
499| |#[derive(Debug, Clone, Copy)]
500| |enum SegmentId {
501| | SeekHead = 0x114D9B74,
502| | Info = 0x1549A966,
503| | Tracks = 0x1654AE6B,
504| | Cluster = 0x1F43B675,
505| | Cues = 0x1C53BB6B,
506| |}
507| |
508| |#[derive(Debug, Clone, Copy)]
509| |enum InfoId {
510| | TimestampScale = 0x2AD7B1,
511| | Duration = 0x4489,
512| | Date = 0x4461,
513| |}
514| |
515| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
516| |enum TracksId {
517| | TrackEntry = 0xAE,
518| | TrackType = 0x83,
519| | VideoTrack = 0xE0,
520| | PixelWidth = 0xB0,
521| | PixelHeight = 0xBA,
522| |}
523| |
524| |impl TryFrom<u64> for TracksId {
525| | type Error = UnknowEbmlIDError;
526| 345| fn try_from(v: u64) -> Result<Self, Self::Error> {
527| 345| let id = match v {
^80
528| 345| x if x == Self::TrackEntry as u64 => Self::TrackEntry,
^0 ^0
529| 345| x if x == Self::TrackType as u64 => Self::TrackType,
^44 ^44
530| 301| x if x == Self::VideoTrack as u64 => Self::VideoTrack,
^36 ^36
531| 265| x if x == Self::PixelWidth as u64 => Self::PixelWidth,
^0 ^0
532| 265| x if x == Self::PixelHeight as u64 => Self::PixelHeight,
^0 ^0
533| 265| o => return Err(UnknowEbmlIDError(o)),
534| | };
535| 80| Ok(id)
536| 345| }
537| |}
538| |
539| |impl TryFrom<u64> for InfoId {
540| | type Error = UnknowEbmlIDError;
541| 275| fn try_from(v: u64) -> Result<Self, Self::Error> {
542| 275| let id = match v {
^122
543| 275| x if x == Self::TimestampScale as u64 => Self::TimestampScale,
^42 ^42
544| 233| x if x == Self::Duration as u64 => Self::Duration,
^42 ^42
545| 191| x if x == Self::Date as u64 => Self::Date,
^38 ^38
546| 153| o => return Err(UnknowEbmlIDError(o)),
547| | };
548| 122| Ok(id)
549| 275| }
550| |}
551| |
552| |#[derive(Debug, Clone, Copy)]
553| |enum SeekHeadId {
554| | Seek = 0x4DBB,
555| | SeekId = 0x53AB,
556| | SeekPosition = 0x53AC,
557| |}
558| |
559| |impl TryFrom<u64> for SegmentId {
560| | type Error = UnknowEbmlIDError;
561| 14| fn try_from(v: u64) -> Result<Self, Self::Error> {
562| 14| let id = match v {
^11
563| 14| x if x == Self::SeekHead as u64 => Self::SeekHead,
^2 ^2
564| 12| x if x == Self::Info as u64 => Self::Info,
^3 ^3
565| 9| x if x == Self::Tracks as u64 => Self::Tracks,
^2 ^2
566| 7| x if x == Self::Cluster as u64 => Self::Cluster,
^2 ^2
567| 5| x if x == Self::Cues as u64 => Self::Cues,
^2 ^2
568| 3| o => return Err(UnknowEbmlIDError(o)),
569| | };
570| 11| Ok(id)
571| 14| }
572| |}
573| |
574| |impl Debug for ElementHeader {
575| 7| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
576| 7| let s = TryInto::<TopElementId>::try_into(self.id)
577| 7| .map(|x| format!("{x:?}"))
^1
578| 7| .or_else(|_| TryInto::<SegmentId>::try_into(self.id).map(|x| format!("{x:?}")))
^6 ^6 ^6 ^5
579| 7| .or_else(|_| TryInto::<InfoId>::try_into(self.id).map(|x| format!("{x:?}")))
^1 ^1 ^1 ^0
580| 7| .or_else(|_| TryInto::<TracksId>::try_into(self.id).map(|x| format!("{x:?}")))
^1 ^1 ^1 ^0
581| 7| .unwrap_or_else(|_| format!("0x{:04x}", self.id));
^1
582| 7| f.debug_struct("ElementHeader")
583| 7| .field("id", &s)
584| 7| .field("data_size", &self.data_size.to_string())
585| 7| .finish()
586| 7| }
587| |}
588| |
589| |impl From<ParseEBMLFailed> for ParseWebmFailed {
590| 2| fn from(value: ParseEBMLFailed) -> Self {
591| 2| match value {
592| 1| ParseEBMLFailed::Need(i) => Self::Need(i),
593| 0| ParseEBMLFailed::NotEBMLFile => Self::NotWebmFile,
594| 1| ParseEBMLFailed::InvalidEBMLFile(e) => Self::InvalidWebmFile(e),
595| | }
596| 2| }
597| |}
598| |
599| |impl From<ParseEBMLFailed> for ParsingError {
600| 5| fn from(value: ParseEBMLFailed) -> Self {
601| 5| match value {
602| 3| ParseEBMLFailed::Need(i) => ParsingError::Need(i),
603| | ParseEBMLFailed::NotEBMLFile | ParseEBMLFailed::InvalidEBMLFile(_) => {
604| 2| ParsingError::Failed {
605| 2| kind: MalformedKind::EbmlElement,
606| 2| message: value.to_string(),
607| 2| }
608| | }
609| | }
610| 5| }
611| |}
612| |
613| |impl From<ParseVIntFailed> for ParseWebmFailed {
614| 2| fn from(value: ParseVIntFailed) -> Self {
615| 2| match value {
616| 1| ParseVIntFailed::InvalidVInt(e) => Self::InvalidWebmFile(e.into()),
617| 1| ParseVIntFailed::Need(i) => Self::Need(i),
618| | }
619| 2| }
620| |}
621| |
622| |impl From<ParseVIntFailed> for ParsingError {
623| 1| fn from(value: ParseVIntFailed) -> Self {
624| 1| match value {
625| 0| ParseVIntFailed::InvalidVInt(_) => Self::Failed {
626| 0| kind: MalformedKind::EbmlElement,
627| 0| message: value.to_string(),
628| 0| },
629| 1| ParseVIntFailed::Need(i) => Self::Need(i),
630| | }
631| 1| }
632| |}
633| |
634| |impl From<ParseWebmFailed> for ParsingError {
635| 8| fn from(value: ParseWebmFailed) -> Self {
636| 8| match value {
637| | ParseWebmFailed::NotWebmFile
638| | | ParseWebmFailed::InvalidWebmFile(_)
639| 5| | ParseWebmFailed::InvalidSeekEntry => Self::Failed {
640| 5| kind: MalformedKind::EbmlElement,
641| 5| message: value.to_string(),
642| 5| },
643| 3| ParseWebmFailed::Need(n) => Self::Need(n),
644| | }
645| 8| }
646| |}
647| |
648| |impl From<ParseEBMLFailed> for nom::Err<(&[u8], ErrorKind)> {
649| 0| fn from(value: ParseEBMLFailed) -> Self {
650| 0| match value {
651| | // Don't bubble Need error to caller, since we only use nom for
652| | // complete data here.
653| | ParseEBMLFailed::Need(_)
654| | | ParseEBMLFailed::NotEBMLFile
655| 0| | ParseEBMLFailed::InvalidEBMLFile(_) => nom::Err::Error((&[], ErrorKind::Fail)),
656| | }
657| 0| }
658| |}
659| |
660| |impl From<ParseWebmFailed> for nom::Err<(&[u8], ErrorKind)> {
661| 0| fn from(_: ParseWebmFailed) -> Self {
662| | // Don't bubble Need error to caller, since we only use nom for
663| | // complete data here.
664| 0| nom::Err::Error((&[], ErrorKind::Fail))
665| 0| }
666| |}
667| |
668| |#[cfg(test)]
669| |mod tests {
670| | use super::*;
671| | use crate::testkit::read_sample;
672| |
673| | #[test]
674| 1| fn webm_happy_path() {
675| | // Exercises parse_webm against full files for the three EBML containers
676| | // we ship fixtures for.
677| 3| for path in &["webm_480.webm", "mkv_640x360.mkv", "mka.mka"] {
^1
678| 3| let buf = read_sample(path).unwrap();
679| 3| let info = parse_webm(&buf).unwrap();
680| 3| // Just assert no panic; field values vary per file.
681| 3| let _ = format!("{:?}", info);
682| 3| }
683| 1| }
684| |
685| | #[test]
686| 1| fn webm_rejects_non_webm_input() {
687| | // JPEG lead bytes are rejected by parse_ebml_doc_type long before the
688| | // header-id check; this covers the early-error path out of parse_webm,
689| | // NOT the line-91 NotWebmFile branch (that's
690| | // webm_valid_ebml_header_but_no_segment).
691| 1| let buf = read_sample("exif.jpg").unwrap();
692| 1| let err = parse_webm(&buf[..256]).unwrap_err();
693| 1| assert!(
694| 0| matches!(
695| 1| err,
696| | ParsingError::Failed {
697| | kind: MalformedKind::EbmlElement,
698| | ..
699| | }
700| | ),
701| | "expected ParsingError::Failed(EbmlElement) from doc_type parse error, got {err:?}"
702| | );
703| 1| }
704| |
705| | #[test]
706| 1| fn webm_truncated_yields_need() {
707| | // Truncate after the EBML header but before Segment body — must produce
708| | // a Need error or similar (covers Need-error paths and truncation
709| | // handling in parse_tracks_info/parse_segment_info).
710| 1| let buf = read_sample("webm_480.webm").unwrap();
711| 4| for cut in &[64usize, 128, 256, 512] {
^1
712| 4| if *cut < buf.len() {
713| 4| // Either succeeds with partial info or errors — both fine.
714| 4| let _ = parse_webm(&buf[..*cut]);
715| 4| }
^0
716| | }
717| 1| }
718| |
719| | #[test]
720| 1| fn webm_truncated_at_tracks() {
721| | // Truncate inside the Tracks element specifically — chases the
722| | // cursor.remaining() < header.data_size branch (line 176).
723| 1| let buf = read_sample("mkv_640x360.mkv").unwrap();
724| 1| let n = buf.len();
725| 3| for cut in [n * 3 / 4, n * 7 / 8, n - 64] {
^1^1
726| 3| if cut > 64 && cut < n {
727| 3| let _ = parse_webm(&buf[..cut]);
728| 3| }
^0
729| | }
730| 1| }
731| |
732| | #[test]
733| 1| fn webm_valid_ebml_header_but_no_segment() {
734| | // Reuse the first 0x2B bytes of webm_480.webm (a complete EBML header
735| | // with DocType="webm") and append a non-Segment top-level element
736| | // (Void 0xEC). This drives parse_webm past parse_ebml_doc_type to the
737| | // header.id != Segment branch (the NotWebmFile early-out at line 91).
738| 1| let buf = read_sample("webm_480.webm").unwrap();
739| | // Header in fixture ends just before the Segment (0x18538067) at 0x2B.
740| 1| let mut synthetic = buf[..0x2B].to_vec();
741| | // Append a Void element (id=0xEC, data_size=0x80 -> empty).
742| 1| synthetic.extend_from_slice(&[0xEC, 0x80]);
743| 1| let err = parse_webm(&synthetic).unwrap_err();
744| 1| assert!(
745| 0| matches!(
746| 1| err,
747| | ParsingError::Failed {
748| | kind: MalformedKind::EbmlElement,
749| | ..
750| | }
751| | ),
752| | "expected ParsingError::Failed from NotWebmFile branch, got {err:?}"
753| | );
754| 1| }
755| |
756| | #[test]
757| 1| fn webm_exercise_debug_impls() {
758| | // Force ParseWebmFailed enum Debug/Display through their formatting,
759| | // plus the From<ParseEBMLFailed>/From<ParseVIntFailed>/From<...> impls
760| | // for ParsingError + nom::Err — these are otherwise dead in tests.
761| 3| for v in [
762| 1| ParseWebmFailed::Need(7),
763| 1| ParseWebmFailed::NotWebmFile,
764| 1| ParseWebmFailed::InvalidSeekEntry,
765| 3| ] {
766| 3| let _ = format!("{v}");
767| 3| let _ = format!("{v:?}");
768| 3| let _: ParsingError = v.into();
769| 3| }
770| | // Round-trip ParseVIntFailed and ParseEBMLFailed through their
771| | // From impls into ParseWebmFailed and ParsingError.
772| 1| let need_vint = ParseVIntFailed::Need(3);
773| 1| let _: ParseWebmFailed = need_vint.into();
774| 1| let need_vint2 = ParseVIntFailed::Need(3);
775| 1| let _: ParsingError = need_vint2.into();
776| |
777| 1| let need_ebml = ParseEBMLFailed::Need(5);
778| 1| let _: ParseWebmFailed = need_ebml.into();
779| 1| let need_ebml2 = ParseEBMLFailed::Need(5);
780| 1| let _: ParsingError = need_ebml2.into();
781| 1| let not_ebml: ParsingError = ParseEBMLFailed::NotEBMLFile.into();
782| 1| let _ = format!("{not_ebml:?}");
783| |
784| | // InvalidVInt and InvalidEBMLFile arms collapse into InvalidWebmFile.
785| 1| let bad_vint: ParseWebmFailed = ParseVIntFailed::InvalidVInt("test").into();
786| 1| let _: ParsingError = bad_vint.into();
787| 1| let bad_ebml: ParseWebmFailed =
788| 1| ParseEBMLFailed::InvalidEBMLFile(Box::new(std::io::Error::other("test"))).into();
789| 1| let _: ParsingError = bad_ebml.into();
790| |
791| | // SeekEntry Debug — both the known-id (SegmentId::Info) branch and
792| | // the unknown-id (falls through to hex) branch.
793| 1| let known = SeekEntry {
794| 1| seek_id: SegmentId::Info as u32,
795| 1| seek_pos: 42,
796| 1| };
797| 1| let _ = format!("{known:?}");
798| 1| let unknown = SeekEntry {
799| 1| seek_id: 0xDEAD_BEEF,
800| 1| seek_pos: 0,
801| 1| };
802| 1| let _ = format!("{unknown:?}");
803| |
804| | // ElementHeader Debug — exercise each TryInto fallback rung by
805| | // formatting headers whose ids resolve through Top/Segment/Info/Tracks
806| | // plus an unknown id that bottoms out at the hex format.
807| 7| for id in [
808| 1| TopElementId::Ebml as u64,
809| 1| SegmentId::SeekHead as u64,
810| 1| SegmentId::Info as u64,
811| 1| SegmentId::Tracks as u64,
812| 1| SegmentId::Cluster as u64,
813| 1| SegmentId::Cues as u64,
814| 1| 0x4242_4242u64, // unknown -> hex fallback
815| 7| ] {
816| 7| let h = ElementHeader {
817| 7| id,
818| 7| data_size: 4,
819| 7| header_size: 2,
820| 7| };
821| 7| let _ = format!("{h:?}");
822| 7| }
823| |
824| | // SegmentId::TryFrom for every variant + the error path.
825| 5| for v in [
826| 1| SegmentId::SeekHead as u64,
827| 1| SegmentId::Info as u64,
828| 1| SegmentId::Tracks as u64,
829| 1| SegmentId::Cluster as u64,
830| 1| SegmentId::Cues as u64,
831| 5| ] {
832| 5| let id: SegmentId = v.try_into().unwrap();
833| 5| let _ = format!("{id:?}");
834| 5| }
835| 1| assert!(TryInto::<SegmentId>::try_into(0u64).is_err());
836| 1| }
837| |}
/home/min/dev/nom-exif/src/error.rs:
1| |use std::fmt::{Debug, Display};
2| |use thiserror::Error;
3| |
4| |/// Top-level error returned by `read_exif`, `MediaParser::parse_*`,
5| |/// `MediaSource::open`, and any other public function that touches a file.
6| |///
7| |/// `#[non_exhaustive]` — downstream code MUST use a `_ =>` fallback in `match`
8| |/// to remain compatible with future variants.
9| |#[derive(Debug, Error)]
10| |#[non_exhaustive]
11| |pub enum Error {
12| | #[error("io error: {0}")]
13| | Io(#[from] std::io::Error),
14| |
15| | #[error("unsupported media format")]
16| | UnsupportedFormat,
17| |
18| | #[error("no exif data found in this file")]
19| | ExifNotFound,
20| |
21| | #[error("no track info found in this file")]
22| | TrackNotFound,
23| |
24| | /// Data was recognized as the target format but its inner structure is broken.
25| | #[error("malformed {kind}: {message}")]
26| | Malformed {
27| | kind: MalformedKind,
28| | message: String,
29| | },
30| |
31| | /// Parsing needed more bytes but the stream ended.
32| | #[error("unexpected end of input while parsing {context}")]
33| | UnexpectedEof { context: &'static str },
34| |}
35| |
36| |#[derive(Debug, Error)]
37| |pub(crate) enum ParsedError {
38| | #[error("no enough bytes")]
39| | NoEnoughBytes,
40| |
41| | #[error("io error: {0}")]
42| | IOError(std::io::Error),
43| |
44| | #[error("malformed {kind}: {message}")]
45| | Failed {
46| | kind: MalformedKind,
47| | message: String,
48| | },
49| |}
50| |
51| |/// Due to the fact that metadata in MOV files is typically located at the end
52| |/// of the file, conventional parsing methods would require reading a
53| |/// significant amount of unnecessary data during the parsing process. This
54| |/// would impact the performance of the parsing program and consume more memory.
55| |///
56| |/// To address this issue, we have defined an `Error::Skip` enumeration type to
57| |/// inform the caller that certain bytes in the parsing process are not required
58| |/// and can be skipped directly. The specific method of skipping can be
59| |/// determined by the caller based on the situation. For example:
60| |///
61| |/// - For files, you can quickly skip using a `Seek` operation.
62| |///
63| |/// - For network byte streams, you may need to skip these bytes through read
64| |/// operations, or preferably, by designing an appropriate network protocol for
65| |/// skipping.
66| |///
67| |/// # [`ParsingError::ClearAndSkip`]
68| |///
69| |/// Please note that when the caller receives an `Error::Skip(n)` error, it
70| |/// should be understood as follows:
71| |///
72| |/// - The parsing program has already consumed all available data and needs to
73| |/// skip n bytes further.
74| |///
75| |/// - After skipping n bytes, it should continue to read subsequent data to fill
76| |/// the buffer and use it as input for the parsing function.
77| |///
78| |/// - The next time the parsing function is called (usually within a loop), the
79| |/// previously consumed data (including the skipped bytes) should be ignored,
80| |/// and only the newly read data should be passed in.
81| |///
82| |/// # [`ParsingError::Need`]
83| |///
84| |/// Additionally, to simplify error handling, we have integrated
85| |/// `nom::Err::Incomplete` error into `Error::Need`. This allows us to use the
86| |/// same error type to notify the caller that we require more bytes to continue
87| |/// parsing.
88| |#[derive(Debug, Error)]
89| |pub(crate) enum ParsingError {
90| | #[error("need more bytes: {0}")]
91| | Need(usize),
92| |
93| | #[error("clear and skip bytes: {0:?}")]
94| | ClearAndSkip(usize),
95| |
96| | #[error("malformed {kind}: {message}")]
97| | Failed {
98| | kind: MalformedKind,
99| | message: String,
100| | },
101| |}
102| |
103| |#[derive(Debug, Error)]
104| |pub(crate) struct ParsingErrorState {
105| | pub err: ParsingError,
106| | pub state: Option<ParsingState>,
107| |}
108| |
109| |impl ParsingErrorState {
110| 167| pub fn new(err: ParsingError, state: Option<ParsingState>) -> Self {
111| 167| Self { err, state }
112| 167| }
113| |}
114| |
115| |impl Display for ParsingErrorState {
116| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117| 0| Display::fmt(
118| 0| &format!(
119| | "ParsingError(err: {}, state: {})",
120| | self.err,
121| 0| self.state
122| 0| .as_ref()
123| 0| .map(|x| x.to_string())
124| 0| .unwrap_or("None".to_string())
125| | ),
126| 0| f,
127| | )
128| 0| }
129| |}
130| |
131| |impl From<std::io::Error> for ParsedError {
132| 6| fn from(value: std::io::Error) -> Self {
133| 6| Self::IOError(value)
134| 6| }
135| |}
136| |
137| |impl From<ParsedError> for crate::Error {
138| 9| fn from(value: ParsedError) -> Self {
139| 9| match value {
140| 0| ParsedError::NoEnoughBytes => Self::UnexpectedEof {
141| 0| context: "media stream",
142| 0| },
143| 6| ParsedError::IOError(e) => Self::Io(e),
144| 3| ParsedError::Failed { kind, message } => Self::Malformed { kind, message },
145| | }
146| 9| }
147| |}
148| |
149| |use crate::parser::ParsingState;
150| |
151| |/// Convert a nom error into `crate::Error` with the supplied `kind`.
152| |/// Replaces the old blanket `From<nom::Err<...>> for crate::Error` impl,
153| |/// which hard-coded `MalformedKind::TiffHeader` for every caller
154| |/// regardless of context. Use this with `.map_err(|e| ...)` at sites
155| |/// that previously relied on `?` doing the implicit conversion.
156| 60|pub(crate) fn nom_err_to_malformed<T: Debug>(
157| 60| e: nom::Err<nom::error::Error<T>>,
158| 60| kind: MalformedKind,
159| 60|) -> crate::Error {
160| 60| let message = match e {
161| 26| nom::Err::Incomplete(_) => format!("{e}"),
162| 34| nom::Err::Error(e) | nom::Err::Failure(e) => e.code.description().to_string(),
^0
163| | };
164| 60| crate::Error::Malformed { kind, message }
165| 60|}
166| |
167| 59|pub(crate) fn nom_error_to_parsing_error_with_state(
168| 59| e: nom::Err<nom::error::Error<&[u8]>>,
169| 59| kind: MalformedKind,
170| 59| state: Option<ParsingState>,
171| 59|) -> ParsingErrorState {
172| 59| match e {
173| 59| nom::Err::Incomplete(needed) => match needed {
174| 0| nom::Needed::Unknown => ParsingErrorState::new(ParsingError::Need(1), state),
175| 59| nom::Needed::Size(n) => ParsingErrorState::new(ParsingError::Need(n.get()), state),
176| | },
177| 0| nom::Err::Failure(e) | nom::Err::Error(e) => ParsingErrorState::new(
178| 0| ParsingError::Failed {
179| 0| kind,
180| 0| message: e.code.description().to_string(),
181| 0| },
182| 0| state,
183| | ),
184| | }
185| 59|}
186| |
187| |/// Categorizes the *structural unit* that produced a `Error::Malformed`.
188| |///
189| |/// Variants describe the kind of bytes that failed to parse (a JPEG segment,
190| |/// a TIFF header, an IFD entry, an ISO BMFF box, an EBML element, a PNG
191| |/// chunk), not the outer file format. Format-specific context — e.g. "cr3:",
192| |/// "heif idat:" — is conveyed in the accompanying `message` string.
193| |///
194| |/// This intentionally avoids a parallel format-level taxonomy (`Heif`,
195| |/// `Cr3Container`, `Raf`, …): those families are all built on top of one of
196| |/// the structural units listed here, so adding a row per format would create
197| |/// non-orthogonal categories that overlap with the structural ones.
198| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
199| |#[non_exhaustive]
200| |pub enum MalformedKind {
201| | JpegSegment,
202| | TiffHeader,
203| | IfdEntry,
204| | IsoBmffBox,
205| | EbmlElement,
206| | PngChunk,
207| |}
208| |
209| |impl std::fmt::Display for MalformedKind {
210| 1| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
211| 1| let s = match self {
212| 1| Self::JpegSegment => "jpeg segment",
213| 0| Self::TiffHeader => "tiff header",
214| 0| Self::IfdEntry => "ifd entry",
215| 0| Self::IsoBmffBox => "iso-bmff box",
216| 0| Self::EbmlElement => "ebml element",
217| 0| Self::PngChunk => "png chunk",
218| | };
219| 1| f.write_str(s)
220| 1| }
221| |}
222| |
223| |/// Errors from conversions that are *orthogonal* to file parsing: parsing a tag
224| |/// name from a string, narrowing an `IRational` into a `URational`, building a
225| |/// `LatLng` from decimal degrees, parsing an ISO 6709 coordinate string.
226| |///
227| |/// Deliberately a peer type of `Error` — there is **no** `From<ConvertError>
228| |/// for Error`. Downstream code that needs to combine file-level errors and
229| |/// conversion errors should define its own wrapper enum (the standard
230| |/// `thiserror` `#[from]` pattern). See spec §3.2.
231| |#[derive(Debug, Clone, thiserror::Error)]
232| |#[non_exhaustive]
233| |pub enum ConvertError {
234| | #[error("unknown ExifTag name: {0}")]
235| | UnknownTagName(String),
236| |
237| | #[error("invalid ISO 6709 coordinate: {0}")]
238| | InvalidIso6709(String),
239| |
240| | #[error("rational has negative value")]
241| | NegativeRational,
242| |
243| | #[error("decimal degrees out of range or non-finite: {0}")]
244| | InvalidDecimalDegrees(f64),
245| |}
246| |
247| |/// Errors that occur while decoding a single IFD entry.
248| |///
249| |/// Constructed internally during EXIF parsing; surfaces to downstream code
250| |/// as the `Err` arm of [`crate::ExifIterEntry::result`],
251| |/// or — when converted via `From<EntryError> for Error` — as
252| |/// [`Error::Malformed`] with [`MalformedKind::IfdEntry`].
253| |#[derive(Debug, Clone, PartialEq, thiserror::Error)]
254| |#[non_exhaustive]
255| |pub enum EntryError {
256| | #[error("entry truncated: needed {needed} bytes, only {available} available")]
257| | Truncated { needed: usize, available: usize },
258| |
259| | #[error("invalid entry shape: format={format}, count={count}")]
260| | InvalidShape { format: u16, count: u32 },
261| |
262| | #[error("invalid value: {0}")]
263| | InvalidValue(&'static str),
264| |}
265| |
266| |impl From<EntryError> for Error {
267| 1| fn from(e: EntryError) -> Self {
268| 1| Error::Malformed {
269| 1| kind: MalformedKind::IfdEntry,
270| 1| message: e.to_string(),
271| 1| }
272| 1| }
273| |}
274| |
275| |#[cfg(test)]
276| |mod tests {
277| | use super::*;
278| |
279| | #[test]
280| 1| fn malformed_kind_is_copy_and_eq() {
281| 1| let a = MalformedKind::JpegSegment;
282| 1| let b = a;
283| 1| assert_eq!(a, b);
284| 1| }
285| |
286| | #[test]
287| 1| fn malformed_kind_covers_all_structural_units() {
288| 6| for k in [
289| 1| MalformedKind::JpegSegment,
290| 1| MalformedKind::TiffHeader,
291| 1| MalformedKind::IfdEntry,
292| 1| MalformedKind::IsoBmffBox,
293| 1| MalformedKind::EbmlElement,
294| 1| MalformedKind::PngChunk,
295| 6| ] {
296| 6| let _ = format!("{k:?}");
297| 6| }
298| 1| }
299| |
300| | #[test]
301| 1| fn parsed_error_failed_propagates_kind_to_top_level_error() {
302| | // Previously `ParsedError::Failed` was string-only and the
303| | // `From<ParsedError> for Error` impl always labelled the
304| | // resulting `Error::Malformed` as `IsoBmffBox`. That mislabel
305| | // is what `parse_image_metadata` on a streaming PNG used to
306| | // surface ("malformed iso-bmff box: PNG: bad signature").
307| | // Verify the conversion now preserves the structural unit.
308| 1| let pe = ParsedError::Failed {
309| 1| kind: MalformedKind::PngChunk,
310| 1| message: "PNG: bad signature".into(),
311| 1| };
312| 1| let top: Error = pe.into();
313| 1| match top {
314| 1| Error::Malformed { kind, message } => {
315| 1| assert_eq!(kind, MalformedKind::PngChunk);
316| 1| assert_eq!(message, "PNG: bad signature");
317| | }
318| 0| other => panic!("expected Malformed, got {other:?}"),
319| | }
320| 1| }
321| |
322| | #[test]
323| 1| fn convert_error_displays_each_variant() {
324| 1| let cases: &[(ConvertError, &str)] = &[
325| 1| (
326| 1| ConvertError::UnknownTagName("Foo".into()),
327| 1| "unknown ExifTag name: Foo",
328| 1| ),
329| 1| (
330| 1| ConvertError::InvalidIso6709("garbage".into()),
331| 1| "invalid ISO 6709 coordinate: garbage",
332| 1| ),
333| 1| (
334| 1| ConvertError::NegativeRational,
335| 1| "rational has negative value",
336| 1| ),
337| 1| (
338| 1| ConvertError::InvalidDecimalDegrees(f64::NAN),
339| 1| "decimal degrees out of range or non-finite: NaN",
340| 1| ),
341| 1| ];
342| 4| for (err, expected) in cases {
^1
343| 4| assert_eq!(err.to_string(), *expected);
344| | }
345| 1| }
346| |
347| | #[test]
348| 1| fn convert_error_does_not_convert_to_error() {
349| | // Compile-time intent: ConvertError must NOT be convertible into Error.
350| | // This is asserted documentally — there is no `impl From<ConvertError> for Error`.
351| | // We just verify both types compile here.
352| 1| let _ = ConvertError::NegativeRational;
353| 1| let _ = Error::UnsupportedFormat;
354| 1| }
355| |
356| | #[test]
357| 1| fn error_io_from_io_error() {
358| 1| let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "x");
359| 1| let err: Error = io_err.into();
360| 1| assert!(matches!(err, Error::Io(_)));
^0
361| 1| }
362| |
363| | #[test]
364| 1| fn error_unsupported_format_displays() {
365| 1| assert_eq!(
366| 1| Error::UnsupportedFormat.to_string(),
367| | "unsupported media format"
368| | );
369| 1| }
370| |
371| | #[test]
372| 1| fn error_exif_not_found_displays() {
373| 1| assert_eq!(
374| 1| Error::ExifNotFound.to_string(),
375| | "no exif data found in this file"
376| | );
377| 1| }
378| |
379| | #[test]
380| 1| fn error_track_not_found_displays() {
381| 1| assert_eq!(
382| 1| Error::TrackNotFound.to_string(),
383| | "no track info found in this file"
384| | );
385| 1| }
386| |
387| | #[test]
388| 1| fn error_malformed_displays() {
389| 1| let e = Error::Malformed {
390| 1| kind: MalformedKind::JpegSegment,
391| 1| message: "bad SOI".into(),
392| 1| };
393| 1| assert_eq!(e.to_string(), "malformed jpeg segment: bad SOI");
394| 1| }
395| |
396| | #[test]
397| 1| fn error_unexpected_eof_displays() {
398| 1| let e = Error::UnexpectedEof {
399| 1| context: "tiff header",
400| 1| };
401| 1| assert_eq!(
402| 1| e.to_string(),
403| | "unexpected end of input while parsing tiff header"
404| | );
405| 1| }
406| |
407| | #[test]
408| 1| fn entry_error_truncated_displays() {
409| 1| let e = EntryError::Truncated {
410| 1| needed: 8,
411| 1| available: 4,
412| 1| };
413| 1| assert_eq!(
414| 1| e.to_string(),
415| | "entry truncated: needed 8 bytes, only 4 available"
416| | );
417| 1| }
418| |
419| | #[test]
420| 1| fn entry_error_invalid_shape_displays() {
421| 1| let e = EntryError::InvalidShape {
422| 1| format: 7,
423| 1| count: 1,
424| 1| };
425| 1| assert_eq!(e.to_string(), "invalid entry shape: format=7, count=1");
426| 1| }
427| |
428| | #[test]
429| 1| fn entry_error_invalid_value_displays() {
430| 1| let e = EntryError::InvalidValue("not utf-8");
431| 1| assert_eq!(e.to_string(), "invalid value: not utf-8");
432| 1| }
433| |
434| | #[test]
435| 1| fn entry_error_into_error_routes_to_malformed_ifd_entry() {
436| 1| let e = EntryError::Truncated {
437| 1| needed: 8,
438| 1| available: 4,
439| 1| };
440| 1| let err: Error = e.into();
441| 1| match err {
442| 1| Error::Malformed { kind, message } => {
443| 1| assert_eq!(kind, MalformedKind::IfdEntry);
444| 1| assert!(message.contains("entry truncated"));
445| | }
446| 0| other => panic!("unexpected variant: {other:?}"),
447| | }
448| 1| }
449| |}
/home/min/dev/nom-exif/src/exif.rs:
1| |use crate::error::{
2| | nom_error_to_parsing_error_with_state, MalformedKind, ParsingError, ParsingErrorState,
3| |};
4| |use crate::file::MediaMimeImage;
5| |use crate::parser::{BufParser, ParsingState, ShareBuf};
6| |use crate::raf::RafInfo;
7| |use crate::slice::SubsliceRange;
8| |use crate::{cr3, heif, jpeg, MediaParser};
9| |pub use exif_exif::Exif;
10| |use exif_exif::TIFF_HEADER_LEN;
11| |use exif_iter::input_into_iter;
12| |pub use exif_iter::{ExifEntry, ExifIter, ExifIterEntry, IfdIndex};
13| |pub use gps::{GPSInfo, LatLng};
14| |pub use tags::{ExifTag, TagOrCode};
15| |
16| |use std::io::Read;
17| |use std::ops::Range;
18| |
19| |pub(crate) mod ifd;
20| |pub(crate) use exif_exif::{check_exif_header, check_exif_header2, TiffHeader};
21| |pub(crate) use travel::IfdHeaderTravel;
22| |
23| |mod exif_exif;
24| |mod exif_iter;
25| |pub mod gps;
26| |pub mod png_text;
27| |mod tags;
28| |mod travel;
29| |
30| |#[tracing::instrument(skip(reader, skip_by_seek))]
31| 79|pub(crate) fn parse_exif_iter<R: Read>(
32| 79| parser: &mut MediaParser,
33| 79| mime_img: MediaMimeImage,
34| 79| reader: &mut R,
35| 79| skip_by_seek: crate::parser::SkipBySeekFn<R>,
36| 79|) -> Result<ExifIter, crate::Error> {
37| | // For CR3 files, we need special handling to get all CMT blocks
38| 79| if mime_img == MediaMimeImage::Cr3 {
39| 1| return parse_cr3_exif_iter(parser, reader, skip_by_seek);
40| 78| }
41| | // PNG: special-cased path peer to CR3.
42| 78| if mime_img == MediaMimeImage::Png {
43| 7| return parse_png_exif_iter(parser, reader, skip_by_seek);
44| 71| }
45| |
46| 117| let out = parser.load_and_parse(reader, skip_by_seek, |buf, state| {
^67 ^71 ^71 ^71 ^71
47| 117| extract_exif_range(mime_img, buf, state)
48| 117| })?;
^4
49| |
50| 67| let has_track = match mime_img {
51| 55| MediaMimeImage::Jpeg => detect_motion_photo(parser, reader),
52| 12| _ => false,
53| | };
54| |
55| 67| range_to_iter(parser, out, has_track)
56| 79|}
57| |
58| |/// Demand-driven scan for a Pixel/Google Motion Photo signal in a JPEG
59| |/// buffer that may not yet hold all APP segments.
60| |///
61| |/// `load_and_parse` only fills enough bytes to read the EXIF segment;
62| |/// for JPEGs with a large EXIF (Pixel/Galaxy thumbnails routinely push
63| |/// it past 30 KB) the XMP segment that carries `GCamera:MotionPhoto`
64| |/// can sit just past the buffer's edge. Try the scan first; if it
65| |/// reports `NeedMoreBytes`, pull another small chunk and retry. Cap
66| |/// the total extra reads at `MAX_EXTRA` so a malformed file can't loop
67| |/// forever.
68| 55|fn detect_motion_photo<R: Read>(parser: &mut MediaParser, reader: &mut R) -> bool {
69| | use crate::parser::{Buf, BufParser};
70| | const CHUNK: usize = 8 * 1024;
71| | const MAX_EXTRA: usize = 256 * 1024;
72| 55| let mut extra = 0;
73| | loop {
74| 1.17k| match jpeg::scan_motion_photo(parser.buffer()) {
75| 4| jpeg::MotionPhotoScan::Found(_) => return true,
76| 13| jpeg::MotionPhotoScan::NotPresent => return false,
77| | jpeg::MotionPhotoScan::NeedMoreBytes => {
78| 1.15k| if extra >= MAX_EXTRA {
79| 35| return false;
80| 1.12k| }
81| 1.12k| let want = CHUNK.min(MAX_EXTRA - extra);
82| 1.12k| if parser.fill_buf(reader, want).is_err() {
83| 3| return false;
84| 1.12k| }
85| 1.12k| extra += want;
86| | }
87| | }
88| | }
89| 55|}
90| |
91| |/// Special parser for CR3 files that extracts all CMT blocks (CMT1, CMT2, CMT3)
92| |/// and adds them as additional TIFF blocks to the ExifIter.
93| |#[tracing::instrument(skip(reader, skip_by_seek))]
94| 1|fn parse_cr3_exif_iter<R: Read>(
95| 1| parser: &mut MediaParser,
96| 1| reader: &mut R,
97| 1| skip_by_seek: crate::parser::SkipBySeekFn<R>,
98| 1|) -> Result<ExifIter, crate::Error> {
99| | // First, parse to get all CMT ranges
100| 2| let cmt_ranges = parser.load_and_parse(reader, skip_by_seek, |buf, _state| {
^1 ^1 ^1 ^1 ^1
101| 2| cr3::extract_all_cmt_ranges(buf)
102| 2| })?;
^0
103| |
104| 1| let Some(cmt_ranges) = cmt_ranges else {
105| 0| return Err(crate::Error::Malformed {
106| 0| kind: crate::error::MalformedKind::IsoBmffBox,
107| 0| message: "cr3: no CMT data found".into(),
108| 0| });
109| | };
110| |
111| 1| if cmt_ranges.ranges.is_empty() {
112| 0| return Err(crate::Error::Malformed {
113| 0| kind: crate::error::MalformedKind::IsoBmffBox,
114| 0| message: "cr3: no CMT ranges available".into(),
115| 0| });
116| 1| }
117| |
118| 1| tracing::debug!(
119| 0| cmt_count = cmt_ranges.ranges.len(),
120| | "Found CMT ranges in CR3 file"
121| | );
122| |
123| | // Get the first CMT range (CMT1) to create the primary ExifIter
124| 1| let (first_block_id, first_range) = &cmt_ranges.ranges[0];
125| 1| tracing::debug!(
126| | block_id = first_block_id,
127| | range = ?first_range,
128| | "Creating primary ExifIter from first CMT block"
129| | );
130| |
131| | // Take ownership of the parser's full buffer once. All CMT block ranges
132| | // are relative to the parser's position-adjusted buffer view; absolute
133| | // ranges within `full` are obtained by adding `position`.
134| 1| let (full, position) = parser.share_buf();
135| |
136| | // Invariant: parse_moov_box uses streaming::take(box_size) for every box,
137| | // so when extract_all_cmt_ranges returns Some, all child boxes (including
138| | // CMT1/2/3 data) are fully loaded into `full`. Step 3a hardens this from
139| | // a soft warning to a structured ParsingError, so reaching here always
140| | // means every range fits within `full`.
141| 1| debug_assert!(
142| 1| cmt_ranges
143| 1| .ranges
144| 1| .iter()
145| 3| .all(|(_, r)| r.end + position <= full.len()),
^1
146| | "CMT range extends beyond loaded buffer; parse_moov_box invariant violated"
147| | );
148| |
149| 1| let primary_abs = (first_range.start + position)..(first_range.end + position);
150| 1| let primary_view = full.slice(primary_abs);
151| 1| let mut iter = input_into_iter(primary_view, None)?;
^0
152| |
153| 2| for (block_id, range) in cmt_ranges.ranges.iter().skip(1) {
^1 ^1
154| 2| if *block_id == "CMT3" {
155| 1| tracing::debug!(block_id, "Skipping CMT3 (MakerNotes) - proprietary format");
156| 1| continue;
157| 1| }
158| 1| let abs = (range.start + position)..(range.end + position);
159| 1| tracing::debug!(
160| | block_id,
161| | original_range = ?range,
162| | absolute_range = ?abs,
163| | "Adding additional CMT block"
164| | );
165| 1| iter.add_tiff_block(block_id.to_string(), full.slice(abs), None);
166| | }
167| |
168| 1| Ok(iter)
169| 1|}
170| |
171| |/// Special parser for PNG files: walks the chunk stream via
172| |/// `png::extract_chunks`, materializes the resulting [`PngExifSource`]
173| |/// into an [`ExifIter`]. Phase 4: handles only the `eXIf` chunk path
174| |/// (legacy `Raw profile type *` decoding lands in phase 5).
175| |#[tracing::instrument(skip(reader, skip_by_seek))]
176| 7|fn parse_png_exif_iter<R: Read>(
177| 7| parser: &mut MediaParser,
178| 7| reader: &mut R,
179| 7| skip_by_seek: crate::parser::SkipBySeekFn<R>,
180| 7|) -> Result<ExifIter, crate::Error> {
181| | use crate::png::{PngExifSource, PngParseOut};
182| |
183| 16| let out: PngParseOut = parser.load_and_parse(reader, skip_by_seek, |buf, state| {
^7 ^7 ^7 ^7 ^7 ^7
184| 16| crate::png::extract_chunks(buf, state)
185| 16| })?;
^0
186| |
187| 7| let Some(source) = out.exif else {
^4
188| 3| return Err(crate::Error::ExifNotFound);
189| | };
190| |
191| 4| match source {
192| 2| PngExifSource::EXif(range) => {
193| 2| let (full, position) = parser.share_buf();
194| 2| let abs = (range.start + position)..(range.end + position);
195| 2| let view = full.slice(abs);
196| 2| input_into_iter(view, None)
197| | }
198| 2| PngExifSource::Legacy(bytes) => {
199| | // Owned bytes — wrap in a fresh Bytes (separate allocation
200| | // from the parser buffer; acceptable because legacy is
201| | // rare and typically small).
202| 2| let view = bytes::Bytes::from(bytes);
203| 2| input_into_iter(view, None)
204| | }
205| | }
206| 7|}
207| |
208| |/// Like [`parse_png_exif_iter`] but also returns the captured `tEXt`
209| |/// chunks. Used by `MediaParser::parse_image_metadata` for PNG.
210| |#[allow(clippy::type_complexity)]
211| |#[tracing::instrument(skip(reader, skip_by_seek))]
212| 8|pub(crate) fn parse_png_full<R: Read>(
213| 8| parser: &mut MediaParser,
214| 8| reader: &mut R,
215| 8| skip_by_seek: crate::parser::SkipBySeekFn<R>,
216| 8|) -> Result<(Option<ExifIter>, Vec<(String, String)>), crate::Error> {
217| | use crate::png::{PngExifSource, PngParseOut};
218| |
219| 14| let out: PngParseOut = parser.load_and_parse(reader, skip_by_seek, |buf, state| {
^8 ^8 ^8 ^8 ^8 ^8
220| 14| crate::png::extract_chunks(buf, state)
221| 14| })?;
^0
222| |
223| 8| let exif_iter = match out.exif {
^4
224| 3| Some(PngExifSource::EXif(range)) => {
225| 3| let (full, position) = parser.share_buf();
226| 3| let abs = (range.start + position)..(range.end + position);
227| 3| let view = full.slice(abs);
228| 3| Some(input_into_iter(view, None)?)
^0
229| | }
230| 1| Some(PngExifSource::Legacy(bytes)) => {
231| 1| let view = bytes::Bytes::from(bytes);
232| 1| Some(input_into_iter(view, None)?)
^0
233| | }
234| 4| None => None,
235| | };
236| |
237| 8| Ok((exif_iter, out.text_chunks))
238| 8|}
239| |
240| |type ExifRangeResult = Result<Option<(Range<usize>, Option<TiffHeader>)>, ParsingErrorState>;
241| |
242| 145|fn extract_exif_range(
243| 145| img: MediaMimeImage,
244| 145| buf: &[u8],
245| 145| state: Option<ParsingState>,
246| 145|) -> ExifRangeResult {
247| 145| let (exif_data, state) = extract_exif_with_mime(img, buf, state)?;
^86 ^86 ^59
248| 86| let header = state.and_then(|x| match x {
^3
249| 3| ParsingState::TiffHeader(h) => Some(h),
250| 0| ParsingState::HeifExifSize(_) => None,
251| 0| ParsingState::Cr3ExifSize(_) => None,
252| 0| ParsingState::PngPastSignature => None,
253| 3| });
254| 86| Ok(exif_data
255| 86| .and_then(|x| buf.subslice_in_range(x))
^83 ^83 ^83
256| 86| .map(|x| (x, header)))
^83^83
257| 145|}
258| |
259| 86|fn range_to_iter(
260| 86| parser: &mut impl ShareBuf,
261| 86| out: Option<(Range<usize>, Option<TiffHeader>)>,
262| 86| has_embedded_track: bool,
263| 86|) -> Result<ExifIter, crate::Error> {
264| 86| if let Some((range, header)) = out {
^83 ^83
265| 83| tracing::debug!(?range, ?header, "Got Exif data");
266| 83| let (full, position) = parser.share_buf();
267| 83| let abs = (range.start + position)..(range.end + position);
268| 83| let view = full.slice(abs);
269| 83| let mut iter = input_into_iter(view, header)?;
^0
270| 83| iter.set_has_embedded_track(has_embedded_track);
271| 83| Ok(iter)
272| | } else {
273| 3| tracing::debug!("Exif not found");
274| 3| Err(crate::Error::ExifNotFound)
275| | }
276| 86|}
277| |
278| |#[cfg(feature = "tokio")]
279| |#[tracing::instrument(skip(parser, reader, skip_by_seek))]
280| 20|pub(crate) async fn parse_exif_iter_async<P, R: AsyncRead + Unpin + Send>(
281| 20| parser: &mut P,
282| 20| mime_img: MediaMimeImage,
283| 20| reader: &mut R,
284| 20| skip_by_seek: crate::parser_async::AsyncSkipBySeekFn<R>,
285| 20|) -> Result<ExifIter, crate::Error>
286| 20|where
287| 20| P: crate::parser_async::AsyncBufParser + crate::parser::ShareBuf,
288| 20|{
289| | if mime_img == MediaMimeImage::Png {
290| | return parse_png_exif_iter_async(parser, reader, skip_by_seek).await;
291| | }
292| |
293| | let out = parser
294| 28| .load_and_parse(reader, skip_by_seek, |buf, state| {
295| 28| extract_exif_range(mime_img, buf, state)
296| 28| })
297| | .await?;
298| |
299| 20| let has_track = match mime_img {
300| | MediaMimeImage::Jpeg => detect_motion_photo_async(parser, reader).await,
301| | _ => false,
302| | };
303| |
304| | range_to_iter(parser, out, has_track)
305| 20|}
306| |
307| |/// Async twin of [`detect_motion_photo`].
308| |#[cfg(feature = "tokio")]
309| 13|async fn detect_motion_photo_async<P, R>(parser: &mut P, reader: &mut R) -> bool
310| 13|where
311| 13| P: crate::parser_async::AsyncBufParser + crate::parser::Buf,
312| 13| R: AsyncRead + Unpin + Send,
313| 13|{
314| | const CHUNK: usize = 8 * 1024;
315| | const MAX_EXTRA: usize = 256 * 1024;
316| 13| let mut extra = 0;
317| | loop {
318| 237| match jpeg::scan_motion_photo(parser.buffer()) {
319| 0| jpeg::MotionPhotoScan::Found(_) => return true,
320| 6| jpeg::MotionPhotoScan::NotPresent => return false,
321| | jpeg::MotionPhotoScan::NeedMoreBytes => {
322| 231| if extra >= MAX_EXTRA {
323| 7| return false;
324| 224| }
325| 224| let want = CHUNK.min(MAX_EXTRA - extra);
326| 224| if parser.fill_buf(reader, want).await.is_err() {
327| 0| return false;
328| 224| }
329| 224| extra += want;
330| | }
331| | }
332| | }
333| 13|}
334| |
335| |#[tracing::instrument(skip(buf))]
336| 153|pub(crate) fn extract_exif_with_mime(
337| 153| img_type: crate::file::MediaMimeImage,
338| 153| buf: &[u8],
339| 153| state: Option<ParsingState>,
340| 153|) -> Result<(Option<&[u8]>, Option<ParsingState>), ParsingErrorState> {
341| 153| let (exif_data, state) = match img_type {
^94 ^94
342| 123| MediaMimeImage::Jpeg => jpeg::extract_exif_data(buf)
343| 123| .map(|res| (res.1, state.clone()))
^72 ^72 ^72
344| 123| .map_err(|e| {
^51
345| 51| nom_error_to_parsing_error_with_state(e, MalformedKind::JpegSegment, state)
346| 51| })?,
347| | MediaMimeImage::Heic
348| | | crate::file::MediaMimeImage::Heif
349| 20| | crate::file::MediaMimeImage::Avif => heif_extract_exif(state, buf)?,
^4
350| | MediaMimeImage::Tiff => {
351| 4| let header = match state {
^3
352| 3| Some(ParsingState::TiffHeader(ref h)) => h.to_owned(),
353| | None => {
354| 4| let (_, header) = TiffHeader::parse(buf).map_err(|e| {
^0
355| 0| nom_error_to_parsing_error_with_state(e, MalformedKind::TiffHeader, None)
356| 0| })?;
357| 4| if header.ifd0_offset as usize > buf.len() {
358| 3| let clear_and_skip =
359| 3| ParsingError::Need(header.ifd0_offset as usize - TIFF_HEADER_LEN + 2);
360| 3| let state = Some(ParsingState::TiffHeader(header));
361| 3| return Err(ParsingErrorState::new(clear_and_skip, state));
362| 1| }
363| 1| header
364| | }
365| | _ => {
366| 0| return Err(ParsingErrorState::new(
367| 0| ParsingError::Failed {
368| 0| kind: MalformedKind::TiffHeader,
369| 0| message: "unexpected parsing state for tiff".into(),
370| 0| },
371| 0| None,
372| 0| ))
373| | }
374| | };
375| |
376| | // full fill TIFF data
377| 4| tracing::debug!("full fill TIFF data");
378| 4| let mut iter = IfdHeaderTravel::new(
379| 4| buf,
380| 4| header.ifd0_offset as usize,
381| 4| TagOrCode::Unknown(0x2a),
382| 4| header.endian,
383| | );
384| 4| iter.travel_ifd(0)
385| 4| .map_err(|e| ParsingErrorState::new(e, state.clone()))?;
^0 ^0 ^0 ^0 ^0
386| 4| tracing::debug!("full fill TIFF data done");
387| |
388| 4| (Some(buf), state)
389| | }
390| 3| MediaMimeImage::Raf => RafInfo::parse(buf)
391| 3| .map(|res| (res.1.exif_data, state.clone()))
^2 ^2 ^2
392| 3| .map_err(|e| {
^1
393| 1| nom_error_to_parsing_error_with_state(e, MalformedKind::TiffHeader, state)
394| 1| })?,
395| 0| MediaMimeImage::Cr3 => cr3_extract_exif(state, buf)?,
396| | MediaMimeImage::Png => {
397| | // PNG is dispatched to parse_png_exif_iter at the top of
398| | // parse_exif_iter; this arm is unreachable in v3.3.
399| 0| unreachable!("PNG should have been dispatched at parse_exif_iter top");
400| | }
401| | };
402| 94| Ok((exif_data, state))
403| 153|}
404| |
405| 20|fn heif_extract_exif(
406| 20| state: Option<ParsingState>,
407| 20| buf: &[u8],
408| 20|) -> Result<(Option<&[u8]>, Option<ParsingState>), ParsingErrorState> {
409| 20| heif::extract_exif_data(state, buf)
410| 20|}
411| |
412| 0|fn cr3_extract_exif(
413| 0| state: Option<ParsingState>,
414| 0| buf: &[u8],
415| 0|) -> Result<(Option<&[u8]>, Option<ParsingState>), ParsingErrorState> {
416| 0| cr3::extract_exif_data(state, buf)
417| 0|}
418| |
419| |#[cfg(feature = "tokio")]
420| 1|async fn parse_png_exif_iter_async<P, R>(
421| 1| parser: &mut P,
422| 1| reader: &mut R,
423| 1| skip_by_seek: crate::parser_async::AsyncSkipBySeekFn<R>,
424| 1|) -> Result<ExifIter, crate::Error>
425| 1|where
426| 1| P: crate::parser_async::AsyncBufParser + crate::parser::ShareBuf,
427| 1| R: AsyncRead + Unpin + Send,
428| 1|{
429| | use crate::png::{PngExifSource, PngParseOut};
430| |
431| 1| let out: PngParseOut = parser
432| 1| .load_and_parse(reader, skip_by_seek, |buf, state| {
433| 1| crate::png::extract_chunks(buf, state)
434| 1| })
435| 1| .await?;
^0
436| |
437| 1| let Some(source) = out.exif else {
^0
438| 1| return Err(crate::Error::ExifNotFound);
439| | };
440| |
441| 0| match source {
442| 0| PngExifSource::EXif(range) => {
443| 0| let (full, position) = parser.share_buf();
444| 0| let abs = (range.start + position)..(range.end + position);
445| 0| let view = full.slice(abs);
446| 0| input_into_iter(view, None)
447| | }
448| 0| PngExifSource::Legacy(bytes) => {
449| 0| let view = bytes::Bytes::from(bytes);
450| 0| input_into_iter(view, None)
451| | }
452| | }
453| 1|}
454| |
455| |#[cfg(feature = "tokio")]
456| |#[allow(clippy::type_complexity)]
457| 5|pub(crate) async fn parse_png_full_async<P, R>(
458| 5| parser: &mut P,
459| 5| reader: &mut R,
460| 5| skip_by_seek: crate::parser_async::AsyncSkipBySeekFn<R>,
461| 5|) -> Result<(Option<ExifIter>, Vec<(String, String)>), crate::Error>
462| 5|where
463| 5| P: crate::parser_async::AsyncBufParser + crate::parser::ShareBuf,
464| 5| R: AsyncRead + Unpin + Send,
465| 5|{
466| | use crate::png::{PngExifSource, PngParseOut};
467| |
468| 5| let out: PngParseOut = parser
469| 7| .load_and_parse(reader, skip_by_seek, |buf, state| {
^5 ^5 ^5
470| 7| crate::png::extract_chunks(buf, state)
471| 7| })
472| 5| .await?;
^0
473| |
474| 5| let exif_iter = match out.exif {
^2
475| 2| Some(PngExifSource::EXif(range)) => {
476| 2| let (full, position) = parser.share_buf();
477| 2| let abs = (range.start + position)..(range.end + position);
478| 2| let view = full.slice(abs);
479| 2| Some(input_into_iter(view, None)?)
^0
480| | }
481| 0| Some(PngExifSource::Legacy(bytes)) => {
482| 0| let view = bytes::Bytes::from(bytes);
483| 0| Some(input_into_iter(view, None)?)
484| | }
485| 3| None => None,
486| | };
487| |
488| 5| Ok((exif_iter, out.text_chunks))
489| 5|}
490| |
491| |#[cfg(feature = "tokio")]
492| |use tokio::io::AsyncRead;
493| |
494| |#[cfg(test)]
495| |mod tests {
496| | use crate::{
497| | exif::gps::{Altitude, LatRef, LonRef, Speed},
498| | file::MediaMimeImage,
499| | testkit::read_sample,
500| | values::URational,
501| | };
502| | use test_case::test_case;
503| |
504| | use super::*;
505| |
506| | #[test_case(
507| | "exif.jpg",
508| | LatRef::North,
509| | LatLng::new(
510| | URational::new(22, 1),
511| | URational::new(31, 1),
512| | URational::new(5208, 100)
513| | ),
514| | LonRef::East,
515| | LatLng::new(
516| | URational::new(114, 1),
517| | URational::new(1, 1),
518| | URational::new(1733, 100)
519| | ),
520| | Altitude::AboveSeaLevel(URational::new(0, 1)),
521| | None
522| | )]
523| 1| fn gps_info(
524| 1| path: &str,
525| 1| latitude_ref: LatRef,
526| 1| latitude: LatLng,
527| 1| longitude_ref: LonRef,
528| 1| longitude: LatLng,
529| 1| altitude: Altitude,
530| 1| speed: Option<Speed>,
531| 1| ) {
532| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
533| |
534| 1| let buf = read_sample(path).unwrap();
535| 1| let (data, _) = extract_exif_with_mime(MediaMimeImage::Jpeg, &buf, None).unwrap();
536| 1| let data = data.unwrap();
537| |
538| 1| let subslice_in_range = buf.subslice_in_range(data).unwrap();
539| 1| let iter = input_into_iter(bytes::Bytes::from(buf).slice(subslice_in_range), None).unwrap();
540| 1| let exif: Exif = iter.into();
541| |
542| 1| let gps = exif.gps_info().unwrap();
543| 1| assert_eq!(
544| | *gps,
545| 1| GPSInfo {
546| 1| latitude_ref,
547| 1| latitude,
548| 1| longitude_ref,
549| 1| longitude,
550| 1| altitude,
551| 1| speed,
552| 1| }
553| | )
554| 1| }
555| |}
/home/min/dev/nom-exif/src/exif/exif_exif.rs:
1| |use std::fmt::Debug;
2| |
3| |use nom::{
4| | branch::alt, bytes::streaming::tag, combinator, number::Endianness, IResult, Needed, Parser,
5| |};
6| |
7| |use crate::{EntryValue, ExifEntry, ExifIter, ExifTag, GPSInfo, IfdIndex, TagOrCode};
8| |
9| |use super::ifd::ParsedImageFileDirectory;
10| |
11| |/// Represents parsed Exif information, can be converted from an [`ExifIter`]
12| |/// like this: `let exif: Exif = iter.into()`.
13| |#[derive(Clone, Debug, PartialEq)]
14| |pub struct Exif {
15| | ifds: Vec<ParsedImageFileDirectory>,
16| | gps_info: Option<GPSInfo>,
17| | errors: Vec<(IfdIndex, TagOrCode, crate::EntryError)>,
18| | has_embedded_track: bool,
19| |}
20| |
21| |impl Exif {
22| 75| fn new(gps_info: Option<GPSInfo>, has_embedded_track: bool) -> Exif {
23| 75| Exif {
24| 75| ifds: Vec::new(),
25| 75| gps_info,
26| 75| errors: Vec::new(),
27| 75| has_embedded_track,
28| 75| }
29| 75| }
30| |
31| | /// Get entry value for the specified `tag` in ifd0 (the main image).
32| | ///
33| | /// *Note*:
34| | ///
35| | /// - The parsing error related to this tag won't be reported by this
36| | /// method. Either this entry is not parsed successfully, or the tag does
37| | /// not exist in the input data, this method will return None.
38| | ///
39| | /// - If you want to handle parsing error, please consider to use
40| | /// [`ExifIter`].
41| | ///
42| | /// - If you have any custom defined tag which does not exist in
43| | /// [`ExifTag`], you can always get the entry value by a raw tag code,
44| | /// see [`Self::get_by_code`].
45| | ///
46| | /// ## Example
47| | ///
48| | /// ```rust
49| | /// use nom_exif::*;
50| | ///
51| | /// fn main() -> Result<()> {
52| | /// let mut parser = MediaParser::new();
53| | ///
54| | /// let ms = MediaSource::open("./testdata/exif.jpg")?;
55| | /// assert_eq!(ms.kind(), MediaKind::Image);
56| | /// let iter = parser.parse_exif(ms)?;
57| | /// let exif: Exif = iter.into();
58| | ///
59| | /// assert_eq!(exif.get(ExifTag::Model).unwrap(), &"vivo X90 Pro+".into());
60| | /// Ok(())
61| | /// }
62| 146| pub fn get(&self, tag: ExifTag) -> Option<&EntryValue> {
63| 146| self.get_in(IfdIndex::MAIN, tag)
64| 146| }
65| |
66| | /// Get entry value for the specified `tag` in the specified `ifd`.
67| | ///
68| | /// *Note*:
69| | ///
70| | /// - The parsing error related to this tag won't be reported by this
71| | /// method. Either this entry is not parsed successfully, or the tag does
72| | /// not exist in the input data, this method will return None. Use
73| | /// [`Self::errors`] to inspect per-entry errors.
74| | ///
75| | /// - For raw tag codes (e.g. unrecognized tags), use [`Self::get_by_code`].
76| | ///
77| | /// ## Example
78| | ///
79| | /// ```rust
80| | /// use nom_exif::*;
81| | ///
82| | /// fn main() -> Result<()> {
83| | /// let mut parser = MediaParser::new();
84| | /// let ms = MediaSource::open("./testdata/exif.jpg")?;
85| | /// let iter = parser.parse_exif(ms)?;
86| | /// let exif: Exif = iter.into();
87| | ///
88| | /// assert_eq!(exif.get_in(IfdIndex::MAIN, ExifTag::Model).unwrap(),
89| | /// &"vivo X90 Pro+".into());
90| | /// Ok(())
91| | /// }
92| | /// ```
93| 148| pub fn get_in(&self, ifd: IfdIndex, tag: ExifTag) -> Option<&EntryValue> {
94| 148| self.get_by_code(ifd, tag.code())
95| 148| }
96| |
97| | /// Get entry value for the specified raw `code` in the specified `ifd`.
98| | /// Used for tags not in the recognized [`ExifTag`] enum.
99| 216| pub fn get_by_code(&self, ifd: IfdIndex, code: u16) -> Option<&EntryValue> {
100| 216| self.ifds.get(ifd.as_usize()).and_then(|d| d.get(code))
101| 216| }
102| |
103| | /// Iterate every parsed entry in every IFD.
104| | ///
105| | /// Order is: IFD0 entries first (in `HashMap` order — not stable), then
106| | /// IFD1, etc. Filter by IFD with `.iter().filter(|e| e.ifd == IfdIndex::MAIN)`.
107| 4| pub fn iter(&self) -> impl Iterator<Item = ExifEntry<'_>> {
108| 7| self.ifds.iter().enumerate().flat_map(|(idx, dir)| {
^4 ^4 ^4
109| 7| let ifd = IfdIndex::new(idx);
110| 7| dir.iter().map(move |(code, value)| ExifEntry {
111| 238| ifd,
112| 238| tag: TagOrCode::from(code),
113| 238| value,
114| 238| })
115| 7| })
116| 4| }
117| |
118| | /// Get parsed GPS information.
119| | ///
120| | /// Returns `None` if the source had no `GPSInfo` IFD or if its parse
121| | /// failed (failures land in [`Self::errors`]).
122| 4| pub fn gps_info(&self) -> Option<&GPSInfo> {
123| 4| self.gps_info.as_ref()
124| 4| }
125| |
126| | /// Per-entry errors collected during `From<ExifIter>` conversion. Each
127| | /// tuple is `(ifd, tag, error)`. Empty slice if the parse was clean.
128| 3| pub fn errors(&self) -> &[(IfdIndex, TagOrCode, crate::EntryError)] {
129| 3| &self.errors
130| 3| }
131| |
132| | /// Whether the source file is known to embed a paired media track
133| | /// that this parse path did *not* surface — a Pixel/Google or Samsung
134| | /// Galaxy Motion Photo (JPEG with `GCamera:MotionPhoto` XMP and an
135| | /// MP4 trailer). Use [`crate::MediaParser::parse_track`] on the same
136| | /// source to extract the embedded track.
137| | ///
138| | /// **Content-detected, not MIME-guessed**: returns `true` only when
139| | /// `parse_exif` observed a concrete content signal
140| | /// (`GCamera:MotionPhoto="1"` plus a `Container:Directory` /
141| | /// `MotionPhotoOffset` / `MicroVideoOffset`). A plain JPEG or HEIC
142| | /// without such signals returns `false`.
143| | ///
144| | /// **Coverage**: Pixel/Google Motion Photos and Samsung Galaxy
145| | /// Motion Photos that use the Adobe XMP Container directory format
146| | /// (JPEG variants).
147| 6| pub fn has_embedded_track(&self) -> bool {
148| 6| self.has_embedded_track
149| 6| }
150| |
151| | /// Deprecated alias for [`Self::has_embedded_track`].
152| | #[deprecated(
153| | since = "3.1.0",
154| | note = "renamed to `has_embedded_track` to reflect the actual semantics (paired track hint, not arbitrary embedded media)"
155| | )]
156| 1| pub fn has_embedded_media(&self) -> bool {
157| 1| self.has_embedded_track()
158| 1| }
159| |
160| 4.33k| fn put_value(&mut self, ifd: usize, code: u16, v: EntryValue) {
161| 4.46k| while self.ifds.len() < ifd + 1 {
162| 125| self.ifds.push(ParsedImageFileDirectory::new());
163| 125| }
164| 4.33k| self.ifds[ifd].put(code, v);
165| 4.33k| }
166| |}
167| |
168| |impl From<ExifIter> for Exif {
169| 75| fn from(iter: ExifIter) -> Self {
170| 75| let gps_info = iter.parse_gps().ok().flatten();
171| 75| let has_embedded_track = iter.has_embedded_track();
172| 75| let mut exif = Exif::new(gps_info, has_embedded_track);
173| |
174| 4.33k| for entry in iter {
^75
175| 4.33k| let ifd = entry.ifd();
176| 4.33k| let tag = entry.tag();
177| 4.33k| let code = tag.code();
178| 4.33k| match entry.into_result() {
179| 4.33k| Ok(v) => exif.put_value(ifd.as_usize(), code, v),
180| 0| Err(e) => exif.errors.push((ifd, tag, e)),
181| | }
182| | }
183| |
184| 75| exif
185| 75| }
186| |}
187| |
188| |pub(crate) const TIFF_HEADER_LEN: usize = 8;
189| |
190| |/// TIFF Header
191| |#[derive(Clone, PartialEq, Eq)]
192| |pub(crate) struct TiffHeader {
193| | pub endian: Endianness,
194| | pub ifd0_offset: u32,
195| |}
196| |
197| |impl Default for TiffHeader {
198| 0| fn default() -> Self {
199| 0| Self {
200| 0| endian: Endianness::Big,
201| 0| ifd0_offset: 0,
202| 0| }
203| 0| }
204| |}
205| |
206| |impl Debug for TiffHeader {
207| 526| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208| 526| let endian_str = match self.endian {
209| 495| Endianness::Big => "Big",
210| 31| Endianness::Little => "Little",
211| 0| Endianness::Native => "Native",
212| | };
213| 526| f.debug_struct("TiffHeader")
214| 526| .field("endian", &endian_str)
215| 526| .field("ifd0_offset", &format!("{:#x}", self.ifd0_offset))
216| 526| .finish()
217| 526| }
218| |}
219| |
220| |pub(crate) const IFD_ENTRY_SIZE: usize = 12;
221| |
222| |impl TiffHeader {
223| 258| pub fn parse(input: &[u8]) -> IResult<&[u8], TiffHeader> {
224| | use nom::number::streaming::{u16, u32};
225| 258| let (remain, endian) = TiffHeader::parse_endian(input)?;
^133 ^133 ^125
226| 133| let (_, (_, offset)) = (
227| 133| combinator::verify(u16(endian), |magic| *magic == 0x2a),
228| 133| u32(endian),
229| | )
230| 133| .parse(remain)?;
^0
231| |
232| 133| let header = Self {
233| 133| endian,
234| 133| ifd0_offset: offset,
235| 133| };
236| |
237| 133| Ok((remain, header))
238| 258| }
239| |
240| 677| pub fn parse_ifd_entry_num(input: &[u8], endian: Endianness) -> IResult<&[u8], u16> {
241| 677| let (remain, num) = nom::number::streaming::u16(endian)(input)?; // Safe-slice
^0
242| 677| if num == 0 {
243| 1| return Ok((remain, 0));
244| 676| }
245| |
246| | // 12 bytes per entry
247| 676| let size = (num as usize)
248| 676| .checked_mul(IFD_ENTRY_SIZE)
249| 676| .expect("should fit");
250| |
251| 676| if size > remain.len() {
252| 26| return Err(nom::Err::Incomplete(Needed::new(size - remain.len())));
253| 650| }
254| |
255| 650| Ok((remain, num))
256| 677| }
257| |
258| | // pub fn first_ifd<'a>(&self, input: &'a [u8], tag_ids: HashSet<u16>) -> IResult<&'a [u8], IFD> {
259| | // // ifd0_offset starts from the beginning of Header, so we should
260| | // // subtract the header size, which is 8
261| | // let offset = self.ifd0_offset - 8;
262| |
263| | // // skip to offset
264| | // let (_, remain) = take(offset)(input)?;
265| |
266| | // IFD::parse(remain, self.endian, tag_ids)
267| | // }
268| |
269| 258| fn parse_endian(input: &[u8]) -> IResult<&[u8], Endianness> {
270| 258| combinator::map(alt((tag("MM"), tag("II"))), |endian_marker| {
^133
271| 133| if endian_marker == b"MM" {
272| 107| Endianness::Big
273| | } else {
274| 26| Endianness::Little
275| | }
276| 133| })
277| 258| .parse(input)
278| 258| }
279| |}
280| |
281| 79|pub(crate) fn check_exif_header(data: &[u8]) -> Result<bool, nom::Err<nom::error::Error<&[u8]>>> {
282| 79| tag::<_, _, nom::error::Error<_>>(EXIF_IDENT)(data).map(|_| true)
283| 79|}
284| |
285| 20|pub(crate) fn check_exif_header2(i: &[u8]) -> IResult<&[u8], ()> {
286| 20| let (remain, _) = (
287| 20| nom::number::complete::be_u32,
288| 20| nom::bytes::complete::tag(EXIF_IDENT),
289| 20| )
290| 20| .parse(i)?;
^0
291| 20| Ok((remain, ()))
292| 20|}
293| |
294| |pub(crate) const EXIF_IDENT: &str = "Exif\0\0";
295| |
296| |#[cfg(test)]
297| |mod tests {
298| | use std::io::Read;
299| | use std::thread;
300| |
301| | use test_case::test_case;
302| |
303| | use crate::exif::input_into_iter;
304| | use crate::jpeg::extract_exif_data;
305| | use crate::slice::SubsliceRange;
306| | use crate::testkit::{open_sample, read_sample};
307| | use crate::ExifIterEntry;
308| |
309| | use super::*;
310| |
311| | #[test]
312| 1| fn header() {
313| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
314| |
315| 1| let buf = [0x4d, 0x4d, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x08, 0x00];
316| |
317| 1| let (_, header) = TiffHeader::parse(&buf).unwrap();
318| 1| assert_eq!(
319| | header,
320| | TiffHeader {
321| | endian: Endianness::Big,
322| | ifd0_offset: 8,
323| | }
324| | );
325| 1| }
326| |
327| | #[test_case("exif.jpg")]
328| 1| fn exif_iter_gps(path: &str) {
329| 1| let buf = read_sample(path).unwrap();
330| 1| let (_, data) = extract_exif_data(&buf).unwrap();
331| 1| let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
332| 1| let data = bytes::Bytes::from(buf).slice(range);
333| 1| let iter = input_into_iter(data, None).unwrap();
334| 1| let gps = iter.parse_gps().unwrap().unwrap();
335| 1| assert_eq!(gps.to_iso6709(), "+22.53113+114.02148/");
336| 1| }
337| |
338| | #[test_case("exif.jpg")]
339| 1| fn clone_exif_iter_to_thread(path: &str) {
340| 1| let buf = read_sample(path).unwrap();
341| 1| let (_, data) = extract_exif_data(&buf).unwrap();
342| 1| let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
343| 1| let data = bytes::Bytes::from(buf).slice(range);
344| 1| let iter = input_into_iter(data, None).unwrap();
345| 1| let iter2 = iter.clone();
346| |
347| 1| let mut expect = String::new();
348| 1| open_sample(&format!("{path}.txt"))
349| 1| .unwrap()
350| 1| .read_to_string(&mut expect)
351| 1| .unwrap();
352| |
353| 1| let jh = thread::spawn(move || iter_to_str(iter2));
354| |
355| 1| let result = iter_to_str(iter);
356| |
357| | // open_sample_w(&format!("{path}.txt"))
358| | // .unwrap()
359| | // .write_all(result.as_bytes())
360| | // .unwrap();
361| |
362| 1| assert_eq!(result.trim(), expect.trim());
363| 1| assert_eq!(jh.join().unwrap().trim(), expect.trim());
364| 1| }
365| |
366| 2| fn iter_to_str(it: impl Iterator<Item = ExifIterEntry>) -> String {
367| 2| let ss = it
368| 132| .map(|x| {
^2
369| 132| format!(
370| | "{}.{:<32} » {}",
371| 132| x.ifd(),
372| 132| match x.tag() {
373| 132| crate::TagOrCode::Tag(t) => t.to_string(),
374| 0| crate::TagOrCode::Unknown(c) => format!("Unknown(0x{c:04x})"),
375| | },
376| 132| x.result()
377| 132| .map(|v| v.to_string())
378| 132| .map_err(|e| e.to_string())
^0^0
379| 132| .unwrap_or_else(|s| s)
380| | )
381| 132| })
382| 2| .collect::<Vec<String>>();
383| 2| ss.join("\n")
384| 2| }
385| |
386| | #[test]
387| 1| fn p5_baseline_exif_jpg_dump_snapshot() {
388| | // Lock down the post-refactor invariant: parsing testdata/exif.jpg
389| | // through the public API yields the same set of (ifd, tag, value)
390| | // triples before and after every P5 task. Captured as a sorted
391| | // formatted string so the assertion is a single Vec compare.
392| | use crate::{MediaParser, MediaSource};
393| 1| let mut parser = MediaParser::new();
394| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
395| 1| let iter = parser.parse_exif(ms).unwrap();
396| |
397| 1| let mut entries: Vec<String> = iter
398| 66| .map(|e| {
^1
399| 66| let val = match e.result() {
400| 66| Ok(v) => format!("{v}"),
401| 0| Err(err) => format!("<err:{err}>"),
402| | };
403| 66| format!("{}.0x{:04x}={val}", e.ifd(), e.tag().code())
404| 66| })
405| 1| .collect();
406| 1| entries.sort();
407| 1| assert!(
408| 1| entries.len() > 5,
409| | "expected >5 entries, got {}",
410| 0| entries.len()
411| | );
412| 1| assert!(
413| 12| entries.iter().any(|s| s.contains("0x010f")),
^1 ^1
414| | "expected Make tag (0x010f) in snapshot, got {entries:?}"
415| | );
416| 1| }
417| |
418| | #[test]
419| 1| fn exif_get_in_main_routes_via_ifd_index() {
420| | use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
421| 1| let mut parser = MediaParser::new();
422| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
423| 1| let iter = parser.parse_exif(ms).unwrap();
424| 1| let exif: Exif = iter.into();
425| |
426| | // Main image: same as exif.get(...)
427| 1| let v_via_get = exif.get(ExifTag::Model);
428| 1| let v_via_get_in = exif.get_in(IfdIndex::MAIN, ExifTag::Model);
429| 1| assert_eq!(v_via_get, v_via_get_in);
430| 1| assert!(
431| 1| v_via_get.is_some(),
432| | "Model tag expected in testdata/exif.jpg"
433| | );
434| 1| }
435| |
436| | #[test]
437| 1| fn exif_get_by_code_finds_unrecognized_or_recognized_tag() {
438| | use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
439| 1| let mut parser = MediaParser::new();
440| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
441| 1| let iter = parser.parse_exif(ms).unwrap();
442| 1| let exif: Exif = iter.into();
443| | // Make = 0x010f
444| 1| let v = exif.get_by_code(IfdIndex::MAIN, ExifTag::Make.code());
445| 1| assert!(v.is_some());
446| 1| }
447| |
448| | #[test]
449| 1| fn exif_gps_info_returns_borrow_no_result_wrap() {
450| | use crate::{MediaParser, MediaSource};
451| 1| let mut parser = MediaParser::new();
452| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
453| 1| let iter = parser.parse_exif(ms).unwrap();
454| 1| let exif: Exif = iter.into();
455| | // gps_info returns Option<&GPSInfo> directly (no Result wrap).
456| 1| let g: Option<&crate::GPSInfo> = exif.gps_info();
457| 1| assert!(g.is_some(), "testdata/exif.jpg has GPS info");
458| 1| assert_eq!(g.unwrap().to_iso6709(), "+22.53113+114.02148/");
459| 1| }
460| |
461| | #[test]
462| 1| fn exif_iter_yields_main_ifd_entries() {
463| | use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
464| 1| let mut parser = MediaParser::new();
465| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
466| 1| let iter = parser.parse_exif(ms).unwrap();
467| 1| let exif: Exif = iter.into();
468| |
469| 66| let main_count = exif.iter().filter(|e| e.ifd == IfdIndex::MAIN).count();
^1 ^1 ^1 ^1 ^1
470| 1| assert!(
471| 1| main_count > 1,
472| | "expected >1 entries in main IFD, got {main_count}"
473| | );
474| |
475| | // Ensure each entry is well-formed.
476| 66| for entry in exif.iter() {
^1 ^1
477| | // value is a real reference to an EntryValue
478| 66| let _: &crate::EntryValue = entry.value;
479| | // Tag round-trips
480| 66| let code = entry.tag.code();
481| 66| assert_eq!(
482| 66| exif.get_by_code(entry.ifd, code).unwrap(),
483| | entry.value,
484| | "iter entry value should match get_by_code lookup"
485| | );
486| | }
487| |
488| | // Specifically: Model entry is present and matches get().
489| 1| let model_via_iter = exif
490| 1| .iter()
491| 40| .find(|e| e.tag.tag() == Some(ExifTag::Model))
^1
492| 1| .map(|e| e.value);
493| 1| assert_eq!(model_via_iter, exif.get(ExifTag::Model));
494| 1| }
495| |
496| | #[test]
497| 1| fn exif_errors_is_empty_for_clean_fixture() {
498| | use crate::{MediaParser, MediaSource};
499| 1| let mut parser = MediaParser::new();
500| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
501| 1| let iter = parser.parse_exif(ms).unwrap();
502| 1| let exif: Exif = iter.into();
503| | // Clean fixture: errors() returns empty slice but the method exists
504| | // and the type matches the spec.
505| 1| let errs: &[(crate::IfdIndex, crate::TagOrCode, crate::EntryError)] = exif.errors();
506| 1| assert!(
507| 1| errs.is_empty(),
508| | "exif.jpg has no per-entry errors, got {errs:?}"
509| | );
510| 1| }
511| |
512| | #[test]
513| 1| fn exif_errors_captures_per_entry_errors_for_broken_fixture() {
514| | use crate::{MediaParser, MediaSource};
515| 1| let mut parser = MediaParser::new();
516| 1| let ms = MediaSource::open("testdata/broken.jpg").unwrap();
517| 1| let iter = parser.parse_exif(ms).unwrap();
518| 1| let exif: Exif = iter.into();
519| | // broken.jpg has malformed IFD entries — at least one should land in errors().
520| | // (Note: if broken.jpg's particular breakage doesn't surface as a per-entry
521| | // error, this assertion may be `>= 0`. Adjust as needed.)
522| 1| let _ = exif.errors();
523| 1| }
524| |
525| | #[test]
526| 1| fn has_embedded_track_true_for_pixel_motion_photo() {
527| | use crate::{MediaParser, MediaSource};
528| 1| let mut parser = MediaParser::new();
529| 1| let ms = MediaSource::open("testdata/motion_photo_pixel_synth.jpg").unwrap();
530| 1| let iter = parser.parse_exif(ms).unwrap();
531| 1| assert!(
532| 1| iter.has_embedded_track(),
533| | "Pixel-style Motion Photo carries an embedded MP4 track"
534| | );
535| 1| let exif: Exif = iter.into();
536| 1| assert!(exif.has_embedded_track(), "flag survives From<ExifIter>");
537| 1| }
538| |
539| | #[test]
540| 1| fn has_embedded_track_false_for_plain_jpeg_and_heic() {
541| | use crate::{MediaParser, MediaSource};
542| 2| for path in ["testdata/exif.jpg", "testdata/exif.heic"] {
^1
543| 2| let mut parser = MediaParser::new();
544| 2| let iter = parser.parse_exif(MediaSource::open(path).unwrap()).unwrap();
545| 2| assert!(
546| 2| !iter.has_embedded_track(),
547| | "{path} has no Motion Photo / paired track signal"
548| | );
549| 2| let exif: Exif = iter.into();
550| 2| assert!(!exif.has_embedded_track());
551| | }
552| 1| }
553| |
554| | #[test]
555| | #[allow(deprecated)]
556| 1| fn deprecated_has_embedded_media_still_works() {
557| | use crate::{MediaParser, MediaSource};
558| 1| let mut parser = MediaParser::new();
559| 1| let ms = MediaSource::open("testdata/motion_photo_pixel_synth.jpg").unwrap();
560| 1| let iter = parser.parse_exif(ms).unwrap();
561| | // Deprecated alias must still forward to the new method.
562| 1| assert_eq!(iter.has_embedded_media(), iter.has_embedded_track());
563| 1| let exif: Exif = iter.into();
564| 1| assert_eq!(exif.has_embedded_media(), exif.has_embedded_track());
565| 1| }
566| |
567| | /// End-to-end: `has_embedded_track == true` ⇒ `parse_track` extracts a
568| | /// real `TrackInfo` from the same source. This locks the v3.1 contract
569| | /// for Pixel/Google Motion Photo JPEGs.
570| | #[test]
571| 1| fn parse_track_extracts_motion_photo_trailer() {
572| | use crate::{MediaParser, MediaSource, TrackInfoTag};
573| 1| let path = "testdata/motion_photo_pixel_synth.jpg";
574| |
575| 1| let mut p1 = MediaParser::new();
576| 1| let iter = p1.parse_exif(MediaSource::open(path).unwrap()).unwrap();
577| 1| assert!(iter.has_embedded_track());
578| |
579| 1| let mut p2 = MediaParser::new();
580| 1| let track = p2
581| 1| .parse_track(MediaSource::open(path).unwrap())
582| 1| .expect("parse_track must extract the trailer MP4");
583| 1| assert!(
584| 1| track.get(TrackInfoTag::Width).is_some() || track.get(TrackInfoTag::Height).is_some(),
^0 ^0
585| | "trailer should yield at least one geometry tag"
586| | );
587| 1| }
588| |
589| | /// Plain JPEGs (no Motion Photo XMP) must keep returning TrackNotFound.
590| | #[test]
591| 1| fn parse_track_on_plain_jpeg_returns_track_not_found() {
592| | use crate::{Error, MediaParser, MediaSource};
593| 1| let mut parser = MediaParser::new();
594| 1| let err = parser
595| 1| .parse_track(MediaSource::open("testdata/exif.jpg").unwrap())
596| 1| .unwrap_err();
597| 1| assert!(
598| 1| matches!(err, Error::TrackNotFound),
^0
599| | "expected TrackNotFound, got {err:?}"
600| | );
601| 1| }
602| |}
/home/min/dev/nom-exif/src/exif/exif_iter.rs:
1| |use std::{collections::HashSet, fmt::Debug};
2| |
3| |use bytes::Bytes;
4| |use nom::{number::complete, Parser};
5| |
6| |use crate::{
7| | error::EntryError,
8| | slice::SliceChecked,
9| | values::{DataFormat, EntryData, IRational, URational},
10| | EntryValue, ExifTag,
11| |};
12| |
13| |use super::{exif_exif::IFD_ENTRY_SIZE, GPSInfo, LatLng, TiffHeader};
14| |use crate::TagOrCode;
15| |
16| |/// Index of an IFD (Image File Directory) within an EXIF blob.
17| |///
18| |/// `0` = main image (`IfdIndex::MAIN`), `1` = thumbnail (`IfdIndex::THUMBNAIL`),
19| |/// `>=2` = sub-IFDs in the order encountered. Use the constants for the common
20| |/// cases and [`IfdIndex::new`] for raw indexing.
21| |#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22| |#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
23| |pub struct IfdIndex(usize);
24| |
25| |impl IfdIndex {
26| | /// Index of the main image IFD (always `0`).
27| | pub const MAIN: Self = IfdIndex(0);
28| |
29| | /// Index of the thumbnail IFD (`1` when present).
30| | pub const THUMBNAIL: Self = IfdIndex(1);
31| |
32| | /// Construct from a raw index. `0`/`1` correspond to [`Self::MAIN`] /
33| | /// [`Self::THUMBNAIL`]; values `>= 2` are sub-IFDs.
34| 8.85k| pub const fn new(index: usize) -> Self {
35| 8.85k| IfdIndex(index)
36| 8.85k| }
37| |
38| | /// Underlying raw index as a `usize`.
39| 4.63k| pub const fn as_usize(self) -> usize {
40| 4.63k| self.0
41| 4.63k| }
42| |}
43| |
44| |impl std::fmt::Display for IfdIndex {
45| 266| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46| 266| write!(f, "ifd{}", self.0)
47| 266| }
48| |}
49| |
50| |/// Eager view into a single Exif entry. Yielded by [`crate::Exif::iter`] and
51| |/// designed to be cheap to copy: the `value` is a borrow into the parent
52| |/// [`crate::Exif`].
53| |///
54| |/// # Why pub fields instead of getters?
55| |///
56| |/// `ifd`, `tag`, and `value` are independent — there is no cross-field
57| |/// invariant to enforce. The Rust idiom for plain data carriers (cf.
58| |/// [`std::ops::Range`]) is `pub` fields. The lazy yield type
59| |/// [`crate::ExifIterEntry`] uses *private* fields because it carries a
60| |/// `value xor error` invariant.
61| |#[derive(Clone, Copy, Debug)]
62| |pub struct ExifEntry<'a> {
63| | pub ifd: IfdIndex,
64| | pub tag: TagOrCode,
65| | pub value: &'a crate::EntryValue,
66| |}
67| |
68| |/// Represents an additional TIFF data block to be processed after the primary block.
69| |/// Used for CR3 files with multiple CMT boxes (CMT1, CMT2, CMT3).
70| |#[derive(Clone)]
71| |pub(crate) struct TiffDataBlock {
72| | /// Block identifier (e.g., "CMT1", "CMT2", "CMT3")
73| | #[allow(dead_code)]
74| | pub block_id: String,
75| | /// Pre-sliced bytes view for this block's data
76| | pub data: Bytes,
77| | /// TIFF header information (optional, if known)
78| | pub header: Option<TiffHeader>,
79| |}
80| |
81| |/// Parses header from input data, and returns an [`ExifIter`].
82| |///
83| |/// All entries are lazy-parsed. That is, only when you iterate over
84| |/// [`ExifIter`] will the IFD entries be parsed one by one.
85| |///
86| |/// The one exception is the time zone entries. The method will try to find
87| |/// and parse the time zone data first, so we can correctly parse all time
88| |/// information in subsequent iterates.
89| |#[tracing::instrument]
90| 107|pub(crate) fn input_into_iter(
91| 107| input: impl Into<bytes::Bytes> + Debug,
92| 107| state: Option<TiffHeader>,
93| 107|) -> crate::Result<ExifIter> {
94| 107| let input: bytes::Bytes = input.into();
95| 107| let header = match state {
96| | // header has been parsed, and header has been skipped, input data
97| | // is the IFD data
98| 3| Some(header) => header,
99| | _ => {
100| | // header has not been parsed, input data includes IFD header
101| 104| let (_, header) = TiffHeader::parse(&input[..]).map_err(|e| {
^0
102| 0| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::TiffHeader)
103| 0| })?;
104| |
105| 104| tracing::debug!(
106| | ?header,
107| 0| data_len = format!("{:#x}", input.len()),
108| | "TIFF header parsed"
109| | );
110| 104| header
111| | }
112| | };
113| |
114| 107| let start = header.ifd0_offset as usize;
115| 107| if start > input.len() {
116| 0| return Err(crate::Error::UnexpectedEof {
117| 0| context: "exif iter init",
118| 0| });
119| 107| }
120| 107| tracing::debug!(?header, offset = start);
121| |
122| 107| let mut ifd0 = IfdIter::try_new(0, input.clone(), header.to_owned(), start, None)?;
^0
123| |
124| 107| let tz = ifd0.find_tz_offset();
125| 107| ifd0.tz = tz.clone();
126| 107| let iter: ExifIter = ExifIter::new(input, header, tz, ifd0);
127| |
128| 107| tracing::debug!(?iter, "got IFD0");
129| |
130| 107| Ok(iter)
131| 107|}
132| |
133| |/// An iterator version of [`Exif`](crate::Exif). Use [`ExifIterEntry`] as
134| |/// iterator items.
135| |///
136| |/// Clone an `ExifIter` is very cheap; the underlying data is shared
137| |/// via `bytes::Bytes` reference counting.
138| |///
139| |/// The new cloned `ExifIter`'s iteration index will be reset to the first one.
140| |///
141| |/// If you want to convert an `ExifIter` `into` an [`Exif`](crate::Exif), you probably want
142| |/// to clone the `ExifIter` and use the new cloned one to do the converting.
143| |/// Since the original's iteration index may have been modified by
144| |/// `Iterator::next()` calls.
145| |pub struct ExifIter {
146| | input: Bytes,
147| | tiff_header: TiffHeader,
148| | tz: Option<String>,
149| | ifd0: IfdIter,
150| |
151| | // Iterating status
152| | ifds: Vec<IfdIter>,
153| | visited_offsets: HashSet<usize>,
154| |
155| | // Multi-block support for CR3 files with multiple CMT boxes
156| | /// Additional TIFF data blocks to process after the primary block
157| | additional_blocks: Vec<TiffDataBlock>,
158| | /// Current block index: 0 = primary block, 1+ = additional blocks
159| | current_block_index: usize,
160| | /// Tags encountered so far for duplicate filtering (ifd_index, tag_code)
161| | encountered_tags: HashSet<(usize, u16)>,
162| | has_embedded_track: bool,
163| |}
164| |
165| |impl Debug for ExifIter {
166| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
167| 0| f.debug_struct("ExifIter")
168| 0| .field("data len", &self.input.len())
169| 0| .field("tiff_header", &self.tiff_header)
170| 0| .field("ifd0", &self.ifd0)
171| 0| .field("state", &self.ifds.first().map(|x| (x.index, x.pos)))
172| 0| .field("ifds num", &self.ifds.len())
173| 0| .field("additional_blocks", &self.additional_blocks.len())
174| 0| .field("current_block_index", &self.current_block_index)
175| 0| .finish_non_exhaustive()
176| 0| }
177| |}
178| |
179| |impl Clone for ExifIter {
180| 1| fn clone(&self) -> Self {
181| 1| self.clone_rewound()
182| 1| }
183| |}
184| |
185| |impl ExifIter {
186| 107| pub(crate) fn new(
187| 107| input: bytes::Bytes,
188| 107| tiff_header: TiffHeader,
189| 107| tz: Option<String>,
190| 107| ifd0: IfdIter,
191| 107| ) -> ExifIter {
192| 107| let ifds = vec![ifd0.clone()];
193| 107| ExifIter {
194| 107| input,
195| 107| tiff_header,
196| 107| tz,
197| 107| ifd0,
198| 107| ifds,
199| 107| visited_offsets: HashSet::new(),
200| 107| additional_blocks: Vec::new(),
201| 107| current_block_index: 0,
202| 107| encountered_tags: HashSet::new(),
203| 107| has_embedded_track: false,
204| 107| }
205| 107| }
206| |
207| | /// Clone with iteration state reset to entry 0.
208| | ///
209| | /// Cheap: `ExifIter` shares its underlying `bytes::Bytes` via refcount.
210| 116| pub fn clone_rewound(&self) -> Self {
211| 116| let ifd0 = self.ifd0.clone_and_rewind();
212| 116| let ifds = vec![ifd0.clone()];
213| 116| Self {
214| 116| input: self.input.clone(),
215| 116| tiff_header: self.tiff_header.clone(),
216| 116| tz: self.tz.clone(),
217| 116| ifd0,
218| 116| ifds,
219| 116| visited_offsets: HashSet::new(),
220| 116| additional_blocks: self.additional_blocks.clone(),
221| 116| current_block_index: 0,
222| 116| encountered_tags: HashSet::new(),
223| 116| has_embedded_track: self.has_embedded_track,
224| 116| }
225| 116| }
226| |
227| | /// Reset iteration to the first entry (in-place). After this call,
228| | /// `next()` yields entries starting from IFD0 entry 0 again.
229| 1| pub fn rewind(&mut self) {
230| 1| let ifd0 = self.ifd0.clone_and_rewind();
231| 1| self.ifds = vec![ifd0.clone()];
232| 1| self.ifd0 = ifd0;
233| 1| self.visited_offsets.clear();
234| 1| self.current_block_index = 0;
235| 1| self.encountered_tags.clear();
236| 1| }
237| |
238| | /// Try to find and parse GPS information.
239| | ///
240| | /// Calling this method won't affect the iterator's state.
241| | ///
242| | /// Returns:
243| | ///
244| | /// - An `Ok<Some<GPSInfo>>` if gps info is found and parsed successfully.
245| | /// - An `Ok<None>` if gps info is not found.
246| | /// - An `Err` if gps info is found but parsing failed.
247| | #[tracing::instrument(skip_all)]
248| 96| pub fn parse_gps(&self) -> crate::Result<Option<GPSInfo>> {
249| 96| let mut iter = self.clone_rewound();
250| 3.57k| let Some(gps) = iter.find(|x| {
^79 ^96 ^96
251| 3.57k| tracing::info!(?x, "find");
252| 3.57k| x.tag().tag().is_some_and(|t| t == ExifTag::GPSInfo)
^3.50k^3.50k
253| 3.57k| }) else {
254| 17| tracing::warn!(ifd0 = ?iter.ifds.first(), "GPSInfo not found");
255| 17| return Ok(None);
256| | };
257| |
258| 79| let offset = match gps.result() {
259| 79| Ok(v) => {
260| 79| if let Some(offset) = v.as_u32() {
261| 79| offset
262| | } else {
263| 0| return Err(EntryError::InvalidValue("invalid gps offset").into());
264| | }
265| | }
266| 0| Err(e) => return Err(e.clone().into()),
267| | };
268| 79| if offset as usize >= iter.input.len() {
269| 0| return Err(crate::Error::Malformed {
270| 0| kind: crate::error::MalformedKind::IfdEntry,
271| 0| message: "GPSInfo offset out of range".into(),
272| 0| });
273| 79| }
274| |
275| 79| let mut gps_subifd = match IfdIter::try_new(
^71
276| 79| gps.ifd().as_usize(),
277| 79| iter.input.clone(),
278| 79| iter.tiff_header,
279| 79| offset as usize,
280| 79| iter.tz.clone(),
281| 79| ) {
282| 71| Ok(ifd0) => ifd0.tag_code(ExifTag::GPSInfo.code()),
283| 8| Err(e) => return Err(e),
284| | };
285| 71| Ok(gps_subifd.parse_gps_info())
286| 96| }
287| |
288| | /// Add an additional TIFF data block to be iterated after the current block.
289| | /// Used internally for CR3 files with multiple CMT boxes.
290| | ///
291| | /// # Arguments
292| | /// * `block_id` - Identifier for this TIFF block (e.g., "CMT2", "CMT3")
293| | /// * `data` - Pre-sliced `Bytes` view containing this block's TIFF data
294| | /// * `header` - Optional TIFF header if already parsed
295| 1| pub(crate) fn add_tiff_block(
296| 1| &mut self,
297| 1| block_id: String,
298| 1| data: bytes::Bytes,
299| 1| header: Option<TiffHeader>,
300| 1| ) {
301| 1| self.additional_blocks.push(TiffDataBlock {
302| 1| block_id,
303| 1| data,
304| 1| header,
305| 1| });
306| 1| }
307| |
308| | /// Internal-only setter used by [`crate::MediaParser::parse_exif`] to
309| | /// stamp the iterator with content-detected embedded-track information.
310| 83| pub(crate) fn set_has_embedded_track(&mut self, v: bool) {
311| 83| self.has_embedded_track = v;
312| 83| }
313| |
314| | /// Whether the source file is known to embed a paired media track that
315| | /// `parse_exif` did *not* surface — a Pixel/Google or Samsung Galaxy
316| | /// Motion Photo (JPEG with `GCamera:MotionPhoto` XMP and an MP4
317| | /// trailer). Use [`crate::MediaParser::parse_track`] on the same
318| | /// source to extract the embedded track.
319| | ///
320| | /// **Content-detected, not MIME-guessed**: returns `true` only when
321| | /// the parser observes concrete signals during `parse_exif`
322| | /// (`GCamera:MotionPhoto="1"` plus a `Container:Directory` /
323| | /// `MotionPhotoOffset` / `MicroVideoOffset`). A plain JPEG or HEIC
324| | /// without such signals returns `false`.
325| | ///
326| | /// **Coverage**: Pixel/Google Motion Photos and Samsung Galaxy
327| | /// Motion Photos that use the Adobe XMP Container directory format
328| | /// (JPEG variants).
329| 82| pub fn has_embedded_track(&self) -> bool {
330| 82| self.has_embedded_track
331| 82| }
332| |
333| | /// Deprecated alias for [`Self::has_embedded_track`].
334| | #[deprecated(
335| | since = "3.1.0",
336| | note = "renamed to `has_embedded_track`; the original `has_embedded_media` was too vague and lumped in still-image previews"
337| | )]
338| 1| pub fn has_embedded_media(&self) -> bool {
339| 1| self.has_embedded_track()
340| 1| }
341| |}
342| |
343| |/// Lazy yield from [`ExifIter`]. Carries a *value xor error* invariant —
344| |/// every entry holds exactly one of [`Self::value`] or [`Self::error`].
345| |///
346| |/// # Why private fields?
347| |///
348| |/// Public fields would let callers construct nonsense like `value=Some,
349| |/// error=Some`. Private fields + getters preserve the invariant while
350| |/// exposing the natural API: [`Self::result`] for borrowed access,
351| |/// [`Self::into_result`] for ownership transfer (consumes `self`, no panic
352| |/// path).
353| |#[derive(Clone)]
354| |pub struct ExifIterEntry {
355| | ifd: IfdIndex,
356| | tag: TagOrCode,
357| | res: Result<EntryValue, crate::error::EntryError>,
358| |}
359| |
360| |impl ExifIterEntry {
361| | /// IFD this entry was found in (`IfdIndex::MAIN` for the primary image).
362| 4.67k| pub fn ifd(&self) -> IfdIndex {
363| 4.67k| self.ifd
364| 4.67k| }
365| |
366| | /// Recognized tag, or raw `u16` code if not in [`ExifTag`].
367| 8.30k| pub fn tag(&self) -> TagOrCode {
368| 8.30k| self.tag
369| 8.30k| }
370| |
371| | /// Borrow the value. `None` iff this entry hit a parse error.
372| 198| pub fn value(&self) -> Option<&EntryValue> {
373| 198| self.res.as_ref().ok()
374| 198| }
375| |
376| | /// Borrow the error. `None` iff this entry parsed successfully.
377| 66| pub fn error(&self) -> Option<&crate::error::EntryError> {
378| 66| self.res.as_ref().err()
379| 66| }
380| |
381| | /// Borrow either value or error, mirroring the underlying invariant.
382| 343| pub fn result(&self) -> Result<&EntryValue, &crate::error::EntryError> {
383| 343| self.res.as_ref()
384| 343| }
385| |
386| | /// Consume self and return the value or error. No second-call panic
387| | /// path (the entry is moved out).
388| 4.46k| pub fn into_result(self) -> Result<EntryValue, crate::error::EntryError> {
389| 4.46k| self.res
390| 4.46k| }
391| |
392| 8.83k| pub(crate) fn make_ok(ifd: usize, tag: TagOrCode, v: EntryValue) -> Self {
393| 8.83k| Self {
394| 8.83k| ifd: IfdIndex::new(ifd),
395| 8.83k| tag,
396| 8.83k| res: Ok(v),
397| 8.83k| }
398| 8.83k| }
399| |}
400| |
401| |impl std::fmt::Debug for ExifIterEntry {
402| 2.79k| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
403| 2.79k| let value = match &self.res {
404| 2.79k| Ok(v) => format!("{v}"),
405| 0| Err(e) => format!("{e:?}"),
406| | };
407| 2.79k| f.debug_struct("ExifIterEntry")
408| 2.79k| .field("ifd", &self.ifd)
409| 2.79k| .field("tag", &self.tag)
410| 2.79k| .field("value", &value)
411| 2.79k| .finish()
412| 2.79k| }
413| |}
414| |
415| |const MAX_IFD_DEPTH: usize = 8;
416| |
417| |impl ExifIter {
418| | /// Attempt to load and start iterating the next additional TIFF block.
419| | /// Returns true if a new block was successfully loaded, false if no more blocks.
420| 112| fn load_next_block(&mut self) -> bool {
421| | // Move to the next additional block
422| 112| let block_index = self.current_block_index;
423| 112| if block_index >= self.additional_blocks.len() {
424| 110| return false;
425| 2| }
426| |
427| 2| let block = &self.additional_blocks[block_index];
428| 2| tracing::debug!(
429| | block_id = block.block_id,
430| | block_index,
431| | "Loading additional TIFF block"
432| | );
433| |
434| | // Get the data for this block from the shared input
435| 2| let block_data = block.data.clone();
436| 2| let header = block.header.clone();
437| |
438| | // Try to create an ExifIter for this block
439| 2| match input_into_iter(block_data, header) {
440| 2| Ok(iter) => {
441| | // Update our state with the new block's data
442| 2| self.ifd0 = iter.ifd0;
443| 2| self.ifds = vec![self.ifd0.clone()];
444| 2| self.visited_offsets.clear();
445| 2| self.current_block_index += 1;
446| |
447| 2| tracing::debug!(block_index, "Successfully loaded additional TIFF block");
448| 2| true
449| | }
450| 0| Err(e) => {
451| 0| tracing::warn!(
452| | block_index,
453| | error = %e,
454| | "Failed to load additional TIFF block, skipping"
455| | );
456| | // Move to next block and try again
457| 0| self.current_block_index += 1;
458| 0| self.load_next_block()
459| | }
460| | }
461| 112| }
462| |
463| | /// Check if a tag should be included based on duplicate filtering.
464| | /// Returns true if the tag should be included, false if it's a duplicate.
465| 8.83k| fn should_include_tag(&mut self, ifd_index: usize, tag_code: u16) -> bool {
466| 8.83k| let tag_key = (ifd_index, tag_code);
467| 8.83k| if self.encountered_tags.contains(&tag_key) {
468| 0| tracing::debug!(ifd_index, tag_code, "Skipping duplicate tag");
469| 0| false
470| | } else {
471| 8.83k| self.encountered_tags.insert(tag_key);
472| 8.83k| true
473| | }
474| 8.83k| }
475| |}
476| |
477| |impl Iterator for ExifIter {
478| | type Item = ExifIterEntry;
479| |
480| | #[tracing::instrument(skip_all)]
481| 8.94k| fn next(&mut self) -> Option<Self::Item> {
482| | loop {
483| 9.35k| if self.ifds.is_empty() {
484| | // Current block exhausted, try to load next additional block
485| 112| if !self.load_next_block() {
486| 110| tracing::debug!(?self, "all IFDs and blocks have been parsed");
487| 110| return None;
488| 2| }
489| | // Continue with the newly loaded block
490| 2| continue;
491| 9.24k| }
492| |
493| 9.24k| if self.ifds.len() > MAX_IFD_DEPTH {
494| 0| let depth = self.ifds.len();
495| 0| self.ifds.clear();
496| 0| tracing::error!(
497| | ifds_depth = depth,
498| | "ifd depth is too deep, just go back to ifd0"
499| | );
500| 0| self.ifds.push(self.ifd0.clone_with_state());
501| 9.24k| }
502| |
503| 9.24k| let mut ifd = self.ifds.pop()?;
^0
504| 9.24k| let cur_ifd_idx = ifd.ifd_idx;
505| 9.24k| match ifd.next() {
506| 8.90k| Some((tag_code, entry)) => {
507| 8.90k| tracing::debug!(ifd = ifd.ifd_idx, ?tag_code, "next tag entry");
508| |
509| 8.90k| match entry {
510| 366| IfdEntry::IfdNew(new_ifd) => {
511| 366| if new_ifd.offset > 0 {
512| 366| if self.visited_offsets.contains(&new_ifd.offset) {
513| | // Ignore repeated ifd parsing to avoid dead looping
514| 0| continue;
515| 366| }
516| 366| self.visited_offsets.insert(new_ifd.offset);
517| 0| }
518| |
519| 366| let is_subifd = if new_ifd.ifd_idx == ifd.ifd_idx {
520| | // Push the current ifd before enter sub-ifd.
521| 300| self.ifds.push(ifd);
522| 300| tracing::debug!(?tag_code, ?new_ifd, "got new SUB-IFD");
523| 300| true
524| | } else {
525| | // Otherwise this is a next ifd. It means that the
526| | // current ifd has been parsed, so we don't need to
527| | // push it.
528| 66| tracing::debug!("IFD{} parsing completed", cur_ifd_idx);
529| 66| tracing::debug!(?new_ifd, "got new IFD");
530| 66| false
531| | };
532| |
533| 366| let (ifd_idx, offset) = (new_ifd.ifd_idx, new_ifd.offset);
534| 366| self.ifds.push(new_ifd);
535| |
536| 366| if is_subifd {
537| | // Check for duplicates before returning sub-ifd entry
538| 300| let tc = tag_code.unwrap();
539| 300| if !self.should_include_tag(ifd_idx, tc.code()) {
540| 0| continue;
541| 300| }
542| | // Return sub-ifd as an entry
543| 300| return Some(ExifIterEntry::make_ok(
544| 300| ifd_idx,
545| 300| tc,
546| 300| EntryValue::U32(offset as u32),
547| 300| ));
548| 66| }
549| | }
550| 8.53k| IfdEntry::Entry(v) => {
551| 8.53k| let tc = tag_code.unwrap();
552| | // Check for duplicates before returning entry
553| 8.53k| if !self.should_include_tag(ifd.ifd_idx, tc.code()) {
554| 0| self.ifds.push(ifd);
555| 0| continue;
556| 8.53k| }
557| 8.53k| let res = Some(ExifIterEntry::make_ok(ifd.ifd_idx, tc, v));
558| 8.53k| self.ifds.push(ifd);
559| 8.53k| return res;
560| | }
561| 0| IfdEntry::Err(e) => {
562| 0| tracing::warn!(?tag_code, ?e, "parse ifd entry error");
563| 0| self.ifds.push(ifd);
564| 0| continue;
565| | }
566| | }
567| | }
568| 340| None => continue,
569| | }
570| | }
571| 8.94k| }
572| |}
573| |
574| |#[derive(Clone)]
575| |pub(crate) struct IfdIter {
576| | ifd_idx: usize,
577| | tag_code: Option<TagOrCode>,
578| |
579| | // starts from TIFF header
580| | input: Bytes,
581| |
582| | // ifd data offset
583| | offset: usize,
584| |
585| | header: TiffHeader,
586| | entry_num: u16,
587| |
588| | pub tz: Option<String>,
589| |
590| | // Iterating status
591| | index: u16,
592| | pos: usize,
593| |}
594| |
595| |impl Debug for IfdIter {
596| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
597| 0| f.debug_struct("IfdIter")
598| 0| .field("ifd_idx", &self.ifd_idx)
599| 0| .field("tag", &self.tag_code)
600| 0| .field("data len", &self.input.len())
601| 0| .field("tz", &self.tz)
602| 0| .field("header", &self.header)
603| 0| .field("entry_num", &self.entry_num)
604| 0| .field("index", &self.index)
605| 0| .field("pos", &self.pos)
606| 0| .finish()
607| 0| }
608| |}
609| |
610| |impl IfdIter {
611| 117| pub fn rewind(&mut self) {
612| 117| self.index = 0;
613| | // Skip the first two bytes, which is the entry num
614| 117| self.pos = self.offset + 2;
615| 117| }
616| |
617| 117| pub fn clone_and_rewind(&self) -> Self {
618| 117| let mut it = self.clone();
619| 117| it.rewind();
620| 117| it
621| 117| }
622| |
623| 462| pub fn tag_code_maybe(mut self, code: Option<u16>) -> Self {
624| 462| self.tag_code = code.map(|x| x.into());
^396^396
625| 462| self
626| 462| }
627| |
628| 71| pub fn tag_code(mut self, code: u16) -> Self {
629| 71| self.tag_code = Some(code.into());
630| 71| self
631| 71| }
632| |
633| 2| fn is_gps_subifd(&self) -> bool {
634| 0| matches!(
635| 2| self.tag_code.as_ref().and_then(|t| t.tag()),
636| | Some(ExifTag::GPSInfo)
637| | )
638| 2| }
639| |
640| | #[allow(unused)]
641| 0| pub fn tag(mut self, tag: TagOrCode) -> Self {
642| 0| self.tag_code = Some(tag);
643| 0| self
644| 0| }
645| |
646| | #[tracing::instrument(skip(input))]
647| 666| pub fn try_new(
648| 666| ifd_idx: usize,
649| 666| input: Bytes,
650| 666| header: TiffHeader,
651| 666| offset: usize,
652| 666| tz: Option<String>,
653| 666| ) -> crate::Result<Self> {
654| 666| if input.len() < 2 {
655| 0| return Err(crate::Error::Malformed {
656| 0| kind: crate::error::MalformedKind::TiffHeader,
657| 0| message: "ifd data too small to decode entry num".into(),
658| 0| });
659| 666| }
660| | // should use the complete header data to parse ifd entry num
661| 666| assert!(offset <= input.len());
662| 666| let ifd_data = input.slice(offset..);
663| 640| let (_, entry_num) =
664| 666| TiffHeader::parse_ifd_entry_num(&ifd_data, header.endian).map_err(|e| {
^26
665| 26| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::TiffHeader)
666| 26| })?;
667| |
668| 640| Ok(Self {
669| 640| ifd_idx,
670| 640| tag_code: None,
671| 640| input,
672| 640| offset,
673| 640| header,
674| 640| entry_num,
675| 640| tz,
676| 640| // Skip the first two bytes, which is the entry num
677| 640| pos: offset + 2,
678| 640| index: 0,
679| 640| })
680| 666| }
681| |
682| 10.7k| fn parse_tag_entry(&self, entry_data: &[u8]) -> Option<(u16, IfdEntry)> {
683| 10.7k| let endian = self.header.endian;
684| 10.7k| let (_, (tag, data_format, components_num, value_or_offset)) = (
685| 10.7k| complete::u16::<_, nom::error::Error<_>>(endian),
686| 10.7k| complete::u16(endian),
687| 10.7k| complete::u32(endian),
688| 10.7k| complete::u32(endian),
689| 10.7k| )
690| 10.7k| .parse(entry_data)
691| 10.7k| .ok()?;
^0
692| |
693| | // Tag 0 outside the GPS sub-IFD is treated as a sentinel for
694| | // zero-padded malformed IFDs (overstated `entry_num`) and aborts
695| | // iteration. Inside the GPS sub-IFD it is the legitimate
696| | // GPSVersionID — let it parse normally.
697| 10.7k| if tag == 0 && !self.is_gps_subifd() {
^2
698| 0| return None;
699| 10.7k| }
700| |
701| 10.7k| let df: DataFormat = match DataFormat::try_from(data_format) {
702| 10.7k| Ok(df) => df,
703| 0| Err(bad) => {
704| 0| let t: TagOrCode = tag.into();
705| 0| tracing::warn!(tag = ?t, format = bad, "invalid entry data format");
706| 0| return Some((
707| 0| tag,
708| 0| IfdEntry::Err(EntryError::InvalidShape {
709| 0| format: bad,
710| 0| count: components_num,
711| 0| }),
712| 0| ));
713| | }
714| | };
715| 10.7k| let (tag, res) = self.parse_entry(tag, df, components_num, entry_data, value_or_offset);
716| 10.7k| Some((tag, res))
717| 10.7k| }
718| |
719| 4.96k| fn get_data_pos(&self, value_or_offset: u32) -> usize {
720| | // value_or_offset.saturating_sub(self.offset)
721| 4.96k| value_or_offset as usize
722| 4.96k| }
723| |
724| 10.7k| fn parse_entry(
725| 10.7k| &self,
726| 10.7k| tag: u16,
727| 10.7k| data_format: DataFormat,
728| 10.7k| components_num: u32,
729| 10.7k| entry_data: &[u8],
730| 10.7k| value_or_offset: u32,
731| 10.7k| ) -> (u16, IfdEntry) {
732| | // get component_size according to data format
733| 10.7k| let component_size = data_format.component_size();
734| |
735| | // get entry data
736| 10.7k| let size = components_num as usize * component_size;
737| 10.7k| let data = if size <= 4 {
738| 6.26k| &entry_data[8..8 + size] // Safe-slice
739| | } else {
740| 4.48k| let start = self.get_data_pos(value_or_offset);
741| 4.48k| let end = start + size;
742| 4.48k| let Some(data) = self.input.slice_checked(start..end) else {
743| 0| tracing::warn!(
744| | "entry data overflow, tag: {:04x} start: {:08x} end: {:08x} ifd data len {:08x}",
745| | tag,
746| | start,
747| | end,
748| 0| self.input.len(),
749| | );
750| 0| return (
751| 0| tag,
752| 0| IfdEntry::Err(EntryError::Truncated {
753| 0| needed: size,
754| 0| available: self.input.len().saturating_sub(start),
755| 0| }),
756| 0| );
757| | };
758| |
759| 4.48k| data
760| | };
761| |
762| 10.7k| if SUBIFD_TAGS.contains(&tag) {
763| 409| if let Some(value) = self.new_ifd_iter(self.ifd_idx, value_or_offset, Some(tag)) {
^396
764| 396| return (tag, value);
765| 13| }
766| 10.3k| }
767| |
768| 10.3k| let entry = EntryData {
769| 10.3k| endian: self.header.endian,
770| 10.3k| tag,
771| 10.3k| data,
772| 10.3k| data_format,
773| 10.3k| components_num,
774| 10.3k| };
775| 10.3k| match EntryValue::parse(&entry, &self.tz) {
776| 10.3k| Ok(v) => (tag, IfdEntry::Entry(v)),
777| 0| Err(e) => (tag, IfdEntry::Err(e)),
778| | }
779| 10.7k| }
780| |
781| 480| fn new_ifd_iter(
782| 480| &self,
783| 480| ifd_idx: usize,
784| 480| value_or_offset: u32,
785| 480| tag: Option<u16>,
786| 480| ) -> Option<IfdEntry> {
787| 480| let offset = self.get_data_pos(value_or_offset);
788| 480| if offset < self.input.len() {
789| 480| match IfdIter::try_new(
790| 480| ifd_idx,
791| 480| self.input.clone(),
792| 480| self.header.to_owned(),
793| 480| offset,
794| 480| self.tz.clone(),
795| 480| ) {
796| 462| Ok(iter) => return Some(IfdEntry::IfdNew(iter.tag_code_maybe(tag))),
797| 18| Err(e) => {
798| 18| tracing::warn!(?tag, ?e, "Create next/sub IFD failed");
799| | }
800| | }
801| | // return (
802| | // tag,
803| | // // IfdEntry::Ifd {
804| | // // idx: self.ifd_idx,
805| | // // offset: value_or_offset,
806| | // // },
807| | // IfdEntry::IfdNew(),
808| | // );
809| 0| }
810| 18| None
811| 480| }
812| |
813| 107| pub fn find_exif_iter(&self) -> Option<IfdIter> {
814| 107| let endian = self.header.endian;
815| | // find ExifOffset
816| 834| for i in 0..self.entry_num {
^107
817| 834| let pos = self.pos + i as usize * IFD_ENTRY_SIZE;
818| 834| let (_, tag) =
819| 834| complete::u16::<_, nom::error::Error<_>>(endian)(&self.input[pos..]).ok()?;
^0
820| 834| if tag == ExifTag::ExifOffset.code() {
821| 96| let entry_data = self.input.slice_checked(pos..pos + IFD_ENTRY_SIZE)?;
^0
822| 96| let (_, entry) = self.parse_tag_entry(entry_data)?;
^0
823| 96| match entry {
824| 96| IfdEntry::IfdNew(iter) => return Some(iter),
825| 0| IfdEntry::Entry(_) | IfdEntry::Err(_) => return None,
826| | }
827| 738| }
828| | }
829| 11| None
830| 107| }
831| |
832| 107| pub fn find_tz_offset(&self) -> Option<String> {
833| 107| let iter = self.find_exif_iter()?;
^96 ^11
834| 96| let mut offset = None;
835| 1.11k| for entry in iter {
^96
836| 1.11k| let Some(tag) = entry.0 else {
837| 0| continue;
838| | };
839| 1.11k| if tag.code() == ExifTag::OffsetTimeOriginal.code()
840| 1.02k| || tag.code() == ExifTag::OffsetTimeDigitized.code()
841| | {
842| 89| return entry.1.as_str().map(|x| x.to_owned());
843| 1.02k| } else if tag.code() == ExifTag::OffsetTime.code() {
844| 13| offset = entry.1.as_str().map(|x| x.to_owned());
845| 1.01k| }
846| | }
847| |
848| 7| offset
849| 107| }
850| |
851| | // Assume the current ifd is GPSInfo subifd.
852| 71| pub fn parse_gps_info(&mut self) -> Option<GPSInfo> {
853| | use crate::exif::gps::{Altitude, LatRef, LonRef, Speed, SpeedUnit};
854| |
855| 71| let mut latitude_ref = None;
856| 71| let mut latitude = None;
857| 71| let mut longitude_ref = None;
858| 71| let mut longitude = None;
859| 71| let mut altitude_ref = None;
860| 71| let mut altitude_value = None;
861| 71| let mut speed_unit = None;
862| 71| let mut speed_value = None;
863| 71| let mut has_data = false;
864| |
865| 699| for (tag, entry) in self {
^71
866| 699| let Some(tag) = tag.and_then(|x| x.tag()) else {
^683
867| 16| continue;
868| | };
869| 683| has_data = true;
870| 683| match tag {
871| 71| ExifTag::GPSLatitudeRef => {
872| 71| latitude_ref = entry.as_char().and_then(LatRef::from_char);
873| 71| }
874| 71| ExifTag::GPSLongitudeRef => {
875| 71| longitude_ref = entry.as_char().and_then(LonRef::from_char);
876| 71| }
877| 70| ExifTag::GPSAltitudeRef => {
878| 70| altitude_ref = entry.as_u8();
879| 70| }
880| | ExifTag::GPSLatitude => {
881| 71| if let Some(v) = entry.as_urational_slice() {
^17
882| 17| latitude = LatLng::try_from(v).ok();
883| 54| } else if let Some(v) = entry.as_irational_slice() {
884| 54| latitude = LatLng::try_from(v).ok();
885| 54| }
^0
886| | }
887| | ExifTag::GPSLongitude => {
888| 71| if let Some(v) = entry.as_urational_slice() {
^17
889| 17| longitude = LatLng::try_from(v).ok();
890| 54| } else if let Some(v) = entry.as_irational_slice() {
891| 54| longitude = LatLng::try_from(v).ok();
892| 54| }
^0
893| | }
894| | ExifTag::GPSAltitude => {
895| 70| if let Some(v) = entry.as_urational() {
896| 70| altitude_value = Some(*v);
897| 70| } else if let Some(v) = entry.as_irational() {
^0 ^0
898| 0| if let Ok(u) = URational::try_from(*v) {
899| 0| altitude_value = Some(u);
900| 0| }
901| 0| }
902| | }
903| 16| ExifTag::GPSSpeedRef => {
904| 16| speed_unit = entry.as_char().and_then(SpeedUnit::from_char);
905| 16| }
906| | ExifTag::GPSSpeed => {
907| 16| if let Some(v) = entry.as_urational() {
908| 16| speed_value = Some(*v);
909| 16| } else if let Some(v) = entry.as_irational() {
^0 ^0
910| 0| if let Ok(u) = URational::try_from(*v) {
911| 0| speed_value = Some(u);
912| 0| }
913| 0| }
914| | }
915| 227| _ => (),
916| | }
917| | }
918| |
919| 71| if !has_data {
920| 0| tracing::warn!("GPSInfo data not found");
921| 0| return None;
922| 71| }
923| |
924| 71| let altitude = match (altitude_ref, altitude_value) {
925| 70| (Some(0), Some(v)) => Altitude::AboveSeaLevel(v),
926| 0| (Some(1), Some(v)) => Altitude::BelowSeaLevel(v),
927| 1| _ => Altitude::Unknown,
928| | };
929| |
930| 71| let speed = match (speed_unit, speed_value) {
931| 16| (Some(unit), Some(value)) => Some(Speed { unit, value }),
932| 55| _ => None,
933| | };
934| |
935| 71| Some(GPSInfo {
936| 71| latitude_ref: latitude_ref.unwrap_or(LatRef::North),
937| 71| latitude: latitude.unwrap_or_default(),
938| 71| longitude_ref: longitude_ref.unwrap_or(LonRef::East),
939| 71| longitude: longitude.unwrap_or_default(),
940| 71| altitude,
941| 71| speed,
942| 71| })
943| 71| }
944| |
945| 0| fn clone_with_state(&self) -> IfdIter {
946| 0| let mut it = self.clone();
947| 0| it.index = self.index;
948| 0| it.pos = self.pos;
949| 0| it
950| 0| }
951| |}
952| |
953| |#[derive(Debug)]
954| |pub(crate) enum IfdEntry {
955| | IfdNew(IfdIter), // ifd index
956| | Entry(EntryValue),
957| | Err(EntryError),
958| |}
959| |
960| |impl IfdEntry {
961| 70| pub fn as_u8(&self) -> Option<u8> {
962| 70| if let IfdEntry::Entry(EntryValue::U8(v)) = self {
963| 70| Some(*v)
964| | } else {
965| 0| None
966| | }
967| 70| }
968| |
969| 158| pub fn as_char(&self) -> Option<char> {
970| 158| if let IfdEntry::Entry(EntryValue::Text(s)) = self {
971| 158| s.chars().next()
972| | } else {
973| 0| None
974| | }
975| 158| }
976| |
977| 0| fn as_irational(&self) -> Option<&IRational> {
978| 0| if let IfdEntry::Entry(EntryValue::IRational(v)) = self {
979| 0| Some(v)
980| | } else {
981| 0| None
982| | }
983| 0| }
984| |
985| 108| fn as_irational_slice(&self) -> Option<&Vec<IRational>> {
986| 108| if let IfdEntry::Entry(EntryValue::IRationalArray(v)) = self {
987| 108| Some(v)
988| | } else {
989| 0| None
990| | }
991| 108| }
992| |
993| 86| fn as_urational(&self) -> Option<&URational> {
994| 86| if let IfdEntry::Entry(EntryValue::URational(v)) = self {
995| 86| Some(v)
996| | } else {
997| 0| None
998| | }
999| 86| }
1000| |
1001| 142| fn as_urational_slice(&self) -> Option<&Vec<URational>> {
1002| 142| if let IfdEntry::Entry(EntryValue::URationalArray(v)) = self {
^34
1003| 34| Some(v)
1004| | } else {
1005| 108| None
1006| | }
1007| 142| }
1008| |
1009| 102| fn as_str(&self) -> Option<&str> {
1010| 102| if let IfdEntry::Entry(e) = self {
1011| 102| e.as_str()
1012| | } else {
1013| 0| None
1014| | }
1015| 102| }
1016| |}
1017| |
1018| |pub(crate) const SUBIFD_TAGS: &[u16] = &[ExifTag::ExifOffset.code(), ExifTag::GPSInfo.code()];
1019| |
1020| |impl Iterator for IfdIter {
1021| | type Item = (Option<TagOrCode>, IfdEntry);
1022| |
1023| | #[tracing::instrument(skip(self))]
1024| 11.1k| fn next(&mut self) -> Option<Self::Item> {
1025| 11.1k| tracing::debug!(
1026| | ifd = self.ifd_idx,
1027| | index = self.index,
1028| | entry_num = self.entry_num,
1029| 0| offset = format!("{:08x}", self.offset),
1030| 0| pos = format!("{:08x}", self.pos),
1031| | "next IFD entry"
1032| | );
1033| 11.1k| if self.input.len() < self.pos + IFD_ENTRY_SIZE {
1034| 12| return None;
1035| 11.1k| }
1036| |
1037| 11.1k| let endian = self.header.endian;
1038| 11.1k| if self.index > self.entry_num {
1039| 0| return None;
1040| 11.1k| }
1041| 11.1k| if self.index == self.entry_num {
1042| 472| tracing::debug!(
1043| | self.ifd_idx,
1044| | self.index,
1045| | pos = self.pos,
1046| | "try to get next ifd"
1047| | );
1048| 472| self.index += 1;
1049| |
1050| | // next IFD offset
1051| 472| let (_, offset) =
1052| 472| complete::u32::<_, nom::error::Error<_>>(endian)(&self.input[self.pos..]).ok()?;
^0
1053| |
1054| 472| if offset == 0 {
1055| | // IFD parsing completed
1056| 401| tracing::debug!(?self, "IFD parsing completed");
1057| 401| return None;
1058| 71| }
1059| |
1060| 71| return self
1061| 71| .new_ifd_iter(self.ifd_idx + 1, offset, None)
1062| 71| .map(|x| (None, x));
^66 ^66
1063| 10.6k| }
1064| |
1065| 10.6k| let entry_data = self
1066| 10.6k| .input
1067| 10.6k| .slice_checked(self.pos..self.pos + IFD_ENTRY_SIZE)?;
^0
1068| 10.6k| self.index += 1;
1069| 10.6k| self.pos += IFD_ENTRY_SIZE;
1070| |
1071| 10.6k| let (tag, res) = self.parse_tag_entry(entry_data)?;
^0
1072| |
1073| 10.6k| Some((Some(tag.into()), res)) // Safe-slice
1074| 11.1k| }
1075| |}
1076| |
1077| |#[cfg(test)]
1078| |mod tests {
1079| |
1080| | use crate::exif::extract_exif_with_mime;
1081| | use crate::exif::input_into_iter;
1082| | use crate::file::MediaMimeImage;
1083| | use crate::slice::SubsliceRange;
1084| | use crate::testkit::read_sample;
1085| | use crate::Exif;
1086| | use test_case::test_case;
1087| |
1088| | #[test_case(
1089| | "exif.jpg",
1090| | "+08:00",
1091| | "2023-07-09T20:36:33+08:00",
1092| | MediaMimeImage::Jpeg
1093| | )]
1094| | #[test_case("exif-no-tz.jpg", "", "2023-07-09 20:36:33", MediaMimeImage::Jpeg)]
1095| | #[test_case("broken.jpg", "-", "2014-09-21 15:51:22", MediaMimeImage::Jpeg)]
1096| | #[test_case(
1097| | "exif.heic",
1098| | "+08:00",
1099| | "2022-07-22T21:26:32+08:00",
1100| | MediaMimeImage::Heic
1101| | )]
1102| | #[test_case(
1103| | "exif.avif",
1104| | "+08:00",
1105| | "2022-07-22T21:26:32+08:00",
1106| | MediaMimeImage::Avif
1107| | )]
1108| | #[test_case("tif.tif", "-", "-", MediaMimeImage::Tiff)]
1109| | #[test_case(
1110| | "fujifilm_x_t1_01.raf.meta",
1111| | "-",
1112| | "2014-01-30 12:49:13",
1113| | MediaMimeImage::Raf
1114| | )]
1115| 7| fn exif_iter_tz(path: &str, tz: &str, time: &str, img_type: MediaMimeImage) {
1116| 7| let buf = read_sample(path).unwrap();
1117| 7| let (data, _) = extract_exif_with_mime(img_type, &buf, None).unwrap();
1118| 7| let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
1119| 7| let iter = input_into_iter(bytes::Bytes::from(buf).slice(range), None).unwrap();
1120| 7| let expect = if tz == "-" {
1121| 3| None
1122| | } else {
1123| 4| Some(tz.to_string())
1124| | };
1125| 7| assert_eq!(iter.tz, expect);
1126| 7| let exif: Exif = iter.into();
1127| 7| let value = exif.get(crate::ExifTag::DateTimeOriginal);
1128| 7| if time == "-" {
1129| 1| assert!(value.is_none());
1130| | } else {
1131| 6| let value = value.unwrap();
1132| 6| assert_eq!(value.to_string(), time);
1133| | }
1134| 7| }
1135| |
1136| | #[test]
1137| 1| fn ifd_index_constants() {
1138| | use crate::IfdIndex;
1139| 1| assert_eq!(IfdIndex::MAIN.as_usize(), 0);
1140| 1| assert_eq!(IfdIndex::THUMBNAIL.as_usize(), 1);
1141| 1| }
1142| |
1143| | #[test]
1144| 1| fn ifd_index_roundtrip_via_new_and_as_usize() {
1145| | use crate::IfdIndex;
1146| 6| for raw in [0, 1, 2, 3, 7, 99] {
^1
1147| 6| assert_eq!(IfdIndex::new(raw).as_usize(), raw);
1148| | }
1149| 1| }
1150| |
1151| | #[test]
1152| 1| fn ifd_index_equality_and_hash() {
1153| | use crate::IfdIndex;
1154| | use std::collections::HashSet;
1155| 1| let mut set: HashSet<IfdIndex> = HashSet::new();
1156| 1| set.insert(IfdIndex::MAIN);
1157| 1| set.insert(IfdIndex::new(0)); // duplicate
1158| 1| set.insert(IfdIndex::THUMBNAIL);
1159| 1| assert_eq!(set.len(), 2);
1160| 1| }
1161| |
1162| | #[test]
1163| 1| fn ifd_index_display_format() {
1164| | use crate::IfdIndex;
1165| 1| assert_eq!(format!("{}", IfdIndex::MAIN), "ifd0");
1166| 1| assert_eq!(format!("{}", IfdIndex::new(7)), "ifd7");
1167| 1| }
1168| |
1169| | #[test]
1170| 1| fn tag_or_code_for_known_tag_resolves_to_tag_variant() {
1171| | use crate::{ExifTag, TagOrCode};
1172| 1| let t: TagOrCode = ExifTag::Make.code().into();
1173| 1| assert_eq!(t, TagOrCode::Tag(ExifTag::Make));
1174| 1| assert_eq!(t.code(), ExifTag::Make.code());
1175| 1| }
1176| |
1177| | #[test]
1178| 1| fn tag_or_code_for_unknown_tag_resolves_to_unknown_variant() {
1179| | use crate::TagOrCode;
1180| 1| let t: TagOrCode = 0xffff_u16.into();
1181| 1| assert_eq!(t, TagOrCode::Unknown(0xffff));
1182| 1| assert_eq!(t.code(), 0xffff);
1183| 1| }
1184| |
1185| | #[test]
1186| 1| fn exif_entry_pub_fields_construct_and_destructure() {
1187| | use crate::{EntryValue, ExifEntry, ExifTag, IfdIndex, TagOrCode};
1188| 1| let val = EntryValue::Text("vivo X90 Pro+".into());
1189| 1| let e = ExifEntry {
1190| 1| ifd: IfdIndex::MAIN,
1191| 1| tag: TagOrCode::Tag(ExifTag::Model),
1192| 1| value: &val,
1193| 1| };
1194| | // Pub fields: just match.
1195| 1| let ExifEntry { ifd, tag, value } = e;
1196| 1| assert_eq!(ifd, IfdIndex::MAIN);
1197| 1| assert_eq!(tag.code(), ExifTag::Model.code());
1198| 1| assert!(matches!(value, EntryValue::Text(_)));
^0
1199| | // Copy works because EntryValue is borrowed.
1200| 1| let _e2 = e;
1201| 1| let _e3 = e;
1202| 1| }
1203| |
1204| | #[test]
1205| 1| fn exif_iter_entry_value_xor_error_invariant() {
1206| | use crate::{MediaParser, MediaSource};
1207| 1| let mut parser = MediaParser::new();
1208| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1209| 66| for entry in parser.parse_exif(ms).unwrap() {
^1 ^1 ^1 ^1
1210| | // Exactly one of value / error is Some.
1211| 66| let has_v = entry.value().is_some();
1212| 66| let has_e = entry.error().is_some();
1213| 66| assert!(has_v ^ has_e, "entry must be value xor error");
1214| | // result() agrees with value()/error().
1215| 66| match entry.result() {
1216| 66| Ok(v) => assert_eq!(Some(v), entry.value()),
1217| 0| Err(e) => assert_eq!(Some(e), entry.error()),
1218| | }
1219| | }
1220| 1| }
1221| |
1222| | #[test]
1223| 1| fn exif_iter_entry_into_result_consumes_self() {
1224| | use crate::{MediaParser, MediaSource};
1225| 1| let mut parser = MediaParser::new();
1226| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1227| 1| let mut count_ok = 0usize;
1228| 66| for entry in parser.parse_exif(ms).unwrap() {
^1 ^1 ^1 ^1
1229| | // into_result consumes; once consumed, we can't call any other
1230| | // method (the entry is gone). This is the spec's panic-free
1231| | // replacement for v2's take_result.
1232| 66| if entry.into_result().is_ok() {
1233| 66| count_ok += 1;
1234| 66| }
^0
1235| | }
1236| 1| assert!(count_ok > 0);
1237| 1| }
1238| |
1239| | #[test]
1240| 1| fn exif_iter_entry_tag_returns_tag_or_code() {
1241| | use crate::{ExifTag, MediaParser, MediaSource, TagOrCode};
1242| 1| let mut parser = MediaParser::new();
1243| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1244| 1| let make_present = parser
1245| 1| .parse_exif(ms)
1246| 1| .unwrap()
1247| 58| .any(|e| matches!(e.tag(), TagOrCode::Tag(ExifTag::Make)));
^1
1248| 1| assert!(make_present);
1249| 1| }
1250| |
1251| | #[test]
1252| 1| fn exif_iter_rewind_resets_iteration_state() {
1253| | use crate::{MediaParser, MediaSource};
1254| 1| let mut parser = MediaParser::new();
1255| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1256| 1| let mut iter = parser.parse_exif(ms).unwrap();
1257| 1| let first_count = iter.by_ref().count();
1258| 1| assert!(first_count > 0);
1259| | // Already exhausted.
1260| 1| assert_eq!(iter.by_ref().count(), 0);
1261| 1| iter.rewind();
1262| 1| let after_rewind = iter.count();
1263| 1| assert_eq!(first_count, after_rewind);
1264| 1| }
1265| |
1266| | #[test]
1267| 1| fn exif_iter_clone_rewound_yields_independent_full_iter() {
1268| | use crate::{MediaParser, MediaSource};
1269| 1| let mut parser = MediaParser::new();
1270| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1271| 1| let mut iter = parser.parse_exif(ms).unwrap();
1272| 1| let _consumed = iter.by_ref().take(2).count();
1273| 1| let cloned = iter.clone_rewound();
1274| | // cloned starts from entry 0 even though `iter` consumed 2 entries.
1275| 1| let cloned_total = cloned.count();
1276| 1| let remaining = iter.count();
1277| 1| assert!(cloned_total > remaining);
1278| 1| }
1279| |
1280| | #[test]
1281| 1| fn exif_iter_parse_gps_returns_option_no_iteration_advance() {
1282| | use crate::{MediaParser, MediaSource};
1283| 1| let mut parser = MediaParser::new();
1284| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1285| 1| let iter = parser.parse_exif(ms).unwrap();
1286| 1| let gps = iter.parse_gps().unwrap();
1287| 1| assert!(gps.is_some());
1288| | // parse_gps doesn't drive the outer iterator.
1289| 1| let count = iter.count();
1290| 1| assert!(count > 0);
1291| 1| }
1292| |
1293| | // Regression test for https://github.com/mindeng/nom-exif/issues/50:
1294| | // GPS sub-IFDs whose first entry is GPSVersionID (tag 0x0000), as emitted
1295| | // by Sony A7C2 HIF files. A previous defensive `tag == 0` short-circuit
1296| | // in `parse_tag_entry` aborted iteration on that entry and discarded the
1297| | // whole sub-IFD. This builds the minimal little-endian TIFF that triggers
1298| | // it: IFD0 → GPSInfo → GPS sub-IFD with GPSVersionID up front.
1299| | #[test]
1300| 1| fn gps_subifd_first_entry_is_gpsversion_id_issue_50() {
1301| | use crate::exif::exif_iter::input_into_iter;
1302| | #[rustfmt::skip]
1303| 1| let tiff: &[u8] = &[
1304| 1| // TIFF header: little-endian, IFD0 at 0x08
1305| 1| b'I', b'I', 0x2a, 0x00,
1306| 1| 0x08, 0x00, 0x00, 0x00,
1307| 1|
1308| 1| // IFD0 @ 0x08: 1 entry → GPSInfo pointer to GPS sub-IFD @ 0x1a
1309| 1| 0x01, 0x00,
1310| 1| 0x25, 0x88, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00,
1311| 1| 0x1a, 0x00, 0x00, 0x00,
1312| 1| 0x00, 0x00, 0x00, 0x00, // no IFD1
1313| 1|
1314| 1| // GPS sub-IFD @ 0x1a: 5 entries
1315| 1| 0x05, 0x00,
1316| 1| // [0] GPSVersionID tag=0, BYTE×4, inline [2,3,0,0]
1317| 1| 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00,
1318| 1| 0x02, 0x03, 0x00, 0x00,
1319| 1| // [1] GPSLatitudeRef tag=1, ASCII×2 "N\0"
1320| 1| 0x01, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
1321| 1| b'N', 0x00, 0x00, 0x00,
1322| 1| // [2] GPSLatitude tag=2, RATIONAL×3 @ 0x5c
1323| 1| 0x02, 0x00, 0x05, 0x00, 0x03, 0x00, 0x00, 0x00,
1324| 1| 0x5c, 0x00, 0x00, 0x00,
1325| 1| // [3] GPSLongitudeRef tag=3, ASCII×2 "E\0"
1326| 1| 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
1327| 1| b'E', 0x00, 0x00, 0x00,
1328| 1| // [4] GPSLongitude tag=4, RATIONAL×3 @ 0x74
1329| 1| 0x04, 0x00, 0x05, 0x00, 0x03, 0x00, 0x00, 0x00,
1330| 1| 0x74, 0x00, 0x00, 0x00,
1331| 1| 0x00, 0x00, 0x00, 0x00, // no next sub-IFD
1332| 1|
1333| 1| // GPSLatitude rational data @ 0x5c: 36/1, 0/1, 0/1
1334| 1| 0x24, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1335| 1| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1336| 1| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1337| 1|
1338| 1| // GPSLongitude rational data @ 0x74: 120/1, 0/1, 0/1
1339| 1| 0x78, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1340| 1| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1341| 1| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
1342| 1| ];
1343| |
1344| 1| let iter = input_into_iter(tiff.to_vec(), None).unwrap();
1345| |
1346| | // parse_gps recovers the full sub-IFD despite GPSVersionID being first.
1347| 1| let gps = iter
1348| 1| .parse_gps()
1349| 1| .expect("parse_gps must succeed")
1350| 1| .expect("GPS sub-IFD with GPSVersionID first must yield GPSInfo");
1351| 1| assert_eq!(gps.latitude_decimal(), Some(36.0));
1352| 1| assert_eq!(gps.longitude_decimal(), Some(120.0));
1353| |
1354| | // GPSVersionID itself is also surfaced through normal iteration —
1355| | // tag 0 is no longer dropped inside the GPS sub-IFD.
1356| 6| let tags: Vec<u16> = iter.map(|e| e.tag().code()).collect();
^1 ^1 ^1 ^1 ^1
1357| 1| assert!(
1358| 1| tags.contains(&crate::ExifTag::GPSVersionID.code()),
1359| | "GPSVersionID (tag 0) should be visible to iterators; got {tags:?}"
1360| | );
1361| 1| }
1362| |}
/home/min/dev/nom-exif/src/exif/gps.rs:
1| |use std::str::FromStr;
2| |
3| |use iso6709parse::ISO6709Coord;
4| |
5| |use crate::values::{IRational, URational};
6| |
7| |/// Parsed GPS information from the GPSInfo subIFD.
8| |#[derive(Debug, Clone, PartialEq, Eq)]
9| |pub struct GPSInfo {
10| | pub latitude_ref: LatRef,
11| | pub latitude: LatLng,
12| | pub longitude_ref: LonRef,
13| | pub longitude: LatLng,
14| | pub altitude: Altitude,
15| | pub speed: Option<Speed>,
16| |}
17| |
18| |impl Default for GPSInfo {
19| 0| fn default() -> Self {
20| 0| Self {
21| 0| latitude_ref: LatRef::North,
22| 0| latitude: LatLng::default(),
23| 0| longitude_ref: LonRef::East,
24| 0| longitude: LatLng::default(),
25| 0| altitude: Altitude::Unknown,
26| 0| speed: None,
27| 0| }
28| 0| }
29| |}
30| |
31| |/// Latitude or longitude expressed as degrees / minutes / seconds.
32| |#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
33| |pub struct LatLng {
34| | pub degrees: URational,
35| | pub minutes: URational,
36| | pub seconds: URational,
37| |}
38| |
39| |/// Latitude hemisphere reference.
40| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
41| |pub enum LatRef {
42| | North,
43| | South,
44| |}
45| |
46| |impl LatRef {
47| | /// Construct from the 'N' / 'S' character carried in EXIF GPSLatitudeRef.
48| 76| pub fn from_char(c: char) -> Option<Self> {
49| 76| match c {
50| 73| 'N' | 'n' => Some(Self::North),
51| 2| 'S' | 's' => Some(Self::South),
52| 1| _ => None,
53| | }
54| 76| }
55| |
56| 1| pub fn as_char(self) -> char {
57| 1| match self {
58| 1| Self::North => 'N',
59| 0| Self::South => 'S',
60| | }
61| 1| }
62| |
63| | /// +1.0 or -1.0 — useful when assembling decimal-degrees latitude.
64| 3| pub fn sign(self) -> f64 {
65| 3| match self {
66| 2| Self::North => 1.0,
67| 1| Self::South => -1.0,
68| | }
69| 3| }
70| |}
71| |
72| |/// Longitude hemisphere reference.
73| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
74| |pub enum LonRef {
75| | East,
76| | West,
77| |}
78| |
79| |impl LonRef {
80| 75| pub fn from_char(c: char) -> Option<Self> {
81| 75| match c {
82| 73| 'E' | 'e' => Some(Self::East),
83| 2| 'W' | 'w' => Some(Self::West),
84| 0| _ => None,
85| | }
86| 75| }
87| |
88| 1| pub fn as_char(self) -> char {
89| 1| match self {
90| 0| Self::East => 'E',
91| 1| Self::West => 'W',
92| | }
93| 1| }
94| |
95| 3| pub fn sign(self) -> f64 {
96| 3| match self {
97| 2| Self::East => 1.0,
98| 1| Self::West => -1.0,
99| | }
100| 3| }
101| |}
102| |
103| |/// Altitude relative to sea level.
104| |///
105| |/// Combines EXIF's `GPSAltitudeRef` (0 = above, 1 = below) with the magnitude
106| |/// from `GPSAltitude` so the two cannot drift out of sync.
107| |#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
108| |pub enum Altitude {
109| | /// Absent or unparseable.
110| | #[default]
111| | Unknown,
112| | AboveSeaLevel(URational),
113| | BelowSeaLevel(URational),
114| |}
115| |
116| |impl Altitude {
117| | /// Signed altitude in meters; `None` when Unknown or denominator=0.
118| 12| pub fn meters(&self) -> Option<f64> {
119| 12| match self {
120| 1| Altitude::Unknown => None,
121| 8| Altitude::AboveSeaLevel(r) => r.to_f64(),
122| 3| Altitude::BelowSeaLevel(r) => r.to_f64().map(|m| -m),
123| | }
124| 12| }
125| |
126| | /// The underlying magnitude rational, regardless of sign. None for `Unknown`.
127| 0| pub fn magnitude(&self) -> Option<URational> {
128| 0| match self {
129| 0| Altitude::Unknown => None,
130| 0| Altitude::AboveSeaLevel(r) | Altitude::BelowSeaLevel(r) => Some(*r),
131| | }
132| 0| }
133| |}
134| |
135| |/// EXIF GPS speed reference unit (`GPSSpeedRef`).
136| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
137| |pub enum SpeedUnit {
138| | KmPerHour,
139| | MilesPerHour,
140| | Knots,
141| |}
142| |
143| |impl SpeedUnit {
144| 20| pub fn from_char(c: char) -> Option<Self> {
145| 20| match c {
146| 17| 'K' | 'k' => Some(Self::KmPerHour),
147| 1| 'M' | 'm' => Some(Self::MilesPerHour),
148| 1| 'N' | 'n' => Some(Self::Knots),
149| 1| _ => None,
150| | }
151| 20| }
152| |
153| 1| pub fn as_char(self) -> char {
154| 1| match self {
155| 0| Self::KmPerHour => 'K',
156| 0| Self::MilesPerHour => 'M',
157| 1| Self::Knots => 'N',
158| | }
159| 1| }
160| |}
161| |
162| |/// EXIF GPS speed: unit + value paired so they cannot drift out of sync.
163| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
164| |pub struct Speed {
165| | pub unit: SpeedUnit,
166| | pub value: URational,
167| |}
168| |
169| |impl LatLng {
170| 172| pub const fn new(degrees: URational, minutes: URational, seconds: URational) -> Self {
171| 172| Self {
172| 172| degrees,
173| 172| minutes,
174| 172| seconds,
175| 172| }
176| 172| }
177| |
178| | /// Convert to decimal degrees. Returns `None` if any component has a zero
179| | /// denominator.
180| 21| pub fn to_decimal_degrees(&self) -> Option<f64> {
181| 21| let d = self.degrees.to_f64()?;
^20 ^1
182| 20| let m = self.minutes.to_f64()?;
^0
183| 20| let s = self.seconds.to_f64()?;
^0
184| 20| Some(d + m / 60.0 + s / 3600.0)
185| 21| }
186| |
187| | /// Construct from decimal degrees. Rejects NaN / ±inf and values whose
188| | /// magnitude exceeds 180° with `ConvertError::InvalidDecimalDegrees`.
189| 158| pub fn try_from_decimal_degrees(degrees: f64) -> Result<Self, crate::ConvertError> {
190| 158| if !degrees.is_finite() || degrees.abs() > 180.0 {
^155
191| 4| return Err(crate::ConvertError::InvalidDecimalDegrees(degrees));
192| 154| }
193| 154| let abs = degrees.abs();
194| 154| let d = abs.trunc() as u32;
195| 154| let mins_total = (abs - d as f64) * 60.0;
196| 154| let m = mins_total.trunc() as u32;
197| 154| let secs_hundredths = ((mins_total - m as f64) * 60.0 * 100.0).round() as u32;
198| 154| Ok(Self::new(
199| 154| URational::new(d, 1),
200| 154| URational::new(m, 1),
201| 154| URational::new(secs_hundredths, 100),
202| 154| ))
203| 158| }
204| |}
205| |
206| |impl GPSInfo {
207| | /// Latitude in decimal degrees, signed by `latitude_ref` (positive = north).
208| 2| pub fn latitude_decimal(&self) -> Option<f64> {
209| 2| Some(self.latitude.to_decimal_degrees()? * self.latitude_ref.sign())
^0
210| 2| }
211| |
212| | /// Longitude in decimal degrees, signed by `longitude_ref` (positive = east).
213| 2| pub fn longitude_decimal(&self) -> Option<f64> {
214| 2| Some(self.longitude.to_decimal_degrees()? * self.longitude_ref.sign())
^0
215| 2| }
216| |
217| | /// Signed altitude in meters; `None` if altitude is `Unknown` or denominator=0.
218| 1| pub fn altitude_meters(&self) -> Option<f64> {
219| 1| self.altitude.meters()
220| 1| }
221| |
222| | /// Returns an ISO 6709 geographic point location string such as
223| | /// `+48.8577+002.295/`.
224| 7| pub fn to_iso6709(&self) -> String {
225| 7| let latitude = self.latitude.to_decimal_degrees().unwrap_or(0.0);
226| 7| let longitude = self.longitude.to_decimal_degrees().unwrap_or(0.0);
227| 7| let altitude_meters = self.altitude.meters();
228| 7| format!(
229| | "{}{latitude:08.5}{}{longitude:09.5}{}/",
230| 7| match self.latitude_ref {
231| 7| LatRef::North => '+',
232| 0| LatRef::South => '-',
233| | },
234| 7| match self.longitude_ref {
235| 3| LonRef::East => '+',
236| 4| LonRef::West => '-',
237| | },
238| 7| match altitude_meters {
239| 7| None | Some(0.0) => String::new(),
^4
240| 3| Some(m) => format!(
241| | "{}{}CRSWGS_84",
242| 3| if m >= 0.0 { "+" } else { "-" },
^1 ^2
243| 3| Self::format_float(m.abs())
244| | ),
245| | }
246| | )
247| 7| }
248| |
249| 3| fn format_float(f: f64) -> String {
250| 3| if f.fract() == 0.0 {
251| 2| f.to_string()
252| | } else {
253| 1| format!("{f:.3}")
254| | }
255| 3| }
256| |}
257| |
258| |impl TryFrom<&[URational]> for LatLng {
259| | type Error = crate::Error;
260| 34| fn try_from(value: &[URational]) -> Result<Self, Self::Error> {
261| 34| if value.len() < 3 {
262| 0| return Err(crate::Error::Malformed {
263| 0| kind: crate::error::MalformedKind::IfdEntry,
264| 0| message: "need at least 3 URational components for LatLng".into(),
265| 0| });
266| 34| }
267| 34| Ok(Self {
268| 34| degrees: value[0],
269| 34| minutes: value[1],
270| 34| seconds: value[2],
271| 34| })
272| 34| }
273| |}
274| |
275| |impl TryFrom<&[IRational]> for LatLng {
276| | type Error = crate::Error;
277| 108| fn try_from(value: &[IRational]) -> Result<Self, Self::Error> {
278| 108| if value.len() < 3 {
279| 0| return Err(crate::Error::Malformed {
280| 0| kind: crate::error::MalformedKind::IfdEntry,
281| 0| message: "need at least 3 IRational components for LatLng".into(),
282| 0| });
283| 108| }
284| 108| let map_negative = |_| crate::Error::Malformed {
285| 0| kind: crate::error::MalformedKind::IfdEntry,
286| 0| message: "negative LatLng component".into(),
287| 0| };
288| | Ok(Self {
289| 108| degrees: URational::try_from(value[0]).map_err(map_negative)?,
^0
290| 108| minutes: URational::try_from(value[1]).map_err(map_negative)?,
^0
291| 108| seconds: URational::try_from(value[2]).map_err(map_negative)?,
^0
292| | })
293| 108| }
294| |}
295| |
296| |impl TryFrom<&Vec<URational>> for LatLng {
297| | type Error = crate::Error;
298| 34| fn try_from(value: &Vec<URational>) -> Result<Self, Self::Error> {
299| 34| Self::try_from(value.as_slice())
300| 34| }
301| |}
302| |
303| |impl TryFrom<&Vec<IRational>> for LatLng {
304| | type Error = crate::Error;
305| 108| fn try_from(value: &Vec<IRational>) -> Result<Self, Self::Error> {
306| 108| Self::try_from(value.as_slice())
307| 108| }
308| |}
309| |
310| |impl FromStr for GPSInfo {
311| | type Err = crate::ConvertError;
312| 77| fn from_str(s: &str) -> Result<Self, Self::Err> {
313| 77| iso6709parse::parse::<ISO6709Coord>(s)
314| 77| .map(GPSInfo::from_iso6709_coord)
315| 77| .map_err(|_| crate::ConvertError::InvalidIso6709(s.to_string()))
^1^1
316| 77| }
317| |}
318| |
319| |impl GPSInfo {
320| | /// Build a `GPSInfo` from a parsed ISO 6709 coordinate. Crate-internal:
321| | /// the public path is [`GPSInfo::from_str`] / `<GPSInfo as FromStr>::from_str`,
322| | /// which keeps `iso6709parse::ISO6709Coord` out of the public API surface
323| | /// (so an `iso6709parse` major-version bump does not force one here).
324| 76| pub(crate) fn from_iso6709_coord(v: ISO6709Coord) -> Self {
325| 76| let latitude_ref = if v.lat >= 0.0 {
326| 76| LatRef::North
327| | } else {
328| 0| LatRef::South
329| | };
330| 76| let longitude_ref = if v.lon >= 0.0 {
331| 75| LonRef::East
332| | } else {
333| 1| LonRef::West
334| | };
335| 76| let latitude = LatLng::try_from_decimal_degrees(v.lat.abs()).unwrap_or_default();
336| 76| let longitude = LatLng::try_from_decimal_degrees(v.lon.abs()).unwrap_or_default();
337| 76| let altitude = match v.altitude {
338| 76| None => Altitude::Unknown,
339| 0| Some(x) => {
340| 0| let mag = URational::new((x.abs() * 1000.0).trunc() as u32, 1000);
341| 0| if x >= 0.0 {
342| 0| Altitude::AboveSeaLevel(mag)
343| | } else {
344| 0| Altitude::BelowSeaLevel(mag)
345| | }
346| | }
347| | };
348| 76| Self {
349| 76| latitude_ref,
350| 76| latitude,
351| 76| longitude_ref,
352| 76| longitude,
353| 76| altitude,
354| 76| speed: None,
355| 76| }
356| 76| }
357| |}
358| |
359| |#[cfg(test)]
360| |mod tests {
361| | use super::*;
362| |
363| | #[test]
364| 1| fn gps_iso6709() {
365| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
366| |
367| 1| let palace = GPSInfo {
368| 1| latitude_ref: LatRef::North,
369| 1| latitude: LatLng::new(
370| 1| URational::new(39, 1),
371| 1| URational::new(55, 1),
372| 1| URational::new(0, 1),
373| 1| ),
374| 1| longitude_ref: LonRef::East,
375| 1| longitude: LatLng::new(
376| 1| URational::new(116, 1),
377| 1| URational::new(23, 1),
378| 1| URational::new(27, 1),
379| 1| ),
380| 1| altitude: Altitude::AboveSeaLevel(URational::new(0, 1)),
381| 1| speed: None,
382| 1| };
383| 1| assert_eq!(palace.to_iso6709(), "+39.91667+116.39083/");
384| |
385| 1| let liberty = GPSInfo {
386| 1| latitude_ref: LatRef::North,
387| 1| latitude: LatLng::new(
388| 1| URational::new(40, 1),
389| 1| URational::new(41, 1),
390| 1| URational::new(21, 1),
391| 1| ),
392| 1| longitude_ref: LonRef::West,
393| 1| longitude: LatLng::new(
394| 1| URational::new(74, 1),
395| 1| URational::new(2, 1),
396| 1| URational::new(40, 1),
397| 1| ),
398| 1| altitude: Altitude::AboveSeaLevel(URational::new(0, 1)),
399| 1| speed: None,
400| 1| };
401| 1| assert_eq!(liberty.to_iso6709(), "+40.68917-074.04444/");
402| |
403| 1| let above = GPSInfo {
404| 1| latitude_ref: LatRef::North,
405| 1| latitude: LatLng::new(
406| 1| URational::new(40, 1),
407| 1| URational::new(41, 1),
408| 1| URational::new(21, 1),
409| 1| ),
410| 1| longitude_ref: LonRef::West,
411| 1| longitude: LatLng::new(
412| 1| URational::new(74, 1),
413| 1| URational::new(2, 1),
414| 1| URational::new(40, 1),
415| 1| ),
416| 1| altitude: Altitude::AboveSeaLevel(URational::new(123, 1)),
417| 1| speed: None,
418| 1| };
419| 1| assert_eq!(above.to_iso6709(), "+40.68917-074.04444+123CRSWGS_84/");
420| |
421| 1| let below = GPSInfo {
422| 1| latitude_ref: LatRef::North,
423| 1| latitude: LatLng::new(
424| 1| URational::new(40, 1),
425| 1| URational::new(41, 1),
426| 1| URational::new(21, 1),
427| 1| ),
428| 1| longitude_ref: LonRef::West,
429| 1| longitude: LatLng::new(
430| 1| URational::new(74, 1),
431| 1| URational::new(2, 1),
432| 1| URational::new(40, 1),
433| 1| ),
434| 1| altitude: Altitude::BelowSeaLevel(URational::new(123, 1)),
435| 1| speed: None,
436| 1| };
437| 1| assert_eq!(below.to_iso6709(), "+40.68917-074.04444-123CRSWGS_84/");
438| |
439| 1| let below = GPSInfo {
440| 1| latitude_ref: LatRef::North,
441| 1| latitude: LatLng::new(
442| 1| URational::new(40, 1),
443| 1| URational::new(41, 1),
444| 1| URational::new(21, 1),
445| 1| ),
446| 1| longitude_ref: LonRef::West,
447| 1| longitude: LatLng::new(
448| 1| URational::new(74, 1),
449| 1| URational::new(2, 1),
450| 1| URational::new(40, 1),
451| 1| ),
452| 1| altitude: Altitude::BelowSeaLevel(URational::new(100, 3)),
453| 1| speed: None,
454| 1| };
455| 1| assert_eq!(below.to_iso6709(), "+40.68917-074.04444-33.333CRSWGS_84/");
456| 1| }
457| |
458| | #[test]
459| 1| fn gps_iso6709_with_invalid_alt() {
460| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
461| |
462| 1| let iso: ISO6709Coord = iso6709parse::parse("+26.5322-078.1969+019.099/").unwrap();
463| 1| assert_eq!(iso.lat, 26.5322);
464| 1| assert_eq!(iso.lon, -78.1969);
465| 1| assert_eq!(iso.altitude, None);
466| |
467| 1| let iso: GPSInfo = "+26.5322-078.1969+019.099/".parse().unwrap();
468| 1| assert_eq!(iso.latitude_ref, LatRef::North);
469| 1| assert_eq!(
470| | iso.latitude,
471| 1| LatLng::new(
472| 1| URational::new(26, 1),
473| 1| URational::new(31, 1),
474| 1| URational::new(5592, 100),
475| | )
476| | );
477| |
478| 1| assert_eq!(iso.longitude_ref, LonRef::West);
479| 1| assert_eq!(
480| | iso.longitude,
481| 1| LatLng::new(
482| 1| URational::new(78, 1),
483| 1| URational::new(11, 1),
484| 1| URational::new(4884, 100),
485| | )
486| | );
487| |
488| 1| assert_eq!(iso.altitude, Altitude::Unknown);
489| 1| }
490| |
491| | #[test]
492| 1| fn latlng_to_decimal_degrees() {
493| 1| let p = LatLng::new(
494| 1| URational::new(40, 1),
495| 1| URational::new(41, 1),
496| 1| URational::new(21, 1),
497| | );
498| 1| let d = p.to_decimal_degrees().unwrap();
499| 1| assert!((d - 40.689_167).abs() < 1e-5);
500| 1| }
501| |
502| | #[test]
503| 1| fn latlng_to_decimal_degrees_zero_denominator() {
504| 1| let p = LatLng::new(
505| 1| URational::new(40, 0),
506| 1| URational::new(41, 1),
507| 1| URational::new(21, 1),
508| | );
509| 1| assert_eq!(p.to_decimal_degrees(), None);
510| 1| }
511| |
512| | #[test]
513| 1| fn latlng_try_from_decimal_degrees_ok() {
514| 1| let p = LatLng::try_from_decimal_degrees(43.5).unwrap();
515| 1| let back = p.to_decimal_degrees().unwrap();
516| 1| assert!((back - 43.5).abs() < 1e-3);
517| 1| }
518| |
519| | #[test]
520| 1| fn latlng_try_from_decimal_degrees_rejects_nan_inf_oob() {
521| | use crate::ConvertError;
522| 1| assert!(matches!(
^0
523| 1| LatLng::try_from_decimal_degrees(f64::NAN),
524| | Err(ConvertError::InvalidDecimalDegrees(_))
525| | ));
526| 1| assert!(matches!(
^0
527| 1| LatLng::try_from_decimal_degrees(f64::INFINITY),
528| | Err(ConvertError::InvalidDecimalDegrees(_))
529| | ));
530| 1| assert!(matches!(
^0
531| 1| LatLng::try_from_decimal_degrees(181.0),
532| | Err(ConvertError::InvalidDecimalDegrees(_))
533| | ));
534| 1| }
535| |
536| | #[test]
537| 1| fn lat_lon_ref_round_trip() {
538| 4| for c in ['N', 'S', 'n', 's'] {
^1
539| 4| assert!(LatRef::from_char(c).is_some());
540| | }
541| 4| for c in ['E', 'W', 'e', 'w'] {
^1
542| 4| assert!(LonRef::from_char(c).is_some());
543| | }
544| 1| assert_eq!(LatRef::North.as_char(), 'N');
545| 1| assert_eq!(LonRef::West.as_char(), 'W');
546| 1| assert_eq!(LatRef::South.sign(), -1.0);
547| 1| assert_eq!(LonRef::East.sign(), 1.0);
548| 1| assert_eq!(LatRef::from_char('X'), None);
549| 1| }
550| |
551| | #[test]
552| 1| fn altitude_meters_signed() {
553| 1| let above = Altitude::AboveSeaLevel(URational::new(123, 1));
554| 1| let below = Altitude::BelowSeaLevel(URational::new(123, 1));
555| 1| assert_eq!(above.meters(), Some(123.0));
556| 1| assert_eq!(below.meters(), Some(-123.0));
557| 1| assert_eq!(Altitude::Unknown.meters(), None);
558| 1| assert_eq!(Altitude::AboveSeaLevel(URational::new(1, 0)).meters(), None);
559| 1| }
560| |
561| | #[test]
562| 1| fn speed_unit_round_trip() {
563| 1| assert_eq!(SpeedUnit::from_char('K'), Some(SpeedUnit::KmPerHour));
564| 1| assert_eq!(SpeedUnit::from_char('M'), Some(SpeedUnit::MilesPerHour));
565| 1| assert_eq!(SpeedUnit::from_char('N'), Some(SpeedUnit::Knots));
566| 1| assert_eq!(SpeedUnit::from_char('X'), None);
567| 1| assert_eq!(SpeedUnit::Knots.as_char(), 'N');
568| 1| }
569| |
570| | #[test]
571| 1| fn gps_info_decimal_accessors() {
572| 1| let liberty = GPSInfo {
573| 1| latitude_ref: LatRef::North,
574| 1| latitude: LatLng::new(
575| 1| URational::new(40, 1),
576| 1| URational::new(41, 1),
577| 1| URational::new(21, 1),
578| 1| ),
579| 1| longitude_ref: LonRef::West,
580| 1| longitude: LatLng::new(
581| 1| URational::new(74, 1),
582| 1| URational::new(2, 1),
583| 1| URational::new(40, 1),
584| 1| ),
585| 1| altitude: Altitude::AboveSeaLevel(URational::new(123, 1)),
586| 1| speed: None,
587| 1| };
588| 1| let lat = liberty.latitude_decimal().unwrap();
589| 1| let lon = liberty.longitude_decimal().unwrap();
590| 1| assert!((lat - 40.689_167).abs() < 1e-5);
591| 1| assert!((lon - (-74.044_444)).abs() < 1e-5);
592| 1| assert_eq!(liberty.altitude_meters(), Some(123.0));
593| 1| }
594| |
595| | #[test]
596| 1| fn gps_info_from_str_uses_convert_error() {
597| | use crate::ConvertError;
598| 1| let err = "garbage".parse::<GPSInfo>().unwrap_err();
599| 1| assert!(matches!(err, ConvertError::InvalidIso6709(_)));
^0
600| 1| }
601| |}
/home/min/dev/nom-exif/src/exif/ifd.rs:
1| |use crate::EntryValue;
2| |use std::collections::HashMap;
3| |
4| |/// <https://www.media.mit.edu/pia/Research/deepview/exif.html>
5| |#[derive(Clone, Debug, PartialEq)]
6| |pub(crate) struct ParsedImageFileDirectory {
7| | pub entries: HashMap<u16, ParsedIdfEntry>,
8| |}
9| |
10| |impl ParsedImageFileDirectory {
11| 125| pub fn new() -> Self {
12| 125| Self {
13| 125| entries: HashMap::new(),
14| 125| }
15| 125| }
16| |}
17| |
18| |#[derive(Clone, Debug, PartialEq)]
19| |pub(crate) struct ParsedIdfEntry {
20| | pub value: EntryValue,
21| |}
22| |
23| |impl ParsedImageFileDirectory {
24| 216| pub(crate) fn get(&self, tag: u16) -> Option<&EntryValue> {
25| 216| self.entries.get(&tag).map(|x| &x.value)
26| 216| }
27| |
28| 4.33k| pub(crate) fn put(&mut self, code: u16, v: EntryValue) {
29| 4.33k| self.entries.insert(code, ParsedIdfEntry { value: v });
30| 4.33k| }
31| |
32| 7| pub(crate) fn iter(&self) -> impl Iterator<Item = (u16, &EntryValue)> {
33| 238| self.entries.iter().map(|(code, e)| (*code, &e.value))
^7 ^7 ^7
34| 7| }
35| |}
/home/min/dev/nom-exif/src/exif/png_text.rs:
1| |//! PNG `tEXt` chunks as Latin-1-decoded key/value pairs.
2| |//!
3| |//! See [`PngTextChunks`] for accessors. Used as the payload of
4| |//! [`crate::ImageFormatMetadata::Png`].
5| |
6| |/// PNG `tEXt` chunks, decoded as Latin-1 `(key, value)` pairs in file
7| |/// order.
8| |///
9| |/// Duplicate keys are preserved (PNG spec permits multiple `tEXt`
10| |/// chunks with the same keyword). Encoding is strict Latin-1 per spec
11| |/// — no UTF-8 sniffing.
12| |///
13| |/// **Note**: when a PNG carries EXIF inside a `Raw profile type exif` /
14| |/// `Raw profile type APP1` text chunk (legacy ImageMagick / Photoshop
15| |/// pattern), the EXIF entries are merged into the `Exif` (under
16| |/// `ImageMetadata.exif`) transparently; the original text chunk is
17| |/// also visible here.
18| |///
19| |/// Forward-compatible: future iTXt / zTXt support can extend
20| |/// `PngTextChunks` non-breakingly.
21| |#[derive(Debug, Clone, Default)]
22| |#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
23| |pub struct PngTextChunks {
24| | pub(crate) entries: Vec<(String, String)>,
25| |}
26| |
27| |impl PngTextChunks {
28| | /// First value whose key matches exactly, or `None`.
29| 7| pub fn get(&self, key: &str) -> Option<&str> {
30| 7| self.entries
31| 7| .iter()
32| 13| .find(|(k, _)| k == key)
^7
33| 7| .map(|(_, v)| v.as_str())
^6^6
34| 7| }
35| |
36| | /// All values whose key matches exactly, in file order.
37| 3| pub fn get_all<'a>(&'a self, key: &'a str) -> impl Iterator<Item = &'a str> + 'a {
38| 3| self.entries
39| 3| .iter()
40| 12| .filter(move |(k, _)| k == key)
^3
41| 3| .map(|(_, v)| v.as_str())
42| 3| }
43| |
44| | /// All `(key, value)` pairs in file order, including duplicates.
45| 1| pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
46| 4| self.entries.iter().map(|(k, v)| (k.as_str(), v.as_str()))
^1 ^1
47| 1| }
48| |
49| | /// Number of `(key, value)` pairs (counts duplicates).
50| 2| pub fn len(&self) -> usize {
51| 2| self.entries.len()
52| 2| }
53| |
54| | /// `true` if no `tEXt` entries are present.
55| 3| pub fn is_empty(&self) -> bool {
56| 3| self.entries.is_empty()
57| 3| }
58| |}
59| |
60| |#[cfg(test)]
61| |mod tests {
62| | use super::*;
63| |
64| 4| fn fixture() -> PngTextChunks {
65| 4| PngTextChunks {
66| 4| entries: vec![
67| 4| ("Title".into(), "Hello".into()),
68| 4| ("Author".into(), "Alice".into()),
69| 4| ("Comment".into(), "first comment".into()),
70| 4| ("Comment".into(), "second comment".into()),
71| 4| ],
72| 4| }
73| 4| }
74| |
75| | #[test]
76| 1| fn get_returns_first_match() {
77| 1| let t = fixture();
78| 1| assert_eq!(t.get("Title"), Some("Hello"));
79| 1| assert_eq!(t.get("Comment"), Some("first comment"));
80| 1| assert_eq!(t.get("nonexistent"), None);
81| 1| }
82| |
83| | #[test]
84| 1| fn get_all_returns_all_in_order() {
85| 1| let t = fixture();
86| 1| let comments: Vec<&str> = t.get_all("Comment").collect();
87| 1| assert_eq!(comments, vec!["first comment", "second comment"]);
88| 1| let titles: Vec<&str> = t.get_all("Title").collect();
89| 1| assert_eq!(titles, vec!["Hello"]);
90| 1| let nothing: Vec<&str> = t.get_all("nonexistent").collect();
91| 1| assert!(nothing.is_empty());
92| 1| }
93| |
94| | #[test]
95| 1| fn iter_in_file_order_with_duplicates() {
96| 1| let t = fixture();
97| 1| let pairs: Vec<(&str, &str)> = t.iter().collect();
98| 1| assert_eq!(pairs.len(), 4);
99| 1| assert_eq!(pairs[2], ("Comment", "first comment"));
100| 1| assert_eq!(pairs[3], ("Comment", "second comment"));
101| 1| }
102| |
103| | #[test]
104| 1| fn len_and_is_empty() {
105| 1| let t = fixture();
106| 1| assert_eq!(t.len(), 4);
107| 1| assert!(!t.is_empty());
108| |
109| 1| let empty = PngTextChunks::default();
110| 1| assert_eq!(empty.len(), 0);
111| 1| assert!(empty.is_empty());
112| 1| }
113| |}
/home/min/dev/nom-exif/src/exif/tags.rs:
1| |//! Define exif tags and related enums, see
2| |//! <https://exiftool.org/TagNames/EXIF.html>
3| |
4| |use std::fmt::{Debug, Display};
5| |
6| |#[cfg(feature = "serde")]
7| |use serde::{Deserialize, Serialize};
8| |
9| |/// Either a recognized [`ExifTag`] or a raw `u16` tag code that the parser did
10| |/// not recognize. Yielded by [`crate::ExifIterEntry::tag`] and by
11| |/// [`crate::ExifEntry::tag`].
12| |#[allow(unused)]
13| |#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
14| |#[derive(Eq, PartialEq, Hash, Clone, Copy)]
15| |pub enum TagOrCode {
16| | /// Tag is in the recognized [`ExifTag`] enum.
17| | Tag(ExifTag),
18| |
19| | /// Tag is *not* in [`ExifTag`]; raw `u16` code is preserved.
20| | Unknown(u16),
21| |}
22| |
23| |impl TagOrCode {
24| | /// The recognized [`ExifTag`], if any. `None` for `Unknown(_)`.
25| 4.31k| pub fn tag(&self) -> Option<ExifTag> {
26| 4.31k| match self {
27| 4.23k| TagOrCode::Tag(t) => Some(*t),
28| 86| TagOrCode::Unknown(_) => None,
29| | }
30| 4.31k| }
31| |
32| | /// The raw `u16` tag code (always available, recognized or not).
33| 16.4k| pub fn code(&self) -> u16 {
34| 16.4k| match self {
35| 16.2k| TagOrCode::Tag(t) => t.code(),
36| 201| TagOrCode::Unknown(c) => *c,
37| | }
38| 16.4k| }
39| |}
40| |
41| |impl From<u16> for TagOrCode {
42| 11.3k| fn from(v: u16) -> Self {
43| 11.3k| match ExifTag::from_code(v) {
44| 11.2k| Some(tag) => TagOrCode::Tag(tag),
45| 151| None => TagOrCode::Unknown(v),
46| | }
47| 11.3k| }
48| |}
49| |
50| |impl Debug for TagOrCode {
51| 2.79k| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52| 2.79k| match self {
53| 2.73k| TagOrCode::Tag(v) => Debug::fmt(v, f),
54| 61| TagOrCode::Unknown(v) => Debug::fmt(&format!("Unknown(0x{v:04x})"), f),
55| | }
56| 2.79k| }
57| |}
58| |
59| |impl Display for TagOrCode {
60| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61| 0| match self {
62| 0| TagOrCode::Tag(t) => Display::fmt(t, f),
63| 0| TagOrCode::Unknown(c) => write!(f, "Unknown(0x{c:04x})"),
64| | }
65| 0| }
66| |}
67| |
68| |/// Defines recognized Exif tags. All tags can be parsed, no matter if it is
69| |/// defined here. This enum definition is just for ease of use.
70| |///
71| |/// You can always get the entry value by raw tag code which is an `u16` value.
72| |/// See [`ExifIterEntry::tag`](crate::ExifIterEntry::tag) (returns [`TagOrCode`]
73| |/// which exposes `.code()`) and [`Exif::get_by_code`](crate::Exif::get_by_code).
74| |#[allow(unused)]
75| |#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
76| |#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
77| |#[non_exhaustive]
78| |pub enum ExifTag {
79| | Make = 0x0000_010f,
80| | Model = 0x0000_0110,
81| | Orientation = 0x0000_0112,
82| |
83| | ImageWidth = 0x0000_0100,
84| | ImageHeight = 0x0000_0101,
85| |
86| | ISOSpeedRatings = 0x0000_8827,
87| | ShutterSpeedValue = 0x0000_9201,
88| | ExposureTime = 0x0000_829a,
89| | FNumber = 0x0000_829d,
90| |
91| | ExifImageWidth = 0x0000_a002,
92| | ExifImageHeight = 0x0000_a003,
93| |
94| | DateTimeOriginal = 0x0000_9003,
95| | CreateDate = 0x0000_9004,
96| | ModifyDate = 0x0000_0132,
97| | OffsetTime = 0x0000_9010,
98| | OffsetTimeOriginal = 0x0000_9011,
99| | OffsetTimeDigitized = 0x0000_9012,
100| |
101| | GPSLatitudeRef = 0x00001,
102| | GPSLatitude = 0x00002,
103| | GPSLongitudeRef = 0x00003,
104| | GPSLongitude = 0x00004,
105| | GPSAltitudeRef = 0x00005,
106| | GPSAltitude = 0x00006,
107| | GPSVersionID = 0x00000,
108| |
109| | // sub ifd
110| | ExifOffset = 0x0000_8769,
111| | GPSInfo = 0x0000_8825,
112| |
113| | ImageDescription = 0x0000_010e,
114| | XResolution = 0x0000_011a,
115| | YResolution = 0x0000_011b,
116| | ResolutionUnit = 0x0000_0128,
117| | Software = 0x0000_0131,
118| | HostComputer = 0x0000_013c,
119| | WhitePoint = 0x0000_013e,
120| | PrimaryChromaticities = 0x0000_013f,
121| | YCbCrCoefficients = 0x0000_0211,
122| | ReferenceBlackWhite = 0x0000_0214,
123| | Copyright = 0x0000_8298,
124| |
125| | ExposureProgram = 0x0000_8822,
126| | SpectralSensitivity = 0x0000_8824,
127| | OECF = 0x0000_8828,
128| | SensitivityType = 0x0000_8830,
129| | ExifVersion = 0x0000_9000,
130| | ApertureValue = 0x0000_9202,
131| | BrightnessValue = 0x0000_9203,
132| | ExposureBiasValue = 0x0000_9204,
133| | MaxApertureValue = 0x0000_9205,
134| | SubjectDistance = 0x0000_9206,
135| | MeteringMode = 0x0000_9207,
136| | LightSource = 0x0000_9208,
137| | Flash = 0x0000_9209,
138| | FocalLength = 0x0000_920a,
139| | SubjectArea = 0x0000_9214,
140| | MakerNote = 0x0000_927c,
141| | UserComment = 0x0000_9286,
142| | FlashPixVersion = 0x0000_a000,
143| | ColorSpace = 0x0000_a001,
144| | RelatedSoundFile = 0x0000_a004,
145| | FlashEnergy = 0x0000_a20b,
146| | FocalPlaneXResolution = 0x0000_a20e,
147| | FocalPlaneYResolution = 0x0000_a20f,
148| | FocalPlaneResolutionUnit = 0x0000_a210,
149| | SubjectLocation = 0x0000_a214,
150| | ExposureIndex = 0x0000_a215,
151| | SensingMethod = 0x0000_a217,
152| | FileSource = 0x0000_a300,
153| | SceneType = 0x0000_a301,
154| | CFAPattern = 0x0000_a302,
155| | CustomRendered = 0x0000_a401,
156| | ExposureMode = 0x0000_a402,
157| | WhiteBalanceMode = 0x0000_a403,
158| | DigitalZoomRatio = 0x0000_a404,
159| | FocalLengthIn35mmFilm = 0x0000_a405,
160| | SceneCaptureType = 0x0000_a406,
161| | GainControl = 0x0000_a407,
162| | Contrast = 0x0000_a408,
163| | Saturation = 0x0000_a409,
164| | Sharpness = 0x0000_a40a,
165| | DeviceSettingDescription = 0x0000_a40b,
166| | SubjectDistanceRange = 0x0000_a40c,
167| | ImageUniqueID = 0x0000_a420,
168| | CameraSerialNumber = 0x0000_a431,
169| | LensSpecification = 0x0000_a432,
170| | LensMake = 0x0000_a433,
171| | LensModel = 0x0000_a434,
172| | LensSerialNumber = 0x0000_a435,
173| | Gamma = 0x0000_a500,
174| |
175| | GPSTimeStamp = 0x00007,
176| | GPSSatellites = 0x00008,
177| | GPSStatus = 0x00009,
178| | GPSMeasureMode = 0x0000a,
179| | GPSDOP = 0x0000b,
180| | GPSSpeedRef = 0x0000c,
181| | GPSSpeed = 0x0000d,
182| | GPSTrackRef = 0x0000e,
183| | GPSTrack = 0x0000f,
184| | GPSImgDirectionRef = 0x0000_0010,
185| | GPSImgDirection = 0x0000_0011,
186| | GPSMapDatum = 0x0000_0012,
187| | GPSDestLatitudeRef = 0x0000_0013,
188| | GPSDestLatitude = 0x0000_0014,
189| | GPSDestLongitudeRef = 0x0000_0015,
190| | GPSDestLongitude = 0x0000_0016,
191| | GPSDestBearingRef = 0x0000_0017,
192| | GPSDestBearing = 0x0000_0018,
193| | GPSDestDistanceRef = 0x0000_0019,
194| | GPSDestDistance = 0x0000_001a,
195| | GPSProcessingMethod = 0x0000_001b,
196| | GPSAreaInformation = 0x0000_001c,
197| | GPSDateStamp = 0x0000_001d,
198| | GPSDifferential = 0x0000_001e,
199| |
200| | YCbCrPositioning = 0x0000_0213,
201| | RecommendedExposureIndex = 0x0000_8832,
202| | SubSecTimeDigitized = 0x0000_9292,
203| | SubSecTimeOriginal = 0x0000_9291,
204| | SubSecTime = 0x0000_9290,
205| | InteropOffset = 0x0000_a005,
206| | ComponentsConfiguration = 0x0000_9101,
207| | ThumbnailOffset = 0x0000_0201,
208| | ThumbnailLength = 0x0000_0202,
209| | Compression = 0x0000_0103,
210| | BitsPerSample = 0x0000_0102,
211| | PhotometricInterpretation = 0x0000_0106,
212| | SamplesPerPixel = 0x0000_0115,
213| | RowsPerStrip = 0x0000_0116,
214| | PlanarConfiguration = 0x0000_011c,
215| |}
216| |
217| |impl ExifTag {
218| 1.05M| pub const fn code(self) -> u16 {
219| 1.05M| self as u16
220| 1.05M| }
221| |
222| 521| pub const fn name(self) -> &'static str {
223| 521| match self {
224| 13| ExifTag::Make => "Make",
225| 7| ExifTag::Model => "Model",
226| 4| ExifTag::Orientation => "Orientation",
227| 7| ExifTag::ImageWidth => "ImageWidth",
228| 7| ExifTag::ImageHeight => "ImageHeight",
229| 7| ExifTag::ISOSpeedRatings => "ISOSpeedRatings",
230| 7| ExifTag::ShutterSpeedValue => "ShutterSpeedValue",
231| 7| ExifTag::ExposureTime => "ExposureTime",
232| 7| ExifTag::FNumber => "FNumber",
233| 10| ExifTag::ExifImageWidth => "ExifImageWidth",
234| 10| ExifTag::ExifImageHeight => "ExifImageHeight",
235| 7| ExifTag::DateTimeOriginal => "DateTimeOriginal",
236| 7| ExifTag::CreateDate => "CreateDate",
237| 7| ExifTag::ModifyDate => "ModifyDate",
238| 7| ExifTag::OffsetTime => "OffsetTime",
239| 7| ExifTag::OffsetTimeOriginal => "OffsetTimeOriginal",
240| 3| ExifTag::OffsetTimeDigitized => "OffsetTimeDigitized",
241| 6| ExifTag::GPSLatitudeRef => "GPSLatitudeRef",
242| 6| ExifTag::GPSLatitude => "GPSLatitude",
243| 6| ExifTag::GPSLongitudeRef => "GPSLongitudeRef",
244| 6| ExifTag::GPSLongitude => "GPSLongitude",
245| 6| ExifTag::GPSAltitudeRef => "GPSAltitudeRef",
246| 6| ExifTag::GPSAltitude => "GPSAltitude",
247| 3| ExifTag::GPSVersionID => "GPSVersionID",
248| 7| ExifTag::ExifOffset => "ExifOffset",
249| 5| ExifTag::GPSInfo => "GPSInfo",
250| 2| ExifTag::ImageDescription => "ImageDescription",
251| 9| ExifTag::XResolution => "XResolution",
252| 9| ExifTag::YResolution => "YResolution",
253| 9| ExifTag::ResolutionUnit => "ResolutionUnit",
254| 2| ExifTag::Software => "Software",
255| 2| ExifTag::HostComputer => "HostComputer",
256| 2| ExifTag::WhitePoint => "WhitePoint",
257| 2| ExifTag::PrimaryChromaticities => "PrimaryChromaticities",
258| 2| ExifTag::YCbCrCoefficients => "YCbCrCoefficients",
259| 2| ExifTag::ReferenceBlackWhite => "ReferenceBlackWhite",
260| 3| ExifTag::Copyright => "Copyright",
261| 6| ExifTag::ExposureProgram => "ExposureProgram",
262| 2| ExifTag::SpectralSensitivity => "SpectralSensitivity",
263| 2| ExifTag::OECF => "OECF",
264| 6| ExifTag::SensitivityType => "SensitivityType",
265| 6| ExifTag::ExifVersion => "ExifVersion",
266| 6| ExifTag::ApertureValue => "ApertureValue",
267| 5| ExifTag::BrightnessValue => "BrightnessValue",
268| 6| ExifTag::ExposureBiasValue => "ExposureBiasValue",
269| 5| ExifTag::MaxApertureValue => "MaxApertureValue",
270| 2| ExifTag::SubjectDistance => "SubjectDistance",
271| 6| ExifTag::MeteringMode => "MeteringMode",
272| 5| ExifTag::LightSource => "LightSource",
273| 6| ExifTag::Flash => "Flash",
274| 6| ExifTag::FocalLength => "FocalLength",
275| 2| ExifTag::SubjectArea => "SubjectArea",
276| 5| ExifTag::MakerNote => "MakerNote",
277| 5| ExifTag::UserComment => "UserComment",
278| 6| ExifTag::FlashPixVersion => "FlashPixVersion",
279| 6| ExifTag::ColorSpace => "ColorSpace",
280| 2| ExifTag::RelatedSoundFile => "RelatedSoundFile",
281| 2| ExifTag::FlashEnergy => "FlashEnergy",
282| 3| ExifTag::FocalPlaneXResolution => "FocalPlaneXResolution",
283| 3| ExifTag::FocalPlaneYResolution => "FocalPlaneYResolution",
284| 3| ExifTag::FocalPlaneResolutionUnit => "FocalPlaneResolutionUnit",
285| 2| ExifTag::SubjectLocation => "SubjectLocation",
286| 2| ExifTag::ExposureIndex => "ExposureIndex",
287| 5| ExifTag::SensingMethod => "SensingMethod",
288| 2| ExifTag::FileSource => "FileSource",
289| 5| ExifTag::SceneType => "SceneType",
290| 2| ExifTag::CFAPattern => "CFAPattern",
291| 3| ExifTag::CustomRendered => "CustomRendered",
292| 6| ExifTag::ExposureMode => "ExposureMode",
293| 6| ExifTag::WhiteBalanceMode => "WhiteBalanceMode",
294| 5| ExifTag::DigitalZoomRatio => "DigitalZoomRatio",
295| 5| ExifTag::FocalLengthIn35mmFilm => "FocalLengthIn35mmFilm",
296| 6| ExifTag::SceneCaptureType => "SceneCaptureType",
297| 2| ExifTag::GainControl => "GainControl",
298| 2| ExifTag::Contrast => "Contrast",
299| 2| ExifTag::Saturation => "Saturation",
300| 2| ExifTag::Sharpness => "Sharpness",
301| 2| ExifTag::DeviceSettingDescription => "DeviceSettingDescription",
302| 2| ExifTag::SubjectDistanceRange => "SubjectDistanceRange",
303| 2| ExifTag::ImageUniqueID => "ImageUniqueID",
304| 3| ExifTag::CameraSerialNumber => "CameraSerialNumber",
305| 3| ExifTag::LensSpecification => "LensSpecification",
306| 2| ExifTag::LensMake => "LensMake",
307| 3| ExifTag::LensModel => "LensModel",
308| 3| ExifTag::LensSerialNumber => "LensSerialNumber",
309| 2| ExifTag::Gamma => "Gamma",
310| 5| ExifTag::GPSTimeStamp => "GPSTimeStamp",
311| 2| ExifTag::GPSSatellites => "GPSSatellites",
312| 2| ExifTag::GPSStatus => "GPSStatus",
313| 2| ExifTag::GPSMeasureMode => "GPSMeasureMode",
314| 2| ExifTag::GPSDOP => "GPSDOP",
315| 2| ExifTag::GPSSpeedRef => "GPSSpeedRef",
316| 2| ExifTag::GPSSpeed => "GPSSpeed",
317| 2| ExifTag::GPSTrackRef => "GPSTrackRef",
318| 2| ExifTag::GPSTrack => "GPSTrack",
319| 2| ExifTag::GPSImgDirectionRef => "GPSImgDirectionRef",
320| 2| ExifTag::GPSImgDirection => "GPSImgDirection",
321| 2| ExifTag::GPSMapDatum => "GPSMapDatum",
322| 2| ExifTag::GPSDestLatitudeRef => "GPSDestLatitudeRef",
323| 2| ExifTag::GPSDestLatitude => "GPSDestLatitude",
324| 2| ExifTag::GPSDestLongitudeRef => "GPSDestLongitudeRef",
325| 2| ExifTag::GPSDestLongitude => "GPSDestLongitude",
326| 2| ExifTag::GPSDestBearingRef => "GPSDestBearingRef",
327| 2| ExifTag::GPSDestBearing => "GPSDestBearing",
328| 2| ExifTag::GPSDestDistanceRef => "GPSDestDistanceRef",
329| 2| ExifTag::GPSDestDistance => "GPSDestDistance",
330| 5| ExifTag::GPSProcessingMethod => "GPSProcessingMethod",
331| 2| ExifTag::GPSAreaInformation => "GPSAreaInformation",
332| 5| ExifTag::GPSDateStamp => "GPSDateStamp",
333| 2| ExifTag::GPSDifferential => "GPSDifferential",
334| 5| ExifTag::YCbCrPositioning => "YCbCrPositioning",
335| 5| ExifTag::RecommendedExposureIndex => "RecommendedExposureIndex",
336| 5| ExifTag::SubSecTimeDigitized => "SubSecTimeDigitized",
337| 5| ExifTag::SubSecTimeOriginal => "SubSecTimeOriginal",
338| 5| ExifTag::SubSecTime => "SubSecTime",
339| 5| ExifTag::InteropOffset => "InteropOffset",
340| 5| ExifTag::ComponentsConfiguration => "ComponentsConfiguration",
341| 5| ExifTag::ThumbnailOffset => "ThumbnailOffset",
342| 5| ExifTag::ThumbnailLength => "ThumbnailLength",
343| 5| ExifTag::Compression => "Compression",
344| 2| ExifTag::BitsPerSample => "BitsPerSample",
345| 2| ExifTag::PhotometricInterpretation => "PhotometricInterpretation",
346| 2| ExifTag::SamplesPerPixel => "SamplesPerPixel",
347| 2| ExifTag::RowsPerStrip => "RowsPerStrip",
348| 2| ExifTag::PlanarConfiguration => "PlanarConfiguration",
349| | }
350| 521| }
351| |
352| 21.7k| pub fn from_code(code: u16) -> Option<Self> {
353| | use ExifTag::*;
354| |
355| 21.7k| let tag = match code {
^21.4k
356| 21.7k| x if x == Make.code() => Self::Make,
^211 ^211
357| 21.5k| x if x == Model.code() => Self::Model,
^317 ^317
358| 21.2k| x if x == Orientation.code() => Self::Orientation,
^136 ^136
359| 21.0k| x if x == ImageWidth.code() => Self::ImageWidth,
^272 ^272
360| 20.8k| x if x == ImageHeight.code() => Self::ImageHeight,
^272 ^272
361| 20.5k| x if x == ISOSpeedRatings.code() => Self::ISOSpeedRatings,
^508 ^508
362| 20.0k| x if x == ShutterSpeedValue.code() => Self::ShutterSpeedValue,
^330 ^330
363| 19.7k| x if x == ExposureTime.code() => Self::ExposureTime,
^508 ^508
364| 19.2k| x if x == FNumber.code() => Self::FNumber,
^508 ^508
365| 18.6k| x if x == ExifImageWidth.code() => Self::ExifImageWidth,
^455 ^455
366| 18.2k| x if x == ExifImageHeight.code() => Self::ExifImageHeight,
^455 ^455
367| 17.7k| x if x == DateTimeOriginal.code() => Self::DateTimeOriginal,
^356 ^356
368| 17.4k| x if x == CreateDate.code() => Self::CreateDate,
^355 ^355
369| 17.0k| x if x == ModifyDate.code() => Self::ModifyDate,
^316 ^316
370| 16.7k| x if x == OffsetTime.code() => Self::OffsetTime,
^322 ^322
371| 16.4k| x if x == OffsetTimeOriginal.code() => Self::OffsetTimeOriginal,
^474 ^474
372| 15.9k| x if x == OffsetTimeDigitized.code() => Self::OffsetTimeDigitized,
^60 ^60
373| 15.9k| x if x == GPSLatitudeRef.code() => Self::GPSLatitudeRef,
^296 ^296
374| 15.6k| x if x == GPSLatitude.code() => Self::GPSLatitude,
^296 ^296
375| 15.3k| x if x == GPSLongitudeRef.code() => Self::GPSLongitudeRef,
^296 ^296
376| 15.0k| x if x == GPSLongitude.code() => Self::GPSLongitude,
^295 ^295
377| 14.7k| x if x == GPSAltitudeRef.code() => Self::GPSAltitudeRef,
^292 ^292
378| 14.4k| x if x == GPSAltitude.code() => Self::GPSAltitude,
^292 ^292
379| 14.1k| x if x == GPSVersionID.code() => Self::GPSVersionID,
^41 ^41
380| 14.0k| x if x == ExifOffset.code() => Self::ExifOffset,
^408 ^408
381| 13.6k| x if x == GPSInfo.code() => Self::GPSInfo,
^393 ^393
382| 13.2k| x if x == ImageDescription.code() => Self::ImageDescription,
^22 ^22
383| 13.2k| x if x == XResolution.code() => Self::XResolution,
^364 ^364
384| 12.9k| x if x == YResolution.code() => Self::YResolution,
^365 ^365
385| 12.5k| x if x == ResolutionUnit.code() => Self::ResolutionUnit,
^473 ^473
386| 12.0k| x if x == Software.code() => Self::Software,
^76 ^76
387| 11.9k| x if x == HostComputer.code() => Self::HostComputer,
^56 ^56
388| 11.9k| x if x == WhitePoint.code() => Self::WhitePoint,
^0 ^0
389| 11.9k| x if x == PrimaryChromaticities.code() => Self::PrimaryChromaticities,
^0 ^0
390| 11.9k| x if x == YCbCrCoefficients.code() => Self::YCbCrCoefficients,
^0 ^0
391| 11.9k| x if x == ReferenceBlackWhite.code() => Self::ReferenceBlackWhite,
^0 ^0
392| 11.9k| x if x == Copyright.code() => Self::Copyright,
^14 ^14
393| 11.9k| x if x == ExposureProgram.code() => Self::ExposureProgram,
^507 ^507
394| 11.4k| x if x == SpectralSensitivity.code() => Self::SpectralSensitivity,
^0 ^0
395| 11.4k| x if x == OECF.code() => Self::OECF,
^0 ^0
396| 11.4k| x if x == SensitivityType.code() => Self::SensitivityType,
^406 ^406
397| 11.0k| x if x == ExifVersion.code() => Self::ExifVersion,
^355 ^355
398| 10.6k| x if x == ApertureValue.code() => Self::ApertureValue,
^330 ^330
399| 10.3k| x if x == BrightnessValue.code() => Self::BrightnessValue,
^326 ^326
400| 9.99k| x if x == ExposureBiasValue.code() => Self::ExposureBiasValue,
^329 ^329
401| 9.67k| x if x == MaxApertureValue.code() => Self::MaxApertureValue,
^270 ^270
402| 9.40k| x if x == SubjectDistance.code() => Self::SubjectDistance,
^0 ^0
403| 9.40k| x if x == MeteringMode.code() => Self::MeteringMode,
^329 ^329
404| 9.07k| x if x == LightSource.code() => Self::LightSource,
^270 ^270
405| 8.80k| x if x == Flash.code() => Self::Flash,
^330 ^330
406| 8.47k| x if x == FocalLength.code() => Self::FocalLength,
^330 ^330
407| 8.14k| x if x == SubjectArea.code() => Self::SubjectArea,
^56 ^56
408| 8.08k| x if x == MakerNote.code() => Self::MakerNote,
^477 ^477
409| 7.60k| x if x == UserComment.code() => Self::UserComment,
^259 ^259
410| 7.34k| x if x == FlashPixVersion.code() => Self::FlashPixVersion,
^274 ^274
411| 7.07k| x if x == ColorSpace.code() => Self::ColorSpace,
^330 ^330
412| 6.74k| x if x == RelatedSoundFile.code() => Self::RelatedSoundFile,
^0 ^0
413| 6.74k| x if x == FlashEnergy.code() => Self::FlashEnergy,
^0 ^0
414| 6.74k| x if x == FocalPlaneXResolution.code() => Self::FocalPlaneXResolution,
^18 ^18
415| 6.72k| x if x == FocalPlaneYResolution.code() => Self::FocalPlaneYResolution,
^18 ^18
416| 6.70k| x if x == FocalPlaneResolutionUnit.code() => Self::FocalPlaneResolutionUnit,
^18 ^18
417| 6.69k| x if x == SubjectLocation.code() => Self::SubjectLocation,
^0 ^0
418| 6.69k| x if x == ExposureIndex.code() => Self::ExposureIndex,
^0 ^0
419| 6.69k| x if x == SensingMethod.code() => Self::SensingMethod,
^478 ^478
420| 6.21k| x if x == FileSource.code() => Self::FileSource,
^14 ^14
421| 6.19k| x if x == SceneType.code() => Self::SceneType,
^325 ^325
422| 5.87k| x if x == CFAPattern.code() => Self::CFAPattern,
^0 ^0
423| 5.87k| x if x == CustomRendered.code() => Self::CustomRendered,
^18 ^18
424| 5.85k| x if x == ExposureMode.code() => Self::ExposureMode,
^329 ^329
425| 5.52k| x if x == WhiteBalanceMode.code() => Self::WhiteBalanceMode,
^330 ^330
426| 5.19k| x if x == DigitalZoomRatio.code() => Self::DigitalZoomRatio,
^236 ^236
427| 4.96k| x if x == FocalLengthIn35mmFilm.code() => Self::FocalLengthIn35mmFilm,
^325 ^325
428| 4.63k| x if x == SceneCaptureType.code() => Self::SceneCaptureType,
^274 ^274
429| 4.36k| x if x == GainControl.code() => Self::GainControl,
^0 ^0
430| 4.36k| x if x == Contrast.code() => Self::Contrast,
^0 ^0
431| 4.36k| x if x == Saturation.code() => Self::Saturation,
^0 ^0
432| 4.36k| x if x == Sharpness.code() => Self::Sharpness,
^14 ^14
433| 4.34k| x if x == DeviceSettingDescription.code() => Self::DeviceSettingDescription,
^0 ^0
434| 4.34k| x if x == SubjectDistanceRange.code() => Self::SubjectDistanceRange,
^14 ^14
435| 4.33k| x if x == ImageUniqueID.code() => Self::ImageUniqueID,
^20 ^20
436| 4.31k| x if x == CameraSerialNumber.code() => Self::CameraSerialNumber,
^4 ^4
437| 4.31k| x if x == LensSpecification.code() => Self::LensSpecification,
^74 ^74
438| 4.23k| x if x == LensMake.code() => Self::LensMake,
^70 ^70
439| 4.16k| x if x == LensModel.code() => Self::LensModel,
^74 ^74
440| 4.09k| x if x == LensSerialNumber.code() => Self::LensSerialNumber,
^18 ^18
441| 4.07k| x if x == Gamma.code() => Self::Gamma,
^0 ^0
442| 4.07k| x if x == GPSTimeStamp.code() => Self::GPSTimeStamp,
^235 ^235
443| 3.83k| x if x == GPSSatellites.code() => Self::GPSSatellites,
^0 ^0
444| 3.83k| x if x == GPSStatus.code() => Self::GPSStatus,
^0 ^0
445| 3.83k| x if x == GPSMeasureMode.code() => Self::GPSMeasureMode,
^0 ^0
446| 3.83k| x if x == GPSDOP.code() => Self::GPSDOP,
^0 ^0
447| 3.83k| x if x == GPSSpeedRef.code() => Self::GPSSpeedRef,
^56 ^56
448| 3.78k| x if x == GPSSpeed.code() => Self::GPSSpeed,
^56 ^56
449| 3.72k| x if x == GPSTrackRef.code() => Self::GPSTrackRef,
^0 ^0
450| 3.72k| x if x == GPSTrack.code() => Self::GPSTrack,
^0 ^0
451| 3.72k| x if x == GPSImgDirectionRef.code() => Self::GPSImgDirectionRef,
^56 ^56
452| 3.67k| x if x == GPSImgDirection.code() => Self::GPSImgDirection,
^56 ^56
453| 3.61k| x if x == GPSMapDatum.code() => Self::GPSMapDatum,
^0 ^0
454| 3.61k| x if x == GPSDestLatitudeRef.code() => Self::GPSDestLatitudeRef,
^0 ^0
455| 3.61k| x if x == GPSDestLatitude.code() => Self::GPSDestLatitude,
^0 ^0
456| 3.61k| x if x == GPSDestLongitudeRef.code() => Self::GPSDestLongitudeRef,
^0 ^0
457| 3.61k| x if x == GPSDestLongitude.code() => Self::GPSDestLongitude,
^0 ^0
458| 3.61k| x if x == GPSDestBearingRef.code() => Self::GPSDestBearingRef,
^56 ^56
459| 3.55k| x if x == GPSDestBearing.code() => Self::GPSDestBearing,
^56 ^56
460| 3.50k| x if x == GPSDestDistanceRef.code() => Self::GPSDestDistanceRef,
^0 ^0
461| 3.50k| x if x == GPSDestDistance.code() => Self::GPSDestDistance,
^0 ^0
462| 3.50k| x if x == GPSProcessingMethod.code() => Self::GPSProcessingMethod,
^236 ^236
463| 3.26k| x if x == GPSAreaInformation.code() => Self::GPSAreaInformation,
^0 ^0
464| 3.26k| x if x == GPSDateStamp.code() => Self::GPSDateStamp,
^235 ^235
465| 3.03k| x if x == GPSDifferential.code() => Self::GPSDifferential,
^0 ^0
466| 3.03k| x if x == YCbCrPositioning.code() => Self::YCbCrPositioning,
^266 ^266
467| 2.76k| x if x == RecommendedExposureIndex.code() => Self::RecommendedExposureIndex,
^391 ^391
468| 2.37k| x if x == SubSecTimeDigitized.code() => Self::SubSecTimeDigitized,
^448 ^448
469| 1.92k| x if x == SubSecTimeOriginal.code() => Self::SubSecTimeOriginal,
^295 ^295
470| 1.63k| x if x == SubSecTime.code() => Self::SubSecTime,
^240 ^240
471| 1.39k| x if x == InteropOffset.code() => Self::InteropOffset,
^270 ^270
472| 1.12k| x if x == ComponentsConfiguration.code() => Self::ComponentsConfiguration,
^273 ^273
473| 849| x if x == ThumbnailOffset.code() => Self::ThumbnailOffset,
^135 ^135
474| 714| x if x == ThumbnailLength.code() => Self::ThumbnailLength,
^135 ^135
475| 579| x if x == Compression.code() => Self::Compression,
^161 ^161
476| 418| x if x == BitsPerSample.code() => Self::BitsPerSample,
^26 ^26
477| 392| x if x == PhotometricInterpretation.code() => Self::PhotometricInterpretation,
^22 ^22
478| 370| x if x == SamplesPerPixel.code() => Self::SamplesPerPixel,
^22 ^22
479| 348| x if x == RowsPerStrip.code() => Self::RowsPerStrip,
^22 ^22
480| 326| x if x == PlanarConfiguration.code() => Self::PlanarConfiguration,
^22 ^22
481| 304| _ => return None,
482| | };
483| |
484| 21.4k| Some(tag)
485| 21.7k| }
486| |}
487| |
488| |impl Display for ExifTag {
489| 241| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
490| 241| f.write_str(self.name())
491| 241| }
492| |}
493| |
494| |impl std::str::FromStr for ExifTag {
495| | type Err = crate::ConvertError;
496| |
497| 5| fn from_str(s: &str) -> Result<Self, Self::Err> {
498| | // Linear scan over the recognized tag list. Tag count is ~120;
499| | // a HashMap is overkill and would force a lazy_static.
500| | const ALL: &[ExifTag] = &[
501| | ExifTag::Make,
502| | ExifTag::Model,
503| | ExifTag::Orientation,
504| | ExifTag::ImageWidth,
505| | ExifTag::ImageHeight,
506| | ExifTag::ISOSpeedRatings,
507| | ExifTag::ShutterSpeedValue,
508| | ExifTag::ExposureTime,
509| | ExifTag::FNumber,
510| | ExifTag::ExifImageWidth,
511| | ExifTag::ExifImageHeight,
512| | ExifTag::DateTimeOriginal,
513| | ExifTag::CreateDate,
514| | ExifTag::ModifyDate,
515| | ExifTag::OffsetTime,
516| | ExifTag::OffsetTimeOriginal,
517| | ExifTag::OffsetTimeDigitized,
518| | ExifTag::GPSLatitudeRef,
519| | ExifTag::GPSLatitude,
520| | ExifTag::GPSLongitudeRef,
521| | ExifTag::GPSLongitude,
522| | ExifTag::GPSAltitudeRef,
523| | ExifTag::GPSAltitude,
524| | ExifTag::GPSVersionID,
525| | ExifTag::ExifOffset,
526| | ExifTag::GPSInfo,
527| | ExifTag::ImageDescription,
528| | ExifTag::XResolution,
529| | ExifTag::YResolution,
530| | ExifTag::ResolutionUnit,
531| | ExifTag::Software,
532| | ExifTag::HostComputer,
533| | ExifTag::WhitePoint,
534| | ExifTag::PrimaryChromaticities,
535| | ExifTag::YCbCrCoefficients,
536| | ExifTag::ReferenceBlackWhite,
537| | ExifTag::Copyright,
538| | ExifTag::ExposureProgram,
539| | ExifTag::SpectralSensitivity,
540| | ExifTag::OECF,
541| | ExifTag::SensitivityType,
542| | ExifTag::ExifVersion,
543| | ExifTag::ApertureValue,
544| | ExifTag::BrightnessValue,
545| | ExifTag::ExposureBiasValue,
546| | ExifTag::MaxApertureValue,
547| | ExifTag::SubjectDistance,
548| | ExifTag::MeteringMode,
549| | ExifTag::LightSource,
550| | ExifTag::Flash,
551| | ExifTag::FocalLength,
552| | ExifTag::SubjectArea,
553| | ExifTag::MakerNote,
554| | ExifTag::UserComment,
555| | ExifTag::FlashPixVersion,
556| | ExifTag::ColorSpace,
557| | ExifTag::RelatedSoundFile,
558| | ExifTag::FlashEnergy,
559| | ExifTag::FocalPlaneXResolution,
560| | ExifTag::FocalPlaneYResolution,
561| | ExifTag::FocalPlaneResolutionUnit,
562| | ExifTag::SubjectLocation,
563| | ExifTag::ExposureIndex,
564| | ExifTag::SensingMethod,
565| | ExifTag::FileSource,
566| | ExifTag::SceneType,
567| | ExifTag::CFAPattern,
568| | ExifTag::CustomRendered,
569| | ExifTag::ExposureMode,
570| | ExifTag::WhiteBalanceMode,
571| | ExifTag::DigitalZoomRatio,
572| | ExifTag::FocalLengthIn35mmFilm,
573| | ExifTag::SceneCaptureType,
574| | ExifTag::GainControl,
575| | ExifTag::Contrast,
576| | ExifTag::Saturation,
577| | ExifTag::Sharpness,
578| | ExifTag::DeviceSettingDescription,
579| | ExifTag::SubjectDistanceRange,
580| | ExifTag::ImageUniqueID,
581| | ExifTag::CameraSerialNumber,
582| | ExifTag::LensSpecification,
583| | ExifTag::LensMake,
584| | ExifTag::LensModel,
585| | ExifTag::LensSerialNumber,
586| | ExifTag::Gamma,
587| | ExifTag::GPSTimeStamp,
588| | ExifTag::GPSSatellites,
589| | ExifTag::GPSStatus,
590| | ExifTag::GPSMeasureMode,
591| | ExifTag::GPSDOP,
592| | ExifTag::GPSSpeedRef,
593| | ExifTag::GPSSpeed,
594| | ExifTag::GPSTrackRef,
595| | ExifTag::GPSTrack,
596| | ExifTag::GPSImgDirectionRef,
597| | ExifTag::GPSImgDirection,
598| | ExifTag::GPSMapDatum,
599| | ExifTag::GPSDestLatitudeRef,
600| | ExifTag::GPSDestLatitude,
601| | ExifTag::GPSDestLongitudeRef,
602| | ExifTag::GPSDestLongitude,
603| | ExifTag::GPSDestBearingRef,
604| | ExifTag::GPSDestBearing,
605| | ExifTag::GPSDestDistanceRef,
606| | ExifTag::GPSDestDistance,
607| | ExifTag::GPSProcessingMethod,
608| | ExifTag::GPSAreaInformation,
609| | ExifTag::GPSDateStamp,
610| | ExifTag::GPSDifferential,
611| | ExifTag::YCbCrPositioning,
612| | ExifTag::RecommendedExposureIndex,
613| | ExifTag::SubSecTimeDigitized,
614| | ExifTag::SubSecTimeOriginal,
615| | ExifTag::SubSecTime,
616| | ExifTag::InteropOffset,
617| | ExifTag::ComponentsConfiguration,
618| | ExifTag::ThumbnailOffset,
619| | ExifTag::ThumbnailLength,
620| | ExifTag::Compression,
621| | ExifTag::BitsPerSample,
622| | ExifTag::PhotometricInterpretation,
623| | ExifTag::SamplesPerPixel,
624| | ExifTag::RowsPerStrip,
625| | ExifTag::PlanarConfiguration,
626| | ];
627| 277| for &t in ALL {
628| 277| if t.name() == s {
629| 3| return Ok(t);
630| 274| }
631| | }
632| 2| Err(crate::ConvertError::UnknownTagName(s.to_owned()))
633| 5| }
634| |}
635| |
636| |#[cfg(test)]
637| |mod tests {
638| | use super::*;
639| | use crate::ConvertError;
640| | use std::str::FromStr;
641| |
642| | #[test]
643| 1| fn exif_tag_from_code_recognized() {
644| 1| assert_eq!(ExifTag::from_code(0x010f), Some(ExifTag::Make));
645| 1| assert_eq!(ExifTag::from_code(0x0110), Some(ExifTag::Model));
646| 1| }
647| |
648| | #[test]
649| 1| fn exif_tag_from_code_unknown_returns_none() {
650| 1| assert_eq!(ExifTag::from_code(0xffff), None);
651| 1| assert_eq!(ExifTag::from_code(0x0000), Some(ExifTag::GPSVersionID)); // 0 is GPSVersionID
652| 1| assert_eq!(ExifTag::from_code(0xfffe), None);
653| 1| }
654| |
655| | #[test]
656| 1| fn exif_tag_name_returns_static_str() {
657| 1| assert_eq!(ExifTag::Make.name(), "Make");
658| 1| assert_eq!(ExifTag::ExifOffset.name(), "ExifOffset");
659| | // Display routes through name():
660| 1| assert_eq!(ExifTag::Make.to_string(), "Make");
661| 1| }
662| |
663| | #[test]
664| 1| fn exif_tag_from_str_recognized() {
665| 1| assert_eq!(ExifTag::from_str("Make").unwrap(), ExifTag::Make);
666| 1| assert_eq!(
667| 1| ExifTag::from_str("ExifOffset").unwrap(),
668| | ExifTag::ExifOffset
669| | );
670| 1| }
671| |
672| | #[test]
673| 1| fn exif_tag_from_str_unknown_returns_convert_error() {
674| 1| match ExifTag::from_str("DefinitelyNotATag") {
675| 1| Err(ConvertError::UnknownTagName(s)) => {
676| 1| assert_eq!(s, "DefinitelyNotATag");
677| | }
678| 0| other => panic!("expected UnknownTagName, got {other:?}"),
679| | }
680| 1| }
681| |}
/home/min/dev/nom-exif/src/exif/travel.rs:
1| |use nom::{
2| | number::{streaming, Endianness},
3| | IResult, Needed, Parser,
4| |};
5| |
6| |use crate::{
7| | error::{MalformedKind, ParsingError},
8| | exif::TiffHeader,
9| | values::{array_to_string, DataFormat},
10| | TagOrCode,
11| |};
12| |
13| |use super::{exif_exif::IFD_ENTRY_SIZE, exif_iter::SUBIFD_TAGS};
14| |
15| |/// Only iterates headers, don't parse entries.
16| |///
17| |/// Currently only used to extract Exif data for *.tiff files.
18| |///
19| |/// NOTE: `parse_tag_entry_header` short-circuits on `tag == 0` as a guard
20| |/// against zero-padded malformed IFDs. This is safe **today** because we
21| |/// only observe `sub_ifd_offset`, and tag 0 is never in `SUBIFD_TAGS`. If
22| |/// this struct is ever extended to emit entry values, gate that
23| |/// short-circuit on "not inside the GPS sub-IFD" — tag 0 is the legitimate
24| |/// GPSVersionID and dropping it loses every following GPS field. See
25| |/// `IfdIter::is_gps_subifd` in `exif_iter.rs` and issue #50.
26| |pub(crate) struct IfdHeaderTravel<'a> {
27| | // starts from file beginning
28| | data: &'a [u8],
29| |
30| | tag: TagOrCode,
31| |
32| | endian: Endianness,
33| |
34| | // ifd data offset
35| | offset: usize,
36| |}
37| |
38| |#[derive(Debug, Clone)]
39| |pub(crate) struct EntryInfo<'a> {
40| | pub tag: u16,
41| | #[allow(unused)]
42| | pub data: &'a [u8],
43| | #[allow(unused)]
44| | pub data_format: DataFormat,
45| | #[allow(unused)]
46| | pub data_offset: Option<u32>,
47| | pub sub_ifd_offset: Option<u32>,
48| |}
49| |
50| |impl<'a> IfdHeaderTravel<'a> {
51| 13| pub fn new(input: &'a [u8], offset: usize, tag: TagOrCode, endian: Endianness) -> Self {
52| 13| Self {
53| 13| data: input,
54| 13| tag,
55| 13| endian,
56| 13| offset,
57| 13| }
58| 13| }
59| |
60| | #[tracing::instrument(skip_all)]
61| 80| fn parse_tag_entry_header(
62| 80| &'a self,
63| 80| entry_data: &'a [u8],
64| 80| ) -> IResult<&'a [u8], Option<EntryInfo<'a>>> {
65| 80| let endian = self.endian;
66| 80| let (remain, (tag, data_format, components_num, value_or_offset)) = (
67| 80| streaming::u16::<_, nom::error::Error<_>>(endian),
68| 80| streaming::u16(endian),
69| 80| streaming::u32(endian),
70| 80| streaming::u32(endian),
71| 80| )
72| 80| .parse(entry_data)?;
^0
73| |
74| 80| if tag == 0 {
75| 1| return Ok((remain, None));
76| 79| }
77| |
78| 79| let data_format: DataFormat = match data_format.try_into() {
^78 ^78
79| 78| Ok(df) => df,
80| | // Ignore errors here
81| 1| Err(e) => {
82| 1| tracing::warn!(?e, "Ignored: IFD entry data format error");
83| 1| return Ok((&[][..], None));
84| | }
85| | };
86| |
87| | // get component_size according to data format
88| 78| let component_size = data_format.component_size();
89| |
90| | // get entry data
91| 78| let size = components_num as usize * component_size;
92| 78| let (data, data_offset) = if size > 4 {
^77 ^77
93| 26| let start = self.get_data_pos(value_or_offset) as usize;
94| 26| let end = start + size;
95| 26| tracing::debug!(
96| | components_num,
97| | size,
98| | "tag {:04x} entry data start {:08x} end {:08x} my_offset: {:08x} data len {:08x}",
99| | tag,
100| | value_or_offset,
101| | start,
102| | end,
103| 0| self.data.len(),
104| | );
105| 26| if end > self.data.len() {
106| 1| return Err(nom::Err::Incomplete(Needed::new(end - self.data.len())));
107| 25| }
108| 25| (&self.data[start..end], Some(start as u32))
109| | } else {
110| 52| (entry_data, None)
111| | };
112| |
113| 77| let sub_ifd_offset = if SUBIFD_TAGS.contains(&tag) {
114| 2| let offset = self.get_data_pos(value_or_offset);
115| 2| if offset > 0 {
116| 1| Some(offset)
117| | } else {
118| 1| None
119| | }
120| | } else {
121| 75| None
122| | };
123| |
124| 77| let entry = EntryInfo {
125| 77| tag,
126| 77| data,
127| 77| data_format,
128| 77| data_offset,
129| 77| sub_ifd_offset,
130| 77| };
131| 77| Ok((&[][..], Some(entry)))
132| 80| }
133| |
134| 28| fn get_data_pos(&'a self, value_or_offset: u32) -> u32 {
135| | // value_or_offset.saturating_sub(self.offset)
136| 28| value_or_offset
137| 28| }
138| |
139| | #[tracing::instrument(skip(self))]
140| 80| fn parse_ifd_entry_header(&self, pos: u32) -> IResult<&[u8], Option<IfdHeaderTravel<'a>>> {
141| 80| let (_, entry_data) =
142| 80| nom::bytes::streaming::take(IFD_ENTRY_SIZE)(&self.data[pos as usize..])?;
^0
143| |
144| 80| let (remain, entry) = self.parse_tag_entry_header(entry_data)?;
^79 ^79 ^1
145| |
146| 79| if let Some(entry) = entry {
^77
147| | // if !cb(&entry) {
148| | // return Ok((&[][..], ()));
149| | // }
150| |
151| 77| if let Some(offset) = entry.sub_ifd_offset {
^1
152| 1| let tag: TagOrCode = entry.tag.into();
153| 1| tracing::debug!(?offset, data_len = self.data.len(), "sub-ifd: {:?}", tag);
^0 ^0
154| |
155| | // Full fill bytes until sub-ifd header
156| | let (_, _) =
157| 1| nom::bytes::streaming::take(offset as usize - remain.len() + 2)(self.data)?;
^0
158| |
159| 1| let sub_ifd = IfdHeaderTravel::new(self.data, offset as usize, tag, self.endian);
160| 1| return Ok((remain, Some(sub_ifd)));
161| 76| }
162| 2| }
163| |
164| 78| Ok((remain, None))
165| 80| }
166| |
167| | #[tracing::instrument(skip(self))]
168| 13| pub fn travel_ifd(&mut self, depth: usize) -> Result<(), ParsingError> {
169| 13| if depth >= 3 {
170| 1| let msg = "depth shouldn't be greater than 3";
171| 1| tracing::error!(msg);
172| 1| return Err(ParsingError::Failed {
173| 1| kind: MalformedKind::IfdEntry,
174| 1| message: msg.into(),
175| 1| });
176| 12| }
177| |
178| 12| if self.offset + 2 > self.data.len() {
179| 1| return Err(ParsingError::Failed {
180| 1| kind: MalformedKind::TiffHeader,
181| 1| message: format!("invalid ifd offset: {}", self.offset),
182| 1| });
183| 11| }
184| |
185| 11| let (_, entry_num) =
186| 11| TiffHeader::parse_ifd_entry_num(&self.data[self.offset..], self.endian).map_err(
187| | |e: nom::Err<nom::error::Error<&[u8]>>| ParsingError::Failed {
188| 0| kind: MalformedKind::TiffHeader,
189| 0| message: format!("parse ifd entry count failed: {e:?}"),
190| 0| },
191| 0| )?;
192| 11| let mut pos = self.offset + 2;
193| |
194| 11| let mut sub_ifds = Vec::new();
195| |
196| | // parse entries
197| 11| for _ in 0..entry_num {
198| 80| if pos >= self.data.len() {
199| 0| break;
200| 80| }
201| 80| let (_, sub_ifd) = self.parse_ifd_entry_header(pos as u32).map_err(
^79
202| | |e: nom::Err<nom::error::Error<&[u8]>>| ParsingError::Failed {
203| 1| kind: MalformedKind::IfdEntry,
204| 1| message: format!("parse ifd entry header failed: {e:?}"),
205| 1| },
206| 1| )?;
207| 79| pos += IFD_ENTRY_SIZE;
208| |
209| 79| if let Some(ifd) = sub_ifd {
^1
210| 1| tracing::debug!(
211| 0| data = array_to_string("bytes", self.data),
212| 0| tag = ifd.tag.to_string(),
213| | );
214| 1| sub_ifds.push(ifd);
215| 78| }
216| | }
217| |
218| 10| for mut ifd in sub_ifds {
^1
219| 1| ifd.travel_ifd(depth + 1)?;
^0
220| | }
221| |
222| | // Currently, we ignore ifd1 data in *.tif files
223| 10| Ok(())
224| 13| }
225| |}
226| |
227| |// fn keep_incomplete_err_only<T: Debug>(e: nom::Err<T>) -> nom::Err<String> {
228| |// match e {
229| |// nom::Err::Incomplete(n) => nom::Err::Incomplete(n),
230| |// nom::Err::Error(e) => nom::Err::Error(format!("parse ifd error: {:?}", e)),
231| |// nom::Err::Failure(_) => nom::Err::Failure("parse ifd failure".to_string()),
232| |// }
233| |// }
234| |
235| |#[cfg(test)]
236| |mod tests {
237| | use super::*;
238| | use crate::testkit::read_sample;
239| | use nom::number::Endianness;
240| |
241| | /// Build a single 12-byte little-endian IFD entry: tag(2) + format(2) + count(4) + value/offset(4).
242| 5| fn entry(tag: u16, format: u16, count: u32, value: u32) -> Vec<u8> {
243| 5| let mut v = Vec::with_capacity(12);
244| 5| v.extend_from_slice(&tag.to_le_bytes());
245| 5| v.extend_from_slice(&format.to_le_bytes());
246| 5| v.extend_from_slice(&count.to_le_bytes());
247| 5| v.extend_from_slice(&value.to_le_bytes());
248| 5| v
249| 5| }
250| |
251| | /// Build a little-endian IFD: 2-byte entry_count + entries + 4-byte next-IFD offset (zero).
252| 7| fn ifd(entries: &[Vec<u8>]) -> Vec<u8> {
253| 7| let count = entries.len() as u16;
254| 7| let mut v = count.to_le_bytes().to_vec();
255| 7| for e in entries {
^5
256| 5| v.extend_from_slice(e);
257| 5| }
258| 7| v.extend_from_slice(&[0u8; 4]);
259| 7| v
260| 7| }
261| |
262| | #[test]
263| 1| fn travel_short_circuits_on_tag_zero() {
264| | // tag = 0 must not be emitted as a sub-IFD (covers line 75).
265| 1| let data = ifd(&[entry(0, 1, 1, 0)]);
266| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
267| 1| assert!(t.travel_ifd(0).is_ok());
268| 1| }
269| |
270| | #[test]
271| 1| fn travel_rejects_invalid_data_format() {
272| | // data_format = 99 is out of range — covers the `Err(_)` arm (lines 81-83).
273| 1| let data = ifd(&[entry(0x010F /* Make */, 99, 1, 0)]);
274| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
275| 1| assert!(t.travel_ifd(0).is_ok());
276| 1| }
277| |
278| | #[test]
279| 1| fn travel_data_past_eof_errors() {
280| | // size > 4 with offset past EOF must error (covers lines 103-106 of the
281| | // size>4 branch in parse_tag_entry_header). We do not assert the error
282| | // kind — both Incomplete and Failed satisfy the contract.
283| 1| let data = ifd(&[entry(0x010F /* Make */, 2, 100, 0x0000_FF00)]);
284| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
285| 1| assert!(t.travel_ifd(0).is_err());
286| 1| }
287| |
288| | #[test]
289| 1| fn travel_invalid_offset_guard() {
290| | // offset + 2 > data.len() (covers line 176).
291| 1| let data = vec![0u8; 1];
292| 1| let mut t = IfdHeaderTravel::new(&data, 100, 0u16.into(), Endianness::Little);
293| 1| assert!(t.travel_ifd(0).is_err());
294| 1| }
295| |
296| | #[test]
297| 1| fn travel_depth_guard() {
298| | // depth >= 3 must error (covers lines 170-172).
299| 1| let data = ifd(&[]);
300| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
301| 1| assert!(t.travel_ifd(3).is_err());
302| 1| }
303| |
304| | #[test]
305| 1| fn travel_real_tiff_entry_loop() {
306| | // Real TIFF traversal — exercises parse_ifd_entry_num and the per-entry
307| | // 12-byte step loop on a full IFD0. testdata/tif.tif has no ExifIFD or
308| | // GPSInfo entries, so this test does NOT drive sub-IFD recursion; for
309| | // that, see travel_synthetic_subifd_recursion below.
310| 1| let buf = read_sample("tif.tif").unwrap();
311| 1| let endian = if &buf[0..2] == b"II" {
312| 1| Endianness::Little
313| | } else {
314| 0| Endianness::Big
315| | };
316| 1| let ifd_offset = match endian {
317| 1| Endianness::Little => u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]),
318| 0| Endianness::Big => u32::from_be_bytes([buf[4], buf[5], buf[6], buf[7]]),
319| 0| _ => unreachable!(),
320| | };
321| 1| let mut t = IfdHeaderTravel::new(&buf, ifd_offset as usize, 0u16.into(), endian);
322| 1| t.travel_ifd(0).unwrap();
323| 1| }
324| |
325| | #[test]
326| 1| fn travel_synthetic_subifd_recursion() {
327| | // Hand-built TIFF body: outer IFD at offset 0 with a single ExifOffset
328| | // (tag 0x8769) entry pointing to a child IFD with zero entries.
329| | // Exercises SUBIFD_TAGS branch (lines 113-118) and the recursion path
330| | // (lines 151-162, 205-206).
331| | const EXIF_OFFSET_TAG: u16 = 0x8769;
332| | // Outer IFD: 2-byte count + one 12-byte entry + 4-byte next-ifd = 18 bytes
333| 1| let sub_ifd_off: u32 = 18;
334| 1| let outer = ifd(&[entry(EXIF_OFFSET_TAG, 4 /* LONG */, 1, sub_ifd_off)]);
335| | // Sub-IFD: 0 entries + zero next-ifd pointer = 6 bytes
336| 1| let sub = ifd(&[]);
337| 1| let mut data = outer;
338| 1| data.extend_from_slice(&sub);
339| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
340| 1| t.travel_ifd(0).unwrap();
341| 1| }
342| |
343| | #[test]
344| 1| fn travel_subifd_zero_offset_is_skipped() {
345| | // ExifOffset tag with value 0 — covers the `else { None }` branch in the
346| | // `if offset > 0` check (line 118).
347| | const EXIF_OFFSET_TAG: u16 = 0x8769;
348| 1| let data = ifd(&[entry(EXIF_OFFSET_TAG, 4 /* LONG */, 1, 0)]);
349| 1| let mut t = IfdHeaderTravel::new(&data, 0, 0u16.into(), Endianness::Little);
350| 1| t.travel_ifd(0).unwrap();
351| 1| }
352| |}
/home/min/dev/nom-exif/src/file.rs:
1| |use nom::{bytes::complete, FindSubstring};
2| |use std::io::Cursor;
3| |
4| |use crate::{
5| | bbox::{travel_header, BoxHolder},
6| | ebml::element::parse_ebml_doc_type,
7| | error::MalformedKind,
8| | exif::TiffHeader,
9| | jpeg::check_jpeg,
10| | raf::RafInfo,
11| | slice::SubsliceRange,
12| |};
13| |
14| |const HEIF_HEIC_BRAND_NAMES: &[&[u8]] = &[
15| | b"heic", // the usual HEIF images
16| | b"heix", // 10bit images, or anything that uses h265 with range extension
17| | b"hevc", // 'hevx': brands for image sequences
18| | b"heim", // multiview
19| | b"heis", // scalable
20| | b"hevm", // multiview sequence
21| | b"hevs", // scalable sequence
22| | b"mif1", b"MiHE", b"miaf", b"MiHB", // HEIC file's compatible brands
23| |];
24| |
25| |const HEIC_BRAND_NAMES: &[&[u8]] = &[b"heic", b"heix", b"heim", b"heis"];
26| |
27| |// TODO: Refer to the information on the website https://www.ftyps.com to add
28| |// other less common MP4 brands.
29| |const MP4_BRAND_NAMES: &[&str] = &[
30| | "3g2a", "3g2b", "3g2c", "3ge6", "3ge7", "3gg6", "3gp4", "3gp5", "3gp6", "3gs7", "avc1", "mp41",
31| | "mp42", "iso2", "isom", "vfj1", "XAVC",
32| |];
33| |
34| |const QT_BRAND_NAMES: &[&str] = &["qt ", "mqt "];
35| |
36| |const CR3_BRAND_NAMES: &[&str] = &["crx "];
37| |
38| |// AVIF (AV1 Image File Format, ISO/IEC 23000-22) brands. `avif` covers single
39| |// images; `avis` covers image sequences; `avio` covers AVIF item-only files.
40| |const AVIF_BRAND_NAMES: &[&[u8]] = &[b"avif", b"avis", b"avio"];
41| |
42| |#[derive(Debug, Clone, PartialEq, Eq, Copy)]
43| |pub(crate) enum MediaMime {
44| | Image(MediaMimeImage),
45| | Track(MediaMimeTrack),
46| |}
47| |
48| |impl MediaMime {
49| 96| pub fn unwrap_image(self) -> MediaMimeImage {
50| 96| match self {
51| 96| MediaMime::Image(val) => val,
52| | MediaMime::Track(_) => {
53| 0| panic!("called `MediaMime::unwrap_image()` on a `MediaMime::Track`")
54| | }
55| | }
56| 96| }
57| |}
58| |
59| |#[derive(Debug, Clone, PartialEq, Eq, Copy)]
60| |pub(crate) enum MediaMimeImage {
61| | Jpeg,
62| | Heic,
63| | Heif,
64| | Avif,
65| | Tiff,
66| | Raf,
67| | Cr3,
68| | Png,
69| |}
70| |
71| |#[derive(Debug, Clone, PartialEq, Eq, Copy)]
72| |pub(crate) enum MediaMimeTrack {
73| | QuickTime,
74| | Mp4,
75| | Webm,
76| | Matroska,
77| | _3gpp,
78| |}
79| |
80| |impl TryFrom<&[u8]> for MediaMime {
81| | type Error = crate::Error;
82| 305| fn try_from(input: &[u8]) -> Result<Self, Self::Error> {
83| 305| let mime = if let Ok(x) = parse_bmff_mime(input) {
^290 ^140
84| 140| x
85| 165| } else if let Ok(x) = get_ebml_doc_type(input) {
^37
86| 37| if x == "webm" {
87| 4| MediaMime::Track(MediaMimeTrack::Webm)
88| | } else {
89| 33| MediaMime::Track(MediaMimeTrack::Matroska)
90| | }
91| 128| } else if TiffHeader::parse(input).is_ok() {
92| 3| MediaMime::Image(MediaMimeImage::Tiff)
93| 125| } else if check_png(input).is_ok() {
94| 22| MediaMime::Image(MediaMimeImage::Png)
95| 103| } else if check_jpeg(input).is_ok() {
96| 86| MediaMime::Image(MediaMimeImage::Jpeg)
97| 17| } else if RafInfo::check(input).is_ok() {
98| 2| MediaMime::Image(MediaMimeImage::Raf)
99| | } else {
100| 15| return Err(crate::Error::UnsupportedFormat);
101| | };
102| |
103| 290| Ok(mime)
104| 305| }
105| |}
106| |
107| 165|fn get_ebml_doc_type(input: &[u8]) -> crate::Result<String> {
108| 165| let mut cursor = Cursor::new(input);
109| 165| let doc = parse_ebml_doc_type(&mut cursor)?;
^37 ^128
110| 37| Ok(doc)
111| 165|}
112| |
113| |#[tracing::instrument(skip_all)]
114| 305|fn parse_bmff_mime(input: &[u8]) -> crate::Result<MediaMime> {
115| 142| let (ftyp, Some(major_brand)) =
116| 305| get_ftyp_and_major_brand(input).map_err(|_| crate::Error::UnsupportedFormat)?
^159
117| | else {
118| 8| if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
^4 ^4 ^4
119| | // ftyp is None, mdat box is found, assume it's a MOV file extracted from HEIC
120| 4| return Ok(MediaMime::Track(MediaMimeTrack::QuickTime));
121| 0| }
122| |
123| 0| return Err(crate::Error::UnsupportedFormat);
124| | };
125| |
126| 142| tracing::debug!(?ftyp);
127| |
128| | // Check if it is a QuickTime file
129| 235| if QT_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
^142 ^142
130| 49| return Ok(MediaMime::Track(MediaMimeTrack::QuickTime));
131| 93| }
132| |
133| | // Check if it is an AVIF file. Must precede the HEIF compatible-brand
134| | // check below because AVIF files commonly include `mif1`/`miaf` in their
135| | // compatible-brand list.
136| 93| if AVIF_BRAND_NAMES.contains(&major_brand) {
137| 1| return Ok(MediaMime::Image(MediaMimeImage::Avif));
138| 92| }
139| |
140| | // Check if it is a HEIF file
141| 92| if HEIF_HEIC_BRAND_NAMES.contains(&major_brand) {
142| 19| if HEIC_BRAND_NAMES.contains(&major_brand) {
143| 19| return Ok(MediaMime::Image(MediaMimeImage::Heic));
144| 0| }
145| 0| return Ok(MediaMime::Image(MediaMimeImage::Heif));
146| 73| }
147| |
148| | // Check if it is a MP4 file
149| 1.01k| if MP4_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
^73 ^73
150| 56| if major_brand.starts_with(b"3gp") {
151| 8| return Ok(MediaMime::Track(MediaMimeTrack::_3gpp));
152| 48| }
153| 48| return Ok(MediaMime::Track(MediaMimeTrack::Mp4));
154| 17| }
155| |
156| | // Check if it is a CR3 file
157| 17| if CR3_BRAND_NAMES.iter().any(|v| v.as_bytes() == major_brand) {
158| 1| return Ok(MediaMime::Image(MediaMimeImage::Cr3));
159| 16| }
160| |
161| | // Check compatible brands
162| 16| let compatible_brands = ftyp.body_data();
163| |
164| 16| if QT_BRAND_NAMES
165| 16| .iter()
166| 28| .any(|v| compatible_brands.find_substring(v.as_bytes()).is_some())
^16
167| | {
168| 4| return Ok(MediaMime::Track(MediaMimeTrack::QuickTime));
169| 12| }
170| |
171| 12| if AVIF_BRAND_NAMES
172| 12| .iter()
173| 36| .any(|x| compatible_brands.find_substring(*x).is_some())
^12
174| | {
175| 0| return Ok(MediaMime::Image(MediaMimeImage::Avif));
176| 12| }
177| |
178| 12| if HEIF_HEIC_BRAND_NAMES
179| 12| .iter()
180| 92| .any(|x| compatible_brands.find_substring(*x).is_some())
^12
181| | {
182| 4| if HEIC_BRAND_NAMES.contains(&major_brand) {
183| 0| return Ok(MediaMime::Image(MediaMimeImage::Heic));
184| 4| }
185| 4| return Ok(MediaMime::Image(MediaMimeImage::Heif));
186| 8| }
187| |
188| 8| if MP4_BRAND_NAMES
189| 8| .iter()
190| 136| .any(|v| compatible_brands.subslice_in_range(v.as_bytes()).is_some())
^8
191| | {
192| 0| if major_brand.starts_with(b"3gp") {
193| 0| return Ok(MediaMime::Track(MediaMimeTrack::_3gpp));
194| 0| }
195| 0| return Ok(MediaMime::Track(MediaMimeTrack::Mp4));
196| 8| }
197| |
198| 8| tracing::warn!(
199| 32| marjor_brand = major_brand.iter().map(|b| *b as char).collect::<String>(),
^8 ^8 ^8 ^8
200| | "unknown major brand",
201| | );
202| |
203| 15| if travel_header(input, |header, _| header.box_type != "mdat").is_ok() {
^8 ^8 ^8
204| | // mdat box found, assume it's a mp4 file
205| 2| return Ok(MediaMime::Track(MediaMimeTrack::Mp4));
206| 6| }
207| |
208| 6| Err(crate::Error::UnsupportedFormat)
209| 305|}
210| |
211| |const PNG_SIGNATURE: &[u8; 8] = b"\x89PNG\r\n\x1a\n";
212| |
213| 125|fn check_png(input: &[u8]) -> Result<(), ()> {
214| 125| if input.len() >= PNG_SIGNATURE.len() && &input[..PNG_SIGNATURE.len()] == PNG_SIGNATURE {
^123^123
215| 22| Ok(())
216| | } else {
217| 103| Err(())
218| | }
219| 125|}
220| |
221| 305|fn get_ftyp_and_major_brand(input: &[u8]) -> crate::Result<(BoxHolder<'_>, Option<&[u8]>)> {
222| 305| let (_, bbox) = BoxHolder::parse(input).map_err(|e| crate::Error::Malformed {
^146
223| 159| kind: MalformedKind::IsoBmffBox,
224| 159| message: format!("parse ftyp failed: {e}"),
225| 159| })?;
226| |
227| 146| if bbox.box_type() == "ftyp" {
228| 142| if bbox.body_data().len() < 4 {
229| 0| return Err(crate::Error::Malformed {
230| 0| kind: MalformedKind::IsoBmffBox,
231| 0| message: format!(
232| 0| "parse ftyp failed; body size should greater than 4, got {}",
233| 0| bbox.body_data().len()
234| 0| ),
235| 0| });
236| 142| }
237| 142| let (_, ftyp) = complete::take(4_usize)(bbox.body_data())
238| 142| .map_err(|e| crate::error::nom_err_to_malformed(e, MalformedKind::IsoBmffBox))?;
^0 ^0 ^0 ^0
239| 142| Ok((bbox, Some(ftyp)))
240| 4| } else if bbox.box_type() == "wide" {
241| | // MOV files that extracted from HEIC starts with `wide` & `mdat` atoms
242| 4| Ok((bbox, None))
243| | } else {
244| 0| Err(crate::Error::Malformed {
245| 0| kind: MalformedKind::IsoBmffBox,
246| 0| message: format!("parse ftyp failed; first box type is: {}", bbox.box_type()),
247| 0| })
248| | }
249| 305|}
250| |
251| |#[cfg(test)]
252| |mod tests {
253| | use std::ops::Deref;
254| |
255| | use super::*;
256| | use test_case::test_case;
257| | use MediaMime::*;
258| | use MediaMimeImage::*;
259| | use MediaMimeTrack::*;
260| |
261| | use crate::testkit::read_sample;
262| |
263| | #[test_case("exif.heic", Image(Heic))]
264| | #[test_case("exif.avif", Image(Avif))]
265| | #[test_case("exif.jpg", Image(Jpeg))]
266| | #[test_case("exif.png", Image(Png))]
267| | #[test_case("fujifilm_x_t1_01.raf.meta", Image(Raf))]
268| | #[test_case("meta.mp4", Track(Mp4))]
269| | #[test_case("meta.mov", Track(QuickTime))]
270| | #[test_case("embedded-in-heic.mov", Track(QuickTime))]
271| | #[test_case("compatible-brands.mov", Track(QuickTime))]
272| | #[test_case("webm_480.webm", Track(Webm))]
273| | #[test_case("mkv_640x360.mkv", Track(Matroska))]
274| | #[test_case("mka.mka", Track(Matroska))]
275| | #[test_case("3gp_640x360.3gp", Track(_3gpp))]
276| | #[test_case("sony-a7-xavc.MP4", Track(Mp4))]
277| 14| fn mime(path: &str, mime: MediaMime) {
278| 14| let data = read_sample(path).unwrap();
279| 14| let m: MediaMime = data.deref().try_into().unwrap();
280| 14| assert_eq!(m, mime);
281| 14| }
282| |}
283| |
284| |#[cfg(test)]
285| |mod v3_tests {
286| | use super::*;
287| | use crate::error::Error;
288| |
289| | #[test]
290| 1| fn unrecognized_returns_unsupported_format() {
291| 1| let bogus = b"\x00\x00\x00\x00not a real file";
292| 1| let res: Result<MediaMime, Error> = bogus.as_slice().try_into();
293| 1| assert!(matches!(res, Err(Error::UnsupportedFormat)));
^0
294| 1| }
295| |}
/home/min/dev/nom-exif/src/heif.rs:
1| |use nom::combinator::fail;
2| |use nom::{IResult, Parser};
3| |
4| |use crate::bbox::find_box;
5| |use crate::{
6| | bbox::{BoxHolder, MetaBox, ParseBox},
7| | error::{
8| | nom_error_to_parsing_error_with_state, MalformedKind, ParsingError, ParsingErrorState,
9| | },
10| | exif::check_exif_header2,
11| | parser::ParsingState,
12| |};
13| |
14| 28|pub(crate) fn extract_exif_data(
15| 28| state: Option<ParsingState>,
16| 28| buf: &[u8],
17| 28|) -> Result<(Option<&[u8]>, Option<ParsingState>), ParsingErrorState> {
18| 21| let (data, state) = match state {
^3
19| 2| Some(ParsingState::HeifExifSize(size)) => {
20| 2| let (_, data) = nom::bytes::streaming::take(size)(buf).map_err(|e| {
^1 ^1
21| 1| nom_error_to_parsing_error_with_state(e, MalformedKind::IsoBmffBox, state.clone())
22| 1| })?;
23| 1| (Some(data), state)
24| | }
25| | None => {
26| 25| let (_, meta) = parse_meta_box(buf).map_err(|e| {
^21 ^4
27| 4| nom_error_to_parsing_error_with_state(e, MalformedKind::IsoBmffBox, state)
28| 4| })?;
29| |
30| 21| if let Some(meta) = meta {
^20
31| 20| if let Some(range) = meta.exif_data_offset() {
32| 20| if range.end > buf.len() {
33| 1| let state = ParsingState::HeifExifSize(range.len());
34| 1| let clear_and_skip = ParsingError::ClearAndSkip(range.start);
35| 1| return Err(ParsingErrorState::new(clear_and_skip, Some(state)));
36| | } else {
37| 19| (Some(&buf[range]), None)
38| | }
39| | } else {
40| 0| return Err(ParsingErrorState::new(
41| 0| ParsingError::Failed {
42| 0| kind: MalformedKind::IsoBmffBox,
43| 0| message: "no exif offset in meta box".into(),
44| 0| },
45| 0| None,
46| 0| ));
47| | }
48| | } else {
49| 1| (None, None)
50| | }
51| | }
52| | _ => {
53| 1| return Err(ParsingErrorState::new(
54| 1| ParsingError::Failed {
55| 1| kind: MalformedKind::IsoBmffBox,
56| 1| message: "unexpected parsing state for heif".into(),
57| 1| },
58| 1| None,
59| 1| ))
60| | }
61| | };
62| |
63| 21| let data = data.and_then(|x| check_exif_header2(x).map(|x| x.0).ok());
^20 ^20^20 ^20
64| |
65| 21| Ok((data, state))
66| 28|}
67| |
68| 27|pub(crate) fn parse_meta_box(input: &[u8]) -> IResult<&[u8], Option<MetaBox>> {
69| 27| let remain = input;
70| 27| let (remain, bbox) = BoxHolder::parse(remain)?;
^0
71| 27| if bbox.box_type() != "ftyp" {
72| 1| return fail().parse(input);
73| 26| }
74| |
75| 26| let (remain, Some(bbox)) = find_box(remain, "meta")? else {
^24 ^24 ^0
76| 2| tracing::debug!(?bbox, "meta box not found");
77| 2| return Ok((remain, None));
78| | };
79| 24| tracing::debug!(
80| | ?bbox,
81| 0| pos = input.len() - remain.len() - bbox.header.box_size as usize,
82| | "Got meta box"
83| | );
84| 24| let (_, bbox) = MetaBox::parse_box(bbox.data)?;
^20 ^4
85| 20| tracing::debug!(?bbox, "meta box parsed");
86| 20| Ok((remain, Some(bbox)))
87| 27|}
88| |
89| |#[cfg(test)]
90| |mod tests {
91| | use super::*;
92| | use crate::testkit::*;
93| | use test_case::test_case;
94| | use tracing::level_filters::LevelFilter;
95| |
96| | /// Build a minimal `ftyp` box followed by `tail` bytes. The ftyp body
97| | /// holds `heic` major brand, zero minor version, and one `heic` compat
98| | /// brand (20 bytes total including the 8-byte header).
99| 2| fn ftyp_with_tail(tail: &[u8]) -> Vec<u8> {
100| 2| let mut buf = Vec::new();
101| 2| buf.extend_from_slice(&20u32.to_be_bytes()); // box size
102| 2| buf.extend_from_slice(b"ftyp");
103| 2| buf.extend_from_slice(b"heic"); // major brand
104| 2| buf.extend_from_slice(&0u32.to_be_bytes()); // minor
105| 2| buf.extend_from_slice(b"heic"); // compat brand
106| 2| buf.extend_from_slice(tail);
107| 2| buf
108| 2| }
109| |
110| | #[test_case("exif-one-entry.heic", 0x24-10)]
111| | #[test_case("exif.heic", 0xa3a-10)]
112| | #[test_case("exif.avif", 0xa3a-10)]
113| 3| fn heic_exif_data(path: &str, exif_size: usize) {
114| | // Enable DEBUG level so the `tracing::debug!` format-arg expressions
115| | // inside `parse_meta_box` are actually evaluated (covers line 71).
116| 3| let _ = tracing_subscriber::fmt()
117| 3| .with_test_writer()
118| 3| .with_max_level(LevelFilter::DEBUG)
119| 3| .try_init();
120| 3| let buf = read_sample(path).unwrap();
121| 3| let (exif, _state) = extract_exif_data(None, &buf[..]).unwrap();
122| 3| assert_eq!(exif.unwrap().len(), exif_size);
123| 3| }
124| |
125| | #[test]
126| 1| fn heif_second_pass_with_state() {
127| | // Drive the Some(HeifExifSize(size)) branch (lines 17-20).
128| | // check_exif_header2 expects a be_u32 prefix followed by "Exif\0\0".
129| 1| let mut exif_bytes: Vec<u8> = Vec::new();
130| 1| exif_bytes.extend_from_slice(&0u32.to_be_bytes()); // tiff offset prefix
131| 1| exif_bytes.extend_from_slice(b"Exif\0\0");
132| 1| exif_bytes.extend_from_slice(b"II*\0\x08\0\0\0\x00\0\0\0");
133| 1| let state = Some(ParsingState::HeifExifSize(exif_bytes.len()));
134| 1| let (data, _) = extract_exif_data(state, &exif_bytes).unwrap();
135| 1| assert!(data.is_some());
136| 1| }
137| |
138| | #[test]
139| 1| fn heif_second_pass_short_buffer_errors() {
140| | // HeifExifSize advertises more bytes than the buffer carries — the
141| | // streaming `take` fails and the error-mapping closure on line 19
142| | // runs.
143| 1| let state = Some(ParsingState::HeifExifSize(64));
144| 1| let buf = vec![0u8; 4];
145| 1| let result = extract_exif_data(state, &buf);
146| 1| assert!(result.is_err());
147| 1| }
148| |
149| | #[test]
150| 1| fn heif_clear_and_skip_when_exif_past_eof() {
151| | // exif.heic's meta box occupies bytes 0x24..0xE1E. Truncating just
152| | // past the meta box leaves the ftyp + meta intact while cutting the
153| | // exif payload that lives further into the file — exactly the
154| | // ClearAndSkip path on lines 29-31.
155| 1| let buf = read_sample("exif.heic").unwrap();
156| 1| let cut = 0xE1E.min(buf.len());
157| 1| let truncated = &buf[..cut];
158| 1| let err = extract_exif_data(None, truncated).expect_err("expected ClearAndSkip");
159| 1| assert!(matches!(err.err, ParsingError::ClearAndSkip(_)));
^0
160| 1| assert!(matches!(err.state, Some(ParsingState::HeifExifSize(_))));
^0
161| 1| }
162| |
163| | #[test]
164| 1| fn heif_bad_ftyp_fails() {
165| | // Build a syntactically valid box whose type is NOT "ftyp", so
166| | // BoxHolder::parse succeeds but parse_meta_box hits the explicit
167| | // `fail()` on line 62.
168| 1| let mut buf = Vec::new();
169| 1| buf.extend_from_slice(&16u32.to_be_bytes()); // box size = 16
170| 1| buf.extend_from_slice(b"mdat"); // box type, deliberately not ftyp
171| 1| buf.extend_from_slice(&[0u8; 8]); // 8 bytes of body to satisfy take(16)
172| 1| let result = parse_meta_box(&buf);
173| 1| assert!(result.is_err(), "non-ftyp lead box must error");
174| 1| }
175| |
176| | #[test]
177| 1| fn heif_extract_no_meta_returns_none() {
178| | // ftyp present but no meta box afterward — drives
179| | // `extract_exif_data` through the `meta is None` arm on line 42.
180| 1| let buf = ftyp_with_tail(&[]);
181| 1| let (data, state) = extract_exif_data(None, &buf).unwrap();
182| 1| assert!(data.is_none());
183| 1| assert!(state.is_none());
184| 1| }
185| |
186| | #[test]
187| 1| fn heif_meta_box_not_found() {
188| | // ftyp present but no meta box afterward — covers lines 66-67.
189| 1| let buf = ftyp_with_tail(&[]);
190| 1| let (_, meta) = parse_meta_box(&buf).unwrap();
191| 1| assert!(meta.is_none());
192| 1| }
193| |
194| | #[test]
195| | #[should_panic(expected = "unexpected parsing state for heif")]
196| 1| fn heif_unexpected_state_panics() {
197| | // Pass a state that isn't HeifExifSize — covers the `_ =>` arm
198| | // (lines 45-50). extract_exif_data returns Err with the message
199| | // "unexpected parsing state for heif"; .unwrap() panics on it.
200| 1| let state = Some(ParsingState::Cr3ExifSize(10));
201| 1| let buf = vec![0u8; 32];
202| 1| let _ = extract_exif_data(state, &buf).unwrap();
203| 1| }
204| |}
/home/min/dev/nom-exif/src/image_metadata.rs:
1| |//! Structured image-metadata view returned by
2| |//! `MediaParser::parse_image_metadata` (lands in phase 4).
3| |//!
4| |//! See [`ImageMetadata`].
5| |//!
6| |//! # Example: lazy and eager forms compose
7| |//!
8| |//! ```rust
9| |//! use nom_exif::{ImageMetadata, ImageFormatMetadata, ExifIter, Exif};
10| |//!
11| |//! // Default form (eager — type parameter defaults to Exif).
12| |//! let _eager_default: ImageMetadata = ImageMetadata::default();
13| |//!
14| |//! // Explicit lazy form.
15| |//! let lazy: ImageMetadata<ExifIter> = ImageMetadata {
16| |//! exif: None,
17| |//! format: None,
18| |//! };
19| |//!
20| |//! // Lazy → eager conversion via From.
21| |//! let _eager: ImageMetadata<Exif> = lazy.into();
22| |//! ```
23| |
24| |use crate::exif::png_text::PngTextChunks;
25| |
26| |mod sealed {
27| | pub trait Sealed {}
28| |}
29| |
30| |/// Marker trait for the two valid "EXIF representations" held by
31| |/// [`ImageMetadata`]: [`Exif`](crate::Exif) (eager) and [`ExifIter`](crate::ExifIter)
32| |/// (lazy). Sealed — users cannot add their own implementations.
33| |pub trait ExifRepr: sealed::Sealed {}
34| |
35| |impl sealed::Sealed for crate::Exif {}
36| |impl ExifRepr for crate::Exif {}
37| |
38| |impl sealed::Sealed for crate::ExifIter {}
39| |impl ExifRepr for crate::ExifIter {}
40| |
41| |/// Structured image-metadata view: EXIF (if any) plus format-specific
42| |/// metadata (if any).
43| |///
44| |/// Default `E = Exif` — eager EXIF representation. The
45| |/// `MediaParser::parse_image_metadata` method returns
46| |/// `ImageMetadata<ExifIter>` (lazy); convert to the default eager form
47| |/// via `.into()` when desired.
48| |///
49| |/// **Forward-compat note**: this struct is shaped to be reused
50| |/// unchanged by a future v4 redesign of the [`Metadata`](crate::Metadata)
51| |/// enum (`Metadata::Image(ImageMetadata)`).
52| |#[derive(Debug, Clone)]
53| |#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54| |pub struct ImageMetadata<E: ExifRepr = crate::Exif> {
55| | /// EXIF tags found in the source image, if any. For PNG, this
56| | /// includes legacy `Raw profile type {exif,APP1}` hex-encoded
57| | /// EXIF transparently merged.
58| | pub exif: Option<E>,
59| |
60| | /// Format-specific metadata that does not fit the EXIF/IFD
61| | /// abstraction (e.g. PNG `tEXt` chunks).
62| | pub format: Option<ImageFormatMetadata>,
63| |}
64| |
65| |impl<E: ExifRepr> Default for ImageMetadata<E> {
66| 2| fn default() -> Self {
67| 2| ImageMetadata {
68| 2| exif: None,
69| 2| format: None,
70| 2| }
71| 2| }
72| |}
73| |
74| |impl From<ImageMetadata<crate::ExifIter>> for ImageMetadata<crate::Exif> {
75| 2| fn from(m: ImageMetadata<crate::ExifIter>) -> Self {
76| 2| ImageMetadata {
77| 2| exif: m.exif.map(Into::into),
78| 2| format: m.format,
79| 2| }
80| 2| }
81| |}
82| |
83| |/// Format-specific image metadata. One variant per format that has
84| |/// metadata not expressible as EXIF tags.
85| |///
86| |/// Marked `#[non_exhaustive]` so future formats can be added
87| |/// (`Gif(...)`, `Webp(...)` etc.) without breaking exhaustive `match`
88| |/// statements in user code.
89| |#[derive(Debug, Clone)]
90| |#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
91| |#[non_exhaustive]
92| |pub enum ImageFormatMetadata {
93| | /// PNG `tEXt` chunks. Latin-1 key/value pairs in file order.
94| | Png(PngTextChunks),
95| |}
96| |
97| |#[cfg(test)]
98| |mod tests {
99| | use super::*;
100| |
101| | #[test]
102| 1| fn default_is_empty() {
103| 1| let m: ImageMetadata = ImageMetadata::default();
104| 1| assert!(m.exif.is_none());
105| 1| assert!(m.format.is_none());
106| 1| }
107| |
108| | #[test]
109| 1| fn generic_explicit_lazy_form() {
110| | // ImageMetadata<ExifIter> compiles and is constructible.
111| 1| let m: ImageMetadata<crate::ExifIter> = ImageMetadata {
112| 1| exif: None,
113| 1| format: None,
114| 1| };
115| 1| assert!(m.exif.is_none());
116| 1| }
117| |
118| | #[test]
119| 1| fn from_lazy_to_eager_compiles() {
120| | // We can't easily construct an ExifIter here; just verify the
121| | // type-level conversion exists by going through Default.
122| 1| let lazy: ImageMetadata<crate::ExifIter> = ImageMetadata::default();
123| 1| let _eager: ImageMetadata<crate::Exif> = lazy.into();
124| 1| }
125| |
126| | #[test]
127| 1| fn format_metadata_png_variant() {
128| 1| let png_text = PngTextChunks::default();
129| 1| let fm = ImageFormatMetadata::Png(png_text);
130| 1| match fm {
131| 1| ImageFormatMetadata::Png(t) => assert!(t.is_empty()),
132| | }
133| 1| }
134| |}
/home/min/dev/nom-exif/src/jpeg.rs:
1| |use std::io::{Read, Seek};
2| |
3| |use nom::{bytes::streaming, combinator::fail, number, IResult, Parser};
4| |
5| |use crate::error::MalformedKind;
6| |use crate::exif::check_exif_header;
7| |
8| |/// XMP APP1 segment payload prefix (29 bytes including the trailing NUL).
9| |const XMP_NS_HEADER: &[u8] = b"http://ns.adobe.com/xap/1.0/\x00";
10| |
11| |/// Outcome of scanning a JPEG buffer for a Pixel/Google Motion Photo
12| |/// signal.
13| |pub(crate) enum MotionPhotoScan {
14| | /// Saw `GCamera:MotionPhoto="1"` (or `GCamera:MicroVideo="1"`) with a
15| | /// trailer-length attribute. The MP4 trailer starts at
16| | /// `file_size - N`.
17| | Found(u64),
18| | /// Walked far enough to be sure no Motion Photo signal is present
19| | /// (e.g. reached the SOS marker, or hit a malformed segment).
20| | NotPresent,
21| | /// The buffer ended mid-walk before reaching SOS — caller should
22| | /// load more bytes and retry from the start.
23| | NeedMoreBytes,
24| |}
25| |
26| |/// Scan a JPEG buffer for a Pixel/Google Motion Photo signal.
27| |///
28| |/// Walks JPEG markers up to SOS, looking for an APP1 XMP segment that
29| |/// contains `GCamera:MotionPhoto="1"` together with a
30| |/// `GCamera:MotionPhotoOffset="N"` attribute (or the older
31| |/// `MicroVideo` / `MicroVideoOffset` pair). Returns
32| |/// [`MotionPhotoScan::Found(N)`] when both are present, where `N` is the
33| |/// trailer length in bytes.
34| |///
35| |/// The 3-state result lets callers distinguish "definitively no
36| |/// trailer" (NotPresent — the scanner reached SOS or a malformed marker)
37| |/// from "ran out of buffer" (NeedMoreBytes — the answer is unknown until
38| |/// more bytes are loaded).
39| 1.42k|pub(crate) fn scan_motion_photo(input: &[u8]) -> MotionPhotoScan {
40| 1.42k| let mut remain = input;
41| | loop {
42| 7.99k| let parsed: IResult<&[u8], (&[u8], u8)> =
43| 7.99k| (streaming::tag(&[0xFF_u8][..]), number::streaming::u8).parse(remain);
44| 7.98k| let (rem, (_, code)) = match parsed {
^9
45| 7.98k| Ok(t) => t,
46| 3| Err(nom::Err::Incomplete(_)) => return MotionPhotoScan::NeedMoreBytes,
47| 6| Err(_) => return MotionPhotoScan::NotPresent,
48| | };
49| 7.98k| let (rem, segment) = match parse_segment(code, rem) {
^6.59k^6.59k
50| 6.59k| Ok(t) => t,
51| 1.39k| Err(nom::Err::Incomplete(_)) => return MotionPhotoScan::NeedMoreBytes,
52| 1| Err(_) => return MotionPhotoScan::NotPresent,
53| | };
54| 6.59k| remain = rem;
55| |
56| 6.59k| if segment.marker_code == MarkerCode::Sos.code() {
57| 18| return MotionPhotoScan::NotPresent;
58| 6.57k| }
59| 6.57k| if segment.marker_code == MarkerCode::APP1.code()
60| 1.42k| && segment.payload.starts_with(XMP_NS_HEADER)
61| | {
62| 7| let xmp = &segment.payload[XMP_NS_HEADER.len()..];
63| 7| if let Some(offset) = parse_motion_photo_offset(xmp) {
64| 7| return MotionPhotoScan::Found(offset);
65| 0| }
66| | // Some files may carry XMP without a Motion Photo signal, or
67| | // split it across multiple APP1 segments — keep walking.
68| 6.56k| }
69| | }
70| 1.42k|}
71| |
72| |/// Convenience wrapper: returns the trailer offset if (and only if) the
73| |/// scan finishes with a definite answer of "found". Used by
74| |/// `parse_track`'s polymorphic JPEG path which always sees the full
75| |/// file in memory and therefore can't get `NeedMoreBytes`.
76| 5|pub(crate) fn find_motion_photo_offset(input: &[u8]) -> Option<u64> {
77| 5| match scan_motion_photo(input) {
78| 2| MotionPhotoScan::Found(n) => Some(n),
79| 3| MotionPhotoScan::NotPresent | MotionPhotoScan::NeedMoreBytes => None,
80| | }
81| 5|}
82| |
83| |/// Parse a Motion Photo trailer length from an XMP packet body.
84| |///
85| |/// Pixel cameras have used three layouts over time; this function tries
86| |/// them in order:
87| |///
88| |/// 1. **Adobe XMP Container directory** (modern Pixel, including Ultra
89| |/// HDR Motion Photos). The XMP carries a `<Container:Directory>`
90| |/// with an item whose `Item:Mime="video/mp4"` and
91| |/// `Item:Semantic="MotionPhoto"`; trailer length is the sum of
92| |/// `Item:Length` (+ optional `Item:Padding`) for that item plus all
93| |/// items after it in directory order.
94| |/// 2. **`GCamera:MotionPhotoOffset`** attribute (older Pixel
95| |/// `PXL_*.MP.jpg`).
96| |/// 3. **`GCamera:MicroVideoOffset`** attribute (pre-2018 Pixel
97| |/// `MVIMG_*.jpg`).
98| |///
99| |/// Requires `GCamera:MotionPhoto="1"` or `GCamera:MicroVideo="1"` as a
100| |/// gate so plain Ultra HDR JPEGs (Container directory present, no
101| |/// motion photo) don't false-positive.
102| 17|fn parse_motion_photo_offset(xmp: &[u8]) -> Option<u64> {
103| 17| let has_motion_photo = contains_attr_eq(xmp, b"GCamera:MotionPhoto", b"1")
104| 3| || contains_attr_eq(xmp, b"GCamera:MicroVideo", b"1");
105| 17| if !has_motion_photo {
106| 1| return None;
107| 16| }
108| 16| container_motion_photo_offset(xmp).or_else(|| {
^12
109| 12| extract_attr_value(xmp, b"GCamera:MotionPhotoOffset")
110| 12| .or_else(|| extract_attr_value(xmp, b"GCamera:MicroVideoOffset"))
^3 ^3 ^3
111| 12| .and_then(|s| std::str::from_utf8(s).ok()?.parse::<u64>().ok())
^10 ^10^10 ^0^10 ^10
112| 12| })
113| 17|}
114| |
115| |/// Walk `<Container:Directory>` and return the trailer length of the
116| |/// `MotionPhoto` item: its `Item:Length` plus optional `Item:Padding`,
117| |/// plus the same for every item that follows it in directory order.
118| |///
119| |/// Returns `None` if no Container directory is present or if no item
120| |/// matches the MotionPhoto signature.
121| 16|fn container_motion_photo_offset(xmp: &[u8]) -> Option<u64> {
122| 16| let dir_start = memchr_subslice(xmp, b"<Container:Directory")?;
^5 ^11
123| 5| let dir_end_rel = memchr_subslice(&xmp[dir_start..], b"</Container:Directory>")?;
^0
124| 5| let dir = &xmp[dir_start..dir_start + dir_end_rel];
125| |
126| | // Collect every <Container:Item ...> tag in directory order.
127| 5| let mut items: Vec<&[u8]> = Vec::new();
128| 5| let mut pos = 0;
129| 12| while let Some(idx) = memchr_subslice(&dir[pos..], b"<Container:Item") {
^7
130| 7| let abs = pos + idx;
131| 432| let tag_end_rel = dir[abs..].iter().position(|&b| b == b'>')?;
^7 ^7 ^7 ^0
132| 7| items.push(&dir[abs..abs + tag_end_rel]);
133| 7| pos = abs + tag_end_rel;
134| | }
135| |
136| 5| let mp_idx = items.iter().position(|tag| {
^4
137| 5| extract_attr_value(tag, b"Item:Semantic") == Some(&b"MotionPhoto"[..])
138| 2| || extract_attr_value(tag, b"Item:Mime") == Some(&b"video/mp4"[..])
139| 5| })?;
^1
140| |
141| | // Each item's `Item:Padding` is the gap between this item and the
142| | // next one in the container; the last item's padding is therefore
143| | // not part of the file (the Galaxy-1 sample has Length=4299299
144| | // Padding=80 as the last item, and 80 zero-bytes after the MP4
145| | // would push the offset past EOF). Sum all Lengths in [mp_idx..],
146| | // plus Padding only for the non-final entries.
147| 4| let mut total: u64 = 0;
148| 4| let last = items.len() - 1;
149| 6| for (i, tag) in items.iter().enumerate().skip(mp_idx) {
^4 ^4 ^4 ^4
150| 6| let length = extract_attr_value(tag, b"Item:Length")
151| 6| .and_then(|s| std::str::from_utf8(s).ok()?.parse::<u64>().ok())?;
^0 ^0
152| 6| total = total.checked_add(length)?;
^0
153| 6| if i != last {
154| 2| let padding = extract_attr_value(tag, b"Item:Padding")
155| 2| .and_then(|s| std::str::from_utf8(s).ok()?.parse::<u64>().ok())
^0
156| 2| .unwrap_or(0);
157| 2| total = total.checked_add(padding)?;
^0
158| 4| }
159| | }
160| 4| Some(total)
161| 16|}
162| |
163| |/// True if `xmp` contains an attribute `name="value"`.
164| 23|fn contains_attr_eq(xmp: &[u8], name: &[u8], value: &[u8]) -> bool {
165| 23| let needle = [name, b"=\"", value, b"\""].concat();
166| 23| memchr_subslice(xmp, &needle).is_some()
167| 23|}
168| |
169| |/// Extract the quoted value of an attribute named `name`, if present.
170| 33|fn extract_attr_value<'a>(xmp: &'a [u8], name: &[u8]) -> Option<&'a [u8]> {
171| 33| let prefix = [name, b"=\""].concat();
172| 33| let start = memchr_subslice(xmp, &prefix)? + prefix.len();
^25 ^8 ^25 ^25
173| 151| let end = xmp[start..].iter().position(|&b| b == b'"')?;
^24 ^25 ^25 ^1
174| 24| Some(&xmp[start..start + end])
175| 33|}
176| |
177| 94|fn memchr_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
178| 94| if needle.is_empty() || haystack.len() < needle.len() {
^93 ^93
179| 8| return None;
180| 86| }
181| 7.61k| haystack.windows(needle.len()).position(|w| w == needle)
^86 ^86 ^86
182| 94|}
183| |
184| |/// Extract Exif TIFF data from the bytes of a JPEG file.
185| 135|pub(crate) fn extract_exif_data(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
186| 135| let (remain, segment) = find_exif_segment(input)?;
^83 ^83 ^52
187| 83| let data = segment.and_then(|segment| {
^78
188| 78| if segment.payload_len() <= 6 {
189| 1| None
190| | } else {
191| 77| Some(&segment.payload[6..]) // Safe-slice
192| | }
193| 78| });
194| 83| Ok((remain, data))
195| 135|}
196| |
197| |struct Segment<'a> {
198| | marker_code: u8,
199| | payload: &'a [u8],
200| |}
201| |
202| |impl Segment<'_> {
203| 80| pub fn payload_len(&self) -> usize {
204| 80| self.payload.len()
205| 80| }
206| |}
207| |
208| 137|fn find_exif_segment(input: &[u8]) -> IResult<&[u8], Option<Segment<'_>>> {
209| 137| let mut remain = input;
210| |
211| 85| let (remain, segment) = loop {
212| 316| let (rem, (_, code)) =
213| 316| (streaming::tag(&[0xFF_u8][..]), number::streaming::u8).parse(remain)?;
^0
214| 316| let (rem, segment) = parse_segment(code, rem)?;
^264 ^264 ^52
215| | // Sanity check
216| 264| assert!(rem.len() < remain.len());
217| 264| remain = rem;
218| 264| tracing::debug!(
219| 0| marker = format!("0x{:04x}", segment.marker_code),
220| 0| size = format!("0x{:04x}", segment.payload.len()),
221| | "got segment"
222| | );
223| |
224| 264| let s = &segment;
225| 264| if (s.marker_code == MarkerCode::APP1.code() && check_exif_header(s.payload)?)
^79 ^79 ^0
226| 185| || s.marker_code == MarkerCode::Sos.code()
227| | // searching stop at SOS
228| | {
229| 85| break (remain, segment);
230| 179| }
231| | };
232| |
233| 85| if segment.marker_code != MarkerCode::Sos.code() {
234| 79| Ok((remain, Some(segment)))
235| | } else {
236| 6| Ok((remain, None))
237| | }
238| 137|}
239| |
240| 113|pub fn check_jpeg(input: &[u8]) -> crate::Result<()> {
241| | // check soi marker [0xff, 0xd8]
242| 113| let (_, (_, code)) = (
^94
243| 113| nom::bytes::complete::tag(&[0xFF_u8][..]),
244| 113| number::complete::u8,
245| 113| )
246| 113| .parse(input)
247| 113| .map_err(|e| {
^19
248| 19| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::JpegSegment)
249| 19| })?;
250| |
251| | // SOI has no payload
252| 94| if code != MarkerCode::Soi.code() {
253| 1| return Err(crate::Error::Malformed {
254| 1| kind: crate::error::MalformedKind::JpegSegment,
255| 1| message: "SOI marker not found".into(),
256| 1| });
257| 93| }
258| |
259| | // check next marker [0xff, *]
260| 93| let (_, (_, _)) = (
261| 93| nom::bytes::complete::tag(&[0xFF_u8][..]),
262| 93| number::complete::u8,
263| 93| )
264| 93| .parse(input)
265| 93| .map_err(|e| {
^0
266| 0| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::JpegSegment)
267| 0| })?;
268| 93| Ok(())
269| 113|}
270| |
271| 8.29k|fn parse_segment(marker_code: u8, input: &[u8]) -> IResult<&[u8], Segment<'_>> {
272| 8.29k| let remain = input;
273| |
274| | // SOI has no payload
275| 8.29k| if marker_code == MarkerCode::Soi.code() {
276| 1.55k| Ok((
277| 1.55k| remain,
278| 1.55k| Segment {
279| 1.55k| marker_code,
280| 1.55k| payload: b"",
281| 1.55k| },
282| 1.55k| ))
283| | } else {
284| 6.74k| let (remain, size) = number::streaming::be_u16(remain)?;
^6.73k ^6.73k ^3
285| 6.73k| if size < 2 {
286| 2| return fail().parse(remain);
287| 6.73k| }
288| | // size contains the two bytes of `size` itself
289| 6.73k| let (remain, data) = streaming::take(size - 2).parse(remain)?;
^5.29k ^5.29k ^1.43k
290| 5.29k| Ok((
291| 5.29k| remain,
292| 5.29k| Segment {
293| 5.29k| marker_code,
294| 5.29k| payload: data,
295| 5.29k| },
296| 5.29k| ))
297| | }
298| 8.29k|}
299| |
300| |/// Read all image data after the first SOS marker & before EOI marker.
301| |///
302| |/// The returned data might include several other SOS markers if the image is a
303| |/// progressive JPEG.
304| |#[allow(dead_code)]
305| 2|fn read_image_data<T: Read + Seek>(mut reader: T) -> crate::Result<Vec<u8>> {
306| 2| let mut header = [0u8; 2];
307| | loop {
308| 22| reader.read_exact(&mut header)?;
^0
309| 22| let (tag, marker) = (header[0], header[1]);
310| 22| if tag != 0xFF {
311| 0| return Err(crate::Error::Malformed {
312| 0| kind: MalformedKind::JpegSegment,
313| 0| message: "expected 0xFF marker prefix".to_string(),
314| 0| });
315| 22| }
316| |
317| 22| if marker == MarkerCode::Soi.code() {
318| | // SOI has no body
319| 2| continue;
320| 20| }
321| 20| if marker == MarkerCode::Eoi.code() {
322| 0| return Err(crate::Error::ExifNotFound);
323| 20| }
324| |
325| 20| if marker == MarkerCode::Sos.code() {
326| | // found it
327| 2| let mut data = Vec::new();
328| 2| reader.read_to_end(&mut data)?;
^0
329| |
330| | // remove tail data
331| 461| while let Some(tail) = data.pop() {
332| 461| if tail == MarkerCode::Eoi.code() {
333| 2| if let Some(tail) = data.pop() {
334| 2| if tail == 0xFF {
335| | // EOI marker has been popped
336| 2| break;
337| 0| }
338| 0| }
339| 459| }
340| | }
341| 2| return Ok(data);
342| | } else {
343| | // skip other markers
344| 18| reader.read_exact(&mut header)?;
^0
345| 18| let len = u16::from_be_bytes([header[0], header[1]]);
346| 18| reader.seek(std::io::SeekFrom::Current(len as i64 - 2))?;
^0
347| | }
348| | }
349| 2|}
350| |
351| |/// A marker code is a byte following 0xFF that indicates the kind of marker.
352| |enum MarkerCode {
353| | // Start of Image
354| | Soi = 0xD8,
355| |
356| | // APP1 marker
357| | APP1 = 0xE1,
358| |
359| | // Start of Scan
360| | Sos = 0xDA,
361| |
362| | // End of Image
363| | Eoi = 0xD9,
364| |}
365| |
366| |impl MarkerCode {
367| 22.6k| fn code(self) -> u8 {
368| 22.6k| self as u8
369| 22.6k| }
370| |}
371| |
372| |#[cfg(test)]
373| |mod tests {
374| | use super::*;
375| | use crate::testkit::*;
376| | use test_case::test_case;
377| |
378| | #[test_case("exif.jpg", true)]
379| | #[test_case("broken.jpg", true)]
380| | #[test_case("no-exif.jpg", false)]
381| 3| fn test_check_jpeg(path: &str, has_exif: bool) {
382| 3| let data = read_sample(path).unwrap();
383| 3| check_jpeg(&data).unwrap();
384| 3| let (_, data) = extract_exif_data(&data).unwrap();
385| 3| if has_exif {
386| 2| data.unwrap();
387| 2| }
^1
388| 3| }
389| |
390| | #[test_case("no-exif.jpg", 0)]
391| | #[test_case("exif.jpg", 0x4569-2)]
392| 2| fn jpeg_find_exif(path: &str, exif_size: usize) {
393| 2| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
394| |
395| 2| let buf = read_sample(path).unwrap();
396| 2| let (_, segment) = find_exif_segment(&buf[..]).unwrap();
397| |
398| 2| if exif_size == 0 {
399| 1| assert!(segment.is_none());
400| | } else {
401| 1| assert_eq!(segment.unwrap().payload_len(), exif_size);
402| | }
403| 2| }
404| |
405| | #[test_case("no-exif.jpg", 0)]
406| | #[test_case("exif.jpg", 0x4569-8)]
407| 2| fn jpeg_exif_data(path: &str, exif_size: usize) {
408| 2| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
409| |
410| 2| let buf = read_sample(path).unwrap();
411| 2| let (_, exif) = extract_exif_data(&buf[..]).unwrap();
412| |
413| 2| if exif_size == 0 {
414| 1| assert!(exif.is_none());
415| | } else {
416| 1| assert_eq!(exif.unwrap().len(), exif_size);
417| | }
418| 2| }
419| |
420| | #[test_case("no-exif.jpg", 4089704, 0x000c0301, 0xb3b3e43f)]
421| | #[test_case("exif.jpg", 3564768, 0x000c0301, 0x84a297a9)]
422| 2| fn jpeg_image_data(path: &str, len: usize, start: u32, end: u32) {
423| 2| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
424| |
425| 2| let f = open_sample(path).unwrap();
426| 2| let data = read_image_data(f).unwrap();
427| 2| assert_eq!(data.len(), len);
428| 2| assert_eq!(u32::from_be_bytes(data[..4].try_into().unwrap()), start); // Safe-slice in test_case
429| 2| assert_eq!(
430| 2| u32::from_be_bytes(data[data.len() - 4..].try_into().unwrap()), // Safe-slice in test_case
431| | end
432| | );
433| 2| }
434| |
435| | #[test]
436| 1| fn memchr_subslice_empty_needle_returns_none() {
437| 1| assert_eq!(memchr_subslice(b"hello", b""), None);
438| 1| }
439| |
440| | #[test]
441| 1| fn memchr_subslice_needle_longer_than_haystack() {
442| 1| assert_eq!(memchr_subslice(b"ab", b"abcdef"), None);
443| 1| }
444| |
445| | #[test]
446| 1| fn memchr_subslice_no_match() {
447| 1| assert_eq!(memchr_subslice(b"hello", b"xyz"), None);
448| 1| }
449| |
450| | #[test]
451| 1| fn memchr_subslice_finds_first_match() {
452| 1| assert_eq!(memchr_subslice(b"hello world hello", b"hello"), Some(0));
453| 1| assert_eq!(memchr_subslice(b"xx hello world", b"hello"), Some(3));
454| 1| }
455| |
456| | #[test]
457| 1| fn extract_attr_value_not_found() {
458| 1| assert_eq!(extract_attr_value(b"key=\"val\"", b"Missing"), None);
459| 1| }
460| |
461| | #[test]
462| 1| fn extract_attr_value_unclosed_quote() {
463| 1| assert_eq!(extract_attr_value(b"key=\"val", b"key"), None);
464| 1| }
465| |
466| | #[test]
467| 1| fn extract_attr_value_found() {
468| 1| assert_eq!(
469| 1| extract_attr_value(b"tag key=\"hello\" rest", b"key"),
470| 1| Some(&b"hello"[..])
471| | );
472| 1| }
473| |
474| | #[test]
475| 1| fn contains_attr_eq_true() {
476| 1| assert!(contains_attr_eq(
477| 1| b"GCamera:MotionPhoto=\"1\"",
478| 1| b"GCamera:MotionPhoto",
479| 1| b"1"
480| | ));
481| 1| }
482| |
483| | #[test]
484| 1| fn contains_attr_eq_false() {
485| 1| assert!(!contains_attr_eq(
486| 1| b"GCamera:MotionPhoto=\"0\"",
487| 1| b"GCamera:MotionPhoto",
488| 1| b"1"
489| 1| ));
490| 1| }
491| |
492| | #[test]
493| 1| fn contains_attr_eq_missing() {
494| 1| assert!(!contains_attr_eq(b"", b"GCamera:MotionPhoto", b"1"));
495| 1| }
496| |
497| | #[test]
498| 1| fn parse_motion_photo_offset_no_gate_returns_none() {
499| 1| let xmp = b"SomeOther=\"1\" GCamera:MotionPhotoOffset=\"1234\"";
500| 1| assert_eq!(parse_motion_photo_offset(xmp), None);
501| 1| }
502| |
503| | #[test]
504| 1| fn parse_motion_photo_offset_micro_video_gate_and_offset() {
505| 1| let xmp = b"GCamera:MicroVideo=\"1\" GCamera:MicroVideoOffset=\"5678\"";
506| 1| assert_eq!(parse_motion_photo_offset(xmp), Some(5678));
507| 1| }
508| |
509| | #[test]
510| 1| fn parse_motion_photo_offset_fallback_motion_photo_offset() {
511| 1| let xmp = b"GCamera:MotionPhoto=\"1\" GCamera:MotionPhotoOffset=\"9999\"";
512| 1| assert_eq!(parse_motion_photo_offset(xmp), Some(9999));
513| 1| }
514| |
515| | #[test]
516| 1| fn parse_motion_photo_offset_offset_not_a_number() {
517| 1| let xmp = b"GCamera:MotionPhoto=\"1\" GCamera:MotionPhotoOffset=\"not-a-number\"";
518| 1| assert_eq!(parse_motion_photo_offset(xmp), None);
519| 1| }
520| |
521| | #[test]
522| 1| fn container_motion_photo_offset_single_item_semantic() {
523| 1| let xmp = concat!(
524| | "GCamera:MotionPhoto=\"1\"\n",
525| | "<Container:Directory>\n",
526| | " <Container:Item Item:Semantic=\"MotionPhoto\" Item:Length=\"100\"/>\n",
527| | "</Container:Directory>"
528| | );
529| 1| assert_eq!(parse_motion_photo_offset(xmp.as_bytes()), Some(100));
530| 1| }
531| |
532| | #[test]
533| 1| fn container_motion_photo_offset_single_item_mime() {
534| 1| let xmp = concat!(
535| | "GCamera:MotionPhoto=\"1\"\n",
536| | "<Container:Directory>\n",
537| | " <Container:Item Item:Mime=\"video/mp4\" Item:Length=\"500\"/>\n",
538| | "</Container:Directory>"
539| | );
540| 1| assert_eq!(parse_motion_photo_offset(xmp.as_bytes()), Some(500));
541| 1| }
542| |
543| | #[test]
544| 1| fn container_motion_photo_offset_multiple_items_with_padding() {
545| 1| let xmp = concat!(
546| | "GCamera:MotionPhoto=\"1\"\n",
547| | "<Container:Directory>\n",
548| | " <Container:Item Item:Semantic=\"MotionPhoto\" Item:Length=\"100\" Item:Padding=\"50\"/>\n",
549| | " <Container:Item Item:Length=\"200\" Item:Padding=\"75\"/>\n",
550| | " <Container:Item Item:Length=\"300\" Item:Padding=\"999\"/>\n",
551| | "</Container:Directory>"
552| | );
553| 1| assert_eq!(parse_motion_photo_offset(xmp.as_bytes()), Some(725));
554| 1| }
555| |
556| | #[test]
557| 1| fn container_motion_photo_offset_micro_video_gate() {
558| 1| let xmp = concat!(
559| | "GCamera:MicroVideo=\"1\"\n",
560| | "<Container:Directory>\n",
561| | " <Container:Item Item:Semantic=\"MotionPhoto\" Item:Length=\"42\"/>\n",
562| | "</Container:Directory>"
563| | );
564| 1| assert_eq!(parse_motion_photo_offset(xmp.as_bytes()), Some(42));
565| 1| }
566| |
567| | #[test]
568| 1| fn container_motion_photo_offset_no_container_returns_none() {
569| 1| let xmp = b"GCamera:MotionPhoto=\"1\" no container directory";
570| 1| assert_eq!(parse_motion_photo_offset(xmp), None);
571| 1| }
572| |
573| | #[test]
574| 1| fn container_motion_photo_offset_no_motion_photo_item_returns_none() {
575| 1| let xmp = concat!(
576| | "GCamera:MotionPhoto=\"1\"\n",
577| | "<Container:Directory>\n",
578| | " <Container:Item Item:Semantic=\"StillImage\" Item:Length=\"100\"/>\n",
579| | "</Container:Directory>"
580| | );
581| 1| assert_eq!(parse_motion_photo_offset(xmp.as_bytes()), None);
582| 1| }
583| |
584| | #[test]
585| 1| fn scan_motion_photo_truncated_returns_need_more() {
586| 1| assert!(matches!(
^0
587| 1| scan_motion_photo(&[0xFF]),
588| | MotionPhotoScan::NeedMoreBytes
589| | ));
590| 1| }
591| |
592| | #[test]
593| 1| fn scan_motion_photo_empty_buffer_returns_need_more() {
594| 1| assert!(matches!(
^0
595| 1| scan_motion_photo(b""),
596| | MotionPhotoScan::NeedMoreBytes
597| | ));
598| 1| }
599| |
600| | #[test]
601| 1| fn scan_motion_photo_truncated_segment_returns_need_more() {
602| 1| let buf = vec![0xFF, 0xD8, 0xFF, 0xE1, 0x01, 0x00];
603| 1| assert!(matches!(
^0
604| 1| scan_motion_photo(&buf),
605| | MotionPhotoScan::NeedMoreBytes
606| | ));
607| 1| }
608| |
609| | #[test]
610| 1| fn scan_motion_photo_malformed_returns_not_present() {
611| 1| assert!(matches!(
^0
612| 1| scan_motion_photo(&[0x00, 0x00, 0x00, 0x00]),
613| | MotionPhotoScan::NotPresent
614| | ));
615| 1| }
616| |
617| | #[test]
618| 1| fn scan_motion_photo_sos_returns_not_present() {
619| 1| let buf = vec![0xFF, 0xD8, 0xFF, 0xDA, 0x00, 0x02];
620| 1| assert!(matches!(
^0
621| 1| scan_motion_photo(&buf),
622| | MotionPhotoScan::NotPresent
623| | ));
624| 1| }
625| |
626| | #[test]
627| 1| fn scan_motion_photo_app1_without_xmp_header_not_found() {
628| 1| let exif_header = b"Exif\x00\x00";
629| 1| let mut buf = vec![0xFF, 0xD8];
630| 1| let payload_len = exif_header.len() + 4;
631| 1| buf.extend_from_slice(&[0xFF, 0xE1]);
632| 1| buf.extend_from_slice(&(payload_len as u16 + 2).to_be_bytes());
633| 1| buf.extend_from_slice(exif_header);
634| 1| buf.extend_from_slice(b"dummy");
635| 1| buf.extend_from_slice(&[0xFF, 0xDA, 0x00, 0x02]);
636| 1| assert!(matches!(
^0
637| 1| scan_motion_photo(&buf),
638| | MotionPhotoScan::NotPresent
639| | ));
640| 1| }
641| |
642| | #[test]
643| 1| fn scan_motion_photo_finds_offset() {
644| 1| let xmp_payload = b"http://ns.adobe.com/xap/1.0/\x00GCamera:MotionPhoto=\"1\" GCamera:MotionPhotoOffset=\"777\"";
645| 1| let mut buf = vec![0xFF, 0xD8];
646| 1| buf.extend_from_slice(&[0xFF, 0xE1]);
647| 1| buf.extend_from_slice(&(xmp_payload.len() as u16 + 2).to_be_bytes());
648| 1| buf.extend_from_slice(xmp_payload);
649| 1| buf.extend_from_slice(&[0xFF, 0xDA, 0x00, 0x02]);
650| 1| assert!(matches!(
^0
651| 1| scan_motion_photo(&buf),
652| | MotionPhotoScan::Found(777)
653| | ));
654| 1| }
655| |
656| | #[test]
657| 1| fn find_motion_photo_offset_found() {
658| 1| let xmp = b"http://ns.adobe.com/xap/1.0/\x00GCamera:MotionPhoto=\"1\" GCamera:MotionPhotoOffset=\"888\"";
659| 1| let mut buf = vec![0xFF, 0xD8];
660| 1| buf.extend_from_slice(&[0xFF, 0xE1]);
661| 1| buf.extend_from_slice(&(xmp.len() as u16 + 2).to_be_bytes());
662| 1| buf.extend_from_slice(xmp);
663| 1| buf.extend_from_slice(&[0xFF, 0xDA, 0x00, 0x02]);
664| 1| assert_eq!(find_motion_photo_offset(&buf), Some(888));
665| 1| }
666| |
667| | #[test]
668| 1| fn find_motion_photo_offset_not_present() {
669| 1| assert_eq!(find_motion_photo_offset(b""), None);
670| 1| }
671| |
672| | #[test]
673| 1| fn parse_segment_size_too_small() {
674| 1| let result = parse_segment(0xE1, &[0x00, 0x01]);
675| 1| assert!(result.is_err());
676| 1| }
677| |
678| | #[test]
679| 1| fn parse_segment_soi_returns_empty_payload() {
680| 1| let (_, segment) = parse_segment(0xD8, b"dummy").unwrap();
681| 1| assert_eq!(segment.payload_len(), 0);
682| 1| }
683| |
684| | #[test]
685| 1| fn extract_exif_data_payload_too_short() {
686| 1| let exif_header = b"Exif\x00\x00";
687| 1| let mut buf = vec![0xFF, 0xD8];
688| 1| buf.extend_from_slice(&[0xFF, 0xE1]);
689| 1| buf.extend_from_slice(&(exif_header.len() as u16 + 2).to_be_bytes());
690| 1| buf.extend_from_slice(exif_header);
691| 1| buf.extend_from_slice(&[0xFF, 0xDA, 0x00, 0x02]);
692| 1| let (_, data) = extract_exif_data(&buf).unwrap();
693| 1| assert!(data.is_none());
694| 1| }
695| |
696| | #[test]
697| 1| fn check_jpeg_empty_input() {
698| 1| assert!(check_jpeg(b"").is_err());
699| 1| }
700| |
701| | #[test]
702| 1| fn check_jpeg_not_ff_first_byte() {
703| 1| assert!(check_jpeg(b"\x00\x00").is_err());
704| 1| }
705| |
706| | #[test]
707| 1| fn check_jpeg_not_soi() {
708| 1| assert!(check_jpeg(&[0xFF, 0xD9]).is_err());
709| 1| }
710| |
711| | #[test]
712| 1| fn scan_motion_photo_parsed_segment_error_returns_not_present() {
713| 1| let mut buf = vec![0xFF, 0xD8, 0xFF, 0xE1];
714| 1| buf.extend_from_slice(&0u16.to_be_bytes());
715| 1| buf.extend_from_slice(b"short");
716| 1| assert!(matches!(
^0
717| 1| scan_motion_photo(&buf),
718| | MotionPhotoScan::NotPresent
719| | ));
720| 1| }
721| |}
/home/min/dev/nom-exif/src/lib.rs:
1| |//! `nom-exif` is a pure Rust library for **both image EXIF and
2| |//! video / audio track metadata** through a single unified API.
3| |//!
4| |//! # Highlights
5| |//!
6| |//! - Pure Rust — no FFmpeg, no libexif, no system deps; cross-compiles
7| |//! cleanly.
8| |//! - Image **and** video / audio in one crate — [`MediaParser`] dispatches
9| |//! to the right backend by detected MIME, no per-format wrappers.
10| |//! - RAW format support — Canon CR3, Fujifilm RAF, Phase One IIQ,
11| |//! alongside JPEG / HEIC / AVIF / PNG / TIFF.
12| |//! - **Motion Photo** support — Pixel and Samsung Motion Photos (JPEG
13| |//! with an embedded MP4) are detected automatically; `parse_track`
14| |//! extracts the embedded video's track metadata.
15| |//! - Three input modes — files, arbitrary `Read` / `Read + Seek`
16| |//! (network streams, pipes), or in-RAM bytes (WASM, mobile, HTTP
17| |//! proxies).
18| |//! - Sync and async unified under one [`MediaParser`].
19| |//! - Eager ([`Exif`], get-by-tag) or lazy ([`ExifIter`], parse-on-demand)
20| |//! — per-entry errors surface in both modes ([`Exif::errors`] /
21| |//! per-iter `Result`), so one bad tag doesn't poison the parse.
22| |//! - Allocation-frugal — parser buffer is recycled across calls;
23| |//! sub-IFDs share the same allocation (no deep copies).
24| |//! - Fuzz-tested with `cargo-fuzz` against malformed and adversarial input.
25| |//!
26| |//! # Quick start
27| |//!
28| |//! For a one-shot read, use the helpers:
29| |//!
30| |//! ```rust
31| |//! use nom_exif::{read_exif, ExifTag};
32| |//!
33| |//! let exif = read_exif("./testdata/exif.jpg")?;
34| |//! let make = exif.get(ExifTag::Make).and_then(|v| v.as_str());
35| |//! assert_eq!(make, Some("vivo"));
36| |//! # Ok::<(), nom_exif::Error>(())
37| |//! ```
38| |//!
39| |//! For batch processing, build a [`MediaParser`] once and reuse its
40| |//! buffer:
41| |//!
42| |//! ```rust
43| |//! use nom_exif::{MediaKind, MediaParser, MediaSource};
44| |//!
45| |//! let mut parser = MediaParser::new();
46| |//! for path in ["./testdata/exif.jpg", "./testdata/meta.mov"] {
47| |//! let ms = MediaSource::open(path)?;
48| |//! match ms.kind() {
49| |//! MediaKind::Image => { let _ = parser.parse_exif(ms)?; }
50| |//! MediaKind::Track => { let _ = parser.parse_track(ms)?; }
51| |//! }
52| |//! }
53| |//! # Ok::<(), nom_exif::Error>(())
54| |//! ```
55| |//!
56| |//! Async variants live behind `feature = "tokio"`:
57| |//! [`read_exif_async`], [`read_track_async`], [`read_metadata_async`],
58| |//! plus [`MediaParser::parse_exif_async`] / [`MediaParser::parse_track_async`].
59| |//!
60| |//! # Motion Photos (embedded media tracks)
61| |//!
62| |//! Some images embed a media track that `parse_exif` doesn't surface —
63| |//! most commonly **Pixel/Google Motion Photo** JPEGs, which carry a short
64| |//! MP4 video appended after the JPEG image data. The
65| |//! [`Exif::has_embedded_track`] / [`ExifIter::has_embedded_track`] flags
66| |//! are set by `parse_exif` when it observes a concrete content signal
67| |//! (e.g. the `GCamera:MotionPhoto="1"` XMP attribute). When the flag is
68| |//! `true`, call [`MediaParser::parse_track`] on the same source to
69| |//! extract the embedded MP4's metadata — `parse_track` automatically
70| |//! locates and parses the trailer.
71| |//!
72| |//! ```no_run
73| |//! use nom_exif::{MediaParser, MediaSource};
74| |//! let mut parser = MediaParser::new();
75| |//! let path = "PXL_20240101_120000000.MP.jpg";
76| |//! let iter = parser.parse_exif(MediaSource::open(path)?)?;
77| |//! if iter.has_embedded_track() {
78| |//! // Re-open: MediaSource is consumed by parse_exif.
79| |//! let track = parser.parse_track(MediaSource::open(path)?)?;
80| |//! // ...
81| |//! }
82| |//! # Ok::<(), nom_exif::Error>(())
83| |//! ```
84| |//!
85| |//! **Coverage**: Pixel/Google Motion Photos and Samsung Galaxy Motion
86| |//! Photos that use the Adobe XMP Container directory format (modern
87| |//! Pixel including Ultra HDR, modern Galaxy JPEGs).
88| |//!
89| |//! # Reading from in-memory bytes
90| |//!
91| |//! When the payload is already in RAM (WASM, mobile, HTTP proxy, decoded
92| |//! response body), use [`MediaSource::from_memory`] to skip the `File` /
93| |//! `Read` round-trip entirely. Memory mode is **zero-copy**: the underlying
94| |//! allocation is shared with the returned [`Exif`] / [`ExifIter`] /
95| |//! [`TrackInfo`] via [`bytes::Bytes`] reference counting.
96| |//!
97| |//! ```rust
98| |//! use nom_exif::{MediaSource, MediaParser, ExifTag};
99| |//!
100| |//! let raw = std::fs::read("./testdata/exif.jpg")?;
101| |//! let ms = MediaSource::from_memory(raw)?;
102| |//! let mut parser = MediaParser::new();
103| |//! let iter = parser.parse_exif(ms)?;
104| |//! let exif: nom_exif::Exif = iter.into();
105| |//! assert_eq!(exif.get(ExifTag::Make).and_then(|v| v.as_str()), Some("vivo"));
106| |//! # Ok::<(), nom_exif::Error>(())
107| |//! ```
108| |//!
109| |//! # Image metadata beyond EXIF
110| |//!
111| |//! Some image formats carry metadata that does not fit the EXIF / IFD
112| |//! model. PNG's `tEXt` chunks are the headline example: arbitrary
113| |//! Latin-1 key/value pairs (`Title`, `Author`, `Comment`, …). For
114| |//! PNG-aware (or future GIF / WebP / JXL extras-aware) callers, use
115| |//! [`MediaParser::parse_image_metadata`]:
116| |//!
117| |//! ```rust
118| |//! use nom_exif::{MediaParser, MediaSource, ImageFormatMetadata};
119| |//!
120| |//! let mut parser = MediaParser::new();
121| |//! let ms = MediaSource::open("./testdata/exif.png")?;
122| |//! let img = parser.parse_image_metadata(ms)?;
123| |//!
124| |//! if let Some(ImageFormatMetadata::Png(text_chunks)) = img.format {
125| |//! let _title = text_chunks.get("Title");
126| |//! }
127| |//! # Ok::<(), nom_exif::Error>(())
128| |//! ```
129| |//!
130| |//! Returns [`ImageMetadata<ExifIter>`](ImageMetadata) (lazy form);
131| |//! convert to the eager `ImageMetadata<Exif>` via `.into()` if
132| |//! needed. Top-level `read_image_metadata` helpers are deferred to
133| |//! v4 alongside the [`Metadata`] enum redesign.
134| |//!
135| |//! # API surface
136| |//!
137| |//! - **One-shot helpers**: [`read_exif`], [`read_exif_iter`], [`read_track`], [`read_metadata`].
138| |//! - **Reusable parser**: [`MediaParser`] + [`MediaSource`] (or [`AsyncMediaSource`])
139| |//! + [`MediaKind`]. Use [`MediaSource::from_memory`] for in-RAM bytes.
140| |//! - **Image metadata**: [`Exif`] (eager, get-by-tag) or [`ExifIter`]
141| |//! (lazy iterator with per-entry errors). Convert: `let exif: Exif = iter.into();`.
142| |//! - **Track metadata**: [`TrackInfo`] (audio/video container metadata).
143| |//! - **Discriminated union**: [`Metadata`] returned by [`read_metadata`].
144| |//! - **Errors**: [`Error`] for parse-level, [`EntryError`] for per-entry
145| |//! IFD errors, [`ConvertError`] for type-conversion peer errors.
146| |//! - **Convenience**: [`prelude`] re-exports the symbols you most often need.
147| |//!
148| |//! See `docs/MIGRATION.md` for the v2 → v3 migration guide and
149| |//! `docs/V3_API_DESIGN.md` for the internal design contract.
150| |//!
151| |//! # Cargo features
152| |//!
153| |//! - `tokio` — async API via tokio (`AsyncMediaSource`, `read_*_async`,
154| |//! `MediaParser::parse_*_async`).
155| |//! - `serde` — derives `Serialize`/`Deserialize` on the public types.
156| |
157| |pub use parser::{MediaKind, MediaParser, MediaSource};
158| |pub use video::{TrackInfo, TrackInfoTag};
159| |
160| |#[cfg(feature = "tokio")]
161| |pub use parser_async::AsyncMediaSource;
162| |
163| |pub use exif::gps::{Altitude, LatRef, LonRef, Speed, SpeedUnit};
164| |pub use exif::png_text::PngTextChunks;
165| |pub use exif::{
166| | Exif, ExifEntry, ExifIter, ExifIterEntry, ExifTag, GPSInfo, IfdIndex, LatLng, TagOrCode,
167| |};
168| |pub use image_metadata::{ExifRepr, ImageFormatMetadata, ImageMetadata};
169| |pub use values::{EntryValue, ExifDateTime, IRational, Rational, URational};
170| |
171| |pub use error::{ConvertError, EntryError, Error, MalformedKind};
172| |
173| |/// Convenient one-line import of the most common v3 symbols.
174| |///
175| |/// ```rust
176| |/// use nom_exif::prelude::*;
177| |/// # fn main() -> Result<()> { Ok(()) }
178| |/// ```
179| |///
180| |/// Includes [`Error`] and [`MalformedKind`] so error-matching code does
181| |/// not need a second import. Cold-path types (e.g. `Rational`,
182| |/// `LatLng`, `ConvertError`, `ExifDateTime`) are intentionally **not**
183| |/// in the prelude — import them explicitly via `nom_exif::Type`.
184| |pub mod prelude {
185| | pub use crate::{read_exif, read_metadata, read_track};
186| | pub use crate::{
187| | EntryValue, Error, Exif, ExifIter, ExifTag, GPSInfo, IfdIndex, MalformedKind, MediaKind,
188| | MediaParser, MediaSource, Metadata, Result, TrackInfo, TrackInfoTag,
189| | };
190| |}
191| |
192| |/// Crate-wide convenience alias for `std::result::Result<T, Error>`.
193| |pub type Result<T> = std::result::Result<T, Error>;
194| |
195| |/// One-shot result of [`read_metadata`]: either Exif (image) or TrackInfo
196| |/// (video/audio). Closed enum — see spec §8.6 for why there's no `Both`
197| |/// variant.
198| |#[derive(Debug, Clone)]
199| |pub enum Metadata {
200| | Exif(Exif),
201| | Track(TrackInfo),
202| |}
203| |
204| |use std::path::Path;
205| |
206| |/// Read EXIF metadata from a file in a single call.
207| |///
208| |/// For batch processing, prefer constructing a [`MediaParser`] once and
209| |/// reusing its parse buffer via [`MediaParser::parse_exif`].
210| 15|pub fn read_exif(path: impl AsRef<Path>) -> Result<Exif> {
211| 15| let iter = read_exif_iter(path)?;
^14 ^1
212| 14| Ok(iter.into())
213| 15|}
214| |
215| |/// Read EXIF metadata from a file as a lazy iterator. Like [`read_exif`]
216| |/// but returns an [`ExifIter`] so per-entry errors can be inspected and
217| |/// values fetched without materializing the full [`Exif`] map.
218| |///
219| |/// For batch processing, reuse a [`MediaParser`] via [`MediaParser::parse_exif`].
220| 15|pub fn read_exif_iter(path: impl AsRef<Path>) -> Result<ExifIter> {
221| 15| let file = std::fs::File::open(path)?;
^0
222| 15| let ms = MediaSource::seekable(file)?;
^0
223| 15| let mut parser = MediaParser::new();
224| 15| parser.parse_exif(ms)
225| 15|}
226| |
227| |/// Read track metadata from a video / audio file in a single call.
228| |///
229| |/// For batch processing, reuse a [`MediaParser`] via [`MediaParser::parse_track`].
230| 2|pub fn read_track(path: impl AsRef<Path>) -> Result<TrackInfo> {
231| 2| let file = std::fs::File::open(path)?;
^0
232| 2| let ms = MediaSource::seekable(file)?;
^0
233| 2| let mut parser = MediaParser::new();
234| 2| parser.parse_track(ms)
235| 2|}
236| |
237| |/// Read metadata from a file, dispatching by detected [`MediaKind`]:
238| |/// images return [`Metadata::Exif`], video / audio containers return
239| |/// [`Metadata::Track`].
240| |///
241| |/// Use this when the caller does not know up-front whether the file is an
242| |/// image or a track. For batch processing, reuse a [`MediaParser`] and
243| |/// branch on [`MediaSource::kind`] manually.
244| 3|pub fn read_metadata(path: impl AsRef<Path>) -> Result<Metadata> {
245| 3| let file = std::fs::File::open(path)?;
^0
246| 3| let ms = MediaSource::seekable(file)?;
^0
247| 3| let mut parser = MediaParser::new();
248| 3| match ms.kind() {
249| 2| MediaKind::Image => parser.parse_exif(ms).map(|i| Metadata::Exif(i.into())),
250| 1| MediaKind::Track => parser.parse_track(ms).map(Metadata::Track),
251| | }
252| 3|}
253| |
254| |/// **Deprecated since v3.3.0**: use [`read_exif`] with
255| |/// [`MediaSource::from_memory`] directly.
256| |#[deprecated(
257| | since = "3.3.0",
258| | note = "Use `read_exif` with `MediaSource::from_memory`."
259| |)]
260| 2|pub fn read_exif_from_bytes(bytes: impl Into<bytes::Bytes>) -> Result<Exif> {
261| | #[allow(deprecated)]
262| 2| let iter = read_exif_iter_from_bytes(bytes)?;
^0
263| 2| Ok(iter.into())
264| 2|}
265| |
266| |#[deprecated(
267| | since = "3.3.0",
268| | note = "Use `read_exif_iter` with `MediaSource::from_memory`."
269| |)]
270| 3|pub fn read_exif_iter_from_bytes(bytes: impl Into<bytes::Bytes>) -> Result<ExifIter> {
271| 3| let ms = MediaSource::from_memory(bytes)?;
^0
272| 3| let mut parser = MediaParser::new();
273| 3| parser.parse_exif(ms)
274| 3|}
275| |
276| |#[deprecated(
277| | since = "3.3.0",
278| | note = "Use `read_track` with `MediaSource::from_memory`."
279| |)]
280| 1|pub fn read_track_from_bytes(bytes: impl Into<bytes::Bytes>) -> Result<TrackInfo> {
281| 1| let ms = MediaSource::from_memory(bytes)?;
^0
282| 1| let mut parser = MediaParser::new();
283| 1| parser.parse_track(ms)
284| 1|}
285| |
286| |#[deprecated(
287| | since = "3.3.0",
288| | note = "Use `read_metadata` with `MediaSource::from_memory`."
289| |)]
290| 2|pub fn read_metadata_from_bytes(bytes: impl Into<bytes::Bytes>) -> Result<Metadata> {
291| 2| let ms = MediaSource::from_memory(bytes)?;
^0
292| 2| let mut parser = MediaParser::new();
293| 2| match ms.kind() {
294| 1| MediaKind::Image => parser.parse_exif(ms).map(|i| Metadata::Exif(i.into())),
295| 1| MediaKind::Track => parser.parse_track(ms).map(Metadata::Track),
296| | }
297| 2|}
298| |
299| |#[cfg(feature = "tokio")]
300| |mod tokio_top_level {
301| | use super::*;
302| |
303| 2| pub async fn read_exif_async(path: impl AsRef<std::path::Path>) -> Result<Exif> {
304| 2| let iter = read_exif_iter_async(path).await?;
^0
305| 2| Ok(iter.into())
306| 2| }
307| |
308| 2| pub async fn read_exif_iter_async(path: impl AsRef<std::path::Path>) -> Result<ExifIter> {
309| 2| let file = tokio::fs::File::open(path).await?;
^0
310| 2| let ms = parser_async::AsyncMediaSource::seekable(file).await?;
^0
311| 2| let mut parser = MediaParser::new();
312| 2| parser.parse_exif_async(ms).await
313| 2| }
314| |
315| 1| pub async fn read_track_async(path: impl AsRef<std::path::Path>) -> Result<TrackInfo> {
316| 1| let file = tokio::fs::File::open(path).await?;
^0
317| 1| let ms = parser_async::AsyncMediaSource::seekable(file).await?;
^0
318| 1| let mut parser = MediaParser::new();
319| 1| parser.parse_track_async(ms).await
320| 1| }
321| |
322| 0| pub async fn read_metadata_async(path: impl AsRef<std::path::Path>) -> Result<Metadata> {
323| 0| let file = tokio::fs::File::open(path).await?;
324| 0| let ms = parser_async::AsyncMediaSource::seekable(file).await?;
325| 0| let mut parser = MediaParser::new();
326| 0| match ms.kind() {
327| 0| MediaKind::Image => parser
328| 0| .parse_exif_async(ms)
329| 0| .await
330| 0| .map(|i| Metadata::Exif(i.into())),
331| 0| MediaKind::Track => parser.parse_track_async(ms).await.map(Metadata::Track),
332| | }
333| 0| }
334| |}
335| |
336| |#[cfg(feature = "tokio")]
337| |pub use tokio_top_level::{
338| | read_exif_async, read_exif_iter_async, read_metadata_async, read_track_async,
339| |};
340| |
341| |mod bbox;
342| |mod cr3;
343| |mod ebml;
344| |mod error;
345| |mod exif;
346| |mod file;
347| |mod heif;
348| |mod image_metadata;
349| |mod jpeg;
350| |mod mov;
351| |mod parser;
352| |#[cfg(feature = "tokio")]
353| |mod parser_async;
354| |mod png;
355| |mod raf;
356| |mod slice;
357| |mod utils;
358| |mod values;
359| |mod video;
360| |
361| |#[cfg(test)]
362| |mod testkit;
363| |
364| |#[cfg(test)]
365| |mod v3_top_level_tests {
366| | use super::*;
367| |
368| | #[test]
369| 1| fn read_exif_jpg() {
370| 1| let exif = read_exif("testdata/exif.jpg").unwrap();
371| 1| assert!(exif.get(ExifTag::Make).is_some());
372| 1| }
373| |
374| | #[test]
375| 1| fn read_track_mov() {
376| 1| let info = read_track("testdata/meta.mov").unwrap();
377| 1| assert!(info.get(TrackInfoTag::Make).is_some());
378| 1| }
379| |
380| | #[test]
381| 1| fn read_metadata_dispatches_image() {
382| 1| match read_metadata("testdata/exif.jpg").unwrap() {
383| 1| Metadata::Exif(_) => {}
384| 0| Metadata::Track(_) => panic!("expected Exif variant"),
385| | }
386| 1| }
387| |
388| | #[test]
389| 1| fn read_metadata_dispatches_track() {
390| 1| match read_metadata("testdata/meta.mov").unwrap() {
391| 1| Metadata::Track(_) => {}
392| 0| Metadata::Exif(_) => panic!("expected Track variant"),
393| | }
394| 1| }
395| |
396| | #[cfg(feature = "tokio")]
397| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
398| 1| async fn read_exif_async_jpg() {
399| 1| let exif = read_exif_async("testdata/exif.jpg").await.unwrap();
400| 1| assert!(exif.get(ExifTag::Make).is_some());
401| 1| }
402| |
403| | #[cfg(feature = "tokio")]
404| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
405| 1| async fn read_track_async_mov() {
406| 1| let info = read_track_async("testdata/meta.mov").await.unwrap();
407| 1| assert!(info.get(TrackInfoTag::Make).is_some());
408| 1| }
409| |
410| | #[test]
411| | #[allow(deprecated)]
412| 1| fn read_exif_from_bytes_jpg() {
413| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
414| 1| let exif = read_exif_from_bytes(raw).unwrap();
415| 1| assert!(exif.get(ExifTag::Make).is_some());
416| 1| }
417| |
418| | #[test]
419| | #[allow(deprecated)]
420| 1| fn read_exif_iter_from_bytes_jpg() {
421| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
422| 1| let iter = read_exif_iter_from_bytes(raw).unwrap();
423| 1| assert!(iter.into_iter().count() > 0);
424| 1| }
425| |
426| | #[test]
427| | #[allow(deprecated)]
428| 1| fn read_track_from_bytes_mov() {
429| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
430| 1| let info = read_track_from_bytes(raw).unwrap();
431| 1| assert!(info.get(TrackInfoTag::Make).is_some());
432| 1| }
433| |
434| | #[test]
435| | #[allow(deprecated)]
436| 1| fn read_metadata_from_bytes_dispatches_image() {
437| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
438| 1| match read_metadata_from_bytes(raw).unwrap() {
439| 1| Metadata::Exif(_) => {}
440| 0| Metadata::Track(_) => panic!("expected Exif variant"),
441| | }
442| 1| }
443| |
444| | #[test]
445| | #[allow(deprecated)]
446| 1| fn read_metadata_from_bytes_dispatches_track() {
447| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
448| 1| match read_metadata_from_bytes(raw).unwrap() {
449| 1| Metadata::Track(_) => {}
450| 0| Metadata::Exif(_) => panic!("expected Track variant"),
451| | }
452| 1| }
453| |
454| | #[test]
455| | #[allow(deprecated)]
456| 1| fn read_exif_from_bytes_static_slice() {
457| 1| let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
458| 1| let exif = read_exif_from_bytes(raw).unwrap();
459| 1| assert!(exif.get(ExifTag::Make).is_some());
460| 1| }
461| |
462| | #[test]
463| 1| fn prelude_imports_compile() {
464| | use crate::prelude::*;
465| 0| fn _consume(_: Option<Exif>, _: Option<TrackInfo>, _: Option<MediaParser>) {}
466| | // Verify the function symbols are in scope (compilation is the test).
467| 1| let _e = read_exif("testdata/exif.jpg");
468| 1| let _t = read_track("testdata/meta.mov");
469| 1| let _m = read_metadata("testdata/exif.jpg");
470| 1| }
471| |}
/home/min/dev/nom-exif/src/mov.rs:
1| |use std::{collections::BTreeMap, ops::Range};
2| |
3| |use chrono::DateTime;
4| |use nom::{bytes::streaming, IResult};
5| |
6| |use crate::{bbox::to_boxes, values::filter_zero};
7| |use crate::{
8| | bbox::{
9| | find_box, parse_video_tkhd_in_moov, travel_header, IlstBox, KeysBox, MvhdBox, ParseBox,
10| | },
11| | error::{MalformedKind, ParsingError},
12| | video::TrackInfoTag,
13| | EntryValue,
14| |};
15| |
16| |#[tracing::instrument(skip_all)]
17| 93|pub(crate) fn parse_isobmff(moov_body: &[u8]) -> Result<crate::TrackInfo, ParsingError> {
18| 93| let (_, entries) = match parse_moov_body(moov_body) {
19| 93| Ok((remain, Some(entries))) => (remain, entries),
20| 0| Ok((remain, None)) => (remain, Vec::new()),
21| | Err(_) => {
22| 0| return Err(ParsingError::Failed {
23| 0| kind: MalformedKind::IsoBmffBox,
24| 0| message: "invalid moov body".into(),
25| 0| });
26| | }
27| | };
28| |
29| 93| let mut entries: BTreeMap<TrackInfoTag, EntryValue> = convert_video_tags(entries);
30| 93| let mut extras = parse_mvhd_tkhd(moov_body);
31| 93| if entries.contains_key(&TrackInfoTag::CreateDate) {
32| 40| extras.remove(&TrackInfoTag::CreateDate);
33| 53| }
34| 93| entries.extend(extras);
35| |
36| 93| let mut info = crate::TrackInfo::default();
37| 567| for (k, v) in entries {
^93
38| 567| info.put(k, v);
39| 567| }
40| 93| Ok(info)
41| 93|}
42| |
43| 93|fn parse_mvhd_tkhd(moov_body: &[u8]) -> BTreeMap<TrackInfoTag, EntryValue> {
44| 93| let mut entries = BTreeMap::new();
45| 93| if let Ok((_, Some(bbox))) = find_box(moov_body, "mvhd") {
46| 93| if let Ok((_, mvhd)) = MvhdBox::parse_box(bbox.data) {
47| 93| entries.insert(TrackInfoTag::DurationMs, mvhd.duration_ms().into());
48| 93|
49| 93| entries.insert(
50| 93| TrackInfoTag::CreateDate,
51| 93| EntryValue::DateTime(mvhd.creation_time()),
52| 93| );
53| 93| }
^0
54| 0| }
55| |
56| 93| if let Ok(Some(tkhd)) = parse_video_tkhd_in_moov(moov_body) {
^91
57| 91| entries.insert(TrackInfoTag::Width, tkhd.width.into());
58| 91| entries.insert(TrackInfoTag::Height, tkhd.height.into());
59| 91| }
^2
60| |
61| 93| entries
62| 93|}
63| |
64| 93|fn convert_video_tags(entries: Vec<(String, EntryValue)>) -> BTreeMap<TrackInfoTag, EntryValue> {
65| 93| entries
66| 93| .into_iter()
67| 256| .filter_map(|(k, v)| {
^93
68| 256| if k == "com.apple.quicktime.creationdate" {
69| 40| v.as_str()
70| 40| .and_then(|s| DateTime::parse_from_str(s, "%+").ok())
71| 40| .map(|t| (TrackInfoTag::CreateDate, EntryValue::DateTime(t)))
72| 216| } else if k == "com.apple.quicktime.make" {
73| 40| Some((TrackInfoTag::Make, v))
74| 176| } else if k == "com.apple.quicktime.model" {
75| 40| Some((TrackInfoTag::Model, v))
76| 136| } else if k == "com.apple.quicktime.software" {
77| 40| Some((TrackInfoTag::Software, v))
78| 96| } else if k == "com.apple.quicktime.author" {
79| 2| Some((TrackInfoTag::Author, v))
80| 94| } else if k == "com.apple.quicktime.location.ISO6709" {
81| 40| Some((TrackInfoTag::GpsIso6709, v))
82| 54| } else if k == "udta.©xyz" {
83| | // For mp4 files, Android phones store GPS info in that box.
84| 35| v.as_u8_slice()
85| 35| .and_then(parse_udta_gps)
86| 35| .map(|v| (TrackInfoTag::GpsIso6709, EntryValue::Text(v)))
87| 19| } else if k == "udta.auth" {
88| 2| v.as_u8_slice()
89| 2| .and_then(parse_udta_auth)
90| 2| .map(|v| (TrackInfoTag::Author, EntryValue::Text(v)))
91| 17| } else if k.starts_with("udta.") {
92| 2| let tag = k.as_str().parse::<TrackInfoTag>().ok();
93| 2| tag.map(|t| (t, v))
^0 ^0
94| | } else {
95| 15| None
96| | }
97| 256| })
98| 93| .collect()
99| 93|}
100| |
101| |/// Try to find GPS info from box `moov/udta/©xyz`. For mp4 files, Android
102| |/// phones store GPS info in that box.
103| |// fn parse_mp4_gps(moov_body: &[u8]) -> Option<String> {
104| |// let bbox = match find_box(moov_body, "udta/©xyz") {
105| |// Ok((_, b)) => b,
106| |// Err(_) => None,
107| |// };
108| |// if let Some(bbox) = bbox {
109| |// return parse_udta_gps(bbox.body_data());
110| |// }
111| |// None
112| |// }
113| 35|fn parse_udta_gps(data: &[u8]) -> Option<String> {
114| 35| if data.len() <= 4 {
115| 0| tracing::warn!("moov/udta/©xyz body is too small");
116| 0| None
117| | } else {
118| | // The first 4 bytes is zero, skip them
119| 35| let location = data[4..] // Safe-slice
120| 35| .iter()
121| 630| .map(|b| *b as char)
^35
122| 35| .collect::<String>();
123| 35| Some(location)
124| | }
125| 35|}
126| |
127| |const ISO_639_2_UND: [u8; 2] = [0x55, 0xc4];
128| |
129| 2|fn parse_udta_auth(data: &[u8]) -> Option<String> {
130| | // Skip leading zero bytes
131| 2| let data = filter_zero(data);
132| |
133| | // Skip leading language flags.
134| | // Refer to: https://exiftool.org/forum/index.php?topic=11498.0
135| 2| if data.starts_with(&ISO_639_2_UND) {
136| 2| String::from_utf8(data.into_iter().skip(2).collect()).ok()
137| | } else {
138| 0| String::from_utf8(data).ok()
139| | }
140| 2|}
141| |
142| |/// Parse the byte data of an ISOBMFF file and return the potential body data of
143| |/// moov atom it may contain.
144| |///
145| |/// Regarding error handling, please refer to [`ParsingError`] for more information.
146| |#[tracing::instrument(skip_all)]
147| 173|pub(crate) fn extract_moov_body_from_buf(input: &[u8]) -> Result<Range<usize>, ParsingError> {
148| | // parse metadata from moov/meta/keys & moov/meta/ilst
149| 173| let remain = input;
150| |
151| 173| let convert_error = |e: nom::Err<_>, msg: &str| match e {
^0
152| 0| nom::Err::Incomplete(needed) => match needed {
153| 0| nom::Needed::Unknown => ParsingError::Need(1),
154| 0| nom::Needed::Size(n) => ParsingError::Need(n.get()),
155| | },
156| 0| nom::Err::Failure(_) | nom::Err::Error(_) => ParsingError::Failed {
157| 0| kind: MalformedKind::IsoBmffBox,
158| 0| message: msg.to_string(),
159| 0| },
160| 0| };
161| |
162| 173| let mut to_skip = 0;
163| 173| let mut skipped = 0;
164| 336| let (remain, header) = travel_header(remain, |h, remain| {
^173 ^173 ^173 ^173
165| 336| tracing::debug!(?h.box_type, ?h.box_size, "Got");
166| 336| if h.box_type == "moov" {
167| | // stop travelling
168| 94| skipped += h.header_size;
169| 94| false
170| 242| } else if (remain.len() as u64) < h.body_size() {
171| | // stop travelling & skip unused box data
172| 79| to_skip = h.body_size() as usize - remain.len();
173| 79| false
174| | } else {
175| | // body has been read, so just consume it
176| 163| skipped += h.box_size as usize;
177| 163| true
178| | }
179| 336| })
180| 173| .map_err(|e| convert_error(e, "search atom moov failed"))?;
^0 ^0 ^0 ^0
181| |
182| 173| if to_skip > 0 {
183| | return Err(ParsingError::ClearAndSkip(
184| 79| to_skip
185| 79| .checked_add(input.len())
186| 79| .ok_or_else(|| ParsingError::Failed {
187| 2| kind: MalformedKind::IsoBmffBox,
188| 2| message: "to_skip is too big".into(),
189| 2| })?,
190| | ));
191| 94| }
192| |
193| 94| let size: usize = header.body_size().try_into().expect("must fit");
194| 94| let (_, body) =
195| 94| streaming::take(size)(remain).map_err(|e| convert_error(e, "moov is too small"))?;
^0 ^0 ^0 ^0
196| |
197| 94| Ok(skipped..skipped + body.len())
198| 173|}
199| |
200| |type EntriesResult<'a> = IResult<&'a [u8], Option<Vec<(String, EntryValue)>>>;
201| |
202| |#[tracing::instrument(skip(input))]
203| 94|fn parse_moov_body(input: &[u8]) -> EntriesResult<'_> {
204| 94| tracing::debug!("parse_moov_body");
205| |
206| 94| let mut entries = parse_meta(input).unwrap_or_default();
207| |
208| 94| if let Ok((_, Some(udta))) = find_box(input, "udta") {
^39
209| 39| tracing::debug!("udta");
210| 39| if let Ok(boxes) = to_boxes(udta.body_data()) {
211| 39| for entry in boxes.iter() {
212| 39| tracing::debug!(?entry, "udta entry");
213| 39| entries.push((
214| 39| format!("udta.{}", entry.box_type()),
215| 39| EntryValue::U8Array(Vec::from(entry.body_data())),
216| 39| ));
217| | }
218| 0| }
219| 55| }
220| |
221| 94| Ok((input, Some(entries)))
222| 94|}
223| |
224| 94|fn parse_meta(input: &[u8]) -> Option<Vec<(String, EntryValue)>> {
225| 94| let (_, Some(meta)) = find_box(input, "meta").ok()? else {
^43 ^0
226| 51| return None;
227| | };
228| |
229| 43| let (_, Some(keys)) = find_box(meta.body_data(), "keys").ok()? else {
^0
230| 0| return None;
231| | };
232| |
233| 43| let (_, Some(ilst)) = find_box(meta.body_data(), "ilst").ok()? else {
^0
234| 0| return None;
235| | };
236| |
237| 43| let (_, keys) = KeysBox::parse_box(keys.data).ok()?;
^0
238| 43| let (_, ilst) = IlstBox::parse_box(ilst.data).ok()?;
^0
239| |
240| 43| let entries = keys
241| 43| .entries
242| 43| .into_iter()
243| 43| .map(|k| k.key)
244| 43| .zip(ilst.items.into_iter().map(|v| v.value))
245| 43| .collect::<Vec<_>>();
246| |
247| 43| Some(entries)
248| 94|}
249| |
250| |#[cfg(test)]
251| |mod tests {
252| | use super::*;
253| | use crate::testkit::*;
254| | use test_case::test_case;
255| |
256| | #[test_case("meta.mov")]
257| 1| fn mov_extract_mov(path: &str) {
258| 1| let _ = tracing_subscriber::fmt().with_test_writer().try_init();
259| |
260| 1| let buf = read_sample(path).unwrap();
261| 1| tracing::info!(bytes = buf.len(), "File size.");
262| 1| let range = extract_moov_body_from_buf(&buf).unwrap();
263| 1| let (_, entries) = parse_moov_body(&buf[range]).unwrap();
264| 1| assert_eq!(
265| 1| entries
266| 1| .unwrap()
267| 1| .iter()
268| 5| .map(|x| format!("{x:?}"))
^1
269| 1| .collect::<Vec<_>>()
270| 1| .join("\n"),
271| | "(\"com.apple.quicktime.make\", Text(\"Apple\"))
272| |(\"com.apple.quicktime.model\", Text(\"iPhone X\"))
273| |(\"com.apple.quicktime.software\", Text(\"12.1.2\"))
274| |(\"com.apple.quicktime.location.ISO6709\", Text(\"+27.1281+100.2508+000.000/\"))
275| |(\"com.apple.quicktime.creationdate\", Text(\"2019-02-12T15:27:12+08:00\"))"
276| | );
277| 1| }
278| |}
/home/min/dev/nom-exif/src/parser.rs:
1| |use std::{
2| | cmp::{max, min},
3| | fmt::{Debug, Display},
4| | fs::File,
5| | io::{self, Read, Seek},
6| | path::Path,
7| |};
8| |
9| |use crate::{
10| | error::{ParsedError, ParsingError, ParsingErrorState},
11| | exif::TiffHeader,
12| | file::MediaMime,
13| | ExifIter, TrackInfo,
14| |};
15| |
16| |/// A function that tries to skip `n` bytes of `reader` by seeking. Returns
17| |/// `Ok(true)` on success, `Ok(false)` if the reader does not support seek
18| |/// (so the caller should fall back to reading-and-discarding), or
19| |/// `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
20| |///
21| |/// This is captured at construction time by `MediaSource::seekable` /
22| |/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
23| |/// runtime fn pointer.
24| |pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25| |
26| |/// `MediaSource` represents a media data source that can be parsed by
27| |/// [`MediaParser`].
28| |///
29| |/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
30| |///
31| |/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
32| |/// (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
33| |/// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
34| |///
35| |/// - In other cases:
36| |///
37| |/// - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
38| |/// (an already-open `File` goes here).
39| |///
40| |/// - Use [`MediaSource::unseekable`] to create a MediaSource from a
41| |/// reader that only impl `Read`
42| |///
43| |/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
44| |/// since the former is more efficient when the parser needs to skip a large number of bytes.
45| |///
46| |/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
47| |/// its own buffer management and the buffers can be shared between multiple
48| |/// parsing tasks, thus avoiding frequent memory allocations.
49| |pub struct MediaSource<R> {
50| | pub(crate) reader: R,
51| | pub(crate) buf: Vec<u8>,
52| | pub(crate) mime: MediaMime,
53| | pub(crate) skip_by_seek: SkipBySeekFn<R>,
54| | /// P7: zero-copy memory-mode payload. `Some` only when the source was
55| | /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
56| | /// `skip_by_seek` are placeholders (and never consulted) in that mode.
57| | pub(crate) memory: Option<bytes::Bytes>,
58| |}
59| |
60| |/// Top-level classification of a media source.
61| |///
62| |/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
63| |/// `Track` files are time-based containers — video, audio, or both — and
64| |/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
65| |/// audio containers like `.mka` are classified as `Track`.
66| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
67| |pub enum MediaKind {
68| | Image,
69| | Track,
70| |}
71| |
72| |impl<R> Debug for MediaSource<R> {
73| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74| 0| f.debug_struct("MediaSource")
75| 0| .field("mime", &self.mime)
76| 0| .finish_non_exhaustive()
77| 0| }
78| |}
79| |
80| |// Should be enough for parsing header
81| |const HEADER_PARSE_BUF_SIZE: usize = 128;
82| |
83| |impl<R> MediaSource<R> {
84| | /// Top-level classification of this media source.
85| 42| pub fn kind(&self) -> MediaKind {
86| 42| match self.mime {
87| 29| MediaMime::Image(_) => MediaKind::Image,
88| 13| MediaMime::Track(_) => MediaKind::Track,
89| | }
90| 42| }
91| |}
92| |
93| |impl<R: Read> MediaSource<R> {
94| 147| fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95| 147| let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96| 147| reader
97| 147| .by_ref()
98| 147| .take(HEADER_PARSE_BUF_SIZE as u64)
99| 147| .read_to_end(&mut buf)?;
^0
100| 147| let mime: MediaMime = buf.as_slice().try_into()?;
^144 ^144 ^3
101| 144| Ok(Self {
102| 144| reader,
103| 144| buf,
104| 144| mime,
105| 144| skip_by_seek,
106| 144| memory: None,
107| 144| })
108| 147| }
109| |
110| | /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111| | /// reader that only impl `Read`
112| | ///
113| | /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114| | /// since the former is more efficient when the parser needs to skip a large number of bytes.
115| 29| pub fn unseekable(reader: R) -> crate::Result<Self> {
116| 29| Self::build(reader, |_, _| Ok(false))
117| 29| }
118| |}
119| |
120| |impl<R: Read + Seek> MediaSource<R> {
121| | /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122| | ///
123| | /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124| | /// since the former is more efficient when the parser needs to skip a large number of bytes.
125| 118| pub fn seekable(reader: R) -> crate::Result<Self> {
126| 118| Self::build(reader, |r, n| {
^25
127| 25| let signed: i64 = n
^24 ^24
128| 25| .try_into()
129| 25| .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
^1 ^1 ^1
130| 24| r.seek_relative(signed)?;
^0
131| 24| Ok(true)
132| 25| })
133| 118| }
134| |}
135| |
136| |impl MediaSource<File> {
137| | /// Open a file at `path` and parse its header to detect the media format.
138| | ///
139| | /// This is the v3-preferred entry point for the common case of "I have a
140| | /// path on disk". For an already-open `File` use [`Self::seekable`].
141| 68| pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142| 68| Self::seekable(File::open(path)?)
^0
143| 68| }
144| |}
145| |
146| |impl MediaSource<()> {
147| | /// Build a [`MediaSource`] from an in-memory byte payload.
148| | ///
149| | /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150| | /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151| | /// HTTP-stack body types that implement `Into<Bytes>` directly.
152| | ///
153| | /// The header (first up to 128 bytes) is sniffed for media kind, the
154| | /// same way [`MediaSource::open`] does it for files. The full payload is
155| | /// stored zero-copy: subsequent parsing through
156| | /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157| | /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158| | /// via reference counting.
159| | ///
160| | /// The returned source is parsed by the dedicated
161| | /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162| | /// methods. The streaming `parse_exif` / `parse_track` methods do not
163| | /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164| | ///
165| | /// # Example
166| | ///
167| | /// ```rust
168| | /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169| | ///
170| | /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171| | /// let ms = MediaSource::from_bytes(bytes)?;
172| | /// assert_eq!(ms.kind(), MediaKind::Image);
173| | ///
174| | /// let mut parser = MediaParser::new();
175| | /// let _iter = parser.parse_exif_from_bytes(ms)?;
176| | /// # Ok::<(), nom_exif::Error>(())
177| | /// ```
178| | #[deprecated(
179| | since = "3.3.0",
180| | note = "Use `MediaSource::from_memory` and the unified `parse_*` \
181| | methods (which now accept memory-mode sources directly). \
182| | The `MediaSource<()>` shape will be removed in v4."
183| | )]
184| 15| pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
185| 15| let bytes = bytes.into();
186| 15| let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
187| 15| let mime: MediaMime = bytes[..head_end].try_into()?;
^13 ^13 ^2
188| | Ok(Self {
189| 13| reader: (),
190| 13| buf: Vec::new(),
191| 13| mime,
192| | // Placeholder: never invoked in memory mode (clear_and_skip's
193| | // AdvanceOnly path is the only one taken).
194| | skip_by_seek: |_, _| Ok(false),
195| 13| memory: Some(bytes),
196| | })
197| 15| }
198| |
199| | /// Internal adapter: convert a v3.0-style `MediaSource<()>` (built via
200| | /// the deprecated `from_bytes`) into the unified `MediaSource<Empty>`
201| | /// shape so the deprecated `parse_*_from_bytes` methods can delegate to
202| | /// the unified `parse_*` methods. Memory contents are moved over
203| | /// verbatim, preserving zero-copy.
204| 10| pub(crate) fn into_empty(self) -> MediaSource<std::io::Empty> {
205| | MediaSource {
206| 10| reader: std::io::empty(),
207| 10| buf: self.buf,
208| 10| mime: self.mime,
209| | // Placeholder: never invoked in memory mode (clear_and_skip's
210| | // AdvanceOnly path is the only one taken).
211| | skip_by_seek: |_, _| Ok(false),
212| 10| memory: self.memory,
213| | }
214| 10| }
215| |}
216| |
217| |impl MediaSource<std::io::Empty> {
218| | /// Build a [`MediaSource`] from an in-memory byte payload.
219| | ///
220| | /// This is the v3.3 replacement for [`MediaSource::<()>::from_bytes`]
221| | /// (which is now `#[deprecated]`). Functionally identical — same
222| | /// zero-copy semantics, same accepted input types — but produces a
223| | /// `MediaSource<std::io::Empty>` so that the unified `parse_*<R: Read>`
224| | /// methods can accept it directly without a separate `_from_bytes`
225| | /// sibling.
226| | ///
227| | /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
228| | /// `Vec<u8>`, `&'static [u8]`, `String`, `Box<[u8]>`, and HTTP-stack
229| | /// body types that implement `Into<Bytes>` directly.
230| | ///
231| | /// The header (first up to 128 bytes) is sniffed for media kind, the
232| | /// same way [`MediaSource::open`] does it for files. The full payload
233| | /// is stored zero-copy: subsequent parsing through
234| | /// [`MediaParser::parse_exif`] / [`MediaParser::parse_track`] shares
235| | /// this `Bytes` directly with the returned `ExifIter` / sub-IFDs via
236| | /// reference counting.
237| | ///
238| | /// # Example
239| | ///
240| | /// ```rust
241| | /// use nom_exif::{MediaSource, MediaParser, MediaKind};
242| | ///
243| | /// let bytes = std::fs::read("./testdata/exif.jpg")?;
244| | /// let ms = MediaSource::from_memory(bytes)?;
245| | /// assert_eq!(ms.kind(), MediaKind::Image);
246| | ///
247| | /// let mut parser = MediaParser::new();
248| | /// let _iter = parser.parse_exif(ms)?; // unified entry point
249| | /// # Ok::<(), nom_exif::Error>(())
250| | /// ```
251| 21| pub fn from_memory(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
252| 21| let bytes = bytes.into();
253| 21| let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
254| 21| let mime: MediaMime = bytes[..head_end].try_into()?;
^19 ^19 ^2
255| | Ok(Self {
256| 19| reader: std::io::empty(),
257| 19| buf: Vec::new(),
258| 19| mime,
259| | // Placeholder: never invoked in memory mode (AdvanceOnly path).
260| | skip_by_seek: |_, _| Ok(false),
261| 19| memory: Some(bytes),
262| | })
263| 21| }
264| |}
265| |
266| |// ----- Parse-time buffer policy -----
267| |//
268| |// Layered by lifecycle:
269| |//
270| |// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
271| |// `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
272| |// helpers don't over-commit.
273| |// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
274| |// parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
275| |// blocking-pool dispatches.
276| |// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
277| |// single parse. Anything that would push past this is rejected as
278| |// `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
279| |// that declare absurd sizes.
280| |// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
281| |// recycling. After a parse whose buffer ended above this, `shrink_to`
282| |// gives the excess back to the allocator. Tuned for typical metadata
283| |// sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
284| |// recycle path stays warm for batch workloads.
285| |pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024;
286| |pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024;
287| |pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024;
288| |const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024;
289| |
290| |pub(crate) trait Buf {
291| | fn buffer(&self) -> &[u8];
292| | fn clear(&mut self);
293| |
294| | fn set_position(&mut self, pos: usize);
295| | #[allow(unused)]
296| | fn position(&self) -> usize;
297| |}
298| |
299| |/// Buffer-management state used by `MediaParser` (sync and async paths share it).
300| |///
301| |/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
302| |/// one *cached* `Bytes` clone of the most recently shared buffer. When the
303| |/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
304| |/// succeeds the underlying allocation is reused (the previous `ExifIter`
305| |/// has been dropped); otherwise the clone is discarded and a fresh
306| |/// `Vec<u8>` is allocated.
307| |///
308| |/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
309| |/// are `&mut self`, so a single slot is sufficient.
310| |#[derive(Debug, Default)]
311| |pub(crate) struct BufferedParserState {
312| | cached: Option<bytes::Bytes>,
313| | buf: Option<Vec<u8>>,
314| | /// P7: memory-mode storage. When `Some`, the parser is feeding from a
315| | /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
316| | /// `cached` are unused in this mode — the user owns the allocation,
317| | /// so there is nothing to recycle.
318| | memory: Option<bytes::Bytes>,
319| | position: usize,
320| |}
321| |
322| |impl BufferedParserState {
323| 169| pub(crate) fn new() -> Self {
324| 169| Self::default()
325| 169| }
326| |
327| 521| pub(crate) fn reset(&mut self) {
328| | // If a parse failed mid-way the buf may still be present; drop it.
329| | // Cache stays — recycle on next acquire if eligible.
330| 521| self.buf = None;
331| 521| self.memory = None;
332| 521| self.position = 0;
333| 521| }
334| |
335| | /// Switch the parser state into memory mode, owning `bytes` directly.
336| | /// Caller must have already called `reset()` (asserted in debug). Subsequent
337| | /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
338| | /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
339| 35| pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
340| 35| debug_assert!(
341| 35| self.buf.is_none() && self.memory.is_none(),
342| | "set_memory called on non-clean state"
343| | );
344| 35| self.memory = Some(bytes);
345| 35| self.position = 0;
346| 35| }
347| |
348| 1.75k| pub(crate) fn is_memory_mode(&self) -> bool {
349| 1.75k| self.memory.is_some()
350| 1.75k| }
351| |
352| 229| pub(crate) fn acquire_buf(&mut self) {
353| 229| if self.memory.is_some() {
354| | // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
355| 1| return;
356| 228| }
357| 228| debug_assert!(self.buf.is_none());
358| 228| let buf = match self.cached.take() {
359| 17| Some(b) => match b.try_into_mut() {
360| 17| Ok(bm) => {
361| 17| let mut v = Vec::<u8>::from(bm);
362| 17| v.clear();
363| 17| if v.capacity() > MAX_REUSE_BUF_SIZE {
364| 0| v.shrink_to(MAX_REUSE_BUF_SIZE);
365| 17| }
366| 17| v
367| | }
368| 0| Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
369| | },
370| 211| None => Vec::with_capacity(INIT_BUF_SIZE),
371| | };
372| 228| self.buf = Some(buf);
373| 229| }
374| |
375| 4.98k| pub(crate) fn buf(&self) -> &Vec<u8> {
376| 4.98k| self.buf.as_ref().expect("no buf here")
377| 4.98k| }
378| |
379| 2.09k| pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
380| 2.09k| self.buf.as_mut().expect("no buf here")
381| 2.09k| }
382| |
383| | #[cfg(test)]
384| 6| pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
385| 6| self.cached.as_ref().map(|b| b.as_ptr())
^2^2
386| 6| }
387| |
388| | #[cfg(test)]
389| 1| pub(crate) fn buf_is_none_for_test(&self) -> bool {
390| 1| self.buf.is_none()
391| 1| }
392| |}
393| |
394| |impl Buf for BufferedParserState {
395| 3.28k| fn buffer(&self) -> &[u8] {
396| 3.28k| if let Some(m) = &self.memory {
^71
397| 71| return &m[self.position..];
398| 3.21k| }
399| 3.21k| &self.buf()[self.position..]
400| 3.28k| }
401| 109| fn clear(&mut self) {
402| | // In memory mode `clear` is a no-op: there is no scratch buffer to
403| | // truncate, and the caller's bytes must remain available for further
404| | // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
405| | // what advances `position` in memory mode.
406| 109| if self.memory.is_some() {
407| 0| return;
408| 109| }
409| 109| self.buf_mut().clear();
410| 109| }
411| 1| fn set_position(&mut self, pos: usize) {
412| 1| self.position = pos;
413| 1| }
414| 0| fn position(&self) -> usize {
415| 0| self.position
416| 0| }
417| |}
418| |
419| |impl ShareBuf for BufferedParserState {
420| 92| fn share_buf(&mut self) -> (bytes::Bytes, usize) {
421| 92| if let Some(m) = self.memory.take() {
^17
422| | // Zero-copy share: caller already owns the allocation. No cache
423| | // write — recycle is irrelevant when the user holds the alloc.
424| 17| let position = self.position;
425| 17| return (m, position);
426| 75| }
427| 75| let vec = self.buf.take().expect("no buf to share");
428| 75| let bytes = bytes::Bytes::from(vec);
429| 75| let position = self.position;
430| 75| self.cached = Some(bytes.clone());
431| 75| (bytes, position)
432| 92| }
433| |}
434| |
435| |/// What `clear_and_skip` should do, given the current buffer state and
436| |/// the requested skip count.
437| |pub(crate) enum SkipPlan {
438| | /// Skip is fully within the current buffer; just advance position.
439| | AdvanceOnly,
440| | /// Buffer must be cleared and `extra` bytes skipped from the reader.
441| | ClearAndSkip { extra: usize },
442| |}
443| |
444| 81|pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
445| 81| if n <= buffer_len {
446| 0| SkipPlan::AdvanceOnly
447| | } else {
448| 81| SkipPlan::ClearAndSkip {
449| 81| extra: n - buffer_len,
450| 81| }
451| | }
452| 81|}
453| |
454| 1.75k|pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
455| 1.75k| if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
456| 1| tracing::error!(?requested, "the requested buffer size is too big");
457| 1| return Err(io::ErrorKind::Unsupported.into());
458| 1.75k| }
459| 1.75k| Ok(())
460| 1.75k|}
461| |
462| |pub(crate) enum LoopAction<O> {
463| | /// Parse succeeded; return this value to the caller.
464| | Done(O),
465| | /// Need more bytes — call `fill_buf(reader, n)` then re-step.
466| | NeedFill(usize),
467| | /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
468| | Skip(usize),
469| | /// Parse failed permanently. Carries the structural-unit kind so
470| | /// the eventual `Error::Malformed` is labelled correctly.
471| | Failed {
472| | kind: crate::error::MalformedKind,
473| | message: String,
474| | },
475| |}
476| |
477| |/// Closure type passed to [`parse_loop_step`].
478| |pub(crate) type ParseFn<'a, O> =
479| | dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
480| |
481| |/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
482| 390|pub(crate) fn parse_loop_step<O>(
483| 390| buffer: &[u8],
484| 390| offset: usize,
485| 390| parsing_state: &mut Option<ParsingState>,
486| 390| parse: &mut ParseFn<'_, O>,
487| 390|) -> LoopAction<O> {
488| 390| match parse(buffer, offset, parsing_state.take()) {
489| 234| Ok(o) => LoopAction::Done(o),
490| 156| Err(es) => {
491| 156| *parsing_state = es.state;
492| 156| match es.err {
493| 73| ParsingError::Need(n) => LoopAction::NeedFill(n),
494| 81| ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
495| 2| ParsingError::Failed { kind, message } => LoopAction::Failed { kind, message },
496| | }
497| | }
498| | }
499| 390|}
500| |
501| |#[derive(Debug, Clone)]
502| |pub(crate) enum ParsingState {
503| | TiffHeader(TiffHeader),
504| | HeifExifSize(usize),
505| | Cr3ExifSize(usize),
506| | /// PNG chunk walker has already validated the 8-byte signature.
507| | /// Carried across `Need` / `ClearAndSkip` retries so the resumed
508| | /// call doesn't re-check signature against a mid-stream slice.
509| | PngPastSignature,
510| |}
511| |
512| |impl Display for ParsingState {
513| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
514| 0| match self {
515| 0| ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
516| 0| ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
517| 0| ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
518| 0| ParsingState::PngPastSignature => f.write_str("ParsingState: PngPastSignature"),
519| | }
520| 0| }
521| |}
522| |
523| |// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
524| |// parsing and `ParsingState` threading for format-specific state machines.
525| |pub(crate) trait BufParser: Buf + Debug {
526| | fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
527| |
528| 152| fn load_and_parse<R: Read, P, O>(
529| 152| &mut self,
530| 152| reader: &mut R,
531| 152| skip_by_seek: SkipBySeekFn<R>,
532| 152| mut parse: P,
533| 152| ) -> Result<O, ParsedError>
534| 152| where
535| 152| P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
536| | {
537| 152| self.load_and_parse_with_offset(
538| 152| reader,
539| 152| skip_by_seek,
540| 247| |data, _, state| parse(data, state),
541| | 0,
542| | )
543| 152| }
544| |
545| | #[tracing::instrument(skip_all)]
546| 152| fn load_and_parse_with_offset<R: Read, P, O>(
547| 152| &mut self,
548| 152| reader: &mut R,
549| 152| skip_by_seek: SkipBySeekFn<R>,
550| 152| mut parse: P,
551| 152| offset: usize,
552| 152| ) -> Result<O, ParsedError>
553| 152| where
554| 152| P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
555| | {
556| 152| if offset >= self.buffer().len() {
557| 0| self.fill_buf(reader, MIN_GROW_SIZE)?;
558| 152| }
559| 152| let mut parsing_state: Option<ParsingState> = None;
560| | loop {
561| 247| match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
562| 144| LoopAction::Done(o) => return Ok(o),
563| 63| LoopAction::NeedFill(needed) => {
564| 63| let to_read = max(needed, MIN_GROW_SIZE);
565| 63| let n = self.fill_buf(reader, to_read)?;
^59 ^4
566| 59| if n == 0 {
567| 0| return Err(ParsedError::NoEnoughBytes);
568| 59| }
569| | }
570| 38| LoopAction::Skip(n) => {
571| 38| self.clear_and_skip(reader, skip_by_seek, n)?;
^2
572| | }
573| 2| LoopAction::Failed { kind, message } => {
574| 2| return Err(ParsedError::Failed { kind, message })
575| | }
576| | }
577| | }
578| 152| }
579| |
580| | #[tracing::instrument(skip(reader, skip_by_seek))]
581| 38| fn clear_and_skip<R: Read>(
582| 38| &mut self,
583| 38| reader: &mut R,
584| 38| skip_by_seek: SkipBySeekFn<R>,
585| 38| n: usize,
586| 38| ) -> Result<(), ParsedError> {
587| 38| match clear_and_skip_decide(self.buffer().len(), n) {
588| | SkipPlan::AdvanceOnly => {
589| 0| self.set_position(self.position() + n);
590| 0| Ok(())
591| | }
592| 38| SkipPlan::ClearAndSkip { extra: skip_n } => {
593| 38| self.clear();
594| 38| let done = (skip_by_seek)(
^37
595| 38| reader,
596| 38| skip_n.try_into().map_err(|_| ParsedError::Failed {
597| | // No format context available here: the parser
598| | // hit an internal limit honoring a caller's skip.
599| | // Pick a sensible default — see #55 follow-up.
600| 0| kind: crate::error::MalformedKind::IsoBmffBox,
601| 0| message: "skip too many bytes".into(),
602| 0| })?,
603| 1| )?;
604| 37| if !done {
605| 13| let mut skipped = 0;
606| 25| while skipped < skip_n {
607| 13| let mut to_skip = skip_n - skipped;
608| 13| to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
609| 13| let n = self.fill_buf(reader, to_skip)?;
^12 ^1
610| 12| skipped += n;
611| 12| if skipped <= skip_n {
612| 12| self.clear();
613| 12| } else {
614| 0| let remain = skipped - skip_n;
615| 0| self.set_position(self.buffer().len() - remain);
616| 0| break;
617| | }
618| | }
619| 24| }
620| |
621| 36| if self.buffer().is_empty() {
622| 36| self.fill_buf(reader, MIN_GROW_SIZE)?;
^0
623| 0| }
624| 36| Ok(())
625| | }
626| | }
627| 38| }
628| |}
629| |
630| |impl BufParser for MediaParser {
631| | #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
632| 1.37k| fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
633| 1.37k| if self.state.is_memory_mode() {
634| | // Memory mode owns every byte it will ever have. A request for
635| | // more is "the parser walked off the end of the input"; surface
636| | // it the same way the streaming path surfaces a 0-byte read.
637| 2| return Err(std::io::ErrorKind::UnexpectedEof.into());
638| 1.37k| }
639| 1.37k| check_fill_size(self.state.buf().len(), size)?;
^0
640| |
641| | // Do not pre-allocate `size` bytes: a crafted box header can declare a
642| | // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
643| | // stream length. reserve_exact would allocate that memory immediately
644| | // even when the reader has only a few bytes left. read_to_end grows the
645| | // buffer from the reader's actual size hint instead.
646| 1.37k| let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
^0
647| 1.37k| if n == 0 {
648| 13| tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
^11 ^11
649| 13| return Err(std::io::ErrorKind::UnexpectedEof.into());
650| 1.35k| }
651| |
652| 1.35k| tracing::debug!(
653| | ?size,
654| | ?n,
655| 0| buf_len = self.state.buf().len(),
656| | "fill_buf: read bytes"
657| | );
658| |
659| 1.35k| Ok(n)
660| 1.37k| }
661| |}
662| |
663| |impl Buf for MediaParser {
664| 2.21k| fn buffer(&self) -> &[u8] {
665| 2.21k| self.state.buffer()
666| 2.21k| }
667| |
668| 109| fn clear(&mut self) {
669| 109| self.state.clear();
670| 109| }
671| |
672| 0| fn set_position(&mut self, pos: usize) {
673| 0| self.state.set_position(pos);
674| 0| }
675| |
676| 0| fn position(&self) -> usize {
677| 0| self.state.position()
678| 0| }
679| |}
680| |
681| |/// A `MediaParser` can parse media info from a [`MediaSource`].
682| |///
683| |/// `MediaParser` manages inner parse buffers that can be shared between
684| |/// multiple parsing tasks, thus avoiding frequent memory allocations.
685| |///
686| |/// Therefore:
687| |///
688| |/// - Try to reuse a `MediaParser` instead of creating a new one every time
689| |/// you need it.
690| |///
691| |/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
692| |///
693| |/// ## Example
694| |///
695| |/// ```rust
696| |/// use nom_exif::*;
697| |/// use chrono::DateTime;
698| |///
699| |/// let mut parser = MediaParser::new();
700| |///
701| |/// // ------------------- Parse Exif Info
702| |/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
703| |/// assert_eq!(ms.kind(), MediaKind::Image);
704| |/// let mut iter = parser.parse_exif(ms).unwrap();
705| |///
706| |/// let entry = iter.next().unwrap();
707| |/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
708| |/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
709| |///
710| |/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
711| |/// // we can start the iteration from the beginning.
712| |/// let exif: Exif = iter.clone().into();
713| |/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
714| |///
715| |/// // ------------------- Parse Track Info
716| |/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
717| |/// assert_eq!(ms.kind(), MediaKind::Track);
718| |/// let info = parser.parse_track(ms).unwrap();
719| |///
720| |/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
721| |/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
722| |/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
723| |/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
724| |/// assert_eq!(
725| |/// info.gps_info().unwrap().latitude,
726| |/// LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
727| |/// );
728| |/// ```
729| |pub struct MediaParser {
730| | state: BufferedParserState,
731| |}
732| |
733| |impl Debug for MediaParser {
734| 148| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
735| 148| f.debug_struct("MediaParser")
736| 148| .field("state", &self.state)
737| 148| .finish_non_exhaustive()
738| 148| }
739| |}
740| |
741| |impl Default for MediaParser {
742| 165| fn default() -> Self {
743| 165| Self {
744| 165| state: BufferedParserState::new(),
745| 165| }
746| 165| }
747| |}
748| |
749| |pub(crate) trait ShareBuf {
750| | /// Take ownership of the parser's active buffer and return the full
751| | /// allocation as `Bytes` plus the parser's `position` at share-time.
752| | /// Caller is responsible for slicing: a parse-loop range `r` corresponds
753| | /// to absolute range `(r.start + position)..(r.end + position)`.
754| | fn share_buf(&mut self) -> (bytes::Bytes, usize);
755| |}
756| |
757| |impl ShareBuf for MediaParser {
758| 91| fn share_buf(&mut self) -> (bytes::Bytes, usize) {
759| 91| self.state.share_buf()
760| 91| }
761| |}
762| |
763| |impl MediaParser {
764| 165| pub fn new() -> Self {
765| 165| Self::default()
766| 165| }
767| |
768| | /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
769| | /// if the source is a `Track` (use [`Self::parse_track`] instead).
770| | ///
771| | /// As of v3.3, this method also accepts memory-mode sources built via
772| | /// [`MediaSource::from_memory`]. The deprecated [`Self::parse_exif_from_bytes`]
773| | /// is now a thin adapter that delegates here.
774| | ///
775| | /// `MediaParser` reuses its internal parse buffer across calls, so prefer
776| | /// reusing a single `MediaParser` over creating a new one per file. Drop
777| | /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
778| | /// the next `parse_*` call so the buffer can be reclaimed.
779| 82| pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
780| 82| self.reset();
781| 82| let res: crate::Result<ExifIter> = (|| {
782| 82| if let Some(memory) = ms.memory.take() {
^15
783| | // Memory-mode: zero-copy share of caller-owned bytes.
784| 15| self.state.set_memory(memory);
785| 15| if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
^2
786| 2| return Err(crate::Error::ExifNotFound);
787| 13| }
788| 13| crate::exif::parse_exif_iter(
789| 13| self,
790| 13| ms.mime.unwrap_image(),
791| 13| &mut ms.reader,
792| 13| ms.skip_by_seek,
793| | )
794| | } else {
795| | // Streaming-mode: existing path verbatim.
796| 67| self.acquire_buf();
797| 67| self.buf_mut().append(&mut ms.buf);
798| | // PNG-only EOF tolerance: a tEXt-only PNG can be smaller
799| | // than HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png),
800| | // so the mime-detection prefill consumes the whole reader
801| | // and fill_buf returns UnexpectedEof. The bytes we need
802| | // are already in the parse buffer — proceed. Other formats
803| | // keep the strict-EOF contract.
804| 67| let is_png = matches!(
^60
805| 66| ms.mime,
806| | crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
807| | );
808| 67| match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
809| 63| Ok(_) => {}
810| 2| Err(e)
811| 4| if is_png
812| 2| && !self.buffer().is_empty()
813| 2| && e.kind() == io::ErrorKind::UnexpectedEof => {}
814| 2| Err(e) => return Err(e.into()),
815| | }
816| 65| if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
^1
817| 1| return Err(crate::Error::ExifNotFound);
818| 64| }
819| 64| crate::exif::parse_exif_iter(
820| 64| self,
821| 64| ms.mime.unwrap_image(),
822| 64| &mut ms.reader,
823| 64| ms.skip_by_seek,
824| | )
825| | }
826| | })();
827| 82| self.reset();
828| 82| res
829| 82| }
830| |
831| | /// Parse track info from a video/audio source.
832| | ///
833| | /// Parse track info from a video/audio source.
834| | ///
835| | /// In v3.1, this also accepts JPEG images that carry an embedded
836| | /// Pixel/Google Motion Photo trailer. As of v3.3, it also accepts
837| | /// memory-mode sources built via [`MediaSource::from_memory`]; the
838| | /// deprecated [`Self::parse_track_from_bytes`] is now a thin
839| | /// adapter that delegates here.
840| 73| pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
841| 73| self.reset();
842| 73| let res: crate::Result<TrackInfo> = (|| {
843| 73| if let Some(memory) = ms.memory.take() {
^9
844| | // Memory mode: zero-copy.
845| 9| self.state.set_memory(memory);
846| 9| let mime_track = match ms.mime {
^7
847| 2| crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
848| 7| crate::file::MediaMime::Track(t) => t,
849| | };
850| 7| let out = self.load_and_parse(&mut ms.reader, ms.skip_by_seek, |data, _| {
851| 7| crate::video::parse_track_info(data, mime_track)
852| 7| .map_err(|e| ParsingErrorState::new(e, None))
^0 ^0 ^0
853| 7| })?;
^0
854| 7| Ok(out)
855| | } else {
856| | // Streaming mode: existing path verbatim.
857| 64| self.acquire_buf();
858| 64| self.buf_mut().append(&mut ms.buf);
859| 64| self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
^1
860| 5| match ms.mime {
861| | crate::file::MediaMime::Image(crate::file::MediaMimeImage::Jpeg) => {
862| 3| self.parse_jpeg_motion_photo(&mut ms.reader)
863| | }
864| 2| crate::file::MediaMime::Image(_) => Err(crate::Error::TrackNotFound),
865| 58| crate::file::MediaMime::Track(mime_track) => {
866| 58| let skip = ms.skip_by_seek;
867| 91| Ok(self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
^58 ^58 ^58 ^58
868| 91| crate::video::parse_track_info(data, mime_track)
869| 91| .map_err(|e| ParsingErrorState::new(e, None))
^37 ^37^37
870| 91| })?)
^4
871| | }
872| | }
873| | }
874| | })();
875| 73| self.reset();
876| 73| res
877| 73| }
878| |
879| | /// Read a JPEG to EOF, locate a Pixel-style Motion Photo MP4 trailer,
880| | /// and parse it as track metadata. Returns
881| | /// [`crate::Error::TrackNotFound`] if no Motion Photo signal is
882| | /// present in the JPEG's XMP.
883| 3| fn parse_jpeg_motion_photo<R: Read>(&mut self, reader: &mut R) -> crate::Result<TrackInfo> {
884| | // Drain the rest of the JPEG into the parse buffer so we can
885| | // address the trailing MP4 by its byte offset from EOF.
886| 3| reader.read_to_end(self.buf_mut())?;
^0
887| 3| let buf = self.buf_mut();
888| 3| let Some(offset) = crate::jpeg::find_motion_photo_offset(buf) else {
^1
889| 2| return Err(crate::Error::TrackNotFound);
890| | };
891| 1| let trailer_start = (buf.len() as u64)
892| 1| .checked_sub(offset)
893| 1| .ok_or(crate::Error::TrackNotFound)? as usize;
^0
894| 1| let trailer = &buf[trailer_start..];
895| |
896| | // The trailer can be MP4 / MOV / 3gp depending on the source device;
897| | // dispatch by sniffing it as a fresh ISO BMFF input.
898| 1| let trailer_mime =
899| 1| crate::file::MediaMime::try_from(trailer).map_err(|_| crate::Error::TrackNotFound)?;
^0
900| 1| let mime_track = match trailer_mime {
901| 1| crate::file::MediaMime::Track(t) => t,
902| 0| crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
903| | };
904| 1| crate::video::parse_track_info(trailer, mime_track).map_err(|e| match e {
^0
905| | crate::error::ParsingError::Need(_) | crate::error::ParsingError::ClearAndSkip(_) => {
906| 0| crate::Error::UnexpectedEof {
907| 0| context: "motion-photo trailer",
908| 0| }
909| | }
910| 0| crate::error::ParsingError::Failed { kind, message } => {
911| 0| crate::Error::Malformed { kind, message }
912| | }
913| 0| })
914| 3| }
915| |
916| | /// Parse Exif metadata from an in-memory byte payload built via
917| | /// the deprecated [`MediaSource::<()>::from_bytes`].
918| | ///
919| | /// **Deprecated since v3.3.0**: use [`Self::parse_exif`] with
920| | /// [`MediaSource::from_memory`] directly.
921| | #[deprecated(
922| | since = "3.3.0",
923| | note = "Use `parse_exif` directly — it now accepts memory-mode \
924| | sources built via `MediaSource::from_memory`."
925| | )]
926| 6| pub fn parse_exif_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<ExifIter> {
927| 6| self.parse_exif(ms.into_empty())
928| 6| }
929| |
930| | /// **Deprecated since v3.3.0**: use [`Self::parse_track`] with
931| | /// [`MediaSource::from_memory`] directly.
932| | #[deprecated(
933| | since = "3.3.0",
934| | note = "Use `parse_track` with `MediaSource::from_memory`."
935| | )]
936| 4| pub fn parse_track_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<TrackInfo> {
937| 4| self.parse_track(ms.into_empty())
938| 4| }
939| |
940| | /// Parse all metadata from an image source: EXIF (if any) and
941| | /// format-specific extras (PNG `tEXt` chunks, etc.).
942| | ///
943| | /// Returns `Err(Error::ExifNotFound)` if neither EXIF nor any
944| | /// format-specific metadata is found. Returns
945| | /// `Err(Error::TrackNotFound)`-style errors on track inputs (use
946| | /// `parse_track` instead).
947| | ///
948| | /// **Lazy form** — this method returns `ImageMetadata<ExifIter>`.
949| | /// Convert to the eager `ImageMetadata<Exif>` via `.into()` if
950| | /// desired.
951| 11| pub fn parse_image_metadata<R: Read>(
952| 11| &mut self,
953| 11| mut ms: MediaSource<R>,
954| 11| ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
955| 11| self.reset();
956| 11| let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = (|| {
957| | // Reject track inputs early (parse_track is the right API).
958| 11| let mime_img = match ms.mime {
^10
959| 10| crate::file::MediaMime::Image(img) => img,
960| 1| crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
961| | };
962| |
963| | // Memory-mode shortcut + buffer setup mirrors parse_exif.
964| 10| if let Some(memory) = ms.memory.take() {
^2
965| 2| self.state.set_memory(memory);
966| 2| } else {
967| 8| self.acquire_buf();
968| 8| self.buf_mut().append(&mut ms.buf);
969| | // PNG-only EOF tolerance: a tEXt-only PNG can be smaller than
970| | // HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png) so the
971| | // mime-detection prefill consumes the whole reader and
972| | // fill_buf returns UnexpectedEof. The bytes we need are
973| | // already in the parse buffer — proceed. Other formats keep
974| | // the strict-EOF contract.
975| 8| let is_png = mime_img == crate::file::MediaMimeImage::Png;
976| 8| match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
977| 6| Ok(_) => {}
978| 2| Err(e)
979| 2| if is_png
980| 2| && !self.buffer().is_empty()
981| 2| && e.kind() == io::ErrorKind::UnexpectedEof => {}
982| 0| Err(e) => return Err(e.into()),
983| | }
984| | }
985| |
986| 10| if mime_img == crate::file::MediaMimeImage::Png {
987| 8| let (exif, text_chunks) =
988| 8| crate::exif::parse_png_full(self, &mut ms.reader, ms.skip_by_seek)?;
^0
989| 8| let format = if text_chunks.is_empty() {
990| 2| None
991| | } else {
992| 6| Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
993| 6| entries: text_chunks,
994| 6| }))
995| | };
996| 8| if exif.is_none() && format.is_none() {
^4 ^4
997| 2| return Err(crate::Error::ExifNotFound);
998| 6| }
999| 6| Ok(crate::ImageMetadata { exif, format })
1000| | } else {
1001| | // Non-PNG: existing parse_exif_iter path; format always None.
1002| 2| let iter =
1003| 2| crate::exif::parse_exif_iter(self, mime_img, &mut ms.reader, ms.skip_by_seek)?;
^0
1004| 2| Ok(crate::ImageMetadata {
1005| 2| exif: Some(iter),
1006| 2| format: None,
1007| 2| })
1008| | }
1009| | })(
1010| | );
1011| 11| self.reset();
1012| 11| res
1013| 11| }
1014| |
1015| 520| fn reset(&mut self) {
1016| 520| self.state.reset();
1017| 520| }
1018| |
1019| 234| fn buf_mut(&mut self) -> &mut Vec<u8> {
1020| 234| self.state.buf_mut()
1021| 234| }
1022| |
1023| 228| fn acquire_buf(&mut self) {
1024| 228| self.state.acquire_buf();
1025| 228| }
1026| |}
1027| |
1028| |#[cfg(feature = "tokio")]
1029| |mod tokio_impl {
1030| | use super::*;
1031| | use crate::error::ParsingErrorState;
1032| | use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
1033| | use tokio::io::{AsyncRead, AsyncReadExt};
1034| |
1035| | impl AsyncBufParser for MediaParser {
1036| 382| async fn fill_buf<R: AsyncRead + Unpin>(
1037| 382| &mut self,
1038| 382| reader: &mut R,
1039| 382| size: usize,
1040| 382| ) -> std::io::Result<usize> {
1041| 382| if self.state.is_memory_mode() {
1042| | // Memory mode owns every byte it will ever have. Surface
1043| | // "walked off end of input" the same way the streaming path
1044| | // surfaces a 0-byte read.
1045| 0| return Err(std::io::ErrorKind::UnexpectedEof.into());
1046| 382| }
1047| 382| check_fill_size(self.state.buf().len(), size)?;
^0
1048| | // Same rationale as the sync version: do not pre-allocate `size` bytes.
1049| 382| let n = reader
1050| 382| .take(size as u64)
1051| 382| .read_to_end(self.state.buf_mut())
1052| 382| .await?;
^0
1053| 382| if n == 0 {
1054| 6| return Err(std::io::ErrorKind::UnexpectedEof.into());
1055| 376| }
1056| 376| Ok(n)
1057| 382| }
1058| | }
1059| |
1060| | impl MediaParser {
1061| | /// Parse Exif metadata from an async image source. Returns
1062| | /// `Error::ExifNotFound` if the source is a `Track`.
1063| | ///
1064| | /// As of v3.3, also accepts memory-mode sources built via
1065| | /// [`AsyncMediaSource::from_memory`]; the memory branch shares
1066| | /// caller-owned `Bytes` zero-copy through `state.set_memory`.
1067| 21| pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
1068| 21| &mut self,
1069| 21| mut ms: AsyncMediaSource<R>,
1070| 21| ) -> crate::Result<ExifIter> {
1071| 21| self.reset();
1072| 21| let res: crate::Result<ExifIter> = async {
1073| 21| if let Some(memory) = ms.memory.take() {
^2
1074| | // Memory-mode: zero-copy share of caller-owned bytes.
1075| 2| self.state.set_memory(memory);
1076| 2| if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
^0
1077| 0| return Err(crate::Error::ExifNotFound);
1078| 2| }
1079| 2| crate::exif::parse_exif_iter_async(
1080| 2| self,
1081| 2| ms.mime.unwrap_image(),
1082| 2| &mut ms.reader,
1083| 2| ms.skip_by_seek,
1084| 2| )
1085| 2| .await
1086| | } else {
1087| 19| self.acquire_buf();
1088| 19| self.buf_mut().append(&mut ms.buf);
1089| | // PNG-only EOF tolerance mirrors the sync path: small
1090| | // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1091| | // consumed during mime detection, so fill_buf returns
1092| | // UnexpectedEof. The bytes are already in the parse
1093| | // buffer; proceed.
1094| 19| let is_png = matches!(
^18
1095| 19| ms.mime,
1096| | crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
1097| | );
1098| 19| match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1099| 19| .await
1100| | {
1101| 16| Ok(_) => {}
1102| 1| Err(e)
1103| 3| if is_png
1104| 1| && !self.buffer().is_empty()
1105| 1| && e.kind() == io::ErrorKind::UnexpectedEof => {}
1106| 2| Err(e) => return Err(e.into()),
1107| | }
1108| 17| if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
^0
1109| 0| return Err(crate::Error::ExifNotFound);
1110| 17| }
1111| 17| crate::exif::parse_exif_iter_async(
1112| 17| self,
1113| 17| ms.mime.unwrap_image(),
1114| 17| &mut ms.reader,
1115| 17| ms.skip_by_seek,
1116| 17| )
1117| 17| .await
1118| | }
1119| 21| }
1120| 21| .await;
1121| 21| self.reset();
1122| 21| res
1123| 21| }
1124| |
1125| 6| pub async fn parse_image_metadata_async<R: AsyncRead + Unpin + Send>(
1126| 6| &mut self,
1127| 6| mut ms: AsyncMediaSource<R>,
1128| 6| ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
1129| 6| self.reset();
1130| 6| let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = async {
1131| 6| let mime_img = match ms.mime {
1132| 6| crate::file::MediaMime::Image(img) => img,
1133| 0| crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
1134| | };
1135| |
1136| 6| if let Some(memory) = ms.memory.take() {
^2
1137| 2| self.state.set_memory(memory);
1138| 2| } else {
1139| 4| self.acquire_buf();
1140| 4| self.buf_mut().append(&mut ms.buf);
1141| | // PNG-only EOF tolerance mirrors the sync path: small
1142| | // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1143| | // consumed during mime detection, so fill_buf returns
1144| | // UnexpectedEof; the bytes we need are already in the
1145| | // parse buffer.
1146| 4| let is_png = mime_img == crate::file::MediaMimeImage::Png;
1147| 4| match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1148| 4| .await
1149| | {
1150| 3| Ok(_) => {}
1151| 1| Err(e)
1152| 1| if is_png
1153| 1| && !self.buffer().is_empty()
1154| 1| && e.kind() == io::ErrorKind::UnexpectedEof => {}
1155| 0| Err(e) => return Err(e.into()),
1156| | }
1157| | }
1158| |
1159| 6| if mime_img == crate::file::MediaMimeImage::Png {
1160| 5| let (exif, text_chunks) =
1161| 5| crate::exif::parse_png_full_async(self, &mut ms.reader, ms.skip_by_seek)
1162| 5| .await?;
^0
1163| 5| let format = if text_chunks.is_empty() {
1164| 1| None
1165| | } else {
1166| 4| Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
1167| 4| entries: text_chunks,
1168| 4| }))
1169| | };
1170| 5| if exif.is_none() && format.is_none() {
^3 ^3
1171| 1| return Err(crate::Error::ExifNotFound);
1172| 4| }
1173| 4| Ok(crate::image_metadata::ImageMetadata { exif, format })
1174| | } else {
1175| 1| let iter = crate::exif::parse_exif_iter_async(
1176| 1| self,
1177| 1| mime_img,
1178| 1| &mut ms.reader,
1179| 1| ms.skip_by_seek,
1180| 1| )
1181| 1| .await?;
^0
1182| 1| Ok(crate::image_metadata::ImageMetadata {
1183| 1| exif: Some(iter),
1184| 1| format: None,
1185| 1| })
1186| | }
1187| 6| }
1188| 6| .await;
1189| 6| self.reset();
1190| 6| res
1191| 6| }
1192| |
1193| | /// Parse track info from an async video/audio source. Returns
1194| | /// `Error::TrackNotFound` if the source is an `Image`.
1195| | ///
1196| | /// As of v3.3, also accepts memory-mode sources built via
1197| | /// [`AsyncMediaSource::from_memory`].
1198| 67| pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
1199| 67| &mut self,
1200| 67| mut ms: AsyncMediaSource<R>,
1201| 67| ) -> crate::Result<TrackInfo> {
1202| 67| self.reset();
1203| 67| let res: crate::Result<TrackInfo> = async {
1204| 67| let mime_track = match ms.mime {
1205| 0| crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
1206| 67| crate::file::MediaMime::Track(t) => t,
1207| | };
1208| 67| if let Some(memory) = ms.memory.take() {
^1
1209| 1| self.state.set_memory(memory);
1210| 1| } else {
1211| 66| self.acquire_buf();
1212| 66| self.buf_mut().append(&mut ms.buf);
1213| 66| <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
^2
1214| | }
1215| 65| let skip = ms.skip_by_seek;
1216| 65| let out = <Self as AsyncBufParser>::load_and_parse(
1217| 65| self,
1218| 65| &mut ms.reader,
1219| 65| skip,
1220| 107| |data, _| {
1221| 107| crate::video::parse_track_info(data, mime_track)
1222| 107| .map_err(|e| ParsingErrorState::new(e, None))
^42 ^42^42
1223| 107| },
1224| | )
1225| 65| .await?;
^0
1226| 65| Ok(out)
1227| 67| }
1228| 67| .await;
1229| 67| self.reset();
1230| 67| res
1231| 67| }
1232| | }
1233| |}
1234| |
1235| |#[cfg(test)]
1236| |mod tests {
1237| | use std::sync::{LazyLock, Mutex, MutexGuard};
1238| |
1239| | use super::*;
1240| | use test_case::case;
1241| |
1242| | enum TrackExif {
1243| | Track,
1244| | Exif,
1245| | NoData,
1246| | Invalid,
1247| | }
1248| | use TrackExif::*;
1249| |
1250| 1| static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
1251| 51| fn parser() -> MutexGuard<'static, MediaParser> {
1252| 51| PARSER.lock().unwrap()
1253| 51| }
1254| |
1255| | #[case("3gp_640x360.3gp", Track)]
1256| | #[case("broken.jpg", Exif)]
1257| | #[case("compatible-brands-fail.heic", Invalid)]
1258| | #[case("compatible-brands-fail.mov", Invalid)]
1259| | #[case("compatible-brands.heic", NoData)]
1260| | #[case("compatible-brands.mov", NoData)]
1261| | #[case("embedded-in-heic.mov", Track)]
1262| | #[case("exif.heic", Exif)]
1263| | #[case("exif.jpg", Exif)]
1264| | #[case("exif-no-tz.jpg", Exif)]
1265| | #[case("fujifilm_x_t1_01.raf.meta", Exif)]
1266| | #[case("meta.mov", Track)]
1267| | #[case("meta.mp4", Track)]
1268| | #[case("mka.mka", Track)]
1269| | #[case("mkv_640x360.mkv", Track)]
1270| | #[case("exif-one-entry.heic", Exif)]
1271| | #[case("no-exif.jpg", NoData)]
1272| | #[case("tif.tif", Exif)]
1273| | #[case("ramdisk.img", Invalid)]
1274| | #[case("webm_480.webm", Track)]
1275| 20| fn parse_media(path: &str, te: TrackExif) {
1276| 20| let mut parser = parser();
1277| 20| let ms = MediaSource::open(Path::new("testdata").join(path));
1278| 20| match te {
1279| | Track => {
1280| 7| let ms = ms.unwrap();
1281| 7| assert_eq!(ms.kind(), MediaKind::Track);
1282| 7| let _: TrackInfo = parser.parse_track(ms).unwrap();
1283| | }
1284| | Exif => {
1285| 7| let ms = ms.unwrap();
1286| 7| assert_eq!(ms.kind(), MediaKind::Image);
1287| 7| let mut it: ExifIter = parser.parse_exif(ms).unwrap();
1288| 7| let _ = it.parse_gps();
1289| |
1290| 7| if path.contains("one-entry") {
1291| 1| assert!(it.next().is_some());
1292| 1| assert!(it.next().is_none());
1293| |
1294| 1| let exif: crate::Exif = it.clone_rewound().into();
1295| 1| assert!(exif.get(ExifTag::Orientation).is_some());
1296| 6| } else {
1297| 6| let _: crate::Exif = it.clone_rewound().into();
1298| 6| }
1299| | }
1300| | NoData => {
1301| 3| let ms = ms.unwrap();
1302| 3| match ms.kind() {
1303| 2| MediaKind::Image => {
1304| 2| let res = parser.parse_exif(ms);
1305| 2| res.unwrap_err();
1306| 2| }
1307| 1| MediaKind::Track => {
1308| 1| let res = parser.parse_track(ms);
1309| 1| res.unwrap_err();
1310| 1| }
1311| | }
1312| | }
1313| 3| Invalid => {
1314| 3| ms.unwrap_err();
1315| 3| }
1316| | }
1317| 20| }
1318| |
1319| | use crate::testkit::open_sample;
1320| | use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
1321| | use chrono::{DateTime, FixedOffset, NaiveDateTime};
1322| | use test_case::test_case;
1323| |
1324| | #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
1325| | #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
1326| | #[test_case("exif.jpg", ExifTag::DateTimeOriginal,
1327| | (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(),
1328| | Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
1329| | #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal,
1330| | (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1331| 4| fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1332| 4| let mut parser = parser();
1333| |
1334| 4| let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1335| 4| assert_eq!(mf.kind(), MediaKind::Image);
1336| 4| let iter: ExifIter = parser.parse_exif(mf).unwrap();
1337| 4| let exif: Exif = iter.into();
1338| 4| assert_eq!(exif.get(tag).unwrap(), &v);
1339| |
1340| 4| let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1341| 4| assert_eq!(mf.kind(), MediaKind::Image);
1342| 4| let iter: ExifIter = parser.parse_exif(mf).unwrap();
1343| 4| let exif: Exif = iter.into();
1344| 4| assert_eq!(exif.get(tag).unwrap(), &v);
1345| 4| }
1346| |
1347| | use crate::video::TrackInfoTag::*;
1348| |
1349| | #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1350| | #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1351| | #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1352| | #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1353| | #[test_case("meta.mov", Make, "Apple".into())]
1354| | #[test_case("meta.mov", Model, "iPhone X".into())]
1355| | #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1356| | #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1357| | #[test_case("meta.mp4", Width, 1920_u32.into())]
1358| | #[test_case("meta.mp4", Height, 1080_u32.into())]
1359| | #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1360| | #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1361| | #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1362| | #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1363| | #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1364| | #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1365| | #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1366| | #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1367| | #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1368| 19| fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1369| 19| let mut parser = parser();
1370| |
1371| 19| let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1372| 19| let info: TrackInfo = parser.parse_track(mf).unwrap();
1373| 19| assert_eq!(info.get(tag).unwrap(), &v);
1374| |
1375| 19| let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1376| 19| let info: TrackInfo = parser.parse_track(mf).unwrap();
1377| 19| assert_eq!(info.get(tag).unwrap(), &v);
1378| 19| }
1379| |
1380| | #[test_case("crash_moov-trak")]
1381| | #[test_case("crash_skip_large")]
1382| | #[test_case("crash_add_large")]
1383| 3| fn parse_track_crash(path: &str) {
1384| 3| let mut parser = parser();
1385| |
1386| 3| let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1387| 3| let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1388| |
1389| 3| let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1390| 3| let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1391| 3| }
1392| |
1393| | // Regression: a crafted ISOBMFF file declares an extended 64-bit box size
1394| | // just under MAX_PARSE_BUF_SIZE (~1 GB). Pre-fix, the unseekable parser called
1395| | // reserve_exact() with that size before reading, allocating ~1 GB even when
1396| | // the actual stream contained only a few KB. See commit 81f9e8a.
1397| | #[test]
1398| 1| fn parse_oom_large_box() {
1399| 1| let mut parser = parser();
1400| |
1401| 1| let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1402| 1| let _: Result<ExifIter, _> = parser.parse_exif(mf);
1403| |
1404| 1| let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1405| 1| let _: Result<ExifIter, _> = parser.parse_exif(mf);
1406| |
1407| 1| let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1408| 1| let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1409| |
1410| 1| let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1411| 1| let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1412| 1| }
1413| |
1414| | #[test]
1415| 1| fn media_kind_classifies_image_and_track() {
1416| 1| let img = MediaSource::open("testdata/exif.jpg").unwrap();
1417| 1| assert_eq!(img.kind(), MediaKind::Image);
1418| |
1419| 1| let trk = MediaSource::open("testdata/meta.mov").unwrap();
1420| 1| assert_eq!(trk.kind(), MediaKind::Track);
1421| 1| }
1422| |
1423| | #[test]
1424| 1| fn media_source_open() {
1425| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1426| 1| assert_eq!(ms.kind(), MediaKind::Image);
1427| 1| }
1428| |
1429| | #[test]
1430| 1| fn parse_exif_returns_exif_iter() {
1431| 1| let mut parser = parser();
1432| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1433| 1| let _: ExifIter = parser.parse_exif(ms).unwrap();
1434| 1| }
1435| |
1436| | #[test]
1437| 1| fn parse_track_returns_track_info() {
1438| 1| let mut parser = parser();
1439| 1| let ms = MediaSource::open("testdata/meta.mov").unwrap();
1440| 1| let _: TrackInfo = parser.parse_track(ms).unwrap();
1441| 1| }
1442| |
1443| | #[test]
1444| 1| fn parse_exif_on_track_returns_exif_not_found_v3() {
1445| 1| let mut parser = parser();
1446| 1| let ms = MediaSource::open("testdata/meta.mov").unwrap();
1447| 1| let res = parser.parse_exif(ms);
1448| 1| assert!(matches!(res, Err(crate::Error::ExifNotFound)));
^0
1449| 1| }
1450| |
1451| | #[test]
1452| 1| fn parse_track_on_image_returns_track_not_found_v3() {
1453| 1| let mut parser = parser();
1454| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1455| 1| let res = parser.parse_track(ms);
1456| 1| assert!(matches!(res, Err(crate::Error::TrackNotFound)));
^0
1457| 1| }
1458| |
1459| | #[cfg(feature = "tokio")]
1460| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1461| 1| async fn media_parser_parse_exif_async() {
1462| | use crate::parser_async::AsyncMediaSource;
1463| 1| let mut parser = MediaParser::new();
1464| 1| let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
1465| 1| let _: ExifIter = parser.parse_exif_async(ms).await.unwrap();
1466| 1| }
1467| |
1468| | #[cfg(feature = "tokio")]
1469| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1470| 1| async fn media_parser_parse_track_async() {
1471| | use crate::parser_async::AsyncMediaSource;
1472| 1| let mut parser = MediaParser::new();
1473| 1| let ms = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
1474| 1| let _: TrackInfo = parser.parse_track_async(ms).await.unwrap();
1475| 1| }
1476| |
1477| | #[test]
1478| 1| fn parser_recycles_alloc_when_exif_iter_dropped() {
1479| 1| let mut parser = MediaParser::new();
1480| |
1481| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1482| 1| let iter = parser.parse_exif(ms).unwrap();
1483| 1| let exif: crate::Exif = iter.into();
1484| 1| drop(exif);
1485| 1| let ptr_after_first = parser.state.cached_ptr_for_test();
1486| |
1487| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1488| 1| let iter = parser.parse_exif(ms).unwrap();
1489| 1| let _exif: crate::Exif = iter.into();
1490| 1| let ptr_after_second = parser.state.cached_ptr_for_test();
1491| |
1492| 1| assert!(
1493| 1| ptr_after_first.is_some() && ptr_after_first == ptr_after_second,
1494| | "expected recycled allocation, got {:?} -> {:?}",
1495| | ptr_after_first,
1496| | ptr_after_second
1497| | );
1498| 1| }
1499| |
1500| | #[test]
1501| 1| fn parser_new_does_no_upfront_allocation() {
1502| 1| let parser = MediaParser::new();
1503| 1| assert!(parser.state.cached_ptr_for_test().is_none());
1504| 1| assert!(parser.state.buf_is_none_for_test());
1505| 1| }
1506| |
1507| | #[test]
1508| 1| fn buffered_state_memory_mode_sets_and_reads() {
1509| 1| let mut s = BufferedParserState::new();
1510| 1| s.set_memory(bytes::Bytes::from_static(b"abcdefgh"));
1511| 1| assert!(s.is_memory_mode());
1512| 1| assert_eq!(s.buffer(), b"abcdefgh");
1513| 1| s.set_position(3);
1514| 1| assert_eq!(s.buffer(), b"defgh");
1515| 1| }
1516| |
1517| | #[test]
1518| 1| fn buffered_state_share_buf_memory_mode_is_zero_copy() {
1519| 1| let original = bytes::Bytes::from_static(b"the parser owns nothing here");
1520| 1| let original_ptr = original.as_ptr();
1521| 1| let mut s = BufferedParserState::new();
1522| 1| s.set_memory(original);
1523| 1| let (shared, position) = s.share_buf();
1524| 1| assert_eq!(position, 0);
1525| 1| assert_eq!(
1526| 1| shared.as_ptr(),
1527| | original_ptr,
1528| | "memory share must be a Bytes::clone, not a Vec round-trip"
1529| | );
1530| | // After share_buf, the parser's memory slot is taken — leaving the state
1531| | // ready for the next `reset()` cycle.
1532| 1| assert!(!s.is_memory_mode());
1533| 1| }
1534| |
1535| | #[test]
1536| 1| fn buffered_state_reset_clears_memory() {
1537| 1| let mut s = BufferedParserState::new();
1538| 1| s.set_memory(bytes::Bytes::from_static(b"x"));
1539| 1| s.reset();
1540| 1| assert!(!s.is_memory_mode());
1541| 1| assert_eq!(s.position, 0);
1542| 1| }
1543| |
1544| | #[test]
1545| 1| fn buffered_state_acquire_buf_skips_in_memory_mode() {
1546| 1| let mut s = BufferedParserState::new();
1547| 1| s.set_memory(bytes::Bytes::from_static(b"data"));
1548| 1| s.acquire_buf();
1549| | // No streaming buf was allocated.
1550| 1| assert!(s.buf.is_none());
1551| | // Memory still readable.
1552| 1| assert_eq!(s.buffer(), b"data");
1553| 1| }
1554| |
1555| | #[test]
1556| 1| fn media_source_from_memory_image_jpg() {
1557| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1558| 1| let ms = MediaSource::from_memory(raw).unwrap();
1559| 1| assert_eq!(ms.kind(), MediaKind::Image);
1560| 1| assert!(ms.memory.is_some());
1561| 1| }
1562| |
1563| | #[test]
1564| 1| fn media_source_from_memory_track_mov() {
1565| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1566| 1| let ms = MediaSource::from_memory(raw).unwrap();
1567| 1| assert_eq!(ms.kind(), MediaKind::Track);
1568| 1| }
1569| |
1570| | #[test]
1571| 1| fn media_source_from_memory_static_slice() {
1572| 1| let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1573| 1| let ms = MediaSource::from_memory(raw).unwrap();
1574| 1| assert_eq!(ms.kind(), MediaKind::Image);
1575| 1| }
1576| |
1577| | #[test]
1578| 1| fn media_source_from_memory_rejects_too_short() {
1579| 1| let raw = vec![0u8; 4];
1580| 1| let res = MediaSource::from_memory(raw);
1581| 1| assert!(res.is_err());
1582| 1| }
1583| |
1584| | #[test]
1585| 1| fn media_source_from_memory_rejects_unknown_mime() {
1586| 1| let raw = vec![0xAAu8; 256];
1587| 1| let res = MediaSource::from_memory(raw);
1588| 1| assert!(res.is_err());
1589| 1| }
1590| |
1591| | #[test]
1592| 1| fn parse_exif_unified_from_memory_jpg() {
1593| 1| let mut parser = MediaParser::new();
1594| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1595| 1| let ms = MediaSource::from_memory(raw).unwrap();
1596| 1| let iter = parser.parse_exif(ms).unwrap();
1597| 1| let exif: crate::Exif = iter.into();
1598| 1| assert!(exif.get(crate::ExifTag::Make).is_some());
1599| 1| }
1600| |
1601| | #[test]
1602| 1| fn parse_exif_unified_from_memory_heic() {
1603| 1| let mut parser = MediaParser::new();
1604| 1| let raw = std::fs::read("testdata/exif.heic").unwrap();
1605| 1| let ms = MediaSource::from_memory(raw).unwrap();
1606| 1| let iter = parser.parse_exif(ms).unwrap();
1607| 1| let exif: crate::Exif = iter.into();
1608| 1| assert_eq!(
1609| 1| exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1610| | Some("Apple")
1611| | );
1612| 1| }
1613| |
1614| | #[test]
1615| 1| fn parse_exif_unified_from_memory_zero_copy_preserved() {
1616| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1617| 1| let bytes = bytes::Bytes::from(raw);
1618| 1| let _original_ptr = bytes.as_ptr();
1619| |
1620| 1| let mut parser = MediaParser::new();
1621| 1| let ms = MediaSource::from_memory(bytes).unwrap();
1622| 1| let iter = parser.parse_exif(ms).unwrap();
1623| |
1624| | // Memory mode must not poison the recycle cache — same invariant
1625| | // the old parse_exif_from_bytes route asserts.
1626| 1| assert!(
1627| 1| parser.state.cached_ptr_for_test().is_none(),
1628| | "memory mode must not write to the streaming-buf recycle cache"
1629| | );
1630| 1| drop(iter);
1631| 1| }
1632| |
1633| | #[test]
1634| 1| fn parse_exif_unified_on_track_returns_exif_not_found() {
1635| 1| let mut parser = MediaParser::new();
1636| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1637| 1| let ms = MediaSource::from_memory(raw).unwrap();
1638| 1| let res = parser.parse_exif(ms);
1639| 1| assert!(matches!(res, Err(crate::Error::ExifNotFound)));
^0
1640| 1| }
1641| |
1642| | #[test]
1643| 1| fn parse_exif_unified_on_truncated_returns_io_error() {
1644| 1| let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1645| 1| raw.truncate(200);
1646| 1| let mut parser = MediaParser::new();
1647| 1| let ms = MediaSource::from_memory(raw).unwrap();
1648| 1| let res = parser.parse_exif(ms);
1649| 1| assert!(
1650| 1| res.is_err(),
1651| | "expected error on truncated bytes, got {:?}",
1652| | res
1653| | );
1654| 1| }
1655| |
1656| | #[test]
1657| 1| fn parse_exif_streaming_tiny_non_png_keeps_strict_eof() {
1658| | // The PNG EOF-tolerance branch is scoped to PNG. A tiny non-PNG file
1659| | // (here: the 36-byte compatible-brands.heic fixture) consumes its
1660| | // entire reader during mime-detection prefill, so the subsequent
1661| | // fill_buf hits UnexpectedEof. parse_exif must surface an error
1662| | // (any error) — never Ok — for non-PNG inputs.
1663| 1| let mut parser = MediaParser::new();
1664| 1| let ms = MediaSource::open("testdata/compatible-brands.heic").unwrap();
1665| 1| let res = parser.parse_exif(ms);
1666| 1| assert!(res.is_err(), "expected Err for tiny HEIC, got {:?}", res);
1667| 1| }
1668| |
1669| | #[test]
1670| | #[allow(deprecated)]
1671| 1| fn media_source_from_bytes_image_jpg() {
1672| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1673| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1674| 1| assert_eq!(ms.kind(), MediaKind::Image);
1675| 1| assert!(ms.memory.is_some());
1676| 1| }
1677| |
1678| | #[test]
1679| | #[allow(deprecated)]
1680| 1| fn media_source_from_bytes_track_mov() {
1681| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1682| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1683| 1| assert_eq!(ms.kind(), MediaKind::Track);
1684| 1| }
1685| |
1686| | #[test]
1687| | #[allow(deprecated)]
1688| 1| fn media_source_from_bytes_static_slice() {
1689| | // &'static [u8] should work via Into<Bytes> because the file is read
1690| | // into a Vec at compile-time-friendly size; here we use include_bytes.
1691| 1| let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1692| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1693| 1| assert_eq!(ms.kind(), MediaKind::Image);
1694| 1| }
1695| |
1696| | #[test]
1697| | #[allow(deprecated)]
1698| 1| fn media_source_from_bytes_rejects_too_short() {
1699| | // Below the smallest mime signature length: should fail mime detection.
1700| 1| let raw = vec![0u8; 4];
1701| 1| let res = MediaSource::from_bytes(raw);
1702| 1| assert!(res.is_err(), "expected mime-detection error");
1703| 1| }
1704| |
1705| | #[test]
1706| | #[allow(deprecated)]
1707| 1| fn media_source_from_bytes_rejects_unknown_mime() {
1708| | // Random bytes long enough to trigger detection but not match any
1709| | // signature.
1710| 1| let raw = vec![0xAAu8; 256];
1711| 1| let res = MediaSource::from_bytes(raw);
1712| 1| assert!(
1713| 1| res.is_err(),
1714| | "expected mime-detection error for unknown bytes"
1715| | );
1716| 1| }
1717| |
1718| | #[test]
1719| 1| fn p4_5_baseline_exif_jpg_full_dump() {
1720| | // Lock down the post-refactor invariant: parsing testdata/exif.jpg through
1721| | // the public API must yield the same set of (ifd, tag, value) triples
1722| | // before and after P4.5. We capture them as a sorted, formatted string so
1723| | // the assertion is a single literal comparison.
1724| 1| let mut parser = MediaParser::new();
1725| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1726| 1| let iter: ExifIter = parser.parse_exif(ms).unwrap();
1727| |
1728| 1| let mut entries: Vec<String> = iter
1729| 66| .map(|e| {
^1
1730| 66| let tag_name = match e.tag() {
1731| 66| crate::TagOrCode::Tag(t) => format!("{t}"),
1732| 0| crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
1733| | };
1734| 66| let value_str = e
1735| 66| .value()
1736| 66| .map(|v| format!("{v}"))
1737| 66| .unwrap_or_else(|| "<err>".into());
^0 ^0
1738| 66| format!("{}.{}={:?}", e.ifd(), tag_name, value_str)
1739| 66| })
1740| 1| .collect();
1741| 1| entries.sort();
1742| 1| let snapshot = entries.join("\n");
1743| |
1744| | // Sanity: should produce non-trivial content. Exact content is checked by
1745| | // the existing parse_media tests; this one guards against accidental
1746| | // re-ordering / dedup changes during the refactor.
1747| 1| assert!(
1748| 1| entries.len() > 5,
1749| | "expected >5 entries, got {}",
1750| 0| entries.len()
1751| | );
1752| 1| assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
1753| 1| }
1754| |
1755| | #[test]
1756| | #[allow(deprecated)]
1757| 1| fn parse_exif_from_bytes_jpg_basic() {
1758| 1| let mut parser = MediaParser::new();
1759| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1760| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1761| 1| let iter = parser.parse_exif_from_bytes(ms).unwrap();
1762| 1| let exif: crate::Exif = iter.into();
1763| 1| assert!(exif.get(crate::ExifTag::Make).is_some());
1764| 1| }
1765| |
1766| | #[test]
1767| | #[allow(deprecated)]
1768| 1| fn parse_exif_from_bytes_heic_basic() {
1769| 1| let mut parser = MediaParser::new();
1770| 1| let raw = std::fs::read("testdata/exif.heic").unwrap();
1771| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1772| 1| let iter = parser.parse_exif_from_bytes(ms).unwrap();
1773| 1| let exif: crate::Exif = iter.into();
1774| 1| assert_eq!(
1775| 1| exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1776| | Some("Apple")
1777| | );
1778| 1| }
1779| |
1780| | #[test]
1781| | #[allow(deprecated)]
1782| 1| fn parse_exif_from_bytes_zero_copy_shared_bytes() {
1783| | // Build a Bytes whose pointer we can compare. The ExifIter's underlying
1784| | // share must point to the same allocation — proving Bytes::clone path.
1785| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1786| 1| let bytes = bytes::Bytes::from(raw);
1787| 1| let original_ptr = bytes.as_ptr();
1788| |
1789| 1| let mut parser = MediaParser::new();
1790| 1| let ms = MediaSource::from_bytes(bytes).unwrap();
1791| 1| let iter = parser.parse_exif_from_bytes(ms).unwrap();
1792| |
1793| | // The cached pointer in parser state should be None in memory mode
1794| | // (memory mode does not write to cache — the user owns the alloc).
1795| 1| assert!(
1796| 1| parser.state.cached_ptr_for_test().is_none(),
1797| | "memory mode must not poison the recycle cache"
1798| | );
1799| |
1800| | // Drop the iter and confirm parser is clean for the next call.
1801| 1| drop(iter);
1802| |
1803| | // Build again; pointer identity proves we did not duplicate the alloc
1804| | // anywhere along the parse path.
1805| 1| let bytes2 = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
1806| 1| let ms2 = MediaSource::from_bytes(bytes2.clone()).unwrap();
1807| 1| let _iter2 = parser.parse_exif_from_bytes(ms2).unwrap();
1808| | // (We cannot assert pointer-equality across distinct user Bytes; the
1809| | // assertion above on the first parse is the load-bearing one.)
1810| 1| let _ = original_ptr; // explicit: original_ptr is the assertion target.
1811| 1| }
1812| |
1813| | #[test]
1814| | #[allow(deprecated)]
1815| 1| fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
1816| 1| let mut parser = MediaParser::new();
1817| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1818| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1819| 1| let res = parser.parse_exif_from_bytes(ms);
1820| 1| assert!(matches!(res, Err(crate::Error::ExifNotFound)));
^0
1821| 1| }
1822| |
1823| | #[test]
1824| | #[allow(deprecated)]
1825| 1| fn parse_exif_from_bytes_on_truncated_returns_io_error() {
1826| | // Truncate exif.jpg to just enough for mime detection but too short
1827| | // for the full EXIF block. Memory-mode fill_buf must surface
1828| | // UnexpectedEof when the parser walks off the end.
1829| 1| let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1830| 1| raw.truncate(200);
1831| 1| let mut parser = MediaParser::new();
1832| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1833| 1| let res = parser.parse_exif_from_bytes(ms);
1834| 1| assert!(
1835| 1| res.is_err(),
1836| | "expected error on truncated bytes, got {:?}",
1837| | res
1838| | );
1839| 1| }
1840| |
1841| | #[test]
1842| | #[allow(deprecated)]
1843| 1| fn parse_track_from_bytes_mov_basic() {
1844| 1| let mut parser = MediaParser::new();
1845| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1846| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1847| 1| let info = parser.parse_track_from_bytes(ms).unwrap();
1848| 1| assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1849| 1| assert_eq!(
1850| 1| info.get(crate::TrackInfoTag::Model),
1851| 1| Some(&"iPhone X".into())
1852| | );
1853| 1| }
1854| |
1855| | #[test]
1856| | #[allow(deprecated)]
1857| 1| fn parse_track_from_bytes_mp4_basic() {
1858| 1| let mut parser = MediaParser::new();
1859| 1| let raw = std::fs::read("testdata/meta.mp4").unwrap();
1860| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1861| 1| let info = parser.parse_track_from_bytes(ms).unwrap();
1862| 1| assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1863| 1| }
1864| |
1865| | #[test]
1866| | #[allow(deprecated)]
1867| 1| fn parse_track_from_bytes_mkv_basic() {
1868| 1| let mut parser = MediaParser::new();
1869| 1| let raw = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
1870| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1871| 1| let info = parser.parse_track_from_bytes(ms).unwrap();
1872| 1| assert_eq!(
1873| 1| info.get(crate::TrackInfoTag::Width),
1874| 1| Some(&(640_u32.into()))
1875| | );
1876| 1| }
1877| |
1878| | #[test]
1879| | #[allow(deprecated)]
1880| 1| fn parse_track_from_bytes_on_image_returns_track_not_found() {
1881| 1| let mut parser = MediaParser::new();
1882| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1883| 1| let ms = MediaSource::from_bytes(raw).unwrap();
1884| 1| let res = parser.parse_track_from_bytes(ms);
1885| 1| assert!(matches!(res, Err(crate::Error::TrackNotFound)));
^0
1886| 1| }
1887| |
1888| | #[test]
1889| 1| fn parse_track_unified_from_memory_mov() {
1890| 1| let mut parser = MediaParser::new();
1891| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
1892| 1| let ms = MediaSource::from_memory(raw).unwrap();
1893| 1| let info = parser.parse_track(ms).unwrap();
1894| 1| assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1895| 1| }
1896| |
1897| | #[test]
1898| 1| fn parse_track_unified_from_memory_mp4() {
1899| 1| let mut parser = MediaParser::new();
1900| 1| let raw = std::fs::read("testdata/meta.mp4").unwrap();
1901| 1| let ms = MediaSource::from_memory(raw).unwrap();
1902| 1| let info = parser.parse_track(ms).unwrap();
1903| 1| assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1904| 1| }
1905| |
1906| | #[test]
1907| 1| fn parse_track_unified_on_image_returns_track_not_found() {
1908| 1| let mut parser = MediaParser::new();
1909| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1910| 1| let ms = MediaSource::from_memory(raw).unwrap();
1911| 1| let res = parser.parse_track(ms);
1912| 1| assert!(matches!(res, Err(crate::Error::TrackNotFound)));
^0
1913| 1| }
1914| |
1915| | #[test]
1916| 1| fn parse_image_metadata_jpeg_returns_exif_only() {
1917| 1| let mut parser = MediaParser::new();
1918| 1| let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1919| 1| let img = parser.parse_image_metadata(ms).unwrap();
1920| 1| assert!(img.exif.is_some());
1921| 1| assert!(img.format.is_none());
1922| 1| }
1923| |
1924| | #[test]
1925| 1| fn parse_image_metadata_jpeg_from_memory() {
1926| 1| let mut parser = MediaParser::new();
1927| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
1928| 1| let ms = MediaSource::from_memory(raw).unwrap();
1929| 1| let img = parser.parse_image_metadata(ms).unwrap();
1930| 1| assert!(img.exif.is_some());
1931| 1| assert!(img.format.is_none());
1932| 1| }
1933| |
1934| | #[test]
1935| 1| fn parse_image_metadata_on_track_returns_exif_not_found() {
1936| 1| let mut parser = MediaParser::new();
1937| 1| let ms = MediaSource::open("testdata/meta.mov").unwrap();
1938| 1| let res = parser.parse_image_metadata(ms);
1939| 1| assert!(matches!(res, Err(crate::Error::ExifNotFound)));
^0
1940| 1| }
1941| |
1942| | /// Regression for issue #55. PNGs whose IDAT body exceeds
1943| | /// `INIT_BUF_SIZE` force the chunk walker through `ClearAndSkip`;
1944| | /// on retry the parse buffer no longer starts at byte 0 of the
1945| | /// file, and the signature recheck must not fire.
1946| | #[test]
1947| 1| fn parse_image_metadata_png_large_idat_streaming() {
1948| | use std::io::Cursor;
1949| 1| let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1950| 1| let mut parser = MediaParser::new();
1951| 1| let ms = MediaSource::seekable(Cursor::new(png)).unwrap();
1952| 1| assert_eq!(ms.kind(), crate::MediaKind::Image);
1953| 1| let res = parser.parse_image_metadata(ms);
1954| 1| assert!(
1955| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
1956| | "expected ExifNotFound on PNG with no EXIF / tEXt; got {res:?}"
1957| | );
1958| 1| }
1959| |
1960| | /// Same regression via `parse_exif` — exercises `parse_png_exif_iter`,
1961| | /// the sibling code path to `parse_png_full`.
1962| | #[test]
1963| 1| fn parse_exif_png_large_idat_streaming() {
1964| | use std::io::Cursor;
1965| 1| let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1966| 1| let mut parser = MediaParser::new();
1967| 1| let ms = MediaSource::seekable(Cursor::new(png)).unwrap();
1968| 1| let res = parser.parse_exif(ms);
1969| 1| assert!(
1970| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
1971| | "expected ExifNotFound; got {res:?}"
1972| | );
1973| 1| }
1974| |
1975| | /// Same regression via `MediaSource::unseekable` — exercises the
1976| | /// read-and-discard skip path in `clear_and_skip` rather than the
1977| | /// `seek_relative` shortcut.
1978| | #[test]
1979| 1| fn parse_image_metadata_png_large_idat_unseekable() {
1980| 1| let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1981| 1| let mut parser = MediaParser::new();
1982| 1| let ms = MediaSource::unseekable(NoSeek(std::io::Cursor::new(png))).unwrap();
1983| 1| let res = parser.parse_image_metadata(ms);
1984| 1| assert!(
1985| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
1986| | "expected ExifNotFound; got {res:?}"
1987| | );
1988| 1| }
1989| |
1990| | /// Wraps a reader to hide its `Seek` impl, so `MediaSource::unseekable`
1991| | /// is forced even when the underlying type happens to implement it.
1992| | struct NoSeek<R>(R);
1993| | impl<R: io::Read> io::Read for NoSeek<R> {
1994| 7| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1995| 7| self.0.read(buf)
1996| 7| }
1997| | }
1998| |
1999| 4| fn build_png_with_large_idat(idat_body: usize) -> Vec<u8> {
2000| 4| let mut out = Vec::new();
2001| 4| out.extend_from_slice(b"\x89PNG\r\n\x1a\n");
2002| | // IHDR (1x1, 8-bit grayscale)
2003| 4| out.extend_from_slice(&13u32.to_be_bytes());
2004| 4| out.extend_from_slice(b"IHDR");
2005| 4| out.extend_from_slice(&[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0]);
2006| 4| out.extend_from_slice(&[0, 0, 0, 0]); // CRC (chunk walker ignores it)
2007| | // IDAT
2008| 4| out.extend_from_slice(&(idat_body as u32).to_be_bytes());
2009| 4| out.extend_from_slice(b"IDAT");
2010| 4| out.resize(out.len() + idat_body, 0);
2011| 4| out.extend_from_slice(&[0, 0, 0, 0]); // CRC
2012| | // IEND
2013| 4| out.extend_from_slice(&0u32.to_be_bytes());
2014| 4| out.extend_from_slice(b"IEND");
2015| 4| out.extend_from_slice(&[0, 0, 0, 0]); // CRC
2016| 4| out
2017| 4| }
2018| |
2019| | #[cfg(feature = "tokio")]
2020| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2021| 1| async fn parse_image_metadata_async_jpeg() {
2022| | use crate::parser_async::AsyncMediaSource;
2023| 1| let mut parser = MediaParser::new();
2024| 1| let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
2025| 1| let img = parser.parse_image_metadata_async(ms).await.unwrap();
2026| 1| assert!(img.exif.is_some());
2027| 1| assert!(img.format.is_none());
2028| 1| }
2029| |
2030| | #[cfg(feature = "tokio")]
2031| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2032| 1| async fn async_media_source_from_memory_image_jpg() {
2033| | use crate::parser_async::AsyncMediaSource;
2034| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
2035| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2036| 1| assert_eq!(ms.kind(), MediaKind::Image);
2037| 1| assert!(ms.memory.is_some());
2038| 1| }
2039| |
2040| | #[cfg(feature = "tokio")]
2041| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2042| 1| async fn async_media_source_from_memory_track_mov() {
2043| | use crate::parser_async::AsyncMediaSource;
2044| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
2045| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2046| 1| assert_eq!(ms.kind(), MediaKind::Track);
2047| 1| }
2048| |
2049| | #[cfg(feature = "tokio")]
2050| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2051| 1| async fn async_media_source_from_memory_rejects_unknown_mime() {
2052| | use crate::parser_async::AsyncMediaSource;
2053| 1| let raw = vec![0xAAu8; 256];
2054| 1| let res = AsyncMediaSource::from_memory(raw);
2055| 1| assert!(res.is_err());
2056| 1| }
2057| |
2058| | #[cfg(feature = "tokio")]
2059| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2060| 1| async fn parse_exif_async_from_memory_jpg() {
2061| | use crate::parser_async::AsyncMediaSource;
2062| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
2063| 1| let mut parser = MediaParser::new();
2064| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2065| 1| let iter = parser.parse_exif_async(ms).await.unwrap();
2066| 1| let exif: crate::Exif = iter.into();
2067| 1| assert!(exif.get(crate::ExifTag::Make).is_some());
2068| 1| }
2069| |
2070| | #[cfg(feature = "tokio")]
2071| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2072| 1| async fn parse_exif_async_from_memory_zero_copy_preserved() {
2073| | use crate::parser_async::AsyncMediaSource;
2074| 1| let raw = std::fs::read("testdata/exif.jpg").unwrap();
2075| 1| let bytes = bytes::Bytes::from(raw);
2076| 1| let mut parser = MediaParser::new();
2077| 1| let ms = AsyncMediaSource::from_memory(bytes).unwrap();
2078| 1| let iter = parser.parse_exif_async(ms).await.unwrap();
2079| | // Memory mode must not poison the recycle cache — same invariant
2080| | // as the sync route asserts.
2081| 1| assert!(
2082| 1| parser.state.cached_ptr_for_test().is_none(),
2083| | "async memory mode must not write to the streaming-buf recycle cache"
2084| | );
2085| 1| drop(iter);
2086| 1| }
2087| |
2088| | #[cfg(feature = "tokio")]
2089| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2090| 1| async fn parse_track_async_from_memory_mov() {
2091| | use crate::parser_async::AsyncMediaSource;
2092| 1| let raw = std::fs::read("testdata/meta.mov").unwrap();
2093| 1| let mut parser = MediaParser::new();
2094| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2095| 1| let info = parser.parse_track_async(ms).await.unwrap();
2096| 1| assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
2097| 1| }
2098| |
2099| | #[cfg(feature = "tokio")]
2100| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2101| 1| async fn parse_image_metadata_async_from_memory_png() {
2102| | use crate::parser_async::AsyncMediaSource;
2103| 1| let raw = std::fs::read("testdata/exif.png").unwrap();
2104| 1| let mut parser = MediaParser::new();
2105| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2106| 1| let img = parser.parse_image_metadata_async(ms).await.unwrap();
2107| 1| assert!(img.exif.is_some());
2108| 1| assert!(img.format.is_some());
2109| 1| }
2110| |
2111| | /// Async counterpart of `parse_image_metadata_png_large_idat_streaming`
2112| | /// (regression for issue #55). `parser_async::clear_and_skip` is a
2113| | /// separate implementation; the state-threading through the shared
2114| | /// `parse_loop_step` must work identically in the async path.
2115| | #[cfg(feature = "tokio")]
2116| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2117| 1| async fn parse_image_metadata_async_png_large_idat_streaming() {
2118| | use crate::parser_async::AsyncMediaSource;
2119| 1| let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
2120| | // tokio's async I/O traits aren't on std::io::Cursor, so route
2121| | // through a real file. Pick a unique path to avoid concurrent
2122| | // test collisions.
2123| 1| let path =
2124| 1| std::env::temp_dir().join(format!("nom-exif-issue55-{}.png", std::process::id()));
2125| 1| tokio::fs::write(&path, &png).await.unwrap();
2126| 1| let mut parser = MediaParser::new();
2127| 1| let ms = AsyncMediaSource::open(&path).await.unwrap();
2128| 1| let res = parser.parse_image_metadata_async(ms).await;
2129| 1| let _ = tokio::fs::remove_file(&path).await;
2130| 1| assert!(
2131| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
2132| 1| "expected ExifNotFound; got {res:?}"
2133| 1| );
2134| 1| }
2135| |
2136| | #[cfg(feature = "tokio")]
2137| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2138| 1| async fn parse_image_metadata_async_from_memory_text_only_png() {
2139| | // Memory route: bypasses fill_buf entirely, just verifies the
2140| | // memory-mode path returns format-only metadata for a PNG with
2141| | // no EXIF.
2142| | use crate::parser_async::AsyncMediaSource;
2143| 1| let raw = std::fs::read("testdata/text-only.png").unwrap();
2144| 1| let mut parser = MediaParser::new();
2145| 1| let ms = AsyncMediaSource::from_memory(raw).unwrap();
2146| 1| let img = parser.parse_image_metadata_async(ms).await.unwrap();
2147| 1| assert!(img.exif.is_none());
2148| 1| assert!(img.format.is_some());
2149| 1| }
2150| |
2151| | // Streaming-path coverage for the PNG-scoped EOF tolerance. The
2152| | // 117-byte text-only.png is fully consumed during mime detection
2153| | // (HEADER_PARSE_BUF_SIZE = 128), so the parse-time fill_buf hits
2154| | // UnexpectedEof. The PNG-scoped tolerance must let the bytes already
2155| | // in the parse buffer drive the parse to completion. These tests
2156| | // would have caught the missed-async-tolerance bug the previous
2157| | // memory-mode tests did not.
2158| |
2159| | #[test]
2160| 1| fn parse_exif_streaming_text_only_png_returns_exif_not_found() {
2161| | // text-only.png has no EXIF — the contract is ExifNotFound, not
2162| | // UnexpectedEof. Pre-EOF-tolerance, this would surface
2163| | // UnexpectedEof because mime detection consumed all 117 bytes.
2164| 1| let mut parser = MediaParser::new();
2165| 1| let ms = MediaSource::open("testdata/text-only.png").unwrap();
2166| 1| let res = parser.parse_exif(ms);
2167| 1| assert!(
2168| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
2169| | "expected ExifNotFound for tEXt-only PNG, got {:?}",
2170| | res
2171| | );
2172| 1| }
2173| |
2174| | #[test]
2175| 1| fn parse_image_metadata_streaming_text_only_png() {
2176| 1| let mut parser = MediaParser::new();
2177| 1| let ms = MediaSource::open("testdata/text-only.png").unwrap();
2178| 1| let img = parser.parse_image_metadata(ms).unwrap();
2179| 1| assert!(img.exif.is_none());
2180| 1| assert!(img.format.is_some());
2181| 1| }
2182| |
2183| | #[cfg(feature = "tokio")]
2184| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2185| 1| async fn parse_exif_async_streaming_text_only_png_returns_exif_not_found() {
2186| | use crate::parser_async::AsyncMediaSource;
2187| 1| let mut parser = MediaParser::new();
2188| 1| let f = tokio::fs::File::open("testdata/text-only.png")
2189| 1| .await
2190| 1| .unwrap();
2191| 1| let ms = AsyncMediaSource::seekable(f).await.unwrap();
2192| 1| let res = parser.parse_exif_async(ms).await;
2193| 1| assert!(
2194| 1| matches!(res, Err(crate::Error::ExifNotFound)),
^0
2195| 1| "expected ExifNotFound for tEXt-only PNG via async streaming, got {:?}",
2196| 1| res
2197| 1| );
2198| 1| }
2199| |
2200| | #[cfg(feature = "tokio")]
2201| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2202| 1| async fn parse_image_metadata_async_streaming_text_only_png() {
2203| | use crate::parser_async::AsyncMediaSource;
2204| 1| let mut parser = MediaParser::new();
2205| 1| let f = tokio::fs::File::open("testdata/text-only.png")
2206| 1| .await
2207| 1| .unwrap();
2208| 1| let ms = AsyncMediaSource::seekable(f).await.unwrap();
2209| 1| let img = parser.parse_image_metadata_async(ms).await.unwrap();
2210| 1| assert!(img.exif.is_none());
2211| 1| assert!(img.format.is_some());
2212| 1| }
2213| |}
/home/min/dev/nom-exif/src/parser_async.rs:
1| |use std::{
2| | cmp::{max, min},
3| | fmt::Debug,
4| | io::{self},
5| | path::Path,
6| |};
7| |
8| |use tokio::{
9| | fs::File,
10| | io::{AsyncRead, AsyncReadExt, AsyncSeek},
11| |};
12| |
13| |use crate::{
14| | error::{ParsedError, ParsingErrorState},
15| | parser::{
16| | clear_and_skip_decide, parse_loop_step, Buf, LoopAction, ParsingState, SkipPlan,
17| | MAX_PARSE_BUF_SIZE, MIN_GROW_SIZE,
18| | },
19| |};
20| |
21| |// Should be enough for parsing header
22| |const HEADER_PARSE_BUF_SIZE: usize = 128;
23| |
24| |/// Async counterpart to `crate::parser::SkipBySeekFn<R>`. Closures that
25| |/// return a future cannot coerce to a plain `fn` type, so we use a fn pointer
26| |/// to a `Pin<Box<dyn Future>>`-returning closure. The Box-per-skip overhead
27| |/// is trivial against actual async I/O.
28| |pub(crate) type AsyncSkipBySeekFn<R> = for<'a> fn(
29| | &'a mut R,
30| | u64,
31| |) -> std::pin::Pin<
32| | Box<dyn std::future::Future<Output = io::Result<bool>> + Send + 'a>,
33| |>;
34| |
35| |pub struct AsyncMediaSource<R> {
36| | pub(crate) reader: R,
37| | pub(crate) buf: Vec<u8>,
38| | pub(crate) mime: crate::file::MediaMime,
39| | pub(crate) skip_by_seek: AsyncSkipBySeekFn<R>,
40| | /// Set when this source was constructed via [`Self::from_memory`].
41| | /// The full payload lives here as a zero-copy [`bytes::Bytes`]; the
42| | /// async parse methods branch on this field to take the memory path
43| | /// instead of `fill_buf`-ing from `reader`.
44| | pub(crate) memory: Option<bytes::Bytes>,
45| |}
46| |
47| |impl<R> Debug for AsyncMediaSource<R> {
48| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49| 0| f.debug_struct("AsyncMediaSource")
50| 0| .field("mime", &self.mime)
51| 0| .finish_non_exhaustive()
52| 0| }
53| |}
54| |
55| |impl<R: AsyncRead + Unpin> AsyncMediaSource<R> {
56| 98| async fn build(mut reader: R, skip_by_seek: AsyncSkipBySeekFn<R>) -> crate::Result<Self> {
57| 98| let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
58| 98| (&mut reader)
59| 98| .take(HEADER_PARSE_BUF_SIZE as u64)
60| 98| .read_to_end(&mut buf)
61| 98| .await?;
^0
62| 98| let mime: crate::file::MediaMime = buf.as_slice().try_into()?;
^92 ^92 ^6
63| 92| Ok(Self {
64| 92| reader,
65| 92| buf,
66| 92| mime,
67| 92| skip_by_seek,
68| 92| memory: None,
69| 92| })
70| 98| }
71| |
72| 35| pub fn kind(&self) -> crate::MediaKind {
73| 35| match self.mime {
74| 17| crate::file::MediaMime::Image(_) => crate::MediaKind::Image,
75| 18| crate::file::MediaMime::Track(_) => crate::MediaKind::Track,
76| | }
77| 35| }
78| |}
79| |
80| |impl AsyncMediaSource<tokio::io::Empty> {
81| | /// Build an [`AsyncMediaSource`] from an in-memory byte payload.
82| | ///
83| | /// Async counterpart of [`crate::MediaSource::from_memory`]. Returns
84| | /// `AsyncMediaSource<tokio::io::Empty>`, which satisfies the
85| | /// `<R: AsyncRead + Unpin + Send>` bound on
86| | /// [`MediaParser::parse_exif_async`](crate::MediaParser::parse_exif_async),
87| | /// [`parse_track_async`](crate::MediaParser::parse_track_async), and
88| | /// [`parse_image_metadata_async`](crate::MediaParser::parse_image_metadata_async)
89| | /// so a single async entry point per "what to parse" handles both
90| | /// streaming and in-memory inputs.
91| | ///
92| | /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
93| | /// `Vec<u8>`, `&'static [u8]`, `String`, `Box<[u8]>`, plus HTTP-stack
94| | /// body types implementing `Into<Bytes>`. Zero-copy: parsed
95| | /// `ExifIter` / sub-IFDs share the original `Bytes` via reference
96| | /// counting, no copy.
97| | ///
98| | /// # Example
99| | ///
100| | /// ```rust,no_run
101| | /// # async fn run() -> Result<(), nom_exif::Error> {
102| | /// use nom_exif::{AsyncMediaSource, MediaKind, MediaParser};
103| | ///
104| | /// let bytes = tokio::fs::read("./testdata/exif.jpg").await?;
105| | /// let ms = AsyncMediaSource::from_memory(bytes)?;
106| | /// assert_eq!(ms.kind(), MediaKind::Image);
107| | ///
108| | /// let mut parser = MediaParser::new();
109| | /// let _iter = parser.parse_exif_async(ms).await?;
110| | /// # Ok(()) }
111| | /// ```
112| 8| pub fn from_memory(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
113| 8| let bytes = bytes.into();
114| 8| let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
115| 8| let mime: crate::file::MediaMime = bytes[..head_end].try_into()?;
^7 ^7 ^1
116| | Ok(Self {
117| 7| reader: tokio::io::empty(),
118| 7| buf: Vec::new(),
119| 7| mime,
120| | // Placeholder: never invoked in memory mode (AdvanceOnly path).
121| 0| skip_by_seek: |_, _| Box::pin(async move { Ok(false) }),
122| 7| memory: Some(bytes),
123| | })
124| 8| }
125| |}
126| |
127| 74|fn make_seekable_skip<R: AsyncRead + AsyncSeek + Unpin + Send>() -> AsyncSkipBySeekFn<R> {
128| 27| |r, n| {
129| 27| Box::pin(async move {
130| | use std::io::SeekFrom;
131| | use tokio::io::AsyncSeekExt;
132| 27| let signed: i64 = n
133| 27| .try_into()
134| 27| .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
^0 ^0 ^0
135| 27| r.seek(SeekFrom::Current(signed)).await?;
^0
136| 27| Ok(true)
137| 27| })
138| 27| }
139| 74|}
140| |
141| 24|fn make_unseekable_skip<R: AsyncRead + Unpin + Send>() -> AsyncSkipBySeekFn<R> {
142| 16| |_, _| Box::pin(async move { Ok(false) })
143| 24|}
144| |
145| |impl<R: AsyncRead + AsyncSeek + Unpin + Send> AsyncMediaSource<R> {
146| 74| pub async fn seekable(reader: R) -> crate::Result<Self> {
147| 74| Self::build(reader, make_seekable_skip::<R>()).await
148| 74| }
149| |}
150| |
151| |impl<R: AsyncRead + Unpin + Send> AsyncMediaSource<R> {
152| 24| pub async fn unseekable(reader: R) -> crate::Result<Self> {
153| 24| Self::build(reader, make_unseekable_skip::<R>()).await
154| 24| }
155| |}
156| |
157| |impl AsyncMediaSource<File> {
158| | /// Open a file at `path` (via `tokio::fs::File`) and parse its header.
159| | /// For an already-open async `File` use [`Self::seekable`].
160| 45| pub async fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
161| 45| Self::seekable(File::open(path).await?).await
^0
162| 45| }
163| |}
164| |
165| |pub(crate) trait AsyncBufParser: Buf + Debug {
166| | async fn fill_buf<R: AsyncRead + Unpin>(
167| | &mut self,
168| | reader: &mut R,
169| | size: usize,
170| | ) -> io::Result<usize>;
171| |
172| 90| async fn load_and_parse<R: AsyncRead + Unpin, P, O>(
173| 90| &mut self,
174| 90| reader: &mut R,
175| 90| skip_by_seek: AsyncSkipBySeekFn<R>,
176| 90| parse: P,
177| 90| ) -> Result<O, ParsedError>
178| 90| where
179| 90| P: Fn(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
180| 90| {
181| 90| self.load_and_parse_with_offset(
182| 90| reader,
183| 90| skip_by_seek,
184| 143| |data, _, state| parse(data, state),
185| | 0,
186| | )
187| 90| .await
188| 90| }
189| |
190| | #[tracing::instrument(skip_all)]
191| 90| async fn load_and_parse_with_offset<R: AsyncRead + Unpin, P, O>(
192| 90| &mut self,
193| 90| reader: &mut R,
194| 90| skip_by_seek: AsyncSkipBySeekFn<R>,
195| 90| parse: P,
196| 90| offset: usize,
197| 90| ) -> Result<O, ParsedError>
198| 90| where
199| 90| P: Fn(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
200| 90| {
201| | if offset >= self.buffer().len() {
202| | self.fill_buf(reader, MIN_GROW_SIZE).await?;
203| | }
204| |
205| | let mut parsing_state: Option<ParsingState> = None;
206| 90| let mut parse = parse; // coerce Fn → FnMut
207| | loop {
208| | match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
209| | LoopAction::Done(o) => return Ok(o),
210| | LoopAction::NeedFill(needed) => {
211| | let to_read = max(needed, MIN_GROW_SIZE);
212| | let n = self.fill_buf(reader, to_read).await?;
213| | if n == 0 {
214| | return Err(ParsedError::NoEnoughBytes);
215| | }
216| | }
217| | LoopAction::Skip(n) => {
218| | self.clear_and_skip(reader, skip_by_seek, n).await?;
219| | }
220| | LoopAction::Failed { kind, message } => {
221| | return Err(ParsedError::Failed { kind, message })
222| | }
223| | }
224| | }
225| 90| }
226| |
227| | #[tracing::instrument(skip(reader, skip_by_seek))]
228| 43| async fn clear_and_skip<R: AsyncRead + Unpin>(
229| 43| &mut self,
230| 43| reader: &mut R,
231| 43| skip_by_seek: AsyncSkipBySeekFn<R>,
232| 43| n: usize,
233| 43| ) -> Result<(), ParsedError> {
234| | match clear_and_skip_decide(self.buffer().len(), n) {
235| | SkipPlan::AdvanceOnly => {
236| | self.set_position(self.position() + n);
237| | return Ok(());
238| | }
239| | SkipPlan::ClearAndSkip { extra: skip_n } => {
240| | self.clear();
241| | let done = (skip_by_seek)(
242| | reader,
243| | skip_n.try_into().map_err(|_| ParsedError::Failed {
244| | // No format context available here: the parser
245| | // hit an internal limit honoring a caller's skip.
246| | // Pick a sensible default — see #55 follow-up.
247| 0| kind: crate::error::MalformedKind::IsoBmffBox,
248| 0| message: "skip too many bytes".into(),
249| 0| })?,
250| | )
251| | .await?;
252| | if !done {
253| | let mut skipped = 0;
254| | while skipped < skip_n {
255| | let mut to_skip = skip_n - skipped;
256| | to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
257| | let n = self.fill_buf(reader, to_skip).await?;
258| | skipped += n;
259| | if skipped <= skip_n {
260| | self.clear();
261| | } else {
262| | let remain = skipped - skip_n;
263| | self.set_position(self.buffer().len() - remain);
264| | break;
265| | }
266| | }
267| | }
268| |
269| | if self.buffer().is_empty() {
270| | self.fill_buf(reader, MIN_GROW_SIZE).await?;
271| | }
272| | Ok(())
273| | }
274| | }
275| 43| }
276| |}
277| |
278| |#[cfg(test)]
279| |mod tests {
280| | use std::path::Path;
281| |
282| | use super::*;
283| | use crate::{ExifIter, TrackInfo};
284| | use test_case::case;
285| |
286| | enum TrackExif {
287| | Track,
288| | Exif,
289| | NoData,
290| | Invalid,
291| | }
292| | use tokio::fs::File;
293| | use TrackExif::*;
294| |
295| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
296| | #[case("3gp_640x360.3gp", Track)]
297| | #[case("broken.jpg", Exif)]
298| | #[case("compatible-brands-fail.heic", Invalid)]
299| | #[case("compatible-brands-fail.mov", Invalid)]
300| | #[case("compatible-brands.heic", NoData)]
301| | #[case("compatible-brands.mov", NoData)]
302| | #[case("embedded-in-heic.mov", Track)]
303| | #[case("exif.heic", Exif)]
304| | #[case("exif.jpg", Exif)]
305| | #[case("meta.mov", Track)]
306| | #[case("meta.mp4", Track)]
307| | #[case("mka.mka", Track)]
308| | #[case("mkv_640x360.mkv", Track)]
309| | #[case("exif-one-entry.heic", Exif)]
310| | #[case("no-exif.jpg", NoData)]
311| | #[case("tif.tif", Exif)]
312| | #[case("ramdisk.img", Invalid)]
313| | #[case("webm_480.webm", Track)]
314| 36| async fn parse_media(path: &str, te: TrackExif) {
315| | use crate::MediaParser;
316| 36| let mut parser = MediaParser::new();
317| 36| let ms = AsyncMediaSource::open(Path::new("testdata").join(path)).await;
318| 36| match te {
319| 36| Track => {
320| 36| let ms = ms.unwrap();
^14 ^14^14
321| 36| assert_eq!(ms.kind(), crate::MediaKind::Track);
^14 ^14^14
322| 36| let _: TrackInfo = parser.parse_track_async(ms).await.unwrap();
^14 ^14 ^14 ^14
323| 36| }
324| 36| Exif => {
325| 36| let ms = ms.unwrap();
^10 ^10^10
326| 36| assert_eq!(ms.kind(), crate::MediaKind::Image);
^10 ^10^10
327| 36| let mut it: ExifIter = parser.parse_exif_async(ms).await.unwrap();
^10 ^10 ^10 ^10 ^10
328| 36| let _ = it.parse_gps();
^10
329| 36|
330| 36| if path.contains("one-entry") {
^10 ^10
331| 36| assert!(it.next().is_some());
^2 ^2 ^2
332| 36| assert!(it.next().is_none());
^2 ^2 ^2
333| 36|
334| 36| let exif: crate::Exif = it.clone_rewound().into();
^2 ^2 ^2 ^2 ^2
335| 36| assert!(exif.get(ExifTag::Orientation).is_some());
^2 ^2 ^2
336| 36| } else {
337| 8| let _: crate::Exif = it.clone_rewound().into();
338| 8| }
339| 36| }
340| 36| NoData => {
341| 36| let ms = ms.unwrap();
^6 ^6 ^6
342| 36| match ms.kind() {
^6
343| 36| crate::MediaKind::Image => {
344| 36| let res = parser.parse_exif_async(ms).await;
^4 ^4 ^4
345| 36| res.unwrap_err();
^4 ^4
346| 36| }
347| 36| crate::MediaKind::Track => {
348| 36| let res = parser.parse_track_async(ms).await;
^2 ^2 ^2
349| 36| res.unwrap_err();
^2 ^2
350| 36| }
351| 36| }
352| 36| }
353| 36| Invalid => {
354| 6| ms.unwrap_err();
355| 6| }
356| 36| }
357| 36| }
358| |
359| | use crate::{EntryValue, ExifTag, TrackInfoTag};
360| | use chrono::DateTime;
361| | use test_case::test_case;
362| |
363| | use crate::video::TrackInfoTag::*;
364| |
365| | #[test]
366| 1| fn fill_buf_check_rejects_oversize_when_combined_with_existing() {
367| | use crate::parser::check_fill_size;
368| | // The combined size guard used by both sync and async fill_buf.
369| | // existing=MAX-1024, requested=2*1024 => existing+requested > MAX => Err.
370| 1| let res = check_fill_size(MAX_PARSE_BUF_SIZE - 1024, 2 * 1024);
371| 1| assert!(res.is_err(), "expected Err, got Ok");
372| | // Below the threshold passes.
373| 1| let res = check_fill_size(MAX_PARSE_BUF_SIZE - 4096, 1024);
374| 1| assert!(res.is_ok());
375| 1| }
376| |
377| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
378| | #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
379| | #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
380| | #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
381| | #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
382| | #[test_case("meta.mov", Make, "Apple".into())]
383| | #[test_case("meta.mov", Model, "iPhone X".into())]
384| | #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
385| | #[test_case("meta.mp4", Width, 1920_u32.into())]
386| | #[test_case("meta.mp4", Height, 1080_u32.into())]
387| | #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
388| | #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
389| | #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
390| 24| async fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
391| | use crate::MediaParser;
392| 24| let mut parser = MediaParser::new();
393| |
394| 24| let f = File::open(Path::new("testdata").join(path)).await.unwrap();
395| 24| let ms = AsyncMediaSource::seekable(f).await.unwrap();
396| 24| let info: TrackInfo = parser.parse_track_async(ms).await.unwrap();
397| 24| assert_eq!(info.get(tag).unwrap(), &v);
398| |
399| 24| let f = File::open(Path::new("testdata").join(path)).await.unwrap();
400| 24| let ms = AsyncMediaSource::unseekable(f).await.unwrap();
401| 24| let info: TrackInfo = parser.parse_track_async(ms).await.unwrap();
402| 24| assert_eq!(info.get(tag).unwrap(), &v);
403| 24| }
404| |
405| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
406| 1| async fn async_media_kind_classifies_image_and_track() {
407| 1| let img = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
408| 1| assert_eq!(img.kind(), crate::MediaKind::Image);
409| |
410| 1| let trk = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
411| 1| assert_eq!(trk.kind(), crate::MediaKind::Track);
412| 1| }
413| |
414| | #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
415| 1| async fn async_media_source_open() {
416| 1| let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
417| 1| assert_eq!(ms.kind(), crate::MediaKind::Image);
418| 1| }
419| |}
/home/min/dev/nom-exif/src/png.rs:
1| |//! PNG chunk parser — pure-function implementation.
2| |//!
3| |//! This module is the layer that walks the PNG chunk stream and extracts:
4| |//! - The EXIF data range (either an `eXIf` chunk or a hex-encoded TIFF blob
5| |//! in a legacy `Raw profile type {exif,APP1}` `tEXt` chunk — phase 5
6| |//! adds the legacy decoding).
7| |//! - The `tEXt` chunks as Latin-1-decoded `(key, value)` pairs.
8| |//!
9| |//! The parser is **stateless and pure**: it operates on a `&[u8]` buffer
10| |//! and returns either a `PngParseOut` (success) or a `ParsingErrorState`
11| |//! (`Need(n)` to fill more bytes, `Skip(n)` to clear-and-skip, or
12| |//! `Failed(msg)` for unrecoverable parse errors). The caller (`MediaParser`)
13| |//! drives I/O.
14| |
15| |use std::ops::Range;
16| |
17| |use crate::error::{MalformedKind, ParsingError, ParsingErrorState};
18| |use crate::parser::ParsingState;
19| |
20| |/// Output of [`extract_chunks`]: where the EXIF data lives (if any) and
21| |/// every `tEXt` (key, value) pair encountered, in file order.
22| |#[derive(Debug)]
23| |pub(crate) struct PngParseOut {
24| | pub exif: Option<PngExifSource>,
25| | pub text_chunks: Vec<(String, String)>,
26| |}
27| |
28| |/// Where the EXIF data was found in the PNG.
29| |#[derive(Debug)]
30| |pub(crate) enum PngExifSource {
31| | /// PNG 1.5 `eXIf` chunk — TIFF body sits at this byte range inside
32| | /// the parser buffer. Use this with `bytes::Bytes::slice` for zero-copy.
33| | EXif(Range<usize>),
34| |
35| | /// Legacy hex-encoded TIFF inside `Raw profile type {exif,APP1}` `tEXt`.
36| | /// Already hex-decoded + APP1 prefix stripped — owned bytes. Phase 5
37| | /// adds the actual decoding logic; until then this variant is unused.
38| | Legacy(Vec<u8>),
39| |}
40| |
41| |const PNG_SIGNATURE: &[u8; 8] = b"\x89PNG\r\n\x1a\n";
42| |
43| |/// Maximum size of a single `tEXt` chunk we'll capture. Above this
44| |/// threshold the chunk is skipped (defensive against crafted inputs).
45| |const MAX_TEXT_CHUNK_SIZE: u32 = 1024 * 1024; // 1 MiB
46| |
47| |/// Maximum cumulative captured `tEXt` byte-length. After exceeding this,
48| |/// further `tEXt` chunks are skipped (already-captured entries kept).
49| |const MAX_TEXT_CHUNKS_TOTAL: usize = 16 * 1024 * 1024; // 16 MiB
50| |
51| |/// Decode bytes as Latin-1 into a `String`. Infallible — every Latin-1
52| |/// byte maps to a Unicode code point (U+0000..U+00FF). Per PNG spec, `tEXt`
53| |/// chunks use Latin-1 encoding; we do not sniff for UTF-8.
54| 102|fn decode_latin1(bytes: &[u8]) -> String {
55| 325k| bytes.iter().map(|&b| b as char).collect()
^102 ^102 ^102 ^102
56| 102|}
57| |
58| |/// Decode the value of a `Raw profile type *` `tEXt` chunk.
59| |///
60| |/// ImageMagick writes these chunks with a header preamble:
61| |/// ```text
62| |/// \n
63| |/// exif\n
64| |/// 54\n <- length in bytes (decimal, with leading whitespace)
65| |/// 4949 2a00 0800 0000 ... <- hex bytes
66| |/// ```
67| |///
68| |/// This helper:
69| |/// 1. Skips the leading `\n` line.
70| |/// 2. Skips the second line (`exif`, `app1`, etc).
71| |/// 3. Skips the third line (length).
72| |/// 4. Hex-decodes the rest, ignoring all whitespace.
73| 14|fn decode_raw_profile_value(s: &str) -> Result<Vec<u8>, ()> {
74| 14| let mut lines = s.lines();
75| | // Skip the empty first line, the type line, and the length line.
76| | // Tolerate variations: just consume the first 3 newlines worth of header.
77| 14| lines.next().ok_or(())?;
^0
78| 14| lines.next().ok_or(())?;
^0
79| 14| lines.next().ok_or(())?;
^1
80| 13| let body: String = lines.collect();
81| 13| hex_decode(&body)
82| 14|}
83| |
84| 19|fn hex_decode(s: &str) -> Result<Vec<u8>, ()> {
85| 19| let mut out = Vec::with_capacity(s.len() / 2);
86| 19| let mut high: Option<u8> = None;
87| 319k| for c in s.bytes() {
^19^19
88| 319k| let nibble = match c {
^319k
89| 319k| b'0'..=b'9' => c - b'0',
^208k
90| 111k| b'a'..=b'f' => c - b'a' + 10,
91| 3| b'A'..=b'F' => c - b'A' + 10,
^2
92| 3| b' ' | b'\n' | b'\r' | b'\t' => continue,
93| 1| _ => return Err(()),
94| | };
95| 319k| match high.take() {
96| 159k| None => high = Some(nibble),
97| 159k| Some(h) => out.push((h << 4) | nibble),
98| | }
99| | }
100| 18| if high.is_some() {
101| 1| return Err(());
102| 17| }
103| 17| Ok(out)
104| 19|}
105| |
106| |/// Walk the PNG chunk stream and extract EXIF + tEXt entries.
107| |///
108| |/// Pure function: no I/O, takes a buffer slice plus the resume-state
109| |/// from any prior call, returns either output or a `ParsingErrorState`
110| |/// requesting more bytes / skipping bytes.
111| |///
112| |/// `state` is `None` while `buf` is anchored at byte 0 of the file
113| |/// (initial call, or after a `Need` which only grows the buffer).
114| |/// After a `ClearAndSkip` the parser has dropped the buffer and the
115| |/// resumed `buf` starts at a fresh file offset, so the returned state
116| |/// flips to `Some(ParsingState::PngPastSignature)` to tell the next
117| |/// call not to look for the 8-byte signature at `buf[..8]`.
118| |///
119| |/// `ClearAndSkip(n)` is interpreted by the parser as "advance the
120| |/// parser's logical position by `n` bytes from where it is now". The
121| |/// closure sees `buf` already offset to that position, so the skip
122| |/// request must cover *both* the bytes the walker consumed inside
123| |/// `buf` (`cursor`) and the chunk bytes still beyond it. That is
124| |/// `cursor + total`, not `total - remaining` — the latter would only
125| |/// account for bytes past the buffer's end and leave the walker
126| |/// stranded mid-chunk on retry (issue #55).
127| |#[tracing::instrument(skip(buf))]
128| 56|pub(crate) fn extract_chunks(
129| 56| buf: &[u8],
130| 56| state: Option<ParsingState>,
131| 56|) -> Result<PngParseOut, ParsingErrorState> {
132| 56| let past_signature = matches!(state, Some(ParsingState::PngPastSignature));
^51 ^5
133| | // Preserves the incoming flag across error returns. A `Need` keeps
134| | // whatever the caller already had; only `ClearAndSkip` flips a
135| | // previously-false flag to true (handled at the skip sites).
136| 56| let preserve = || past_signature.then_some(ParsingState::PngPastSignature);
^14 ^14 ^14
137| 56| let skipped = || Some(ParsingState::PngPastSignature);
^8
138| |
139| 56| let mut cursor = if past_signature {
^54
140| | // Resumed after a ClearAndSkip; buf[0] is a chunk-header
141| | // boundary, not the PNG signature.
142| 5| 0
143| | } else {
144| 51| if buf.len() < PNG_SIGNATURE.len() {
145| 1| return Err(ParsingErrorState::new(
146| 1| ParsingError::Need(PNG_SIGNATURE.len() - buf.len()),
147| 1| None,
148| 1| ));
149| 50| }
150| 50| if &buf[..PNG_SIGNATURE.len()] != PNG_SIGNATURE {
151| 1| return Err(ParsingErrorState::new(
152| 1| ParsingError::Failed {
153| 1| kind: MalformedKind::PngChunk,
154| 1| message: "PNG: bad signature".into(),
155| 1| },
156| 1| None,
157| 1| ));
158| 49| }
159| 49| PNG_SIGNATURE.len()
160| | };
161| |
162| 54| let mut out = PngParseOut {
163| 54| exif: None,
164| 54| text_chunks: Vec::new(),
165| 54| };
166| 54| let mut text_total: usize = 0;
167| 54| let mut exif_priority: u8 = 0; // 0 = none, 1 = legacy exif, 2 = legacy APP1, 3 = eXIf
168| |
169| | loop {
170| | // Need 8 bytes for the chunk header (length:4 + type:4).
171| 181| if buf.len() - cursor < 8 {
172| 4| return Err(ParsingErrorState::new(
173| 4| ParsingError::Need(8 - (buf.len() - cursor)),
174| 4| preserve(),
175| 4| ));
176| 177| }
177| 177| let length = u32::from_be_bytes([
178| 177| buf[cursor],
179| 177| buf[cursor + 1],
180| 177| buf[cursor + 2],
181| 177| buf[cursor + 3],
182| 177| ]);
183| 177| let ctype = &buf[cursor + 4..cursor + 8];
184| |
185| | // Compute total chunk size = 8 (header) + length (data) + 4 (CRC).
186| | // On 32-bit targets, `length as usize + 12` can wrap when length is
187| | // close to u32::MAX; bail out as malformed instead.
188| 177| let total = match (length as usize).checked_add(12) {
189| 177| Some(t) => t,
190| | None => {
191| 0| return Err(ParsingErrorState::new(
192| 0| ParsingError::Failed {
193| 0| kind: MalformedKind::PngChunk,
194| 0| message: "PNG: chunk length overflows addressable size".into(),
195| 0| },
196| 0| preserve(),
197| 0| ));
198| | }
199| | };
200| |
201| 177| match ctype {
202| 177| b"IEND" => break,
^32
203| | b"eXIf" => {
204| 15| let remaining = buf.len() - cursor;
205| 15| if total > remaining {
206| 6| return Err(ParsingErrorState::new(
207| 6| ParsingError::Need(total - remaining),
208| 6| preserve(),
209| 6| ));
210| 9| }
211| 9| let data_start = cursor + 8;
212| 9| let data_end = data_start + length as usize;
213| | // eXIf has priority 3 (highest), always wins.
214| 9| out.exif = Some(PngExifSource::EXif(data_start..data_end));
215| 9| exif_priority = 3;
216| 9| cursor += total;
217| | }
218| | b"tEXt" => {
219| 58| if length > MAX_TEXT_CHUNK_SIZE {
220| | // Defensive: skip oversized chunks.
221| 2| let remaining = buf.len() - cursor;
222| 2| if total > remaining {
223| 2| return Err(ParsingErrorState::new(
224| 2| ParsingError::ClearAndSkip(cursor + total),
225| 2| skipped(),
226| 2| ));
227| 0| }
228| 0| cursor += total;
229| 0| continue;
230| 56| }
231| 56| let remaining = buf.len() - cursor;
232| 56| if total > remaining {
233| 4| return Err(ParsingErrorState::new(
234| 4| ParsingError::Need(total - remaining),
235| 4| preserve(),
236| 4| ));
237| 52| }
238| 52| let data = &buf[cursor + 8..cursor + 8 + length as usize];
239| | // tEXt format: Latin-1 keyword + 0x00 + Latin-1 text
240| 571| if let Some(nul_pos) = data.iter().position(|&b| b == 0) {
^51 ^52 ^52
241| 51| let key = decode_latin1(&data[..nul_pos]);
242| 51| let value = decode_latin1(&data[nul_pos + 1..]);
243| |
244| | // Legacy EXIF detection
245| 51| let candidate_priority: u8 = match key.as_str() {
246| 51| "Raw profile type APP1" => 2,
^3
247| 48| "Raw profile type exif" => 1,
^10
248| 38| _ => 0,
249| | };
250| 51| if candidate_priority > 0 && candidate_priority > exif_priority {
^13
251| 13| if let Ok(mut bytes) = decode_raw_profile_value(&value) {
^12
252| | // Strip APP1's leading "Exif\0\0" if present.
253| 12| if key.ends_with("APP1") && bytes.starts_with(b"Exif\0\0") {
^3
254| 3| bytes.drain(0..6);
255| 9| }
256| | // Validate as TIFF (must have a valid byte-order marker
257| | // + magic number) before accepting.
258| 12| if bytes.len() >= 8 && crate::exif::TiffHeader::parse(&bytes).is_ok() {
259| 12| out.exif = Some(PngExifSource::Legacy(bytes));
260| 12| exif_priority = candidate_priority;
261| 12| }
^0
262| | // else: silently drop the legacy candidate, keep raw text entry below
263| 1| }
264| | // hex_decode failure → silently drop too
265| 38| }
266| |
267| 51| let entry_size = key.len() + value.len();
268| 51| if text_total + entry_size <= MAX_TEXT_CHUNKS_TOTAL {
269| 51| text_total += entry_size;
270| 51| out.text_chunks.push((key, value));
271| 51| }
^0
272| | // else: silently skip (already-captured entries kept).
273| 1| }
274| | // else: malformed tEXt (no NUL separator) — silently skip.
275| 52| cursor += total;
276| | }
277| | _ => {
278| 72| let remaining = buf.len() - cursor;
279| 72| if total > remaining {
280| 6| return Err(ParsingErrorState::new(
281| 6| ParsingError::ClearAndSkip(cursor + total),
282| 6| skipped(),
283| 6| ));
284| 66| }
285| 66| cursor += total;
286| | }
287| | }
288| | }
289| |
290| 32| Ok(out)
291| 56|}
292| |
293| |#[cfg(test)]
294| |mod tests {
295| | use super::*;
296| |
297| 1| fn build_minimal_png() -> Vec<u8> {
298| 1| let mut out = Vec::new();
299| 1| out.extend_from_slice(PNG_SIGNATURE);
300| | // IHDR chunk (1x1 grayscale)
301| 1| out.extend_from_slice(&13u32.to_be_bytes());
302| 1| out.extend_from_slice(b"IHDR");
303| 1| out.extend_from_slice(&[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0]);
304| 1| out.extend_from_slice(&[0, 0, 0, 0]); // CRC
305| | // IEND chunk
306| 1| out.extend_from_slice(&0u32.to_be_bytes());
307| 1| out.extend_from_slice(b"IEND");
308| 1| out.extend_from_slice(&[0, 0, 0, 0]); // CRC
309| 1| out
310| 1| }
311| |
312| | #[test]
313| 1| fn extract_chunks_minimal_png() {
314| 1| let buf = build_minimal_png();
315| 1| let result = extract_chunks(&buf, None).unwrap();
316| 1| assert!(result.exif.is_none());
317| 1| assert!(result.text_chunks.is_empty());
318| 1| }
319| |
320| | #[test]
321| 1| fn extract_chunks_bad_signature() {
322| | use crate::error::MalformedKind;
323| 1| let buf = b"\x00\x00\x00\x00\x00\x00\x00\x00not_png".to_vec();
324| 1| let err = extract_chunks(&buf, None).unwrap_err();
325| | // The kind must be PngChunk (the structural unit) — the old
326| | // ParsedError-conversion fallback labelled every parse failure
327| | // `IsoBmffBox`, which was misleading for PNG input.
328| 1| assert!(matches!(
^0
329| 1| err.err,
330| | ParsingError::Failed {
331| | kind: MalformedKind::PngChunk,
332| | ..
333| | }
334| | ));
335| 1| }
336| |
337| | #[test]
338| 1| fn extract_chunks_truncated_signature() {
339| 1| let buf = b"\x89PNG".to_vec();
340| 1| let err = extract_chunks(&buf, None).unwrap_err();
341| 1| assert!(matches!(err.err, ParsingError::Need(_)));
^0
342| 1| }
343| |
344| 34| fn build_chunk(ctype: &[u8; 4], data: &[u8]) -> Vec<u8> {
345| 34| let mut out = Vec::new();
346| 34| out.extend_from_slice(&(data.len() as u32).to_be_bytes());
347| 34| out.extend_from_slice(ctype);
348| 34| out.extend_from_slice(data);
349| 34| out.extend_from_slice(&[0, 0, 0, 0]); // CRC (unverified)
350| 34| out
351| 34| }
352| |
353| 9| fn build_png_with_chunks(chunks: &[Vec<u8>]) -> Vec<u8> {
354| 9| let mut out = Vec::new();
355| 9| out.extend_from_slice(PNG_SIGNATURE);
356| 9| out.extend_from_slice(&build_chunk(
357| 9| b"IHDR",
358| 9| &[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0],
359| 9| ));
360| 11| for c in chunks {
^9
361| 11| out.extend_from_slice(c);
362| 11| }
363| 9| out.extend_from_slice(&build_chunk(b"IEND", &[]));
364| 9| out
365| 9| }
366| |
367| | #[test]
368| 1| fn extract_chunks_with_exif() {
369| | // Tiny "TIFF" body — content doesn't matter at this layer.
370| 1| let exif_payload = b"II*\x00\x08\x00\x00\x00MM\x00\x2a";
371| 1| let exif_chunk = build_chunk(b"eXIf", exif_payload);
372| 1| let buf = build_png_with_chunks(&[exif_chunk]);
373| 1| let result = extract_chunks(&buf, None).unwrap();
374| 1| let exif_range = match result.exif {
375| 1| Some(PngExifSource::EXif(r)) => r,
376| 0| _ => panic!("expected EXif source"),
377| | };
378| 1| assert_eq!(&buf[exif_range], exif_payload);
379| 1| assert!(result.text_chunks.is_empty());
380| 1| }
381| |
382| | #[test]
383| 1| fn extract_chunks_with_text() {
384| 1| let mut text_data = Vec::new();
385| 1| text_data.extend_from_slice(b"Title");
386| 1| text_data.push(0);
387| 1| text_data.extend_from_slice(b"Hello world");
388| 1| let chunks = vec![build_chunk(b"tEXt", &text_data)];
389| 1| let buf = build_png_with_chunks(&chunks);
390| 1| let result = extract_chunks(&buf, None).unwrap();
391| 1| assert!(result.exif.is_none());
392| 1| assert_eq!(result.text_chunks.len(), 1);
393| 1| assert_eq!(result.text_chunks[0].0, "Title");
394| 1| assert_eq!(result.text_chunks[0].1, "Hello world");
395| 1| }
396| |
397| | #[test]
398| 1| fn extract_chunks_text_duplicate_keys() {
399| 1| let mut t1 = Vec::new();
400| 1| t1.extend_from_slice(b"Comment");
401| 1| t1.push(0);
402| 1| t1.extend_from_slice(b"first");
403| 1| let mut t2 = Vec::new();
404| 1| t2.extend_from_slice(b"Comment");
405| 1| t2.push(0);
406| 1| t2.extend_from_slice(b"second");
407| 1| let chunks = vec![build_chunk(b"tEXt", &t1), build_chunk(b"tEXt", &t2)];
408| 1| let buf = build_png_with_chunks(&chunks);
409| 1| let result = extract_chunks(&buf, None).unwrap();
410| 1| assert_eq!(result.text_chunks.len(), 2);
411| 1| assert_eq!(result.text_chunks[0], ("Comment".into(), "first".into()));
412| 1| assert_eq!(result.text_chunks[1], ("Comment".into(), "second".into()));
413| 1| }
414| |
415| | #[test]
416| 1| fn extract_chunks_text_no_nul_separator() {
417| | // Malformed tEXt with no NUL byte — should be silently skipped.
418| 1| let chunks = vec![build_chunk(b"tEXt", b"NoNulSeparator")];
419| 1| let buf = build_png_with_chunks(&chunks);
420| 1| let result = extract_chunks(&buf, None).unwrap();
421| 1| assert!(result.text_chunks.is_empty());
422| 1| }
423| |
424| | #[test]
425| 1| fn extract_chunks_text_latin1_decode() {
426| | // Latin-1 character outside ASCII (é = 0xE9)
427| 1| let mut data = Vec::new();
428| 1| data.extend_from_slice(b"Caption");
429| 1| data.push(0);
430| 1| data.extend_from_slice(b"caf\xE9");
431| 1| let chunks = vec![build_chunk(b"tEXt", &data)];
432| 1| let buf = build_png_with_chunks(&chunks);
433| 1| let result = extract_chunks(&buf, None).unwrap();
434| 1| assert_eq!(result.text_chunks[0].1, "café");
435| 1| }
436| |
437| | #[test]
438| 1| fn extract_chunks_truncated_inside_exif() {
439| | // PNG signature + IHDR + start of eXIf chunk header (claiming a 100-byte
440| | // body) but the body is missing.
441| 1| let mut buf = Vec::new();
442| 1| buf.extend_from_slice(PNG_SIGNATURE);
443| 1| buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
444| | // Manually emit eXIf header claiming 100 bytes
445| 1| buf.extend_from_slice(&100u32.to_be_bytes());
446| 1| buf.extend_from_slice(b"eXIf");
447| | // No body — caller must request Need.
448| |
449| 1| let err = extract_chunks(&buf, None).unwrap_err();
450| 1| match err.err {
451| 1| ParsingError::Need(n) => assert!(n >= 100),
452| 0| other => panic!("expected Need(>=100), got {other:?}"),
453| | }
454| 1| }
455| |
456| | #[test]
457| 1| fn extract_chunks_skips_large_idat() {
458| | // IDAT chunk declaring a 50_000-byte body that is NOT in the buffer —
459| | // should produce ParsingError::ClearAndSkip with PngPastSignature so
460| | // the resumed call (whose buf no longer starts at the signature)
461| | // doesn't re-check buf[..8].
462| 1| let mut buf = Vec::new();
463| 1| buf.extend_from_slice(PNG_SIGNATURE);
464| 1| buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
465| | // IDAT header only, claiming 50_000 bytes
466| 1| buf.extend_from_slice(&50_000u32.to_be_bytes());
467| 1| buf.extend_from_slice(b"IDAT");
468| |
469| 1| let err = extract_chunks(&buf, None).unwrap_err();
470| | // Skip distance must equal `cursor + total` — i.e. PNG signature (8)
471| | // + IHDR chunk (25) + IDAT total (50_000 body + 12 framing). The old
472| | // buggy `total - remaining` formula would have under-counted by the
473| | // entire walker cursor and stranded the parser mid-IDAT on retry.
474| 1| match err.err {
475| 1| ParsingError::ClearAndSkip(n) => assert_eq!(n, 8 + 25 + 50_000 + 12),
476| 0| other => panic!("expected ClearAndSkip, got {other:?}"),
477| | }
478| 1| assert!(
479| 1| matches!(err.state, Some(ParsingState::PngPastSignature)),
^0
480| | "ClearAndSkip must hand back PngPastSignature so the resumed \
481| | call skips the signature check on the mid-stream slice"
482| | );
483| 1| }
484| |
485| | #[test]
486| 1| fn extract_chunks_resumes_past_signature_with_state() {
487| | // After a ClearAndSkip the next call receives a buf that starts
488| | // mid-file. Carrying PngPastSignature in state must let the parser
489| | // skip the buf[..8] signature check and parse the next chunk.
490| 1| let mut tail = Vec::new();
491| | // Just an IEND chunk's bytes — no signature in front.
492| 1| tail.extend_from_slice(&0u32.to_be_bytes());
493| 1| tail.extend_from_slice(b"IEND");
494| 1| tail.extend_from_slice(&[0, 0, 0, 0]); // CRC
495| |
496| 1| let result = extract_chunks(&tail, Some(ParsingState::PngPastSignature))
497| 1| .expect("must not check signature");
498| 1| assert!(result.exif.is_none());
499| 1| assert!(result.text_chunks.is_empty());
500| 1| }
501| |
502| | #[test]
503| 1| fn extract_chunks_text_too_large_skipped() {
504| | // tEXt chunk declaring 2 MiB length — should be skipped without
505| | // entering text_chunks. We don't actually allocate 2 MiB; emit
506| | // the header only and let extract_chunks request a Skip.
507| 1| let mut buf = Vec::new();
508| 1| buf.extend_from_slice(PNG_SIGNATURE);
509| 1| buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
510| | // tEXt header claiming length > MAX_TEXT_CHUNK_SIZE
511| 1| let bogus_length = MAX_TEXT_CHUNK_SIZE + 1;
512| 1| buf.extend_from_slice(&bogus_length.to_be_bytes());
513| 1| buf.extend_from_slice(b"tEXt");
514| | // No body provided — but since extract_chunks should skip oversized
515| | // tEXt, we expect a ClearAndSkip error (not capture).
516| |
517| 1| let err = extract_chunks(&buf, None).unwrap_err();
518| 1| assert!(matches!(err.err, ParsingError::ClearAndSkip(_)));
^0
519| 1| }
520| |
521| | #[test]
522| 1| fn hex_decode_basic() {
523| 1| assert_eq!(hex_decode("4849").unwrap(), b"HI");
524| 1| assert_eq!(hex_decode("48 49").unwrap(), b"HI");
525| 1| assert_eq!(hex_decode("48\n49").unwrap(), b"HI");
526| 1| assert_eq!(hex_decode("aBcD").unwrap(), vec![0xab, 0xcd]);
527| 1| }
528| |
529| | #[test]
530| 1| fn hex_decode_rejects_invalid() {
531| 1| assert!(hex_decode("XX").is_err());
532| 1| assert!(hex_decode("48a").is_err()); // odd-length
533| 1| }
534| |
535| | #[test]
536| 1| fn decode_raw_profile_imagemagick_format() {
537| | // Mimics ImageMagick's "Raw profile type exif" value layout.
538| 1| let v = "\nexif\n 4\n4849 5050\n";
539| 1| let bytes = decode_raw_profile_value(v).unwrap();
540| 1| assert_eq!(bytes, b"HIPP");
541| 1| }
542| |
543| | #[test]
544| 1| fn extract_chunks_malicious_text_length_max_u32_does_not_panic() {
545| | // tEXt with length = u32::MAX. Must not allocate 4 GB or panic.
546| | // On 32-bit targets, length + 12 overflows usize — the parser must
547| | // bail with Failed rather than wrap. On 64-bit, length + 12 fits
548| | // and the buffer-shortage check produces Need/ClearAndSkip.
549| 1| let mut buf = Vec::new();
550| 1| buf.extend_from_slice(PNG_SIGNATURE);
551| 1| buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
552| 1| buf.extend_from_slice(&u32::MAX.to_be_bytes());
553| 1| buf.extend_from_slice(b"tEXt");
554| |
555| | // ParsingError has only Need / ClearAndSkip / Failed variants — any
556| | // of the three is acceptable here; the contract is "no panic, no
557| | // wrap-around, no infinite loop".
558| 1| let _err = extract_chunks(&buf, None).unwrap_err();
559| 1| }
560| |
561| | #[test]
562| 1| fn extract_chunks_chunk_length_overflow_is_rejected() {
563| | // Synthesize a length that always overflows usize regardless of
564| | // target pointer width: only achievable on 32-bit usize because
565| | // u32::MAX as u64 + 12 fits u64. We assert the more general
566| | // contract: u32::MAX-length chunks never advance the cursor by a
567| | // wrapped value (no panic, no infinite loop, no out-of-bounds read).
568| 1| let mut buf = Vec::new();
569| 1| buf.extend_from_slice(PNG_SIGNATURE);
570| 1| buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
571| | // Chunk length = u32::MAX, type = unknown ("XXXX") — the `_` arm.
572| 1| buf.extend_from_slice(&u32::MAX.to_be_bytes());
573| 1| buf.extend_from_slice(b"XXXX");
574| |
575| 1| let _err = extract_chunks(&buf, None).unwrap_err();
576| 1| }
577| |
578| | /// Minimal little-endian TIFF: II + 0x002A + IFD0 offset = 8 + IFD0 with 0 entries.
579| 4| fn minimal_tiff_le() -> Vec<u8> {
580| 4| let mut t = Vec::new();
581| 4| t.extend_from_slice(b"II"); // little-endian
582| 4| t.extend_from_slice(&[0x2a, 0x00]); // magic 42
583| 4| t.extend_from_slice(&[0x08, 0, 0, 0]); // IFD0 offset = 8
584| 4| t.extend_from_slice(&[0, 0]); // IFD0: 0 entries
585| 4| t.extend_from_slice(&[0, 0, 0, 0]); // next IFD = 0
586| 4| t
587| 4| }
588| |
589| | /// Encode a TIFF blob into the ImageMagick "Raw profile type X" tEXt
590| | /// value layout: 3-line header + hex bytes.
591| 3| fn raw_profile_value(profile_type: &str, tiff: &[u8]) -> String {
592| 48| let hex: String = tiff.iter().map(|b| format!("{b:02x}")).collect();
^3 ^3 ^3 ^3 ^3 ^3
593| | // Wrap the hex into 72-char lines like ImageMagick (not strictly
594| | // necessary for our parser; ignored as whitespace).
595| 3| let mut wrapped = String::new();
596| 3| for chunk in hex.as_bytes().chunks(72) {
597| 3| wrapped.push_str(std::str::from_utf8(chunk).unwrap());
598| 3| wrapped.push('\n');
599| 3| }
600| 3| format!("\n{}\n {}\n{}", profile_type, tiff.len(), wrapped)
601| 3| }
602| |
603| | #[test]
604| 1| fn extract_chunks_legacy_exif() {
605| 1| let tiff = minimal_tiff_le();
606| 1| let value = raw_profile_value("exif", &tiff);
607| 1| let mut data = Vec::new();
608| 1| data.extend_from_slice(b"Raw profile type exif");
609| 1| data.push(0);
610| 1| data.extend_from_slice(value.as_bytes());
611| 1| let chunks = vec![build_chunk(b"tEXt", &data)];
612| 1| let buf = build_png_with_chunks(&chunks);
613| |
614| 1| let result = extract_chunks(&buf, None).unwrap();
615| 1| match result.exif {
616| 1| Some(PngExifSource::Legacy(bytes)) => assert_eq!(bytes, tiff),
617| 0| other => panic!("expected Legacy, got {:?}", other),
618| | }
619| | // Original tEXt entry is preserved.
620| 1| assert_eq!(result.text_chunks.len(), 1);
621| 1| assert_eq!(result.text_chunks[0].0, "Raw profile type exif");
622| 1| }
623| |
624| | #[test]
625| 1| fn extract_chunks_legacy_app1() {
626| 1| let tiff = minimal_tiff_le();
627| | // APP1 carries an "Exif\0\0" prefix before TIFF.
628| 1| let mut app1 = Vec::new();
629| 1| app1.extend_from_slice(b"Exif\0\0");
630| 1| app1.extend_from_slice(&tiff);
631| 1| let value = raw_profile_value("app1", &app1);
632| 1| let mut data = Vec::new();
633| 1| data.extend_from_slice(b"Raw profile type APP1");
634| 1| data.push(0);
635| 1| data.extend_from_slice(value.as_bytes());
636| 1| let chunks = vec![build_chunk(b"tEXt", &data)];
637| 1| let buf = build_png_with_chunks(&chunks);
638| |
639| 1| let result = extract_chunks(&buf, None).unwrap();
640| 1| match result.exif {
641| 1| Some(PngExifSource::Legacy(bytes)) => assert_eq!(bytes, tiff),
642| 0| other => panic!("expected Legacy, got {:?}", other),
643| | }
644| 1| }
645| |
646| | #[test]
647| 1| fn extract_chunks_exif_overrides_legacy() {
648| 1| let tiff_legacy = minimal_tiff_le();
649| 1| let tiff_exif = {
650| 1| let mut t = minimal_tiff_le();
651| | // Differentiate so we can verify which one was kept.
652| 1| t.extend_from_slice(&[0xFF; 4]);
653| 1| t
654| | };
655| 1| let legacy_value = raw_profile_value("exif", &tiff_legacy);
656| 1| let mut legacy_data = Vec::new();
657| 1| legacy_data.extend_from_slice(b"Raw profile type exif");
658| 1| legacy_data.push(0);
659| 1| legacy_data.extend_from_slice(legacy_value.as_bytes());
660| |
661| | // Order: legacy first, then eXIf. eXIf must still win.
662| 1| let chunks = vec![
663| 1| build_chunk(b"tEXt", &legacy_data),
664| 1| build_chunk(b"eXIf", &tiff_exif),
665| | ];
666| 1| let buf = build_png_with_chunks(&chunks);
667| |
668| 1| let result = extract_chunks(&buf, None).unwrap();
669| 1| match result.exif {
670| 1| Some(PngExifSource::EXif(range)) => {
671| 1| assert_eq!(&buf[range], tiff_exif);
672| | }
673| 0| other => panic!("expected EXif (eXIf wins), got {:?}", other),
674| | }
675| 1| }
676| |
677| | #[test]
678| 1| fn extract_chunks_invalid_legacy_silently_dropped() {
679| | // Malformed value: not valid hex.
680| 1| let mut data = Vec::new();
681| 1| data.extend_from_slice(b"Raw profile type exif");
682| 1| data.push(0);
683| 1| data.extend_from_slice(b"not hex at all\nzzz");
684| 1| let chunks = vec![build_chunk(b"tEXt", &data)];
685| 1| let buf = build_png_with_chunks(&chunks);
686| |
687| 1| let result = extract_chunks(&buf, None).unwrap();
688| 1| assert!(result.exif.is_none(), "malformed legacy must be dropped");
689| | // Raw tEXt entry still preserved.
690| 1| assert_eq!(result.text_chunks.len(), 1);
691| 1| }
692| |}
/home/min/dev/nom-exif/src/raf.rs:
1| |use nom::{
2| | bytes::streaming::{tag, take},
3| | number, IResult,
4| |};
5| |
6| |use crate::{jpeg, utils::parse_cstr};
7| |
8| |const MAGIC: &[u8] = b"FUJIFILMCCD-RAW ";
9| |
10| |/// Refer to: [Fujifilm RAF](http://fileformats.archiveteam.org/wiki/Fujifilm_RAF)
11| |#[allow(unused)]
12| |pub struct RafInfo<'a> {
13| | pub version: &'a [u8],
14| | pub camera_num_id: &'a [u8],
15| | pub camera_string: String,
16| | pub directory_ver: &'a [u8],
17| | pub image_offset: u32,
18| | pub exif_data: Option<&'a [u8]>,
19| |}
20| |
21| |impl RafInfo<'_> {
22| 18| pub fn check(input: &[u8]) -> crate::Result<()> {
23| | // check magic
24| 18| let _ = nom::bytes::complete::tag(MAGIC)(input).map_err(|e| {
^15
25| 15| crate::error::nom_err_to_malformed(e, crate::error::MalformedKind::TiffHeader)
26| 15| })?;
27| 3| Ok(())
28| 18| }
29| |
30| 4| pub(crate) fn parse(input: &[u8]) -> IResult<&[u8], RafInfo<'_>> {
31| | // magic
32| 4| let (remain, _) = tag(MAGIC)(input)?;
^0
33| 4| let (remain, version) = take(4usize)(remain)?;
^0
34| 4| let (remain, camera_num_id) = take(8usize)(remain)?;
^0
35| 4| let (remain, camera_string) = take(32usize)(remain)?;
^0
36| 4| let (remain, directory_ver) = take(4usize)(remain)?;
^0
37| |
38| | // 20 bytes unknown
39| 4| let (remain, _) = take(20usize)(remain)?;
^0
40| |
41| 4| let (remain, image_offset) = number::streaming::be_u32(remain)?;
^0
42| |
43| | // skip to image_offset
44| 4| let skip_n = image_offset
45| 4| .checked_sub((input.len() - remain.len()) as u32)
46| 4| .ok_or_else(|| {
^0
47| 0| nom::Err::Failure(nom::error::make_error(remain, nom::error::ErrorKind::Fail))
48| 0| })?;
49| 4| let (remain, _) = take(skip_n)(remain)?;
^0
50| |
51| | // parse as a JPEG
52| 4| jpeg::check_jpeg(remain).map_err(|_| {
^0
53| 0| nom::Err::Failure(nom::error::make_error(remain, nom::error::ErrorKind::Fail))
54| 0| })?;
55| 4| let (remain, exif_data) = jpeg::extract_exif_data(remain)?;
^3 ^3 ^1
56| |
57| 3| let (_, camera_string) = parse_cstr(camera_string)?;
^0
58| |
59| 3| Ok((
60| 3| remain,
61| 3| RafInfo {
62| 3| version,
63| 3| camera_num_id,
64| 3| camera_string,
65| 3| directory_ver,
66| 3| image_offset,
67| 3| exif_data,
68| 3| },
69| 3| ))
70| 4| }
71| |}
72| |
73| |#[cfg(test)]
74| |mod tests {
75| | use std::{fs::File, io::Write, path::Path};
76| |
77| | use test_case::case;
78| |
79| | use crate::testkit::read_sample;
80| |
81| | use super::*;
82| |
83| | #[case("fujifilm_x_t1_01.raf.meta")]
84| 1| fn test_check_raf(path: &str) {
85| 1| let data = read_sample(path).unwrap();
86| 1| RafInfo::check(&data).unwrap();
87| 1| }
88| |
89| | // #[case("fujifilm_x_t1_01.raf", b"0201", b"FF119503", "X-T1", 0x94)]
90| | #[case("fujifilm_x_t1_01.raf.meta", b"0201", b"FF119503", "X-T1", 0x94)]
91| 1| fn test_extract_exif(
92| 1| path: &str,
93| 1| version: &[u8],
94| 1| camera_num_id: &[u8],
95| 1| camera_string: &str,
96| 1| image_offset: u32,
97| 1| ) {
98| 1| let data = read_sample(path).unwrap();
99| 1| let (remain, raf) = RafInfo::parse(&data).unwrap();
100| 1| assert_eq!(raf.version, version);
101| 1| assert_eq!(raf.camera_num_id, camera_num_id);
102| 1| assert_eq!(raf.camera_string, camera_string);
103| 1| assert_eq!(raf.image_offset, image_offset);
104| 1| raf.exif_data.unwrap();
105| |
106| | // save header + exif_data
107| 1| let p = Path::new("./testdata").join("fujifilm_x_t1_01.raf.meta");
108| 1| if !p.exists() {
109| 0| let size = data.len() - remain.len();
110| 0| let mut f = File::create(p).unwrap();
111| 0| f.write_all(&data[..size]).unwrap();
112| 1| }
113| 1| }
114| |}
/home/min/dev/nom-exif/src/slice.rs:
1| |use std::ops::Range;
2| |
3| |pub trait SliceChecked {
4| | fn slice_checked(&self, range: Range<usize>) -> Option<&Self>;
5| |}
6| |
7| |impl<T> SliceChecked for [T] {
8| 15.2k| fn slice_checked(&self, range: Range<usize>) -> Option<&Self> {
9| 15.2k| if range.end <= self.len() {
10| 15.2k| Some(&self[range])
11| | } else {
12| 0| None
13| | }
14| 15.2k| }
15| |}
16| |
17| |pub trait SubsliceOffset {
18| | fn subslice_offset(&self, inner: &Self) -> Option<usize>;
19| |}
20| |
21| |pub trait SubsliceRange {
22| | fn subslice_in_range(&self, inner: &Self) -> Option<Range<usize>>;
23| |}
24| |
25| |impl<T> SubsliceOffset for [T] {
26| 237| fn subslice_offset(&self, inner: &Self) -> Option<usize> {
27| 237| let start = self.as_ptr() as usize;
28| 237| let inner_start = inner.as_ptr() as usize;
29| 237| if inner_start < start || inner_start > start.wrapping_add(self.len()) {
^99 ^99 ^99
30| 138| None
31| | } else {
32| 99| inner_start.checked_sub(start)
33| | }
34| 237| }
35| |}
36| |
37| |impl<T> SubsliceRange for [T]
38| |where
39| | [T]: SubsliceOffset,
40| |{
41| 229| fn subslice_in_range(&self, inner: &Self) -> Option<Range<usize>> {
42| 229| let offset = self.subslice_offset(inner)?;
^93 ^136
43| 93| let end = offset.checked_add(inner.len())?;
^0
44| 93| let start = self.as_ptr() as usize;
45| 93| if end > start + self.len() {
46| 0| None
47| | } else {
48| 93| Some(Range { start: offset, end })
49| | }
50| 229| }
51| |}
52| |
53| |#[cfg(test)]
54| |mod tests {
55| | use super::SubsliceOffset;
56| |
57| | #[test]
58| 1| fn subslice_offset() {
59| 1| let a = &[0u8];
60| 1| let v: Vec<u8> = vec![0, 1, 2, 3, 4, 5];
61| 1| let b = &[0u8];
62| |
63| 1| assert_eq!(v.subslice_offset(&v).unwrap(), 0);
64| 1| assert_eq!(v.subslice_offset(&v[1..2]).unwrap(), 1);
65| 1| assert_eq!(v.subslice_offset(&v[1..]).unwrap(), 1);
66| 1| assert_eq!(v.subslice_offset(&v[2..]).unwrap(), 2);
67| 1| assert_eq!(v.subslice_offset(&v[3..]).unwrap(), 3);
68| 1| assert_eq!(v.subslice_offset(&v[5..]).unwrap(), 5);
69| |
70| 1| assert!(v.subslice_offset(a).is_none());
71| 1| assert!(v.subslice_offset(b).is_none());
72| 1| }
73| |}
/home/min/dev/nom-exif/src/testkit.rs:
1| |use std::{fs::File, io::Read, path::Path};
2| |
3| |use crate::exif::Exif;
4| |use crate::exif::ExifTag::*;
5| |
6| 67|pub fn read_sample(path: &str) -> Result<Vec<u8>, std::io::Error> {
7| 67| let mut f = open_sample(path)?;
^0
8| 67| let mut buf = Vec::new();
9| 67| f.read_to_end(&mut buf)?;
^0
10| 67| Ok(buf)
11| 67|}
12| |
13| 127|pub fn open_sample(path: &str) -> Result<File, std::io::Error> {
14| 127| let p = Path::new(path);
15| 127| let p = if p.is_absolute() {
16| 0| p.to_path_buf()
17| | } else {
18| 127| Path::new("./testdata").join(p)
19| | };
20| 127| File::open(p)
21| 127|}
22| |
23| |#[allow(unused)]
24| 0|pub fn open_sample_w(path: &str) -> Result<File, std::io::Error> {
25| 0| let p = Path::new(path);
26| 0| let p = if p.is_absolute() {
27| 0| p.to_path_buf()
28| | } else {
29| 0| Path::new("./testdata").join(p)
30| | };
31| 0| File::create(p)
32| 0|}
33| |
34| 1|pub fn sorted_exif_entries(exif: &Exif) -> Vec<String> {
35| 1| let tags = [
36| 1| Make,
37| 1| Model,
38| 1| Orientation,
39| 1| ImageWidth,
40| 1| ImageHeight,
41| 1| ISOSpeedRatings,
42| 1| ShutterSpeedValue,
43| 1| ExposureTime,
44| 1| FNumber,
45| 1| ExifImageWidth,
46| 1| ExifImageHeight,
47| 1| DateTimeOriginal,
48| 1| CreateDate,
49| 1| ModifyDate,
50| 1| OffsetTimeOriginal,
51| 1| OffsetTime,
52| 1| GPSLatitudeRef,
53| 1| GPSLatitude,
54| 1| GPSLongitudeRef,
55| 1| GPSLongitude,
56| 1| GPSAltitudeRef,
57| 1| GPSAltitude,
58| 1| GPSVersionID,
59| 1| ExifOffset,
60| 1| GPSInfo,
61| 1| ImageDescription,
62| 1| XResolution,
63| 1| YResolution,
64| 1| ResolutionUnit,
65| 1| Software,
66| 1| HostComputer,
67| 1| WhitePoint,
68| 1| PrimaryChromaticities,
69| 1| YCbCrCoefficients,
70| 1| ReferenceBlackWhite,
71| 1| Copyright,
72| 1| ExposureProgram,
73| 1| SpectralSensitivity,
74| 1| OECF,
75| 1| SensitivityType,
76| 1| ExifVersion,
77| 1| ApertureValue,
78| 1| BrightnessValue,
79| 1| ExposureBiasValue,
80| 1| MaxApertureValue,
81| 1| SubjectDistance,
82| 1| MeteringMode,
83| 1| LightSource,
84| 1| Flash,
85| 1| FocalLength,
86| 1| SubjectArea,
87| 1| MakerNote,
88| 1| FlashPixVersion,
89| 1| ColorSpace,
90| 1| RelatedSoundFile,
91| 1| FlashEnergy,
92| 1| FocalPlaneXResolution,
93| 1| FocalPlaneYResolution,
94| 1| FocalPlaneResolutionUnit,
95| 1| SubjectLocation,
96| 1| ExposureIndex,
97| 1| SensingMethod,
98| 1| FileSource,
99| 1| SceneType,
100| 1| CFAPattern,
101| 1| CustomRendered,
102| 1| ExposureMode,
103| 1| WhiteBalanceMode,
104| 1| DigitalZoomRatio,
105| 1| FocalLengthIn35mmFilm,
106| 1| SceneCaptureType,
107| 1| GainControl,
108| 1| Contrast,
109| 1| Saturation,
110| 1| Sharpness,
111| 1| DeviceSettingDescription,
112| 1| SubjectDistanceRange,
113| 1| ImageUniqueID,
114| 1| CameraSerialNumber,
115| 1| LensSpecification,
116| 1| LensMake,
117| 1| LensModel,
118| 1| LensSerialNumber,
119| 1| Gamma,
120| 1| GPSTimeStamp,
121| 1| GPSSatellites,
122| 1| GPSStatus,
123| 1| GPSMeasureMode,
124| 1| GPSDOP,
125| 1| GPSSpeedRef,
126| 1| GPSSpeed,
127| 1| GPSTrackRef,
128| 1| GPSTrack,
129| 1| GPSImgDirectionRef,
130| 1| GPSImgDirection,
131| 1| GPSMapDatum,
132| 1| GPSDestLatitudeRef,
133| 1| GPSDestLatitude,
134| 1| GPSDestLongitudeRef,
135| 1| GPSDestLongitude,
136| 1| GPSDestBearingRef,
137| 1| GPSDestBearing,
138| 1| GPSDestDistanceRef,
139| 1| GPSDestDistance,
140| 1| GPSProcessingMethod,
141| 1| GPSAreaInformation,
142| 1| GPSDateStamp,
143| 1| GPSDifferential,
144| 1| ];
145| 1| let mut entries = tags
146| 1| .iter()
147| 108| .filter_map(|tag| exif.get(*tag).map(|v| format!("{} » {}", tag, v)))
^1 ^41
148| 1| .collect::<Vec<_>>();
149| 1| entries.sort();
150| |
151| 1| entries
152| 1|}
/home/min/dev/nom-exif/src/utils.rs:
1| |use nom::{combinator::map_res, IResult, Parser};
2| |
3| 1.01k|pub(crate) fn parse_cstr(input: &[u8]) -> IResult<&[u8], String> {
4| 1.01k| let (remain, s) = map_res(
^1.00k ^1.00k
5| 1.64k| nom::bytes::streaming::take_till(|b| b == 0),
^1.01k
6| 1.00k| |bs: &[u8]| {
7| 1.00k| if bs.is_empty() {
8| 972| Ok("".to_owned())
9| | } else {
10| 37| String::from_utf8(bs.to_vec())
11| | }
12| 1.00k| },
13| | )
14| 1.01k| .parse(input)?;
^2
15| |
16| | // consumes the zero byte
17| 1.00k| Ok((&remain[1..], s)) // Safe-slice
18| 1.01k|}
19| |
#[cfg(test)]
mod tests {
    use super::*;
    use test_case::case;

    // `None` means the parser is expected to fail (missing terminator).
    #[case(b"", None)]
    #[case(b"\0", Some(""))]
    #[case(b"h\0", Some("h"))]
    #[case(b"hello\0", Some("hello"))]
    #[case(b"hello", None)]
    fn test_parse_cstr(data: &[u8], expect: Option<&str>) {
        let res = parse_cstr(data);
        match expect {
            Some(s) => assert_eq!(res.unwrap().1, s),
            None => {
                res.unwrap_err();
            }
        }
    }
}
/home/min/dev/nom-exif/src/values.rs:
1| |use std::{
2| | fmt::{Display, LowerHex},
3| | string::FromUtf8Error,
4| |};
5| |
6| |use chrono::{DateTime, FixedOffset, NaiveDateTime};
7| |
8| |use nom::{multi::many_m_n, number::Endianness, AsChar, Parser};
9| |#[cfg(feature = "serde")]
10| |use serde::{Deserialize, Serialize, Serializer};
11| |
12| |use crate::{error::EntryError, ExifTag};
13| |
14| |/// EXIF datetime value with timezone awareness preserved.
15| |#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16| |pub enum ExifDateTime {
17| | /// Original value carried a timezone (e.g. assembled with `OffsetTimeOriginal`).
18| | Aware(DateTime<FixedOffset>),
19| | /// Original value had no timezone (raw `DateTime` tag).
20| | Naive(NaiveDateTime),
21| |}
22| |
23| |impl ExifDateTime {
24| | /// Returns the timezone-aware form only when the original value carried one.
25| 2| pub fn aware(&self) -> Option<DateTime<FixedOffset>> {
26| 2| match self {
27| 1| ExifDateTime::Aware(dt) => Some(*dt),
28| 1| ExifDateTime::Naive(_) => None,
29| | }
30| 2| }
31| |
32| | /// Always returns a `NaiveDateTime` — strips the timezone if present.
33| 2| pub fn into_naive(self) -> NaiveDateTime {
34| 2| match self {
35| 1| ExifDateTime::Aware(dt) => dt.naive_local(),
36| 1| ExifDateTime::Naive(ndt) => ndt,
37| | }
38| 2| }
39| |
40| | /// If naive, attaches `fallback`; if already aware, returns the original offset.
41| 2| pub fn or_offset(self, fallback: FixedOffset) -> DateTime<FixedOffset> {
42| 2| match self {
43| 1| ExifDateTime::Aware(dt) => dt,
44| 1| ExifDateTime::Naive(ndt) => ndt
45| 1| .and_local_timezone(fallback)
46| 1| .single()
47| 1| .unwrap_or_else(|| ndt.and_utc().with_timezone(&fallback)),
^0 ^0 ^0
48| | }
49| 2| }
50| |}
51| |
52| |/// Represent a parsed entry value.
53| |#[derive(Debug, Clone, PartialEq)]
54| |#[non_exhaustive]
55| |pub enum EntryValue {
56| | Text(String),
57| | URational(URational),
58| | IRational(IRational),
59| |
60| | U8(u8),
61| | U16(u16),
62| | U32(u32),
63| | U64(u64),
64| |
65| | I8(i8),
66| | I16(i16),
67| | I32(i32),
68| | I64(i64),
69| |
70| | F32(f32),
71| | F64(f64),
72| |
73| | DateTime(DateTime<FixedOffset>),
74| | NaiveDateTime(NaiveDateTime),
75| | Undefined(Vec<u8>),
76| |
77| | URationalArray(Vec<URational>),
78| | IRationalArray(Vec<IRational>),
79| |
80| | U8Array(Vec<u8>),
81| | U16Array(Vec<u16>),
82| | U32Array(Vec<u32>),
83| |}
84| |
85| |#[derive(Clone, Debug, PartialEq, Eq)]
86| |pub(crate) struct EntryData<'a> {
87| | pub endian: Endianness,
88| | pub tag: u16,
89| | pub data: &'a [u8],
90| | pub data_format: DataFormat,
91| | pub components_num: u32,
92| |}
93| |
94| |impl EntryData<'_> {
95| | // Ensure that the returned Vec is not empty.
96| 2.65k| fn try_as_rationals<T: TryFromBytes + Copy>(&self) -> Result<Vec<Rational<T>>, EntryError> {
97| 2.65k| if self.components_num == 0 {
98| 0| return Err(EntryError::InvalidShape {
99| 0| format: self.data_format as u16,
100| 0| count: self.components_num,
101| 0| });
102| 2.65k| }
103| |
104| 2.65k| let mut vec = Vec::with_capacity(self.components_num as usize);
105| 3.58k| for i in 0..self.components_num {
^2.65k
106| 3.58k| let rational = decode_rational::<T>(&self.data[i as usize * 8..], self.endian)?;
^3.58k ^1
107| 3.58k| vec.push(rational);
108| | }
109| 2.65k| Ok(vec)
110| 2.65k| }
111| |}
112| |
113| |impl EntryValue {
114| | /// Parse an IFD entry value.
115| | ///
116| | /// # Structure of IFD Entry
117| | ///
118| | /// ```txt
119| | /// | 2 | 2 | 4 | 4 |
120| | /// | tag | data format | components num | data (value or offset) |
121| | /// ```
122| | ///
123| | /// # Data size
124| | ///
125| | /// `data_size = components_num * bytes_per_component`
126| | ///
127| | /// `bytes_per_component` is determined by tag & data format.
128| | ///
129| | /// If data_size > 4, then the data area of entry stores the offset of the
130| | /// value, not the value itself.
131| | ///
132| | /// # Data format
133| | ///
134| | /// See: [`DataFormat`].
135| 10.3k| pub(crate) fn parse(entry: &EntryData, tz: &Option<String>) -> Result<EntryValue, EntryError> {
136| 10.3k| if entry.data.is_empty() {
137| 1| return Err(EntryError::InvalidShape {
138| 1| format: entry.data_format as u16,
139| 1| count: entry.components_num,
140| 1| });
141| 10.3k| }
142| |
143| 10.3k| let endian = entry.endian;
144| 10.3k| let tag = entry.tag;
145| 10.3k| let data_format = entry.data_format;
146| 10.3k| let data = entry.data;
147| 10.3k| let components_num = entry.components_num;
148| |
149| 10.3k| if data.is_empty() || components_num == 0 {
150| 12| return Ok(EntryValue::variant_default(data_format));
151| 10.3k| }
152| |
153| 10.3k| let exif_tag = ExifTag::from_code(tag);
154| 10.3k| if let Some(tag) = exif_tag {
^10.2k
155| 10.2k| if tag == ExifTag::DateTimeOriginal
156| 10.0k| || tag == ExifTag::CreateDate
157| 9.87k| || tag == ExifTag::ModifyDate
158| | {
159| 508| let s = get_cstr(data).map_err(|_| EntryError::InvalidValue("invalid utf-8"))?;
^0
160| |
161| 508| let t = if let Some(tz) = tz {
^387 ^432
162| 432| let tz = repair_tz_str(tz);
163| 432| let ss = format!("{s} {tz}");
164| 432| match DateTime::parse_from_str(&ss, "%Y:%m:%d %H:%M:%S %z") {
165| 387| Ok(t) => t,
166| 45| Err(_) => return Ok(EntryValue::NaiveDateTime(parse_naive_time(s)?)),
^0
167| | }
168| | } else {
169| 76| return Ok(EntryValue::NaiveDateTime(parse_naive_time(s)?));
^0
170| | };
171| |
172| 387| return Ok(EntryValue::DateTime(t));
173| 9.72k| }
174| 150| }
175| |
176| 9.87k| match data_format {
177| 148| DataFormat::U8 => match components_num {
178| 145| 1 => Ok(Self::U8(data[0])),
179| 3| _ => Ok(Self::U8Array(data.into())),
180| | },
181| | DataFormat::Text => Ok(EntryValue::Text(
182| 1.95k| get_cstr(data).map_err(|_| EntryError::InvalidValue("invalid utf-8"))?,
^0
183| | )),
184| | DataFormat::U16 => {
185| 3.10k| if components_num == 1 {
186| 3.07k| Ok(Self::U16(u16::try_from_bytes(data, endian)?))
^1
187| | } else {
188| 32| let (_, v) = many_m_n::<_, nom::error::Error<_>, _>(
^31
189| 32| components_num as usize,
190| 32| components_num as usize,
191| 32| nom::number::complete::u16(endian),
192| 32| )
193| 32| .parse(data)
194| 32| .map_err(|_| EntryError::InvalidShape {
195| 1| format: DataFormat::U16 as u16,
196| 1| count: components_num,
197| 1| })?;
198| 31| Ok(Self::U16Array(v))
199| | }
200| | }
201| | DataFormat::U32 => {
202| 1.00k| if components_num == 1 {
203| 983| Ok(Self::U32(u32::try_from_bytes(data, endian)?))
^0
204| | } else {
205| 24| let (_, v) = many_m_n::<_, nom::error::Error<_>, _>(
^23
206| 24| components_num as usize,
207| 24| components_num as usize,
208| 24| nom::number::complete::u32(endian),
209| 24| )
210| 24| .parse(data)
211| 24| .map_err(|_| EntryError::InvalidShape {
212| 1| format: DataFormat::U32 as u16,
213| 1| count: components_num,
214| 1| })?;
215| 23| Ok(Self::U32Array(v))
216| | }
217| | }
218| | DataFormat::URational => {
219| 1.93k| let rationals = entry.try_as_rationals::<u32>()?;
^1.93k ^1
220| 1.93k| if rationals.len() == 1 {
221| 1.71k| Ok(Self::URational(rationals[0]))
222| | } else {
223| 214| Ok(Self::URationalArray(rationals))
224| | }
225| | }
226| 2| DataFormat::I8 => match components_num {
227| 1| 1 => Ok(Self::I8(data[0] as i8)),
228| 1| x => Err(EntryError::InvalidShape {
229| 1| format: data_format as u16,
230| 1| count: x,
231| 1| }),
232| | },
233| 985| DataFormat::Undefined => Ok(Self::Undefined(data.to_vec())),
234| 2| DataFormat::I16 => match components_num {
235| 1| 1 => Ok(Self::I16(i16::try_from_bytes(data, endian)?)),
^0
236| 1| x => Err(EntryError::InvalidShape {
237| 1| format: data_format as u16,
238| 1| count: x,
239| 1| }),
240| | },
241| 2| DataFormat::I32 => match components_num {
242| 1| 1 => Ok(Self::I32(i32::try_from_bytes(data, endian)?)),
^0
243| 1| x => Err(EntryError::InvalidShape {
244| 1| format: data_format as u16,
245| 1| count: x,
246| 1| }),
247| | },
248| | DataFormat::IRational => {
249| 721| let rationals = entry.try_as_rationals::<i32>()?;
^0
250| 721| if rationals.len() == 1 {
251| 488| Ok(Self::IRational(rationals[0]))
252| | } else {
253| 233| Ok(Self::IRationalArray(rationals))
254| | }
255| | }
256| 2| DataFormat::F32 => match components_num {
257| 1| 1 => Ok(Self::F32(f32::try_from_bytes(data, endian)?)),
^0
258| 1| x => Err(EntryError::InvalidShape {
259| 1| format: data_format as u16,
260| 1| count: x,
261| 1| }),
262| | },
263| 2| DataFormat::F64 => match components_num {
264| 1| 1 => Ok(Self::F64(f64::try_from_bytes(data, endian)?)),
^0
265| 1| x => Err(EntryError::InvalidShape {
266| 1| format: data_format as u16,
267| 1| count: x,
268| 1| }),
269| | },
270| | }
271| 10.3k| }
272| |
273| 12| fn variant_default(data_format: DataFormat) -> EntryValue {
274| 12| match data_format {
275| 1| DataFormat::U8 => Self::U8(0),
276| 1| DataFormat::Text => Self::Text(String::default()),
277| 1| DataFormat::U16 => Self::U16(0),
278| 1| DataFormat::U32 => Self::U32(0),
279| 1| DataFormat::URational => Self::URational(URational::default()),
280| 1| DataFormat::I8 => Self::I8(0),
281| 1| DataFormat::Undefined => Self::Undefined(Vec::default()),
282| 1| DataFormat::I16 => Self::I16(0),
283| 1| DataFormat::I32 => Self::I32(0),
284| 1| DataFormat::IRational => Self::IRational(IRational::default()),
285| 1| DataFormat::F32 => Self::F32(0.0),
286| 1| DataFormat::F64 => Self::F64(0.0),
287| | }
288| 12| }
289| |
290| 220| pub fn as_str(&self) -> Option<&str> {
291| 220| match self {
292| 219| EntryValue::Text(v) => Some(v),
293| 1| _ => None,
294| | }
295| 220| }
296| |
297| | /// EXIF datetime accessor.
298| | ///
299| | /// Returns `Some(ExifDateTime::Aware)` when the parsed value carried a
300| | /// timezone (e.g. composed with `OffsetTimeOriginal`); returns
301| | /// `Some(ExifDateTime::Naive)` for tags that ship without timezone info;
302| | /// returns `None` for non-datetime values.
303| | ///
304| | /// ```rust
305| | /// use nom_exif::*;
306| | /// use chrono::{DateTime, NaiveDateTime, FixedOffset};
307| | ///
308| | /// let dt = DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap();
309| | /// let ev = EntryValue::DateTime(dt);
310| | /// assert!(matches!(ev.as_datetime(), Some(ExifDateTime::Aware(_))));
311| | ///
312| | /// let ndt = NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap();
313| | /// let ev = EntryValue::NaiveDateTime(ndt);
314| | /// assert!(matches!(ev.as_datetime(), Some(ExifDateTime::Naive(_))));
315| | /// ```
316| 4| pub fn as_datetime(&self) -> Option<ExifDateTime> {
317| 4| match self {
318| 2| EntryValue::DateTime(v) => Some(ExifDateTime::Aware(*v)),
319| 1| EntryValue::NaiveDateTime(v) => Some(ExifDateTime::Naive(*v)),
320| 1| _ => None,
321| | }
322| 4| }
323| |
324| 1| pub fn as_u8(&self) -> Option<u8> {
325| 1| match self {
326| 0| EntryValue::U8(v) => Some(*v),
327| 1| _ => None,
328| | }
329| 1| }
330| |
331| 0| pub fn as_i8(&self) -> Option<i8> {
332| 0| match self {
333| 0| EntryValue::I8(v) => Some(*v),
334| 0| _ => None,
335| | }
336| 0| }
337| |
338| 0| pub fn as_u16(&self) -> Option<u16> {
339| 0| match self {
340| 0| EntryValue::U16(v) => Some(*v),
341| 0| _ => None,
342| | }
343| 0| }
344| |
345| 0| pub fn as_i16(&self) -> Option<i16> {
346| 0| match self {
347| 0| EntryValue::I16(v) => Some(*v),
348| 0| _ => None,
349| | }
350| 0| }
351| |
352| 0| pub fn as_u64(&self) -> Option<u64> {
353| 0| match self {
354| 0| EntryValue::U64(v) => Some(*v),
355| 0| _ => None,
356| | }
357| 0| }
358| |
359| 79| pub fn as_u32(&self) -> Option<u32> {
360| 79| match self {
361| 79| EntryValue::U32(v) => Some(*v),
362| 0| _ => None,
363| | }
364| 79| }
365| |
366| 0| pub fn as_i32(&self) -> Option<i32> {
367| 0| match self {
368| 0| EntryValue::I32(v) => Some(*v),
369| 0| _ => None,
370| | }
371| 0| }
372| |
373| 2| pub fn as_i64(&self) -> Option<i64> {
374| 2| if let EntryValue::I64(v) = self {
^1
375| 1| Some(*v)
376| | } else {
377| 1| None
378| | }
379| 2| }
380| |
381| 2| pub fn as_f64(&self) -> Option<f64> {
382| 2| if let EntryValue::F64(v) = self {
^1
383| 1| Some(*v)
384| | } else {
385| 1| None
386| | }
387| 2| }
388| |
389| | /// Widen any integer EntryValue to i64. Returns None for non-integer values
390| | /// (and for U64 values exceeding i64::MAX).
391| 7| pub fn try_as_integer(&self) -> Option<i64> {
392| 7| match self {
393| 2| EntryValue::U8(v) => Some(*v as i64),
394| 0| EntryValue::U16(v) => Some(*v as i64),
395| 1| EntryValue::U32(v) => Some(*v as i64),
396| 1| EntryValue::U64(v) => i64::try_from(*v).ok(),
397| 0| EntryValue::I8(v) => Some(*v as i64),
398| 0| EntryValue::I16(v) => Some(*v as i64),
399| 1| EntryValue::I32(v) => Some(*v as i64),
400| 0| EntryValue::I64(v) => Some(*v),
401| 2| _ => None,
402| | }
403| 7| }
404| |
405| | /// Widen any numeric EntryValue (integer / rational / float) to f64.
406| | /// Rationals with denominator=0 return None.
407| 5| pub fn try_as_float(&self) -> Option<f64> {
408| 5| match self {
409| 1| EntryValue::F32(v) => Some(*v as f64),
410| 0| EntryValue::F64(v) => Some(*v),
411| 2| EntryValue::URational(v) => v.to_f64(),
412| 0| EntryValue::IRational(v) => v.to_f64(),
413| 2| v => v.try_as_integer().map(|x| x as f64),
^1
414| | }
415| 5| }
416| |
417| 0| pub fn as_urational(&self) -> Option<URational> {
418| 0| if let EntryValue::URational(v) = self {
419| 0| Some(*v)
420| | } else {
421| 0| None
422| | }
423| 0| }
424| |
425| 0| pub fn as_irational(&self) -> Option<IRational> {
426| 0| if let EntryValue::IRational(v) = self {
427| 0| Some(*v)
428| | } else {
429| 0| None
430| | }
431| 0| }
432| |
433| 1| pub fn as_urational_slice(&self) -> Option<&[URational]> {
434| 1| if let EntryValue::URationalArray(v) = self {
435| 1| Some(v)
436| | } else {
437| 0| None
438| | }
439| 1| }
440| |
441| 0| pub fn as_irational_slice(&self) -> Option<&[IRational]> {
442| 0| if let EntryValue::IRationalArray(v) = self {
443| 0| Some(v)
444| | } else {
445| 0| None
446| | }
447| 0| }
448| |
449| 39| pub fn as_u8_slice(&self) -> Option<&[u8]> {
450| 39| if let EntryValue::U8Array(v) = self {
^38
451| 38| Some(v)
452| | } else {
453| 1| None
454| | }
455| 39| }
456| |
457| 1| pub fn as_u16_slice(&self) -> Option<&[u16]> {
458| 1| if let EntryValue::U16Array(v) = self {
459| 1| Some(v)
460| | } else {
461| 0| None
462| | }
463| 1| }
464| |
465| 1| pub fn as_u32_slice(&self) -> Option<&[u32]> {
466| 1| if let EntryValue::U32Array(v) = self {
467| 1| Some(v)
468| | } else {
469| 0| None
470| | }
471| 1| }
472| |
473| 1| pub fn as_undefined(&self) -> Option<&[u8]> {
474| 1| if let EntryValue::Undefined(v) = self {
475| 1| Some(v)
476| | } else {
477| 0| None
478| | }
479| 1| }
480| |}
481| |
482| |// Convert time components to EntryValue
483| |impl From<(NaiveDateTime, Option<FixedOffset>)> for EntryValue {
484| 2| fn from(value: (NaiveDateTime, Option<FixedOffset>)) -> Self {
485| 2| if let Some(offset) = value.1 {
^1
486| 1| EntryValue::DateTime(value.0.and_local_timezone(offset).unwrap())
487| | } else {
488| 1| EntryValue::NaiveDateTime(value.0)
489| | }
490| 2| }
491| |}
492| |
493| 121|fn parse_naive_time(s: String) -> Result<NaiveDateTime, EntryError> {
494| 121| NaiveDateTime::parse_from_str(&s, "%Y:%m:%d %H:%M:%S")
495| 121| .map_err(|_| EntryError::InvalidValue("invalid time format"))
496| 121|}
497| |
/// Pads an offset string whose part after the first `:` is shorter than two
/// characters (e.g. `+08:0`) with one trailing `0`.
///
/// Strings without a `:` and strings that are already long enough are
/// returned unchanged.
fn repair_tz_str(tz: &str) -> String {
    match tz.split_once(':') {
        Some((_, minutes)) if minutes.len() < 2 => format!("{tz}0"),
        _ => tz.to_owned(),
    }
}
507| |
/// # Exif Data format
///
/// ```txt
/// | Value           | 1             | 2             | 3              | 4               | 5                 | 6            |
/// |-----------------+---------------+---------------+----------------+-----------------+-------------------+--------------|
/// | Format          | unsigned byte | ascii strings | unsigned short | unsigned long   | unsigned rational | signed byte  |
/// | Bytes/component | 1             | 1             | 2              | 4               | 8                 | 1            |
///
/// | Value           | 7             | 8             | 9              | 10              | 11                | 12           |
/// |-----------------+---------------+---------------+----------------+-----------------+-------------------+--------------|
/// | Format          | undefined     | signed short  | signed long    | signed rational | single float      | double float |
/// | Bytes/component | 1             | 2             | 4              | 8               | 4                 | 8            |
/// ```
///
/// See: [Exif](https://www.media.mit.edu/pia/Research/deepview/exif.html).
#[repr(u16)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(unused)]
pub(crate) enum DataFormat {
    U8 = 1,
    Text = 2,
    U16 = 3,
    U32 = 4,
    URational = 5,
    I8 = 6,
    Undefined = 7,
    I16 = 8,
    I32 = 9,
    IRational = 10,
    F32 = 11,
    F64 = 12,
}

impl DataFormat {
    /// Every variant, used to map a raw format code back to its variant
    /// without duplicating the discriminant values.
    const ALL: [Self; 12] = [
        Self::U8,
        Self::Text,
        Self::U16,
        Self::U32,
        Self::URational,
        Self::I8,
        Self::Undefined,
        Self::I16,
        Self::I32,
        Self::IRational,
        Self::F32,
        Self::F64,
    ];

    /// Number of bytes occupied by one component of this format.
    pub fn component_size(&self) -> usize {
        match self {
            Self::U8 | Self::I8 | Self::Text | Self::Undefined => 1,
            Self::U16 | Self::I16 => 2,
            Self::U32 | Self::I32 | Self::F32 => 4,
            Self::URational | Self::IRational | Self::F64 => 8,
        }
    }
}

impl TryFrom<u16> for DataFormat {
    /// On failure, returns the unrecognized format value so call sites can
    /// pair it with the entry's `count` and build a richer `EntryError`.
    type Error = u16;

    /// Maps a raw format code (1..=12) to its variant.
    ///
    /// Uses a safe lookup over [`DataFormat::ALL`] rather than transmuting
    /// the integer, so no `unsafe` invariant has to be maintained when
    /// variants are added or renumbered.
    fn try_from(v: u16) -> Result<Self, Self::Error> {
        Self::ALL
            .iter()
            .copied()
            .find(|format| *format as u16 == v)
            .ok_or(v)
    }
}
564| |
565| |#[cfg(feature = "serde")]
566| |impl Serialize for EntryValue {
567| | /// Structured per-variant serialization. Numeric variants serialize as
568| | /// JSON numbers, [`EntryValue::Text`] / [`EntryValue::DateTime`] /
569| | /// [`EntryValue::NaiveDateTime`] as strings, rationals as
570| | /// `{"numerator", "denominator"}` objects (and arrays thereof),
571| | /// [`EntryValue::Undefined`] as a continuous lowercase hex string with no
572| | /// truncation, and integer arrays as JSON arrays of numbers.
573| 0| fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
574| 0| where
575| 0| S: Serializer,
576| | {
577| | use serde::ser::SerializeSeq;
578| 0| match self {
579| 0| EntryValue::Text(s) => serializer.serialize_str(s),
580| 0| EntryValue::URational(r) => r.serialize(serializer),
581| 0| EntryValue::IRational(r) => r.serialize(serializer),
582| 0| EntryValue::U8(v) => serializer.serialize_u8(*v),
583| 0| EntryValue::U16(v) => serializer.serialize_u16(*v),
584| 0| EntryValue::U32(v) => serializer.serialize_u32(*v),
585| 0| EntryValue::U64(v) => serializer.serialize_u64(*v),
586| 0| EntryValue::I8(v) => serializer.serialize_i8(*v),
587| 0| EntryValue::I16(v) => serializer.serialize_i16(*v),
588| 0| EntryValue::I32(v) => serializer.serialize_i32(*v),
589| 0| EntryValue::I64(v) => serializer.serialize_i64(*v),
590| 0| EntryValue::F32(v) => serializer.serialize_f32(*v),
591| 0| EntryValue::F64(v) => serializer.serialize_f64(*v),
592| 0| EntryValue::DateTime(t) => serializer.serialize_str(&t.to_rfc3339()),
593| 0| EntryValue::NaiveDateTime(t) => {
594| 0| serializer.serialize_str(&t.format("%Y-%m-%d %H:%M:%S").to_string())
595| | }
596| 0| EntryValue::Undefined(bytes) => {
597| 0| let mut hex = String::with_capacity(bytes.len() * 2);
598| 0| for b in bytes {
599| | use std::fmt::Write;
600| 0| let _ = write!(&mut hex, "{b:02x}");
601| | }
602| 0| serializer.serialize_str(&hex)
603| | }
604| 0| EntryValue::URationalArray(v) => {
605| 0| let mut seq = serializer.serialize_seq(Some(v.len()))?;
606| 0| for r in v {
607| 0| seq.serialize_element(r)?;
608| | }
609| 0| seq.end()
610| | }
611| 0| EntryValue::IRationalArray(v) => {
612| 0| let mut seq = serializer.serialize_seq(Some(v.len()))?;
613| 0| for r in v {
614| 0| seq.serialize_element(r)?;
615| | }
616| 0| seq.end()
617| | }
618| 0| EntryValue::U8Array(v) => v.serialize(serializer),
619| 0| EntryValue::U16Array(v) => v.serialize(serializer),
620| 0| EntryValue::U32Array(v) => v.serialize(serializer),
621| | }
622| 0| }
623| |}
624| |
625| |// impl std::fmt::Debug for EntryValue {
626| |// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
627| |// Display::fmt(self, f)
628| |// }
629| |// }
630| |
631| |impl Display for EntryValue {
632| 3.13k| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
633| 3.13k| match self {
634| 521| EntryValue::Text(v) => v.fmt(f),
635| 429| EntryValue::URational(v) => format!(
636| | "{}/{} ({:.04})",
637| 429| v.numerator(),
638| 429| v.denominator(),
639| 429| v.numerator() as f64 / v.denominator() as f64
640| | )
641| 429| .fmt(f),
642| 185| EntryValue::IRational(v) => format!(
643| | "{}/{} ({:.04})",
644| 185| v.numerator(),
645| 185| v.denominator(),
646| 185| v.numerator() as f64 / v.denominator() as f64
647| | )
648| 185| .fmt(f),
649| 389| EntryValue::U32(v) => Display::fmt(&v, f),
650| 1.04k| EntryValue::U16(v) => Display::fmt(&v, f),
651| 2| EntryValue::U64(v) => Display::fmt(&v, f),
652| 1| EntryValue::I16(v) => Display::fmt(&v, f),
653| 1| EntryValue::I32(v) => Display::fmt(&v, f),
654| 1| EntryValue::I64(v) => Display::fmt(&v, f),
655| 1| EntryValue::F32(v) => Display::fmt(&v, f),
656| 1| EntryValue::F64(v) => Display::fmt(&v, f),
657| 5| EntryValue::U8(v) => Display::fmt(&v, f),
658| 1| EntryValue::I8(v) => Display::fmt(&v, f),
659| 154| EntryValue::DateTime(v) => Display::fmt(&v.to_rfc3339(), f),
660| 40| EntryValue::NaiveDateTime(v) => Display::fmt(&v.format("%Y-%m-%d %H:%M:%S"), f),
661| 293| EntryValue::Undefined(v) => fmt_undefined(v, f),
662| 23| EntryValue::URationalArray(v) => {
663| 23| format!("URationalArray[{}]", rationals_to_string::<u32>(v)).fmt(f)
664| | }
665| 9| EntryValue::IRationalArray(v) => {
666| 9| format!("IRationalArray[{}]", rationals_to_string::<i32>(v)).fmt(f)
667| | }
668| 1| EntryValue::U8Array(v) => fmt_array_to_string("U8Array", v, f),
669| 15| EntryValue::U32Array(v) => fmt_array_to_string("U32Array", v, f),
670| 15| EntryValue::U16Array(v) => fmt_array_to_string("U16Array", v, f),
671| | }
672| 3.13k| }
673| |}
674| |
/// Writes `v` to `f` in the `name[0x.., 0x..]` form produced by
/// [`array_to_string`], honouring the formatter's padding options.
pub(crate) fn fmt_array_to_string<T: Display + LowerHex>(
    name: &str,
    v: &[T],
    f: &mut std::fmt::Formatter,
) -> Result<(), std::fmt::Error> {
    let rendered = array_to_string(name, v);
    Display::fmt(&rendered, f)
}

/// Renders `v` as `name[0x01, 0x02, ...]`: every element in lowercase hex,
/// zero-padded to at least two digits, separated by `", "`.
pub(crate) fn array_to_string<T: Display + LowerHex>(name: &str, v: &[T]) -> String {
    let mut out = String::from(name);
    out.push('[');
    for (index, item) in v.iter().enumerate() {
        if index > 0 {
            out.push_str(", ");
        }
        out.push_str(&format!("0x{item:02x}"));
    }
    out.push(']');
    out
}
691| |
692| 32|fn rationals_to_string<T>(rationals: &[Rational<T>]) -> String
693| 32|where
694| 32| T: Display + Into<f64> + Copy,
695| |{
696| 32| rationals
697| 32| .iter()
698| 111| .map(|x| {
^32
699| 111| format!(
700| | "{}/{} ({:.04})",
701| 111| x.numerator(),
702| 111| x.denominator(),
703| 111| x.numerator().into() / x.denominator().into()
704| | )
705| 111| })
706| 32| .collect::<Vec<String>>()
707| 32| .join(", ")
708| 32|}
709| |
/// Human-facing rendering of the bytes of an `EntryValue::Undefined`.
///
/// * Non-empty and entirely printable ASCII (`0x20..=0x7E`): the text wrapped
///   in double quotes, e.g. `"0220"`.
/// * Anything else: `0x` followed by every byte as two lowercase hex digits,
///   e.g. `0x01020300`; an empty slice prints as `0x`.
///
/// The output is never truncated; callers that need a length cap should
/// impose it at their layer.
fn fmt_undefined(v: &[u8], f: &mut std::fmt::Formatter) -> std::fmt::Result {
    let printable = !v.is_empty() && v.iter().all(|&byte| matches!(byte, 0x20..=0x7e));
    if printable {
        let text = std::str::from_utf8(v).expect("ASCII subset is valid UTF-8");
        return write!(f, "\"{text}\"");
    }

    f.write_str("0x")?;
    v.iter().try_for_each(|byte| write!(f, "{byte:02x}"))
}
729| |
730| |impl From<DateTime<FixedOffset>> for EntryValue {
731| 11| fn from(value: DateTime<FixedOffset>) -> Self {
732| 11| EntryValue::DateTime(value)
733| 11| }
734| |}
735| |
736| |impl From<u8> for EntryValue {
737| 5| fn from(value: u8) -> Self {
738| 5| EntryValue::U8(value)
739| 5| }
740| |}
741| |impl From<u16> for EntryValue {
742| 1| fn from(value: u16) -> Self {
743| 1| EntryValue::U16(value)
744| 1| }
745| |}
746| |impl From<u32> for EntryValue {
747| 266| fn from(value: u32) -> Self {
748| 266| EntryValue::U32(value)
749| 266| }
750| |}
751| |impl From<u64> for EntryValue {
752| 135| fn from(value: u64) -> Self {
753| 135| EntryValue::U64(value)
754| 135| }
755| |}
756| |
757| |impl From<i8> for EntryValue {
758| 1| fn from(value: i8) -> Self {
759| 1| EntryValue::I8(value)
760| 1| }
761| |}
762| |impl From<i16> for EntryValue {
763| 1| fn from(value: i16) -> Self {
764| 1| EntryValue::I16(value)
765| 1| }
766| |}
767| |impl From<i32> for EntryValue {
768| 1| fn from(value: i32) -> Self {
769| 1| EntryValue::I32(value)
770| 1| }
771| |}
772| |impl From<i64> for EntryValue {
773| 5| fn from(value: i64) -> Self {
774| 5| EntryValue::I64(value)
775| 5| }
776| |}
777| |
778| |impl From<f32> for EntryValue {
779| 5| fn from(value: f32) -> Self {
780| 5| EntryValue::F32(value)
781| 5| }
782| |}
783| |impl From<f64> for EntryValue {
784| 1| fn from(value: f64) -> Self {
785| 1| EntryValue::F64(value)
786| 1| }
787| |}
788| |
789| |impl From<String> for EntryValue {
790| 20| fn from(value: String) -> Self {
791| 20| EntryValue::Text(value)
792| 20| }
793| |}
794| |
795| |impl From<&str> for EntryValue {
796| 19| fn from(value: &str) -> Self {
797| 19| value.to_owned().into()
798| 19| }
799| |}
800| |
/// Rational with unsigned 32-bit numerator and denominator.
pub type URational = Rational<u32>;
/// Rational with signed 32-bit numerator and denominator.
pub type IRational = Rational<i32>;

/// A numerator/denominator pair stored exactly as given: no reduction and no
/// validation, so the denominator may be zero.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub struct Rational<T> {
    numerator: T,
    denominator: T,
}

impl<T: Copy> Rational<T> {
    /// Creates a rational from its two components without checking them.
    pub const fn new(numerator: T, denominator: T) -> Self {
        Self {
            numerator,
            denominator,
        }
    }

    /// The stored numerator.
    pub const fn numerator(&self) -> T {
        self.numerator
    }

    /// The stored denominator.
    pub const fn denominator(&self) -> T {
        self.denominator
    }
}

impl<T: Copy + Into<f64> + PartialEq + Default> Rational<T> {
    /// Returns the quotient as `f64`, or `None` if the denominator is zero
    /// (i.e. equal to `T::default()`).
    #[allow(clippy::wrong_self_convention)]
    pub fn to_f64(&self) -> Option<f64> {
        let Self {
            numerator,
            denominator,
        } = *self;

        if denominator != T::default() {
            Some(Into::<f64>::into(numerator) / Into::<f64>::into(denominator))
        } else {
            None
        }
    }
}
839| |
840| |impl TryFrom<IRational> for URational {
841| | type Error = crate::ConvertError;
842| 329| fn try_from(value: IRational) -> Result<Self, Self::Error> {
843| 329| let n = value.numerator();
844| 329| let d = value.denominator();
845| 329| if n < 0 || d < 0 {
^327
846| 3| Err(crate::ConvertError::NegativeRational)
847| | } else {
848| 326| Ok(URational::new(n as u32, d as u32))
849| | }
850| 329| }
851| |}
852| |
853| 2.46k|pub(crate) fn get_cstr(data: &[u8]) -> std::result::Result<String, FromUtf8Error> {
854| 2.46k| let vec = filter_zero(data);
855| 2.46k| if let Ok(s) = String::from_utf8(vec) {
^2.46k
856| 2.46k| Ok(s)
857| | } else {
858| 1| Ok(filter_zero(data)
859| 1| .into_iter()
860| 2| .map(|x| x.as_char())
^1
861| 1| .collect::<String>())
862| | }
863| 2.46k|}
864| |
/// Copies the first run of non-zero bytes out of `data`.
///
/// Leading zero bytes are skipped; the copy stops at the next zero byte, so
/// that byte and everything after it are dropped.
pub(crate) fn filter_zero(data: &[u8]) -> Vec<u8> {
    let start = data
        .iter()
        .position(|&byte| byte != 0)
        .unwrap_or(data.len());
    let tail = &data[start..];
    let end = tail.iter().position(|&byte| byte == 0).unwrap_or(tail.len());
    tail[..end].to_vec()
}
874| |
875| |pub(crate) trait TryFromBytes: Sized {
876| | fn try_from_bytes(bs: &[u8], endian: Endianness) -> Result<Self, EntryError>;
877| |}
878| |
879| |macro_rules! impl_try_from_bytes {
880| | ($type:ty) => {
881| | impl TryFromBytes for $type {
882| 11.2k| fn try_from_bytes(bs: &[u8], endian: Endianness) -> Result<Self, EntryError> {
883| 1| fn make_err<T>(available: usize) -> EntryError {
884| 1| EntryError::Truncated {
885| 1| needed: std::mem::size_of::<T>(),
886| 1| available,
887| 1| }
888| 1| }
889| 11.2k| match endian {
890| | Endianness::Big => {
891| 10.5k| let (int_bytes, _) = bs
^10.5k
892| 10.5k| .split_at_checked(std::mem::size_of::<Self>())
893| 10.5k| .ok_or_else(|| make_err::<Self>(bs.len()))?;
^1 ^1 ^1 ^1
894| 10.5k| Ok(Self::from_be_bytes(
895| 10.5k| int_bytes
896| 10.5k| .try_into()
897| 10.5k| .map_err(|_| make_err::<Self>(bs.len()))?,
^0 ^0 ^0 ^0
898| | ))
899| | }
900| | Endianness::Little => {
901| 718| let (int_bytes, _) = bs
902| 718| .split_at_checked(std::mem::size_of::<Self>())
903| 718| .ok_or_else(|| make_err::<Self>(bs.len()))?;
^0 ^0 ^0 ^0
904| 718| Ok(Self::from_le_bytes(
905| 718| int_bytes
906| 718| .try_into()
907| 718| .map_err(|_| make_err::<Self>(bs.len()))?,
^0 ^0 ^0 ^0
908| | ))
909| | }
910| 0| Endianness::Native => unimplemented!(),
911| | }
912| 11.2k| }
913| | }
914| | };
915| |}
916| |
917| |impl_try_from_bytes!(u32);
918| |impl_try_from_bytes!(i32);
919| |impl_try_from_bytes!(u16);
920| |impl_try_from_bytes!(i16);
921| |impl_try_from_bytes!(f32);
922| |impl_try_from_bytes!(f64);
923| |
924| 3.58k|pub(crate) fn decode_rational<T: TryFromBytes + Copy>(
925| 3.58k| data: &[u8],
926| 3.58k| endian: Endianness,
927| 3.58k|) -> Result<Rational<T>, EntryError> {
928| 3.58k| if data.len() < 8 {
929| 1| return Err(EntryError::Truncated {
930| 1| needed: 8,
931| 1| available: data.len(),
932| 1| });
933| 3.58k| }
934| |
935| 3.58k| let numerator = T::try_from_bytes(data, endian)?;
^0
936| 3.58k| let denominator = T::try_from_bytes(&data[4..], endian)?; // Safe-slice
^0
937| 3.58k| Ok(Rational::<T>::new(numerator, denominator))
938| 3.58k|}
939| |
940| |#[cfg(test)]
941| |mod tests {
942| | use chrono::{Local, NaiveDateTime, TimeZone};
943| |
944| | use super::*;
945| |
946| | #[test]
947| 1| fn test_parse_time() {
948| 1| let s = "2023:07:09 20:36:33";
949| 1| let t1 = NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S").unwrap();
950| 1| let t1 = Local.from_local_datetime(&t1).unwrap();
951| |
952| 1| let tz = t1.format("%:z").to_string();
953| |
954| 1| let s = format!("2023:07:09 20:36:33 {tz}");
955| 1| let t2 = DateTime::parse_from_str(&s, "%Y:%m:%d %H:%M:%S %z").unwrap();
956| |
957| 1| let t3 = t2.with_timezone(t2.offset());
958| |
959| 1| assert_eq!(t1, t2);
960| 1| assert_eq!(t1, t3);
961| 1| }
962| |
963| | #[test]
964| 1| fn test_iso_8601() {
965| 1| let s = "2023-11-02T19:58:34+0800";
966| 1| let t1 = DateTime::parse_from_str(s, "%+").unwrap();
967| |
968| 1| let s = "2023-11-02T19:58:34+08:00";
969| 1| let t2 = DateTime::parse_from_str(s, "%+").unwrap();
970| |
971| 1| let s = "2023-11-02T19:58:34.026490+08:00";
972| 1| let t3 = DateTime::parse_from_str(s, "%+").unwrap();
973| |
974| 1| assert_eq!(t1, t2);
975| 1| assert!(t3 > t2);
976| 1| }
977| |
978| | #[test]
979| 1| fn test_date_time_components() {
980| 1| let dt = DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap();
981| 1| let ndt =
982| 1| NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap();
983| 1| let offset = FixedOffset::east_opt(8 * 3600).unwrap();
984| |
985| 1| let ev = EntryValue::DateTime(dt);
986| 1| let edt = ev.as_datetime().unwrap();
987| 1| assert_eq!(edt.aware(), Some(dt));
988| 1| assert_eq!(edt.into_naive(), ndt);
989| 1| assert_eq!(edt.or_offset(FixedOffset::east_opt(0).unwrap()), dt);
990| |
991| 1| let ev = EntryValue::NaiveDateTime(ndt);
992| 1| let edt = ev.as_datetime().unwrap();
993| 1| assert_eq!(edt.aware(), None);
994| 1| assert_eq!(edt.into_naive(), ndt);
995| 1| assert_eq!(edt.or_offset(offset), dt);
996| 1| }
997| |
998| | #[test]
999| 1| fn rational_to_f64_normal() {
1000| 1| let r = URational::new(1, 2);
1001| 1| assert_eq!(r.numerator(), 1);
1002| 1| assert_eq!(r.denominator(), 2);
1003| 1| assert_eq!(r.to_f64(), Some(0.5));
1004| 1| }
1005| |
1006| | #[test]
1007| 1| fn rational_to_f64_zero_denominator() {
1008| 1| let r = URational::new(1, 0);
1009| 1| assert_eq!(r.to_f64(), None);
1010| |
1011| 1| let r = IRational::new(-1, 0);
1012| 1| assert_eq!(r.to_f64(), None);
1013| 1| }
1014| |
1015| | #[test]
1016| 1| fn rational_default() {
1017| 1| let r = URational::default();
1018| 1| assert_eq!(r.numerator(), 0);
1019| 1| assert_eq!(r.denominator(), 0);
1020| 1| }
1021| |
1022| | #[test]
1023| 1| fn irational_to_urational_positive() {
1024| 1| let i = IRational::new(3, 4);
1025| 1| let u: URational = i.try_into().unwrap();
1026| 1| assert_eq!(u, URational::new(3, 4));
1027| 1| }
1028| |
1029| | #[test]
1030| 1| fn irational_to_urational_negative_numerator() {
1031| 1| let i = IRational::new(-3, 4);
1032| 1| let err = URational::try_from(i).unwrap_err();
1033| 1| assert!(matches!(err, crate::ConvertError::NegativeRational));
^0
1034| 1| }
1035| |
1036| | #[test]
1037| 1| fn irational_to_urational_negative_denominator() {
1038| 1| let i = IRational::new(3, -4);
1039| 1| let err = URational::try_from(i).unwrap_err();
1040| 1| assert!(matches!(err, crate::ConvertError::NegativeRational));
^0
1041| 1| }
1042| |
1043| | #[test]
1044| 1| fn entry_value_as_i64_f64() {
1045| 1| assert_eq!(EntryValue::I64(-7).as_i64(), Some(-7));
1046| 1| assert_eq!(EntryValue::F64(2.5).as_f64(), Some(2.5));
1047| 1| assert_eq!(EntryValue::I32(7).as_i64(), None);
1048| 1| assert_eq!(EntryValue::F32(2.5).as_f64(), None);
1049| 1| }
1050| |
1051| | #[test]
1052| 1| fn entry_value_try_as_integer() {
1053| 1| assert_eq!(EntryValue::U8(7).try_as_integer(), Some(7));
1054| 1| assert_eq!(
1055| 1| EntryValue::U32(0xffff_ffff).try_as_integer(),
1056| | Some(0xffff_ffff_i64)
1057| | );
1058| 1| assert_eq!(EntryValue::I32(-7).try_as_integer(), Some(-7));
1059| 1| assert_eq!(EntryValue::U64(u64::MAX).try_as_integer(), None);
1060| 1| assert_eq!(EntryValue::Text("x".into()).try_as_integer(), None);
1061| 1| }
1062| |
1063| | #[test]
1064| 1| fn entry_value_try_as_float() {
1065| 1| assert_eq!(EntryValue::U8(7).try_as_float(), Some(7.0));
1066| 1| assert_eq!(EntryValue::F32(1.5).try_as_float(), Some(1.5));
1067| 1| assert_eq!(
1068| 1| EntryValue::URational(URational::new(1, 2)).try_as_float(),
1069| | Some(0.5)
1070| | );
1071| 1| assert_eq!(
1072| 1| EntryValue::URational(URational::new(1, 0)).try_as_float(),
1073| | None
1074| | );
1075| 1| assert_eq!(EntryValue::Text("x".into()).try_as_float(), None);
1076| 1| }
1077| |
1078| | #[test]
1079| 1| fn entry_value_slice_accessors() {
1080| 1| assert_eq!(
1081| 1| EntryValue::U8Array(vec![1, 2]).as_u8_slice(),
1082| 1| Some(&[1u8, 2][..])
1083| | );
1084| 1| assert_eq!(
1085| 1| EntryValue::U16Array(vec![1, 2]).as_u16_slice(),
1086| 1| Some(&[1u16, 2][..])
1087| | );
1088| 1| assert_eq!(
1089| 1| EntryValue::U32Array(vec![1, 2]).as_u32_slice(),
1090| 1| Some(&[1u32, 2][..])
1091| | );
1092| 1| assert_eq!(
1093| 1| EntryValue::Undefined(vec![1, 2]).as_undefined(),
1094| 1| Some(&[1u8, 2][..])
1095| | );
1096| 1| let r = URational::new(1, 2);
1097| 1| assert_eq!(
1098| 1| EntryValue::URationalArray(vec![r]).as_urational_slice(),
1099| 1| Some(&[r][..])
1100| | );
1101| 1| }
1102| |
1103| | #[test]
1104| 1| fn entry_parse_invalid_shape_for_each_format() {
1105| | // Each non-array variant returns InvalidShape when components_num
1106| | // doesn't match the format constraints (covers lines 195-197,
1107| | // 212-214, 226-228, 234-236, 241-243, 256-258, 263-265).
1108| | use crate::error::EntryError;
1109| |
1110| | // Note: for U16/U32 with count=1 and short data, the single-component
1111| | // branch goes through try_from_bytes and yields Truncated, not
1112| | // InvalidShape. To hit the InvalidShape arm in the many_m_n path
1113| | // (lines 195-197 and 212-214) we pass count=2 with too few bytes for
1114| | // 2 components but enough so the slice itself isn't empty.
1115| 1| let cases: &[(DataFormat, &[u8], u32)] = &[
1116| 1| (DataFormat::U16, &[0u8, 0], 2),
1117| 1| (DataFormat::U32, &[0u8; 4], 2),
1118| 1| (DataFormat::I8, &[0u8, 0], 2),
1119| 1| (DataFormat::I16, &[0u8, 0], 2),
1120| 1| (DataFormat::I32, &[0u8; 4], 2),
1121| 1| (DataFormat::F32, &[0u8; 4], 2),
1122| 1| (DataFormat::F64, &[0u8; 8], 2),
1123| 1| ];
1124| 7| for (fmt, data, count) in cases {
^1
1125| 7| let entry = EntryData {
1126| 7| tag: 0,
1127| 7| endian: Endianness::Little,
1128| 7| data,
1129| 7| data_format: *fmt,
1130| 7| components_num: *count,
1131| 7| };
1132| 7| let err = EntryValue::parse(&entry, &None).unwrap_err();
1133| 7| assert!(
1134| 7| matches!(err, EntryError::InvalidShape { .. }),
^0
1135| | "{fmt:?} should yield InvalidShape, got {err:?}"
1136| | );
1137| | }
1138| 1| }
1139| |
1140| | #[test]
1141| 1| fn entry_parse_variant_default_for_each_format() {
1142| | // Drive variant_default for every DataFormat variant by passing
1143| | // components_num=0 with non-empty data (covers lines 149-151 and
1144| | // the matching arms in variant_default at 273-288).
1145| | type Check = fn(&EntryValue) -> bool;
1146| 1| let formats: &[(DataFormat, Check)] = &[
1147| 1| (DataFormat::U8, |v| matches!(v, EntryValue::U8(0))),
1148| | (
1149| | DataFormat::Text,
1150| 1| |v| matches!(v, EntryValue::Text(s) if s.is_empty()),
1151| | ),
1152| 1| (DataFormat::U16, |v| matches!(v, EntryValue::U16(0))),
1153| 1| (DataFormat::U32, |v| matches!(v, EntryValue::U32(0))),
1154| | (
1155| | DataFormat::URational,
1156| 1| |v| matches!(v, EntryValue::URational(r) if r.numerator() == 0 && r.denominator() == 0),
1157| | ),
1158| 1| (DataFormat::I8, |v| matches!(v, EntryValue::I8(0))),
1159| | (
1160| | DataFormat::Undefined,
1161| 1| |v| matches!(v, EntryValue::Undefined(d) if d.is_empty()),
1162| | ),
1163| 1| (DataFormat::I16, |v| matches!(v, EntryValue::I16(0))),
1164| 1| (DataFormat::I32, |v| matches!(v, EntryValue::I32(0))),
1165| | (
1166| | DataFormat::IRational,
1167| 1| |v| matches!(v, EntryValue::IRational(r) if r.numerator() == 0 && r.denominator() == 0),
1168| | ),
1169| | (
1170| | DataFormat::F32,
1171| 1| |v| matches!(v, EntryValue::F32(x) if *x == 0.0),
1172| | ),
1173| | (
1174| | DataFormat::F64,
1175| 1| |v| matches!(v, EntryValue::F64(x) if *x == 0.0),
1176| | ),
1177| | ];
1178| 12| for (fmt, check) in formats {
^1
1179| 12| let entry = EntryData {
1180| 12| tag: 0,
1181| 12| endian: Endianness::Little,
1182| 12| data: &[0u8],
1183| 12| data_format: *fmt,
1184| 12| components_num: 0,
1185| 12| };
1186| 12| let v = EntryValue::parse(&entry, &None).unwrap();
1187| 12| assert!(check(&v), "variant_default for {fmt:?} returned {v:?}");
1188| | }
1189| 1| }
1190| |
1191| | #[test]
1192| 1| fn entry_urational_truncated_data_errors() {
1193| | // URational format needs 8 bytes per component; passing 1 byte with
1194| | // components_num=1 should error (drives the rational decode path
1195| | // through to an error result — covers parts of try_as_rationals).
1196| 1| let entry = EntryData {
1197| 1| tag: 0,
1198| 1| endian: Endianness::Little,
1199| 1| data: &[0u8; 1],
1200| 1| data_format: DataFormat::URational,
1201| 1| components_num: 1,
1202| 1| };
1203| 1| let res = EntryValue::parse(&entry, &None);
1204| 1| assert!(res.is_err(), "URational with truncated data should error");
1205| 1| }
1206| |
1207| | #[test]
1208| 1| fn entry_value_accessor_none_arms() {
1209| | // Cover the `_ => None` arms in the various as_* accessors.
1210| 1| let v = EntryValue::U16(5);
1211| 1| assert!(v.as_str().is_none());
1212| 1| assert!(v.as_datetime().is_none());
1213| 1| assert!(v.as_u8().is_none());
1214| 1| }
1215| |
1216| | #[test]
1217| 1| fn entry_value_display_for_each_variant() {
1218| | // Drive Display::fmt for every variant (covers lines 631-672 and
1219| | // the helper fmt_array_to_string / rationals_to_string).
1220| 1| assert_eq!(format!("{}", EntryValue::Text("hi".into())), "hi");
1221| 1| assert_eq!(
1222| 1| format!("{}", EntryValue::URational(URational::new(1, 2))),
1223| | "1/2 (0.5000)"
1224| | );
1225| 1| assert_eq!(
1226| 1| format!("{}", EntryValue::IRational(IRational::new(-1, 2))),
1227| | "-1/2 (-0.5000)"
1228| | );
1229| 1| assert_eq!(format!("{}", EntryValue::U8(8)), "8");
1230| 1| assert_eq!(format!("{}", EntryValue::U16(16)), "16");
1231| 1| assert_eq!(format!("{}", EntryValue::U32(32)), "32");
1232| 1| assert_eq!(format!("{}", EntryValue::U64(64)), "64");
1233| 1| assert_eq!(format!("{}", EntryValue::I8(-8)), "-8");
1234| 1| assert_eq!(format!("{}", EntryValue::I16(-16)), "-16");
1235| 1| assert_eq!(format!("{}", EntryValue::I32(-32)), "-32");
1236| 1| assert_eq!(format!("{}", EntryValue::I64(-64)), "-64");
1237| 1| assert_eq!(format!("{}", EntryValue::F32(1.5)), "1.5");
1238| 1| assert_eq!(format!("{}", EntryValue::F64(2.5)), "2.5");
1239| |
1240| | // DateTime / NaiveDateTime
1241| 1| let ndt =
1242| 1| NaiveDateTime::parse_from_str("2024-01-02 03:04:05", "%Y-%m-%d %H:%M:%S").unwrap();
1243| 1| let dt = ndt
1244| 1| .and_local_timezone(FixedOffset::east_opt(0).unwrap())
1245| 1| .unwrap();
1246| 1| assert!(format!("{}", EntryValue::DateTime(dt)).contains("2024-01-02"));
1247| 1| assert_eq!(
1248| 1| format!("{}", EntryValue::NaiveDateTime(ndt)),
1249| | "2024-01-02 03:04:05"
1250| | );
1251| |
1252| | // fmt_undefined: printable ASCII → quoted, non-printable → 0xhex.
1253| 1| assert_eq!(
1254| 1| format!("{}", EntryValue::Undefined(b"0220".to_vec())),
1255| | "\"0220\""
1256| | );
1257| 1| assert_eq!(
1258| 1| format!("{}", EntryValue::Undefined(vec![0x01, 0x02, 0x03])),
1259| | "0x010203"
1260| | );
1261| 1| assert_eq!(format!("{}", EntryValue::Undefined(vec![])), "0x");
1262| |
1263| | // Arrays
1264| 1| let s = format!(
1265| | "{}",
1266| 1| EntryValue::URationalArray(vec![URational::new(1, 2), URational::new(3, 4)])
1267| | );
1268| 1| assert!(s.starts_with("URationalArray["));
1269| 1| let s = format!(
1270| | "{}",
1271| 1| EntryValue::IRationalArray(vec![IRational::new(-1, 2)])
1272| | );
1273| 1| assert!(s.starts_with("IRationalArray["));
1274| 1| let s = format!("{}", EntryValue::U8Array(vec![1, 2, 3]));
1275| 1| assert!(s.starts_with("U8Array"));
1276| 1| let s = format!("{}", EntryValue::U16Array(vec![1, 2]));
1277| 1| assert!(s.starts_with("U16Array"));
1278| 1| let s = format!("{}", EntryValue::U32Array(vec![1, 2]));
1279| 1| assert!(s.starts_with("U32Array"));
1280| 1| }
1281| |
1282| | #[test]
1283| 1| fn entry_value_from_impls() {
1284| | // Cover all From<numeric> / From<String> / From<&str> /
1285| | // From<DateTime<FixedOffset>> impls (lines 730-799).
1286| 1| assert_eq!(EntryValue::from(1u8), EntryValue::U8(1));
1287| 1| assert_eq!(EntryValue::from(1u16), EntryValue::U16(1));
1288| 1| assert_eq!(EntryValue::from(1u32), EntryValue::U32(1));
1289| 1| assert_eq!(EntryValue::from(1u64), EntryValue::U64(1));
1290| 1| assert_eq!(EntryValue::from(-1i8), EntryValue::I8(-1));
1291| 1| assert_eq!(EntryValue::from(-1i16), EntryValue::I16(-1));
1292| 1| assert_eq!(EntryValue::from(-1i32), EntryValue::I32(-1));
1293| 1| assert_eq!(EntryValue::from(-1i64), EntryValue::I64(-1));
1294| 1| assert_eq!(EntryValue::from(1.5f32), EntryValue::F32(1.5));
1295| 1| assert_eq!(EntryValue::from(1.5f64), EntryValue::F64(1.5));
1296| 1| assert_eq!(
1297| 1| EntryValue::from(String::from("abc")),
1298| 1| EntryValue::Text("abc".into())
1299| | );
1300| 1| assert_eq!(EntryValue::from("abc"), EntryValue::Text("abc".into()));
1301| |
1302| 1| let ndt =
1303| 1| NaiveDateTime::parse_from_str("2024-01-02 03:04:05", "%Y-%m-%d %H:%M:%S").unwrap();
1304| 1| let dt = ndt
1305| 1| .and_local_timezone(FixedOffset::east_opt(0).unwrap())
1306| 1| .unwrap();
1307| 1| assert_eq!(EntryValue::from(dt), EntryValue::DateTime(dt));
1308| 1| }
1309| |
1310| | #[test]
1311| 1| fn data_format_component_size_and_try_from() {
1312| | // Cover DataFormat::component_size (lines 542-549) and the
1313| | // TryFrom<u16> impl (lines 552-562) including the error arm.
1314| 1| assert_eq!(DataFormat::U8.component_size(), 1);
1315| 1| assert_eq!(DataFormat::I8.component_size(), 1);
1316| 1| assert_eq!(DataFormat::Text.component_size(), 1);
1317| 1| assert_eq!(DataFormat::Undefined.component_size(), 1);
1318| 1| assert_eq!(DataFormat::U16.component_size(), 2);
1319| 1| assert_eq!(DataFormat::I16.component_size(), 2);
1320| 1| assert_eq!(DataFormat::U32.component_size(), 4);
1321| 1| assert_eq!(DataFormat::I32.component_size(), 4);
1322| 1| assert_eq!(DataFormat::F32.component_size(), 4);
1323| 1| assert_eq!(DataFormat::URational.component_size(), 8);
1324| 1| assert_eq!(DataFormat::IRational.component_size(), 8);
1325| 1| assert_eq!(DataFormat::F64.component_size(), 8);
1326| |
1327| 12| for code in 1u16..=12 {
^1
1328| 12| assert!(DataFormat::try_from(code).is_ok(), "code {code} should map");
1329| | }
1330| 1| assert_eq!(DataFormat::try_from(0), Err(0));
1331| 1| assert_eq!(DataFormat::try_from(13), Err(13));
1332| 1| assert_eq!(DataFormat::try_from(0xFFFF), Err(0xFFFF));
1333| 1| }
1334| |
1335| | #[test]
1336| 1| fn entry_parse_single_component_success_paths() {
1337| | // Cover the single-component success arms of parse() for the
1338| | // numeric formats (lines 178, 186, 203, 227, 235, 242, 257, 264)
1339| | // and Undefined (line 233).
1340| | type Check = fn(&EntryValue) -> bool;
1341| 1| let cases: &[(DataFormat, &[u8], Check)] = &[
1342| 1| (DataFormat::U8, &[42], |v| matches!(v, EntryValue::U8(42))),
1343| 1| (DataFormat::U16, &[1, 0], |v| {
1344| 1| matches!(v, EntryValue::U16(1))
^0
1345| 1| }),
1346| 1| (DataFormat::U32, &[1, 0, 0, 0], |v| {
1347| 1| matches!(v, EntryValue::U32(1))
^0
1348| 1| }),
1349| 1| (DataFormat::I8, &[0xFF], |v| matches!(v, EntryValue::I8(-1))),
1350| 1| (DataFormat::I16, &[0xFF, 0xFF], |v| {
1351| 1| matches!(v, EntryValue::I16(-1))
^0
1352| 1| }),
1353| 1| (DataFormat::I32, &[0xFF, 0xFF, 0xFF, 0xFF], |v| {
1354| 1| matches!(v, EntryValue::I32(-1))
^0
1355| 1| }),
1356| | (
1357| | DataFormat::F32,
1358| | &[0, 0, 0x80, 0x3F],
1359| 1| |v| matches!(v, EntryValue::F32(x) if (*x - 1.0).abs() < 1e-6),
1360| | ),
1361| | (
1362| | DataFormat::F64,
1363| | &[0, 0, 0, 0, 0, 0, 0xF0, 0x3F],
1364| 1| |v| matches!(v, EntryValue::F64(x) if (*x - 1.0).abs() < 1e-9),
1365| | ),
1366| | (
1367| | DataFormat::Undefined,
1368| | &[0xAA, 0xBB],
1369| 1| |v| matches!(v, EntryValue::Undefined(d) if d == &[0xAA, 0xBB]),
1370| | ),
1371| | ];
1372| 9| for (fmt, data, check) in cases {
^1
1373| 9| let entry = EntryData {
1374| 9| tag: 0,
1375| 9| endian: Endianness::Little,
1376| 9| data,
1377| 9| data_format: *fmt,
1378| 9| components_num: 1,
1379| 9| };
1380| 9| let v = EntryValue::parse(&entry, &None).unwrap();
1381| 9| assert!(check(&v), "{fmt:?} single-component returned {v:?}");
1382| | }
1383| |
1384| | // Multi-component success paths for U16/U32/U8 arrays + rationals.
1385| 1| let entry = EntryData {
1386| 1| tag: 0,
1387| 1| endian: Endianness::Little,
1388| 1| data: &[1, 0, 2, 0],
1389| 1| data_format: DataFormat::U16,
1390| 1| components_num: 2,
1391| 1| };
1392| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1393| 1| assert!(matches!(v, EntryValue::U16Array(ref a) if a == &[1u16, 2]));
1394| |
1395| 1| let entry = EntryData {
1396| 1| tag: 0,
1397| 1| endian: Endianness::Little,
1398| 1| data: &[1, 0, 0, 0, 2, 0, 0, 0],
1399| 1| data_format: DataFormat::U32,
1400| 1| components_num: 2,
1401| 1| };
1402| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1403| 1| assert!(matches!(v, EntryValue::U32Array(ref a) if a == &[1u32, 2]));
1404| |
1405| 1| let entry = EntryData {
1406| 1| tag: 0,
1407| 1| endian: Endianness::Little,
1408| 1| data: &[1, 2, 3],
1409| 1| data_format: DataFormat::U8,
1410| 1| components_num: 3,
1411| 1| };
1412| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1413| 1| assert!(matches!(v, EntryValue::U8Array(ref a) if a == &[1u8, 2, 3]));
1414| |
1415| | // URational single + array.
1416| 1| let entry = EntryData {
1417| 1| tag: 0,
1418| 1| endian: Endianness::Little,
1419| 1| data: &[1, 0, 0, 0, 2, 0, 0, 0],
1420| 1| data_format: DataFormat::URational,
1421| 1| components_num: 1,
1422| 1| };
1423| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1424| 1| assert!(
1425| 1| matches!(v, EntryValue::URational(r) if r.numerator() == 1 && r.denominator() == 2)
1426| | );
1427| |
1428| 1| let entry = EntryData {
1429| 1| tag: 0,
1430| 1| endian: Endianness::Little,
1431| 1| data: &[1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
1432| 1| data_format: DataFormat::URational,
1433| 1| components_num: 2,
1434| 1| };
1435| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1436| 1| assert!(matches!(v, EntryValue::URationalArray(ref a) if a.len() == 2));
1437| |
1438| | // IRational single + array.
1439| 1| let entry = EntryData {
1440| 1| tag: 0,
1441| 1| endian: Endianness::Little,
1442| 1| data: &[0xFF, 0xFF, 0xFF, 0xFF, 2, 0, 0, 0],
1443| 1| data_format: DataFormat::IRational,
1444| 1| components_num: 1,
1445| 1| };
1446| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1447| 1| assert!(
1448| 1| matches!(v, EntryValue::IRational(r) if r.numerator() == -1 && r.denominator() == 2)
1449| | );
1450| |
1451| 1| let entry = EntryData {
1452| 1| tag: 0,
1453| 1| endian: Endianness::Little,
1454| 1| data: &[0xFF, 0xFF, 0xFF, 0xFF, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0],
1455| 1| data_format: DataFormat::IRational,
1456| 1| components_num: 2,
1457| 1| };
1458| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1459| 1| assert!(matches!(v, EntryValue::IRationalArray(ref a) if a.len() == 2));
1460| |
1461| | // Text path.
1462| 1| let entry = EntryData {
1463| 1| tag: 0,
1464| 1| endian: Endianness::Little,
1465| 1| data: b"hello\0",
1466| 1| data_format: DataFormat::Text,
1467| 1| components_num: 6,
1468| 1| };
1469| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1470| 1| assert!(matches!(v, EntryValue::Text(ref s) if s == "hello"));
1471| 1| }
1472| |
1473| | #[test]
1474| 1| fn entry_parse_empty_data_errors() {
1475| | // Cover lines 136-141: data.is_empty() returns InvalidShape.
1476| | use crate::error::EntryError;
1477| 1| let entry = EntryData {
1478| 1| tag: 0,
1479| 1| endian: Endianness::Little,
1480| 1| data: &[],
1481| 1| data_format: DataFormat::U16,
1482| 1| components_num: 1,
1483| 1| };
1484| 1| let err = EntryValue::parse(&entry, &None).unwrap_err();
1485| 1| assert!(matches!(err, EntryError::InvalidShape { .. }));
^0
1486| 1| }
1487| |
1488| | #[test]
1489| 1| fn get_cstr_non_utf8_falls_back() {
1490| | // Hitting the fall-back branch in get_cstr (lines 858-861) by
1491| | // routing invalid UTF-8 through the Text variant of parse().
1492| 1| let entry = EntryData {
1493| 1| tag: 0,
1494| 1| endian: Endianness::Little,
1495| 1| // 0xFF is not valid UTF-8 alone.
1496| 1| data: &[0xFFu8, b'a', 0],
1497| 1| data_format: DataFormat::Text,
1498| 1| components_num: 3,
1499| 1| };
1500| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1501| 1| assert!(matches!(v, EntryValue::Text(_)));
^0
1502| 1| }
1503| |
1504| | #[test]
1505| 1| fn try_from_bytes_big_endian_and_truncated() {
1506| | // Cover the big-endian arm (lines 891-898) and the Truncated error
1507| | // (lines 883-887, 893) of TryFromBytes.
1508| 1| let entry = EntryData {
1509| 1| tag: 0,
1510| 1| endian: Endianness::Big,
1511| 1| data: &[0, 1],
1512| 1| data_format: DataFormat::U16,
1513| 1| components_num: 1,
1514| 1| };
1515| 1| let v = EntryValue::parse(&entry, &None).unwrap();
1516| 1| assert!(matches!(v, EntryValue::U16(1)));
^0
1517| |
1518| | // Truncated.
1519| 1| let entry = EntryData {
1520| 1| tag: 0,
1521| 1| endian: Endianness::Big,
1522| 1| data: &[0],
1523| 1| data_format: DataFormat::U16,
1524| 1| components_num: 1,
1525| 1| };
1526| 1| let err = EntryValue::parse(&entry, &None).unwrap_err();
1527| 1| assert!(matches!(err, crate::error::EntryError::Truncated { .. }));
^0
1528| 1| }
1529| |}
/home/min/dev/nom-exif/src/video.rs:
1| |use std::collections::BTreeMap;
2| |
3| |use crate::{
4| | ebml::webm::parse_webm,
5| | error::ParsingError,
6| | file::MediaMimeTrack,
7| | mov::{extract_moov_body_from_buf, parse_isobmff},
8| | EntryValue, GPSInfo,
9| |};
10| |
11| |/// Try to keep the tag name consistent with [`crate::ExifTag`], and add some
12| |/// unique to video/audio, such as `DurationMs`.
13| |///
14| |/// Different variants of `TrackInfoTag` may have different value types, please
15| |/// refer to the documentation of each variant.
16| |#[derive(Debug, Clone, PartialEq, Eq, Copy, PartialOrd, Ord, Hash)]
17| |#[non_exhaustive]
18| |pub enum TrackInfoTag {
19| | /// Its value is an `EntryValue::Text`.
20| | Make,
21| |
22| | /// Its value is an `EntryValue::Text`.
23| | Model,
24| |
25| | /// Its value is an `EntryValue::Text`.
26| | Software,
27| |
28| | /// Its value is an [`EntryValue::DateTime`].
29| | CreateDate,
30| |
31| | /// Duration in millisecond, its value is an `EntryValue::U64`.
32| | DurationMs,
33| |
34| | /// Its value is an `EntryValue::U32`.
35| | Width,
36| |
37| | /// Its value is an `EntryValue::U32`.
38| | Height,
39| |
40| | /// Its value is an `EntryValue::Text`, location presented in ISO6709.
41| | ///
42| | /// If you need a parsed [`GPSInfo`] which provides more detailed GPS info,
43| | /// please use [`TrackInfo::gps_info`].
44| | GpsIso6709,
45| |
46| | /// Its value is an `EntryValue::Text`.
47| | Author,
48| |}
49| |
50| |/// Represents parsed track info.
51| |#[derive(Debug, Clone, Default)]
52| |pub struct TrackInfo {
53| | entries: BTreeMap<TrackInfoTag, EntryValue>,
54| | gps_info: Option<GPSInfo>,
55| |}
56| |
57| |impl TrackInfo {
58| | /// Get value for `tag`. Different variants of `TrackInfoTag` may have
59| | /// different value types, please refer to [`TrackInfoTag`].
60| 230| pub fn get(&self, tag: TrackInfoTag) -> Option<&EntryValue> {
61| 230| self.entries.get(&tag)
62| 230| }
63| |
64| | /// Parsed GPS info, if `GpsIso6709` was present in the source. Mirrors
65| | /// [`Exif::gps_info`](crate::Exif::gps_info).
66| 0| pub fn gps_info(&self) -> Option<&GPSInfo> {
67| 0| self.gps_info.as_ref()
68| 0| }
69| |
70| | /// Iterate over `(tag, value)` pairs. The tag is yielded by value
71| | /// because [`TrackInfoTag`] is `Copy`. The parsed `GPSInfo` is **not**
72| | /// included here — get it via [`TrackInfo::gps_info`].
73| 0| pub fn iter(&self) -> impl Iterator<Item = (TrackInfoTag, &EntryValue)> {
74| 0| self.entries.iter().map(|(k, v)| (*k, v))
75| 0| }
76| |
77| | /// Deprecated: 3.0.0 reserved this for "track source also embeds a
78| | /// secondary track" cases (e.g. `.mka` audio container that also
79| | /// carries video) but the detection was never wired up — the method
80| | /// always returned `false`. v3.1 drops the symmetric counterpart on
81| | /// the image side ([`Exif::has_embedded_track`](crate::Exif::has_embedded_track))
82| | /// to a content-detected flag, but the track-source variant stays
83| | /// without a real use case, so this remains a no-op for source
84| | /// compatibility only. May be re-introduced if a concrete use case
85| | /// emerges.
86| | #[deprecated(
87| | since = "3.1.0",
88| | note = "no concrete use case in v3.x; always returned false in 3.0.0. Kept as a no-op for source-compat; will be removed if no use case emerges by v4."
89| | )]
90| 1| pub fn has_embedded_media(&self) -> bool {
91| 1| false
92| 1| }
93| |
94| 700| pub(crate) fn put(&mut self, tag: TrackInfoTag, value: EntryValue) {
95| 700| self.entries.insert(tag, value);
96| 700| }
97| |}
98| |
99| |/// Parse video/audio info from `reader`. The file format will be detected
100| |/// automatically by parser, if the format is not supported, an `Err` will be
101| |/// returned.
102| |///
103| |/// Currently supported file formats are:
104| |///
105| |/// - ISO base media file format (ISOBMFF): *.mp4, *.mov, *.3gp, etc.
106| |/// - Matroska based file format: *.webm, *.mkv, *.mka, etc.
107| |///
108| |/// ## Explanation of the generic parameters of this function:
109| |///
110| |/// - In order to improve parsing efficiency, the parser will internally skip
111| |/// some useless bytes during parsing the byte stream, which is called
112| |/// `Skip` internally.
113| |///
114| |/// - In order to support both `Read` and `Read` + `Seek` types, the interface
115| |/// of input parameters is defined as `Read`.
116| |///
117| |/// - Since Rust does not support specialization, the parser cannot internally
118| |/// distinguish between `Read` and `Seek` and provide different `Skip`
119| |/// implementations for them.
120| |///
121| |/// Therefore, We chose to let the user specify how `Skip` works:
122| |///
123| |/// - `parse_track_info::<SkipSeek, _>(reader)` means the `reader` supports
124| |/// `Seek`, so `Skip` will use the `Seek` trait to implement efficient skip
125| |/// operations.
126| |///
127| |/// - `parse_track_info::<SkipRead, _>(reader)` means the `reader` dosn't
128| |/// support `Seek`, so `Skip` will fall back to using `Read` to implement the
129| |/// skip operations.
130| |///
131| |/// ## Performance impact
132| |///
133| |/// If your `reader` only supports `Read`, it may cause performance loss when
134| |/// processing certain large files. For example, *.mov files place metadata at
135| |/// the end of the file, therefore, when parsing such files, locating metadata
136| |/// will be slightly slower.
137| |///
138| |/// ## Examples
139| |///
140| |/// ```rust
141| |/// use nom_exif::*;
142| |/// use std::fs::File;
143| |/// use chrono::DateTime;
144| |///
145| |/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
146| |/// assert_eq!(ms.kind(), MediaKind::Track);
147| |/// let mut parser = MediaParser::new();
148| |/// let info: TrackInfo = parser.parse_track(ms).unwrap();
149| |///
150| |/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
151| |/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
152| |/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
153| |/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
154| |/// assert_eq!(
155| |/// info.gps_info().unwrap().latitude,
156| |/// LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
157| |/// );
158| |/// ```
159| |#[tracing::instrument(skip(input))]
160| 206|pub(crate) fn parse_track_info(
161| 206| input: &[u8],
162| 206| mime_video: MediaMimeTrack,
163| 206|) -> Result<TrackInfo, ParsingError> {
164| 206| let mut info: TrackInfo = match mime_video {
^127 ^127
165| | crate::file::MediaMimeTrack::QuickTime
166| | | crate::file::MediaMimeTrack::_3gpp
167| | | crate::file::MediaMimeTrack::Mp4 => {
168| 172| let range = extract_moov_body_from_buf(input)?;
^93 ^79
169| 93| let moov_body = &input[range];
170| 93| parse_isobmff(moov_body)?
^0
171| | }
172| | crate::file::MediaMimeTrack::Webm | crate::file::MediaMimeTrack::Matroska => {
173| 34| parse_webm(input)?.into()
^0
174| | }
175| | };
176| |
177| 127| if let Some(gps) = info.get(TrackInfoTag::GpsIso6709) {
^75
178| 75| info.gps_info = gps.as_str().and_then(|s| s.parse().ok());
179| 52| }
180| |
181| 127| Ok(info)
182| 206|}
183| |
184| |impl TrackInfoTag {
185| | /// Stable, programmatic name of this tag (matches the `Display` output).
186| 12| pub const fn name(self) -> &'static str {
187| 12| match self {
188| 2| TrackInfoTag::Make => "Make",
189| 1| TrackInfoTag::Model => "Model",
190| 1| TrackInfoTag::Software => "Software",
191| 1| TrackInfoTag::CreateDate => "CreateDate",
192| 2| TrackInfoTag::DurationMs => "DurationMs",
193| 1| TrackInfoTag::Width => "Width",
194| 1| TrackInfoTag::Height => "Height",
195| 2| TrackInfoTag::GpsIso6709 => "GpsIso6709",
196| 1| TrackInfoTag::Author => "Author",
197| | }
198| 12| }
199| |}
200| |
201| |impl std::fmt::Display for TrackInfoTag {
202| 0| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203| 0| f.write_str(self.name())
204| 0| }
205| |}
206| |
207| |impl std::str::FromStr for TrackInfoTag {
208| | type Err = crate::ConvertError;
209| |
210| 12| fn from_str(s: &str) -> Result<Self, Self::Err> {
211| 12| Ok(match s {
212| 12| "Make" => TrackInfoTag::Make,
^1
213| 11| "Model" => TrackInfoTag::Model,
^1
214| 10| "Software" => TrackInfoTag::Software,
^1
215| 9| "CreateDate" => TrackInfoTag::CreateDate,
^1
216| 8| "DurationMs" => TrackInfoTag::DurationMs,
^1
217| 7| "Width" => TrackInfoTag::Width,
^1
218| 6| "Height" => TrackInfoTag::Height,
^1
219| 5| "GpsIso6709" => TrackInfoTag::GpsIso6709,
^1
220| 4| "Author" => TrackInfoTag::Author,
^1
221| 3| other => return Err(crate::ConvertError::UnknownTagName(other.to_owned())),
222| | })
223| 12| }
224| |}
225| |
226| |#[cfg(test)]
227| |mod p6_baseline {
228| | use crate::{MediaParser, MediaSource, TrackInfoTag};
229| |
230| | #[test]
231| 1| fn p6_baseline_meta_mov_dump_snapshot() {
232| | // Lock down the post-refactor invariant: parsing testdata/meta.mov
233| | // through the public API yields the same set of (tag, value) pairs
234| | // before and after every P6 task. Captured as a sorted formatted
235| | // string so the assertion is a single Vec compare.
236| 1| let mut parser = MediaParser::new();
237| 1| let ms = MediaSource::open("testdata/meta.mov").unwrap();
238| 1| let info = parser.parse_track(ms).unwrap();
239| |
240| | // Probe the well-known tags (Make/Model/GpsIso6709/DurationMs).
241| | // The rest is exercised indirectly by other tests.
242| 1| let mut entries: Vec<String> = [
243| 1| TrackInfoTag::Make,
244| 1| TrackInfoTag::Model,
245| 1| TrackInfoTag::GpsIso6709,
246| 1| TrackInfoTag::DurationMs,
247| 1| TrackInfoTag::Width,
248| 1| TrackInfoTag::Height,
249| 1| ]
250| 1| .into_iter()
251| 6| .filter_map(|t| info.get(t).map(|v| format!("{t:?}={v}")))
^1
252| 1| .collect();
253| 1| entries.sort();
254| 1| assert!(
255| 1| entries.len() >= 4,
256| | "expected >=4 well-known tags, got {entries:?}"
257| | );
258| 1| assert!(
259| 4| entries.iter().any(|s| s.starts_with("Make=")),
^1 ^1
260| | "expected Make tag in snapshot, got {entries:?}"
261| | );
262| 1| }
263| |
264| | #[test]
265| 1| fn track_info_tag_name_is_const_str() {
266| | const _: &str = TrackInfoTag::Make.name();
267| 1| assert_eq!(TrackInfoTag::Make.name(), "Make");
268| 1| assert_eq!(TrackInfoTag::GpsIso6709.name(), "GpsIso6709");
269| 1| assert_eq!(TrackInfoTag::DurationMs.name(), "DurationMs");
270| 1| }
271| |
272| | #[test]
273| 1| fn track_info_tag_from_str_round_trip() {
274| | use std::str::FromStr;
275| 9| for t in [
276| 1| TrackInfoTag::Make,
277| 1| TrackInfoTag::Model,
278| 1| TrackInfoTag::Software,
279| 1| TrackInfoTag::CreateDate,
280| 1| TrackInfoTag::DurationMs,
281| 1| TrackInfoTag::Width,
282| 1| TrackInfoTag::Height,
283| 1| TrackInfoTag::GpsIso6709,
284| 1| TrackInfoTag::Author,
285| 1| ] {
286| 9| assert_eq!(TrackInfoTag::from_str(t.name()).unwrap(), t);
287| | }
288| 1| }
289| |
290| | #[test]
291| 1| fn track_info_tag_from_str_unknown_returns_convert_error() {
292| | use crate::ConvertError;
293| | use std::str::FromStr;
294| 1| let err = TrackInfoTag::from_str("Bogus").unwrap_err();
295| 1| assert!(matches!(err, ConvertError::UnknownTagName(s) if s == "Bogus"));
296| 1| }
297| |
298| | #[test]
299| | #[allow(deprecated)]
300| 1| fn track_info_deprecated_has_embedded_media_returns_false() {
301| | // 3.0.0 reserved this method for a "track source carries another
302| | // embedded track" detection that never materialized. v3.1 leaves
303| | // it as a deprecated no-op until a real use case shows up.
304| 1| let mut parser = crate::MediaParser::new();
305| 1| let info = parser
306| 1| .parse_track(crate::MediaSource::open("testdata/meta.mov").unwrap())
307| 1| .unwrap();
308| 1| assert!(!info.has_embedded_media());
309| 1| }
310| |}