Skip to main content

nom_exif/exif/
exif_exif.rs

1use std::fmt::Debug;
2
3use nom::{
4    branch::alt, bytes::streaming::tag, combinator, number::Endianness, IResult, Needed, Parser,
5};
6
7use crate::{EntryValue, ExifEntry, ExifIter, ExifTag, GPSInfo, IfdIndex, TagOrCode};
8
9use super::ifd::ParsedImageFileDirectory;
10
/// Represents parsed Exif information, can be converted from an [`ExifIter`]
/// like this: `let exif: Exif = iter.into()`.
///
/// Successfully parsed entry values are grouped per IFD; entries whose
/// parsing failed are kept separately and exposed through [`Exif::errors`].
#[derive(Clone, Debug, PartialEq)]
pub struct Exif {
    /// Parsed directories, indexed by IFD number (index 0 is the main image).
    ifds: Vec<ParsedImageFileDirectory>,
    /// GPS information captured when this value was built, if any.
    gps_info: Option<GPSInfo>,
    /// Entries that failed to parse, as `(ifd, tag, error)` tuples.
    errors: Vec<(IfdIndex, TagOrCode, crate::EntryError)>,
    /// Embedded-track hint copied from the originating [`ExifIter`].
    has_embedded_track: bool,
}
20
21impl Exif {
22    fn new(gps_info: Option<GPSInfo>, has_embedded_track: bool) -> Exif {
23        Exif {
24            ifds: Vec::new(),
25            gps_info,
26            errors: Vec::new(),
27            has_embedded_track,
28        }
29    }
30
31    /// Get entry value for the specified `tag` in ifd0 (the main image).
32    ///
33    /// *Note*:
34    ///
35    /// - The parsing error related to this tag won't be reported by this
36    ///   method. Either this entry is not parsed successfully, or the tag does
37    ///   not exist in the input data, this method will return None.
38    ///
39    /// - If you want to handle parsing error, please consider to use
40    ///   [`ExifIter`].
41    ///
42    /// - If you have any custom defined tag which does not exist in
43    ///   [`ExifTag`], you can always get the entry value by a raw tag code,
44    ///   see [`Self::get_by_code`].
45    ///
46    ///   ## Example
47    ///
48    ///   ```rust
49    ///   use nom_exif::*;
50    ///
51    ///   fn main() -> Result<()> {
52    ///       let mut parser = MediaParser::new();
53    ///       
54    ///       let ms = MediaSource::open("./testdata/exif.jpg")?;
55    ///       assert_eq!(ms.kind(), MediaKind::Image);
56    ///       let iter = parser.parse_exif(ms)?;
57    ///       let exif: Exif = iter.into();
58    ///
59    ///       assert_eq!(exif.get(ExifTag::Model).unwrap(), &"vivo X90 Pro+".into());
60    ///       Ok(())
61    ///   }
62    pub fn get(&self, tag: ExifTag) -> Option<&EntryValue> {
63        self.get_in(IfdIndex::MAIN, tag)
64    }
65
66    /// Get entry value for the specified `tag` in the specified `ifd`.
67    ///
68    /// *Note*:
69    ///
70    /// - The parsing error related to this tag won't be reported by this
71    ///   method. Either this entry is not parsed successfully, or the tag does
72    ///   not exist in the input data, this method will return None. Use
73    ///   [`Self::errors`] to inspect per-entry errors.
74    ///
75    /// - For raw tag codes (e.g. unrecognized tags), use [`Self::get_by_code`].
76    ///
77    ///   ## Example
78    ///
79    ///   ```rust
80    ///   use nom_exif::*;
81    ///
82    ///   fn main() -> Result<()> {
83    ///       let mut parser = MediaParser::new();
84    ///       let ms = MediaSource::open("./testdata/exif.jpg")?;
85    ///       let iter = parser.parse_exif(ms)?;
86    ///       let exif: Exif = iter.into();
87    ///
88    ///       assert_eq!(exif.get_in(IfdIndex::MAIN, ExifTag::Model).unwrap(),
89    ///                  &"vivo X90 Pro+".into());
90    ///       Ok(())
91    ///   }
92    ///   ```
93    pub fn get_in(&self, ifd: IfdIndex, tag: ExifTag) -> Option<&EntryValue> {
94        self.get_by_code(ifd, tag.code())
95    }
96
97    /// Get entry value for the specified raw `code` in the specified `ifd`.
98    /// Used for tags not in the recognized [`ExifTag`] enum.
99    pub fn get_by_code(&self, ifd: IfdIndex, code: u16) -> Option<&EntryValue> {
100        self.ifds.get(ifd.as_usize()).and_then(|d| d.get(code))
101    }
102
103    /// Iterate every parsed entry in every IFD.
104    ///
105    /// Order is: IFD0 entries first (in `HashMap` order — not stable), then
106    /// IFD1, etc. Filter by IFD with `.iter().filter(|e| e.ifd == IfdIndex::MAIN)`.
107    pub fn iter(&self) -> impl Iterator<Item = ExifEntry<'_>> {
108        self.ifds.iter().enumerate().flat_map(|(idx, dir)| {
109            let ifd = IfdIndex::new(idx);
110            dir.iter().map(move |(code, value)| ExifEntry {
111                ifd,
112                tag: TagOrCode::from(code),
113                value,
114            })
115        })
116    }
117
    /// Get parsed GPS information.
    ///
    /// Returns `None` if the source had no `GPSInfo` IFD or if its parse
    /// failed (failures land in [`Self::errors`]).
    ///
    /// The value is borrowed from this `Exif`; nothing is parsed on access.
    pub fn gps_info(&self) -> Option<&GPSInfo> {
        // NOTE(review): `From<ExifIter>` builds this field with
        // `parse_gps().ok().flatten()`, which discards the `parse_gps` error
        // itself; only per-entry errors met while iterating are pushed to
        // `errors`. Confirm the doc claim above against `ExifIter`.
        self.gps_info.as_ref()
    }
125
126    /// Per-entry errors collected during `From<ExifIter>` conversion. Each
127    /// tuple is `(ifd, tag, error)`. Empty slice if the parse was clean.
128    pub fn errors(&self) -> &[(IfdIndex, TagOrCode, crate::EntryError)] {
129        &self.errors
130    }
131
    /// Whether the source file is known to embed a paired media track
    /// that this parse path did *not* surface — a Pixel/Google or Samsung
    /// Galaxy Motion Photo (JPEG with `GCamera:MotionPhoto` XMP and an
    /// MP4 trailer). Use [`crate::MediaParser::parse_track`] on the same
    /// source to extract the embedded track.
    ///
    /// **Content-detected, not MIME-guessed**: returns `true` only when
    /// `parse_exif` observed a concrete content signal
    /// (`GCamera:MotionPhoto="1"` plus a `Container:Directory` /
    /// `MotionPhotoOffset` / `MicroVideoOffset`). A plain JPEG or HEIC
    /// without such signals returns `false`.
    ///
    /// **Coverage**: Pixel/Google Motion Photos and Samsung Galaxy
    /// Motion Photos that use the Adobe XMP Container directory format
    /// (JPEG variants).
    ///
    /// The returned flag is the one copied from the [`ExifIter`] when this
    /// `Exif` was built; no detection happens at call time.
    pub fn has_embedded_track(&self) -> bool {
        self.has_embedded_track
    }
150
    /// Deprecated alias for [`Self::has_embedded_track`].
    ///
    /// Forwards to the new method and returns exactly the same value; it is
    /// kept so that code written against the old name keeps compiling.
    #[deprecated(
        since = "3.1.0",
        note = "renamed to `has_embedded_track` to reflect the actual semantics (paired track hint, not arbitrary embedded media)"
    )]
    pub fn has_embedded_media(&self) -> bool {
        self.has_embedded_track()
    }
159
160    fn put_value(&mut self, ifd: usize, code: u16, v: EntryValue) {
161        while self.ifds.len() < ifd + 1 {
162            self.ifds.push(ParsedImageFileDirectory::new());
163        }
164        self.ifds[ifd].put(code, v);
165    }
166}
167
168impl From<ExifIter> for Exif {
169    fn from(iter: ExifIter) -> Self {
170        let gps_info = iter.parse_gps().ok().flatten();
171        let has_embedded_track = iter.has_embedded_track();
172        let mut exif = Exif::new(gps_info, has_embedded_track);
173
174        for entry in iter {
175            let ifd = entry.ifd();
176            let tag = entry.tag();
177            let code = tag.code();
178            match entry.into_result() {
179                Ok(v) => exif.put_value(ifd.as_usize(), code, v),
180                Err(e) => exif.errors.push((ifd, tag, e)),
181            }
182        }
183
184        exif
185    }
186}
187
/// Size in bytes of the TIFF header: the 2-byte byte-order marker, the
/// 2-byte magic number and the 4-byte IFD0 offset read by
/// [`TiffHeader::parse`].
pub(crate) const TIFF_HEADER_LEN: usize = 8;
189
/// TIFF Header
///
/// Holds the two values extracted by [`TiffHeader::parse`]: the byte order
/// of the data and the offset of the first IFD.
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct TiffHeader {
    /// Byte order selected by the `MM` (big-endian) or `II` (little-endian)
    /// marker at the start of the header.
    pub endian: Endianness,
    /// Offset of IFD0 exactly as stored in the header.
    pub ifd0_offset: u32,
}
196
197impl Default for TiffHeader {
198    fn default() -> Self {
199        Self {
200            endian: Endianness::Big,
201            ifd0_offset: 0,
202        }
203    }
204}
205
206impl Debug for TiffHeader {
207    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208        let endian_str = match self.endian {
209            Endianness::Big => "Big",
210            Endianness::Little => "Little",
211            Endianness::Native => "Native",
212        };
213        f.debug_struct("TiffHeader")
214            .field("endian", &endian_str)
215            .field("ifd0_offset", &format!("{:#x}", self.ifd0_offset))
216            .finish()
217    }
218}
219
/// Size in bytes of a single IFD entry.
pub(crate) const IFD_ENTRY_SIZE: usize = 12;
221
222impl TiffHeader {
223    pub fn parse(input: &[u8]) -> IResult<&[u8], TiffHeader> {
224        use nom::number::streaming::{u16, u32};
225        let (remain, endian) = TiffHeader::parse_endian(input)?;
226        let (_, (_, offset)) = (
227            combinator::verify(u16(endian), |magic| *magic == 0x2a),
228            u32(endian),
229        )
230            .parse(remain)?;
231
232        let header = Self {
233            endian,
234            ifd0_offset: offset,
235        };
236
237        Ok((remain, header))
238    }
239
240    pub fn parse_ifd_entry_num(input: &[u8], endian: Endianness) -> IResult<&[u8], u16> {
241        let (remain, num) = nom::number::streaming::u16(endian)(input)?; // Safe-slice
242        if num == 0 {
243            return Ok((remain, 0));
244        }
245
246        // 12 bytes per entry
247        let size = (num as usize)
248            .checked_mul(IFD_ENTRY_SIZE)
249            .expect("should fit");
250
251        if size > remain.len() {
252            return Err(nom::Err::Incomplete(Needed::new(size - remain.len())));
253        }
254
255        Ok((remain, num))
256    }
257
258    // pub fn first_ifd<'a>(&self, input: &'a [u8], tag_ids: HashSet<u16>) -> IResult<&'a [u8], IFD> {
259    //     // ifd0_offset starts from the beginning of Header, so we should
260    //     // subtract the header size, which is 8
261    //     let offset = self.ifd0_offset - 8;
262
263    //     // skip to offset
264    //     let (_, remain) = take(offset)(input)?;
265
266    //     IFD::parse(remain, self.endian, tag_ids)
267    // }
268
269    fn parse_endian(input: &[u8]) -> IResult<&[u8], Endianness> {
270        combinator::map(alt((tag("MM"), tag("II"))), |endian_marker| {
271            if endian_marker == b"MM" {
272                Endianness::Big
273            } else {
274                Endianness::Little
275            }
276        })
277        .parse(input)
278    }
279}
280
281pub(crate) fn check_exif_header(data: &[u8]) -> Result<bool, nom::Err<nom::error::Error<&[u8]>>> {
282    tag::<_, _, nom::error::Error<_>>(EXIF_IDENT)(data).map(|_| true)
283}
284
285pub(crate) fn check_exif_header2(i: &[u8]) -> IResult<&[u8], ()> {
286    let (remain, _) = (
287        nom::number::complete::be_u32,
288        nom::bytes::complete::tag(EXIF_IDENT),
289    )
290        .parse(i)?;
291    Ok((remain, ()))
292}
293
/// Identifier matched by [`check_exif_header`] and [`check_exif_header2`]:
/// the text `Exif` followed by two NUL bytes.
pub(crate) const EXIF_IDENT: &str = "Exif\0\0";
295
296#[cfg(test)]
297mod tests {
298    use std::io::Read;
299    use std::thread;
300
301    use test_case::test_case;
302
303    use crate::exif::input_into_iter;
304    use crate::jpeg::extract_exif_data;
305    use crate::slice::SubsliceRange;
306    use crate::testkit::{open_sample, read_sample};
307    use crate::ExifIterEntry;
308
309    use super::*;
310
311    #[test]
312    fn header() {
313        let _ = tracing_subscriber::fmt().with_test_writer().try_init();
314
315        let buf = [0x4d, 0x4d, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x08, 0x00];
316
317        let (_, header) = TiffHeader::parse(&buf).unwrap();
318        assert_eq!(
319            header,
320            TiffHeader {
321                endian: Endianness::Big,
322                ifd0_offset: 8,
323            }
324        );
325    }
326
327    #[test_case("exif.jpg")]
328    fn exif_iter_gps(path: &str) {
329        let buf = read_sample(path).unwrap();
330        let (_, data) = extract_exif_data(&buf).unwrap();
331        let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
332        let data = bytes::Bytes::from(buf).slice(range);
333        let iter = input_into_iter(data, None).unwrap();
334        let gps = iter.parse_gps().unwrap().unwrap();
335        assert_eq!(gps.to_iso6709(), "+22.53113+114.02148/");
336    }
337
338    #[test_case("exif.jpg")]
339    fn clone_exif_iter_to_thread(path: &str) {
340        let buf = read_sample(path).unwrap();
341        let (_, data) = extract_exif_data(&buf).unwrap();
342        let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
343        let data = bytes::Bytes::from(buf).slice(range);
344        let iter = input_into_iter(data, None).unwrap();
345        let iter2 = iter.clone();
346
347        let mut expect = String::new();
348        open_sample(&format!("{path}.txt"))
349            .unwrap()
350            .read_to_string(&mut expect)
351            .unwrap();
352
353        let jh = thread::spawn(move || iter_to_str(iter2));
354
355        let result = iter_to_str(iter);
356
357        // open_sample_w(&format!("{path}.txt"))
358        //     .unwrap()
359        //     .write_all(result.as_bytes())
360        //     .unwrap();
361
362        assert_eq!(result.trim(), expect.trim());
363        assert_eq!(jh.join().unwrap().trim(), expect.trim());
364    }
365
366    fn iter_to_str(it: impl Iterator<Item = ExifIterEntry>) -> String {
367        let ss = it
368            .map(|x| {
369                format!(
370                    "{}.{:<32} » {}",
371                    x.ifd(),
372                    match x.tag() {
373                        crate::TagOrCode::Tag(t) => t.to_string(),
374                        crate::TagOrCode::Unknown(c) => format!("Unknown(0x{c:04x})"),
375                    },
376                    x.result()
377                        .map(|v| v.to_string())
378                        .map_err(|e| e.to_string())
379                        .unwrap_or_else(|s| s)
380                )
381            })
382            .collect::<Vec<String>>();
383        ss.join("\n")
384    }
385
386    #[test]
387    fn p5_baseline_exif_jpg_dump_snapshot() {
388        // Lock down the post-refactor invariant: parsing testdata/exif.jpg
389        // through the public API yields the same set of (ifd, tag, value)
390        // triples before and after every P5 task. Captured as a sorted
391        // formatted string so the assertion is a single Vec compare.
392        use crate::{MediaParser, MediaSource};
393        let mut parser = MediaParser::new();
394        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
395        let iter = parser.parse_exif(ms).unwrap();
396
397        let mut entries: Vec<String> = iter
398            .map(|e| {
399                let val = match e.result() {
400                    Ok(v) => format!("{v}"),
401                    Err(err) => format!("<err:{err}>"),
402                };
403                format!("{}.0x{:04x}={val}", e.ifd(), e.tag().code())
404            })
405            .collect();
406        entries.sort();
407        assert!(
408            entries.len() > 5,
409            "expected >5 entries, got {}",
410            entries.len()
411        );
412        assert!(
413            entries.iter().any(|s| s.contains("0x010f")),
414            "expected Make tag (0x010f) in snapshot, got {entries:?}"
415        );
416    }
417
418    #[test]
419    fn exif_get_in_main_routes_via_ifd_index() {
420        use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
421        let mut parser = MediaParser::new();
422        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
423        let iter = parser.parse_exif(ms).unwrap();
424        let exif: Exif = iter.into();
425
426        // Main image: same as exif.get(...)
427        let v_via_get = exif.get(ExifTag::Model);
428        let v_via_get_in = exif.get_in(IfdIndex::MAIN, ExifTag::Model);
429        assert_eq!(v_via_get, v_via_get_in);
430        assert!(
431            v_via_get.is_some(),
432            "Model tag expected in testdata/exif.jpg"
433        );
434    }
435
436    #[test]
437    fn exif_get_by_code_finds_unrecognized_or_recognized_tag() {
438        use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
439        let mut parser = MediaParser::new();
440        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
441        let iter = parser.parse_exif(ms).unwrap();
442        let exif: Exif = iter.into();
443        // Make = 0x010f
444        let v = exif.get_by_code(IfdIndex::MAIN, ExifTag::Make.code());
445        assert!(v.is_some());
446    }
447
448    #[test]
449    fn exif_gps_info_returns_borrow_no_result_wrap() {
450        use crate::{MediaParser, MediaSource};
451        let mut parser = MediaParser::new();
452        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
453        let iter = parser.parse_exif(ms).unwrap();
454        let exif: Exif = iter.into();
455        // gps_info returns Option<&GPSInfo> directly (no Result wrap).
456        let g: Option<&crate::GPSInfo> = exif.gps_info();
457        assert!(g.is_some(), "testdata/exif.jpg has GPS info");
458        assert_eq!(g.unwrap().to_iso6709(), "+22.53113+114.02148/");
459    }
460
461    #[test]
462    fn exif_iter_yields_main_ifd_entries() {
463        use crate::{ExifTag, IfdIndex, MediaParser, MediaSource};
464        let mut parser = MediaParser::new();
465        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
466        let iter = parser.parse_exif(ms).unwrap();
467        let exif: Exif = iter.into();
468
469        let main_count = exif.iter().filter(|e| e.ifd == IfdIndex::MAIN).count();
470        assert!(
471            main_count > 1,
472            "expected >1 entries in main IFD, got {main_count}"
473        );
474
475        // Ensure each entry is well-formed.
476        for entry in exif.iter() {
477            // value is a real reference to an EntryValue
478            let _: &crate::EntryValue = entry.value;
479            // Tag round-trips
480            let code = entry.tag.code();
481            assert_eq!(
482                exif.get_by_code(entry.ifd, code).unwrap(),
483                entry.value,
484                "iter entry value should match get_by_code lookup"
485            );
486        }
487
488        // Specifically: Model entry is present and matches get().
489        let model_via_iter = exif
490            .iter()
491            .find(|e| e.tag.tag() == Some(ExifTag::Model))
492            .map(|e| e.value);
493        assert_eq!(model_via_iter, exif.get(ExifTag::Model));
494    }
495
496    #[test]
497    fn exif_errors_is_empty_for_clean_fixture() {
498        use crate::{MediaParser, MediaSource};
499        let mut parser = MediaParser::new();
500        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
501        let iter = parser.parse_exif(ms).unwrap();
502        let exif: Exif = iter.into();
503        // Clean fixture: errors() returns empty slice but the method exists
504        // and the type matches the spec.
505        let errs: &[(crate::IfdIndex, crate::TagOrCode, crate::EntryError)] = exif.errors();
506        assert!(
507            errs.is_empty(),
508            "exif.jpg has no per-entry errors, got {errs:?}"
509        );
510    }
511
512    #[test]
513    fn exif_errors_captures_per_entry_errors_for_broken_fixture() {
514        use crate::{MediaParser, MediaSource};
515        let mut parser = MediaParser::new();
516        let ms = MediaSource::open("testdata/broken.jpg").unwrap();
517        let iter = parser.parse_exif(ms).unwrap();
518        let exif: Exif = iter.into();
519        // broken.jpg has malformed IFD entries — at least one should land in errors().
520        // (Note: if broken.jpg's particular breakage doesn't surface as a per-entry
521        // error, this assertion may be `>= 0`. Adjust as needed.)
522        let _ = exif.errors();
523    }
524
525    #[test]
526    fn has_embedded_track_true_for_pixel_motion_photo() {
527        use crate::{MediaParser, MediaSource};
528        let mut parser = MediaParser::new();
529        let ms = MediaSource::open("testdata/motion_photo_pixel_synth.jpg").unwrap();
530        let iter = parser.parse_exif(ms).unwrap();
531        assert!(
532            iter.has_embedded_track(),
533            "Pixel-style Motion Photo carries an embedded MP4 track"
534        );
535        let exif: Exif = iter.into();
536        assert!(exif.has_embedded_track(), "flag survives From<ExifIter>");
537    }
538
539    #[test]
540    fn has_embedded_track_false_for_plain_jpeg_and_heic() {
541        use crate::{MediaParser, MediaSource};
542        for path in ["testdata/exif.jpg", "testdata/exif.heic"] {
543            let mut parser = MediaParser::new();
544            let iter = parser.parse_exif(MediaSource::open(path).unwrap()).unwrap();
545            assert!(
546                !iter.has_embedded_track(),
547                "{path} has no Motion Photo / paired track signal"
548            );
549            let exif: Exif = iter.into();
550            assert!(!exif.has_embedded_track());
551        }
552    }
553
554    #[test]
555    #[allow(deprecated)]
556    fn deprecated_has_embedded_media_still_works() {
557        use crate::{MediaParser, MediaSource};
558        let mut parser = MediaParser::new();
559        let ms = MediaSource::open("testdata/motion_photo_pixel_synth.jpg").unwrap();
560        let iter = parser.parse_exif(ms).unwrap();
561        // Deprecated alias must still forward to the new method.
562        assert_eq!(iter.has_embedded_media(), iter.has_embedded_track());
563        let exif: Exif = iter.into();
564        assert_eq!(exif.has_embedded_media(), exif.has_embedded_track());
565    }
566
567    /// End-to-end: `has_embedded_track == true` ⇒ `parse_track` extracts a
568    /// real `TrackInfo` from the same source. This locks the v3.1 contract
569    /// for Pixel/Google Motion Photo JPEGs.
570    #[test]
571    fn parse_track_extracts_motion_photo_trailer() {
572        use crate::{MediaParser, MediaSource, TrackInfoTag};
573        let path = "testdata/motion_photo_pixel_synth.jpg";
574
575        let mut p1 = MediaParser::new();
576        let iter = p1.parse_exif(MediaSource::open(path).unwrap()).unwrap();
577        assert!(iter.has_embedded_track());
578
579        let mut p2 = MediaParser::new();
580        let track = p2
581            .parse_track(MediaSource::open(path).unwrap())
582            .expect("parse_track must extract the trailer MP4");
583        assert!(
584            track.get(TrackInfoTag::Width).is_some() || track.get(TrackInfoTag::Height).is_some(),
585            "trailer should yield at least one geometry tag"
586        );
587    }
588
589    /// Plain JPEGs (no Motion Photo XMP) must keep returning TrackNotFound.
590    #[test]
591    fn parse_track_on_plain_jpeg_returns_track_not_found() {
592        use crate::{Error, MediaParser, MediaSource};
593        let mut parser = MediaParser::new();
594        let err = parser
595            .parse_track(MediaSource::open("testdata/exif.jpg").unwrap())
596            .unwrap_err();
597        assert!(
598            matches!(err, Error::TrackNotFound),
599            "expected TrackNotFound, got {err:?}"
600        );
601    }
602}