Skip to main content

nom_exif/exif/
exif_exif.rs

use std::fmt::Debug;

use nom::{
    branch::alt, bytes::streaming::tag, combinator, number::Endianness, IResult, Needed, Parser,
};

use crate::{EntryValue, ExifEntry, ExifIter, ExifTag, GPSInfo, IfdIndex, TagOrCode};

use super::ifd::ParsedImageFileDirectory;
10
11/// Represents parsed Exif information, can be converted from an [`ExifIter`]
12/// like this: `let exif: Exif = iter.into()`.
13#[derive(Clone, Debug, PartialEq)]
14pub struct Exif {
15    ifds: Vec<ParsedImageFileDirectory>,
16    gps_info: Option<GPSInfo>,
17    errors: Vec<(IfdIndex, TagOrCode, crate::EntryError)>,
18    has_embedded_media: bool,
19}
20
21impl Exif {
22    fn new(gps_info: Option<GPSInfo>, has_embedded_media: bool) -> Exif {
23        Exif {
24            ifds: Vec::new(),
25            gps_info,
26            errors: Vec::new(),
27            has_embedded_media,
28        }
29    }
30
31    /// Get entry value for the specified `tag` in ifd0 (the main image).
32    ///
33    /// *Note*:
34    ///
35    /// - The parsing error related to this tag won't be reported by this
36    ///   method. Either this entry is not parsed successfully, or the tag does
37    ///   not exist in the input data, this method will return None.
38    ///
39    /// - If you want to handle parsing error, please consider to use
40    ///   [`ExifIter`].
41    ///
42    /// - If you have any custom defined tag which does not exist in
43    ///   [`ExifTag`], you can always get the entry value by a raw tag code,
44    ///   see [`Self::get_by_code`].
45    ///
46    ///   ## Example
47    ///
48    ///   ```rust
49    ///   use nom_exif::*;
50    ///
51    ///   fn main() -> Result<()> {
52    ///       let mut parser = MediaParser::new();
53    ///       
54    ///       let ms = MediaSource::open("./testdata/exif.jpg")?;
55    ///       assert_eq!(ms.kind(), MediaKind::Image);
56    ///       let iter = parser.parse_exif(ms)?;
57    ///       let exif: Exif = iter.into();
58    ///
59    ///       assert_eq!(exif.get(ExifTag::Model).unwrap(), &"vivo X90 Pro+".into());
60    ///       Ok(())
61    ///   }
62    pub fn get(&self, tag: ExifTag) -> Option<&EntryValue> {
63        self.get_in(IfdIndex::MAIN, tag)
64    }
65
66    /// Get entry value for the specified `tag` in the specified `ifd`.
67    ///
68    /// *Note*:
69    ///
70    /// - The parsing error related to this tag won't be reported by this
71    ///   method. Either this entry is not parsed successfully, or the tag does
72    ///   not exist in the input data, this method will return None. Use
73    ///   [`Self::errors`] to inspect per-entry errors.
74    ///
75    /// - For raw tag codes (e.g. unrecognized tags), use [`Self::get_by_code`].
76    ///
77    ///   ## Example
78    ///
79    ///   ```rust
80    ///   use nom_exif::*;
81    ///
82    ///   fn main() -> Result<()> {
83    ///       let mut parser = MediaParser::new();
84    ///       let ms = MediaSource::open("./testdata/exif.jpg")?;
85    ///       let iter = parser.parse_exif(ms)?;
86    ///       let exif: Exif = iter.into();
87    ///
88    ///       assert_eq!(exif.get_in(IfdIndex::MAIN, ExifTag::Model).unwrap(),
89    ///                  &"vivo X90 Pro+".into());
90    ///       Ok(())
91    ///   }
92    ///   ```
93    pub fn get_in(&self, ifd: IfdIndex, tag: ExifTag) -> Option<&EntryValue> {
94        self.get_by_code(ifd, tag.code())
95    }
96
97    /// Get entry value for the specified raw `code` in the specified `ifd`.
98    /// Used for tags not in the recognized [`ExifTag`] enum.
99    pub fn get_by_code(&self, ifd: IfdIndex, code: u16) -> Option<&EntryValue> {
100        self.ifds.get(ifd.as_usize()).and_then(|d| d.get(code))
101    }
102
103    /// Iterate every parsed entry in every IFD.
104    ///
105    /// Order is: IFD0 entries first (in `HashMap` order โ€” not stable), then
106    /// IFD1, etc. Filter by IFD with `.iter().filter(|e| e.ifd == IfdIndex::MAIN)`.
107    pub fn iter(&self) -> impl Iterator<Item = ExifEntry<'_>> {
108        self.ifds.iter().enumerate().flat_map(|(idx, dir)| {
109            let ifd = IfdIndex::new(idx);
110            dir.iter().map(move |(code, value)| ExifEntry {
111                ifd,
112                tag: TagOrCode::from(code),
113                value,
114            })
115        })
116    }
117
118    /// Get parsed GPS information.
119    ///
120    /// Returns `None` if the source had no `GPSInfo` IFD or if its parse
121    /// failed (failures land in [`Self::errors`]).
122    pub fn gps_info(&self) -> Option<&GPSInfo> {
123        self.gps_info.as_ref()
124    }
125
126    /// Per-entry errors collected during `From<ExifIter>` conversion. Each
127    /// tuple is `(ifd, tag, error)`. Empty slice if the parse was clean.
128    pub fn errors(&self) -> &[(IfdIndex, TagOrCode, crate::EntryError)] {
129        &self.errors
130    }
131
132    /// Whether the source file carries additional embedded media that this
133    /// parse path did *not* extract โ€” e.g. HEIC Live Photo MOV, RAF JPEG
134    /// preview.
135    pub fn has_embedded_media(&self) -> bool {
136        self.has_embedded_media
137    }
138
139    fn put_value(&mut self, ifd: usize, code: u16, v: EntryValue) {
140        while self.ifds.len() < ifd + 1 {
141            self.ifds.push(ParsedImageFileDirectory::new());
142        }
143        self.ifds[ifd].put(code, v);
144    }
145}
146
147impl From<ExifIter> for Exif {
148    fn from(iter: ExifIter) -> Self {
149        let gps_info = iter.parse_gps().ok().flatten();
150        let has_embedded_media = iter.has_embedded_media();
151        let mut exif = Exif::new(gps_info, has_embedded_media);
152
153        for entry in iter {
154            let ifd = entry.ifd();
155            let tag = entry.tag();
156            let code = tag.code();
157            match entry.into_result() {
158                Ok(v) => exif.put_value(ifd.as_usize(), code, v),
159                Err(e) => exif.errors.push((ifd, tag, e)),
160            }
161        }
162
163        exif
164    }
165}
166
/// Size in bytes of the TIFF header read by [`TiffHeader::parse`]: a 2-byte
/// byte-order marker, a 2-byte magic number and the 4-byte IFD0 offset.
pub(crate) const TIFF_HEADER_LEN: usize = 8;
168
169/// TIFF Header
170#[derive(Clone, PartialEq, Eq)]
171pub(crate) struct TiffHeader {
172    pub endian: Endianness,
173    pub ifd0_offset: u32,
174}
175
176impl Default for TiffHeader {
177    fn default() -> Self {
178        Self {
179            endian: Endianness::Big,
180            ifd0_offset: 0,
181        }
182    }
183}
184
185impl Debug for TiffHeader {
186    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
187        let endian_str = match self.endian {
188            Endianness::Big => "Big",
189            Endianness::Little => "Little",
190            Endianness::Native => "Native",
191        };
192        f.debug_struct("TiffHeader")
193            .field("endian", &endian_str)
194            .field("ifd0_offset", &format!("{:#x}", self.ifd0_offset))
195            .finish()
196    }
197}
198
/// Size in bytes of a single IFD entry.
pub(crate) const IFD_ENTRY_SIZE: usize = 12;
200
201impl TiffHeader {
202    pub fn parse(input: &[u8]) -> IResult<&[u8], TiffHeader> {
203        use nom::number::streaming::{u16, u32};
204        let (remain, endian) = TiffHeader::parse_endian(input)?;
205        let (_, (_, offset)) = (
206            combinator::verify(u16(endian), |magic| *magic == 0x2a),
207            u32(endian),
208        )
209            .parse(remain)?;
210
211        let header = Self {
212            endian,
213            ifd0_offset: offset,
214        };
215
216        Ok((remain, header))
217    }
218
219    pub fn parse_ifd_entry_num(input: &[u8], endian: Endianness) -> IResult<&[u8], u16> {
220        let (remain, num) = nom::number::streaming::u16(endian)(input)?; // Safe-slice
221        if num == 0 {
222            return Ok((remain, 0));
223        }
224
225        // 12 bytes per entry
226        let size = (num as usize)
227            .checked_mul(IFD_ENTRY_SIZE)
228            .expect("should fit");
229
230        if size > remain.len() {
231            return Err(nom::Err::Incomplete(Needed::new(size - remain.len())));
232        }
233
234        Ok((remain, num))
235    }
236
237    // pub fn first_ifd<'a>(&self, input: &'a [u8], tag_ids: HashSet<u16>) -> IResult<&'a [u8], IFD> {
238    //     // ifd0_offset starts from the beginning of Header, so we should
239    //     // subtract the header size, which is 8
240    //     let offset = self.ifd0_offset - 8;
241
242    //     // skip to offset
243    //     let (_, remain) = take(offset)(input)?;
244
245    //     IFD::parse(remain, self.endian, tag_ids)
246    // }
247
248    fn parse_endian(input: &[u8]) -> IResult<&[u8], Endianness> {
249        combinator::map(alt((tag("MM"), tag("II"))), |endian_marker| {
250            if endian_marker == b"MM" {
251                Endianness::Big
252            } else {
253                Endianness::Little
254            }
255        })
256        .parse(input)
257    }
258}
259
260pub(crate) fn check_exif_header(data: &[u8]) -> Result<bool, nom::Err<nom::error::Error<&[u8]>>> {
261    tag::<_, _, nom::error::Error<_>>(EXIF_IDENT)(data).map(|_| true)
262}
263
264pub(crate) fn check_exif_header2(i: &[u8]) -> IResult<&[u8], ()> {
265    let (remain, _) = (
266        nom::number::complete::be_u32,
267        nom::bytes::complete::tag(EXIF_IDENT),
268    )
269        .parse(i)?;
270    Ok((remain, ()))
271}
272
/// Identifier matched by the Exif header checks: `Exif` plus two NUL bytes.
pub(crate) const EXIF_IDENT: &str = "Exif\0\0";
274
#[cfg(test)]
mod tests {
    use std::io::Read;
    use std::thread;

    use test_case::test_case;

    use crate::exif::input_into_iter;
    use crate::jpeg::extract_exif_data;
    use crate::slice::SubsliceRange;
    use crate::testkit::{open_sample, read_sample};
    use crate::ExifIterEntry;

    use super::*;

    /// Opens `path`, parses it through the public `MediaParser` API and
    /// converts the resulting iterator into an [`Exif`].
    fn load_exif(path: &str) -> Exif {
        use crate::{MediaParser, MediaSource};
        let mut parser = MediaParser::new();
        let ms = MediaSource::open(path).unwrap();
        parser.parse_exif(ms).unwrap().into()
    }

    #[test]
    fn header() {
        let _ = tracing_subscriber::fmt().with_test_writer().try_init();

        let buf = [0x4d, 0x4d, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x08, 0x00];

        let (_, header) = TiffHeader::parse(&buf).unwrap();
        assert_eq!(
            header,
            TiffHeader {
                endian: Endianness::Big,
                ifd0_offset: 8,
            }
        );
    }

    #[test_case("exif.jpg")]
    fn exif_iter_gps(path: &str) {
        let buf = read_sample(path).unwrap();
        let (_, data) = extract_exif_data(&buf).unwrap();
        let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
        let data = bytes::Bytes::from(buf).slice(range);
        let iter = input_into_iter(data, None).unwrap();
        let gps = iter.parse_gps().unwrap().unwrap();
        assert_eq!(gps.to_iso6709(), "+22.53113+114.02148/");
    }

    #[test_case("exif.jpg")]
    fn clone_exif_iter_to_thread(path: &str) {
        let buf = read_sample(path).unwrap();
        let (_, data) = extract_exif_data(&buf).unwrap();
        let range = data.and_then(|x| buf.subslice_in_range(x)).unwrap();
        let data = bytes::Bytes::from(buf).slice(range);
        let iter = input_into_iter(data, None).unwrap();
        let iter2 = iter.clone();

        let mut expect = String::new();
        open_sample(&format!("{path}.txt"))
            .unwrap()
            .read_to_string(&mut expect)
            .unwrap();

        // The clone is walked on another thread while the original is walked
        // here; both must produce the expected dump.
        let jh = thread::spawn(move || iter_to_str(iter2));

        let result = iter_to_str(iter);

        // open_sample_w(&format!("{path}.txt"))
        //     .unwrap()
        //     .write_all(result.as_bytes())
        //     .unwrap();

        assert_eq!(result.trim(), expect.trim());
        assert_eq!(jh.join().unwrap().trim(), expect.trim());
    }

    /// Renders every entry as `<ifd>.<tag, padded to 32> » <value or error>`,
    /// one entry per line.
    fn iter_to_str(it: impl Iterator<Item = ExifIterEntry>) -> String {
        let ss = it
            .map(|x| {
                format!(
                    "{}.{:<32} » {}",
                    x.ifd(),
                    match x.tag() {
                        crate::TagOrCode::Tag(t) => t.to_string(),
                        crate::TagOrCode::Unknown(c) => format!("Unknown(0x{c:04x})"),
                    },
                    x.result()
                        .map(|v| v.to_string())
                        .map_err(|e| e.to_string())
                        .unwrap_or_else(|s| s)
                )
            })
            .collect::<Vec<String>>();
        ss.join("\n")
    }

    #[test]
    fn p5_baseline_exif_jpg_dump_snapshot() {
        // Lock down the post-refactor invariant: parsing testdata/exif.jpg
        // through the public API yields the same set of (ifd, tag, value)
        // triples before and after every P5 task. Captured as a sorted
        // formatted string so the assertion is a single Vec compare.
        use crate::{MediaParser, MediaSource};
        let mut parser = MediaParser::new();
        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
        let iter = parser.parse_exif(ms).unwrap();

        let mut entries: Vec<String> = iter
            .map(|e| {
                let val = match e.result() {
                    Ok(v) => format!("{v}"),
                    Err(err) => format!("<err:{err}>"),
                };
                format!("{}.0x{:04x}={val}", e.ifd(), e.tag().code())
            })
            .collect();
        entries.sort();
        assert!(
            entries.len() > 5,
            "expected >5 entries, got {}",
            entries.len()
        );
        assert!(
            entries.iter().any(|s| s.contains("0x010f")),
            "expected Make tag (0x010f) in snapshot, got {entries:?}"
        );
    }

    #[test]
    fn exif_get_in_main_routes_via_ifd_index() {
        use crate::{ExifTag, IfdIndex};
        let exif = load_exif("testdata/exif.jpg");

        // Main image: same as exif.get(...)
        let v_via_get = exif.get(ExifTag::Model);
        let v_via_get_in = exif.get_in(IfdIndex::MAIN, ExifTag::Model);
        assert_eq!(v_via_get, v_via_get_in);
        assert!(
            v_via_get.is_some(),
            "Model tag expected in testdata/exif.jpg"
        );
    }

    #[test]
    fn exif_get_by_code_finds_unrecognized_or_recognized_tag() {
        use crate::{ExifTag, IfdIndex};
        let exif = load_exif("testdata/exif.jpg");
        // Make = 0x010f
        let v = exif.get_by_code(IfdIndex::MAIN, ExifTag::Make.code());
        assert!(v.is_some());
    }

    #[test]
    fn exif_gps_info_returns_borrow_no_result_wrap() {
        let exif = load_exif("testdata/exif.jpg");
        // gps_info returns Option<&GPSInfo> directly (no Result wrap).
        let g: Option<&crate::GPSInfo> = exif.gps_info();
        assert!(g.is_some(), "testdata/exif.jpg has GPS info");
        assert_eq!(g.unwrap().to_iso6709(), "+22.53113+114.02148/");
    }

    #[test]
    fn exif_iter_yields_main_ifd_entries() {
        use crate::{ExifTag, IfdIndex};
        let exif = load_exif("testdata/exif.jpg");

        let main_count = exif.iter().filter(|e| e.ifd == IfdIndex::MAIN).count();
        assert!(
            main_count > 1,
            "expected >1 entries in main IFD, got {main_count}"
        );

        // Ensure each entry is well-formed.
        for entry in exif.iter() {
            // value is a real reference to an EntryValue
            let _: &crate::EntryValue = entry.value;
            // Tag round-trips
            let code = entry.tag.code();
            assert_eq!(
                exif.get_by_code(entry.ifd, code).unwrap(),
                entry.value,
                "iter entry value should match get_by_code lookup"
            );
        }

        // Specifically: Model entry is present and matches get().
        let model_via_iter = exif
            .iter()
            .find(|e| e.tag.tag() == Some(ExifTag::Model))
            .map(|e| e.value);
        assert_eq!(model_via_iter, exif.get(ExifTag::Model));
    }

    #[test]
    fn exif_errors_is_empty_for_clean_fixture() {
        let exif = load_exif("testdata/exif.jpg");
        // Clean fixture: errors() returns empty slice but the method exists
        // and the type matches the spec.
        let errs: &[(crate::IfdIndex, crate::TagOrCode, crate::EntryError)] = exif.errors();
        assert!(
            errs.is_empty(),
            "exif.jpg has no per-entry errors, got {errs:?}"
        );
    }

    #[test]
    fn exif_errors_captures_per_entry_errors_for_broken_fixture() {
        let exif = load_exif("testdata/broken.jpg");
        // broken.jpg has malformed IFD entries — at least one should land in errors().
        // (Note: if broken.jpg's particular breakage doesn't surface as a per-entry
        // error, this assertion may be `>= 0`. Adjust as needed.)
        // For now this only checks that conversion and `errors()` do not panic.
        let _ = exif.errors();
    }

    #[test]
    fn has_embedded_media_true_for_heic() {
        use crate::{MediaParser, MediaSource};
        let mut parser = MediaParser::new();
        let ms = MediaSource::open("testdata/exif.heic").unwrap();
        let iter = parser.parse_exif(ms).unwrap();
        assert!(
            iter.has_embedded_media(),
            "HEIC files may carry an embedded MOV (Live Photo)"
        );
        let exif: Exif = iter.into();
        assert!(exif.has_embedded_media(), "flag survives From<ExifIter>");
    }

    #[test]
    fn has_embedded_media_false_for_plain_jpeg() {
        use crate::{MediaParser, MediaSource};
        let mut parser = MediaParser::new();
        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
        let iter = parser.parse_exif(ms).unwrap();
        assert!(
            !iter.has_embedded_media(),
            "plain JPEG does not carry embedded media"
        );
        let exif: Exif = iter.into();
        assert!(!exif.has_embedded_media());
    }
}
531}