Skip to main content

nom_exif/exif/
exif_iter.rs

1use std::{collections::HashSet, fmt::Debug};
2
3use bytes::Bytes;
4use nom::{number::complete, Parser};
5
6use crate::{
7    error::EntryError,
8    slice::SliceChecked,
9    values::{DataFormat, EntryData, IRational, URational},
10    EntryValue, ExifTag,
11};
12
13use super::{exif_exif::IFD_ENTRY_SIZE, GPSInfo, LatLng, TiffHeader};
14use crate::TagOrCode;
15
/// Position of an IFD (Image File Directory) inside an EXIF blob.
///
/// The main image lives at `0` ([`IfdIndex::MAIN`]) and the thumbnail at `1`
/// ([`IfdIndex::THUMBNAIL`]); any value `>= 2` denotes a sub-IFD, numbered in
/// the order encountered. Prefer the named constants for the common cases and
/// fall back to [`IfdIndex::new`] for raw indexing.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct IfdIndex(usize);

impl IfdIndex {
    /// The main image IFD; its index is always `0`.
    pub const MAIN: Self = Self(0);

    /// The thumbnail IFD; its index is `1` when such an IFD is present.
    pub const THUMBNAIL: Self = Self(1);

    /// Wraps a raw index. `0` and `1` are equal to [`Self::MAIN`] and
    /// [`Self::THUMBNAIL`] respectively; anything `>= 2` is a sub-IFD.
    pub const fn new(index: usize) -> Self {
        Self(index)
    }

    /// Unwraps the raw index as a `usize`.
    pub const fn as_usize(self) -> usize {
        let Self(raw) = self;
        raw
    }
}

/// Renders the index as `ifd<N>`, e.g. `ifd0` for the main image.
impl std::fmt::Display for IfdIndex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("ifd{}", self.0))
    }
}
49
/// Eager view into a single Exif entry. Yielded by [`crate::Exif::iter`] and
/// designed to be cheap to copy: the `value` is a borrow into the parent
/// [`crate::Exif`].
///
/// # Why pub fields instead of getters?
///
/// `ifd`, `tag`, and `value` are independent — there is no cross-field
/// invariant to enforce. The Rust idiom for plain data carriers (cf.
/// [`std::ops::Range`]) is `pub` fields. The lazy yield type
/// [`crate::ExifIterEntry`] uses *private* fields because it carries a
/// `value xor error` invariant.
#[derive(Clone, Copy, Debug)]
pub struct ExifEntry<'a> {
    /// Index of the IFD associated with this entry.
    pub ifd: IfdIndex,
    /// The entry's tag: a recognized tag, or the raw code when it is not a
    /// known one.
    pub tag: TagOrCode,
    /// Borrowed entry value; the borrow is what keeps this struct `Copy`.
    pub value: &'a crate::EntryValue,
}
67
/// Represents an additional TIFF data block to be processed after the primary block.
/// Used for CR3 files with multiple CMT boxes (CMT1, CMT2, CMT3).
///
/// Cloning copies the `block_id` string; `data` is a `Bytes` handle.
#[derive(Clone)]
pub(crate) struct TiffDataBlock {
    /// Block identifier (e.g., "CMT1", "CMT2", "CMT3")
    // NOTE(review): in the part of the file visible here this is only read by
    // the debug log in `load_next_block`, which is presumably why the
    // dead-code lint is silenced — confirm.
    #[allow(dead_code)]
    pub block_id: String,
    /// Pre-sliced bytes view for this block's data
    pub data: Bytes,
    /// TIFF header information (optional, if known). When `None`, the header
    /// is parsed from `data` once the block is loaded.
    pub header: Option<TiffHeader>,
}
80
81/// Parses header from input data, and returns an [`ExifIter`].
82///
83/// All entries are lazy-parsed. That is, only when you iterate over
84/// [`ExifIter`] will the IFD entries be parsed one by one.
85///
86/// The one exception is the time zone entries. The method will try to find
87/// and parse the time zone data first, so we can correctly parse all time
88/// information in subsequent iterates.
89#[tracing::instrument]
90pub(crate) fn input_into_iter(
91    input: impl Into<bytes::Bytes> + Debug,
92    state: Option<TiffHeader>,
93) -> crate::Result<ExifIter> {
94    let input: bytes::Bytes = input.into();
95    let header = match state {
96        // header has been parsed, and header has been skipped, input data
97        // is the IFD data
98        Some(header) => header,
99        _ => {
100            // header has not been parsed, input data includes IFD header
101            let (_, header) = TiffHeader::parse(&input[..])?;
102
103            tracing::debug!(
104                ?header,
105                data_len = format!("{:#x}", input.len()),
106                "TIFF header parsed"
107            );
108            header
109        }
110    };
111
112    let start = header.ifd0_offset as usize;
113    if start > input.len() {
114        return Err(crate::Error::UnexpectedEof {
115            context: "exif iter init",
116        });
117    }
118    tracing::debug!(?header, offset = start);
119
120    let mut ifd0 = IfdIter::try_new(0, input.clone(), header.to_owned(), start, None)?;
121
122    let tz = ifd0.find_tz_offset();
123    ifd0.tz = tz.clone();
124    let iter: ExifIter = ExifIter::new(input, header, tz, ifd0);
125
126    tracing::debug!(?iter, "got IFD0");
127
128    Ok(iter)
129}
130
/// An iterator version of [`Exif`](crate::Exif) that yields [`ExifIterEntry`]
/// items.
///
/// Cloning an `ExifIter` is very cheap; the underlying data is shared
/// via `bytes::Bytes` reference counting.
///
/// A cloned `ExifIter` starts iterating from the first entry again; the
/// original's iteration position is not carried over.
///
/// If you want to convert an `ExifIter` into an [`Exif`](crate::Exif), you
/// probably want to clone it first and convert the clone, since the
/// original's iteration position may already have been advanced by
/// `Iterator::next()` calls.
pub struct ExifIter {
    /// The bytes this iterator was created from.
    input: Bytes,
    /// TIFF header the iterator was created with.
    tiff_header: TiffHeader,
    /// Time zone offset string passed in at construction, if any.
    tz: Option<String>,
    /// Starting IFD; rewinding re-seeds `ifds` from a rewound copy of it.
    ifd0: IfdIter,

    // Iterating status
    /// Stack of IFDs still being walked; the last element is the current one.
    ifds: Vec<IfdIter>,
    /// Offsets of IFDs that have already been entered, used to skip repeated
    /// IFDs and so avoid looping forever.
    visited_offsets: HashSet<usize>,

    // Multi-block support for CR3 files with multiple CMT boxes
    /// Additional TIFF data blocks to process after the primary block
    additional_blocks: Vec<TiffDataBlock>,
    /// Index into `additional_blocks` of the next block to load. It is `0`
    /// while the primary block is still being iterated and is incremented
    /// each time loading of an additional block is attempted.
    current_block_index: usize,
    /// Tags encountered so far for duplicate filtering (ifd_index, tag_code)
    encountered_tags: HashSet<(usize, u16)>,
    /// Flag stored by `set_has_embedded_track` and reported by
    /// `has_embedded_track`.
    has_embedded_track: bool,
}
162
163impl Debug for ExifIter {
164    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165        f.debug_struct("ExifIter")
166            .field("data len", &self.input.len())
167            .field("tiff_header", &self.tiff_header)
168            .field("ifd0", &self.ifd0)
169            .field("state", &self.ifds.first().map(|x| (x.index, x.pos)))
170            .field("ifds num", &self.ifds.len())
171            .field("additional_blocks", &self.additional_blocks.len())
172            .field("current_block_index", &self.current_block_index)
173            .finish_non_exhaustive()
174    }
175}
176
177impl Clone for ExifIter {
178    fn clone(&self) -> Self {
179        self.clone_rewound()
180    }
181}
182
183impl ExifIter {
184    pub(crate) fn new(
185        input: bytes::Bytes,
186        tiff_header: TiffHeader,
187        tz: Option<String>,
188        ifd0: IfdIter,
189    ) -> ExifIter {
190        let ifds = vec![ifd0.clone()];
191        ExifIter {
192            input,
193            tiff_header,
194            tz,
195            ifd0,
196            ifds,
197            visited_offsets: HashSet::new(),
198            additional_blocks: Vec::new(),
199            current_block_index: 0,
200            encountered_tags: HashSet::new(),
201            has_embedded_track: false,
202        }
203    }
204
205    /// Clone with iteration state reset to entry 0.
206    ///
207    /// Cheap: `ExifIter` shares its underlying `bytes::Bytes` via refcount.
208    pub fn clone_rewound(&self) -> Self {
209        let ifd0 = self.ifd0.clone_and_rewind();
210        let ifds = vec![ifd0.clone()];
211        Self {
212            input: self.input.clone(),
213            tiff_header: self.tiff_header.clone(),
214            tz: self.tz.clone(),
215            ifd0,
216            ifds,
217            visited_offsets: HashSet::new(),
218            additional_blocks: self.additional_blocks.clone(),
219            current_block_index: 0,
220            encountered_tags: HashSet::new(),
221            has_embedded_track: self.has_embedded_track,
222        }
223    }
224
225    /// Reset iteration to the first entry (in-place). After this call,
226    /// `next()` yields entries starting from IFD0 entry 0 again.
227    pub fn rewind(&mut self) {
228        let ifd0 = self.ifd0.clone_and_rewind();
229        self.ifds = vec![ifd0.clone()];
230        self.ifd0 = ifd0;
231        self.visited_offsets.clear();
232        self.current_block_index = 0;
233        self.encountered_tags.clear();
234    }
235
236    /// Try to find and parse GPS information.
237    ///
238    /// Calling this method won't affect the iterator's state.
239    ///
240    /// Returns:
241    ///
242    /// - An `Ok<Some<GPSInfo>>` if gps info is found and parsed successfully.
243    /// - An `Ok<None>` if gps info is not found.
244    /// - An `Err` if gps info is found but parsing failed.
245    #[tracing::instrument(skip_all)]
246    pub fn parse_gps(&self) -> crate::Result<Option<GPSInfo>> {
247        let mut iter = self.clone_rewound();
248        let Some(gps) = iter.find(|x| {
249            tracing::info!(?x, "find");
250            x.tag().tag().is_some_and(|t| t == ExifTag::GPSInfo)
251        }) else {
252            tracing::warn!(ifd0 = ?iter.ifds.first(), "GPSInfo not found");
253            return Ok(None);
254        };
255
256        let offset = match gps.result() {
257            Ok(v) => {
258                if let Some(offset) = v.as_u32() {
259                    offset
260                } else {
261                    return Err(EntryError::InvalidValue("invalid gps offset").into());
262                }
263            }
264            Err(e) => return Err(e.clone().into()),
265        };
266        if offset as usize >= iter.input.len() {
267            return Err(crate::Error::Malformed {
268                kind: crate::error::MalformedKind::IfdEntry,
269                message: "GPSInfo offset out of range".into(),
270            });
271        }
272
273        let mut gps_subifd = match IfdIter::try_new(
274            gps.ifd().as_usize(),
275            iter.input.clone(),
276            iter.tiff_header,
277            offset as usize,
278            iter.tz.clone(),
279        ) {
280            Ok(ifd0) => ifd0.tag_code(ExifTag::GPSInfo.code()),
281            Err(e) => return Err(e),
282        };
283        Ok(gps_subifd.parse_gps_info())
284    }
285
286    /// Add an additional TIFF data block to be iterated after the current block.
287    /// Used internally for CR3 files with multiple CMT boxes.
288    ///
289    /// # Arguments
290    /// * `block_id` - Identifier for this TIFF block (e.g., "CMT2", "CMT3")
291    /// * `data` - Pre-sliced `Bytes` view containing this block's TIFF data
292    /// * `header` - Optional TIFF header if already parsed
293    pub(crate) fn add_tiff_block(
294        &mut self,
295        block_id: String,
296        data: bytes::Bytes,
297        header: Option<TiffHeader>,
298    ) {
299        self.additional_blocks.push(TiffDataBlock {
300            block_id,
301            data,
302            header,
303        });
304    }
305
306    /// Internal-only setter used by [`crate::MediaParser::parse_exif`] to
307    /// stamp the iterator with content-detected embedded-track information.
308    pub(crate) fn set_has_embedded_track(&mut self, v: bool) {
309        self.has_embedded_track = v;
310    }
311
312    /// Whether the source file is known to embed a paired media track that
313    /// `parse_exif` did *not* surface — a Pixel/Google or Samsung Galaxy
314    /// Motion Photo (JPEG with `GCamera:MotionPhoto` XMP and an MP4
315    /// trailer). Use [`crate::MediaParser::parse_track`] on the same
316    /// source to extract the embedded track.
317    ///
318    /// **Content-detected, not MIME-guessed**: returns `true` only when
319    /// the parser observes concrete signals during `parse_exif`
320    /// (`GCamera:MotionPhoto="1"` plus a `Container:Directory` /
321    /// `MotionPhotoOffset` / `MicroVideoOffset`). A plain JPEG or HEIC
322    /// without such signals returns `false`.
323    ///
324    /// **Coverage**: Pixel/Google Motion Photos and Samsung Galaxy
325    /// Motion Photos that use the Adobe XMP Container directory format
326    /// (JPEG variants).
327    pub fn has_embedded_track(&self) -> bool {
328        self.has_embedded_track
329    }
330
331    /// Deprecated alias for [`Self::has_embedded_track`].
332    #[deprecated(
333        since = "3.1.0",
334        note = "renamed to `has_embedded_track`; the original `has_embedded_media` was too vague and lumped in still-image previews"
335    )]
336    pub fn has_embedded_media(&self) -> bool {
337        self.has_embedded_track()
338    }
339}
340
/// Lazy yield from [`ExifIter`]. Carries a *value xor error* invariant —
/// every entry holds exactly one of [`Self::value`] or [`Self::error`].
///
/// # Why private fields?
///
/// Public fields would let callers construct nonsense like `value=Some,
/// error=Some`. Private fields + getters preserve the invariant while
/// exposing the natural API: [`Self::result`] for borrowed access,
/// [`Self::into_result`] for ownership transfer (consumes `self`, no panic
/// path).
#[derive(Clone)]
pub struct ExifIterEntry {
    /// IFD this entry was found in.
    ifd: IfdIndex,
    /// Recognized tag, or the raw code when it is not a known tag.
    tag: TagOrCode,
    /// The parsed value or the parse error. Storing a single `Result` is what
    /// enforces the value-xor-error invariant.
    res: Result<EntryValue, crate::error::EntryError>,
}
357
358impl ExifIterEntry {
359    /// IFD this entry was found in (`IfdIndex::MAIN` for the primary image).
360    pub fn ifd(&self) -> IfdIndex {
361        self.ifd
362    }
363
364    /// Recognized tag, or raw `u16` code if not in [`ExifTag`].
365    pub fn tag(&self) -> TagOrCode {
366        self.tag
367    }
368
369    /// Borrow the value. `None` iff this entry hit a parse error.
370    pub fn value(&self) -> Option<&EntryValue> {
371        self.res.as_ref().ok()
372    }
373
374    /// Borrow the error. `None` iff this entry parsed successfully.
375    pub fn error(&self) -> Option<&crate::error::EntryError> {
376        self.res.as_ref().err()
377    }
378
379    /// Borrow either value or error, mirroring the underlying invariant.
380    pub fn result(&self) -> Result<&EntryValue, &crate::error::EntryError> {
381        self.res.as_ref()
382    }
383
384    /// Consume self and return the value or error. No second-call panic
385    /// path (the entry is moved out).
386    pub fn into_result(self) -> Result<EntryValue, crate::error::EntryError> {
387        self.res
388    }
389
390    pub(crate) fn make_ok(ifd: usize, tag: TagOrCode, v: EntryValue) -> Self {
391        Self {
392            ifd: IfdIndex::new(ifd),
393            tag,
394            res: Ok(v),
395        }
396    }
397}
398
399impl std::fmt::Debug for ExifIterEntry {
400    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
401        let value = match &self.res {
402            Ok(v) => format!("{v}"),
403            Err(e) => format!("{e:?}"),
404        };
405        f.debug_struct("ExifIterEntry")
406            .field("ifd", &self.ifd)
407            .field("tag", &self.tag)
408            .field("value", &value)
409            .finish()
410    }
411}
412
/// Upper bound on the number of IFDs kept on `ExifIter`'s stack at once.
/// When the stack grows past this, `ExifIter::next` logs an error, clears the
/// stack and restarts from `ifd0`.
const MAX_IFD_DEPTH: usize = 8;
414
415impl ExifIter {
416    /// Attempt to load and start iterating the next additional TIFF block.
417    /// Returns true if a new block was successfully loaded, false if no more blocks.
418    fn load_next_block(&mut self) -> bool {
419        // Move to the next additional block
420        let block_index = self.current_block_index;
421        if block_index >= self.additional_blocks.len() {
422            return false;
423        }
424
425        let block = &self.additional_blocks[block_index];
426        tracing::debug!(
427            block_id = block.block_id,
428            block_index,
429            "Loading additional TIFF block"
430        );
431
432        // Get the data for this block from the shared input
433        let block_data = block.data.clone();
434        let header = block.header.clone();
435
436        // Try to create an ExifIter for this block
437        match input_into_iter(block_data, header) {
438            Ok(iter) => {
439                // Update our state with the new block's data
440                self.ifd0 = iter.ifd0;
441                self.ifds = vec![self.ifd0.clone()];
442                self.visited_offsets.clear();
443                self.current_block_index += 1;
444
445                tracing::debug!(block_index, "Successfully loaded additional TIFF block");
446                true
447            }
448            Err(e) => {
449                tracing::warn!(
450                    block_index,
451                    error = %e,
452                    "Failed to load additional TIFF block, skipping"
453                );
454                // Move to next block and try again
455                self.current_block_index += 1;
456                self.load_next_block()
457            }
458        }
459    }
460
461    /// Check if a tag should be included based on duplicate filtering.
462    /// Returns true if the tag should be included, false if it's a duplicate.
463    fn should_include_tag(&mut self, ifd_index: usize, tag_code: u16) -> bool {
464        let tag_key = (ifd_index, tag_code);
465        if self.encountered_tags.contains(&tag_key) {
466            tracing::debug!(ifd_index, tag_code, "Skipping duplicate tag");
467            false
468        } else {
469            self.encountered_tags.insert(tag_key);
470            true
471        }
472    }
473}
474
475impl Iterator for ExifIter {
476    type Item = ExifIterEntry;
477
478    #[tracing::instrument(skip_all)]
479    fn next(&mut self) -> Option<Self::Item> {
480        loop {
481            if self.ifds.is_empty() {
482                // Current block exhausted, try to load next additional block
483                if !self.load_next_block() {
484                    tracing::debug!(?self, "all IFDs and blocks have been parsed");
485                    return None;
486                }
487                // Continue with the newly loaded block
488                continue;
489            }
490
491            if self.ifds.len() > MAX_IFD_DEPTH {
492                let depth = self.ifds.len();
493                self.ifds.clear();
494                tracing::error!(
495                    ifds_depth = depth,
496                    "ifd depth is too deep, just go back to ifd0"
497                );
498                self.ifds.push(self.ifd0.clone_with_state());
499            }
500
501            let mut ifd = self.ifds.pop()?;
502            let cur_ifd_idx = ifd.ifd_idx;
503            match ifd.next() {
504                Some((tag_code, entry)) => {
505                    tracing::debug!(ifd = ifd.ifd_idx, ?tag_code, "next tag entry");
506
507                    match entry {
508                        IfdEntry::IfdNew(new_ifd) => {
509                            if new_ifd.offset > 0 {
510                                if self.visited_offsets.contains(&new_ifd.offset) {
511                                    // Ignore repeated ifd parsing to avoid dead looping
512                                    continue;
513                                }
514                                self.visited_offsets.insert(new_ifd.offset);
515                            }
516
517                            let is_subifd = if new_ifd.ifd_idx == ifd.ifd_idx {
518                                // Push the current ifd before enter sub-ifd.
519                                self.ifds.push(ifd);
520                                tracing::debug!(?tag_code, ?new_ifd, "got new SUB-IFD");
521                                true
522                            } else {
523                                // Otherwise this is a next ifd. It means that the
524                                // current ifd has been parsed, so we don't need to
525                                // push it.
526                                tracing::debug!("IFD{} parsing completed", cur_ifd_idx);
527                                tracing::debug!(?new_ifd, "got new IFD");
528                                false
529                            };
530
531                            let (ifd_idx, offset) = (new_ifd.ifd_idx, new_ifd.offset);
532                            self.ifds.push(new_ifd);
533
534                            if is_subifd {
535                                // Check for duplicates before returning sub-ifd entry
536                                let tc = tag_code.unwrap();
537                                if !self.should_include_tag(ifd_idx, tc.code()) {
538                                    continue;
539                                }
540                                // Return sub-ifd as an entry
541                                return Some(ExifIterEntry::make_ok(
542                                    ifd_idx,
543                                    tc,
544                                    EntryValue::U32(offset as u32),
545                                ));
546                            }
547                        }
548                        IfdEntry::Entry(v) => {
549                            let tc = tag_code.unwrap();
550                            // Check for duplicates before returning entry
551                            if !self.should_include_tag(ifd.ifd_idx, tc.code()) {
552                                self.ifds.push(ifd);
553                                continue;
554                            }
555                            let res = Some(ExifIterEntry::make_ok(ifd.ifd_idx, tc, v));
556                            self.ifds.push(ifd);
557                            return res;
558                        }
559                        IfdEntry::Err(e) => {
560                            tracing::warn!(?tag_code, ?e, "parse ifd entry error");
561                            self.ifds.push(ifd);
562                            continue;
563                        }
564                    }
565                }
566                None => continue,
567            }
568        }
569    }
570}
571
/// Iterator state for a single IFD inside a shared input buffer.
#[derive(Clone)]
pub(crate) struct IfdIter {
    /// Index of the IFD this iterator walks.
    ifd_idx: usize,
    /// Tag attached to this iterator through the `tag_code*` / `tag`
    /// builder methods, if any.
    tag_code: Option<TagOrCode>,

    // starts from TIFF header
    /// The whole buffer; entry offsets are used as positions in it.
    input: Bytes,

    // ifd data offset
    /// Position in `input` where this IFD starts.
    offset: usize,

    /// TIFF header; its `endian` is used to decode entries.
    header: TiffHeader,
    /// Entry count read from the first two bytes of the IFD.
    entry_num: u16,

    /// Time zone offset string handed to the entry value parser.
    pub tz: Option<String>,

    // Iterating status
    /// Iteration counter; reset to `0` by `rewind`.
    index: u16,
    /// Current byte position in `input`; `rewind` resets it to `offset + 2`,
    /// just past the two-byte entry count.
    pos: usize,
}
592
593impl Debug for IfdIter {
594    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
595        f.debug_struct("IfdIter")
596            .field("ifd_idx", &self.ifd_idx)
597            .field("tag", &self.tag_code)
598            .field("data len", &self.input.len())
599            .field("tz", &self.tz)
600            .field("header", &self.header)
601            .field("entry_num", &self.entry_num)
602            .field("index", &self.index)
603            .field("pos", &self.pos)
604            .finish()
605    }
606}
607
608impl IfdIter {
609    pub fn rewind(&mut self) {
610        self.index = 0;
611        // Skip the first two bytes, which is the entry num
612        self.pos = self.offset + 2;
613    }
614
615    pub fn clone_and_rewind(&self) -> Self {
616        let mut it = self.clone();
617        it.rewind();
618        it
619    }
620
621    pub fn tag_code_maybe(mut self, code: Option<u16>) -> Self {
622        self.tag_code = code.map(|x| x.into());
623        self
624    }
625
626    pub fn tag_code(mut self, code: u16) -> Self {
627        self.tag_code = Some(code.into());
628        self
629    }
630
631    #[allow(unused)]
632    pub fn tag(mut self, tag: TagOrCode) -> Self {
633        self.tag_code = Some(tag);
634        self
635    }
636
637    #[tracing::instrument(skip(input))]
638    pub fn try_new(
639        ifd_idx: usize,
640        input: Bytes,
641        header: TiffHeader,
642        offset: usize,
643        tz: Option<String>,
644    ) -> crate::Result<Self> {
645        if input.len() < 2 {
646            return Err(crate::Error::Malformed {
647                kind: crate::error::MalformedKind::TiffHeader,
648                message: "ifd data too small to decode entry num".into(),
649            });
650        }
651        // should use the complete header data to parse ifd entry num
652        assert!(offset <= input.len());
653        let ifd_data = input.slice(offset..);
654        let (_, entry_num) = TiffHeader::parse_ifd_entry_num(&ifd_data, header.endian)?;
655
656        Ok(Self {
657            ifd_idx,
658            tag_code: None,
659            input,
660            offset,
661            header,
662            entry_num,
663            tz,
664            // Skip the first two bytes, which is the entry num
665            pos: offset + 2,
666            index: 0,
667        })
668    }
669
670    fn parse_tag_entry(&self, entry_data: &[u8]) -> Option<(u16, IfdEntry)> {
671        let endian = self.header.endian;
672        let (_, (tag, data_format, components_num, value_or_offset)) = (
673            complete::u16::<_, nom::error::Error<_>>(endian),
674            complete::u16(endian),
675            complete::u32(endian),
676            complete::u32(endian),
677        )
678            .parse(entry_data)
679            .ok()?;
680
681        if tag == 0 {
682            return None;
683        }
684
685        let df: DataFormat = match DataFormat::try_from(data_format) {
686            Ok(df) => df,
687            Err(bad) => {
688                let t: TagOrCode = tag.into();
689                tracing::warn!(tag = ?t, format = bad, "invalid entry data format");
690                return Some((
691                    tag,
692                    IfdEntry::Err(EntryError::InvalidShape {
693                        format: bad,
694                        count: components_num,
695                    }),
696                ));
697            }
698        };
699        let (tag, res) = self.parse_entry(tag, df, components_num, entry_data, value_or_offset);
700        Some((tag, res))
701    }
702
703    fn get_data_pos(&self, value_or_offset: u32) -> usize {
704        // value_or_offset.saturating_sub(self.offset)
705        value_or_offset as usize
706    }
707
708    fn parse_entry(
709        &self,
710        tag: u16,
711        data_format: DataFormat,
712        components_num: u32,
713        entry_data: &[u8],
714        value_or_offset: u32,
715    ) -> (u16, IfdEntry) {
716        // get component_size according to data format
717        let component_size = data_format.component_size();
718
719        // get entry data
720        let size = components_num as usize * component_size;
721        let data = if size <= 4 {
722            &entry_data[8..8 + size] // Safe-slice
723        } else {
724            let start = self.get_data_pos(value_or_offset);
725            let end = start + size;
726            let Some(data) = self.input.slice_checked(start..end) else {
727                tracing::warn!(
728                    "entry data overflow, tag: {:04x} start: {:08x} end: {:08x} ifd data len {:08x}",
729                    tag,
730                    start,
731                    end,
732                    self.input.len(),
733                );
734                return (
735                    tag,
736                    IfdEntry::Err(EntryError::Truncated {
737                        needed: size,
738                        available: self.input.len().saturating_sub(start),
739                    }),
740                );
741            };
742
743            data
744        };
745
746        if SUBIFD_TAGS.contains(&tag) {
747            if let Some(value) = self.new_ifd_iter(self.ifd_idx, value_or_offset, Some(tag)) {
748                return (tag, value);
749            }
750        }
751
752        let entry = EntryData {
753            endian: self.header.endian,
754            tag,
755            data,
756            data_format,
757            components_num,
758        };
759        match EntryValue::parse(&entry, &self.tz) {
760            Ok(v) => (tag, IfdEntry::Entry(v)),
761            Err(e) => (tag, IfdEntry::Err(e)),
762        }
763    }
764
765    fn new_ifd_iter(
766        &self,
767        ifd_idx: usize,
768        value_or_offset: u32,
769        tag: Option<u16>,
770    ) -> Option<IfdEntry> {
771        let offset = self.get_data_pos(value_or_offset);
772        if offset < self.input.len() {
773            match IfdIter::try_new(
774                ifd_idx,
775                self.input.clone(),
776                self.header.to_owned(),
777                offset,
778                self.tz.clone(),
779            ) {
780                Ok(iter) => return Some(IfdEntry::IfdNew(iter.tag_code_maybe(tag))),
781                Err(e) => {
782                    tracing::warn!(?tag, ?e, "Create next/sub IFD failed");
783                }
784            }
785            // return (
786            //     tag,
787            //     // IfdEntry::Ifd {
788            //     //     idx: self.ifd_idx,
789            //     //     offset: value_or_offset,
790            //     // },
791            //     IfdEntry::IfdNew(),
792            // );
793        }
794        None
795    }
796
797    pub fn find_exif_iter(&self) -> Option<IfdIter> {
798        let endian = self.header.endian;
799        // find ExifOffset
800        for i in 0..self.entry_num {
801            let pos = self.pos + i as usize * IFD_ENTRY_SIZE;
802            let (_, tag) =
803                complete::u16::<_, nom::error::Error<_>>(endian)(&self.input[pos..]).ok()?;
804            if tag == ExifTag::ExifOffset.code() {
805                let entry_data = self.input.slice_checked(pos..pos + IFD_ENTRY_SIZE)?;
806                let (_, entry) = self.parse_tag_entry(entry_data)?;
807                match entry {
808                    IfdEntry::IfdNew(iter) => return Some(iter),
809                    IfdEntry::Entry(_) | IfdEntry::Err(_) => return None,
810                }
811            }
812        }
813        None
814    }
815
816    pub fn find_tz_offset(&self) -> Option<String> {
817        let iter = self.find_exif_iter()?;
818        let mut offset = None;
819        for entry in iter {
820            let Some(tag) = entry.0 else {
821                continue;
822            };
823            if tag.code() == ExifTag::OffsetTimeOriginal.code()
824                || tag.code() == ExifTag::OffsetTimeDigitized.code()
825            {
826                return entry.1.as_str().map(|x| x.to_owned());
827            } else if tag.code() == ExifTag::OffsetTime.code() {
828                offset = entry.1.as_str().map(|x| x.to_owned());
829            }
830        }
831
832        offset
833    }
834
835    // Assume the current ifd is GPSInfo subifd.
836    pub fn parse_gps_info(&mut self) -> Option<GPSInfo> {
837        use crate::exif::gps::{Altitude, LatRef, LonRef, Speed, SpeedUnit};
838
839        let mut latitude_ref = None;
840        let mut latitude = None;
841        let mut longitude_ref = None;
842        let mut longitude = None;
843        let mut altitude_ref = None;
844        let mut altitude_value = None;
845        let mut speed_unit = None;
846        let mut speed_value = None;
847        let mut has_data = false;
848
849        for (tag, entry) in self {
850            let Some(tag) = tag.and_then(|x| x.tag()) else {
851                continue;
852            };
853            has_data = true;
854            match tag {
855                ExifTag::GPSLatitudeRef => {
856                    latitude_ref = entry.as_char().and_then(LatRef::from_char);
857                }
858                ExifTag::GPSLongitudeRef => {
859                    longitude_ref = entry.as_char().and_then(LonRef::from_char);
860                }
861                ExifTag::GPSAltitudeRef => {
862                    altitude_ref = entry.as_u8();
863                }
864                ExifTag::GPSLatitude => {
865                    if let Some(v) = entry.as_urational_slice() {
866                        latitude = LatLng::try_from(v).ok();
867                    } else if let Some(v) = entry.as_irational_slice() {
868                        latitude = LatLng::try_from(v).ok();
869                    }
870                }
871                ExifTag::GPSLongitude => {
872                    if let Some(v) = entry.as_urational_slice() {
873                        longitude = LatLng::try_from(v).ok();
874                    } else if let Some(v) = entry.as_irational_slice() {
875                        longitude = LatLng::try_from(v).ok();
876                    }
877                }
878                ExifTag::GPSAltitude => {
879                    if let Some(v) = entry.as_urational() {
880                        altitude_value = Some(*v);
881                    } else if let Some(v) = entry.as_irational() {
882                        if let Ok(u) = URational::try_from(*v) {
883                            altitude_value = Some(u);
884                        }
885                    }
886                }
887                ExifTag::GPSSpeedRef => {
888                    speed_unit = entry.as_char().and_then(SpeedUnit::from_char);
889                }
890                ExifTag::GPSSpeed => {
891                    if let Some(v) = entry.as_urational() {
892                        speed_value = Some(*v);
893                    } else if let Some(v) = entry.as_irational() {
894                        if let Ok(u) = URational::try_from(*v) {
895                            speed_value = Some(u);
896                        }
897                    }
898                }
899                _ => (),
900            }
901        }
902
903        if !has_data {
904            tracing::warn!("GPSInfo data not found");
905            return None;
906        }
907
908        let altitude = match (altitude_ref, altitude_value) {
909            (Some(0), Some(v)) => Altitude::AboveSeaLevel(v),
910            (Some(1), Some(v)) => Altitude::BelowSeaLevel(v),
911            _ => Altitude::Unknown,
912        };
913
914        let speed = match (speed_unit, speed_value) {
915            (Some(unit), Some(value)) => Some(Speed { unit, value }),
916            _ => None,
917        };
918
919        Some(GPSInfo {
920            latitude_ref: latitude_ref.unwrap_or(LatRef::North),
921            latitude: latitude.unwrap_or_default(),
922            longitude_ref: longitude_ref.unwrap_or(LonRef::East),
923            longitude: longitude.unwrap_or_default(),
924            altitude,
925            speed,
926        })
927    }
928
929    fn clone_with_state(&self) -> IfdIter {
930        let mut it = self.clone();
931        it.index = self.index;
932        it.pos = self.pos;
933        it
934    }
935}
936
937#[derive(Debug)]
938pub(crate) enum IfdEntry {
939    IfdNew(IfdIter), // ifd index
940    Entry(EntryValue),
941    Err(EntryError),
942}
943
944impl IfdEntry {
945    pub fn as_u8(&self) -> Option<u8> {
946        if let IfdEntry::Entry(EntryValue::U8(v)) = self {
947            Some(*v)
948        } else {
949            None
950        }
951    }
952
953    pub fn as_char(&self) -> Option<char> {
954        if let IfdEntry::Entry(EntryValue::Text(s)) = self {
955            s.chars().next()
956        } else {
957            None
958        }
959    }
960
961    fn as_irational(&self) -> Option<&IRational> {
962        if let IfdEntry::Entry(EntryValue::IRational(v)) = self {
963            Some(v)
964        } else {
965            None
966        }
967    }
968
969    fn as_irational_slice(&self) -> Option<&Vec<IRational>> {
970        if let IfdEntry::Entry(EntryValue::IRationalArray(v)) = self {
971            Some(v)
972        } else {
973            None
974        }
975    }
976
977    fn as_urational(&self) -> Option<&URational> {
978        if let IfdEntry::Entry(EntryValue::URational(v)) = self {
979            Some(v)
980        } else {
981            None
982        }
983    }
984
985    fn as_urational_slice(&self) -> Option<&Vec<URational>> {
986        if let IfdEntry::Entry(EntryValue::URationalArray(v)) = self {
987            Some(v)
988        } else {
989            None
990        }
991    }
992
993    fn as_str(&self) -> Option<&str> {
994        if let IfdEntry::Entry(e) = self {
995            e.as_str()
996        } else {
997            None
998        }
999    }
1000}
1001
1002pub(crate) const SUBIFD_TAGS: &[u16] = &[ExifTag::ExifOffset.code(), ExifTag::GPSInfo.code()];
1003
1004impl Iterator for IfdIter {
1005    type Item = (Option<TagOrCode>, IfdEntry);
1006
1007    #[tracing::instrument(skip(self))]
1008    fn next(&mut self) -> Option<Self::Item> {
1009        tracing::debug!(
1010            ifd = self.ifd_idx,
1011            index = self.index,
1012            entry_num = self.entry_num,
1013            offset = format!("{:08x}", self.offset),
1014            pos = format!("{:08x}", self.pos),
1015            "next IFD entry"
1016        );
1017        if self.input.len() < self.pos + IFD_ENTRY_SIZE {
1018            return None;
1019        }
1020
1021        let endian = self.header.endian;
1022        if self.index > self.entry_num {
1023            return None;
1024        }
1025        if self.index == self.entry_num {
1026            tracing::debug!(
1027                self.ifd_idx,
1028                self.index,
1029                pos = self.pos,
1030                "try to get next ifd"
1031            );
1032            self.index += 1;
1033
1034            // next IFD offset
1035            let (_, offset) =
1036                complete::u32::<_, nom::error::Error<_>>(endian)(&self.input[self.pos..]).ok()?;
1037
1038            if offset == 0 {
1039                // IFD parsing completed
1040                tracing::debug!(?self, "IFD parsing completed");
1041                return None;
1042            }
1043
1044            return self
1045                .new_ifd_iter(self.ifd_idx + 1, offset, None)
1046                .map(|x| (None, x));
1047        }
1048
1049        let entry_data = self
1050            .input
1051            .slice_checked(self.pos..self.pos + IFD_ENTRY_SIZE)?;
1052        self.index += 1;
1053        self.pos += IFD_ENTRY_SIZE;
1054
1055        let (tag, res) = self.parse_tag_entry(entry_data)?;
1056
1057        Some((Some(tag.into()), res)) // Safe-slice
1058    }
1059}
1060
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use test_case::test_case;

    use crate::exif::{extract_exif_with_mime, input_into_iter};
    use crate::file::MediaMimeImage;
    use crate::slice::SubsliceRange;
    use crate::testkit::read_sample;
    use crate::{
        EntryValue, Exif, ExifEntry, ExifTag, IfdIndex, MediaParser, MediaSource, TagOrCode,
    };

    /// Sample image opened by the `MediaParser`-driven tests.
    const SAMPLE_JPEG: &str = "testdata/exif.jpg";

    // `"-"` in the `tz` / `time` columns means "expected to be absent".
    #[test_case(
        "exif.jpg",
        "+08:00",
        "2023-07-09T20:36:33+08:00",
        MediaMimeImage::Jpeg
    )]
    #[test_case("exif-no-tz.jpg", "", "2023-07-09 20:36:33", MediaMimeImage::Jpeg)]
    #[test_case("broken.jpg", "-", "2014-09-21 15:51:22", MediaMimeImage::Jpeg)]
    #[test_case(
        "exif.heic",
        "+08:00",
        "2022-07-22T21:26:32+08:00",
        MediaMimeImage::Heic
    )]
    #[test_case(
        "exif.avif",
        "+08:00",
        "2022-07-22T21:26:32+08:00",
        MediaMimeImage::Avif
    )]
    #[test_case("tif.tif", "-", "-", MediaMimeImage::Tiff)]
    #[test_case(
        "fujifilm_x_t1_01.raf.meta",
        "-",
        "2014-01-30 12:49:13",
        MediaMimeImage::Raf
    )]
    fn exif_iter_tz(path: &str, tz: &str, time: &str, img_type: MediaMimeImage) {
        let raw = read_sample(path).unwrap();
        let (exif_data, _) = extract_exif_with_mime(img_type, &raw, None).unwrap();
        let exif_range = exif_data
            .and_then(|x| raw.subslice_in_range(x))
            .unwrap();
        let iter = input_into_iter(bytes::Bytes::from(raw).slice(exif_range), None).unwrap();

        let expected_tz = (tz != "-").then(|| tz.to_string());
        assert_eq!(iter.tz, expected_tz);

        let exif: Exif = iter.into();
        let original = exif.get(ExifTag::DateTimeOriginal);
        match time {
            "-" => assert!(original.is_none()),
            expected => assert_eq!(original.unwrap().to_string(), expected),
        }
    }

    #[test]
    fn ifd_index_constants() {
        assert_eq!(IfdIndex::MAIN.as_usize(), 0);
        assert_eq!(IfdIndex::THUMBNAIL.as_usize(), 1);
    }

    #[test]
    fn ifd_index_roundtrip_via_new_and_as_usize() {
        for index in [0, 1, 2, 3, 7, 99] {
            assert_eq!(IfdIndex::new(index).as_usize(), index);
        }
    }

    #[test]
    fn ifd_index_equality_and_hash() {
        // `IfdIndex::new(0)` duplicates `IfdIndex::MAIN`, so only two survive.
        let unique: HashSet<IfdIndex> = [IfdIndex::MAIN, IfdIndex::new(0), IfdIndex::THUMBNAIL]
            .into_iter()
            .collect();
        assert_eq!(unique.len(), 2);
    }

    #[test]
    fn ifd_index_display_format() {
        assert_eq!(format!("{}", IfdIndex::MAIN), "ifd0");
        assert_eq!(format!("{}", IfdIndex::new(7)), "ifd7");
    }

    #[test]
    fn tag_or_code_for_known_tag_resolves_to_tag_variant() {
        let resolved: TagOrCode = ExifTag::Make.code().into();
        assert_eq!(resolved, TagOrCode::Tag(ExifTag::Make));
        assert_eq!(resolved.code(), ExifTag::Make.code());
    }

    #[test]
    fn tag_or_code_for_unknown_tag_resolves_to_unknown_variant() {
        let resolved: TagOrCode = 0xffff_u16.into();
        assert_eq!(resolved, TagOrCode::Unknown(0xffff));
        assert_eq!(resolved.code(), 0xffff);
    }

    #[test]
    fn exif_entry_pub_fields_construct_and_destructure() {
        let model = EntryValue::Text("vivo X90 Pro+".into());
        let entry = ExifEntry {
            ifd: IfdIndex::MAIN,
            tag: TagOrCode::Tag(ExifTag::Model),
            value: &model,
        };
        // The fields are public, so a plain pattern takes the entry apart.
        let ExifEntry { ifd, tag, value } = entry;
        assert_eq!(ifd, IfdIndex::MAIN);
        assert_eq!(tag.code(), ExifTag::Model.code());
        assert!(matches!(value, EntryValue::Text(_)));
        // The entry only borrows its value, so it can be copied repeatedly.
        let _first_copy = entry;
        let _second_copy = entry;
    }

    #[test]
    fn exif_iter_entry_value_xor_error_invariant() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        for entry in parser.parse_exif(source).unwrap() {
            // Exactly one of value / error is present.
            let has_value = entry.value().is_some();
            let has_error = entry.error().is_some();
            assert!(has_value != has_error, "entry must be value xor error");
            // `result()` must agree with `value()` / `error()`.
            match entry.result() {
                Ok(v) => assert_eq!(Some(v), entry.value()),
                Err(e) => assert_eq!(Some(e), entry.error()),
            }
        }
    }

    #[test]
    fn exif_iter_entry_into_result_consumes_self() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        // `into_result` takes the entry by value, so nothing else can be called
        // on it afterwards. This is the spec's panic-free replacement for v2's
        // take_result.
        let count_ok: usize = parser
            .parse_exif(source)
            .unwrap()
            .map(|entry| entry.into_result())
            .filter(|result| result.is_ok())
            .count();
        assert!(count_ok > 0);
    }

    #[test]
    fn exif_iter_entry_tag_returns_tag_or_code() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        let mut entries = parser.parse_exif(source).unwrap();
        let make_present = entries.any(|e| matches!(e.tag(), TagOrCode::Tag(ExifTag::Make)));
        assert!(make_present);
    }

    #[test]
    fn exif_iter_rewind_resets_iteration_state() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        let mut iter = parser.parse_exif(source).unwrap();

        let first_pass = iter.by_ref().count();
        assert!(first_pass > 0);
        // A second pass over the exhausted iterator yields nothing.
        assert_eq!(iter.by_ref().count(), 0);

        iter.rewind();
        let second_pass = iter.count();
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn exif_iter_clone_rewound_yields_independent_full_iter() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        let mut iter = parser.parse_exif(source).unwrap();
        let _consumed = iter.by_ref().take(2).count();

        // The clone starts from entry 0 even though `iter` already consumed
        // two entries.
        let rewound = iter.clone_rewound();
        let rewound_total = rewound.count();
        let remaining = iter.count();
        assert!(rewound_total > remaining);
    }

    #[test]
    fn exif_iter_parse_gps_returns_option_no_iteration_advance() {
        let mut parser = MediaParser::new();
        let source = MediaSource::open(SAMPLE_JPEG).unwrap();
        let iter = parser.parse_exif(source).unwrap();

        let gps = iter.parse_gps().unwrap();
        assert!(gps.is_some());
        // `parse_gps` does not drive the outer iterator.
        let remaining = iter.count();
        assert!(remaining > 0);
    }
}