Skip to main content

nom_exif/
parser.rs

1use std::{
2    cmp::{max, min},
3    fmt::{Debug, Display},
4    fs::File,
5    io::{self, Read, Seek},
6    path::Path,
7};
8
9use crate::{
10    error::{ParsedError, ParsingError, ParsingErrorState},
11    exif::TiffHeader,
12    file::MediaMime,
13    ExifIter, TrackInfo,
14};
15
/// A function that tries to skip `n` bytes of `reader` by seeking. Returns
/// `Ok(true)` on success, `Ok(false)` if the reader does not support seek
/// (so the caller should fall back to reading-and-discarding), or
/// `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
///
/// The `u64` argument is the number of bytes to skip forward from the
/// reader's current position.
///
/// This is captured at construction time by `MediaSource::seekable` /
/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
/// runtime fn pointer.
pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25
/// `MediaSource` represents a media data source that can be parsed by
/// [`MediaParser`].
///
/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
///
/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
///   (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
///   [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
///
/// - In other cases:
///
///   - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
///     (an already-open `File` goes here).
///
///   - Use [`MediaSource::unseekable`] to create a MediaSource from a
///     reader that only impl `Read`
///
/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
/// since the former is more efficient when the parser needs to skip a large number of bytes.
///
/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
/// its own buffer management and the buffers can be shared between multiple
/// parsing tasks, thus avoiding frequent memory allocations.
pub struct MediaSource<R> {
    /// The underlying reader. The header bytes used for format detection
    /// have already been consumed from it by the constructor.
    pub(crate) reader: R,
    /// Header bytes read during construction (at most
    /// `HEADER_PARSE_BUF_SIZE`); the parser prepends them to its own buffer.
    pub(crate) buf: Vec<u8>,
    /// Media type detected from the header bytes.
    pub(crate) mime: MediaMime,
    /// Strategy for skipping bytes by seeking; see [`SkipBySeekFn`].
    pub(crate) skip_by_seek: SkipBySeekFn<R>,
    /// P7: zero-copy memory-mode payload. `Some` only when the source was
    /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
    /// `skip_by_seek` are placeholders (and never consulted) in that mode.
    pub(crate) memory: Option<bytes::Bytes>,
}
59
/// Top-level classification of a media source.
///
/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
/// `Track` files are time-based containers — video, audio, or both — and
/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
/// audio containers like `.mka` are classified as `Track`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaKind {
    /// The source was detected as an image format.
    Image,
    /// The source was detected as a time-based (video/audio) container.
    Track,
}
71
72impl<R> Debug for MediaSource<R> {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("MediaSource")
75            .field("mime", &self.mime)
76            .finish_non_exhaustive()
77    }
78}
79
// Maximum number of leading bytes read (or inspected, for in-memory
// sources) when detecting the media format. Should be enough for parsing
// header.
const HEADER_PARSE_BUF_SIZE: usize = 128;
82
83impl<R> MediaSource<R> {
84    /// Top-level classification of this media source.
85    pub fn kind(&self) -> MediaKind {
86        match self.mime {
87            MediaMime::Image(_) => MediaKind::Image,
88            MediaMime::Track(_) => MediaKind::Track,
89        }
90    }
91}
92
93impl<R: Read> MediaSource<R> {
94    fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95        let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96        reader
97            .by_ref()
98            .take(HEADER_PARSE_BUF_SIZE as u64)
99            .read_to_end(&mut buf)?;
100        let mime: MediaMime = buf.as_slice().try_into()?;
101        Ok(Self {
102            reader,
103            buf,
104            mime,
105            skip_by_seek,
106            memory: None,
107        })
108    }
109
110    /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111    /// reader that only impl `Read`
112    ///
113    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114    /// since the former is more efficient when the parser needs to skip a large number of bytes.
115    pub fn unseekable(reader: R) -> crate::Result<Self> {
116        Self::build(reader, |_, _| Ok(false))
117    }
118}
119
120impl<R: Read + Seek> MediaSource<R> {
121    /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122    ///
123    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124    /// since the former is more efficient when the parser needs to skip a large number of bytes.
125    pub fn seekable(reader: R) -> crate::Result<Self> {
126        Self::build(reader, |r, n| {
127            let signed: i64 = n
128                .try_into()
129                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
130            r.seek_relative(signed)?;
131            Ok(true)
132        })
133    }
134}
135
136impl MediaSource<File> {
137    /// Open a file at `path` and parse its header to detect the media format.
138    ///
139    /// This is the v3-preferred entry point for the common case of "I have a
140    /// path on disk". For an already-open `File` use [`Self::seekable`].
141    pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142        Self::seekable(File::open(path)?)
143    }
144}
145
146impl MediaSource<()> {
147    /// Build a [`MediaSource`] from an in-memory byte payload.
148    ///
149    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150    /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151    /// HTTP-stack body types that implement `Into<Bytes>` directly.
152    ///
153    /// The header (first up to 128 bytes) is sniffed for media kind, the
154    /// same way [`MediaSource::open`] does it for files. The full payload is
155    /// stored zero-copy: subsequent parsing through
156    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157    /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158    /// via reference counting.
159    ///
160    /// The returned source is parsed by the dedicated
161    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162    /// methods. The streaming `parse_exif` / `parse_track` methods do not
163    /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164    ///
165    /// # Example
166    ///
167    /// ```rust
168    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169    ///
170    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171    /// let ms = MediaSource::from_bytes(bytes)?;
172    /// assert_eq!(ms.kind(), MediaKind::Image);
173    ///
174    /// let mut parser = MediaParser::new();
175    /// let _iter = parser.parse_exif_from_bytes(ms)?;
176    /// # Ok::<(), nom_exif::Error>(())
177    /// ```
178    #[deprecated(
179        since = "3.3.0",
180        note = "Use `MediaSource::from_memory` and the unified `parse_*` \
181                methods (which now accept memory-mode sources directly). \
182                The `MediaSource<()>` shape will be removed in v4."
183    )]
184    pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
185        let bytes = bytes.into();
186        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
187        let mime: MediaMime = bytes[..head_end].try_into()?;
188        Ok(Self {
189            reader: (),
190            buf: Vec::new(),
191            mime,
192            // Placeholder: never invoked in memory mode (clear_and_skip's
193            // AdvanceOnly path is the only one taken).
194            skip_by_seek: |_, _| Ok(false),
195            memory: Some(bytes),
196        })
197    }
198
199    /// Internal adapter: convert a v3.0-style `MediaSource<()>` (built via
200    /// the deprecated `from_bytes`) into the unified `MediaSource<Empty>`
201    /// shape so the deprecated `parse_*_from_bytes` methods can delegate to
202    /// the unified `parse_*` methods. Memory contents are moved over
203    /// verbatim, preserving zero-copy.
204    pub(crate) fn into_empty(self) -> MediaSource<std::io::Empty> {
205        MediaSource {
206            reader: std::io::empty(),
207            buf: self.buf,
208            mime: self.mime,
209            // Placeholder: never invoked in memory mode (clear_and_skip's
210            // AdvanceOnly path is the only one taken).
211            skip_by_seek: |_, _| Ok(false),
212            memory: self.memory,
213        }
214    }
215}
216
217impl MediaSource<std::io::Empty> {
218    /// Build a [`MediaSource`] from an in-memory byte payload.
219    ///
220    /// This is the v3.3 replacement for [`MediaSource::<()>::from_bytes`]
221    /// (which is now `#[deprecated]`). Functionally identical — same
222    /// zero-copy semantics, same accepted input types — but produces a
223    /// `MediaSource<std::io::Empty>` so that the unified `parse_*<R: Read>`
224    /// methods can accept it directly without a separate `_from_bytes`
225    /// sibling.
226    ///
227    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
228    /// `Vec<u8>`, `&'static [u8]`, `String`, `Box<[u8]>`, and HTTP-stack
229    /// body types that implement `Into<Bytes>` directly.
230    ///
231    /// The header (first up to 128 bytes) is sniffed for media kind, the
232    /// same way [`MediaSource::open`] does it for files. The full payload
233    /// is stored zero-copy: subsequent parsing through
234    /// [`MediaParser::parse_exif`] / [`MediaParser::parse_track`] shares
235    /// this `Bytes` directly with the returned `ExifIter` / sub-IFDs via
236    /// reference counting.
237    ///
238    /// # Example
239    ///
240    /// ```rust
241    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
242    ///
243    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
244    /// let ms = MediaSource::from_memory(bytes)?;
245    /// assert_eq!(ms.kind(), MediaKind::Image);
246    ///
247    /// let mut parser = MediaParser::new();
248    /// let _iter = parser.parse_exif(ms)?;  // unified entry point
249    /// # Ok::<(), nom_exif::Error>(())
250    /// ```
251    pub fn from_memory(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
252        let bytes = bytes.into();
253        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
254        let mime: MediaMime = bytes[..head_end].try_into()?;
255        Ok(Self {
256            reader: std::io::empty(),
257            buf: Vec::new(),
258            mime,
259            // Placeholder: never invoked in memory mode (AdvanceOnly path).
260            skip_by_seek: |_, _| Ok(false),
261            memory: Some(bytes),
262        })
263    }
264}
265
// ----- Parse-time buffer policy -----
//
// Layered by lifecycle:
//
// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
//   `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
//   helpers don't over-commit.
// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
//   parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
//   blocking-pool dispatches.
// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
//   single parse. Anything that would push past this is rejected as
//   `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
//   that declare absurd sizes.
// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
//   recycling. After a parse whose buffer ended above this, `shrink_to`
//   gives the excess back to the allocator. Tuned for typical metadata
//   sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
//   recycle path stays warm for batch workloads.
//
// All sizes are in bytes.
// 8 KiB: first fill / initial capacity.
pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024;
// 16 KiB: minimum size of every later fill.
pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024;
// 1 GiB: hard cap enforced by `check_fill_size`.
pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024;
// 4 MiB: largest capacity kept for recycling in `acquire_buf`.
const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024;
289
/// Minimal view over a parse buffer with a movable read position.
pub(crate) trait Buf {
    /// Returns the bytes currently available to the parser. In the
    /// `BufferedParserState` implementation in this file, this is the active
    /// storage starting at `position`.
    fn buffer(&self) -> &[u8];
    /// Discards the buffered bytes (a no-op for memory-mode state in this
    /// file's implementation).
    fn clear(&mut self);

    /// Sets the offset at which `buffer()` starts.
    fn set_position(&mut self, pos: usize);
    // NOTE(review): `position` is called by `BufParser::clear_and_skip` in
    // this file, so this `allow` may be stale — confirm before removing.
    #[allow(unused)]
    /// Returns the current offset at which `buffer()` starts.
    fn position(&self) -> usize;
}
298
/// Buffer-management state used by `MediaParser` (sync and async paths share it).
///
/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
/// one *cached* `Bytes` clone of the most recently shared buffer. When the
/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
/// succeeds the underlying allocation is reused (the previous `ExifIter`
/// has been dropped); otherwise the clone is discarded and a fresh
/// `Vec<u8>` is allocated.
///
/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
/// are `&mut self`, so a single slot is sufficient.
#[derive(Debug, Default)]
pub(crate) struct BufferedParserState {
    /// Clone of the buffer most recently handed out by `share_buf`; kept so
    /// `acquire_buf` can try to reclaim its allocation.
    cached: Option<bytes::Bytes>,
    /// Active streaming buffer; `Some` between `acquire_buf` and
    /// `share_buf` / `reset`.
    buf: Option<Vec<u8>>,
    /// P7: memory-mode storage. When `Some`, the parser is feeding from a
    /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
    /// `cached` are unused in this mode — the user owns the allocation,
    /// so there is nothing to recycle.
    memory: Option<bytes::Bytes>,
    /// Offset into the active storage (`memory` or `buf`) at which
    /// `Buf::buffer()` starts.
    position: usize,
}
321
322impl BufferedParserState {
323    pub(crate) fn new() -> Self {
324        Self::default()
325    }
326
327    pub(crate) fn reset(&mut self) {
328        // If a parse failed mid-way the buf may still be present; drop it.
329        // Cache stays — recycle on next acquire if eligible.
330        self.buf = None;
331        self.memory = None;
332        self.position = 0;
333    }
334
335    /// Switch the parser state into memory mode, owning `bytes` directly.
336    /// Caller must have already called `reset()` (asserted in debug). Subsequent
337    /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
338    /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
339    pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
340        debug_assert!(
341            self.buf.is_none() && self.memory.is_none(),
342            "set_memory called on non-clean state"
343        );
344        self.memory = Some(bytes);
345        self.position = 0;
346    }
347
348    pub(crate) fn is_memory_mode(&self) -> bool {
349        self.memory.is_some()
350    }
351
352    pub(crate) fn acquire_buf(&mut self) {
353        if self.memory.is_some() {
354            // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
355            return;
356        }
357        debug_assert!(self.buf.is_none());
358        let buf = match self.cached.take() {
359            Some(b) => match b.try_into_mut() {
360                Ok(bm) => {
361                    let mut v = Vec::<u8>::from(bm);
362                    v.clear();
363                    if v.capacity() > MAX_REUSE_BUF_SIZE {
364                        v.shrink_to(MAX_REUSE_BUF_SIZE);
365                    }
366                    v
367                }
368                Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
369            },
370            None => Vec::with_capacity(INIT_BUF_SIZE),
371        };
372        self.buf = Some(buf);
373    }
374
375    pub(crate) fn buf(&self) -> &Vec<u8> {
376        self.buf.as_ref().expect("no buf here")
377    }
378
379    pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
380        self.buf.as_mut().expect("no buf here")
381    }
382
383    #[cfg(test)]
384    pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
385        self.cached.as_ref().map(|b| b.as_ptr())
386    }
387
388    #[cfg(test)]
389    pub(crate) fn buf_is_none_for_test(&self) -> bool {
390        self.buf.is_none()
391    }
392}
393
394impl Buf for BufferedParserState {
395    fn buffer(&self) -> &[u8] {
396        if let Some(m) = &self.memory {
397            return &m[self.position..];
398        }
399        &self.buf()[self.position..]
400    }
401    fn clear(&mut self) {
402        // In memory mode `clear` is a no-op: there is no scratch buffer to
403        // truncate, and the caller's bytes must remain available for further
404        // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
405        // what advances `position` in memory mode.
406        if self.memory.is_some() {
407            return;
408        }
409        self.buf_mut().clear();
410    }
411    fn set_position(&mut self, pos: usize) {
412        self.position = pos;
413    }
414    fn position(&self) -> usize {
415        self.position
416    }
417}
418
419impl ShareBuf for BufferedParserState {
420    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
421        if let Some(m) = self.memory.take() {
422            // Zero-copy share: caller already owns the allocation. No cache
423            // write — recycle is irrelevant when the user holds the alloc.
424            let position = self.position;
425            return (m, position);
426        }
427        let vec = self.buf.take().expect("no buf to share");
428        let bytes = bytes::Bytes::from(vec);
429        let position = self.position;
430        self.cached = Some(bytes.clone());
431        (bytes, position)
432    }
433}
434
/// What `clear_and_skip` should do, given the current buffer state and
/// the requested skip count.
pub(crate) enum SkipPlan {
    /// Skip is fully within the current buffer; just advance position.
    AdvanceOnly,
    /// Buffer must be cleared and `extra` bytes skipped from the reader.
    /// `extra` is the requested skip count minus the buffered length.
    ClearAndSkip { extra: usize },
}
443
444pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
445    if n <= buffer_len {
446        SkipPlan::AdvanceOnly
447    } else {
448        SkipPlan::ClearAndSkip {
449            extra: n - buffer_len,
450        }
451    }
452}
453
454pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
455    if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
456        tracing::error!(?requested, "the requested buffer size is too big");
457        return Err(io::ErrorKind::Unsupported.into());
458    }
459    Ok(())
460}
461
/// Outcome of one `parse_loop_step` iteration, telling the driving loop
/// what to do next.
pub(crate) enum LoopAction<O> {
    /// Parse succeeded; return this value to the caller.
    Done(O),
    /// Need more bytes — call `fill_buf(reader, n)` then re-step.
    NeedFill(usize),
    /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
    Skip(usize),
    /// Parse failed permanently. Carries the structural-unit kind so
    /// the eventual `Error::Malformed` is labelled correctly.
    Failed {
        kind: crate::error::MalformedKind,
        message: String,
    },
}
476
/// Closure type passed to [`parse_loop_step`].
///
/// It is called with the current buffer, the offset given to the step, and
/// the state carried over from the previous attempt (if any). It returns
/// the parsed value, or an error paired with the state to resume from.
pub(crate) type ParseFn<'a, O> =
    dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
480
481/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
482pub(crate) fn parse_loop_step<O>(
483    buffer: &[u8],
484    offset: usize,
485    parsing_state: &mut Option<ParsingState>,
486    parse: &mut ParseFn<'_, O>,
487) -> LoopAction<O> {
488    match parse(buffer, offset, parsing_state.take()) {
489        Ok(o) => LoopAction::Done(o),
490        Err(es) => {
491            *parsing_state = es.state;
492            match es.err {
493                ParsingError::Need(n) => LoopAction::NeedFill(n),
494                ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
495                ParsingError::Failed { kind, message } => LoopAction::Failed { kind, message },
496            }
497        }
498    }
499}
500
/// Format-specific parser state carried across `Need` / `ClearAndSkip`
/// retries of the parse loop.
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// Carries a `TiffHeader` — presumably one that was already parsed, so
    /// the resumed call need not parse it again; confirm against the parsers.
    TiffHeader(TiffHeader),
    /// Carries a size for the HEIF path — presumably the Exif payload size;
    /// confirm against the HEIF parser.
    HeifExifSize(usize),
    /// Carries a size for the CR3 path — presumably the Exif payload size;
    /// confirm against the CR3 parser.
    Cr3ExifSize(usize),
    /// PNG chunk walker has already validated the 8-byte signature.
    /// Carried across `Need` / `ClearAndSkip` retries so the resumed
    /// call doesn't re-check signature against a mid-stream slice.
    PngPastSignature,
}
511
512impl Display for ParsingState {
513    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
514        match self {
515            ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
516            ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
517            ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
518            ParsingState::PngPastSignature => f.write_str("ParsingState: PngPastSignature"),
519        }
520    }
521}
522
// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
// parsing and `ParsingState` threading for format-specific state machines.
//
// Implementors provide `fill_buf`; the parse loop and the skip logic are
// default methods built on it and on the `Buf` accessors.
pub(crate) trait BufParser: Buf + Debug {
    /// Reads more bytes from `reader` into the parse buffer; `size` is the
    /// requested amount. Returns the number of bytes read. (The
    /// `MediaParser` implementation in this file reads at most `size` bytes
    /// and reports a zero-byte read as `UnexpectedEof`.)
    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;

    /// Convenience wrapper around `load_and_parse_with_offset` for parse
    /// closures that do not need the offset; always passes offset 0.
    fn load_and_parse<R: Read, P, O>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        mut parse: P,
    ) -> Result<O, ParsedError>
    where
        P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
    {
        self.load_and_parse_with_offset(
            reader,
            skip_by_seek,
            |data, _, state| parse(data, state),
            0,
        )
    }

    /// Runs `parse` against the buffer repeatedly, filling or skipping as
    /// the parser requests, until it succeeds or fails permanently.
    ///
    /// `offset` is forwarded unchanged to every `parse` call. The state
    /// returned with a `Need` / `ClearAndSkip` error is handed back to
    /// `parse` on the next iteration.
    #[tracing::instrument(skip_all)]
    fn load_and_parse_with_offset<R: Read, P, O>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        mut parse: P,
        offset: usize,
    ) -> Result<O, ParsedError>
    where
        P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
    {
        // Nothing buffered at or beyond `offset` yet: prefetch before the
        // first parse attempt.
        if offset >= self.buffer().len() {
            self.fill_buf(reader, MIN_GROW_SIZE)?;
        }
        let mut parsing_state: Option<ParsingState> = None;
        loop {
            match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
                LoopAction::Done(o) => return Ok(o),
                LoopAction::NeedFill(needed) => {
                    // Never read less than MIN_GROW_SIZE per fill.
                    let to_read = max(needed, MIN_GROW_SIZE);
                    let n = self.fill_buf(reader, to_read)?;
                    // `MediaParser::fill_buf` already turns a zero-byte read
                    // into an error; this guard covers implementors that
                    // return `Ok(0)` instead.
                    if n == 0 {
                        return Err(ParsedError::NoEnoughBytes);
                    }
                }
                LoopAction::Skip(n) => {
                    self.clear_and_skip(reader, skip_by_seek, n)?;
                }
                LoopAction::Failed { kind, message } => {
                    return Err(ParsedError::Failed { kind, message })
                }
            }
        }
    }

    /// Skips `n` bytes, counted from the start of the current `buffer()`.
    ///
    /// If the skip fits inside the buffer only the position is advanced.
    /// Otherwise the buffer is cleared and the remainder is skipped on the
    /// reader — by `skip_by_seek` when it reports success, else by reading
    /// and discarding — and the buffer is refilled if it ends up empty.
    #[tracing::instrument(skip(reader, skip_by_seek))]
    fn clear_and_skip<R: Read>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        n: usize,
    ) -> Result<(), ParsedError> {
        match clear_and_skip_decide(self.buffer().len(), n) {
            SkipPlan::AdvanceOnly => {
                self.set_position(self.position() + n);
                Ok(())
            }
            SkipPlan::ClearAndSkip { extra: skip_n } => {
                self.clear();
                let done = (skip_by_seek)(
                    reader,
                    skip_n.try_into().map_err(|_| ParsedError::Failed {
                        // No format context available here: the parser
                        // hit an internal limit honoring a caller's skip.
                        // Pick a sensible default — see #55 follow-up.
                        kind: crate::error::MalformedKind::IsoBmffBox,
                        message: "skip too many bytes".into(),
                    })?,
                )?;
                if !done {
                    // Seeking is unavailable: read the bytes into the buffer
                    // in chunks of at most MAX_PARSE_BUF_SIZE and discard them.
                    let mut skipped = 0;
                    while skipped < skip_n {
                        let mut to_skip = skip_n - skipped;
                        to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
                        let n = self.fill_buf(reader, to_skip)?;
                        skipped += n;
                        if skipped <= skip_n {
                            self.clear();
                        } else {
                            // Only reachable if `fill_buf` returned more than
                            // requested: keep the surplus bytes as unread data.
                            let remain = skipped - skip_n;
                            self.set_position(self.buffer().len() - remain);
                            break;
                        }
                    }
                }

                // NOTE(review): `buffer()` is position-relative in
                // `BufferedParserState`; confirm `position` is 0 once the
                // buffer has been cleared, otherwise this lookup can index
                // past the emptied buffer.
                if self.buffer().is_empty() {
                    self.fill_buf(reader, MIN_GROW_SIZE)?;
                }
                Ok(())
            }
        }
    }
}
629
630impl BufParser for MediaParser {
631    #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
632    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
633        if self.state.is_memory_mode() {
634            // Memory mode owns every byte it will ever have. A request for
635            // more is "the parser walked off the end of the input"; surface
636            // it the same way the streaming path surfaces a 0-byte read.
637            return Err(std::io::ErrorKind::UnexpectedEof.into());
638        }
639        check_fill_size(self.state.buf().len(), size)?;
640
641        // Do not pre-allocate `size` bytes: a crafted box header can declare a
642        // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
643        // stream length. reserve_exact would allocate that memory immediately
644        // even when the reader has only a few bytes left. read_to_end grows the
645        // buffer from the reader's actual size hint instead.
646        let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
647        if n == 0 {
648            tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
649            return Err(std::io::ErrorKind::UnexpectedEof.into());
650        }
651
652        tracing::debug!(
653            ?size,
654            ?n,
655            buf_len = self.state.buf().len(),
656            "fill_buf: read bytes"
657        );
658
659        Ok(n)
660    }
661}
662
663impl Buf for MediaParser {
664    fn buffer(&self) -> &[u8] {
665        self.state.buffer()
666    }
667
668    fn clear(&mut self) {
669        self.state.clear();
670    }
671
672    fn set_position(&mut self, pos: usize) {
673        self.state.set_position(pos);
674    }
675
676    fn position(&self) -> usize {
677        self.state.position()
678    }
679}
680
/// A `MediaParser` can parse media info from a [`MediaSource`].
///
/// `MediaParser` manages inner parse buffers that can be shared between
/// multiple parsing tasks, thus avoiding frequent memory allocations.
///
/// Therefore:
///
/// - Try to reuse a `MediaParser` instead of creating a new one every time
///   you need it.
///
/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
///
/// ## Example
///
/// ```rust
/// use nom_exif::*;
/// use chrono::DateTime;
///
/// let mut parser = MediaParser::new();
///
/// // ------------------- Parse Exif Info
/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
/// assert_eq!(ms.kind(), MediaKind::Image);
/// let mut iter = parser.parse_exif(ms).unwrap();
///
/// let entry = iter.next().unwrap();
/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
///
/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
/// // we can start the iteration from the beginning.
/// let exif: Exif = iter.clone().into();
/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
///
/// // ------------------- Parse Track Info
/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
/// assert_eq!(ms.kind(), MediaKind::Track);
/// let info = parser.parse_track(ms).unwrap();
///
/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
/// assert_eq!(
///     info.gps_info().unwrap().latitude,
///     LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
/// );
/// ```
pub struct MediaParser {
    /// Buffer-management state: the active parse buffer, the recycle cache,
    /// and the current read position.
    state: BufferedParserState,
}
732
733impl Debug for MediaParser {
734    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
735        f.debug_struct("MediaParser")
736            .field("state", &self.state)
737            .finish_non_exhaustive()
738    }
739}
740
741impl Default for MediaParser {
742    fn default() -> Self {
743        Self {
744            state: BufferedParserState::new(),
745        }
746    }
747}
748
/// Hands a parser's buffer over to the parsed result so both can refer to
/// the same bytes.
pub(crate) trait ShareBuf {
    /// Take ownership of the parser's active buffer and return the full
    /// allocation as `Bytes` plus the parser's `position` at share-time.
    /// Caller is responsible for slicing: a parse-loop range `r` corresponds
    /// to absolute range `(r.start + position)..(r.end + position)`.
    fn share_buf(&mut self) -> (bytes::Bytes, usize);
}
756
757impl ShareBuf for MediaParser {
758    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
759        self.state.share_buf()
760    }
761}
762
763impl MediaParser {
764    pub fn new() -> Self {
765        Self::default()
766    }
767
768    /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
769    /// if the source is a `Track` (use [`Self::parse_track`] instead).
770    ///
771    /// As of v3.3, this method also accepts memory-mode sources built via
772    /// [`MediaSource::from_memory`]. The deprecated [`Self::parse_exif_from_bytes`]
773    /// is now a thin adapter that delegates here.
774    ///
775    /// `MediaParser` reuses its internal parse buffer across calls, so prefer
776    /// reusing a single `MediaParser` over creating a new one per file. Drop
777    /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
778    /// the next `parse_*` call so the buffer can be reclaimed.
779    pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
780        self.reset();
781        let res: crate::Result<ExifIter> = (|| {
782            if let Some(memory) = ms.memory.take() {
783                // Memory-mode: zero-copy share of caller-owned bytes.
784                self.state.set_memory(memory);
785                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
786                    return Err(crate::Error::ExifNotFound);
787                }
788                crate::exif::parse_exif_iter(
789                    self,
790                    ms.mime.unwrap_image(),
791                    &mut ms.reader,
792                    ms.skip_by_seek,
793                )
794            } else {
795                // Streaming-mode: existing path verbatim.
796                self.acquire_buf();
797                self.buf_mut().append(&mut ms.buf);
798                // PNG-only EOF tolerance: a tEXt-only PNG can be smaller
799                // than HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png),
800                // so the mime-detection prefill consumes the whole reader
801                // and fill_buf returns UnexpectedEof. The bytes we need
802                // are already in the parse buffer — proceed. Other formats
803                // keep the strict-EOF contract.
804                let is_png = matches!(
805                    ms.mime,
806                    crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
807                );
808                match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
809                    Ok(_) => {}
810                    Err(e)
811                        if is_png
812                            && !self.buffer().is_empty()
813                            && e.kind() == io::ErrorKind::UnexpectedEof => {}
814                    Err(e) => return Err(e.into()),
815                }
816                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
817                    return Err(crate::Error::ExifNotFound);
818                }
819                crate::exif::parse_exif_iter(
820                    self,
821                    ms.mime.unwrap_image(),
822                    &mut ms.reader,
823                    ms.skip_by_seek,
824                )
825            }
826        })();
827        self.reset();
828        res
829    }
830
831    /// Parse track info from a video/audio source.
832    ///
833    /// Parse track info from a video/audio source.
834    ///
835    /// In v3.1, this also accepts JPEG images that carry an embedded
836    /// Pixel/Google Motion Photo trailer. As of v3.3, it also accepts
837    /// memory-mode sources built via [`MediaSource::from_memory`]; the
838    /// deprecated [`Self::parse_track_from_bytes`] is now a thin
839    /// adapter that delegates here.
840    pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
841        self.reset();
842        let res: crate::Result<TrackInfo> = (|| {
843            if let Some(memory) = ms.memory.take() {
844                // Memory mode: zero-copy.
845                self.state.set_memory(memory);
846                let mime_track = match ms.mime {
847                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
848                    crate::file::MediaMime::Track(t) => t,
849                };
850                let out = self.load_and_parse(&mut ms.reader, ms.skip_by_seek, |data, _| {
851                    crate::video::parse_track_info(data, mime_track)
852                        .map_err(|e| ParsingErrorState::new(e, None))
853                })?;
854                Ok(out)
855            } else {
856                // Streaming mode: existing path verbatim.
857                self.acquire_buf();
858                self.buf_mut().append(&mut ms.buf);
859                self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
860                match ms.mime {
861                    crate::file::MediaMime::Image(crate::file::MediaMimeImage::Jpeg) => {
862                        self.parse_jpeg_motion_photo(&mut ms.reader)
863                    }
864                    crate::file::MediaMime::Image(_) => Err(crate::Error::TrackNotFound),
865                    crate::file::MediaMime::Track(mime_track) => {
866                        let skip = ms.skip_by_seek;
867                        Ok(self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
868                            crate::video::parse_track_info(data, mime_track)
869                                .map_err(|e| ParsingErrorState::new(e, None))
870                        })?)
871                    }
872                }
873            }
874        })();
875        self.reset();
876        res
877    }
878
879    /// Read a JPEG to EOF, locate a Pixel-style Motion Photo MP4 trailer,
880    /// and parse it as track metadata. Returns
881    /// [`crate::Error::TrackNotFound`] if no Motion Photo signal is
882    /// present in the JPEG's XMP.
883    fn parse_jpeg_motion_photo<R: Read>(&mut self, reader: &mut R) -> crate::Result<TrackInfo> {
884        // Drain the rest of the JPEG into the parse buffer so we can
885        // address the trailing MP4 by its byte offset from EOF.
886        reader.read_to_end(self.buf_mut())?;
887        let buf = self.buf_mut();
888        let Some(offset) = crate::jpeg::find_motion_photo_offset(buf) else {
889            return Err(crate::Error::TrackNotFound);
890        };
891        let trailer_start = (buf.len() as u64)
892            .checked_sub(offset)
893            .ok_or(crate::Error::TrackNotFound)? as usize;
894        let trailer = &buf[trailer_start..];
895
896        // The trailer can be MP4 / MOV / 3gp depending on the source device;
897        // dispatch by sniffing it as a fresh ISO BMFF input.
898        let trailer_mime =
899            crate::file::MediaMime::try_from(trailer).map_err(|_| crate::Error::TrackNotFound)?;
900        let mime_track = match trailer_mime {
901            crate::file::MediaMime::Track(t) => t,
902            crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
903        };
904        crate::video::parse_track_info(trailer, mime_track).map_err(|e| match e {
905            crate::error::ParsingError::Need(_) | crate::error::ParsingError::ClearAndSkip(_) => {
906                crate::Error::UnexpectedEof {
907                    context: "motion-photo trailer",
908                }
909            }
910            crate::error::ParsingError::Failed { kind, message } => {
911                crate::Error::Malformed { kind, message }
912            }
913        })
914    }
915
916    /// Parse Exif metadata from an in-memory byte payload built via
917    /// the deprecated [`MediaSource::<()>::from_bytes`].
918    ///
919    /// **Deprecated since v3.3.0**: use [`Self::parse_exif`] with
920    /// [`MediaSource::from_memory`] directly.
921    #[deprecated(
922        since = "3.3.0",
923        note = "Use `parse_exif` directly — it now accepts memory-mode \
924                sources built via `MediaSource::from_memory`."
925    )]
926    pub fn parse_exif_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<ExifIter> {
927        self.parse_exif(ms.into_empty())
928    }
929
930    /// **Deprecated since v3.3.0**: use [`Self::parse_track`] with
931    /// [`MediaSource::from_memory`] directly.
932    #[deprecated(
933        since = "3.3.0",
934        note = "Use `parse_track` with `MediaSource::from_memory`."
935    )]
936    pub fn parse_track_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<TrackInfo> {
937        self.parse_track(ms.into_empty())
938    }
939
940    /// Parse all metadata from an image source: EXIF (if any) and
941    /// format-specific extras (PNG `tEXt` chunks, etc.).
942    ///
943    /// Returns `Err(Error::ExifNotFound)` if neither EXIF nor any
944    /// format-specific metadata is found. Returns
945    /// `Err(Error::TrackNotFound)`-style errors on track inputs (use
946    /// `parse_track` instead).
947    ///
948    /// **Lazy form** — this method returns `ImageMetadata<ExifIter>`.
949    /// Convert to the eager `ImageMetadata<Exif>` via `.into()` if
950    /// desired.
951    pub fn parse_image_metadata<R: Read>(
952        &mut self,
953        mut ms: MediaSource<R>,
954    ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
955        self.reset();
956        let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = (|| {
957            // Reject track inputs early (parse_track is the right API).
958            let mime_img = match ms.mime {
959                crate::file::MediaMime::Image(img) => img,
960                crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
961            };
962
963            // Memory-mode shortcut + buffer setup mirrors parse_exif.
964            if let Some(memory) = ms.memory.take() {
965                self.state.set_memory(memory);
966            } else {
967                self.acquire_buf();
968                self.buf_mut().append(&mut ms.buf);
969                // PNG-only EOF tolerance: a tEXt-only PNG can be smaller than
970                // HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png) so the
971                // mime-detection prefill consumes the whole reader and
972                // fill_buf returns UnexpectedEof. The bytes we need are
973                // already in the parse buffer — proceed. Other formats keep
974                // the strict-EOF contract.
975                let is_png = mime_img == crate::file::MediaMimeImage::Png;
976                match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
977                    Ok(_) => {}
978                    Err(e)
979                        if is_png
980                            && !self.buffer().is_empty()
981                            && e.kind() == io::ErrorKind::UnexpectedEof => {}
982                    Err(e) => return Err(e.into()),
983                }
984            }
985
986            if mime_img == crate::file::MediaMimeImage::Png {
987                let (exif, text_chunks) =
988                    crate::exif::parse_png_full(self, &mut ms.reader, ms.skip_by_seek)?;
989                let format = if text_chunks.is_empty() {
990                    None
991                } else {
992                    Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
993                        entries: text_chunks,
994                    }))
995                };
996                if exif.is_none() && format.is_none() {
997                    return Err(crate::Error::ExifNotFound);
998                }
999                Ok(crate::ImageMetadata { exif, format })
1000            } else {
1001                // Non-PNG: existing parse_exif_iter path; format always None.
1002                let iter =
1003                    crate::exif::parse_exif_iter(self, mime_img, &mut ms.reader, ms.skip_by_seek)?;
1004                Ok(crate::ImageMetadata {
1005                    exif: Some(iter),
1006                    format: None,
1007                })
1008            }
1009        })(
1010        );
1011        self.reset();
1012        res
1013    }
1014
1015    fn reset(&mut self) {
1016        self.state.reset();
1017    }
1018
1019    fn buf_mut(&mut self) -> &mut Vec<u8> {
1020        self.state.buf_mut()
1021    }
1022
1023    fn acquire_buf(&mut self) {
1024        self.state.acquire_buf();
1025    }
1026}
1027
1028#[cfg(feature = "tokio")]
1029mod tokio_impl {
1030    use super::*;
1031    use crate::error::ParsingErrorState;
1032    use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
1033    use tokio::io::{AsyncRead, AsyncReadExt};
1034
1035    impl AsyncBufParser for MediaParser {
1036        async fn fill_buf<R: AsyncRead + Unpin>(
1037            &mut self,
1038            reader: &mut R,
1039            size: usize,
1040        ) -> std::io::Result<usize> {
1041            if self.state.is_memory_mode() {
1042                // Memory mode owns every byte it will ever have. Surface
1043                // "walked off end of input" the same way the streaming path
1044                // surfaces a 0-byte read.
1045                return Err(std::io::ErrorKind::UnexpectedEof.into());
1046            }
1047            check_fill_size(self.state.buf().len(), size)?;
1048            // Same rationale as the sync version: do not pre-allocate `size` bytes.
1049            let n = reader
1050                .take(size as u64)
1051                .read_to_end(self.state.buf_mut())
1052                .await?;
1053            if n == 0 {
1054                return Err(std::io::ErrorKind::UnexpectedEof.into());
1055            }
1056            Ok(n)
1057        }
1058    }
1059
1060    impl MediaParser {
1061        /// Parse Exif metadata from an async image source. Returns
1062        /// `Error::ExifNotFound` if the source is a `Track`.
1063        ///
1064        /// As of v3.3, also accepts memory-mode sources built via
1065        /// [`AsyncMediaSource::from_memory`]; the memory branch shares
1066        /// caller-owned `Bytes` zero-copy through `state.set_memory`.
1067        pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
1068            &mut self,
1069            mut ms: AsyncMediaSource<R>,
1070        ) -> crate::Result<ExifIter> {
1071            self.reset();
1072            let res: crate::Result<ExifIter> = async {
1073                if let Some(memory) = ms.memory.take() {
1074                    // Memory-mode: zero-copy share of caller-owned bytes.
1075                    self.state.set_memory(memory);
1076                    if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
1077                        return Err(crate::Error::ExifNotFound);
1078                    }
1079                    crate::exif::parse_exif_iter_async(
1080                        self,
1081                        ms.mime.unwrap_image(),
1082                        &mut ms.reader,
1083                        ms.skip_by_seek,
1084                    )
1085                    .await
1086                } else {
1087                    self.acquire_buf();
1088                    self.buf_mut().append(&mut ms.buf);
1089                    // PNG-only EOF tolerance mirrors the sync path: small
1090                    // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1091                    // consumed during mime detection, so fill_buf returns
1092                    // UnexpectedEof. The bytes are already in the parse
1093                    // buffer; proceed.
1094                    let is_png = matches!(
1095                        ms.mime,
1096                        crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
1097                    );
1098                    match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1099                        .await
1100                    {
1101                        Ok(_) => {}
1102                        Err(e)
1103                            if is_png
1104                                && !self.buffer().is_empty()
1105                                && e.kind() == io::ErrorKind::UnexpectedEof => {}
1106                        Err(e) => return Err(e.into()),
1107                    }
1108                    if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
1109                        return Err(crate::Error::ExifNotFound);
1110                    }
1111                    crate::exif::parse_exif_iter_async(
1112                        self,
1113                        ms.mime.unwrap_image(),
1114                        &mut ms.reader,
1115                        ms.skip_by_seek,
1116                    )
1117                    .await
1118                }
1119            }
1120            .await;
1121            self.reset();
1122            res
1123        }
1124
1125        pub async fn parse_image_metadata_async<R: AsyncRead + Unpin + Send>(
1126            &mut self,
1127            mut ms: AsyncMediaSource<R>,
1128        ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
1129            self.reset();
1130            let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = async {
1131                let mime_img = match ms.mime {
1132                    crate::file::MediaMime::Image(img) => img,
1133                    crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
1134                };
1135
1136                if let Some(memory) = ms.memory.take() {
1137                    self.state.set_memory(memory);
1138                } else {
1139                    self.acquire_buf();
1140                    self.buf_mut().append(&mut ms.buf);
1141                    // PNG-only EOF tolerance mirrors the sync path: small
1142                    // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1143                    // consumed during mime detection, so fill_buf returns
1144                    // UnexpectedEof; the bytes we need are already in the
1145                    // parse buffer.
1146                    let is_png = mime_img == crate::file::MediaMimeImage::Png;
1147                    match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1148                        .await
1149                    {
1150                        Ok(_) => {}
1151                        Err(e)
1152                            if is_png
1153                                && !self.buffer().is_empty()
1154                                && e.kind() == io::ErrorKind::UnexpectedEof => {}
1155                        Err(e) => return Err(e.into()),
1156                    }
1157                }
1158
1159                if mime_img == crate::file::MediaMimeImage::Png {
1160                    let (exif, text_chunks) =
1161                        crate::exif::parse_png_full_async(self, &mut ms.reader, ms.skip_by_seek)
1162                            .await?;
1163                    let format = if text_chunks.is_empty() {
1164                        None
1165                    } else {
1166                        Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
1167                            entries: text_chunks,
1168                        }))
1169                    };
1170                    if exif.is_none() && format.is_none() {
1171                        return Err(crate::Error::ExifNotFound);
1172                    }
1173                    Ok(crate::image_metadata::ImageMetadata { exif, format })
1174                } else {
1175                    let iter = crate::exif::parse_exif_iter_async(
1176                        self,
1177                        mime_img,
1178                        &mut ms.reader,
1179                        ms.skip_by_seek,
1180                    )
1181                    .await?;
1182                    Ok(crate::image_metadata::ImageMetadata {
1183                        exif: Some(iter),
1184                        format: None,
1185                    })
1186                }
1187            }
1188            .await;
1189            self.reset();
1190            res
1191        }
1192
1193        /// Parse track info from an async video/audio source. Returns
1194        /// `Error::TrackNotFound` if the source is an `Image`.
1195        ///
1196        /// As of v3.3, also accepts memory-mode sources built via
1197        /// [`AsyncMediaSource::from_memory`].
1198        pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
1199            &mut self,
1200            mut ms: AsyncMediaSource<R>,
1201        ) -> crate::Result<TrackInfo> {
1202            self.reset();
1203            let res: crate::Result<TrackInfo> = async {
1204                let mime_track = match ms.mime {
1205                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
1206                    crate::file::MediaMime::Track(t) => t,
1207                };
1208                if let Some(memory) = ms.memory.take() {
1209                    self.state.set_memory(memory);
1210                } else {
1211                    self.acquire_buf();
1212                    self.buf_mut().append(&mut ms.buf);
1213                    <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
1214                }
1215                let skip = ms.skip_by_seek;
1216                let out = <Self as AsyncBufParser>::load_and_parse(
1217                    self,
1218                    &mut ms.reader,
1219                    skip,
1220                    |data, _| {
1221                        crate::video::parse_track_info(data, mime_track)
1222                            .map_err(|e| ParsingErrorState::new(e, None))
1223                    },
1224                )
1225                .await?;
1226                Ok(out)
1227            }
1228            .await;
1229            self.reset();
1230            res
1231        }
1232    }
1233}
1234
1235#[cfg(test)]
1236mod tests {
1237    use std::sync::{LazyLock, Mutex, MutexGuard};
1238
1239    use super::*;
1240    use test_case::case;
1241
/// Expected outcome for a sample file in `parse_media`.
enum TrackExif {
    /// Opens as a track source and `parse_track` succeeds.
    Track,
    /// Opens as an image source and `parse_exif` succeeds.
    Exif,
    /// Opens fine, but the parse call matching its kind returns an error.
    NoData,
    /// `MediaSource::open` itself fails.
    Invalid,
}
// Bring the variants into scope so the `#[case]` rows stay short.
use TrackExif::*;
1249
1250    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
1251    fn parser() -> MutexGuard<'static, MediaParser> {
1252        PARSER.lock().unwrap()
1253    }
1254
1255    #[case("3gp_640x360.3gp", Track)]
1256    #[case("broken.jpg", Exif)]
1257    #[case("compatible-brands-fail.heic", Invalid)]
1258    #[case("compatible-brands-fail.mov", Invalid)]
1259    #[case("compatible-brands.heic", NoData)]
1260    #[case("compatible-brands.mov", NoData)]
1261    #[case("embedded-in-heic.mov", Track)]
1262    #[case("exif.heic", Exif)]
1263    #[case("exif.jpg", Exif)]
1264    #[case("exif-no-tz.jpg", Exif)]
1265    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
1266    #[case("meta.mov", Track)]
1267    #[case("meta.mp4", Track)]
1268    #[case("mka.mka", Track)]
1269    #[case("mkv_640x360.mkv", Track)]
1270    #[case("exif-one-entry.heic", Exif)]
1271    #[case("no-exif.jpg", NoData)]
1272    #[case("tif.tif", Exif)]
1273    #[case("ramdisk.img", Invalid)]
1274    #[case("webm_480.webm", Track)]
1275    fn parse_media(path: &str, te: TrackExif) {
1276        let mut parser = parser();
1277        let ms = MediaSource::open(Path::new("testdata").join(path));
1278        match te {
1279            Track => {
1280                let ms = ms.unwrap();
1281                assert_eq!(ms.kind(), MediaKind::Track);
1282                let _: TrackInfo = parser.parse_track(ms).unwrap();
1283            }
1284            Exif => {
1285                let ms = ms.unwrap();
1286                assert_eq!(ms.kind(), MediaKind::Image);
1287                let mut it: ExifIter = parser.parse_exif(ms).unwrap();
1288                let _ = it.parse_gps();
1289
1290                if path.contains("one-entry") {
1291                    assert!(it.next().is_some());
1292                    assert!(it.next().is_none());
1293
1294                    let exif: crate::Exif = it.clone_rewound().into();
1295                    assert!(exif.get(ExifTag::Orientation).is_some());
1296                } else {
1297                    let _: crate::Exif = it.clone_rewound().into();
1298                }
1299            }
1300            NoData => {
1301                let ms = ms.unwrap();
1302                match ms.kind() {
1303                    MediaKind::Image => {
1304                        let res = parser.parse_exif(ms);
1305                        res.unwrap_err();
1306                    }
1307                    MediaKind::Track => {
1308                        let res = parser.parse_track(ms);
1309                        res.unwrap_err();
1310                    }
1311                }
1312            }
1313            Invalid => {
1314                ms.unwrap_err();
1315            }
1316        }
1317    }
1318
1319    use crate::testkit::open_sample;
1320    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
1321    use chrono::{DateTime, FixedOffset, NaiveDateTime};
1322    use test_case::test_case;
1323
1324    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
1325    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
1326    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, 
1327        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), 
1328            Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
1329    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal, 
1330        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1331    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1332        let mut parser = parser();
1333
1334        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1335        assert_eq!(mf.kind(), MediaKind::Image);
1336        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1337        let exif: Exif = iter.into();
1338        assert_eq!(exif.get(tag).unwrap(), &v);
1339
1340        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1341        assert_eq!(mf.kind(), MediaKind::Image);
1342        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1343        let exif: Exif = iter.into();
1344        assert_eq!(exif.get(tag).unwrap(), &v);
1345    }
1346
1347    use crate::video::TrackInfoTag::*;
1348
1349    #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1350    #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1351    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1352    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1353    #[test_case("meta.mov", Make, "Apple".into())]
1354    #[test_case("meta.mov", Model, "iPhone X".into())]
1355    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1356    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1357    #[test_case("meta.mp4", Width, 1920_u32.into())]
1358    #[test_case("meta.mp4", Height, 1080_u32.into())]
1359    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1360    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1361    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1362    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1363    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1364    #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1365    #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1366    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1367    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1368    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1369        let mut parser = parser();
1370
1371        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1372        let info: TrackInfo = parser.parse_track(mf).unwrap();
1373        assert_eq!(info.get(tag).unwrap(), &v);
1374
1375        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1376        let info: TrackInfo = parser.parse_track(mf).unwrap();
1377        assert_eq!(info.get(tag).unwrap(), &v);
1378    }
1379
1380    #[test_case("crash_moov-trak")]
1381    #[test_case("crash_skip_large")]
1382    #[test_case("crash_add_large")]
1383    fn parse_track_crash(path: &str) {
1384        let mut parser = parser();
1385
1386        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1387        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1388
1389        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1390        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1391    }
1392
// Regression: a crafted ISOBMFF file declares an extended 64-bit box size
// just under MAX_PARSE_BUF_SIZE (~1 GB). Before the fix, the unseekable
// parser called reserve_exact() with that size before reading, which
// allocated ~1 GB even though the stream held only a few KB.
// See commit 81f9e8a.
#[test]
fn parse_oom_large_box() {
    let sample = "oom_large_box.heic";
    let mut parser = parser();

    // Exif path, seekable then unseekable. The outcome is irrelevant;
    // the calls only have to return.
    let seekable = MediaSource::seekable(open_sample(sample).unwrap()).unwrap();
    let _: Result<ExifIter, _> = parser.parse_exif(seekable);

    let unseekable = MediaSource::unseekable(open_sample(sample).unwrap()).unwrap();
    let _: Result<ExifIter, _> = parser.parse_exif(unseekable);

    // Track path, seekable then unseekable.
    let seekable = MediaSource::seekable(open_sample(sample).unwrap()).unwrap();
    let _: TrackInfo = parser.parse_track(seekable).unwrap_or_default();

    let unseekable = MediaSource::unseekable(open_sample(sample).unwrap()).unwrap();
    let _: TrackInfo = parser.parse_track(unseekable).unwrap_or_default();
}
1413
#[test]
fn media_kind_classifies_image_and_track() {
    // One still image and one video sample, each with its expected kind.
    let samples = [
        ("testdata/exif.jpg", MediaKind::Image),
        ("testdata/meta.mov", MediaKind::Track),
    ];
    for (sample, expected) in samples {
        let source = MediaSource::open(sample).unwrap();
        assert_eq!(source.kind(), expected);
    }
}
1422
#[test]
fn media_source_open() {
    // Opening by path must succeed and classify the JPEG as an image.
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let kind = source.kind();
    assert_eq!(kind, MediaKind::Image);
}
1428
#[test]
fn parse_exif_returns_exif_iter() {
    // Pins the return type of `parse_exif` through the annotation.
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let mut parser = parser();
    let _: ExifIter = parser.parse_exif(source).unwrap();
}
1435
#[test]
fn parse_track_returns_track_info() {
    // Pins the return type of `parse_track` through the annotation.
    let source = MediaSource::open("testdata/meta.mov").unwrap();
    let mut parser = parser();
    let _: TrackInfo = parser.parse_track(source).unwrap();
}
1442
#[test]
fn parse_exif_on_track_returns_exif_not_found_v3() {
    // Asking for Exif on a video source must report ExifNotFound.
    let track_source = MediaSource::open("testdata/meta.mov").unwrap();
    let mut parser = parser();
    let outcome = parser.parse_exif(track_source);
    assert!(matches!(outcome, Err(crate::Error::ExifNotFound)));
}
1450
#[test]
fn parse_track_on_image_returns_track_not_found_v3() {
    // Asking for track info on this JPEG must report TrackNotFound.
    let image_source = MediaSource::open("testdata/exif.jpg").unwrap();
    let mut parser = parser();
    let outcome = parser.parse_track(image_source);
    assert!(matches!(outcome, Err(crate::Error::TrackNotFound)));
}
1458
#[cfg(feature = "tokio")]
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn media_parser_parse_exif_async() {
    use crate::parser_async::AsyncMediaSource;

    // Smoke test: the async Exif path succeeds on a known-good JPEG.
    let source = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
    let mut parser = MediaParser::new();
    let _: ExifIter = parser.parse_exif_async(source).await.unwrap();
}
1467
#[cfg(feature = "tokio")]
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn media_parser_parse_track_async() {
    use crate::parser_async::AsyncMediaSource;

    // Smoke test: the async track path succeeds on a known-good MOV.
    let source = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
    let mut parser = MediaParser::new();
    let _: TrackInfo = parser.parse_track_async(source).await.unwrap();
}
1476
#[test]
fn parser_recycles_alloc_when_exif_iter_dropped() {
    let mut parser = MediaParser::new();

    // First run: parse, convert, and drop the result before sampling the
    // cached pointer.
    let first_source = MediaSource::open("testdata/exif.jpg").unwrap();
    let first: crate::Exif = parser.parse_exif(first_source).unwrap().into();
    drop(first);
    let ptr_after_first = parser.state.cached_ptr_for_test();

    // Second run: same file; this result stays alive while we sample.
    let second_source = MediaSource::open("testdata/exif.jpg").unwrap();
    let _second: crate::Exif = parser.parse_exif(second_source).unwrap().into();
    let ptr_after_second = parser.state.cached_ptr_for_test();

    // Both samples must exist and name the same allocation.
    let recycled = ptr_after_first.is_some() && ptr_after_first == ptr_after_second;
    assert!(
        recycled,
        "expected recycled allocation, got {:?} -> {:?}",
        ptr_after_first,
        ptr_after_second
    );
}
1499
1500    #[test]
1501    fn parser_new_does_no_upfront_allocation() {
1502        let parser = MediaParser::new();
1503        assert!(parser.state.cached_ptr_for_test().is_none());
1504        assert!(parser.state.buf_is_none_for_test());
1505    }
1506
1507    #[test]
1508    fn buffered_state_memory_mode_sets_and_reads() {
1509        let mut s = BufferedParserState::new();
1510        s.set_memory(bytes::Bytes::from_static(b"abcdefgh"));
1511        assert!(s.is_memory_mode());
1512        assert_eq!(s.buffer(), b"abcdefgh");
1513        s.set_position(3);
1514        assert_eq!(s.buffer(), b"defgh");
1515    }
1516
1517    #[test]
1518    fn buffered_state_share_buf_memory_mode_is_zero_copy() {
1519        let original = bytes::Bytes::from_static(b"the parser owns nothing here");
1520        let original_ptr = original.as_ptr();
1521        let mut s = BufferedParserState::new();
1522        s.set_memory(original);
1523        let (shared, position) = s.share_buf();
1524        assert_eq!(position, 0);
1525        assert_eq!(
1526            shared.as_ptr(),
1527            original_ptr,
1528            "memory share must be a Bytes::clone, not a Vec round-trip"
1529        );
1530        // After share_buf, the parser's memory slot is taken — leaving the state
1531        // ready for the next `reset()` cycle.
1532        assert!(!s.is_memory_mode());
1533    }
1534
1535    #[test]
1536    fn buffered_state_reset_clears_memory() {
1537        let mut s = BufferedParserState::new();
1538        s.set_memory(bytes::Bytes::from_static(b"x"));
1539        s.reset();
1540        assert!(!s.is_memory_mode());
1541        assert_eq!(s.position, 0);
1542    }
1543
1544    #[test]
1545    fn buffered_state_acquire_buf_skips_in_memory_mode() {
1546        let mut s = BufferedParserState::new();
1547        s.set_memory(bytes::Bytes::from_static(b"data"));
1548        s.acquire_buf();
1549        // No streaming buf was allocated.
1550        assert!(s.buf.is_none());
1551        // Memory still readable.
1552        assert_eq!(s.buffer(), b"data");
1553    }
1554
1555    #[test]
1556    fn media_source_from_memory_image_jpg() {
1557        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1558        let ms = MediaSource::from_memory(raw).unwrap();
1559        assert_eq!(ms.kind(), MediaKind::Image);
1560        assert!(ms.memory.is_some());
1561    }
1562
1563    #[test]
1564    fn media_source_from_memory_track_mov() {
1565        let raw = std::fs::read("testdata/meta.mov").unwrap();
1566        let ms = MediaSource::from_memory(raw).unwrap();
1567        assert_eq!(ms.kind(), MediaKind::Track);
1568    }
1569
1570    #[test]
1571    fn media_source_from_memory_static_slice() {
1572        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1573        let ms = MediaSource::from_memory(raw).unwrap();
1574        assert_eq!(ms.kind(), MediaKind::Image);
1575    }
1576
1577    #[test]
1578    fn media_source_from_memory_rejects_too_short() {
1579        let raw = vec![0u8; 4];
1580        let res = MediaSource::from_memory(raw);
1581        assert!(res.is_err());
1582    }
1583
1584    #[test]
1585    fn media_source_from_memory_rejects_unknown_mime() {
1586        let raw = vec![0xAAu8; 256];
1587        let res = MediaSource::from_memory(raw);
1588        assert!(res.is_err());
1589    }
1590
1591    #[test]
1592    fn parse_exif_unified_from_memory_jpg() {
1593        let mut parser = MediaParser::new();
1594        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1595        let ms = MediaSource::from_memory(raw).unwrap();
1596        let iter = parser.parse_exif(ms).unwrap();
1597        let exif: crate::Exif = iter.into();
1598        assert!(exif.get(crate::ExifTag::Make).is_some());
1599    }
1600
1601    #[test]
1602    fn parse_exif_unified_from_memory_heic() {
1603        let mut parser = MediaParser::new();
1604        let raw = std::fs::read("testdata/exif.heic").unwrap();
1605        let ms = MediaSource::from_memory(raw).unwrap();
1606        let iter = parser.parse_exif(ms).unwrap();
1607        let exif: crate::Exif = iter.into();
1608        assert_eq!(
1609            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1610            Some("Apple")
1611        );
1612    }
1613
1614    #[test]
1615    fn parse_exif_unified_from_memory_zero_copy_preserved() {
1616        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1617        let bytes = bytes::Bytes::from(raw);
1618        let _original_ptr = bytes.as_ptr();
1619
1620        let mut parser = MediaParser::new();
1621        let ms = MediaSource::from_memory(bytes).unwrap();
1622        let iter = parser.parse_exif(ms).unwrap();
1623
1624        // Memory mode must not poison the recycle cache — same invariant
1625        // the old parse_exif_from_bytes route asserts.
1626        assert!(
1627            parser.state.cached_ptr_for_test().is_none(),
1628            "memory mode must not write to the streaming-buf recycle cache"
1629        );
1630        drop(iter);
1631    }
1632
1633    #[test]
1634    fn parse_exif_unified_on_track_returns_exif_not_found() {
1635        let mut parser = MediaParser::new();
1636        let raw = std::fs::read("testdata/meta.mov").unwrap();
1637        let ms = MediaSource::from_memory(raw).unwrap();
1638        let res = parser.parse_exif(ms);
1639        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1640    }
1641
1642    #[test]
1643    fn parse_exif_unified_on_truncated_returns_io_error() {
1644        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1645        raw.truncate(200);
1646        let mut parser = MediaParser::new();
1647        let ms = MediaSource::from_memory(raw).unwrap();
1648        let res = parser.parse_exif(ms);
1649        assert!(
1650            res.is_err(),
1651            "expected error on truncated bytes, got {:?}",
1652            res
1653        );
1654    }
1655
1656    #[test]
1657    fn parse_exif_streaming_tiny_non_png_keeps_strict_eof() {
1658        // The PNG EOF-tolerance branch is scoped to PNG. A tiny non-PNG file
1659        // (here: the 36-byte compatible-brands.heic fixture) consumes its
1660        // entire reader during mime-detection prefill, so the subsequent
1661        // fill_buf hits UnexpectedEof. parse_exif must surface an error
1662        // (any error) — never Ok — for non-PNG inputs.
1663        let mut parser = MediaParser::new();
1664        let ms = MediaSource::open("testdata/compatible-brands.heic").unwrap();
1665        let res = parser.parse_exif(ms);
1666        assert!(res.is_err(), "expected Err for tiny HEIC, got {:?}", res);
1667    }
1668
1669    #[test]
1670    #[allow(deprecated)]
1671    fn media_source_from_bytes_image_jpg() {
1672        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1673        let ms = MediaSource::from_bytes(raw).unwrap();
1674        assert_eq!(ms.kind(), MediaKind::Image);
1675        assert!(ms.memory.is_some());
1676    }
1677
1678    #[test]
1679    #[allow(deprecated)]
1680    fn media_source_from_bytes_track_mov() {
1681        let raw = std::fs::read("testdata/meta.mov").unwrap();
1682        let ms = MediaSource::from_bytes(raw).unwrap();
1683        assert_eq!(ms.kind(), MediaKind::Track);
1684    }
1685
1686    #[test]
1687    #[allow(deprecated)]
1688    fn media_source_from_bytes_static_slice() {
1689        // &'static [u8] should work via Into<Bytes> because the file is read
1690        // into a Vec at compile-time-friendly size; here we use include_bytes.
1691        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1692        let ms = MediaSource::from_bytes(raw).unwrap();
1693        assert_eq!(ms.kind(), MediaKind::Image);
1694    }
1695
1696    #[test]
1697    #[allow(deprecated)]
1698    fn media_source_from_bytes_rejects_too_short() {
1699        // Below the smallest mime signature length: should fail mime detection.
1700        let raw = vec![0u8; 4];
1701        let res = MediaSource::from_bytes(raw);
1702        assert!(res.is_err(), "expected mime-detection error");
1703    }
1704
1705    #[test]
1706    #[allow(deprecated)]
1707    fn media_source_from_bytes_rejects_unknown_mime() {
1708        // Random bytes long enough to trigger detection but not match any
1709        // signature.
1710        let raw = vec![0xAAu8; 256];
1711        let res = MediaSource::from_bytes(raw);
1712        assert!(
1713            res.is_err(),
1714            "expected mime-detection error for unknown bytes"
1715        );
1716    }
1717
1718    #[test]
1719    fn p4_5_baseline_exif_jpg_full_dump() {
1720        // Lock down the post-refactor invariant: parsing testdata/exif.jpg through
1721        // the public API must yield the same set of (ifd, tag, value) triples
1722        // before and after P4.5. We capture them as a sorted, formatted string so
1723        // the assertion is a single literal comparison.
1724        let mut parser = MediaParser::new();
1725        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1726        let iter: ExifIter = parser.parse_exif(ms).unwrap();
1727
1728        let mut entries: Vec<String> = iter
1729            .map(|e| {
1730                let tag_name = match e.tag() {
1731                    crate::TagOrCode::Tag(t) => format!("{t}"),
1732                    crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
1733                };
1734                let value_str = e
1735                    .value()
1736                    .map(|v| format!("{v}"))
1737                    .unwrap_or_else(|| "<err>".into());
1738                format!("{}.{}={:?}", e.ifd(), tag_name, value_str)
1739            })
1740            .collect();
1741        entries.sort();
1742        let snapshot = entries.join("\n");
1743
1744        // Sanity: should produce non-trivial content. Exact content is checked by
1745        // the existing parse_media tests; this one guards against accidental
1746        // re-ordering / dedup changes during the refactor.
1747        assert!(
1748            entries.len() > 5,
1749            "expected >5 entries, got {}",
1750            entries.len()
1751        );
1752        assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
1753    }
1754
1755    #[test]
1756    #[allow(deprecated)]
1757    fn parse_exif_from_bytes_jpg_basic() {
1758        let mut parser = MediaParser::new();
1759        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1760        let ms = MediaSource::from_bytes(raw).unwrap();
1761        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1762        let exif: crate::Exif = iter.into();
1763        assert!(exif.get(crate::ExifTag::Make).is_some());
1764    }
1765
1766    #[test]
1767    #[allow(deprecated)]
1768    fn parse_exif_from_bytes_heic_basic() {
1769        let mut parser = MediaParser::new();
1770        let raw = std::fs::read("testdata/exif.heic").unwrap();
1771        let ms = MediaSource::from_bytes(raw).unwrap();
1772        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1773        let exif: crate::Exif = iter.into();
1774        assert_eq!(
1775            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1776            Some("Apple")
1777        );
1778    }
1779
1780    #[test]
1781    #[allow(deprecated)]
1782    fn parse_exif_from_bytes_zero_copy_shared_bytes() {
1783        // Build a Bytes whose pointer we can compare. The ExifIter's underlying
1784        // share must point to the same allocation — proving Bytes::clone path.
1785        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1786        let bytes = bytes::Bytes::from(raw);
1787        let original_ptr = bytes.as_ptr();
1788
1789        let mut parser = MediaParser::new();
1790        let ms = MediaSource::from_bytes(bytes).unwrap();
1791        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1792
1793        // The cached pointer in parser state should be None in memory mode
1794        // (memory mode does not write to cache — the user owns the alloc).
1795        assert!(
1796            parser.state.cached_ptr_for_test().is_none(),
1797            "memory mode must not poison the recycle cache"
1798        );
1799
1800        // Drop the iter and confirm parser is clean for the next call.
1801        drop(iter);
1802
1803        // Build again; pointer identity proves we did not duplicate the alloc
1804        // anywhere along the parse path.
1805        let bytes2 = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
1806        let ms2 = MediaSource::from_bytes(bytes2.clone()).unwrap();
1807        let _iter2 = parser.parse_exif_from_bytes(ms2).unwrap();
1808        // (We cannot assert pointer-equality across distinct user Bytes; the
1809        // assertion above on the first parse is the load-bearing one.)
1810        let _ = original_ptr; // explicit: original_ptr is the assertion target.
1811    }
1812
1813    #[test]
1814    #[allow(deprecated)]
1815    fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
1816        let mut parser = MediaParser::new();
1817        let raw = std::fs::read("testdata/meta.mov").unwrap();
1818        let ms = MediaSource::from_bytes(raw).unwrap();
1819        let res = parser.parse_exif_from_bytes(ms);
1820        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1821    }
1822
1823    #[test]
1824    #[allow(deprecated)]
1825    fn parse_exif_from_bytes_on_truncated_returns_io_error() {
1826        // Truncate exif.jpg to just enough for mime detection but too short
1827        // for the full EXIF block. Memory-mode fill_buf must surface
1828        // UnexpectedEof when the parser walks off the end.
1829        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1830        raw.truncate(200);
1831        let mut parser = MediaParser::new();
1832        let ms = MediaSource::from_bytes(raw).unwrap();
1833        let res = parser.parse_exif_from_bytes(ms);
1834        assert!(
1835            res.is_err(),
1836            "expected error on truncated bytes, got {:?}",
1837            res
1838        );
1839    }
1840
1841    #[test]
1842    #[allow(deprecated)]
1843    fn parse_track_from_bytes_mov_basic() {
1844        let mut parser = MediaParser::new();
1845        let raw = std::fs::read("testdata/meta.mov").unwrap();
1846        let ms = MediaSource::from_bytes(raw).unwrap();
1847        let info = parser.parse_track_from_bytes(ms).unwrap();
1848        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1849        assert_eq!(
1850            info.get(crate::TrackInfoTag::Model),
1851            Some(&"iPhone X".into())
1852        );
1853    }
1854
1855    #[test]
1856    #[allow(deprecated)]
1857    fn parse_track_from_bytes_mp4_basic() {
1858        let mut parser = MediaParser::new();
1859        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1860        let ms = MediaSource::from_bytes(raw).unwrap();
1861        let info = parser.parse_track_from_bytes(ms).unwrap();
1862        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1863    }
1864
1865    #[test]
1866    #[allow(deprecated)]
1867    fn parse_track_from_bytes_mkv_basic() {
1868        let mut parser = MediaParser::new();
1869        let raw = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
1870        let ms = MediaSource::from_bytes(raw).unwrap();
1871        let info = parser.parse_track_from_bytes(ms).unwrap();
1872        assert_eq!(
1873            info.get(crate::TrackInfoTag::Width),
1874            Some(&(640_u32.into()))
1875        );
1876    }
1877
1878    #[test]
1879    #[allow(deprecated)]
1880    fn parse_track_from_bytes_on_image_returns_track_not_found() {
1881        let mut parser = MediaParser::new();
1882        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1883        let ms = MediaSource::from_bytes(raw).unwrap();
1884        let res = parser.parse_track_from_bytes(ms);
1885        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1886    }
1887
1888    #[test]
1889    fn parse_track_unified_from_memory_mov() {
1890        let mut parser = MediaParser::new();
1891        let raw = std::fs::read("testdata/meta.mov").unwrap();
1892        let ms = MediaSource::from_memory(raw).unwrap();
1893        let info = parser.parse_track(ms).unwrap();
1894        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1895    }
1896
1897    #[test]
1898    fn parse_track_unified_from_memory_mp4() {
1899        let mut parser = MediaParser::new();
1900        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1901        let ms = MediaSource::from_memory(raw).unwrap();
1902        let info = parser.parse_track(ms).unwrap();
1903        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1904    }
1905
1906    #[test]
1907    fn parse_track_unified_on_image_returns_track_not_found() {
1908        let mut parser = MediaParser::new();
1909        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1910        let ms = MediaSource::from_memory(raw).unwrap();
1911        let res = parser.parse_track(ms);
1912        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1913    }
1914
1915    #[test]
1916    fn parse_image_metadata_jpeg_returns_exif_only() {
1917        let mut parser = MediaParser::new();
1918        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1919        let img = parser.parse_image_metadata(ms).unwrap();
1920        assert!(img.exif.is_some());
1921        assert!(img.format.is_none());
1922    }
1923
1924    #[test]
1925    fn parse_image_metadata_jpeg_from_memory() {
1926        let mut parser = MediaParser::new();
1927        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1928        let ms = MediaSource::from_memory(raw).unwrap();
1929        let img = parser.parse_image_metadata(ms).unwrap();
1930        assert!(img.exif.is_some());
1931        assert!(img.format.is_none());
1932    }
1933
1934    #[test]
1935    fn parse_image_metadata_on_track_returns_exif_not_found() {
1936        let mut parser = MediaParser::new();
1937        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1938        let res = parser.parse_image_metadata(ms);
1939        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1940    }
1941
1942    /// Regression for issue #55. PNGs whose IDAT body exceeds
1943    /// `INIT_BUF_SIZE` force the chunk walker through `ClearAndSkip`;
1944    /// on retry the parse buffer no longer starts at byte 0 of the
1945    /// file, and the signature recheck must not fire.
1946    #[test]
1947    fn parse_image_metadata_png_large_idat_streaming() {
1948        use std::io::Cursor;
1949        let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1950        let mut parser = MediaParser::new();
1951        let ms = MediaSource::seekable(Cursor::new(png)).unwrap();
1952        assert_eq!(ms.kind(), crate::MediaKind::Image);
1953        let res = parser.parse_image_metadata(ms);
1954        assert!(
1955            matches!(res, Err(crate::Error::ExifNotFound)),
1956            "expected ExifNotFound on PNG with no EXIF / tEXt; got {res:?}"
1957        );
1958    }
1959
1960    /// Same regression via `parse_exif` — exercises `parse_png_exif_iter`,
1961    /// the sibling code path to `parse_png_full`.
1962    #[test]
1963    fn parse_exif_png_large_idat_streaming() {
1964        use std::io::Cursor;
1965        let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1966        let mut parser = MediaParser::new();
1967        let ms = MediaSource::seekable(Cursor::new(png)).unwrap();
1968        let res = parser.parse_exif(ms);
1969        assert!(
1970            matches!(res, Err(crate::Error::ExifNotFound)),
1971            "expected ExifNotFound; got {res:?}"
1972        );
1973    }
1974
1975    /// Same regression via `MediaSource::unseekable` — exercises the
1976    /// read-and-discard skip path in `clear_and_skip` rather than the
1977    /// `seek_relative` shortcut.
1978    #[test]
1979    fn parse_image_metadata_png_large_idat_unseekable() {
1980        let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
1981        let mut parser = MediaParser::new();
1982        let ms = MediaSource::unseekable(NoSeek(std::io::Cursor::new(png))).unwrap();
1983        let res = parser.parse_image_metadata(ms);
1984        assert!(
1985            matches!(res, Err(crate::Error::ExifNotFound)),
1986            "expected ExifNotFound; got {res:?}"
1987        );
1988    }
1989
1990    /// Wraps a reader to hide its `Seek` impl, so `MediaSource::unseekable`
1991    /// is forced even when the underlying type happens to implement it.
1992    struct NoSeek<R>(R);
1993    impl<R: io::Read> io::Read for NoSeek<R> {
1994        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1995            self.0.read(buf)
1996        }
1997    }
1998
1999    fn build_png_with_large_idat(idat_body: usize) -> Vec<u8> {
2000        let mut out = Vec::new();
2001        out.extend_from_slice(b"\x89PNG\r\n\x1a\n");
2002        // IHDR (1x1, 8-bit grayscale)
2003        out.extend_from_slice(&13u32.to_be_bytes());
2004        out.extend_from_slice(b"IHDR");
2005        out.extend_from_slice(&[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0]);
2006        out.extend_from_slice(&[0, 0, 0, 0]); // CRC (chunk walker ignores it)
2007                                              // IDAT
2008        out.extend_from_slice(&(idat_body as u32).to_be_bytes());
2009        out.extend_from_slice(b"IDAT");
2010        out.resize(out.len() + idat_body, 0);
2011        out.extend_from_slice(&[0, 0, 0, 0]); // CRC
2012                                              // IEND
2013        out.extend_from_slice(&0u32.to_be_bytes());
2014        out.extend_from_slice(b"IEND");
2015        out.extend_from_slice(&[0, 0, 0, 0]); // CRC
2016        out
2017    }
2018
2019    #[cfg(feature = "tokio")]
2020    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2021    async fn parse_image_metadata_async_jpeg() {
2022        use crate::parser_async::AsyncMediaSource;
2023        let mut parser = MediaParser::new();
2024        let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
2025        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2026        assert!(img.exif.is_some());
2027        assert!(img.format.is_none());
2028    }
2029
2030    #[cfg(feature = "tokio")]
2031    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2032    async fn async_media_source_from_memory_image_jpg() {
2033        use crate::parser_async::AsyncMediaSource;
2034        let raw = std::fs::read("testdata/exif.jpg").unwrap();
2035        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2036        assert_eq!(ms.kind(), MediaKind::Image);
2037        assert!(ms.memory.is_some());
2038    }
2039
2040    #[cfg(feature = "tokio")]
2041    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2042    async fn async_media_source_from_memory_track_mov() {
2043        use crate::parser_async::AsyncMediaSource;
2044        let raw = std::fs::read("testdata/meta.mov").unwrap();
2045        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2046        assert_eq!(ms.kind(), MediaKind::Track);
2047    }
2048
2049    #[cfg(feature = "tokio")]
2050    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2051    async fn async_media_source_from_memory_rejects_unknown_mime() {
2052        use crate::parser_async::AsyncMediaSource;
2053        let raw = vec![0xAAu8; 256];
2054        let res = AsyncMediaSource::from_memory(raw);
2055        assert!(res.is_err());
2056    }
2057
2058    #[cfg(feature = "tokio")]
2059    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2060    async fn parse_exif_async_from_memory_jpg() {
2061        use crate::parser_async::AsyncMediaSource;
2062        let raw = std::fs::read("testdata/exif.jpg").unwrap();
2063        let mut parser = MediaParser::new();
2064        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2065        let iter = parser.parse_exif_async(ms).await.unwrap();
2066        let exif: crate::Exif = iter.into();
2067        assert!(exif.get(crate::ExifTag::Make).is_some());
2068    }
2069
2070    #[cfg(feature = "tokio")]
2071    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2072    async fn parse_exif_async_from_memory_zero_copy_preserved() {
2073        use crate::parser_async::AsyncMediaSource;
2074        let raw = std::fs::read("testdata/exif.jpg").unwrap();
2075        let bytes = bytes::Bytes::from(raw);
2076        let mut parser = MediaParser::new();
2077        let ms = AsyncMediaSource::from_memory(bytes).unwrap();
2078        let iter = parser.parse_exif_async(ms).await.unwrap();
2079        // Memory mode must not poison the recycle cache — same invariant
2080        // as the sync route asserts.
2081        assert!(
2082            parser.state.cached_ptr_for_test().is_none(),
2083            "async memory mode must not write to the streaming-buf recycle cache"
2084        );
2085        drop(iter);
2086    }
2087
2088    #[cfg(feature = "tokio")]
2089    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2090    async fn parse_track_async_from_memory_mov() {
2091        use crate::parser_async::AsyncMediaSource;
2092        let raw = std::fs::read("testdata/meta.mov").unwrap();
2093        let mut parser = MediaParser::new();
2094        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2095        let info = parser.parse_track_async(ms).await.unwrap();
2096        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
2097    }
2098
2099    #[cfg(feature = "tokio")]
2100    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2101    async fn parse_image_metadata_async_from_memory_png() {
2102        use crate::parser_async::AsyncMediaSource;
2103        let raw = std::fs::read("testdata/exif.png").unwrap();
2104        let mut parser = MediaParser::new();
2105        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2106        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2107        assert!(img.exif.is_some());
2108        assert!(img.format.is_some());
2109    }
2110
2111    /// Async counterpart of `parse_image_metadata_png_large_idat_streaming`
2112    /// (regression for issue #55). `parser_async::clear_and_skip` is a
2113    /// separate implementation; the state-threading through the shared
2114    /// `parse_loop_step` must work identically in the async path.
2115    #[cfg(feature = "tokio")]
2116    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2117    async fn parse_image_metadata_async_png_large_idat_streaming() {
2118        use crate::parser_async::AsyncMediaSource;
2119        let png = build_png_with_large_idat(INIT_BUF_SIZE + 1024);
2120        // tokio's async I/O traits aren't on std::io::Cursor, so route
2121        // through a real file. Pick a unique path to avoid concurrent
2122        // test collisions.
2123        let path =
2124            std::env::temp_dir().join(format!("nom-exif-issue55-{}.png", std::process::id()));
2125        tokio::fs::write(&path, &png).await.unwrap();
2126        let mut parser = MediaParser::new();
2127        let ms = AsyncMediaSource::open(&path).await.unwrap();
2128        let res = parser.parse_image_metadata_async(ms).await;
2129        let _ = tokio::fs::remove_file(&path).await;
2130        assert!(
2131            matches!(res, Err(crate::Error::ExifNotFound)),
2132            "expected ExifNotFound; got {res:?}"
2133        );
2134    }
2135
2136    #[cfg(feature = "tokio")]
2137    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2138    async fn parse_image_metadata_async_from_memory_text_only_png() {
2139        // Memory route: bypasses fill_buf entirely, just verifies the
2140        // memory-mode path returns format-only metadata for a PNG with
2141        // no EXIF.
2142        use crate::parser_async::AsyncMediaSource;
2143        let raw = std::fs::read("testdata/text-only.png").unwrap();
2144        let mut parser = MediaParser::new();
2145        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2146        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2147        assert!(img.exif.is_none());
2148        assert!(img.format.is_some());
2149    }
2150
2151    // Streaming-path coverage for the PNG-scoped EOF tolerance. The
2152    // 117-byte text-only.png is fully consumed during mime detection
2153    // (HEADER_PARSE_BUF_SIZE = 128), so the parse-time fill_buf hits
2154    // UnexpectedEof. The PNG-scoped tolerance must let the bytes already
2155    // in the parse buffer drive the parse to completion. These tests
2156    // would have caught the missed-async-tolerance bug the previous
2157    // memory-mode tests did not.
2158
2159    #[test]
2160    fn parse_exif_streaming_text_only_png_returns_exif_not_found() {
2161        // text-only.png has no EXIF — the contract is ExifNotFound, not
2162        // UnexpectedEof. Pre-EOF-tolerance, this would surface
2163        // UnexpectedEof because mime detection consumed all 117 bytes.
2164        let mut parser = MediaParser::new();
2165        let ms = MediaSource::open("testdata/text-only.png").unwrap();
2166        let res = parser.parse_exif(ms);
2167        assert!(
2168            matches!(res, Err(crate::Error::ExifNotFound)),
2169            "expected ExifNotFound for tEXt-only PNG, got {:?}",
2170            res
2171        );
2172    }
2173
2174    #[test]
2175    fn parse_image_metadata_streaming_text_only_png() {
2176        let mut parser = MediaParser::new();
2177        let ms = MediaSource::open("testdata/text-only.png").unwrap();
2178        let img = parser.parse_image_metadata(ms).unwrap();
2179        assert!(img.exif.is_none());
2180        assert!(img.format.is_some());
2181    }
2182
2183    #[cfg(feature = "tokio")]
2184    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2185    async fn parse_exif_async_streaming_text_only_png_returns_exif_not_found() {
2186        use crate::parser_async::AsyncMediaSource;
2187        let mut parser = MediaParser::new();
2188        let f = tokio::fs::File::open("testdata/text-only.png")
2189            .await
2190            .unwrap();
2191        let ms = AsyncMediaSource::seekable(f).await.unwrap();
2192        let res = parser.parse_exif_async(ms).await;
2193        assert!(
2194            matches!(res, Err(crate::Error::ExifNotFound)),
2195            "expected ExifNotFound for tEXt-only PNG via async streaming, got {:?}",
2196            res
2197        );
2198    }
2199
2200    #[cfg(feature = "tokio")]
2201    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2202    async fn parse_image_metadata_async_streaming_text_only_png() {
2203        use crate::parser_async::AsyncMediaSource;
2204        let mut parser = MediaParser::new();
2205        let f = tokio::fs::File::open("testdata/text-only.png")
2206            .await
2207            .unwrap();
2208        let ms = AsyncMediaSource::seekable(f).await.unwrap();
2209        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2210        assert!(img.exif.is_none());
2211        assert!(img.format.is_some());
2212    }
2213}