Skip to main content

nom_exif/
parser.rs

1use std::{
2    cmp::{max, min},
3    fmt::{Debug, Display},
4    fs::File,
5    io::{self, Read, Seek},
6    path::Path,
7};
8
9use crate::{
10    error::{ParsedError, ParsingError, ParsingErrorState},
11    exif::TiffHeader,
12    file::MediaMime,
13    ExifIter, TrackInfo,
14};
15
16/// A function that tries to skip `n` bytes of `reader` by seeking. Returns
17/// `Ok(true)` on success, `Ok(false)` if the reader does not support seek
18/// (so the caller should fall back to reading-and-discarding), or
19/// `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
20///
21/// This is captured at construction time by `MediaSource::seekable` /
22/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
23/// runtime fn pointer.
24pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25
26/// `MediaSource` represents a media data source that can be parsed by
27/// [`MediaParser`].
28///
29/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
30///
31/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
32///   (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
33///   [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
34///
35/// - In other cases:
36///
37///   - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
38///     (an already-open `File` goes here).
39///
///   - Use [`MediaSource::unseekable`] to create a MediaSource from a
///     reader that only implements `Read`.
42///
43/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
44/// since the former is more efficient when the parser needs to skip a large number of bytes.
45///
46/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
47/// its own buffer management and the buffers can be shared between multiple
48/// parsing tasks, thus avoiding frequent memory allocations.
pub struct MediaSource<R> {
    /// The underlying reader handed to the parser in streaming mode.
    pub(crate) reader: R,
    /// Header bytes already pulled out of `reader` while detecting the media
    /// type; the parser appends them to its own buffer before reading on.
    pub(crate) buf: Vec<u8>,
    /// Media type detected from the header bytes.
    pub(crate) mime: MediaMime,
    /// Function used to skip bytes of `reader` by seeking (see
    /// `SkipBySeekFn` for the return-value contract).
    pub(crate) skip_by_seek: SkipBySeekFn<R>,
    /// P7: zero-copy memory-mode payload. `Some` only when the source was
    /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
    /// `skip_by_seek` are placeholders (and never consulted) in that mode.
    pub(crate) memory: Option<bytes::Bytes>,
}
59
60/// Top-level classification of a media source.
61///
62/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
63/// `Track` files are time-based containers — video, audio, or both — and
64/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
65/// audio containers like `.mka` are classified as `Track`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaKind {
    /// The source's detected media type is `MediaMime::Image`.
    Image,
    /// The source's detected media type is `MediaMime::Track`.
    Track,
}
71
72impl<R> Debug for MediaSource<R> {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("MediaSource")
75            .field("mime", &self.mime)
76            .finish_non_exhaustive()
77    }
78}
79
80// Should be enough for parsing header
81const HEADER_PARSE_BUF_SIZE: usize = 128;
82
impl<R> MediaSource<R> {
    /// Top-level classification of this media source.
    ///
    /// Maps the media type stored in `self.mime` onto [`MediaKind`]:
    /// image types yield [`MediaKind::Image`], track types yield
    /// [`MediaKind::Track`].
    pub fn kind(&self) -> MediaKind {
        // Exhaustive match: a new `MediaMime` variant will not compile
        // until it is classified here.
        match self.mime {
            MediaMime::Image(_) => MediaKind::Image,
            MediaMime::Track(_) => MediaKind::Track,
        }
    }
}
92
93impl<R: Read> MediaSource<R> {
94    fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95        let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96        reader
97            .by_ref()
98            .take(HEADER_PARSE_BUF_SIZE as u64)
99            .read_to_end(&mut buf)?;
100        let mime: MediaMime = buf.as_slice().try_into()?;
101        Ok(Self {
102            reader,
103            buf,
104            mime,
105            skip_by_seek,
106            memory: None,
107        })
108    }
109
110    /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111    /// reader that only impl `Read`
112    ///
113    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114    /// since the former is more efficient when the parser needs to skip a large number of bytes.
115    pub fn unseekable(reader: R) -> crate::Result<Self> {
116        Self::build(reader, |_, _| Ok(false))
117    }
118}
119
120impl<R: Read + Seek> MediaSource<R> {
121    /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122    ///
123    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124    /// since the former is more efficient when the parser needs to skip a large number of bytes.
125    pub fn seekable(reader: R) -> crate::Result<Self> {
126        Self::build(reader, |r, n| {
127            let signed: i64 = n
128                .try_into()
129                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
130            r.seek_relative(signed)?;
131            Ok(true)
132        })
133    }
134}
135
136impl MediaSource<File> {
137    /// Open a file at `path` and parse its header to detect the media format.
138    ///
139    /// This is the v3-preferred entry point for the common case of "I have a
140    /// path on disk". For an already-open `File` use [`Self::seekable`].
141    pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142        Self::seekable(File::open(path)?)
143    }
144}
145
146impl MediaSource<()> {
147    /// Build a [`MediaSource`] from an in-memory byte payload.
148    ///
149    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150    /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151    /// HTTP-stack body types that implement `Into<Bytes>` directly.
152    ///
153    /// The header (first up to 128 bytes) is sniffed for media kind, the
154    /// same way [`MediaSource::open`] does it for files. The full payload is
155    /// stored zero-copy: subsequent parsing through
156    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157    /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158    /// via reference counting.
159    ///
160    /// The returned source is parsed by the dedicated
161    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162    /// methods. The streaming `parse_exif` / `parse_track` methods do not
163    /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164    ///
165    /// # Example
166    ///
167    /// ```rust
168    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169    ///
170    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171    /// let ms = MediaSource::from_bytes(bytes)?;
172    /// assert_eq!(ms.kind(), MediaKind::Image);
173    ///
174    /// let mut parser = MediaParser::new();
175    /// let _iter = parser.parse_exif_from_bytes(ms)?;
176    /// # Ok::<(), nom_exif::Error>(())
177    /// ```
178    #[deprecated(
179        since = "3.3.0",
180        note = "Use `MediaSource::from_memory` and the unified `parse_*` \
181                methods (which now accept memory-mode sources directly). \
182                The `MediaSource<()>` shape will be removed in v4."
183    )]
184    pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
185        let bytes = bytes.into();
186        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
187        let mime: MediaMime = bytes[..head_end].try_into()?;
188        Ok(Self {
189            reader: (),
190            buf: Vec::new(),
191            mime,
192            // Placeholder: never invoked in memory mode (clear_and_skip's
193            // AdvanceOnly path is the only one taken).
194            skip_by_seek: |_, _| Ok(false),
195            memory: Some(bytes),
196        })
197    }
198
199    /// Internal adapter: convert a v3.0-style `MediaSource<()>` (built via
200    /// the deprecated `from_bytes`) into the unified `MediaSource<Empty>`
201    /// shape so the deprecated `parse_*_from_bytes` methods can delegate to
202    /// the unified `parse_*` methods. Memory contents are moved over
203    /// verbatim, preserving zero-copy.
204    pub(crate) fn into_empty(self) -> MediaSource<std::io::Empty> {
205        MediaSource {
206            reader: std::io::empty(),
207            buf: self.buf,
208            mime: self.mime,
209            // Placeholder: never invoked in memory mode (clear_and_skip's
210            // AdvanceOnly path is the only one taken).
211            skip_by_seek: |_, _| Ok(false),
212            memory: self.memory,
213        }
214    }
215}
216
217impl MediaSource<std::io::Empty> {
218    /// Build a [`MediaSource`] from an in-memory byte payload.
219    ///
220    /// This is the v3.3 replacement for [`MediaSource::<()>::from_bytes`]
221    /// (which is now `#[deprecated]`). Functionally identical — same
222    /// zero-copy semantics, same accepted input types — but produces a
223    /// `MediaSource<std::io::Empty>` so that the unified `parse_*<R: Read>`
224    /// methods can accept it directly without a separate `_from_bytes`
225    /// sibling.
226    ///
227    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
228    /// `Vec<u8>`, `&'static [u8]`, `String`, `Box<[u8]>`, and HTTP-stack
229    /// body types that implement `Into<Bytes>` directly.
230    ///
231    /// The header (first up to 128 bytes) is sniffed for media kind, the
232    /// same way [`MediaSource::open`] does it for files. The full payload
233    /// is stored zero-copy: subsequent parsing through
234    /// [`MediaParser::parse_exif`] / [`MediaParser::parse_track`] shares
235    /// this `Bytes` directly with the returned `ExifIter` / sub-IFDs via
236    /// reference counting.
237    ///
238    /// # Example
239    ///
240    /// ```rust
241    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
242    ///
243    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
244    /// let ms = MediaSource::from_memory(bytes)?;
245    /// assert_eq!(ms.kind(), MediaKind::Image);
246    ///
247    /// let mut parser = MediaParser::new();
248    /// let _iter = parser.parse_exif(ms)?;  // unified entry point
249    /// # Ok::<(), nom_exif::Error>(())
250    /// ```
251    pub fn from_memory(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
252        let bytes = bytes.into();
253        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
254        let mime: MediaMime = bytes[..head_end].try_into()?;
255        Ok(Self {
256            reader: std::io::empty(),
257            buf: Vec::new(),
258            mime,
259            // Placeholder: never invoked in memory mode (AdvanceOnly path).
260            skip_by_seek: |_, _| Ok(false),
261            memory: Some(bytes),
262        })
263    }
264}
265
266// ----- Parse-time buffer policy -----
267//
268// Layered by lifecycle:
269//
270// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
271//   `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
272//   helpers don't over-commit.
273// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
274//   parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
275//   blocking-pool dispatches.
276// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
277//   single parse. Anything that would push past this is rejected as
278//   `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
279//   that declare absurd sizes.
280// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
281//   recycling. After a parse whose buffer ended above this, `shrink_to`
282//   gives the excess back to the allocator. Tuned for typical metadata
283//   sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
284//   recycle path stays warm for batch workloads.
285pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024;
286pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024;
287pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024;
288const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024;
289
/// Minimal view of a parse buffer with a movable read position.
pub(crate) trait Buf {
    /// The bytes currently visible to the parser.
    fn buffer(&self) -> &[u8];
    /// Discard the buffered bytes.
    fn clear(&mut self);

    /// Set the read position to `pos`.
    fn set_position(&mut self, pos: usize);
    /// The current read position.
    #[allow(unused)]
    fn position(&self) -> usize;
}
298
299/// Buffer-management state used by `MediaParser` (sync and async paths share it).
300///
301/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
302/// one *cached* `Bytes` clone of the most recently shared buffer. When the
303/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
304/// succeeds the underlying allocation is reused (the previous `ExifIter`
305/// has been dropped); otherwise the clone is discarded and a fresh
306/// `Vec<u8>` is allocated.
307///
308/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
309/// are `&mut self`, so a single slot is sufficient.
#[derive(Debug, Default)]
pub(crate) struct BufferedParserState {
    /// Clone of the most recently shared buffer; written by `share_buf` and
    /// consumed by `acquire_buf` when trying to recycle the allocation.
    cached: Option<bytes::Bytes>,
    /// The active streaming buffer; `Some` between `acquire_buf` and
    /// `share_buf` / `reset`.
    buf: Option<Vec<u8>>,
    /// P7: memory-mode storage. When `Some`, the parser is feeding from a
    /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
    /// `cached` are unused in this mode — the user owns the allocation,
    /// so there is nothing to recycle.
    memory: Option<bytes::Bytes>,
    /// Offset into the active storage at which `Buf::buffer()` starts.
    position: usize,
}
321
322impl BufferedParserState {
323    pub(crate) fn new() -> Self {
324        Self::default()
325    }
326
327    pub(crate) fn reset(&mut self) {
328        // If a parse failed mid-way the buf may still be present; drop it.
329        // Cache stays — recycle on next acquire if eligible.
330        self.buf = None;
331        self.memory = None;
332        self.position = 0;
333    }
334
335    /// Switch the parser state into memory mode, owning `bytes` directly.
336    /// Caller must have already called `reset()` (asserted in debug). Subsequent
337    /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
338    /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
339    pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
340        debug_assert!(
341            self.buf.is_none() && self.memory.is_none(),
342            "set_memory called on non-clean state"
343        );
344        self.memory = Some(bytes);
345        self.position = 0;
346    }
347
348    pub(crate) fn is_memory_mode(&self) -> bool {
349        self.memory.is_some()
350    }
351
352    pub(crate) fn acquire_buf(&mut self) {
353        if self.memory.is_some() {
354            // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
355            return;
356        }
357        debug_assert!(self.buf.is_none());
358        let buf = match self.cached.take() {
359            Some(b) => match b.try_into_mut() {
360                Ok(bm) => {
361                    let mut v = Vec::<u8>::from(bm);
362                    v.clear();
363                    if v.capacity() > MAX_REUSE_BUF_SIZE {
364                        v.shrink_to(MAX_REUSE_BUF_SIZE);
365                    }
366                    v
367                }
368                Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
369            },
370            None => Vec::with_capacity(INIT_BUF_SIZE),
371        };
372        self.buf = Some(buf);
373    }
374
375    pub(crate) fn buf(&self) -> &Vec<u8> {
376        self.buf.as_ref().expect("no buf here")
377    }
378
379    pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
380        self.buf.as_mut().expect("no buf here")
381    }
382
383    #[cfg(test)]
384    pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
385        self.cached.as_ref().map(|b| b.as_ptr())
386    }
387
388    #[cfg(test)]
389    pub(crate) fn buf_is_none_for_test(&self) -> bool {
390        self.buf.is_none()
391    }
392}
393
394impl Buf for BufferedParserState {
395    fn buffer(&self) -> &[u8] {
396        if let Some(m) = &self.memory {
397            return &m[self.position..];
398        }
399        &self.buf()[self.position..]
400    }
401    fn clear(&mut self) {
402        // In memory mode `clear` is a no-op: there is no scratch buffer to
403        // truncate, and the caller's bytes must remain available for further
404        // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
405        // what advances `position` in memory mode.
406        if self.memory.is_some() {
407            return;
408        }
409        self.buf_mut().clear();
410    }
411    fn set_position(&mut self, pos: usize) {
412        self.position = pos;
413    }
414    fn position(&self) -> usize {
415        self.position
416    }
417}
418
419impl ShareBuf for BufferedParserState {
420    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
421        if let Some(m) = self.memory.take() {
422            // Zero-copy share: caller already owns the allocation. No cache
423            // write — recycle is irrelevant when the user holds the alloc.
424            let position = self.position;
425            return (m, position);
426        }
427        let vec = self.buf.take().expect("no buf to share");
428        let bytes = bytes::Bytes::from(vec);
429        let position = self.position;
430        self.cached = Some(bytes.clone());
431        (bytes, position)
432    }
433}
434
/// The action `clear_and_skip` has to take for a requested skip, given how
/// many bytes are currently buffered.
pub(crate) enum SkipPlan {
    /// The skip ends inside the current buffer; only the position moves.
    AdvanceOnly,
    /// The buffer is exhausted by the skip: clear it and skip a further
    /// `extra` bytes from the reader.
    ClearAndSkip { extra: usize },
}

/// Decide how to skip `n` bytes when `buffer_len` bytes are buffered.
///
/// Returns [`SkipPlan::AdvanceOnly`] when `n <= buffer_len`, otherwise
/// [`SkipPlan::ClearAndSkip`] with the number of bytes beyond the buffer.
pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
    match n.checked_sub(buffer_len) {
        // `n` is smaller than, or exactly equal to, what is buffered.
        None | Some(0) => SkipPlan::AdvanceOnly,
        Some(extra) => SkipPlan::ClearAndSkip { extra },
    }
}
453
454pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
455    if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
456        tracing::error!(?requested, "the requested buffer size is too big");
457        return Err(io::ErrorKind::Unsupported.into());
458    }
459    Ok(())
460}
461
462pub(crate) enum LoopAction<O> {
463    /// Parse succeeded; return this value to the caller.
464    Done(O),
465    /// Need more bytes — call `fill_buf(reader, n)` then re-step.
466    NeedFill(usize),
467    /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
468    Skip(usize),
469    /// Parse failed permanently.
470    Failed(String),
471}
472
473/// Closure type passed to [`parse_loop_step`].
474pub(crate) type ParseFn<'a, O> =
475    dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
476
477/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
478pub(crate) fn parse_loop_step<O>(
479    buffer: &[u8],
480    offset: usize,
481    parsing_state: &mut Option<ParsingState>,
482    parse: &mut ParseFn<'_, O>,
483) -> LoopAction<O> {
484    match parse(buffer, offset, parsing_state.take()) {
485        Ok(o) => LoopAction::Done(o),
486        Err(es) => {
487            *parsing_state = es.state;
488            match es.err {
489                ParsingError::Need(n) => LoopAction::NeedFill(n),
490                ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
491                ParsingError::Failed(s) => LoopAction::Failed(s),
492            }
493        }
494    }
495}
496
/// State a parse closure can hand back alongside an error so that the next
/// parse-loop iteration receives it again (see `parse_loop_step`).
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// Carries a `TiffHeader` across iterations.
    TiffHeader(TiffHeader),
    /// NOTE(review): presumably the size of the Exif payload located in a
    /// HEIF container — confirm against the HEIF parser.
    HeifExifSize(usize),
    /// NOTE(review): presumably the size of the Exif payload located in a
    /// CR3 container — confirm against the CR3 parser.
    Cr3ExifSize(usize),
}
503
504impl Display for ParsingState {
505    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
506        match self {
507            ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
508            ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
509            ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
510        }
511    }
512}
513
514// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
515// parsing and `ParsingState` threading for format-specific state machines.
516pub(crate) trait BufParser: Buf + Debug {
517    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
518
519    fn load_and_parse<R: Read, P, O>(
520        &mut self,
521        reader: &mut R,
522        skip_by_seek: SkipBySeekFn<R>,
523        mut parse: P,
524    ) -> Result<O, ParsedError>
525    where
526        P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
527    {
528        self.load_and_parse_with_offset(
529            reader,
530            skip_by_seek,
531            |data, _, state| parse(data, state),
532            0,
533        )
534    }
535
536    #[tracing::instrument(skip_all)]
537    fn load_and_parse_with_offset<R: Read, P, O>(
538        &mut self,
539        reader: &mut R,
540        skip_by_seek: SkipBySeekFn<R>,
541        mut parse: P,
542        offset: usize,
543    ) -> Result<O, ParsedError>
544    where
545        P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
546    {
547        if offset >= self.buffer().len() {
548            self.fill_buf(reader, MIN_GROW_SIZE)?;
549        }
550        let mut parsing_state: Option<ParsingState> = None;
551        loop {
552            match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
553                LoopAction::Done(o) => return Ok(o),
554                LoopAction::NeedFill(needed) => {
555                    let to_read = max(needed, MIN_GROW_SIZE);
556                    let n = self.fill_buf(reader, to_read)?;
557                    if n == 0 {
558                        return Err(ParsedError::NoEnoughBytes);
559                    }
560                }
561                LoopAction::Skip(n) => {
562                    self.clear_and_skip(reader, skip_by_seek, n)?;
563                }
564                LoopAction::Failed(s) => return Err(ParsedError::Failed(s)),
565            }
566        }
567    }
568
569    #[tracing::instrument(skip(reader, skip_by_seek))]
570    fn clear_and_skip<R: Read>(
571        &mut self,
572        reader: &mut R,
573        skip_by_seek: SkipBySeekFn<R>,
574        n: usize,
575    ) -> Result<(), ParsedError> {
576        match clear_and_skip_decide(self.buffer().len(), n) {
577            SkipPlan::AdvanceOnly => {
578                self.set_position(self.position() + n);
579                Ok(())
580            }
581            SkipPlan::ClearAndSkip { extra: skip_n } => {
582                self.clear();
583                let done = (skip_by_seek)(
584                    reader,
585                    skip_n
586                        .try_into()
587                        .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
588                )?;
589                if !done {
590                    let mut skipped = 0;
591                    while skipped < skip_n {
592                        let mut to_skip = skip_n - skipped;
593                        to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
594                        let n = self.fill_buf(reader, to_skip)?;
595                        skipped += n;
596                        if skipped <= skip_n {
597                            self.clear();
598                        } else {
599                            let remain = skipped - skip_n;
600                            self.set_position(self.buffer().len() - remain);
601                            break;
602                        }
603                    }
604                }
605
606                if self.buffer().is_empty() {
607                    self.fill_buf(reader, MIN_GROW_SIZE)?;
608                }
609                Ok(())
610            }
611        }
612    }
613}
614
615impl BufParser for MediaParser {
616    #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
617    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
618        if self.state.is_memory_mode() {
619            // Memory mode owns every byte it will ever have. A request for
620            // more is "the parser walked off the end of the input"; surface
621            // it the same way the streaming path surfaces a 0-byte read.
622            return Err(std::io::ErrorKind::UnexpectedEof.into());
623        }
624        check_fill_size(self.state.buf().len(), size)?;
625
626        // Do not pre-allocate `size` bytes: a crafted box header can declare a
627        // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
628        // stream length. reserve_exact would allocate that memory immediately
629        // even when the reader has only a few bytes left. read_to_end grows the
630        // buffer from the reader's actual size hint instead.
631        let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
632        if n == 0 {
633            tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
634            return Err(std::io::ErrorKind::UnexpectedEof.into());
635        }
636
637        tracing::debug!(
638            ?size,
639            ?n,
640            buf_len = self.state.buf().len(),
641            "fill_buf: read bytes"
642        );
643
644        Ok(n)
645    }
646}
647
/// `MediaParser` exposes the buffer view of its inner
/// `BufferedParserState`; every method is a direct delegation.
impl Buf for MediaParser {
    /// Delegates to the inner state's `buffer()`.
    fn buffer(&self) -> &[u8] {
        self.state.buffer()
    }

    /// Delegates to the inner state's `clear()`.
    fn clear(&mut self) {
        self.state.clear();
    }

    /// Delegates to the inner state's `set_position()`.
    fn set_position(&mut self, pos: usize) {
        self.state.set_position(pos);
    }

    /// Delegates to the inner state's `position()`.
    fn position(&self) -> usize {
        self.state.position()
    }
}
665
666/// A `MediaParser` can parse media info from a [`MediaSource`].
667///
668/// `MediaParser` manages inner parse buffers that can be shared between
669/// multiple parsing tasks, thus avoiding frequent memory allocations.
670///
671/// Therefore:
672///
673/// - Try to reuse a `MediaParser` instead of creating a new one every time
674///   you need it.
675///
676/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
677///
678/// ## Example
679///
680/// ```rust
681/// use nom_exif::*;
682/// use chrono::DateTime;
683///
684/// let mut parser = MediaParser::new();
685///
686/// // ------------------- Parse Exif Info
687/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
688/// assert_eq!(ms.kind(), MediaKind::Image);
689/// let mut iter = parser.parse_exif(ms).unwrap();
690///
691/// let entry = iter.next().unwrap();
692/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
693/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
694///
695/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
696/// // we can start the iteration from the beginning.
697/// let exif: Exif = iter.clone().into();
698/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
699///
700/// // ------------------- Parse Track Info
701/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
702/// assert_eq!(ms.kind(), MediaKind::Track);
703/// let info = parser.parse_track(ms).unwrap();
704///
705/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
706/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
707/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
708/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
709/// assert_eq!(
710///     info.gps_info().unwrap().latitude,
711///     LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
712/// );
713/// ```
pub struct MediaParser {
    /// Buffer-management state (active buffer, recycle cache, memory-mode
    /// payload and read position) backing the `Buf` / `ShareBuf` impls.
    state: BufferedParserState,
}
717
718impl Debug for MediaParser {
719    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
720        f.debug_struct("MediaParser")
721            .field("state", &self.state)
722            .finish_non_exhaustive()
723    }
724}
725
726impl Default for MediaParser {
727    fn default() -> Self {
728        Self {
729            state: BufferedParserState::new(),
730        }
731    }
732}
733
/// Hands the parser's buffer over to a consumer as reference-counted
/// `Bytes`.
pub(crate) trait ShareBuf {
    /// Take ownership of the parser's active buffer and return the full
    /// allocation as `Bytes` plus the parser's `position` at share-time.
    /// Caller is responsible for slicing: a parse-loop range `r` corresponds
    /// to absolute range `(r.start + position)..(r.end + position)`.
    fn share_buf(&mut self) -> (bytes::Bytes, usize);
}
741
impl ShareBuf for MediaParser {
    /// Delegates to the inner buffer state's `share_buf()`.
    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
        self.state.share_buf()
    }
}
747
748impl MediaParser {
    /// Create a parser with an empty buffer state (same as
    /// `MediaParser::default()`).
    pub fn new() -> Self {
        Self::default()
    }
752
753    /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
754    /// if the source is a `Track` (use [`Self::parse_track`] instead).
755    ///
756    /// As of v3.3, this method also accepts memory-mode sources built via
757    /// [`MediaSource::from_memory`]. The deprecated [`Self::parse_exif_from_bytes`]
758    /// is now a thin adapter that delegates here.
759    ///
760    /// `MediaParser` reuses its internal parse buffer across calls, so prefer
761    /// reusing a single `MediaParser` over creating a new one per file. Drop
762    /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
763    /// the next `parse_*` call so the buffer can be reclaimed.
764    pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
765        self.reset();
766        let res: crate::Result<ExifIter> = (|| {
767            if let Some(memory) = ms.memory.take() {
768                // Memory-mode: zero-copy share of caller-owned bytes.
769                self.state.set_memory(memory);
770                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
771                    return Err(crate::Error::ExifNotFound);
772                }
773                crate::exif::parse_exif_iter(
774                    self,
775                    ms.mime.unwrap_image(),
776                    &mut ms.reader,
777                    ms.skip_by_seek,
778                )
779            } else {
780                // Streaming-mode: existing path verbatim.
781                self.acquire_buf();
782                self.buf_mut().append(&mut ms.buf);
783                // PNG-only EOF tolerance: a tEXt-only PNG can be smaller
784                // than HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png),
785                // so the mime-detection prefill consumes the whole reader
786                // and fill_buf returns UnexpectedEof. The bytes we need
787                // are already in the parse buffer — proceed. Other formats
788                // keep the strict-EOF contract.
789                let is_png = matches!(
790                    ms.mime,
791                    crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
792                );
793                match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
794                    Ok(_) => {}
795                    Err(e)
796                        if is_png
797                            && !self.buffer().is_empty()
798                            && e.kind() == io::ErrorKind::UnexpectedEof => {}
799                    Err(e) => return Err(e.into()),
800                }
801                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
802                    return Err(crate::Error::ExifNotFound);
803                }
804                crate::exif::parse_exif_iter(
805                    self,
806                    ms.mime.unwrap_image(),
807                    &mut ms.reader,
808                    ms.skip_by_seek,
809                )
810            }
811        })();
812        self.reset();
813        res
814    }
815
816    /// Parse track info from a video/audio source.
817    ///
818    /// Parse track info from a video/audio source.
819    ///
820    /// In v3.1, this also accepts JPEG images that carry an embedded
821    /// Pixel/Google Motion Photo trailer. As of v3.3, it also accepts
822    /// memory-mode sources built via [`MediaSource::from_memory`]; the
823    /// deprecated [`Self::parse_track_from_bytes`] is now a thin
824    /// adapter that delegates here.
825    pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
826        self.reset();
827        let res: crate::Result<TrackInfo> = (|| {
828            if let Some(memory) = ms.memory.take() {
829                // Memory mode: zero-copy.
830                self.state.set_memory(memory);
831                let mime_track = match ms.mime {
832                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
833                    crate::file::MediaMime::Track(t) => t,
834                };
835                let out = self.load_and_parse(&mut ms.reader, ms.skip_by_seek, |data, _| {
836                    crate::video::parse_track_info(data, mime_track)
837                        .map_err(|e| ParsingErrorState::new(e, None))
838                })?;
839                Ok(out)
840            } else {
841                // Streaming mode: existing path verbatim.
842                self.acquire_buf();
843                self.buf_mut().append(&mut ms.buf);
844                self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
845                match ms.mime {
846                    crate::file::MediaMime::Image(crate::file::MediaMimeImage::Jpeg) => {
847                        self.parse_jpeg_motion_photo(&mut ms.reader)
848                    }
849                    crate::file::MediaMime::Image(_) => Err(crate::Error::TrackNotFound),
850                    crate::file::MediaMime::Track(mime_track) => {
851                        let skip = ms.skip_by_seek;
852                        Ok(self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
853                            crate::video::parse_track_info(data, mime_track)
854                                .map_err(|e| ParsingErrorState::new(e, None))
855                        })?)
856                    }
857                }
858            }
859        })();
860        self.reset();
861        res
862    }
863
864    /// Read a JPEG to EOF, locate a Pixel-style Motion Photo MP4 trailer,
865    /// and parse it as track metadata. Returns
866    /// [`crate::Error::TrackNotFound`] if no Motion Photo signal is
867    /// present in the JPEG's XMP.
868    fn parse_jpeg_motion_photo<R: Read>(&mut self, reader: &mut R) -> crate::Result<TrackInfo> {
869        // Drain the rest of the JPEG into the parse buffer so we can
870        // address the trailing MP4 by its byte offset from EOF.
871        reader.read_to_end(self.buf_mut())?;
872        let buf = self.buf_mut();
873        let Some(offset) = crate::jpeg::find_motion_photo_offset(buf) else {
874            return Err(crate::Error::TrackNotFound);
875        };
876        let trailer_start = (buf.len() as u64)
877            .checked_sub(offset)
878            .ok_or(crate::Error::TrackNotFound)? as usize;
879        let trailer = &buf[trailer_start..];
880
881        // The trailer can be MP4 / MOV / 3gp depending on the source device;
882        // dispatch by sniffing it as a fresh ISO BMFF input.
883        let trailer_mime =
884            crate::file::MediaMime::try_from(trailer).map_err(|_| crate::Error::TrackNotFound)?;
885        let mime_track = match trailer_mime {
886            crate::file::MediaMime::Track(t) => t,
887            crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
888        };
889        crate::video::parse_track_info(trailer, mime_track).map_err(|e| match e {
890            crate::error::ParsingError::Need(_) | crate::error::ParsingError::ClearAndSkip(_) => {
891                crate::Error::UnexpectedEof {
892                    context: "motion-photo trailer",
893                }
894            }
895            crate::error::ParsingError::Failed(msg) => crate::Error::Malformed {
896                kind: crate::error::MalformedKind::IsoBmffBox,
897                message: msg,
898            },
899        })
900    }
901
902    /// Parse Exif metadata from an in-memory byte payload built via
903    /// the deprecated [`MediaSource::<()>::from_bytes`].
904    ///
905    /// **Deprecated since v3.3.0**: use [`Self::parse_exif`] with
906    /// [`MediaSource::from_memory`] directly.
907    #[deprecated(
908        since = "3.3.0",
909        note = "Use `parse_exif` directly — it now accepts memory-mode \
910                sources built via `MediaSource::from_memory`."
911    )]
912    pub fn parse_exif_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<ExifIter> {
913        self.parse_exif(ms.into_empty())
914    }
915
916    /// **Deprecated since v3.3.0**: use [`Self::parse_track`] with
917    /// [`MediaSource::from_memory`] directly.
918    #[deprecated(
919        since = "3.3.0",
920        note = "Use `parse_track` with `MediaSource::from_memory`."
921    )]
922    pub fn parse_track_from_bytes(&mut self, ms: MediaSource<()>) -> crate::Result<TrackInfo> {
923        self.parse_track(ms.into_empty())
924    }
925
926    /// Parse all metadata from an image source: EXIF (if any) and
927    /// format-specific extras (PNG `tEXt` chunks, etc.).
928    ///
929    /// Returns `Err(Error::ExifNotFound)` if neither EXIF nor any
930    /// format-specific metadata is found. Returns
931    /// `Err(Error::TrackNotFound)`-style errors on track inputs (use
932    /// `parse_track` instead).
933    ///
934    /// **Lazy form** — this method returns `ImageMetadata<ExifIter>`.
935    /// Convert to the eager `ImageMetadata<Exif>` via `.into()` if
936    /// desired.
937    pub fn parse_image_metadata<R: Read>(
938        &mut self,
939        mut ms: MediaSource<R>,
940    ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
941        self.reset();
942        let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = (|| {
943            // Reject track inputs early (parse_track is the right API).
944            let mime_img = match ms.mime {
945                crate::file::MediaMime::Image(img) => img,
946                crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
947            };
948
949            // Memory-mode shortcut + buffer setup mirrors parse_exif.
950            if let Some(memory) = ms.memory.take() {
951                self.state.set_memory(memory);
952            } else {
953                self.acquire_buf();
954                self.buf_mut().append(&mut ms.buf);
955                // PNG-only EOF tolerance: a tEXt-only PNG can be smaller than
956                // HEADER_PARSE_BUF_SIZE (e.g. 117-byte text-only.png) so the
957                // mime-detection prefill consumes the whole reader and
958                // fill_buf returns UnexpectedEof. The bytes we need are
959                // already in the parse buffer — proceed. Other formats keep
960                // the strict-EOF contract.
961                let is_png = mime_img == crate::file::MediaMimeImage::Png;
962                match self.fill_buf(&mut ms.reader, INIT_BUF_SIZE) {
963                    Ok(_) => {}
964                    Err(e)
965                        if is_png
966                            && !self.buffer().is_empty()
967                            && e.kind() == io::ErrorKind::UnexpectedEof => {}
968                    Err(e) => return Err(e.into()),
969                }
970            }
971
972            if mime_img == crate::file::MediaMimeImage::Png {
973                let (exif, text_chunks) =
974                    crate::exif::parse_png_full(self, &mut ms.reader, ms.skip_by_seek)?;
975                let format = if text_chunks.is_empty() {
976                    None
977                } else {
978                    Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
979                        entries: text_chunks,
980                    }))
981                };
982                if exif.is_none() && format.is_none() {
983                    return Err(crate::Error::ExifNotFound);
984                }
985                Ok(crate::ImageMetadata { exif, format })
986            } else {
987                // Non-PNG: existing parse_exif_iter path; format always None.
988                let iter =
989                    crate::exif::parse_exif_iter(self, mime_img, &mut ms.reader, ms.skip_by_seek)?;
990                Ok(crate::ImageMetadata {
991                    exif: Some(iter),
992                    format: None,
993                })
994            }
995        })(
996        );
997        self.reset();
998        res
999    }
1000
    /// Reset the parser by delegating to `self.state.reset()`.
    ///
    /// The `parse_*` entry points call this both before and after each
    /// parse.
    fn reset(&mut self) {
        self.state.reset();
    }
1004
    /// Mutable access to the byte buffer held by `self.state`.
    ///
    /// Used by the streaming branches to append prefetched bytes and to
    /// read further data into the buffer.
    fn buf_mut(&mut self) -> &mut Vec<u8> {
        self.state.buf_mut()
    }
1008
    /// Ask `self.state` to acquire its parse buffer.
    ///
    /// The streaming branches call this before appending any data.
    fn acquire_buf(&mut self) {
        self.state.acquire_buf();
    }
1012}
1013
1014#[cfg(feature = "tokio")]
1015mod tokio_impl {
1016    use super::*;
1017    use crate::error::ParsingErrorState;
1018    use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
1019    use tokio::io::{AsyncRead, AsyncReadExt};
1020
1021    impl AsyncBufParser for MediaParser {
1022        async fn fill_buf<R: AsyncRead + Unpin>(
1023            &mut self,
1024            reader: &mut R,
1025            size: usize,
1026        ) -> std::io::Result<usize> {
1027            if self.state.is_memory_mode() {
1028                // Memory mode owns every byte it will ever have. Surface
1029                // "walked off end of input" the same way the streaming path
1030                // surfaces a 0-byte read.
1031                return Err(std::io::ErrorKind::UnexpectedEof.into());
1032            }
1033            check_fill_size(self.state.buf().len(), size)?;
1034            // Same rationale as the sync version: do not pre-allocate `size` bytes.
1035            let n = reader
1036                .take(size as u64)
1037                .read_to_end(self.state.buf_mut())
1038                .await?;
1039            if n == 0 {
1040                return Err(std::io::ErrorKind::UnexpectedEof.into());
1041            }
1042            Ok(n)
1043        }
1044    }
1045
1046    impl MediaParser {
1047        /// Parse Exif metadata from an async image source. Returns
1048        /// `Error::ExifNotFound` if the source is a `Track`.
1049        ///
1050        /// As of v3.3, also accepts memory-mode sources built via
1051        /// [`AsyncMediaSource::from_memory`]; the memory branch shares
1052        /// caller-owned `Bytes` zero-copy through `state.set_memory`.
1053        pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
1054            &mut self,
1055            mut ms: AsyncMediaSource<R>,
1056        ) -> crate::Result<ExifIter> {
1057            self.reset();
1058            let res: crate::Result<ExifIter> = async {
1059                if let Some(memory) = ms.memory.take() {
1060                    // Memory-mode: zero-copy share of caller-owned bytes.
1061                    self.state.set_memory(memory);
1062                    if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
1063                        return Err(crate::Error::ExifNotFound);
1064                    }
1065                    crate::exif::parse_exif_iter_async(
1066                        self,
1067                        ms.mime.unwrap_image(),
1068                        &mut ms.reader,
1069                        ms.skip_by_seek,
1070                    )
1071                    .await
1072                } else {
1073                    self.acquire_buf();
1074                    self.buf_mut().append(&mut ms.buf);
1075                    // PNG-only EOF tolerance mirrors the sync path: small
1076                    // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1077                    // consumed during mime detection, so fill_buf returns
1078                    // UnexpectedEof. The bytes are already in the parse
1079                    // buffer; proceed.
1080                    let is_png = matches!(
1081                        ms.mime,
1082                        crate::file::MediaMime::Image(crate::file::MediaMimeImage::Png)
1083                    );
1084                    match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1085                        .await
1086                    {
1087                        Ok(_) => {}
1088                        Err(e)
1089                            if is_png
1090                                && !self.buffer().is_empty()
1091                                && e.kind() == io::ErrorKind::UnexpectedEof => {}
1092                        Err(e) => return Err(e.into()),
1093                    }
1094                    if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
1095                        return Err(crate::Error::ExifNotFound);
1096                    }
1097                    crate::exif::parse_exif_iter_async(
1098                        self,
1099                        ms.mime.unwrap_image(),
1100                        &mut ms.reader,
1101                        ms.skip_by_seek,
1102                    )
1103                    .await
1104                }
1105            }
1106            .await;
1107            self.reset();
1108            res
1109        }
1110
1111        pub async fn parse_image_metadata_async<R: AsyncRead + Unpin + Send>(
1112            &mut self,
1113            mut ms: AsyncMediaSource<R>,
1114        ) -> crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> {
1115            self.reset();
1116            let res: crate::Result<crate::image_metadata::ImageMetadata<crate::ExifIter>> = async {
1117                let mime_img = match ms.mime {
1118                    crate::file::MediaMime::Image(img) => img,
1119                    crate::file::MediaMime::Track(_) => return Err(crate::Error::ExifNotFound),
1120                };
1121
1122                if let Some(memory) = ms.memory.take() {
1123                    self.state.set_memory(memory);
1124                } else {
1125                    self.acquire_buf();
1126                    self.buf_mut().append(&mut ms.buf);
1127                    // PNG-only EOF tolerance mirrors the sync path: small
1128                    // tEXt-only PNGs (<HEADER_PARSE_BUF_SIZE) are fully
1129                    // consumed during mime detection, so fill_buf returns
1130                    // UnexpectedEof; the bytes we need are already in the
1131                    // parse buffer.
1132                    let is_png = mime_img == crate::file::MediaMimeImage::Png;
1133                    match <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE)
1134                        .await
1135                    {
1136                        Ok(_) => {}
1137                        Err(e)
1138                            if is_png
1139                                && !self.buffer().is_empty()
1140                                && e.kind() == io::ErrorKind::UnexpectedEof => {}
1141                        Err(e) => return Err(e.into()),
1142                    }
1143                }
1144
1145                if mime_img == crate::file::MediaMimeImage::Png {
1146                    let (exif, text_chunks) =
1147                        crate::exif::parse_png_full_async(self, &mut ms.reader, ms.skip_by_seek)
1148                            .await?;
1149                    let format = if text_chunks.is_empty() {
1150                        None
1151                    } else {
1152                        Some(crate::ImageFormatMetadata::Png(crate::PngTextChunks {
1153                            entries: text_chunks,
1154                        }))
1155                    };
1156                    if exif.is_none() && format.is_none() {
1157                        return Err(crate::Error::ExifNotFound);
1158                    }
1159                    Ok(crate::image_metadata::ImageMetadata { exif, format })
1160                } else {
1161                    let iter = crate::exif::parse_exif_iter_async(
1162                        self,
1163                        mime_img,
1164                        &mut ms.reader,
1165                        ms.skip_by_seek,
1166                    )
1167                    .await?;
1168                    Ok(crate::image_metadata::ImageMetadata {
1169                        exif: Some(iter),
1170                        format: None,
1171                    })
1172                }
1173            }
1174            .await;
1175            self.reset();
1176            res
1177        }
1178
1179        /// Parse track info from an async video/audio source. Returns
1180        /// `Error::TrackNotFound` if the source is an `Image`.
1181        ///
1182        /// As of v3.3, also accepts memory-mode sources built via
1183        /// [`AsyncMediaSource::from_memory`].
1184        pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
1185            &mut self,
1186            mut ms: AsyncMediaSource<R>,
1187        ) -> crate::Result<TrackInfo> {
1188            self.reset();
1189            let res: crate::Result<TrackInfo> = async {
1190                let mime_track = match ms.mime {
1191                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
1192                    crate::file::MediaMime::Track(t) => t,
1193                };
1194                if let Some(memory) = ms.memory.take() {
1195                    self.state.set_memory(memory);
1196                } else {
1197                    self.acquire_buf();
1198                    self.buf_mut().append(&mut ms.buf);
1199                    <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
1200                }
1201                let skip = ms.skip_by_seek;
1202                let out = <Self as AsyncBufParser>::load_and_parse(
1203                    self,
1204                    &mut ms.reader,
1205                    skip,
1206                    |data, _| {
1207                        crate::video::parse_track_info(data, mime_track)
1208                            .map_err(|e| ParsingErrorState::new(e, None))
1209                    },
1210                )
1211                .await?;
1212                Ok(out)
1213            }
1214            .await;
1215            self.reset();
1216            res
1217        }
1218    }
1219}
1220
1221#[cfg(test)]
1222mod tests {
1223    use std::sync::{LazyLock, Mutex, MutexGuard};
1224
1225    use super::*;
1226    use test_case::case;
1227
    /// Expected outcome for a sample file in the `parse_media` test.
    enum TrackExif {
        /// Opens as a track source and `parse_track` succeeds.
        Track,
        /// Opens as an image source and `parse_exif` succeeds.
        Exif,
        /// Opens successfully, but the matching `parse_*` call returns an
        /// error.
        NoData,
        /// `MediaSource::open` itself fails.
        Invalid,
    }
1234    use TrackExif::*;
1235
1236    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
1237    fn parser() -> MutexGuard<'static, MediaParser> {
1238        PARSER.lock().unwrap()
1239    }
1240
1241    #[case("3gp_640x360.3gp", Track)]
1242    #[case("broken.jpg", Exif)]
1243    #[case("compatible-brands-fail.heic", Invalid)]
1244    #[case("compatible-brands-fail.mov", Invalid)]
1245    #[case("compatible-brands.heic", NoData)]
1246    #[case("compatible-brands.mov", NoData)]
1247    #[case("embedded-in-heic.mov", Track)]
1248    #[case("exif.heic", Exif)]
1249    #[case("exif.jpg", Exif)]
1250    #[case("exif-no-tz.jpg", Exif)]
1251    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
1252    #[case("meta.mov", Track)]
1253    #[case("meta.mp4", Track)]
1254    #[case("mka.mka", Track)]
1255    #[case("mkv_640x360.mkv", Track)]
1256    #[case("exif-one-entry.heic", Exif)]
1257    #[case("no-exif.jpg", NoData)]
1258    #[case("tif.tif", Exif)]
1259    #[case("ramdisk.img", Invalid)]
1260    #[case("webm_480.webm", Track)]
1261    fn parse_media(path: &str, te: TrackExif) {
1262        let mut parser = parser();
1263        let ms = MediaSource::open(Path::new("testdata").join(path));
1264        match te {
1265            Track => {
1266                let ms = ms.unwrap();
1267                assert_eq!(ms.kind(), MediaKind::Track);
1268                let _: TrackInfo = parser.parse_track(ms).unwrap();
1269            }
1270            Exif => {
1271                let ms = ms.unwrap();
1272                assert_eq!(ms.kind(), MediaKind::Image);
1273                let mut it: ExifIter = parser.parse_exif(ms).unwrap();
1274                let _ = it.parse_gps();
1275
1276                if path.contains("one-entry") {
1277                    assert!(it.next().is_some());
1278                    assert!(it.next().is_none());
1279
1280                    let exif: crate::Exif = it.clone_rewound().into();
1281                    assert!(exif.get(ExifTag::Orientation).is_some());
1282                } else {
1283                    let _: crate::Exif = it.clone_rewound().into();
1284                }
1285            }
1286            NoData => {
1287                let ms = ms.unwrap();
1288                match ms.kind() {
1289                    MediaKind::Image => {
1290                        let res = parser.parse_exif(ms);
1291                        res.unwrap_err();
1292                    }
1293                    MediaKind::Track => {
1294                        let res = parser.parse_track(ms);
1295                        res.unwrap_err();
1296                    }
1297                }
1298            }
1299            Invalid => {
1300                ms.unwrap_err();
1301            }
1302        }
1303    }
1304
1305    use crate::testkit::open_sample;
1306    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
1307    use chrono::{DateTime, FixedOffset, NaiveDateTime};
1308    use test_case::test_case;
1309
1310    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
1311    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
1312    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, 
1313        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), 
1314            Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
1315    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal, 
1316        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1317    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1318        let mut parser = parser();
1319
1320        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1321        assert_eq!(mf.kind(), MediaKind::Image);
1322        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1323        let exif: Exif = iter.into();
1324        assert_eq!(exif.get(tag).unwrap(), &v);
1325
1326        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1327        assert_eq!(mf.kind(), MediaKind::Image);
1328        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1329        let exif: Exif = iter.into();
1330        assert_eq!(exif.get(tag).unwrap(), &v);
1331    }
1332
1333    use crate::video::TrackInfoTag::*;
1334
1335    #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1336    #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1337    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1338    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1339    #[test_case("meta.mov", Make, "Apple".into())]
1340    #[test_case("meta.mov", Model, "iPhone X".into())]
1341    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1342    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1343    #[test_case("meta.mp4", Width, 1920_u32.into())]
1344    #[test_case("meta.mp4", Height, 1080_u32.into())]
1345    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1346    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1347    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1348    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1349    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1350    #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1351    #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1352    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1353    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1354    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1355        let mut parser = parser();
1356
1357        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1358        let info: TrackInfo = parser.parse_track(mf).unwrap();
1359        assert_eq!(info.get(tag).unwrap(), &v);
1360
1361        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1362        let info: TrackInfo = parser.parse_track(mf).unwrap();
1363        assert_eq!(info.get(tag).unwrap(), &v);
1364    }
1365
1366    #[test_case("crash_moov-trak")]
1367    #[test_case("crash_skip_large")]
1368    #[test_case("crash_add_large")]
1369    fn parse_track_crash(path: &str) {
1370        let mut parser = parser();
1371
1372        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1373        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1374
1375        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1376        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1377    }
1378
1379    // Regression: a crafted ISOBMFF file declares an extended 64-bit box size
1380    // just under MAX_PARSE_BUF_SIZE (~1 GB). Pre-fix, the unseekable parser called
1381    // reserve_exact() with that size before reading, allocating ~1 GB even when
1382    // the actual stream contained only a few KB. See commit 81f9e8a.
1383    #[test]
1384    fn parse_oom_large_box() {
1385        let mut parser = parser();
1386
1387        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1388        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1389
1390        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1391        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1392
1393        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1394        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1395
1396        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1397        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1398    }
1399
1400    #[test]
1401    fn media_kind_classifies_image_and_track() {
1402        let img = MediaSource::open("testdata/exif.jpg").unwrap();
1403        assert_eq!(img.kind(), MediaKind::Image);
1404
1405        let trk = MediaSource::open("testdata/meta.mov").unwrap();
1406        assert_eq!(trk.kind(), MediaKind::Track);
1407    }
1408
1409    #[test]
1410    fn media_source_open() {
1411        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1412        assert_eq!(ms.kind(), MediaKind::Image);
1413    }
1414
1415    #[test]
1416    fn parse_exif_returns_exif_iter() {
1417        let mut parser = parser();
1418        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1419        let _: ExifIter = parser.parse_exif(ms).unwrap();
1420    }
1421
1422    #[test]
1423    fn parse_track_returns_track_info() {
1424        let mut parser = parser();
1425        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1426        let _: TrackInfo = parser.parse_track(ms).unwrap();
1427    }
1428
1429    #[test]
1430    fn parse_exif_on_track_returns_exif_not_found_v3() {
1431        let mut parser = parser();
1432        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1433        let res = parser.parse_exif(ms);
1434        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1435    }
1436
1437    #[test]
1438    fn parse_track_on_image_returns_track_not_found_v3() {
1439        let mut parser = parser();
1440        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1441        let res = parser.parse_track(ms);
1442        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1443    }
1444
1445    #[cfg(feature = "tokio")]
1446    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1447    async fn media_parser_parse_exif_async() {
1448        use crate::parser_async::AsyncMediaSource;
1449        let mut parser = MediaParser::new();
1450        let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
1451        let _: ExifIter = parser.parse_exif_async(ms).await.unwrap();
1452    }
1453
1454    #[cfg(feature = "tokio")]
1455    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1456    async fn media_parser_parse_track_async() {
1457        use crate::parser_async::AsyncMediaSource;
1458        let mut parser = MediaParser::new();
1459        let ms = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
1460        let _: TrackInfo = parser.parse_track_async(ms).await.unwrap();
1461    }
1462
1463    #[test]
1464    fn parser_recycles_alloc_when_exif_iter_dropped() {
1465        let mut parser = MediaParser::new();
1466
1467        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1468        let iter = parser.parse_exif(ms).unwrap();
1469        let exif: crate::Exif = iter.into();
1470        drop(exif);
1471        let ptr_after_first = parser.state.cached_ptr_for_test();
1472
1473        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1474        let iter = parser.parse_exif(ms).unwrap();
1475        let _exif: crate::Exif = iter.into();
1476        let ptr_after_second = parser.state.cached_ptr_for_test();
1477
1478        assert!(
1479            ptr_after_first.is_some() && ptr_after_first == ptr_after_second,
1480            "expected recycled allocation, got {:?} -> {:?}",
1481            ptr_after_first,
1482            ptr_after_second
1483        );
1484    }
1485
1486    #[test]
1487    fn parser_new_does_no_upfront_allocation() {
1488        let parser = MediaParser::new();
1489        assert!(parser.state.cached_ptr_for_test().is_none());
1490        assert!(parser.state.buf_is_none_for_test());
1491    }
1492
1493    #[test]
1494    fn buffered_state_memory_mode_sets_and_reads() {
1495        let mut s = BufferedParserState::new();
1496        s.set_memory(bytes::Bytes::from_static(b"abcdefgh"));
1497        assert!(s.is_memory_mode());
1498        assert_eq!(s.buffer(), b"abcdefgh");
1499        s.set_position(3);
1500        assert_eq!(s.buffer(), b"defgh");
1501    }
1502
1503    #[test]
1504    fn buffered_state_share_buf_memory_mode_is_zero_copy() {
1505        let original = bytes::Bytes::from_static(b"the parser owns nothing here");
1506        let original_ptr = original.as_ptr();
1507        let mut s = BufferedParserState::new();
1508        s.set_memory(original);
1509        let (shared, position) = s.share_buf();
1510        assert_eq!(position, 0);
1511        assert_eq!(
1512            shared.as_ptr(),
1513            original_ptr,
1514            "memory share must be a Bytes::clone, not a Vec round-trip"
1515        );
1516        // After share_buf, the parser's memory slot is taken — leaving the state
1517        // ready for the next `reset()` cycle.
1518        assert!(!s.is_memory_mode());
1519    }
1520
1521    #[test]
1522    fn buffered_state_reset_clears_memory() {
1523        let mut s = BufferedParserState::new();
1524        s.set_memory(bytes::Bytes::from_static(b"x"));
1525        s.reset();
1526        assert!(!s.is_memory_mode());
1527        assert_eq!(s.position, 0);
1528    }
1529
1530    #[test]
1531    fn buffered_state_acquire_buf_skips_in_memory_mode() {
1532        let mut s = BufferedParserState::new();
1533        s.set_memory(bytes::Bytes::from_static(b"data"));
1534        s.acquire_buf();
1535        // No streaming buf was allocated.
1536        assert!(s.buf.is_none());
1537        // Memory still readable.
1538        assert_eq!(s.buffer(), b"data");
1539    }
1540
1541    #[test]
1542    fn media_source_from_memory_image_jpg() {
1543        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1544        let ms = MediaSource::from_memory(raw).unwrap();
1545        assert_eq!(ms.kind(), MediaKind::Image);
1546        assert!(ms.memory.is_some());
1547    }
1548
1549    #[test]
1550    fn media_source_from_memory_track_mov() {
1551        let raw = std::fs::read("testdata/meta.mov").unwrap();
1552        let ms = MediaSource::from_memory(raw).unwrap();
1553        assert_eq!(ms.kind(), MediaKind::Track);
1554    }
1555
1556    #[test]
1557    fn media_source_from_memory_static_slice() {
1558        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1559        let ms = MediaSource::from_memory(raw).unwrap();
1560        assert_eq!(ms.kind(), MediaKind::Image);
1561    }
1562
1563    #[test]
1564    fn media_source_from_memory_rejects_too_short() {
1565        let raw = vec![0u8; 4];
1566        let res = MediaSource::from_memory(raw);
1567        assert!(res.is_err());
1568    }
1569
1570    #[test]
1571    fn media_source_from_memory_rejects_unknown_mime() {
1572        let raw = vec![0xAAu8; 256];
1573        let res = MediaSource::from_memory(raw);
1574        assert!(res.is_err());
1575    }
1576
1577    #[test]
1578    fn parse_exif_unified_from_memory_jpg() {
1579        let mut parser = MediaParser::new();
1580        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1581        let ms = MediaSource::from_memory(raw).unwrap();
1582        let iter = parser.parse_exif(ms).unwrap();
1583        let exif: crate::Exif = iter.into();
1584        assert!(exif.get(crate::ExifTag::Make).is_some());
1585    }
1586
1587    #[test]
1588    fn parse_exif_unified_from_memory_heic() {
1589        let mut parser = MediaParser::new();
1590        let raw = std::fs::read("testdata/exif.heic").unwrap();
1591        let ms = MediaSource::from_memory(raw).unwrap();
1592        let iter = parser.parse_exif(ms).unwrap();
1593        let exif: crate::Exif = iter.into();
1594        assert_eq!(
1595            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1596            Some("Apple")
1597        );
1598    }
1599
1600    #[test]
1601    fn parse_exif_unified_from_memory_zero_copy_preserved() {
1602        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1603        let bytes = bytes::Bytes::from(raw);
1604        let _original_ptr = bytes.as_ptr();
1605
1606        let mut parser = MediaParser::new();
1607        let ms = MediaSource::from_memory(bytes).unwrap();
1608        let iter = parser.parse_exif(ms).unwrap();
1609
1610        // Memory mode must not poison the recycle cache — same invariant
1611        // the old parse_exif_from_bytes route asserts.
1612        assert!(
1613            parser.state.cached_ptr_for_test().is_none(),
1614            "memory mode must not write to the streaming-buf recycle cache"
1615        );
1616        drop(iter);
1617    }
1618
1619    #[test]
1620    fn parse_exif_unified_on_track_returns_exif_not_found() {
1621        let mut parser = MediaParser::new();
1622        let raw = std::fs::read("testdata/meta.mov").unwrap();
1623        let ms = MediaSource::from_memory(raw).unwrap();
1624        let res = parser.parse_exif(ms);
1625        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1626    }
1627
1628    #[test]
1629    fn parse_exif_unified_on_truncated_returns_io_error() {
1630        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1631        raw.truncate(200);
1632        let mut parser = MediaParser::new();
1633        let ms = MediaSource::from_memory(raw).unwrap();
1634        let res = parser.parse_exif(ms);
1635        assert!(
1636            res.is_err(),
1637            "expected error on truncated bytes, got {:?}",
1638            res
1639        );
1640    }
1641
1642    #[test]
1643    fn parse_exif_streaming_tiny_non_png_keeps_strict_eof() {
1644        // The PNG EOF-tolerance branch is scoped to PNG. A tiny non-PNG file
1645        // (here: the 36-byte compatible-brands.heic fixture) consumes its
1646        // entire reader during mime-detection prefill, so the subsequent
1647        // fill_buf hits UnexpectedEof. parse_exif must surface an error
1648        // (any error) — never Ok — for non-PNG inputs.
1649        let mut parser = MediaParser::new();
1650        let ms = MediaSource::open("testdata/compatible-brands.heic").unwrap();
1651        let res = parser.parse_exif(ms);
1652        assert!(res.is_err(), "expected Err for tiny HEIC, got {:?}", res);
1653    }
1654
1655    #[test]
1656    #[allow(deprecated)]
1657    fn media_source_from_bytes_image_jpg() {
1658        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1659        let ms = MediaSource::from_bytes(raw).unwrap();
1660        assert_eq!(ms.kind(), MediaKind::Image);
1661        assert!(ms.memory.is_some());
1662    }
1663
1664    #[test]
1665    #[allow(deprecated)]
1666    fn media_source_from_bytes_track_mov() {
1667        let raw = std::fs::read("testdata/meta.mov").unwrap();
1668        let ms = MediaSource::from_bytes(raw).unwrap();
1669        assert_eq!(ms.kind(), MediaKind::Track);
1670    }
1671
1672    #[test]
1673    #[allow(deprecated)]
1674    fn media_source_from_bytes_static_slice() {
1675        // &'static [u8] should work via Into<Bytes> because the file is read
1676        // into a Vec at compile-time-friendly size; here we use include_bytes.
1677        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1678        let ms = MediaSource::from_bytes(raw).unwrap();
1679        assert_eq!(ms.kind(), MediaKind::Image);
1680    }
1681
1682    #[test]
1683    #[allow(deprecated)]
1684    fn media_source_from_bytes_rejects_too_short() {
1685        // Below the smallest mime signature length: should fail mime detection.
1686        let raw = vec![0u8; 4];
1687        let res = MediaSource::from_bytes(raw);
1688        assert!(res.is_err(), "expected mime-detection error");
1689    }
1690
1691    #[test]
1692    #[allow(deprecated)]
1693    fn media_source_from_bytes_rejects_unknown_mime() {
1694        // Random bytes long enough to trigger detection but not match any
1695        // signature.
1696        let raw = vec![0xAAu8; 256];
1697        let res = MediaSource::from_bytes(raw);
1698        assert!(
1699            res.is_err(),
1700            "expected mime-detection error for unknown bytes"
1701        );
1702    }
1703
1704    #[test]
1705    fn p4_5_baseline_exif_jpg_full_dump() {
1706        // Lock down the post-refactor invariant: parsing testdata/exif.jpg through
1707        // the public API must yield the same set of (ifd, tag, value) triples
1708        // before and after P4.5. We capture them as a sorted, formatted string so
1709        // the assertion is a single literal comparison.
1710        let mut parser = MediaParser::new();
1711        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1712        let iter: ExifIter = parser.parse_exif(ms).unwrap();
1713
1714        let mut entries: Vec<String> = iter
1715            .map(|e| {
1716                let tag_name = match e.tag() {
1717                    crate::TagOrCode::Tag(t) => format!("{t}"),
1718                    crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
1719                };
1720                let value_str = e
1721                    .value()
1722                    .map(|v| format!("{v}"))
1723                    .unwrap_or_else(|| "<err>".into());
1724                format!("{}.{}={:?}", e.ifd(), tag_name, value_str)
1725            })
1726            .collect();
1727        entries.sort();
1728        let snapshot = entries.join("\n");
1729
1730        // Sanity: should produce non-trivial content. Exact content is checked by
1731        // the existing parse_media tests; this one guards against accidental
1732        // re-ordering / dedup changes during the refactor.
1733        assert!(
1734            entries.len() > 5,
1735            "expected >5 entries, got {}",
1736            entries.len()
1737        );
1738        assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
1739    }
1740
1741    #[test]
1742    #[allow(deprecated)]
1743    fn parse_exif_from_bytes_jpg_basic() {
1744        let mut parser = MediaParser::new();
1745        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1746        let ms = MediaSource::from_bytes(raw).unwrap();
1747        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1748        let exif: crate::Exif = iter.into();
1749        assert!(exif.get(crate::ExifTag::Make).is_some());
1750    }
1751
1752    #[test]
1753    #[allow(deprecated)]
1754    fn parse_exif_from_bytes_heic_basic() {
1755        let mut parser = MediaParser::new();
1756        let raw = std::fs::read("testdata/exif.heic").unwrap();
1757        let ms = MediaSource::from_bytes(raw).unwrap();
1758        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1759        let exif: crate::Exif = iter.into();
1760        assert_eq!(
1761            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1762            Some("Apple")
1763        );
1764    }
1765
1766    #[test]
1767    #[allow(deprecated)]
1768    fn parse_exif_from_bytes_zero_copy_shared_bytes() {
1769        // Build a Bytes whose pointer we can compare. The ExifIter's underlying
1770        // share must point to the same allocation — proving Bytes::clone path.
1771        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1772        let bytes = bytes::Bytes::from(raw);
1773        let original_ptr = bytes.as_ptr();
1774
1775        let mut parser = MediaParser::new();
1776        let ms = MediaSource::from_bytes(bytes).unwrap();
1777        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1778
1779        // The cached pointer in parser state should be None in memory mode
1780        // (memory mode does not write to cache — the user owns the alloc).
1781        assert!(
1782            parser.state.cached_ptr_for_test().is_none(),
1783            "memory mode must not poison the recycle cache"
1784        );
1785
1786        // Drop the iter and confirm parser is clean for the next call.
1787        drop(iter);
1788
1789        // Build again; pointer identity proves we did not duplicate the alloc
1790        // anywhere along the parse path.
1791        let bytes2 = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
1792        let ms2 = MediaSource::from_bytes(bytes2.clone()).unwrap();
1793        let _iter2 = parser.parse_exif_from_bytes(ms2).unwrap();
1794        // (We cannot assert pointer-equality across distinct user Bytes; the
1795        // assertion above on the first parse is the load-bearing one.)
1796        let _ = original_ptr; // explicit: original_ptr is the assertion target.
1797    }
1798
1799    #[test]
1800    #[allow(deprecated)]
1801    fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
1802        let mut parser = MediaParser::new();
1803        let raw = std::fs::read("testdata/meta.mov").unwrap();
1804        let ms = MediaSource::from_bytes(raw).unwrap();
1805        let res = parser.parse_exif_from_bytes(ms);
1806        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1807    }
1808
1809    #[test]
1810    #[allow(deprecated)]
1811    fn parse_exif_from_bytes_on_truncated_returns_io_error() {
1812        // Truncate exif.jpg to just enough for mime detection but too short
1813        // for the full EXIF block. Memory-mode fill_buf must surface
1814        // UnexpectedEof when the parser walks off the end.
1815        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1816        raw.truncate(200);
1817        let mut parser = MediaParser::new();
1818        let ms = MediaSource::from_bytes(raw).unwrap();
1819        let res = parser.parse_exif_from_bytes(ms);
1820        assert!(
1821            res.is_err(),
1822            "expected error on truncated bytes, got {:?}",
1823            res
1824        );
1825    }
1826
1827    #[test]
1828    #[allow(deprecated)]
1829    fn parse_track_from_bytes_mov_basic() {
1830        let mut parser = MediaParser::new();
1831        let raw = std::fs::read("testdata/meta.mov").unwrap();
1832        let ms = MediaSource::from_bytes(raw).unwrap();
1833        let info = parser.parse_track_from_bytes(ms).unwrap();
1834        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1835        assert_eq!(
1836            info.get(crate::TrackInfoTag::Model),
1837            Some(&"iPhone X".into())
1838        );
1839    }
1840
1841    #[test]
1842    #[allow(deprecated)]
1843    fn parse_track_from_bytes_mp4_basic() {
1844        let mut parser = MediaParser::new();
1845        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1846        let ms = MediaSource::from_bytes(raw).unwrap();
1847        let info = parser.parse_track_from_bytes(ms).unwrap();
1848        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1849    }
1850
1851    #[test]
1852    #[allow(deprecated)]
1853    fn parse_track_from_bytes_mkv_basic() {
1854        let mut parser = MediaParser::new();
1855        let raw = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
1856        let ms = MediaSource::from_bytes(raw).unwrap();
1857        let info = parser.parse_track_from_bytes(ms).unwrap();
1858        assert_eq!(
1859            info.get(crate::TrackInfoTag::Width),
1860            Some(&(640_u32.into()))
1861        );
1862    }
1863
1864    #[test]
1865    #[allow(deprecated)]
1866    fn parse_track_from_bytes_on_image_returns_track_not_found() {
1867        let mut parser = MediaParser::new();
1868        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1869        let ms = MediaSource::from_bytes(raw).unwrap();
1870        let res = parser.parse_track_from_bytes(ms);
1871        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1872    }
1873
1874    #[test]
1875    fn parse_track_unified_from_memory_mov() {
1876        let mut parser = MediaParser::new();
1877        let raw = std::fs::read("testdata/meta.mov").unwrap();
1878        let ms = MediaSource::from_memory(raw).unwrap();
1879        let info = parser.parse_track(ms).unwrap();
1880        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1881    }
1882
1883    #[test]
1884    fn parse_track_unified_from_memory_mp4() {
1885        let mut parser = MediaParser::new();
1886        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1887        let ms = MediaSource::from_memory(raw).unwrap();
1888        let info = parser.parse_track(ms).unwrap();
1889        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1890    }
1891
1892    #[test]
1893    fn parse_track_unified_on_image_returns_track_not_found() {
1894        let mut parser = MediaParser::new();
1895        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1896        let ms = MediaSource::from_memory(raw).unwrap();
1897        let res = parser.parse_track(ms);
1898        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1899    }
1900
1901    #[test]
1902    fn parse_image_metadata_jpeg_returns_exif_only() {
1903        let mut parser = MediaParser::new();
1904        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1905        let img = parser.parse_image_metadata(ms).unwrap();
1906        assert!(img.exif.is_some());
1907        assert!(img.format.is_none());
1908    }
1909
1910    #[test]
1911    fn parse_image_metadata_jpeg_from_memory() {
1912        let mut parser = MediaParser::new();
1913        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1914        let ms = MediaSource::from_memory(raw).unwrap();
1915        let img = parser.parse_image_metadata(ms).unwrap();
1916        assert!(img.exif.is_some());
1917        assert!(img.format.is_none());
1918    }
1919
1920    #[test]
1921    fn parse_image_metadata_on_track_returns_exif_not_found() {
1922        let mut parser = MediaParser::new();
1923        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1924        let res = parser.parse_image_metadata(ms);
1925        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1926    }
1927
1928    #[cfg(feature = "tokio")]
1929    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1930    async fn parse_image_metadata_async_jpeg() {
1931        use crate::parser_async::AsyncMediaSource;
1932        let mut parser = MediaParser::new();
1933        let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
1934        let img = parser.parse_image_metadata_async(ms).await.unwrap();
1935        assert!(img.exif.is_some());
1936        assert!(img.format.is_none());
1937    }
1938
1939    #[cfg(feature = "tokio")]
1940    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1941    async fn async_media_source_from_memory_image_jpg() {
1942        use crate::parser_async::AsyncMediaSource;
1943        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1944        let ms = AsyncMediaSource::from_memory(raw).unwrap();
1945        assert_eq!(ms.kind(), MediaKind::Image);
1946        assert!(ms.memory.is_some());
1947    }
1948
1949    #[cfg(feature = "tokio")]
1950    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1951    async fn async_media_source_from_memory_track_mov() {
1952        use crate::parser_async::AsyncMediaSource;
1953        let raw = std::fs::read("testdata/meta.mov").unwrap();
1954        let ms = AsyncMediaSource::from_memory(raw).unwrap();
1955        assert_eq!(ms.kind(), MediaKind::Track);
1956    }
1957
1958    #[cfg(feature = "tokio")]
1959    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1960    async fn async_media_source_from_memory_rejects_unknown_mime() {
1961        use crate::parser_async::AsyncMediaSource;
1962        let raw = vec![0xAAu8; 256];
1963        let res = AsyncMediaSource::from_memory(raw);
1964        assert!(res.is_err());
1965    }
1966
1967    #[cfg(feature = "tokio")]
1968    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1969    async fn parse_exif_async_from_memory_jpg() {
1970        use crate::parser_async::AsyncMediaSource;
1971        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1972        let mut parser = MediaParser::new();
1973        let ms = AsyncMediaSource::from_memory(raw).unwrap();
1974        let iter = parser.parse_exif_async(ms).await.unwrap();
1975        let exif: crate::Exif = iter.into();
1976        assert!(exif.get(crate::ExifTag::Make).is_some());
1977    }
1978
1979    #[cfg(feature = "tokio")]
1980    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1981    async fn parse_exif_async_from_memory_zero_copy_preserved() {
1982        use crate::parser_async::AsyncMediaSource;
1983        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1984        let bytes = bytes::Bytes::from(raw);
1985        let mut parser = MediaParser::new();
1986        let ms = AsyncMediaSource::from_memory(bytes).unwrap();
1987        let iter = parser.parse_exif_async(ms).await.unwrap();
1988        // Memory mode must not poison the recycle cache — same invariant
1989        // as the sync route asserts.
1990        assert!(
1991            parser.state.cached_ptr_for_test().is_none(),
1992            "async memory mode must not write to the streaming-buf recycle cache"
1993        );
1994        drop(iter);
1995    }
1996
1997    #[cfg(feature = "tokio")]
1998    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1999    async fn parse_track_async_from_memory_mov() {
2000        use crate::parser_async::AsyncMediaSource;
2001        let raw = std::fs::read("testdata/meta.mov").unwrap();
2002        let mut parser = MediaParser::new();
2003        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2004        let info = parser.parse_track_async(ms).await.unwrap();
2005        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
2006    }
2007
2008    #[cfg(feature = "tokio")]
2009    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2010    async fn parse_image_metadata_async_from_memory_png() {
2011        use crate::parser_async::AsyncMediaSource;
2012        let raw = std::fs::read("testdata/exif.png").unwrap();
2013        let mut parser = MediaParser::new();
2014        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2015        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2016        assert!(img.exif.is_some());
2017        assert!(img.format.is_some());
2018    }
2019
2020    #[cfg(feature = "tokio")]
2021    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2022    async fn parse_image_metadata_async_from_memory_text_only_png() {
2023        // Memory route: bypasses fill_buf entirely, just verifies the
2024        // memory-mode path returns format-only metadata for a PNG with
2025        // no EXIF.
2026        use crate::parser_async::AsyncMediaSource;
2027        let raw = std::fs::read("testdata/text-only.png").unwrap();
2028        let mut parser = MediaParser::new();
2029        let ms = AsyncMediaSource::from_memory(raw).unwrap();
2030        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2031        assert!(img.exif.is_none());
2032        assert!(img.format.is_some());
2033    }
2034
2035    // Streaming-path coverage for the PNG-scoped EOF tolerance. The
2036    // 117-byte text-only.png is fully consumed during mime detection
2037    // (HEADER_PARSE_BUF_SIZE = 128), so the parse-time fill_buf hits
2038    // UnexpectedEof. The PNG-scoped tolerance must let the bytes already
2039    // in the parse buffer drive the parse to completion. These tests
2040    // would have caught the missed-async-tolerance bug the previous
2041    // memory-mode tests did not.
2042
2043    #[test]
2044    fn parse_exif_streaming_text_only_png_returns_exif_not_found() {
2045        // text-only.png has no EXIF — the contract is ExifNotFound, not
2046        // UnexpectedEof. Pre-EOF-tolerance, this would surface
2047        // UnexpectedEof because mime detection consumed all 117 bytes.
2048        let mut parser = MediaParser::new();
2049        let ms = MediaSource::open("testdata/text-only.png").unwrap();
2050        let res = parser.parse_exif(ms);
2051        assert!(
2052            matches!(res, Err(crate::Error::ExifNotFound)),
2053            "expected ExifNotFound for tEXt-only PNG, got {:?}",
2054            res
2055        );
2056    }
2057
2058    #[test]
2059    fn parse_image_metadata_streaming_text_only_png() {
2060        let mut parser = MediaParser::new();
2061        let ms = MediaSource::open("testdata/text-only.png").unwrap();
2062        let img = parser.parse_image_metadata(ms).unwrap();
2063        assert!(img.exif.is_none());
2064        assert!(img.format.is_some());
2065    }
2066
2067    #[cfg(feature = "tokio")]
2068    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2069    async fn parse_exif_async_streaming_text_only_png_returns_exif_not_found() {
2070        use crate::parser_async::AsyncMediaSource;
2071        let mut parser = MediaParser::new();
2072        let f = tokio::fs::File::open("testdata/text-only.png")
2073            .await
2074            .unwrap();
2075        let ms = AsyncMediaSource::seekable(f).await.unwrap();
2076        let res = parser.parse_exif_async(ms).await;
2077        assert!(
2078            matches!(res, Err(crate::Error::ExifNotFound)),
2079            "expected ExifNotFound for tEXt-only PNG via async streaming, got {:?}",
2080            res
2081        );
2082    }
2083
2084    #[cfg(feature = "tokio")]
2085    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
2086    async fn parse_image_metadata_async_streaming_text_only_png() {
2087        use crate::parser_async::AsyncMediaSource;
2088        let mut parser = MediaParser::new();
2089        let f = tokio::fs::File::open("testdata/text-only.png")
2090            .await
2091            .unwrap();
2092        let ms = AsyncMediaSource::seekable(f).await.unwrap();
2093        let img = parser.parse_image_metadata_async(ms).await.unwrap();
2094        assert!(img.exif.is_none());
2095        assert!(img.format.is_some());
2096    }
2097}