Skip to main content

nom_exif/
parser.rs

1use std::{
2    cmp::{max, min},
3    fmt::{Debug, Display},
4    fs::File,
5    io::{self, Read, Seek},
6    path::Path,
7};
8
9use crate::{
10    error::{ParsedError, ParsingError, ParsingErrorState},
11    exif::TiffHeader,
12    file::MediaMime,
13    ExifIter, TrackInfo,
14};
15
/// A function that tries to skip `n` bytes of `reader` by seeking.
///
/// The arguments are the reader and the number of bytes to skip.
///
/// Returns:
///
/// - `Ok(true)` on success;
/// - `Ok(false)` if the reader does not support seek (so the caller should
///   fall back to reading-and-discarding);
/// - `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
///
/// This is captured at construction time by `MediaSource::seekable` /
/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
/// runtime fn pointer.
pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25
26/// `MediaSource` represents a media data source that can be parsed by
27/// [`MediaParser`].
28///
29/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
30///
31/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
32///   (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
33///   [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
34///
35/// - In other cases:
36///
37///   - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
38///     (an already-open `File` goes here).
39///
///   - Use [`MediaSource::unseekable`] to create a MediaSource from a
///     reader that only implements `Read`.
42///
43/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
44/// since the former is more efficient when the parser needs to skip a large number of bytes.
45///
46/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
47/// its own buffer management and the buffers can be shared between multiple
48/// parsing tasks, thus avoiding frequent memory allocations.
pub struct MediaSource<R> {
    /// The underlying reader. For reader-backed sources the header bytes
    /// have already been consumed from it at construction time.
    pub(crate) reader: R,
    /// Header bytes read from `reader` while sniffing the format (at most
    /// `HEADER_PARSE_BUF_SIZE`). Empty for sources built by `from_bytes`.
    pub(crate) buf: Vec<u8>,
    /// Media format detected from the header bytes.
    pub(crate) mime: MediaMime,
    /// Seek-based skip strategy selected by the constructor.
    pub(crate) skip_by_seek: SkipBySeekFn<R>,
    /// P7: zero-copy memory-mode payload. `Some` only when the source was
    /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
    /// `skip_by_seek` are placeholders (and never consulted) in that mode.
    pub(crate) memory: Option<bytes::Bytes>,
}
59
60/// Top-level classification of a media source.
61///
62/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
63/// `Track` files are time-based containers — video, audio, or both — and
64/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
65/// audio containers like `.mka` are classified as `Track`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaKind {
    /// An image source; parse it with `MediaParser::parse_exif`.
    Image,
    /// A time-based container (video, audio, or both); parse it with
    /// `MediaParser::parse_track`.
    Track,
}
71
72impl<R> Debug for MediaSource<R> {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("MediaSource")
75            .field("mime", &self.mime)
76            .finish_non_exhaustive()
77    }
78}
79
// Maximum number of leading bytes read from a source to detect its media
// format. Should be enough for parsing header.
const HEADER_PARSE_BUF_SIZE: usize = 128;
82
83impl<R> MediaSource<R> {
84    /// Top-level classification of this media source.
85    pub fn kind(&self) -> MediaKind {
86        match self.mime {
87            MediaMime::Image(_) => MediaKind::Image,
88            MediaMime::Track(_) => MediaKind::Track,
89        }
90    }
91}
92
93impl<R: Read> MediaSource<R> {
94    fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95        let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96        reader
97            .by_ref()
98            .take(HEADER_PARSE_BUF_SIZE as u64)
99            .read_to_end(&mut buf)?;
100        let mime: MediaMime = buf.as_slice().try_into()?;
101        Ok(Self {
102            reader,
103            buf,
104            mime,
105            skip_by_seek,
106            memory: None,
107        })
108    }
109
110    /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111    /// reader that only impl `Read`
112    ///
113    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114    /// since the former is more efficient when the parser needs to skip a large number of bytes.
115    pub fn unseekable(reader: R) -> crate::Result<Self> {
116        Self::build(reader, |_, _| Ok(false))
117    }
118}
119
120impl<R: Read + Seek> MediaSource<R> {
121    /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122    ///
123    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124    /// since the former is more efficient when the parser needs to skip a large number of bytes.
125    pub fn seekable(reader: R) -> crate::Result<Self> {
126        Self::build(reader, |r, n| {
127            let signed: i64 = n
128                .try_into()
129                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
130            r.seek_relative(signed)?;
131            Ok(true)
132        })
133    }
134}
135
136impl MediaSource<File> {
137    /// Open a file at `path` and parse its header to detect the media format.
138    ///
139    /// This is the v3-preferred entry point for the common case of "I have a
140    /// path on disk". For an already-open `File` use [`Self::seekable`].
141    pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142        Self::seekable(File::open(path)?)
143    }
144}
145
146impl MediaSource<()> {
147    /// Build a [`MediaSource`] from an in-memory byte payload.
148    ///
149    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150    /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151    /// HTTP-stack body types that implement `Into<Bytes>` directly.
152    ///
153    /// The header (first up to 128 bytes) is sniffed for media kind, the
154    /// same way [`MediaSource::open`] does it for files. The full payload is
155    /// stored zero-copy: subsequent parsing through
156    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157    /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158    /// via reference counting.
159    ///
160    /// The returned source is parsed by the dedicated
161    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162    /// methods. The streaming `parse_exif` / `parse_track` methods do not
163    /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164    ///
165    /// # Example
166    ///
167    /// ```rust
168    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169    ///
170    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171    /// let ms = MediaSource::from_bytes(bytes)?;
172    /// assert_eq!(ms.kind(), MediaKind::Image);
173    ///
174    /// let mut parser = MediaParser::new();
175    /// let _iter = parser.parse_exif_from_bytes(ms)?;
176    /// # Ok::<(), nom_exif::Error>(())
177    /// ```
178    pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
179        let bytes = bytes.into();
180        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
181        let mime: MediaMime = bytes[..head_end].try_into()?;
182        Ok(Self {
183            reader: (),
184            buf: Vec::new(),
185            mime,
186            // Placeholder: never invoked in memory mode (clear_and_skip's
187            // AdvanceOnly path is the only one taken).
188            skip_by_seek: |_, _| Ok(false),
189            memory: Some(bytes),
190        })
191    }
192}
193
194// ----- Parse-time buffer policy -----
195//
196// Layered by lifecycle:
197//
198// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
199//   `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
200//   helpers don't over-commit.
201// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
202//   parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
203//   blocking-pool dispatches.
204// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
205//   single parse. Anything that would push past this is rejected as
206//   `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
207//   that declare absurd sizes.
208// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
209//   recycling. After a parse whose buffer ended above this, `shrink_to`
210//   gives the excess back to the allocator. Tuned for typical metadata
211//   sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
212//   recycle path stays warm for batch workloads.
/// Size of the first parse fill and capacity of a freshly allocated buffer (8 KiB).
pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024;
/// Minimum read size for fills issued from inside the parse loop (16 KiB).
pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024;
/// Hard cap on buffer growth during a single parse (1 GiB).
pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024;
/// Largest buffer capacity kept for reuse between parses (4 MiB).
const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024;
217
/// Read-side view of a parse buffer whose start can be moved forward.
pub(crate) trait Buf {
    /// Returns the bytes currently available to the parser, starting at the
    /// current position.
    fn buffer(&self) -> &[u8];
    /// Discards the buffered bytes.
    fn clear(&mut self);

    /// Sets the offset at which `buffer()` starts.
    fn set_position(&mut self, pos: usize);
    /// Returns the offset at which `buffer()` starts.
    #[allow(unused)]
    fn position(&self) -> usize;
}
226
227/// Buffer-management state used by `MediaParser` (sync and async paths share it).
228///
229/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
230/// one *cached* `Bytes` clone of the most recently shared buffer. When the
231/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
232/// succeeds the underlying allocation is reused (the previous `ExifIter`
233/// has been dropped); otherwise the clone is discarded and a fresh
234/// `Vec<u8>` is allocated.
235///
236/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
237/// are `&mut self`, so a single slot is sufficient.
#[derive(Debug, Default)]
pub(crate) struct BufferedParserState {
    /// Clone of the most recently shared buffer, kept so `acquire_buf` can
    /// try to reclaim its allocation for the next parse.
    cached: Option<bytes::Bytes>,
    /// Active buffer being filled by the current parse; `None` until
    /// `acquire_buf` runs and again after `share_buf` or `reset`.
    buf: Option<Vec<u8>>,
    /// P7: memory-mode storage. When `Some`, the parser is feeding from a
    /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
    /// `cached` are unused in this mode — the user owns the allocation,
    /// so there is nothing to recycle.
    memory: Option<bytes::Bytes>,
    /// Offset into the active buffer (or `memory`) at which `Buf::buffer()`
    /// starts.
    position: usize,
}
249
250impl BufferedParserState {
251    pub(crate) fn new() -> Self {
252        Self::default()
253    }
254
255    pub(crate) fn reset(&mut self) {
256        // If a parse failed mid-way the buf may still be present; drop it.
257        // Cache stays — recycle on next acquire if eligible.
258        self.buf = None;
259        self.memory = None;
260        self.position = 0;
261    }
262
263    /// Switch the parser state into memory mode, owning `bytes` directly.
264    /// Caller must have already called `reset()` (asserted in debug). Subsequent
265    /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
266    /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
267    pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
268        debug_assert!(
269            self.buf.is_none() && self.memory.is_none(),
270            "set_memory called on non-clean state"
271        );
272        self.memory = Some(bytes);
273        self.position = 0;
274    }
275
276    pub(crate) fn is_memory_mode(&self) -> bool {
277        self.memory.is_some()
278    }
279
280    pub(crate) fn acquire_buf(&mut self) {
281        if self.memory.is_some() {
282            // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
283            return;
284        }
285        debug_assert!(self.buf.is_none());
286        let buf = match self.cached.take() {
287            Some(b) => match b.try_into_mut() {
288                Ok(bm) => {
289                    let mut v = Vec::<u8>::from(bm);
290                    v.clear();
291                    if v.capacity() > MAX_REUSE_BUF_SIZE {
292                        v.shrink_to(MAX_REUSE_BUF_SIZE);
293                    }
294                    v
295                }
296                Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
297            },
298            None => Vec::with_capacity(INIT_BUF_SIZE),
299        };
300        self.buf = Some(buf);
301    }
302
303    pub(crate) fn buf(&self) -> &Vec<u8> {
304        self.buf.as_ref().expect("no buf here")
305    }
306
307    pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
308        self.buf.as_mut().expect("no buf here")
309    }
310
311    #[cfg(test)]
312    pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
313        self.cached.as_ref().map(|b| b.as_ptr())
314    }
315
316    #[cfg(test)]
317    pub(crate) fn buf_is_none_for_test(&self) -> bool {
318        self.buf.is_none()
319    }
320}
321
322impl Buf for BufferedParserState {
323    fn buffer(&self) -> &[u8] {
324        if let Some(m) = &self.memory {
325            return &m[self.position..];
326        }
327        &self.buf()[self.position..]
328    }
329    fn clear(&mut self) {
330        // In memory mode `clear` is a no-op: there is no scratch buffer to
331        // truncate, and the caller's bytes must remain available for further
332        // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
333        // what advances `position` in memory mode.
334        if self.memory.is_some() {
335            return;
336        }
337        self.buf_mut().clear();
338    }
339    fn set_position(&mut self, pos: usize) {
340        self.position = pos;
341    }
342    fn position(&self) -> usize {
343        self.position
344    }
345}
346
347impl ShareBuf for BufferedParserState {
348    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
349        if let Some(m) = self.memory.take() {
350            // Zero-copy share: caller already owns the allocation. No cache
351            // write — recycle is irrelevant when the user holds the alloc.
352            let position = self.position;
353            return (m, position);
354        }
355        let vec = self.buf.take().expect("no buf to share");
356        let bytes = bytes::Bytes::from(vec);
357        let position = self.position;
358        self.cached = Some(bytes.clone());
359        (bytes, position)
360    }
361}
362
/// The action `clear_and_skip` has to take for a requested skip, given how
/// many bytes are currently buffered.
pub(crate) enum SkipPlan {
    /// The skip ends inside the buffered data; only the position moves.
    AdvanceOnly,
    /// The buffer must be cleared and `extra` further bytes skipped from the
    /// reader.
    ClearAndSkip { extra: usize },
}

/// Chooses the [`SkipPlan`] for skipping `n` bytes when `buffer_len` bytes
/// are buffered: `AdvanceOnly` when `n <= buffer_len`, otherwise
/// `ClearAndSkip` carrying the shortfall `n - buffer_len`.
pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
    match n.checked_sub(buffer_len) {
        None | Some(0) => SkipPlan::AdvanceOnly,
        Some(extra) => SkipPlan::ClearAndSkip { extra },
    }
}
381
382pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
383    if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
384        tracing::error!(?requested, "the requested buffer size is too big");
385        return Err(io::ErrorKind::Unsupported.into());
386    }
387    Ok(())
388}
389
/// Result of one [`parse_loop_step`] call, telling the driving loop what to
/// do next.
pub(crate) enum LoopAction<O> {
    /// Parse succeeded; return this value to the caller.
    Done(O),
    /// Need more bytes — call `fill_buf(reader, n)` then re-step.
    NeedFill(usize),
    /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
    Skip(usize),
    /// Parse failed permanently.
    Failed(String),
}
400
/// Closure type passed to [`parse_loop_step`].
///
/// It is called as `parse(buffer, offset, state)`, where `state` is the
/// `ParsingState` left behind by the previous failed attempt, if any.
pub(crate) type ParseFn<'a, O> =
    dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
404
405/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
406pub(crate) fn parse_loop_step<O>(
407    buffer: &[u8],
408    offset: usize,
409    parsing_state: &mut Option<ParsingState>,
410    parse: &mut ParseFn<'_, O>,
411) -> LoopAction<O> {
412    match parse(buffer, offset, parsing_state.take()) {
413        Ok(o) => LoopAction::Done(o),
414        Err(es) => {
415            *parsing_state = es.state;
416            match es.err {
417                ParsingError::Need(n) => LoopAction::NeedFill(n),
418                ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
419                ParsingError::Failed(s) => LoopAction::Failed(s),
420            }
421        }
422    }
423}
424
/// Format-specific state threaded between parse attempts: a failed attempt
/// returns it inside its error and the next attempt receives it back.
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// Carries a TIFF header.
    /// NOTE(review): presumably one already parsed by an earlier attempt so
    /// it need not be parsed again — confirm against the exif parser.
    TiffHeader(TiffHeader),
    /// Carries a size value for the HEIF path.
    /// NOTE(review): presumably the Exif payload size — confirm.
    HeifExifSize(usize),
    /// Carries a size value for the CR3 path.
    /// NOTE(review): presumably the Exif payload size — confirm.
    Cr3ExifSize(usize),
}
431
432impl Display for ParsingState {
433    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434        match self {
435            ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
436            ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
437            ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
438        }
439    }
440}
441
442// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
443// parsing and `ParsingState` threading for format-specific state machines.
444pub(crate) trait BufParser: Buf + Debug {
445    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
446
447    fn load_and_parse<R: Read, P, O>(
448        &mut self,
449        reader: &mut R,
450        skip_by_seek: SkipBySeekFn<R>,
451        mut parse: P,
452    ) -> Result<O, ParsedError>
453    where
454        P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
455    {
456        self.load_and_parse_with_offset(
457            reader,
458            skip_by_seek,
459            |data, _, state| parse(data, state),
460            0,
461        )
462    }
463
464    #[tracing::instrument(skip_all)]
465    fn load_and_parse_with_offset<R: Read, P, O>(
466        &mut self,
467        reader: &mut R,
468        skip_by_seek: SkipBySeekFn<R>,
469        mut parse: P,
470        offset: usize,
471    ) -> Result<O, ParsedError>
472    where
473        P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
474    {
475        if offset >= self.buffer().len() {
476            self.fill_buf(reader, MIN_GROW_SIZE)?;
477        }
478        let mut parsing_state: Option<ParsingState> = None;
479        loop {
480            match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
481                LoopAction::Done(o) => return Ok(o),
482                LoopAction::NeedFill(needed) => {
483                    let to_read = max(needed, MIN_GROW_SIZE);
484                    let n = self.fill_buf(reader, to_read)?;
485                    if n == 0 {
486                        return Err(ParsedError::NoEnoughBytes);
487                    }
488                }
489                LoopAction::Skip(n) => {
490                    self.clear_and_skip(reader, skip_by_seek, n)?;
491                }
492                LoopAction::Failed(s) => return Err(ParsedError::Failed(s)),
493            }
494        }
495    }
496
497    #[tracing::instrument(skip(reader, skip_by_seek))]
498    fn clear_and_skip<R: Read>(
499        &mut self,
500        reader: &mut R,
501        skip_by_seek: SkipBySeekFn<R>,
502        n: usize,
503    ) -> Result<(), ParsedError> {
504        match clear_and_skip_decide(self.buffer().len(), n) {
505            SkipPlan::AdvanceOnly => {
506                self.set_position(self.position() + n);
507                Ok(())
508            }
509            SkipPlan::ClearAndSkip { extra: skip_n } => {
510                self.clear();
511                let done = (skip_by_seek)(
512                    reader,
513                    skip_n
514                        .try_into()
515                        .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
516                )?;
517                if !done {
518                    let mut skipped = 0;
519                    while skipped < skip_n {
520                        let mut to_skip = skip_n - skipped;
521                        to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
522                        let n = self.fill_buf(reader, to_skip)?;
523                        skipped += n;
524                        if skipped <= skip_n {
525                            self.clear();
526                        } else {
527                            let remain = skipped - skip_n;
528                            self.set_position(self.buffer().len() - remain);
529                            break;
530                        }
531                    }
532                }
533
534                if self.buffer().is_empty() {
535                    self.fill_buf(reader, MIN_GROW_SIZE)?;
536                }
537                Ok(())
538            }
539        }
540    }
541}
542
543impl BufParser for MediaParser {
544    #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
545    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
546        if self.state.is_memory_mode() {
547            // Memory mode owns every byte it will ever have. A request for
548            // more is "the parser walked off the end of the input"; surface
549            // it the same way the streaming path surfaces a 0-byte read.
550            return Err(std::io::ErrorKind::UnexpectedEof.into());
551        }
552        check_fill_size(self.state.buf().len(), size)?;
553
554        // Do not pre-allocate `size` bytes: a crafted box header can declare a
555        // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
556        // stream length. reserve_exact would allocate that memory immediately
557        // even when the reader has only a few bytes left. read_to_end grows the
558        // buffer from the reader's actual size hint instead.
559        let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
560        if n == 0 {
561            tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
562            return Err(std::io::ErrorKind::UnexpectedEof.into());
563        }
564
565        tracing::debug!(
566            ?size,
567            ?n,
568            buf_len = self.state.buf().len(),
569            "fill_buf: read bytes"
570        );
571
572        Ok(n)
573    }
574}
575
576impl Buf for MediaParser {
577    fn buffer(&self) -> &[u8] {
578        self.state.buffer()
579    }
580
581    fn clear(&mut self) {
582        self.state.clear();
583    }
584
585    fn set_position(&mut self, pos: usize) {
586        self.state.set_position(pos);
587    }
588
589    fn position(&self) -> usize {
590        self.state.position()
591    }
592}
593
594/// A `MediaParser` can parse media info from a [`MediaSource`].
595///
596/// `MediaParser` manages inner parse buffers that can be shared between
597/// multiple parsing tasks, thus avoiding frequent memory allocations.
598///
599/// Therefore:
600///
601/// - Try to reuse a `MediaParser` instead of creating a new one every time
602///   you need it.
603///
604/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
605///
606/// ## Example
607///
608/// ```rust
609/// use nom_exif::*;
610/// use chrono::DateTime;
611///
612/// let mut parser = MediaParser::new();
613///
614/// // ------------------- Parse Exif Info
615/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
616/// assert_eq!(ms.kind(), MediaKind::Image);
617/// let mut iter = parser.parse_exif(ms).unwrap();
618///
619/// let entry = iter.next().unwrap();
620/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
621/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
622///
623/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
624/// // we can start the iteration from the beginning.
625/// let exif: Exif = iter.clone().into();
626/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
627///
628/// // ------------------- Parse Track Info
629/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
630/// assert_eq!(ms.kind(), MediaKind::Track);
631/// let info = parser.parse_track(ms).unwrap();
632///
633/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
634/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
635/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
636/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
637/// assert_eq!(
638///     info.gps_info().unwrap().latitude,
639///     LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
640/// );
641/// ```
pub struct MediaParser {
    /// Buffer-management state: the active parse buffer, the recyclable
    /// cache, and the memory-mode payload.
    state: BufferedParserState,
}
645
646impl Debug for MediaParser {
647    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
648        f.debug_struct("MediaParser")
649            .field("state", &self.state)
650            .finish_non_exhaustive()
651    }
652}
653
654impl Default for MediaParser {
655    fn default() -> Self {
656        Self {
657            state: BufferedParserState::new(),
658        }
659    }
660}
661
/// Hands the parser's buffered data over to the caller as shareable `Bytes`.
pub(crate) trait ShareBuf {
    /// Take ownership of the parser's active buffer and return the full
    /// allocation as `Bytes` plus the parser's `position` at share-time.
    /// Caller is responsible for slicing: a parse-loop range `r` corresponds
    /// to absolute range `(r.start + position)..(r.end + position)`.
    fn share_buf(&mut self) -> (bytes::Bytes, usize);
}
669
670impl ShareBuf for MediaParser {
671    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
672        self.state.share_buf()
673    }
674}
675
676impl MediaParser {
677    pub fn new() -> Self {
678        Self::default()
679    }
680
681    /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
682    /// if the source is a `Track` (use [`Self::parse_track`] instead).
683    ///
684    /// `MediaParser` reuses its internal parse buffer across calls, so prefer
685    /// reusing a single `MediaParser` over creating a new one per file. Drop
686    /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
687    /// the next `parse_*` call so the buffer can be reclaimed.
688    pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
689        self.reset();
690        self.acquire_buf();
691        self.buf_mut().append(&mut ms.buf);
692        let res: crate::Result<ExifIter> = (|| {
693            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
694            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
695                return Err(crate::Error::ExifNotFound);
696            }
697            crate::exif::parse_exif_iter(
698                self,
699                ms.mime.unwrap_image(),
700                &mut ms.reader,
701                ms.skip_by_seek,
702            )
703        })();
704        self.reset();
705        res
706    }
707
708    /// Parse track info from a video/audio source.
709    ///
710    /// In v3.1, this also accepts JPEG images that carry an embedded
711    /// Pixel/Google Motion Photo trailer: when [`ExifIter::has_embedded_track`]
712    /// returned `true` for such a JPEG, calling `parse_track` on the same
713    /// source extracts the embedded MP4's metadata. Other image formats
714    /// without an embedded track return [`crate::Error::TrackNotFound`].
715    pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
716        self.reset();
717        self.acquire_buf();
718        self.buf_mut().append(&mut ms.buf);
719        let res: crate::Result<TrackInfo> = (|| {
720            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
721            match ms.mime {
722                crate::file::MediaMime::Image(crate::file::MediaMimeImage::Jpeg) => {
723                    self.parse_jpeg_motion_photo(&mut ms.reader)
724                }
725                crate::file::MediaMime::Image(_) => Err(crate::Error::TrackNotFound),
726                crate::file::MediaMime::Track(mime_track) => {
727                    let skip = ms.skip_by_seek;
728                    Ok(self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
729                        crate::video::parse_track_info(data, mime_track)
730                            .map_err(|e| ParsingErrorState::new(e, None))
731                    })?)
732                }
733            }
734        })();
735        self.reset();
736        res
737    }
738
739    /// Read a JPEG to EOF, locate a Pixel-style Motion Photo MP4 trailer,
740    /// and parse it as track metadata. Returns
741    /// [`crate::Error::TrackNotFound`] if no Motion Photo signal is
742    /// present in the JPEG's XMP.
743    fn parse_jpeg_motion_photo<R: Read>(&mut self, reader: &mut R) -> crate::Result<TrackInfo> {
744        // Drain the rest of the JPEG into the parse buffer so we can
745        // address the trailing MP4 by its byte offset from EOF.
746        reader.read_to_end(self.buf_mut())?;
747        let buf = self.buf_mut();
748        let Some(offset) = crate::jpeg::find_motion_photo_offset(buf) else {
749            return Err(crate::Error::TrackNotFound);
750        };
751        let trailer_start = (buf.len() as u64)
752            .checked_sub(offset)
753            .ok_or(crate::Error::TrackNotFound)? as usize;
754        let trailer = &buf[trailer_start..];
755
756        // The trailer can be MP4 / MOV / 3gp depending on the source device;
757        // dispatch by sniffing it as a fresh ISO BMFF input.
758        let trailer_mime =
759            crate::file::MediaMime::try_from(trailer).map_err(|_| crate::Error::TrackNotFound)?;
760        let mime_track = match trailer_mime {
761            crate::file::MediaMime::Track(t) => t,
762            crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
763        };
764        crate::video::parse_track_info(trailer, mime_track).map_err(|e| match e {
765            crate::error::ParsingError::Need(_) | crate::error::ParsingError::ClearAndSkip(_) => {
766                crate::Error::UnexpectedEof {
767                    context: "motion-photo trailer",
768                }
769            }
770            crate::error::ParsingError::Failed(msg) => crate::Error::Malformed {
771                kind: crate::error::MalformedKind::IsoBmffBox,
772                message: msg,
773            },
774        })
775    }
776
777    /// Parse Exif metadata from an in-memory byte payload built via
778    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::ExifNotFound` if the
779    /// payload is a `Track` (use [`Self::parse_track_from_bytes`] instead).
780    ///
781    /// Memory-mode parsing is **zero-copy**: the underlying `Bytes` is shared
782    /// with the returned [`ExifIter`] (and its sub-IFDs / CR3 CMT blocks) via
783    /// reference counting. No `Vec<u8>` is allocated for the parse buffer.
784    pub fn parse_exif_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<ExifIter> {
785        self.reset();
786        let memory = ms
787            .memory
788            .take()
789            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
790        self.state.set_memory(memory);
791        let res: crate::Result<ExifIter> = (|| {
792            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
793                return Err(crate::Error::ExifNotFound);
794            }
795            // Placeholder reader: never read from in memory mode (fill_buf
796            // short-circuits; clear_and_skip uses AdvanceOnly).
797            let mut empty = std::io::empty();
798            crate::exif::parse_exif_iter(
799                self,
800                ms.mime.unwrap_image(),
801                &mut empty,
802                // Placeholder skip-by-seek: never invoked.
803                |_, _| Ok(false),
804            )
805        })();
806        self.reset();
807        res
808    }
809
810    /// Parse track info from an in-memory video/audio byte payload built via
811    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::TrackNotFound` if the
812    /// payload is an `Image` (use [`Self::parse_exif_from_bytes`] instead).
813    ///
814    /// Like [`Self::parse_exif_from_bytes`], the parse is zero-copy with respect to
815    /// the user-supplied `Bytes`.
816    pub fn parse_track_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<TrackInfo> {
817        self.reset();
818        let memory = ms
819            .memory
820            .take()
821            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
822        self.state.set_memory(memory);
823        let res: crate::Result<TrackInfo> = (|| {
824            let mime_track = match ms.mime {
825                crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
826                crate::file::MediaMime::Track(t) => t,
827            };
828            let mut empty = std::io::empty();
829            let out = self.load_and_parse(
830                &mut empty,
831                |_, _| Ok(false),
832                |data, _| {
833                    crate::video::parse_track_info(data, mime_track)
834                        .map_err(|e| ParsingErrorState::new(e, None))
835                },
836            )?;
837            Ok(out)
838        })();
839        self.reset();
840        res
841    }
842
843    fn reset(&mut self) {
844        self.state.reset();
845    }
846
847    fn buf_mut(&mut self) -> &mut Vec<u8> {
848        self.state.buf_mut()
849    }
850
851    fn acquire_buf(&mut self) {
852        self.state.acquire_buf();
853    }
854}
855
856#[cfg(feature = "tokio")]
857mod tokio_impl {
858    use super::*;
859    use crate::error::ParsingErrorState;
860    use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
861    use tokio::io::{AsyncRead, AsyncReadExt};
862
863    impl AsyncBufParser for MediaParser {
864        async fn fill_buf<R: AsyncRead + Unpin>(
865            &mut self,
866            reader: &mut R,
867            size: usize,
868        ) -> std::io::Result<usize> {
869            if self.state.is_memory_mode() {
870                // Memory mode owns every byte it will ever have. Surface
871                // "walked off end of input" the same way the streaming path
872                // surfaces a 0-byte read.
873                return Err(std::io::ErrorKind::UnexpectedEof.into());
874            }
875            check_fill_size(self.state.buf().len(), size)?;
876            // Same rationale as the sync version: do not pre-allocate `size` bytes.
877            let n = reader
878                .take(size as u64)
879                .read_to_end(self.state.buf_mut())
880                .await?;
881            if n == 0 {
882                return Err(std::io::ErrorKind::UnexpectedEof.into());
883            }
884            Ok(n)
885        }
886    }
887
888    impl MediaParser {
889        /// Parse Exif metadata from an async image source. Returns
890        /// `Error::ExifNotFound` if the source is a `Track`.
891        pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
892            &mut self,
893            mut ms: AsyncMediaSource<R>,
894        ) -> crate::Result<ExifIter> {
895            self.reset();
896            self.acquire_buf();
897            self.buf_mut().append(&mut ms.buf);
898            let res: crate::Result<ExifIter> = async {
899                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
900                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
901                    return Err(crate::Error::ExifNotFound);
902                }
903                crate::exif::parse_exif_iter_async(
904                    self,
905                    ms.mime.unwrap_image(),
906                    &mut ms.reader,
907                    ms.skip_by_seek,
908                )
909                .await
910            }
911            .await;
912            self.reset();
913            res
914        }
915
916        /// Parse track info from an async video/audio source. Returns
917        /// `Error::TrackNotFound` if the source is an `Image`.
918        pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
919            &mut self,
920            mut ms: AsyncMediaSource<R>,
921        ) -> crate::Result<TrackInfo> {
922            self.reset();
923            self.acquire_buf();
924            self.buf_mut().append(&mut ms.buf);
925            let res: crate::Result<TrackInfo> = async {
926                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
927                let mime_track = match ms.mime {
928                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
929                    crate::file::MediaMime::Track(t) => t,
930                };
931                let skip = ms.skip_by_seek;
932                let out = <Self as AsyncBufParser>::load_and_parse(
933                    self,
934                    &mut ms.reader,
935                    skip,
936                    |data, _| {
937                        crate::video::parse_track_info(data, mime_track)
938                            .map_err(|e| ParsingErrorState::new(e, None))
939                    },
940                )
941                .await?;
942                Ok(out)
943            }
944            .await;
945            self.reset();
946            res
947        }
948    }
949}
950
951#[cfg(test)]
952mod tests {
953    use std::sync::{LazyLock, Mutex, MutexGuard};
954
955    use super::*;
956    use test_case::case;
957
/// Expected outcome for a sample file in the `parse_media` table test.
enum TrackExif {
    /// The source opens as a track and `parse_track` succeeds.
    Track,
    /// The source opens as an image and `parse_exif` succeeds.
    Exif,
    /// The source opens, but parsing it returns an error.
    NoData,
    /// Opening the source itself fails.
    Invalid,
}
964    use TrackExif::*;
965
966    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
967    fn parser() -> MutexGuard<'static, MediaParser> {
968        PARSER.lock().unwrap()
969    }
970
971    #[case("3gp_640x360.3gp", Track)]
972    #[case("broken.jpg", Exif)]
973    #[case("compatible-brands-fail.heic", Invalid)]
974    #[case("compatible-brands-fail.mov", Invalid)]
975    #[case("compatible-brands.heic", NoData)]
976    #[case("compatible-brands.mov", NoData)]
977    #[case("embedded-in-heic.mov", Track)]
978    #[case("exif.heic", Exif)]
979    #[case("exif.jpg", Exif)]
980    #[case("exif-no-tz.jpg", Exif)]
981    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
982    #[case("meta.mov", Track)]
983    #[case("meta.mp4", Track)]
984    #[case("mka.mka", Track)]
985    #[case("mkv_640x360.mkv", Track)]
986    #[case("exif-one-entry.heic", Exif)]
987    #[case("no-exif.jpg", NoData)]
988    #[case("tif.tif", Exif)]
989    #[case("ramdisk.img", Invalid)]
990    #[case("webm_480.webm", Track)]
991    fn parse_media(path: &str, te: TrackExif) {
992        let mut parser = parser();
993        let ms = MediaSource::open(Path::new("testdata").join(path));
994        match te {
995            Track => {
996                let ms = ms.unwrap();
997                assert_eq!(ms.kind(), MediaKind::Track);
998                let _: TrackInfo = parser.parse_track(ms).unwrap();
999            }
1000            Exif => {
1001                let ms = ms.unwrap();
1002                assert_eq!(ms.kind(), MediaKind::Image);
1003                let mut it: ExifIter = parser.parse_exif(ms).unwrap();
1004                let _ = it.parse_gps();
1005
1006                if path.contains("one-entry") {
1007                    assert!(it.next().is_some());
1008                    assert!(it.next().is_none());
1009
1010                    let exif: crate::Exif = it.clone_rewound().into();
1011                    assert!(exif.get(ExifTag::Orientation).is_some());
1012                } else {
1013                    let _: crate::Exif = it.clone_rewound().into();
1014                }
1015            }
1016            NoData => {
1017                let ms = ms.unwrap();
1018                match ms.kind() {
1019                    MediaKind::Image => {
1020                        let res = parser.parse_exif(ms);
1021                        res.unwrap_err();
1022                    }
1023                    MediaKind::Track => {
1024                        let res = parser.parse_track(ms);
1025                        res.unwrap_err();
1026                    }
1027                }
1028            }
1029            Invalid => {
1030                ms.unwrap_err();
1031            }
1032        }
1033    }
1034
1035    use crate::testkit::open_sample;
1036    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
1037    use chrono::{DateTime, FixedOffset, NaiveDateTime};
1038    use test_case::test_case;
1039
1040    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
1041    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
1042    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, 
1043        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), 
1044            Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
1045    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal, 
1046        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1047    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1048        let mut parser = parser();
1049
1050        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1051        assert_eq!(mf.kind(), MediaKind::Image);
1052        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1053        let exif: Exif = iter.into();
1054        assert_eq!(exif.get(tag).unwrap(), &v);
1055
1056        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1057        assert_eq!(mf.kind(), MediaKind::Image);
1058        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1059        let exif: Exif = iter.into();
1060        assert_eq!(exif.get(tag).unwrap(), &v);
1061    }
1062
1063    use crate::video::TrackInfoTag::*;
1064
1065    #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1066    #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1067    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1068    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1069    #[test_case("meta.mov", Make, "Apple".into())]
1070    #[test_case("meta.mov", Model, "iPhone X".into())]
1071    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1072    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1073    #[test_case("meta.mp4", Width, 1920_u32.into())]
1074    #[test_case("meta.mp4", Height, 1080_u32.into())]
1075    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1076    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1077    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1078    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1079    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1080    #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1081    #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1082    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1083    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1084    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1085        let mut parser = parser();
1086
1087        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1088        let info: TrackInfo = parser.parse_track(mf).unwrap();
1089        assert_eq!(info.get(tag).unwrap(), &v);
1090
1091        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1092        let info: TrackInfo = parser.parse_track(mf).unwrap();
1093        assert_eq!(info.get(tag).unwrap(), &v);
1094    }
1095
1096    #[test_case("crash_moov-trak")]
1097    #[test_case("crash_skip_large")]
1098    #[test_case("crash_add_large")]
1099    fn parse_track_crash(path: &str) {
1100        let mut parser = parser();
1101
1102        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1103        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1104
1105        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1106        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1107    }
1108
// Regression guard: the sample is a crafted ISOBMFF file that declares an
// extended 64-bit box size just under MAX_PARSE_BUF_SIZE (~1 GB). Before the
// fix, the unseekable parser called reserve_exact() with that size ahead of
// reading, allocating ~1 GB although the stream held only a few KB.
// See commit 81f9e8a.
#[test]
fn parse_oom_large_box() {
    let mut media_parser = parser();

    // Opens a fresh handle on the sample, seekable or not.
    let open = |seekable: bool| {
        let file = open_sample("oom_large_box.heic").unwrap();
        let opened = if seekable {
            MediaSource::seekable(file)
        } else {
            MediaSource::unseekable(file)
        };
        opened.unwrap()
    };

    // Outcomes are deliberately ignored; the test only has to finish.
    for seekable in [true, false] {
        let _: Result<ExifIter, _> = media_parser.parse_exif(open(seekable));
    }
    for seekable in [true, false] {
        let _: TrackInfo = media_parser
            .parse_track(open(seekable))
            .unwrap_or_default();
    }
}
1129
/// `kind()` reports `Image` for a JPEG sample and `Track` for a MOV sample.
#[test]
fn media_kind_classifies_image_and_track() {
    let expectations = [
        ("testdata/exif.jpg", MediaKind::Image),
        ("testdata/meta.mov", MediaKind::Track),
    ];
    for (sample, expected) in expectations {
        let source = MediaSource::open(sample).unwrap();
        assert_eq!(source.kind(), expected);
    }
}
1138
/// `MediaSource::open` accepts a path and classifies the JPEG sample as an image.
#[test]
fn media_source_open() {
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let kind = source.kind();
    assert_eq!(kind, MediaKind::Image);
}
1144
/// `parse_exif` on an image source yields an `ExifIter`.
#[test]
fn parse_exif_returns_exif_iter() {
    let mut media_parser = parser();
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let parsed = media_parser.parse_exif(source);
    let _: ExifIter = parsed.unwrap();
}
1151
/// `parse_track` on a track source yields a `TrackInfo`.
#[test]
fn parse_track_returns_track_info() {
    let mut media_parser = parser();
    let source = MediaSource::open("testdata/meta.mov").unwrap();
    let parsed = media_parser.parse_track(source);
    let _: TrackInfo = parsed.unwrap();
}
1158
/// Asking for Exif from a track source must fail with `ExifNotFound`.
#[test]
fn parse_exif_on_track_returns_exif_not_found_v3() {
    let mut media_parser = parser();
    let track_source = MediaSource::open("testdata/meta.mov").unwrap();
    let outcome = media_parser.parse_exif(track_source);
    assert!(matches!(outcome, Err(crate::Error::ExifNotFound)));
}
1166
/// Asking for track info from an image source must fail with `TrackNotFound`.
#[test]
fn parse_track_on_image_returns_track_not_found_v3() {
    let mut media_parser = parser();
    let image_source = MediaSource::open("testdata/exif.jpg").unwrap();
    let outcome = media_parser.parse_track(image_source);
    assert!(matches!(outcome, Err(crate::Error::TrackNotFound)));
}
1174
/// Async smoke test: an image opened with `AsyncMediaSource` parses to an `ExifIter`.
#[cfg(feature = "tokio")]
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn media_parser_parse_exif_async() {
    use crate::parser_async::AsyncMediaSource;

    let mut media_parser = MediaParser::new();
    let source = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
    let parsed = media_parser.parse_exif_async(source).await;
    let _: ExifIter = parsed.unwrap();
}
1183
/// Async smoke test: a track opened with `AsyncMediaSource` parses to a `TrackInfo`.
#[cfg(feature = "tokio")]
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn media_parser_parse_track_async() {
    use crate::parser_async::AsyncMediaSource;

    let mut media_parser = MediaParser::new();
    let source = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
    let parsed = media_parser.parse_track_async(source).await;
    let _: TrackInfo = parsed.unwrap();
}
1192
/// After the first parse result is dropped, a second parse must report the
/// same cached pointer, i.e. the allocation was recycled.
#[test]
fn parser_recycles_alloc_when_exif_iter_dropped() {
    let mut media_parser = MediaParser::new();

    // Round one: parse, convert, drop the result, then sample the cache.
    let first_ptr = {
        let source = MediaSource::open("testdata/exif.jpg").unwrap();
        let exif: crate::Exif = media_parser.parse_exif(source).unwrap().into();
        drop(exif);
        media_parser.state.cached_ptr_for_test()
    };

    // Round two: parse again; the result stays alive while the cache is sampled.
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let _exif: crate::Exif = media_parser.parse_exif(source).unwrap().into();
    let second_ptr = media_parser.state.cached_ptr_for_test();

    assert!(
        first_ptr.is_some() && first_ptr == second_ptr,
        "expected recycled allocation, got {:?} -> {:?}",
        first_ptr,
        second_ptr
    );
}
1215
/// A freshly constructed parser holds neither a cached allocation nor a buffer.
#[test]
fn parser_new_does_no_upfront_allocation() {
    let fresh = MediaParser::new();
    let state = &fresh.state;
    assert!(state.cached_ptr_for_test().is_none());
    assert!(state.buf_is_none_for_test());
}
1222
/// `set_memory` switches the state into memory mode, and `buffer()` exposes
/// the payload from the current position onwards.
#[test]
fn buffered_state_memory_mode_sets_and_reads() {
    let mut state = BufferedParserState::new();
    state.set_memory(bytes::Bytes::from_static(b"abcdefgh"));

    assert!(state.is_memory_mode());
    assert_eq!(state.buffer(), b"abcdefgh");

    // Advancing the position hides the first three bytes.
    state.set_position(3);
    assert_eq!(state.buffer(), b"defgh");
}
1232
/// In memory mode `share_buf` must hand back the very same allocation the
/// caller supplied, and must leave the state out of memory mode afterwards.
#[test]
fn buffered_state_share_buf_memory_mode_is_zero_copy() {
    let payload = bytes::Bytes::from_static(b"the parser owns nothing here");
    let payload_ptr = payload.as_ptr();

    let mut state = BufferedParserState::new();
    state.set_memory(payload);
    let (shared, position) = state.share_buf();

    assert_eq!(position, 0);
    assert_eq!(
        shared.as_ptr(),
        payload_ptr,
        "memory share must be a Bytes::clone, not a Vec round-trip"
    );
    // share_buf takes the memory slot, so the state is ready for the next
    // `reset()` cycle.
    assert!(!state.is_memory_mode());
}
1250
/// `reset` leaves memory mode and rewinds the position to zero.
#[test]
fn buffered_state_reset_clears_memory() {
    let mut state = BufferedParserState::new();
    state.set_memory(bytes::Bytes::from_static(b"x"));

    state.reset();

    assert!(!state.is_memory_mode());
    assert_eq!(state.position, 0);
}
1259
/// `acquire_buf` must not create a streaming buffer while in memory mode.
#[test]
fn buffered_state_acquire_buf_skips_in_memory_mode() {
    let mut state = BufferedParserState::new();
    state.set_memory(bytes::Bytes::from_static(b"data"));

    state.acquire_buf();

    // No streaming buffer appeared...
    assert!(state.buf.is_none());
    // ...and the in-memory payload is still readable.
    assert_eq!(state.buffer(), b"data");
}
1270
/// A JPEG loaded into a `Vec<u8>` is classified as an image, and the source
/// keeps the payload in its `memory` slot.
#[test]
fn media_source_from_bytes_image_jpg() {
    let jpeg = std::fs::read("testdata/exif.jpg").unwrap();
    let source = MediaSource::from_bytes(jpeg).unwrap();
    assert_eq!(source.kind(), MediaKind::Image);
    assert!(source.memory.is_some());
}
1278
/// A MOV file loaded into a `Vec<u8>` is classified as a track.
#[test]
fn media_source_from_bytes_track_mov() {
    let mov = std::fs::read("testdata/meta.mov").unwrap();
    let source = MediaSource::from_bytes(mov).unwrap();
    assert_eq!(source.kind(), MediaKind::Track);
}
1285
/// `from_bytes` also accepts a `&'static [u8]`; the sample is embedded at
/// compile time with `include_bytes!`.
#[test]
fn media_source_from_bytes_static_slice() {
    let embedded: &'static [u8] = include_bytes!("../testdata/exif.jpg");
    let source = MediaSource::from_bytes(embedded).unwrap();
    assert_eq!(source.kind(), MediaKind::Image);
}
1294
/// Four zero bytes are shorter than the smallest mime signature, so
/// construction must fail.
#[test]
fn media_source_from_bytes_rejects_too_short() {
    let tiny = vec![0u8; 4];
    let outcome = MediaSource::from_bytes(tiny);
    assert!(outcome.is_err(), "expected mime-detection error");
}
1302
/// 256 bytes of `0xAA` are long enough for detection to run but match no
/// known signature, so construction must fail.
#[test]
fn media_source_from_bytes_rejects_unknown_mime() {
    let noise = vec![0xAAu8; 256];
    let outcome = MediaSource::from_bytes(noise);
    assert!(
        outcome.is_err(),
        "expected mime-detection error for unknown bytes"
    );
}
1314
/// Post-refactor baseline: dumps every entry of testdata/exif.jpg obtained
/// through the public API as `ifd.tag="value"` lines, sorts them, and
/// sanity-checks the result. Exact values are covered by the other tests;
/// this one guards against accidental re-ordering / dedup changes.
#[test]
fn p4_5_baseline_exif_jpg_full_dump() {
    let mut media_parser = MediaParser::new();
    let source = MediaSource::open("testdata/exif.jpg").unwrap();
    let iter: ExifIter = media_parser.parse_exif(source).unwrap();

    let mut entries: Vec<String> = Vec::new();
    for entry in iter {
        // Known tags print by name, unknown ones as a 4-digit hex code.
        let tag_name = match entry.tag() {
            crate::TagOrCode::Tag(t) => format!("{t}"),
            crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
        };
        // Entries without a value are recorded with a placeholder.
        let value_str = match entry.value() {
            Some(v) => format!("{v}"),
            None => "<err>".into(),
        };
        entries.push(format!("{}.{}={:?}", entry.ifd(), tag_name, value_str));
    }
    entries.sort();
    let snapshot = entries.join("\n");

    // Sanity only: the dump must be non-trivial and mention the Make tag.
    assert!(
        entries.len() > 5,
        "expected >5 entries, got {}",
        entries.len()
    );
    assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
}
1351
/// Memory-mode parse of a JPEG exposes the `Make` tag.
#[test]
fn parse_exif_from_bytes_jpg_basic() {
    let jpeg = std::fs::read("testdata/exif.jpg").unwrap();
    let source = MediaSource::from_bytes(jpeg).unwrap();

    let mut media_parser = MediaParser::new();
    let exif: crate::Exif = media_parser.parse_exif_from_bytes(source).unwrap().into();
    assert!(exif.get(crate::ExifTag::Make).is_some());
}
1361
/// Memory-mode parse of a HEIC sample reports "Apple" as the `Make`.
#[test]
fn parse_exif_from_bytes_heic_basic() {
    let heic = std::fs::read("testdata/exif.heic").unwrap();
    let source = MediaSource::from_bytes(heic).unwrap();

    let mut media_parser = MediaParser::new();
    let exif: crate::Exif = media_parser.parse_exif_from_bytes(source).unwrap().into();

    let make = exif.get(crate::ExifTag::Make).and_then(|v| v.as_str());
    assert_eq!(make, Some("Apple"));
}
1374
/// Memory-mode parsing must leave the parser's recycle cache untouched, and
/// the parser must remain usable for a second in-memory parse.
///
/// The payload pointer is captured but never compared: the only assertion is
/// the one on the recycle cache after the first parse.
#[test]
fn parse_exif_from_bytes_zero_copy_shared_bytes() {
    let first_payload = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
    let original_ptr = first_payload.as_ptr();

    let mut media_parser = MediaParser::new();
    let first_source = MediaSource::from_bytes(first_payload).unwrap();
    let first_iter = media_parser.parse_exif_from_bytes(first_source).unwrap();

    // Memory mode does not write to the cache: the user owns the allocation.
    assert!(
        media_parser.state.cached_ptr_for_test().is_none(),
        "memory mode must not poison the recycle cache"
    );

    // Release the first result, then parse a second, independent payload.
    drop(first_iter);

    let second_payload = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
    let second_source = MediaSource::from_bytes(second_payload.clone()).unwrap();
    let _second_iter = media_parser.parse_exif_from_bytes(second_source).unwrap();

    // Pointer equality cannot be asserted across distinct user `Bytes`; the
    // captured pointer is kept only to document the intended target.
    let _ = original_ptr;
}
1406
/// Memory-mode Exif parsing of a track payload must fail with `ExifNotFound`.
#[test]
fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
    let mov = std::fs::read("testdata/meta.mov").unwrap();
    let track_source = MediaSource::from_bytes(mov).unwrap();

    let mut media_parser = MediaParser::new();
    let outcome = media_parser.parse_exif_from_bytes(track_source);
    assert!(matches!(outcome, Err(crate::Error::ExifNotFound)));
}
1415
/// A JPEG cut down to 200 bytes still passes mime detection but is too short
/// for its Exif block, so memory-mode parsing must return an error. Only
/// "is an error" is asserted, not the specific variant.
#[test]
fn parse_exif_from_bytes_on_truncated_returns_io_error() {
    let mut truncated = std::fs::read("testdata/exif.jpg").unwrap();
    truncated.truncate(200);

    let source = MediaSource::from_bytes(truncated).unwrap();
    let mut media_parser = MediaParser::new();
    let outcome = media_parser.parse_exif_from_bytes(source);

    assert!(
        outcome.is_err(),
        "expected error on truncated bytes, got {:?}",
        outcome
    );
}
1432
/// Memory-mode track parse of a MOV sample reports the Apple make and
/// iPhone X model.
#[test]
fn parse_track_from_bytes_mov_basic() {
    let mov = std::fs::read("testdata/meta.mov").unwrap();
    let source = MediaSource::from_bytes(mov).unwrap();

    let mut media_parser = MediaParser::new();
    let track = media_parser.parse_track_from_bytes(source).unwrap();

    assert_eq!(track.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
    assert_eq!(
        track.get(crate::TrackInfoTag::Model),
        Some(&"iPhone X".into())
    );
}
1445
/// Memory-mode track parse of an MP4 sample exposes a creation date.
#[test]
fn parse_track_from_bytes_mp4_basic() {
    let mp4 = std::fs::read("testdata/meta.mp4").unwrap();
    let source = MediaSource::from_bytes(mp4).unwrap();

    let mut media_parser = MediaParser::new();
    let track = media_parser.parse_track_from_bytes(source).unwrap();
    assert!(track.get(crate::TrackInfoTag::CreateDate).is_some());
}
1454
/// Memory-mode track parse of the MKV sample reports a width of 640.
#[test]
fn parse_track_from_bytes_mkv_basic() {
    let mkv = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
    let source = MediaSource::from_bytes(mkv).unwrap();

    let mut media_parser = MediaParser::new();
    let track = media_parser.parse_track_from_bytes(source).unwrap();

    let width = track.get(crate::TrackInfoTag::Width);
    assert_eq!(width, Some(&(640_u32.into())));
}
1466
/// Memory-mode track parsing of an image payload must fail with `TrackNotFound`.
#[test]
fn parse_track_from_bytes_on_image_returns_track_not_found() {
    let jpeg = std::fs::read("testdata/exif.jpg").unwrap();
    let image_source = MediaSource::from_bytes(jpeg).unwrap();

    let mut media_parser = MediaParser::new();
    let outcome = media_parser.parse_track_from_bytes(image_source);
    assert!(matches!(outcome, Err(crate::Error::TrackNotFound)));
}
1475}