Skip to main content

nom_exif/
parser.rs

1use std::{
2    cmp::{max, min},
3    fmt::{Debug, Display},
4    fs::File,
5    io::{self, Read, Seek},
6    path::Path,
7};
8
9use crate::{
10    error::{ParsedError, ParsingError, ParsingErrorState},
11    exif::TiffHeader,
12    file::MediaMime,
13    ExifIter, TrackInfo,
14};
15
/// A function that tries to skip `n` bytes of `reader` by seeking. Returns
/// `Ok(true)` on success, `Ok(false)` if the reader does not support seek
/// (so the caller should fall back to reading-and-discarding), or
/// `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
///
/// This is captured at construction time by `MediaSource::seekable` /
/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
/// runtime fn pointer.
///
/// The first argument is the reader to advance; the second is the number of
/// bytes to skip. Because this is a plain `fn` pointer, only non-capturing
/// closures (or free functions) can be stored in it.
pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25
/// `MediaSource` represents a media data source that can be parsed by
/// [`MediaParser`].
///
/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
///
/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
///   (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
///   [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
///
/// - In other cases:
///
///   - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
///     (an already-open `File` goes here).
///
///   - Use [`MediaSource::unseekable`] to create a MediaSource from a
///     reader that only implements `Read`.
///
/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
/// since the former is more efficient when the parser needs to skip a large number of bytes.
///
/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
/// its own buffer management and the buffers can be shared between multiple
/// parsing tasks, thus avoiding frequent memory allocations.
pub struct MediaSource<R> {
    /// The underlying input. For reader-backed sources the header bytes have
    /// already been read from it during construction; in memory mode it is
    /// the unit placeholder `()`.
    pub(crate) reader: R,
    /// Header bytes consumed from `reader` while detecting the format. The
    /// parser moves them into its own buffer before parsing. Empty in
    /// memory mode.
    pub(crate) buf: Vec<u8>,
    /// Media format detected from the header bytes.
    pub(crate) mime: MediaMime,
    /// Skip-by-seek strategy selected by the constructor that built this
    /// source.
    pub(crate) skip_by_seek: SkipBySeekFn<R>,
    /// P7: zero-copy memory-mode payload. `Some` only when the source was
    /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
    /// `skip_by_seek` are placeholders (and never consulted) in that mode.
    pub(crate) memory: Option<bytes::Bytes>,
}
59
/// Top-level classification of a media source.
///
/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
/// `Track` files are time-based containers — video, audio, or both — and
/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
/// audio containers like `.mka` are classified as `Track`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaKind {
    /// The source was detected as an image format.
    Image,
    /// The source was detected as a time-based (video and/or audio) container.
    Track,
}
71
72impl<R> Debug for MediaSource<R> {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("MediaSource")
75            .field("mime", &self.mime)
76            .finish_non_exhaustive()
77    }
78}
79
// Maximum number of leading bytes inspected to detect the media format.
// Should be enough for parsing header
const HEADER_PARSE_BUF_SIZE: usize = 128;
82
83impl<R> MediaSource<R> {
84    /// Top-level classification of this media source.
85    pub fn kind(&self) -> MediaKind {
86        match self.mime {
87            MediaMime::Image(_) => MediaKind::Image,
88            MediaMime::Track(_) => MediaKind::Track,
89        }
90    }
91}
92
93impl<R: Read> MediaSource<R> {
94    fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95        let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96        reader
97            .by_ref()
98            .take(HEADER_PARSE_BUF_SIZE as u64)
99            .read_to_end(&mut buf)?;
100        let mime: MediaMime = buf.as_slice().try_into()?;
101        Ok(Self {
102            reader,
103            buf,
104            mime,
105            skip_by_seek,
106            memory: None,
107        })
108    }
109
110    /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111    /// reader that only impl `Read`
112    ///
113    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114    /// since the former is more efficient when the parser needs to skip a large number of bytes.
115    pub fn unseekable(reader: R) -> crate::Result<Self> {
116        Self::build(reader, |_, _| Ok(false))
117    }
118}
119
120impl<R: Read + Seek> MediaSource<R> {
121    /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122    ///
123    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124    /// since the former is more efficient when the parser needs to skip a large number of bytes.
125    pub fn seekable(reader: R) -> crate::Result<Self> {
126        Self::build(reader, |r, n| {
127            let signed: i64 = n
128                .try_into()
129                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
130            r.seek_relative(signed)?;
131            Ok(true)
132        })
133    }
134}
135
136impl MediaSource<File> {
137    /// Open a file at `path` and parse its header to detect the media format.
138    ///
139    /// This is the v3-preferred entry point for the common case of "I have a
140    /// path on disk". For an already-open `File` use [`Self::seekable`].
141    pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142        Self::seekable(File::open(path)?)
143    }
144}
145
146impl MediaSource<()> {
147    /// Build a [`MediaSource`] from an in-memory byte payload.
148    ///
149    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150    /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151    /// HTTP-stack body types that implement `Into<Bytes>` directly.
152    ///
153    /// The header (first up to 128 bytes) is sniffed for media kind, the
154    /// same way [`MediaSource::open`] does it for files. The full payload is
155    /// stored zero-copy: subsequent parsing through
156    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157    /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158    /// via reference counting.
159    ///
160    /// The returned source is parsed by the dedicated
161    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162    /// methods. The streaming `parse_exif` / `parse_track` methods do not
163    /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164    ///
165    /// # Example
166    ///
167    /// ```rust
168    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169    ///
170    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171    /// let ms = MediaSource::from_bytes(bytes)?;
172    /// assert_eq!(ms.kind(), MediaKind::Image);
173    ///
174    /// let mut parser = MediaParser::new();
175    /// let _iter = parser.parse_exif_from_bytes(ms)?;
176    /// # Ok::<(), nom_exif::Error>(())
177    /// ```
178    pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
179        let bytes = bytes.into();
180        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
181        let mime: MediaMime = bytes[..head_end].try_into()?;
182        Ok(Self {
183            reader: (),
184            buf: Vec::new(),
185            mime,
186            // Placeholder: never invoked in memory mode (clear_and_skip's
187            // AdvanceOnly path is the only one taken).
188            skip_by_seek: |_, _| Ok(false),
189            memory: Some(bytes),
190        })
191    }
192}
193
// ----- Parse-time buffer policy -----
//
// Layered by lifecycle:
//
// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
//   `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
//   helpers don't over-commit.
// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
//   parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
//   blocking-pool dispatches.
// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
//   single parse. Anything that would push past this is rejected as
//   `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
//   that declare absurd sizes.
// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
//   recycling. After a parse whose buffer ended above this, `shrink_to`
//   gives the excess back to the allocator. Tuned for typical metadata
//   sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
//   recycle path stays warm for batch workloads.
pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024; // 8 KiB
pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024; // 16 KiB
pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB
const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024; // 4 MiB
217
/// Minimal view over a parse buffer that has a movable read position.
pub(crate) trait Buf {
    /// Returns the bytes currently visible to the parser. In the
    /// `BufferedParserState` implementation this is the stored data starting
    /// at the current position.
    fn buffer(&self) -> &[u8];
    /// Discards the buffered bytes. The `BufferedParserState` implementation
    /// treats this as a no-op in memory mode.
    fn clear(&mut self);

    /// Sets the read position to `pos`.
    fn set_position(&mut self, pos: usize);
    /// Returns the current read position.
    // NOTE(review): the `allow` suggests this accessor is unused in some
    // build configurations — confirm before removing it.
    #[allow(unused)]
    fn position(&self) -> usize;
}
226
/// Buffer-management state used by `MediaParser` (sync and async paths share it).
///
/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
/// one *cached* `Bytes` clone of the most recently shared buffer. When the
/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
/// succeeds the underlying allocation is reused (the previous `ExifIter`
/// has been dropped); otherwise the clone is discarded and a fresh
/// `Vec<u8>` is allocated.
///
/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
/// are `&mut self`, so a single slot is sufficient.
#[derive(Debug, Default)]
pub(crate) struct BufferedParserState {
    /// Clone of the buffer handed out by the last `share_buf` call in
    /// streaming mode; consulted by `acquire_buf` for recycling.
    cached: Option<bytes::Bytes>,
    /// Active scratch buffer for the current streaming parse, if any.
    buf: Option<Vec<u8>>,
    /// P7: memory-mode storage. When `Some`, the parser is feeding from a
    /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
    /// `cached` are unused in this mode — the user owns the allocation,
    /// so there is nothing to recycle.
    memory: Option<bytes::Bytes>,
    /// Offset into the active data (`buf` or `memory`) at which `buffer()`
    /// starts.
    position: usize,
}
249
250impl BufferedParserState {
251    pub(crate) fn new() -> Self {
252        Self::default()
253    }
254
255    pub(crate) fn reset(&mut self) {
256        // If a parse failed mid-way the buf may still be present; drop it.
257        // Cache stays — recycle on next acquire if eligible.
258        self.buf = None;
259        self.memory = None;
260        self.position = 0;
261    }
262
263    /// Switch the parser state into memory mode, owning `bytes` directly.
264    /// Caller must have already called `reset()` (asserted in debug). Subsequent
265    /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
266    /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
267    pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
268        debug_assert!(
269            self.buf.is_none() && self.memory.is_none(),
270            "set_memory called on non-clean state"
271        );
272        self.memory = Some(bytes);
273        self.position = 0;
274    }
275
276    pub(crate) fn is_memory_mode(&self) -> bool {
277        self.memory.is_some()
278    }
279
280    pub(crate) fn acquire_buf(&mut self) {
281        if self.memory.is_some() {
282            // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
283            return;
284        }
285        debug_assert!(self.buf.is_none());
286        let buf = match self.cached.take() {
287            Some(b) => match b.try_into_mut() {
288                Ok(bm) => {
289                    let mut v = Vec::<u8>::from(bm);
290                    v.clear();
291                    if v.capacity() > MAX_REUSE_BUF_SIZE {
292                        v.shrink_to(MAX_REUSE_BUF_SIZE);
293                    }
294                    v
295                }
296                Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
297            },
298            None => Vec::with_capacity(INIT_BUF_SIZE),
299        };
300        self.buf = Some(buf);
301    }
302
303    pub(crate) fn buf(&self) -> &Vec<u8> {
304        self.buf.as_ref().expect("no buf here")
305    }
306
307    pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
308        self.buf.as_mut().expect("no buf here")
309    }
310
311    #[cfg(test)]
312    pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
313        self.cached.as_ref().map(|b| b.as_ptr())
314    }
315
316    #[cfg(test)]
317    pub(crate) fn buf_is_none_for_test(&self) -> bool {
318        self.buf.is_none()
319    }
320}
321
322impl Buf for BufferedParserState {
323    fn buffer(&self) -> &[u8] {
324        if let Some(m) = &self.memory {
325            return &m[self.position..];
326        }
327        &self.buf()[self.position..]
328    }
329    fn clear(&mut self) {
330        // In memory mode `clear` is a no-op: there is no scratch buffer to
331        // truncate, and the caller's bytes must remain available for further
332        // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
333        // what advances `position` in memory mode.
334        if self.memory.is_some() {
335            return;
336        }
337        self.buf_mut().clear();
338    }
339    fn set_position(&mut self, pos: usize) {
340        self.position = pos;
341    }
342    fn position(&self) -> usize {
343        self.position
344    }
345}
346
347impl ShareBuf for BufferedParserState {
348    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
349        if let Some(m) = self.memory.take() {
350            // Zero-copy share: caller already owns the allocation. No cache
351            // write — recycle is irrelevant when the user holds the alloc.
352            let position = self.position;
353            return (m, position);
354        }
355        let vec = self.buf.take().expect("no buf to share");
356        let bytes = bytes::Bytes::from(vec);
357        let position = self.position;
358        self.cached = Some(bytes.clone());
359        (bytes, position)
360    }
361}
362
/// The action `clear_and_skip` should take for a given buffer length and
/// requested skip count.
pub(crate) enum SkipPlan {
    /// The skip fits inside the bytes already buffered; only the position
    /// needs to move.
    AdvanceOnly,
    /// The buffer must be cleared and a further `extra` bytes skipped from
    /// the reader.
    ClearAndSkip { extra: usize },
}

/// Chooses a [`SkipPlan`] for skipping `n` bytes when `buffer_len` bytes are
/// currently buffered.
pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
    // `checked_sub` yields `None` when the skip is shorter than the buffer
    // and `Some(0)` when it ends exactly at the buffer's end.
    match n.checked_sub(buffer_len) {
        None | Some(0) => SkipPlan::AdvanceOnly,
        Some(extra) => SkipPlan::ClearAndSkip { extra },
    }
}
381
382pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
383    if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
384        tracing::error!(?requested, "the requested buffer size is too big");
385        return Err(io::ErrorKind::Unsupported.into());
386    }
387    Ok(())
388}
389
/// Result of one `parse_loop_step` call: tells the driving loop what to do
/// next.
pub(crate) enum LoopAction<O> {
    /// Parse succeeded; return this value to the caller.
    Done(O),
    /// Need more bytes — call `fill_buf(reader, n)` then re-step.
    NeedFill(usize),
    /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
    Skip(usize),
    /// Parse failed permanently.
    Failed(String),
}
400
/// Closure type passed to [`parse_loop_step`].
///
/// The arguments are, in order: the currently buffered bytes, the offset
/// supplied by the caller of the parse loop, and the state carried over from
/// the previous attempt (if any). On failure the returned
/// `ParsingErrorState` carries both the error and the state to thread into
/// the next attempt.
pub(crate) type ParseFn<'a, O> =
    dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
404
405/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
406pub(crate) fn parse_loop_step<O>(
407    buffer: &[u8],
408    offset: usize,
409    parsing_state: &mut Option<ParsingState>,
410    parse: &mut ParseFn<'_, O>,
411) -> LoopAction<O> {
412    match parse(buffer, offset, parsing_state.take()) {
413        Ok(o) => LoopAction::Done(o),
414        Err(es) => {
415            *parsing_state = es.state;
416            match es.err {
417                ParsingError::Need(n) => LoopAction::NeedFill(n),
418                ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
419                ParsingError::Failed(s) => LoopAction::Failed(s),
420            }
421        }
422    }
423}
424
/// Format-specific state threaded between parse-loop iterations, so a parse
/// closure can resume after the buffer has been refilled or skipped.
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// Carries a `TiffHeader`.
    // NOTE(review): presumably a header that was already parsed — confirm.
    TiffHeader(TiffHeader),
    /// Carries a size value for the HEIF path.
    // NOTE(review): presumably the Exif payload size — confirm against the
    // HEIF parser.
    HeifExifSize(usize),
    /// Carries a size value for the CR3 path.
    // NOTE(review): presumably the Exif payload size — confirm against the
    // CR3 parser.
    Cr3ExifSize(usize),
}
431
432impl Display for ParsingState {
433    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434        match self {
435            ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
436            ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
437            ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
438        }
439    }
440}
441
// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
// parsing and `ParsingState` threading for format-specific state machines.
//
// Implementors supply `fill_buf`; the provided methods drive the
// fill / skip / retry loop on top of it and the `Buf` accessors.
pub(crate) trait BufParser: Buf + Debug {
    /// Appends more bytes from `reader` to the buffer and returns how many
    /// were added; `size` is the requested amount.
    ///
    /// NOTE(review): the `MediaParser` implementation treats `size` as an
    /// upper bound and reports a zero-byte read as `UnexpectedEof` — other
    /// implementors should confirm they follow the same contract.
    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;

    /// Convenience wrapper around [`Self::load_and_parse_with_offset`] with
    /// an offset of `0`, for parse closures that do not need the offset.
    fn load_and_parse<R: Read, P, O>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        mut parse: P,
    ) -> Result<O, ParsedError>
    where
        P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
    {
        self.load_and_parse_with_offset(
            reader,
            skip_by_seek,
            |data, _, state| parse(data, state),
            0,
        )
    }

    /// Repeatedly calls `parse` on the buffered bytes until it succeeds or
    /// fails permanently, filling or skipping input in between as the parse
    /// closure requests.
    ///
    /// `offset` is passed through to `parse` unchanged on every attempt.
    /// Returns the parsed value, `ParsedError::Failed` for a permanent
    /// failure, or the error produced by a fill/skip step.
    #[tracing::instrument(skip_all)]
    fn load_and_parse_with_offset<R: Read, P, O>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        mut parse: P,
        offset: usize,
    ) -> Result<O, ParsedError>
    where
        P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
    {
        // Nothing buffered at or beyond `offset` yet: pull in a first chunk
        // before the initial parse attempt.
        if offset >= self.buffer().len() {
            self.fill_buf(reader, MIN_GROW_SIZE)?;
        }
        // State handed from one parse attempt to the next.
        let mut parsing_state: Option<ParsingState> = None;
        loop {
            match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
                LoopAction::Done(o) => return Ok(o),
                LoopAction::NeedFill(needed) => {
                    // Never read less than MIN_GROW_SIZE per fill.
                    let to_read = max(needed, MIN_GROW_SIZE);
                    let n = self.fill_buf(reader, to_read)?;
                    // Defensive: the `fill_buf` implementation in this file
                    // already reports a zero-byte read as an error.
                    if n == 0 {
                        return Err(ParsedError::NoEnoughBytes);
                    }
                }
                LoopAction::Skip(n) => {
                    self.clear_and_skip(reader, skip_by_seek, n)?;
                }
                LoopAction::Failed(s) => return Err(ParsedError::Failed(s)),
            }
        }
    }

    /// Skips `n` bytes of input, counted from the start of the currently
    /// visible buffer.
    ///
    /// If the skip fits within the buffered bytes only the position is
    /// advanced. Otherwise the buffer is cleared and the remainder is skipped
    /// on the reader — by seeking when `skip_by_seek` reports success, or by
    /// reading and discarding when it returns `Ok(false)`.
    #[tracing::instrument(skip(reader, skip_by_seek))]
    fn clear_and_skip<R: Read>(
        &mut self,
        reader: &mut R,
        skip_by_seek: SkipBySeekFn<R>,
        n: usize,
    ) -> Result<(), ParsedError> {
        match clear_and_skip_decide(self.buffer().len(), n) {
            SkipPlan::AdvanceOnly => {
                self.set_position(self.position() + n);
                Ok(())
            }
            SkipPlan::ClearAndSkip { extra: skip_n } => {
                self.clear();
                // Try the cheap path first; `done == false` means the reader
                // cannot seek.
                let done = (skip_by_seek)(
                    reader,
                    skip_n
                        .try_into()
                        .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
                )?;
                if !done {
                    // Fallback: read the bytes into the buffer and throw them
                    // away, at most MAX_PARSE_BUF_SIZE per round.
                    let mut skipped = 0;
                    while skipped < skip_n {
                        let mut to_skip = skip_n - skipped;
                        to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
                        let n = self.fill_buf(reader, to_skip)?;
                        skipped += n;
                        if skipped <= skip_n {
                            self.clear();
                        } else {
                            // NOTE(review): only reachable if `fill_buf`
                            // returns more than requested; the implementation
                            // in this file caps the read at `to_skip`. Keeps
                            // the surplus bytes visible to the parser.
                            let remain = skipped - skip_n;
                            self.set_position(self.buffer().len() - remain);
                            break;
                        }
                    }
                }

                // Make sure the next parse attempt has something to look at.
                if self.buffer().is_empty() {
                    self.fill_buf(reader, MIN_GROW_SIZE)?;
                }
                Ok(())
            }
        }
    }
}
542
543impl BufParser for MediaParser {
544    #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
545    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
546        if self.state.is_memory_mode() {
547            // Memory mode owns every byte it will ever have. A request for
548            // more is "the parser walked off the end of the input"; surface
549            // it the same way the streaming path surfaces a 0-byte read.
550            return Err(std::io::ErrorKind::UnexpectedEof.into());
551        }
552        check_fill_size(self.state.buf().len(), size)?;
553
554        // Do not pre-allocate `size` bytes: a crafted box header can declare a
555        // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
556        // stream length. reserve_exact would allocate that memory immediately
557        // even when the reader has only a few bytes left. read_to_end grows the
558        // buffer from the reader's actual size hint instead.
559        let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
560        if n == 0 {
561            tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
562            return Err(std::io::ErrorKind::UnexpectedEof.into());
563        }
564
565        tracing::debug!(
566            ?size,
567            ?n,
568            buf_len = self.state.buf().len(),
569            "fill_buf: read bytes"
570        );
571
572        Ok(n)
573    }
574}
575
576impl Buf for MediaParser {
577    fn buffer(&self) -> &[u8] {
578        self.state.buffer()
579    }
580
581    fn clear(&mut self) {
582        self.state.clear();
583    }
584
585    fn set_position(&mut self, pos: usize) {
586        self.state.set_position(pos);
587    }
588
589    fn position(&self) -> usize {
590        self.state.position()
591    }
592}
593
/// A `MediaParser` can parse media info from a [`MediaSource`].
///
/// `MediaParser` manages inner parse buffers that can be shared between
/// multiple parsing tasks, thus avoiding frequent memory allocations.
///
/// Therefore:
///
/// - Try to reuse a `MediaParser` instead of creating a new one every time
///   you need it.
///
/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
///
/// ## Example
///
/// ```rust
/// use nom_exif::*;
/// use chrono::DateTime;
///
/// let mut parser = MediaParser::new();
///
/// // ------------------- Parse Exif Info
/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
/// assert_eq!(ms.kind(), MediaKind::Image);
/// let mut iter = parser.parse_exif(ms).unwrap();
///
/// let entry = iter.next().unwrap();
/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
///
/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
/// // we can start the iteration from the beginning.
/// let exif: Exif = iter.clone().into();
/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
///
/// // ------------------- Parse Track Info
/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
/// assert_eq!(ms.kind(), MediaKind::Track);
/// let info = parser.parse_track(ms).unwrap();
///
/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
/// assert_eq!(
///     info.gps_info().unwrap().latitude,
///     LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
/// );
/// ```
pub struct MediaParser {
    // Buffer, recycle cache, and read position used by every parse call.
    state: BufferedParserState,
}
645
646impl Debug for MediaParser {
647    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
648        f.debug_struct("MediaParser")
649            .field("state", &self.state)
650            .finish_non_exhaustive()
651    }
652}
653
654impl Default for MediaParser {
655    fn default() -> Self {
656        Self {
657            state: BufferedParserState::new(),
658        }
659    }
660}
661
/// Hands the parser's buffered data over to a consumer as a reference-counted
/// `Bytes`.
pub(crate) trait ShareBuf {
    /// Take ownership of the parser's active buffer and return the full
    /// allocation as `Bytes` plus the parser's `position` at share-time.
    /// Caller is responsible for slicing: a parse-loop range `r` corresponds
    /// to absolute range `(r.start + position)..(r.end + position)`.
    fn share_buf(&mut self) -> (bytes::Bytes, usize);
}
669
670impl ShareBuf for MediaParser {
671    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
672        self.state.share_buf()
673    }
674}
675
676impl MediaParser {
677    pub fn new() -> Self {
678        Self::default()
679    }
680
681    /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
682    /// if the source is a `Track` (use [`Self::parse_track`] instead).
683    ///
684    /// `MediaParser` reuses its internal parse buffer across calls, so prefer
685    /// reusing a single `MediaParser` over creating a new one per file. Drop
686    /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
687    /// the next `parse_*` call so the buffer can be reclaimed.
688    pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
689        self.reset();
690        self.acquire_buf();
691        self.buf_mut().append(&mut ms.buf);
692        let res: crate::Result<ExifIter> = (|| {
693            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
694            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
695                return Err(crate::Error::ExifNotFound);
696            }
697            crate::exif::parse_exif_iter(
698                self,
699                ms.mime.unwrap_image(),
700                &mut ms.reader,
701                ms.skip_by_seek,
702            )
703        })();
704        self.reset();
705        res
706    }
707
708    /// Parse track info from a video/audio source. Returns `Error::TrackNotFound`
709    /// if the source is an `Image` (use [`Self::parse_exif`] instead).
710    pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
711        self.reset();
712        self.acquire_buf();
713        self.buf_mut().append(&mut ms.buf);
714        let res: crate::Result<TrackInfo> = (|| {
715            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
716            let mime_track = match ms.mime {
717                crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
718                crate::file::MediaMime::Track(t) => t,
719            };
720            let skip = ms.skip_by_seek;
721            let out = self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
722                crate::video::parse_track_info(data, mime_track)
723                    .map_err(|e| ParsingErrorState::new(e, None))
724            })?;
725            Ok(out)
726        })();
727        self.reset();
728        res
729    }
730
731    /// Parse Exif metadata from an in-memory byte payload built via
732    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::ExifNotFound` if the
733    /// payload is a `Track` (use [`Self::parse_track_from_bytes`] instead).
734    ///
735    /// Memory-mode parsing is **zero-copy**: the underlying `Bytes` is shared
736    /// with the returned [`ExifIter`] (and its sub-IFDs / CR3 CMT blocks) via
737    /// reference counting. No `Vec<u8>` is allocated for the parse buffer.
738    pub fn parse_exif_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<ExifIter> {
739        self.reset();
740        let memory = ms
741            .memory
742            .take()
743            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
744        self.state.set_memory(memory);
745        let res: crate::Result<ExifIter> = (|| {
746            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
747                return Err(crate::Error::ExifNotFound);
748            }
749            // Placeholder reader: never read from in memory mode (fill_buf
750            // short-circuits; clear_and_skip uses AdvanceOnly).
751            let mut empty = std::io::empty();
752            crate::exif::parse_exif_iter(
753                self,
754                ms.mime.unwrap_image(),
755                &mut empty,
756                // Placeholder skip-by-seek: never invoked.
757                |_, _| Ok(false),
758            )
759        })();
760        self.reset();
761        res
762    }
763
764    /// Parse track info from an in-memory video/audio byte payload built via
765    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::TrackNotFound` if the
766    /// payload is an `Image` (use [`Self::parse_exif_from_bytes`] instead).
767    ///
768    /// Like [`Self::parse_exif_from_bytes`], the parse is zero-copy with respect to
769    /// the user-supplied `Bytes`.
770    pub fn parse_track_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<TrackInfo> {
771        self.reset();
772        let memory = ms
773            .memory
774            .take()
775            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
776        self.state.set_memory(memory);
777        let res: crate::Result<TrackInfo> = (|| {
778            let mime_track = match ms.mime {
779                crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
780                crate::file::MediaMime::Track(t) => t,
781            };
782            let mut empty = std::io::empty();
783            let out = self.load_and_parse(
784                &mut empty,
785                |_, _| Ok(false),
786                |data, _| {
787                    crate::video::parse_track_info(data, mime_track)
788                        .map_err(|e| ParsingErrorState::new(e, None))
789                },
790            )?;
791            Ok(out)
792        })();
793        self.reset();
794        res
795    }
796
797    fn reset(&mut self) {
798        self.state.reset();
799    }
800
801    fn buf_mut(&mut self) -> &mut Vec<u8> {
802        self.state.buf_mut()
803    }
804
805    fn acquire_buf(&mut self) {
806        self.state.acquire_buf();
807    }
808}
809
810#[cfg(feature = "tokio")]
811mod tokio_impl {
812    use super::*;
813    use crate::error::ParsingErrorState;
814    use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
815    use tokio::io::{AsyncRead, AsyncReadExt};
816
817    impl AsyncBufParser for MediaParser {
818        async fn fill_buf<R: AsyncRead + Unpin>(
819            &mut self,
820            reader: &mut R,
821            size: usize,
822        ) -> std::io::Result<usize> {
823            if self.state.is_memory_mode() {
824                // Memory mode owns every byte it will ever have. Surface
825                // "walked off end of input" the same way the streaming path
826                // surfaces a 0-byte read.
827                return Err(std::io::ErrorKind::UnexpectedEof.into());
828            }
829            check_fill_size(self.state.buf().len(), size)?;
830            // Same rationale as the sync version: do not pre-allocate `size` bytes.
831            let n = reader
832                .take(size as u64)
833                .read_to_end(self.state.buf_mut())
834                .await?;
835            if n == 0 {
836                return Err(std::io::ErrorKind::UnexpectedEof.into());
837            }
838            Ok(n)
839        }
840    }
841
842    impl MediaParser {
843        /// Parse Exif metadata from an async image source. Returns
844        /// `Error::ExifNotFound` if the source is a `Track`.
845        pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
846            &mut self,
847            mut ms: AsyncMediaSource<R>,
848        ) -> crate::Result<ExifIter> {
849            self.reset();
850            self.acquire_buf();
851            self.buf_mut().append(&mut ms.buf);
852            let res: crate::Result<ExifIter> = async {
853                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
854                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
855                    return Err(crate::Error::ExifNotFound);
856                }
857                crate::exif::parse_exif_iter_async(
858                    self,
859                    ms.mime.unwrap_image(),
860                    &mut ms.reader,
861                    ms.skip_by_seek,
862                )
863                .await
864            }
865            .await;
866            self.reset();
867            res
868        }
869
870        /// Parse track info from an async video/audio source. Returns
871        /// `Error::TrackNotFound` if the source is an `Image`.
872        pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
873            &mut self,
874            mut ms: AsyncMediaSource<R>,
875        ) -> crate::Result<TrackInfo> {
876            self.reset();
877            self.acquire_buf();
878            self.buf_mut().append(&mut ms.buf);
879            let res: crate::Result<TrackInfo> = async {
880                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
881                let mime_track = match ms.mime {
882                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
883                    crate::file::MediaMime::Track(t) => t,
884                };
885                let skip = ms.skip_by_seek;
886                let out = <Self as AsyncBufParser>::load_and_parse(
887                    self,
888                    &mut ms.reader,
889                    skip,
890                    |data, _| {
891                        crate::video::parse_track_info(data, mime_track)
892                            .map_err(|e| ParsingErrorState::new(e, None))
893                    },
894                )
895                .await?;
896                Ok(out)
897            }
898            .await;
899            self.reset();
900            res
901        }
902    }
903}
904
905#[cfg(test)]
906mod tests {
907    use std::sync::{LazyLock, Mutex, MutexGuard};
908
909    use super::*;
910    use test_case::case;
911
    /// Expected outcome for one sample file in the `parse_media` test matrix.
    enum TrackExif {
        /// The file opens as a track source and `parse_track` succeeds.
        Track,
        /// The file opens as an image source and `parse_exif` succeeds.
        Exif,
        /// The file opens, but the parse call matching its kind returns an error.
        NoData,
        /// `MediaSource::open` itself fails.
        Invalid,
    }
918    use TrackExif::*;
919
920    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
921    fn parser() -> MutexGuard<'static, MediaParser> {
922        PARSER.lock().unwrap()
923    }
924
925    #[case("3gp_640x360.3gp", Track)]
926    #[case("broken.jpg", Exif)]
927    #[case("compatible-brands-fail.heic", Invalid)]
928    #[case("compatible-brands-fail.mov", Invalid)]
929    #[case("compatible-brands.heic", NoData)]
930    #[case("compatible-brands.mov", NoData)]
931    #[case("embedded-in-heic.mov", Track)]
932    #[case("exif.heic", Exif)]
933    #[case("exif.jpg", Exif)]
934    #[case("exif-no-tz.jpg", Exif)]
935    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
936    #[case("meta.mov", Track)]
937    #[case("meta.mp4", Track)]
938    #[case("mka.mka", Track)]
939    #[case("mkv_640x360.mkv", Track)]
940    #[case("exif-one-entry.heic", Exif)]
941    #[case("no-exif.jpg", NoData)]
942    #[case("tif.tif", Exif)]
943    #[case("ramdisk.img", Invalid)]
944    #[case("webm_480.webm", Track)]
945    fn parse_media(path: &str, te: TrackExif) {
946        let mut parser = parser();
947        let ms = MediaSource::open(Path::new("testdata").join(path));
948        match te {
949            Track => {
950                let ms = ms.unwrap();
951                assert_eq!(ms.kind(), MediaKind::Track);
952                let _: TrackInfo = parser.parse_track(ms).unwrap();
953            }
954            Exif => {
955                let ms = ms.unwrap();
956                assert_eq!(ms.kind(), MediaKind::Image);
957                let mut it: ExifIter = parser.parse_exif(ms).unwrap();
958                let _ = it.parse_gps();
959
960                if path.contains("one-entry") {
961                    assert!(it.next().is_some());
962                    assert!(it.next().is_none());
963
964                    let exif: crate::Exif = it.clone_rewound().into();
965                    assert!(exif.get(ExifTag::Orientation).is_some());
966                } else {
967                    let _: crate::Exif = it.clone_rewound().into();
968                }
969            }
970            NoData => {
971                let ms = ms.unwrap();
972                match ms.kind() {
973                    MediaKind::Image => {
974                        let res = parser.parse_exif(ms);
975                        res.unwrap_err();
976                    }
977                    MediaKind::Track => {
978                        let res = parser.parse_track(ms);
979                        res.unwrap_err();
980                    }
981                }
982            }
983            Invalid => {
984                ms.unwrap_err();
985            }
986        }
987    }
988
989    use crate::testkit::open_sample;
990    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
991    use chrono::{DateTime, FixedOffset, NaiveDateTime};
992    use test_case::test_case;
993
994    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
995    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
996    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, 
997        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), 
998            Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
999    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal, 
1000        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1001    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1002        let mut parser = parser();
1003
1004        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1005        assert_eq!(mf.kind(), MediaKind::Image);
1006        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1007        let exif: Exif = iter.into();
1008        assert_eq!(exif.get(tag).unwrap(), &v);
1009
1010        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1011        assert_eq!(mf.kind(), MediaKind::Image);
1012        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1013        let exif: Exif = iter.into();
1014        assert_eq!(exif.get(tag).unwrap(), &v);
1015    }
1016
1017    use crate::video::TrackInfoTag::*;
1018
1019    #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1020    #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1021    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1022    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1023    #[test_case("meta.mov", Make, "Apple".into())]
1024    #[test_case("meta.mov", Model, "iPhone X".into())]
1025    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1026    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1027    #[test_case("meta.mp4", Width, 1920_u32.into())]
1028    #[test_case("meta.mp4", Height, 1080_u32.into())]
1029    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1030    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1031    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1032    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1033    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1034    #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1035    #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1036    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1037    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1038    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1039        let mut parser = parser();
1040
1041        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1042        let info: TrackInfo = parser.parse_track(mf).unwrap();
1043        assert_eq!(info.get(tag).unwrap(), &v);
1044
1045        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1046        let info: TrackInfo = parser.parse_track(mf).unwrap();
1047        assert_eq!(info.get(tag).unwrap(), &v);
1048    }
1049
1050    #[test_case("crash_moov-trak")]
1051    #[test_case("crash_skip_large")]
1052    #[test_case("crash_add_large")]
1053    fn parse_track_crash(path: &str) {
1054        let mut parser = parser();
1055
1056        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1057        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1058
1059        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1060        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1061    }
1062
1063    // Regression: a crafted ISOBMFF file declares an extended 64-bit box size
1064    // just under MAX_PARSE_BUF_SIZE (~1 GB). Pre-fix, the unseekable parser called
1065    // reserve_exact() with that size before reading, allocating ~1 GB even when
1066    // the actual stream contained only a few KB. See commit 81f9e8a.
1067    #[test]
1068    fn parse_oom_large_box() {
1069        let mut parser = parser();
1070
1071        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1072        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1073
1074        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1075        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1076
1077        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1078        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1079
1080        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1081        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1082    }
1083
1084    #[test]
1085    fn media_kind_classifies_image_and_track() {
1086        let img = MediaSource::open("testdata/exif.jpg").unwrap();
1087        assert_eq!(img.kind(), MediaKind::Image);
1088
1089        let trk = MediaSource::open("testdata/meta.mov").unwrap();
1090        assert_eq!(trk.kind(), MediaKind::Track);
1091    }
1092
1093    #[test]
1094    fn media_source_open() {
1095        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1096        assert_eq!(ms.kind(), MediaKind::Image);
1097    }
1098
1099    #[test]
1100    fn parse_exif_returns_exif_iter() {
1101        let mut parser = parser();
1102        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1103        let _: ExifIter = parser.parse_exif(ms).unwrap();
1104    }
1105
1106    #[test]
1107    fn parse_track_returns_track_info() {
1108        let mut parser = parser();
1109        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1110        let _: TrackInfo = parser.parse_track(ms).unwrap();
1111    }
1112
1113    #[test]
1114    fn parse_exif_on_track_returns_exif_not_found_v3() {
1115        let mut parser = parser();
1116        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1117        let res = parser.parse_exif(ms);
1118        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1119    }
1120
1121    #[test]
1122    fn parse_track_on_image_returns_track_not_found_v3() {
1123        let mut parser = parser();
1124        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1125        let res = parser.parse_track(ms);
1126        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1127    }
1128
1129    #[cfg(feature = "tokio")]
1130    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1131    async fn media_parser_parse_exif_async() {
1132        use crate::parser_async::AsyncMediaSource;
1133        let mut parser = MediaParser::new();
1134        let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
1135        let _: ExifIter = parser.parse_exif_async(ms).await.unwrap();
1136    }
1137
1138    #[cfg(feature = "tokio")]
1139    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1140    async fn media_parser_parse_track_async() {
1141        use crate::parser_async::AsyncMediaSource;
1142        let mut parser = MediaParser::new();
1143        let ms = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
1144        let _: TrackInfo = parser.parse_track_async(ms).await.unwrap();
1145    }
1146
1147    #[test]
1148    fn parser_recycles_alloc_when_exif_iter_dropped() {
1149        let mut parser = MediaParser::new();
1150
1151        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1152        let iter = parser.parse_exif(ms).unwrap();
1153        let exif: crate::Exif = iter.into();
1154        drop(exif);
1155        let ptr_after_first = parser.state.cached_ptr_for_test();
1156
1157        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1158        let iter = parser.parse_exif(ms).unwrap();
1159        let _exif: crate::Exif = iter.into();
1160        let ptr_after_second = parser.state.cached_ptr_for_test();
1161
1162        assert!(
1163            ptr_after_first.is_some() && ptr_after_first == ptr_after_second,
1164            "expected recycled allocation, got {:?} -> {:?}",
1165            ptr_after_first,
1166            ptr_after_second
1167        );
1168    }
1169
1170    #[test]
1171    fn parser_new_does_no_upfront_allocation() {
1172        let parser = MediaParser::new();
1173        assert!(parser.state.cached_ptr_for_test().is_none());
1174        assert!(parser.state.buf_is_none_for_test());
1175    }
1176
1177    #[test]
1178    fn buffered_state_memory_mode_sets_and_reads() {
1179        let mut s = BufferedParserState::new();
1180        s.set_memory(bytes::Bytes::from_static(b"abcdefgh"));
1181        assert!(s.is_memory_mode());
1182        assert_eq!(s.buffer(), b"abcdefgh");
1183        s.set_position(3);
1184        assert_eq!(s.buffer(), b"defgh");
1185    }
1186
1187    #[test]
1188    fn buffered_state_share_buf_memory_mode_is_zero_copy() {
1189        let original = bytes::Bytes::from_static(b"the parser owns nothing here");
1190        let original_ptr = original.as_ptr();
1191        let mut s = BufferedParserState::new();
1192        s.set_memory(original);
1193        let (shared, position) = s.share_buf();
1194        assert_eq!(position, 0);
1195        assert_eq!(
1196            shared.as_ptr(),
1197            original_ptr,
1198            "memory share must be a Bytes::clone, not a Vec round-trip"
1199        );
1200        // After share_buf, the parser's memory slot is taken — leaving the state
1201        // ready for the next `reset()` cycle.
1202        assert!(!s.is_memory_mode());
1203    }
1204
1205    #[test]
1206    fn buffered_state_reset_clears_memory() {
1207        let mut s = BufferedParserState::new();
1208        s.set_memory(bytes::Bytes::from_static(b"x"));
1209        s.reset();
1210        assert!(!s.is_memory_mode());
1211        assert_eq!(s.position, 0);
1212    }
1213
1214    #[test]
1215    fn buffered_state_acquire_buf_skips_in_memory_mode() {
1216        let mut s = BufferedParserState::new();
1217        s.set_memory(bytes::Bytes::from_static(b"data"));
1218        s.acquire_buf();
1219        // No streaming buf was allocated.
1220        assert!(s.buf.is_none());
1221        // Memory still readable.
1222        assert_eq!(s.buffer(), b"data");
1223    }
1224
1225    #[test]
1226    fn media_source_from_bytes_image_jpg() {
1227        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1228        let ms = MediaSource::from_bytes(raw).unwrap();
1229        assert_eq!(ms.kind(), MediaKind::Image);
1230        assert!(ms.memory.is_some());
1231    }
1232
1233    #[test]
1234    fn media_source_from_bytes_track_mov() {
1235        let raw = std::fs::read("testdata/meta.mov").unwrap();
1236        let ms = MediaSource::from_bytes(raw).unwrap();
1237        assert_eq!(ms.kind(), MediaKind::Track);
1238    }
1239
1240    #[test]
1241    fn media_source_from_bytes_static_slice() {
1242        // &'static [u8] should work via Into<Bytes> because the file is read
1243        // into a Vec at compile-time-friendly size; here we use include_bytes.
1244        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1245        let ms = MediaSource::from_bytes(raw).unwrap();
1246        assert_eq!(ms.kind(), MediaKind::Image);
1247    }
1248
1249    #[test]
1250    fn media_source_from_bytes_rejects_too_short() {
1251        // Below the smallest mime signature length: should fail mime detection.
1252        let raw = vec![0u8; 4];
1253        let res = MediaSource::from_bytes(raw);
1254        assert!(res.is_err(), "expected mime-detection error");
1255    }
1256
1257    #[test]
1258    fn media_source_from_bytes_rejects_unknown_mime() {
1259        // Random bytes long enough to trigger detection but not match any
1260        // signature.
1261        let raw = vec![0xAAu8; 256];
1262        let res = MediaSource::from_bytes(raw);
1263        assert!(
1264            res.is_err(),
1265            "expected mime-detection error for unknown bytes"
1266        );
1267    }
1268
1269    #[test]
1270    fn p4_5_baseline_exif_jpg_full_dump() {
1271        // Lock down the post-refactor invariant: parsing testdata/exif.jpg through
1272        // the public API must yield the same set of (ifd, tag, value) triples
1273        // before and after P4.5. We capture them as a sorted, formatted string so
1274        // the assertion is a single literal comparison.
1275        let mut parser = MediaParser::new();
1276        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1277        let iter: ExifIter = parser.parse_exif(ms).unwrap();
1278
1279        let mut entries: Vec<String> = iter
1280            .map(|e| {
1281                let tag_name = match e.tag() {
1282                    crate::TagOrCode::Tag(t) => format!("{t}"),
1283                    crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
1284                };
1285                let value_str = e
1286                    .value()
1287                    .map(|v| format!("{v}"))
1288                    .unwrap_or_else(|| "<err>".into());
1289                format!("{}.{}={:?}", e.ifd(), tag_name, value_str)
1290            })
1291            .collect();
1292        entries.sort();
1293        let snapshot = entries.join("\n");
1294
1295        // Sanity: should produce non-trivial content. Exact content is checked by
1296        // the existing parse_media tests; this one guards against accidental
1297        // re-ordering / dedup changes during the refactor.
1298        assert!(
1299            entries.len() > 5,
1300            "expected >5 entries, got {}",
1301            entries.len()
1302        );
1303        assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
1304    }
1305
1306    #[test]
1307    fn parse_exif_from_bytes_jpg_basic() {
1308        let mut parser = MediaParser::new();
1309        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1310        let ms = MediaSource::from_bytes(raw).unwrap();
1311        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1312        let exif: crate::Exif = iter.into();
1313        assert!(exif.get(crate::ExifTag::Make).is_some());
1314    }
1315
1316    #[test]
1317    fn parse_exif_from_bytes_heic_basic() {
1318        let mut parser = MediaParser::new();
1319        let raw = std::fs::read("testdata/exif.heic").unwrap();
1320        let ms = MediaSource::from_bytes(raw).unwrap();
1321        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1322        let exif: crate::Exif = iter.into();
1323        assert_eq!(
1324            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1325            Some("Apple")
1326        );
1327    }
1328
1329    #[test]
1330    fn parse_exif_from_bytes_zero_copy_shared_bytes() {
1331        // Build a Bytes whose pointer we can compare. The ExifIter's underlying
1332        // share must point to the same allocation — proving Bytes::clone path.
1333        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1334        let bytes = bytes::Bytes::from(raw);
1335        let original_ptr = bytes.as_ptr();
1336
1337        let mut parser = MediaParser::new();
1338        let ms = MediaSource::from_bytes(bytes).unwrap();
1339        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1340
1341        // The cached pointer in parser state should be None in memory mode
1342        // (memory mode does not write to cache — the user owns the alloc).
1343        assert!(
1344            parser.state.cached_ptr_for_test().is_none(),
1345            "memory mode must not poison the recycle cache"
1346        );
1347
1348        // Drop the iter and confirm parser is clean for the next call.
1349        drop(iter);
1350
1351        // Build again; pointer identity proves we did not duplicate the alloc
1352        // anywhere along the parse path.
1353        let bytes2 = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
1354        let ms2 = MediaSource::from_bytes(bytes2.clone()).unwrap();
1355        let _iter2 = parser.parse_exif_from_bytes(ms2).unwrap();
1356        // (We cannot assert pointer-equality across distinct user Bytes; the
1357        // assertion above on the first parse is the load-bearing one.)
1358        let _ = original_ptr; // explicit: original_ptr is the assertion target.
1359    }
1360
1361    #[test]
1362    fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
1363        let mut parser = MediaParser::new();
1364        let raw = std::fs::read("testdata/meta.mov").unwrap();
1365        let ms = MediaSource::from_bytes(raw).unwrap();
1366        let res = parser.parse_exif_from_bytes(ms);
1367        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1368    }
1369
1370    #[test]
1371    fn parse_exif_from_bytes_on_truncated_returns_io_error() {
1372        // Truncate exif.jpg to just enough for mime detection but too short
1373        // for the full EXIF block. Memory-mode fill_buf must surface
1374        // UnexpectedEof when the parser walks off the end.
1375        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1376        raw.truncate(200);
1377        let mut parser = MediaParser::new();
1378        let ms = MediaSource::from_bytes(raw).unwrap();
1379        let res = parser.parse_exif_from_bytes(ms);
1380        assert!(
1381            res.is_err(),
1382            "expected error on truncated bytes, got {:?}",
1383            res
1384        );
1385    }
1386
1387    #[test]
1388    fn parse_track_from_bytes_mov_basic() {
1389        let mut parser = MediaParser::new();
1390        let raw = std::fs::read("testdata/meta.mov").unwrap();
1391        let ms = MediaSource::from_bytes(raw).unwrap();
1392        let info = parser.parse_track_from_bytes(ms).unwrap();
1393        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1394        assert_eq!(
1395            info.get(crate::TrackInfoTag::Model),
1396            Some(&"iPhone X".into())
1397        );
1398    }
1399
1400    #[test]
1401    fn parse_track_from_bytes_mp4_basic() {
1402        let mut parser = MediaParser::new();
1403        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1404        let ms = MediaSource::from_bytes(raw).unwrap();
1405        let info = parser.parse_track_from_bytes(ms).unwrap();
1406        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1407    }
1408
1409    #[test]
1410    fn parse_track_from_bytes_mkv_basic() {
1411        let mut parser = MediaParser::new();
1412        let raw = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
1413        let ms = MediaSource::from_bytes(raw).unwrap();
1414        let info = parser.parse_track_from_bytes(ms).unwrap();
1415        assert_eq!(
1416            info.get(crate::TrackInfoTag::Width),
1417            Some(&(640_u32.into()))
1418        );
1419    }
1420
1421    #[test]
1422    fn parse_track_from_bytes_on_image_returns_track_not_found() {
1423        let mut parser = MediaParser::new();
1424        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1425        let ms = MediaSource::from_bytes(raw).unwrap();
1426        let res = parser.parse_track_from_bytes(ms);
1427        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1428    }
1429}