Skip to main content

nom_exif/
parser.rs

1use std::{
2    cmp::{max, min},
3    fmt::{Debug, Display},
4    fs::File,
5    io::{self, Read, Seek},
6    path::Path,
7};
8
9use crate::{
10    error::{ParsedError, ParsingError, ParsingErrorState},
11    exif::TiffHeader,
12    file::MediaMime,
13    ExifIter, TrackInfo,
14};
15
16/// A function that tries to skip `n` bytes of `reader` by seeking. Returns
17/// `Ok(true)` on success, `Ok(false)` if the reader does not support seek
18/// (so the caller should fall back to reading-and-discarding), or
19/// `Err(io::Error)` if seek itself failed (e.g. truncated file handle).
20///
21/// This is captured at construction time by `MediaSource::seekable` /
22/// `unseekable`, replacing the v2 `S: Skip<R>` phantom parameter with a
23/// runtime fn pointer.
24pub(crate) type SkipBySeekFn<R> = fn(&mut R, u64) -> io::Result<bool>;
25
26/// `MediaSource` represents a media data source that can be parsed by
27/// [`MediaParser`].
28///
29/// - Use [`MediaSource::open`] to create a MediaSource from a file path.
30///
31/// - Use [`MediaSource::from_bytes`] for zero-copy in-memory input
32///   (`Vec<u8>`, `&'static [u8]`, [`bytes::Bytes`], …). Pair with
33///   [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`].
34///
35/// - In other cases:
36///
37///   - Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
38///     (an already-open `File` goes here).
39///
40///   - Use [`MediaSource::unseekable`] to create a MediaSource from a
41///     reader that only impl `Read`
42///
43/// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
44/// since the former is more efficient when the parser needs to skip a large number of bytes.
45///
46/// Passing in a `BufRead` should be avoided because [`MediaParser`] comes with
47/// its own buffer management and the buffers can be shared between multiple
48/// parsing tasks, thus avoiding frequent memory allocations.
49pub struct MediaSource<R> {
50    pub(crate) reader: R,
51    pub(crate) buf: Vec<u8>,
52    pub(crate) mime: MediaMime,
53    pub(crate) skip_by_seek: SkipBySeekFn<R>,
54    /// P7: zero-copy memory-mode payload. `Some` only when the source was
55    /// built via [`MediaSource::<()>::from_bytes`]; `reader`, `buf`, and
56    /// `skip_by_seek` are placeholders (and never consulted) in that mode.
57    pub(crate) memory: Option<bytes::Bytes>,
58}
59
60/// Top-level classification of a media source.
61///
62/// `Image` files carry EXIF metadata (parse with `MediaParser::parse_exif`);
63/// `Track` files are time-based containers — video, audio, or both — and
64/// carry track-info metadata (parse with `MediaParser::parse_track`). Pure
65/// audio containers like `.mka` are classified as `Track`.
66#[derive(Debug, Clone, Copy, PartialEq, Eq)]
67pub enum MediaKind {
68    Image,
69    Track,
70}
71
72impl<R> Debug for MediaSource<R> {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("MediaSource")
75            .field("mime", &self.mime)
76            .finish_non_exhaustive()
77    }
78}
79
80// Should be enough for parsing header
81const HEADER_PARSE_BUF_SIZE: usize = 128;
82
83impl<R> MediaSource<R> {
84    /// Top-level classification of this media source.
85    pub fn kind(&self) -> MediaKind {
86        match self.mime {
87            MediaMime::Image(_) => MediaKind::Image,
88            MediaMime::Track(_) => MediaKind::Track,
89        }
90    }
91}
92
93impl<R: Read> MediaSource<R> {
94    fn build(mut reader: R, skip_by_seek: SkipBySeekFn<R>) -> crate::Result<Self> {
95        let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
96        reader
97            .by_ref()
98            .take(HEADER_PARSE_BUF_SIZE as u64)
99            .read_to_end(&mut buf)?;
100        let mime: MediaMime = buf.as_slice().try_into()?;
101        Ok(Self {
102            reader,
103            buf,
104            mime,
105            skip_by_seek,
106            memory: None,
107        })
108    }
109
110    /// Use [`MediaSource::unseekable`] to create a MediaSource from a
111    /// reader that only impl `Read`
112    ///
113    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
114    /// since the former is more efficient when the parser needs to skip a large number of bytes.
115    pub fn unseekable(reader: R) -> crate::Result<Self> {
116        Self::build(reader, |_, _| Ok(false))
117    }
118}
119
120impl<R: Read + Seek> MediaSource<R> {
121    /// Use [`MediaSource::seekable`] to create a MediaSource from a `Read + Seek`
122    ///
123    /// *Note*: Please use [`MediaSource::seekable`] in preference to [`MediaSource::unseekable`],
124    /// since the former is more efficient when the parser needs to skip a large number of bytes.
125    pub fn seekable(reader: R) -> crate::Result<Self> {
126        Self::build(reader, |r, n| {
127            let signed: i64 = n
128                .try_into()
129                .map_err(|_| io::Error::from(io::ErrorKind::InvalidInput))?;
130            r.seek_relative(signed)?;
131            Ok(true)
132        })
133    }
134}
135
136impl MediaSource<File> {
137    /// Open a file at `path` and parse its header to detect the media format.
138    ///
139    /// This is the v3-preferred entry point for the common case of "I have a
140    /// path on disk". For an already-open `File` use [`Self::seekable`].
141    pub fn open<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
142        Self::seekable(File::open(path)?)
143    }
144}
145
146impl MediaSource<()> {
147    /// Build a [`MediaSource`] from an in-memory byte payload.
148    ///
149    /// Accepts any type convertible into [`bytes::Bytes`] — `Bytes`,
150    /// `Vec<u8>`, `&'static [u8]`, [`bytes::Bytes::from_owner`] outputs, and
151    /// HTTP-stack body types that implement `Into<Bytes>` directly.
152    ///
153    /// The header (first up to 128 bytes) is sniffed for media kind, the
154    /// same way [`MediaSource::open`] does it for files. The full payload is
155    /// stored zero-copy: subsequent parsing through
156    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
157    /// shares this `Bytes` directly with the returned `ExifIter` / sub-IFDs
158    /// via reference counting.
159    ///
160    /// The returned source is parsed by the dedicated
161    /// [`MediaParser::parse_exif_from_bytes`] / [`MediaParser::parse_track_from_bytes`]
162    /// methods. The streaming `parse_exif` / `parse_track` methods do not
163    /// accept `MediaSource<()>` (their `R: Read` bound is unsatisfiable).
164    ///
165    /// # Example
166    ///
167    /// ```rust
168    /// use nom_exif::{MediaSource, MediaParser, MediaKind};
169    ///
170    /// let bytes = std::fs::read("./testdata/exif.jpg")?;
171    /// let ms = MediaSource::from_bytes(bytes)?;
172    /// assert_eq!(ms.kind(), MediaKind::Image);
173    ///
174    /// let mut parser = MediaParser::new();
175    /// let _iter = parser.parse_exif_from_bytes(ms)?;
176    /// # Ok::<(), nom_exif::Error>(())
177    /// ```
178    pub fn from_bytes(bytes: impl Into<bytes::Bytes>) -> crate::Result<Self> {
179        let bytes = bytes.into();
180        let head_end = bytes.len().min(HEADER_PARSE_BUF_SIZE);
181        let mime: MediaMime = bytes[..head_end].try_into()?;
182        Ok(Self {
183            reader: (),
184            buf: Vec::new(),
185            mime,
186            // Placeholder: never invoked in memory mode (clear_and_skip's
187            // AdvanceOnly path is the only one taken).
188            skip_by_seek: |_, _| Ok(false),
189            memory: Some(bytes),
190        })
191    }
192}
193
194// ----- Parse-time buffer policy -----
195//
196// Layered by lifecycle:
197//
198// - `INIT_BUF_SIZE` — first fill into the parse loop and the initial
199//   `Vec::with_capacity` for fresh allocations. Modest so cold one-shot
200//   helpers don't over-commit.
201// - `MIN_GROW_SIZE` — floor for every subsequent fill once we're in deep
202//   parse. Larger than `INIT_BUF_SIZE` to amortize syscalls / async
203//   blocking-pool dispatches.
204// - `MAX_PARSE_BUF_SIZE` — hard cap on cumulative buffer growth during a
205//   single parse. Anything that would push past this is rejected as
206//   `io::ErrorKind::Unsupported`; defense against crafted box/IFD headers
207//   that declare absurd sizes.
208// - `MAX_REUSE_BUF_SIZE` — soft cap on the buffer kept between parses for
209//   recycling. After a parse whose buffer ended above this, `shrink_to`
210//   gives the excess back to the allocator. Tuned for typical metadata
211//   sizes (HEIC Live Photo / large CR3 / IIQ all fit under 4 MB) so the
212//   recycle path stays warm for batch workloads.
213pub(crate) const INIT_BUF_SIZE: usize = 8 * 1024;
214pub(crate) const MIN_GROW_SIZE: usize = 16 * 1024;
215pub(crate) const MAX_PARSE_BUF_SIZE: usize = 1024 * 1024 * 1024;
216const MAX_REUSE_BUF_SIZE: usize = 4 * 1024 * 1024;
217
218pub(crate) trait Buf {
219    fn buffer(&self) -> &[u8];
220    fn clear(&mut self);
221
222    fn set_position(&mut self, pos: usize);
223    #[allow(unused)]
224    fn position(&self) -> usize;
225}
226
227/// Buffer-management state used by `MediaParser` (sync and async paths share it).
228///
229/// Holds at most one *active* `Vec<u8>` (being filled by the current parse) and
230/// one *cached* `Bytes` clone of the most recently shared buffer. When the
231/// next parse starts, the cache is consulted: if `Bytes::try_into_mut`
232/// succeeds the underlying allocation is reused (the previous `ExifIter`
233/// has been dropped); otherwise the clone is discarded and a fresh
234/// `Vec<u8>` is allocated.
235///
236/// This replaces the v2 multi-slot `Buffers` pool — `MediaParser` methods
237/// are `&mut self`, so a single slot is sufficient.
238#[derive(Debug, Default)]
239pub(crate) struct BufferedParserState {
240    cached: Option<bytes::Bytes>,
241    buf: Option<Vec<u8>>,
242    /// P7: memory-mode storage. When `Some`, the parser is feeding from a
243    /// caller-owned `Bytes` instead of streaming via a reader. `buf` and
244    /// `cached` are unused in this mode — the user owns the allocation,
245    /// so there is nothing to recycle.
246    memory: Option<bytes::Bytes>,
247    position: usize,
248}
249
250impl BufferedParserState {
251    pub(crate) fn new() -> Self {
252        Self::default()
253    }
254
255    pub(crate) fn reset(&mut self) {
256        // If a parse failed mid-way the buf may still be present; drop it.
257        // Cache stays — recycle on next acquire if eligible.
258        self.buf = None;
259        self.memory = None;
260        self.position = 0;
261    }
262
263    /// Switch the parser state into memory mode, owning `bytes` directly.
264    /// Caller must have already called `reset()` (asserted in debug). Subsequent
265    /// `share_buf` returns a clone of `bytes` (zero-copy: `Bytes::clone` is a
266    /// refcount bump). Subsequent `Buf::buffer()` returns `&bytes[position..]`.
267    pub(crate) fn set_memory(&mut self, bytes: bytes::Bytes) {
268        debug_assert!(
269            self.buf.is_none() && self.memory.is_none(),
270            "set_memory called on non-clean state"
271        );
272        self.memory = Some(bytes);
273        self.position = 0;
274    }
275
276    pub(crate) fn is_memory_mode(&self) -> bool {
277        self.memory.is_some()
278    }
279
280    pub(crate) fn acquire_buf(&mut self) {
281        if self.memory.is_some() {
282            // Memory mode: nothing to acquire — `buffer()` reads from `memory`.
283            return;
284        }
285        debug_assert!(self.buf.is_none());
286        let buf = match self.cached.take() {
287            Some(b) => match b.try_into_mut() {
288                Ok(bm) => {
289                    let mut v = Vec::<u8>::from(bm);
290                    v.clear();
291                    if v.capacity() > MAX_REUSE_BUF_SIZE {
292                        v.shrink_to(MAX_REUSE_BUF_SIZE);
293                    }
294                    v
295                }
296                Err(_still_shared) => Vec::with_capacity(INIT_BUF_SIZE),
297            },
298            None => Vec::with_capacity(INIT_BUF_SIZE),
299        };
300        self.buf = Some(buf);
301    }
302
303    pub(crate) fn buf(&self) -> &Vec<u8> {
304        self.buf.as_ref().expect("no buf here")
305    }
306
307    pub(crate) fn buf_mut(&mut self) -> &mut Vec<u8> {
308        self.buf.as_mut().expect("no buf here")
309    }
310
311    #[cfg(test)]
312    pub(crate) fn cached_ptr_for_test(&self) -> Option<*const u8> {
313        self.cached.as_ref().map(|b| b.as_ptr())
314    }
315
316    #[cfg(test)]
317    pub(crate) fn buf_is_none_for_test(&self) -> bool {
318        self.buf.is_none()
319    }
320}
321
322impl Buf for BufferedParserState {
323    fn buffer(&self) -> &[u8] {
324        if let Some(m) = &self.memory {
325            return &m[self.position..];
326        }
327        &self.buf()[self.position..]
328    }
329    fn clear(&mut self) {
330        // In memory mode `clear` is a no-op: there is no scratch buffer to
331        // truncate, and the caller's bytes must remain available for further
332        // parse_loop_step iterations. clear_and_skip's AdvanceOnly path is
333        // what advances `position` in memory mode.
334        if self.memory.is_some() {
335            return;
336        }
337        self.buf_mut().clear();
338    }
339    fn set_position(&mut self, pos: usize) {
340        self.position = pos;
341    }
342    fn position(&self) -> usize {
343        self.position
344    }
345}
346
347impl ShareBuf for BufferedParserState {
348    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
349        if let Some(m) = self.memory.take() {
350            // Zero-copy share: caller already owns the allocation. No cache
351            // write — recycle is irrelevant when the user holds the alloc.
352            let position = self.position;
353            return (m, position);
354        }
355        let vec = self.buf.take().expect("no buf to share");
356        let bytes = bytes::Bytes::from(vec);
357        let position = self.position;
358        self.cached = Some(bytes.clone());
359        (bytes, position)
360    }
361}
362
363/// What `clear_and_skip` should do, given the current buffer state and
364/// the requested skip count.
365pub(crate) enum SkipPlan {
366    /// Skip is fully within the current buffer; just advance position.
367    AdvanceOnly,
368    /// Buffer must be cleared and `extra` bytes skipped from the reader.
369    ClearAndSkip { extra: usize },
370}
371
372pub(crate) fn clear_and_skip_decide(buffer_len: usize, n: usize) -> SkipPlan {
373    if n <= buffer_len {
374        SkipPlan::AdvanceOnly
375    } else {
376        SkipPlan::ClearAndSkip {
377            extra: n - buffer_len,
378        }
379    }
380}
381
382pub(crate) fn check_fill_size(existing_len: usize, requested: usize) -> io::Result<()> {
383    if requested.saturating_add(existing_len) > MAX_PARSE_BUF_SIZE {
384        tracing::error!(?requested, "the requested buffer size is too big");
385        return Err(io::ErrorKind::Unsupported.into());
386    }
387    Ok(())
388}
389
390pub(crate) enum LoopAction<O> {
391    /// Parse succeeded; return this value to the caller.
392    Done(O),
393    /// Need more bytes — call `fill_buf(reader, n)` then re-step.
394    NeedFill(usize),
395    /// Need to skip bytes — call `clear_and_skip(reader, n)` then re-step.
396    Skip(usize),
397    /// Parse failed permanently.
398    Failed(String),
399}
400
401/// Closure type passed to [`parse_loop_step`].
402pub(crate) type ParseFn<'a, O> =
403    dyn FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState> + 'a;
404
405/// Drives one iteration of the parse-loop algorithm. Pure (no I/O).
406pub(crate) fn parse_loop_step<O>(
407    buffer: &[u8],
408    offset: usize,
409    parsing_state: &mut Option<ParsingState>,
410    parse: &mut ParseFn<'_, O>,
411) -> LoopAction<O> {
412    match parse(buffer, offset, parsing_state.take()) {
413        Ok(o) => LoopAction::Done(o),
414        Err(es) => {
415            *parsing_state = es.state;
416            match es.err {
417                ParsingError::Need(n) => LoopAction::NeedFill(n),
418                ParsingError::ClearAndSkip(n) => LoopAction::Skip(n),
419                ParsingError::Failed(s) => LoopAction::Failed(s),
420            }
421        }
422    }
423}
424
425#[derive(Debug, Clone)]
426pub(crate) enum ParsingState {
427    TiffHeader(TiffHeader),
428    HeifExifSize(usize),
429    Cr3ExifSize(usize),
430}
431
432impl Display for ParsingState {
433    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
434        match self {
435            ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
436            ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
437            ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
438        }
439    }
440}
441
442// Modern replacement for the `Load` trait in loader.rs. Adds offset-aware
443// parsing and `ParsingState` threading for format-specific state machines.
444pub(crate) trait BufParser: Buf + Debug {
445    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
446
447    fn load_and_parse<R: Read, P, O>(
448        &mut self,
449        reader: &mut R,
450        skip_by_seek: SkipBySeekFn<R>,
451        mut parse: P,
452    ) -> Result<O, ParsedError>
453    where
454        P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
455    {
456        self.load_and_parse_with_offset(
457            reader,
458            skip_by_seek,
459            |data, _, state| parse(data, state),
460            0,
461        )
462    }
463
464    #[tracing::instrument(skip_all)]
465    fn load_and_parse_with_offset<R: Read, P, O>(
466        &mut self,
467        reader: &mut R,
468        skip_by_seek: SkipBySeekFn<R>,
469        mut parse: P,
470        offset: usize,
471    ) -> Result<O, ParsedError>
472    where
473        P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
474    {
475        if offset >= self.buffer().len() {
476            self.fill_buf(reader, MIN_GROW_SIZE)?;
477        }
478        let mut parsing_state: Option<ParsingState> = None;
479        loop {
480            match parse_loop_step(self.buffer(), offset, &mut parsing_state, &mut parse) {
481                LoopAction::Done(o) => return Ok(o),
482                LoopAction::NeedFill(needed) => {
483                    let to_read = max(needed, MIN_GROW_SIZE);
484                    let n = self.fill_buf(reader, to_read)?;
485                    if n == 0 {
486                        return Err(ParsedError::NoEnoughBytes);
487                    }
488                }
489                LoopAction::Skip(n) => {
490                    self.clear_and_skip(reader, skip_by_seek, n)?;
491                }
492                LoopAction::Failed(s) => return Err(ParsedError::Failed(s)),
493            }
494        }
495    }
496
497    #[tracing::instrument(skip(reader, skip_by_seek))]
498    fn clear_and_skip<R: Read>(
499        &mut self,
500        reader: &mut R,
501        skip_by_seek: SkipBySeekFn<R>,
502        n: usize,
503    ) -> Result<(), ParsedError> {
504        match clear_and_skip_decide(self.buffer().len(), n) {
505            SkipPlan::AdvanceOnly => {
506                self.set_position(self.position() + n);
507                Ok(())
508            }
509            SkipPlan::ClearAndSkip { extra: skip_n } => {
510                self.clear();
511                let done = (skip_by_seek)(
512                    reader,
513                    skip_n
514                        .try_into()
515                        .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
516                )?;
517                if !done {
518                    let mut skipped = 0;
519                    while skipped < skip_n {
520                        let mut to_skip = skip_n - skipped;
521                        to_skip = min(to_skip, MAX_PARSE_BUF_SIZE);
522                        let n = self.fill_buf(reader, to_skip)?;
523                        skipped += n;
524                        if skipped <= skip_n {
525                            self.clear();
526                        } else {
527                            let remain = skipped - skip_n;
528                            self.set_position(self.buffer().len() - remain);
529                            break;
530                        }
531                    }
532                }
533
534                if self.buffer().is_empty() {
535                    self.fill_buf(reader, MIN_GROW_SIZE)?;
536                }
537                Ok(())
538            }
539        }
540    }
541}
542
543impl BufParser for MediaParser {
544    #[tracing::instrument(skip(self, reader), fields(buf_len=self.state.buffer().len()))]
545    fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
546        if self.state.is_memory_mode() {
547            // Memory mode owns every byte it will ever have. A request for
548            // more is "the parser walked off the end of the input"; surface
549            // it the same way the streaming path surfaces a 0-byte read.
550            return Err(std::io::ErrorKind::UnexpectedEof.into());
551        }
552        check_fill_size(self.state.buf().len(), size)?;
553
554        // Do not pre-allocate `size` bytes: a crafted box header can declare a
555        // huge extended size (up to MAX_PARSE_BUF_SIZE) that far exceeds the actual
556        // stream length. reserve_exact would allocate that memory immediately
557        // even when the reader has only a few bytes left. read_to_end grows the
558        // buffer from the reader's actual size hint instead.
559        let n = reader.take(size as u64).read_to_end(self.state.buf_mut())?;
560        if n == 0 {
561            tracing::error!(buf_len = self.state.buf().len(), "fill_buf: EOF");
562            return Err(std::io::ErrorKind::UnexpectedEof.into());
563        }
564
565        tracing::debug!(
566            ?size,
567            ?n,
568            buf_len = self.state.buf().len(),
569            "fill_buf: read bytes"
570        );
571
572        Ok(n)
573    }
574}
575
576impl Buf for MediaParser {
577    fn buffer(&self) -> &[u8] {
578        self.state.buffer()
579    }
580
581    fn clear(&mut self) {
582        self.state.clear();
583    }
584
585    fn set_position(&mut self, pos: usize) {
586        self.state.set_position(pos);
587    }
588
589    fn position(&self) -> usize {
590        self.state.position()
591    }
592}
593
594/// A `MediaParser` can parse media info from a [`MediaSource`].
595///
596/// `MediaParser` manages inner parse buffers that can be shared between
597/// multiple parsing tasks, thus avoiding frequent memory allocations.
598///
599/// Therefore:
600///
601/// - Try to reuse a `MediaParser` instead of creating a new one every time
602///   you need it.
603///
604/// - `MediaSource` should be created directly from `Read`, not from `BufRead`.
605///
606/// ## Example
607///
608/// ```rust
609/// use nom_exif::*;
610/// use chrono::DateTime;
611///
612/// let mut parser = MediaParser::new();
613///
614/// // ------------------- Parse Exif Info
615/// let ms = MediaSource::open("./testdata/exif.heic").unwrap();
616/// assert_eq!(ms.kind(), MediaKind::Image);
617/// let mut iter = parser.parse_exif(ms).unwrap();
618///
619/// let entry = iter.next().unwrap();
620/// assert!(matches!(entry.tag(), nom_exif::TagOrCode::Tag(ExifTag::Make)));
621/// assert_eq!(entry.value().unwrap().as_str().unwrap(), "Apple");
622///
623/// // Convert `ExifIter` into an `Exif`. Clone it before converting, so that
624/// // we can start the iteration from the beginning.
625/// let exif: Exif = iter.clone().into();
626/// assert_eq!(exif.get(ExifTag::Make).unwrap().as_str().unwrap(), "Apple");
627///
628/// // ------------------- Parse Track Info
629/// let ms = MediaSource::open("./testdata/meta.mov").unwrap();
630/// assert_eq!(ms.kind(), MediaKind::Track);
631/// let info = parser.parse_track(ms).unwrap();
632///
633/// assert_eq!(info.get(TrackInfoTag::Make), Some(&"Apple".into()));
634/// assert_eq!(info.get(TrackInfoTag::Model), Some(&"iPhone X".into()));
635/// assert_eq!(info.get(TrackInfoTag::GpsIso6709), Some(&"+27.1281+100.2508+000.000/".into()));
636/// assert_eq!(info.gps_info().unwrap().latitude_ref, LatRef::North);
637/// assert_eq!(
638///     info.gps_info().unwrap().latitude,
639///     LatLng::new(URational::new(27, 1), URational::new(7, 1), URational::new(4116, 100)),
640/// );
641/// ```
642pub struct MediaParser {
643    state: BufferedParserState,
644}
645
646impl Debug for MediaParser {
647    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
648        f.debug_struct("MediaParser")
649            .field("state", &self.state)
650            .finish_non_exhaustive()
651    }
652}
653
654impl Default for MediaParser {
655    fn default() -> Self {
656        Self {
657            state: BufferedParserState::new(),
658        }
659    }
660}
661
662pub(crate) trait ShareBuf {
663    /// Take ownership of the parser's active buffer and return the full
664    /// allocation as `Bytes` plus the parser's `position` at share-time.
665    /// Caller is responsible for slicing: a parse-loop range `r` corresponds
666    /// to absolute range `(r.start + position)..(r.end + position)`.
667    fn share_buf(&mut self) -> (bytes::Bytes, usize);
668}
669
670impl ShareBuf for MediaParser {
671    fn share_buf(&mut self) -> (bytes::Bytes, usize) {
672        self.state.share_buf()
673    }
674}
675
676impl MediaParser {
677    pub fn new() -> Self {
678        Self::default()
679    }
680
681    /// Parse Exif metadata from an image source. Returns `Error::ExifNotFound`
682    /// if the source is a `Track` (use [`Self::parse_track`] instead).
683    ///
684    /// `MediaParser` reuses its internal parse buffer across calls, so prefer
685    /// reusing a single `MediaParser` over creating a new one per file. Drop
686    /// the returned [`ExifIter`] (or convert it into [`crate::Exif`]) before
687    /// the next `parse_*` call so the buffer can be reclaimed.
688    pub fn parse_exif<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<ExifIter> {
689        self.reset();
690        self.acquire_buf();
691        self.buf_mut().append(&mut ms.buf);
692        let res: crate::Result<ExifIter> = (|| {
693            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
694            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
695                return Err(crate::Error::ExifNotFound);
696            }
697            crate::exif::parse_exif_iter(
698                self,
699                ms.mime.unwrap_image(),
700                &mut ms.reader,
701                ms.skip_by_seek,
702            )
703        })();
704        self.reset();
705        res
706    }
707
708    /// Parse track info from a video/audio source.
709    ///
710    /// In v3.1, this also accepts JPEG images that carry an embedded
711    /// Pixel/Google Motion Photo trailer: when [`ExifIter::has_embedded_track`]
712    /// returned `true` for such a JPEG, calling `parse_track` on the same
713    /// source extracts the embedded MP4's metadata. Other image formats
714    /// without an embedded track return [`crate::Error::TrackNotFound`].
715    pub fn parse_track<R: Read>(&mut self, mut ms: MediaSource<R>) -> crate::Result<TrackInfo> {
716        self.reset();
717        self.acquire_buf();
718        self.buf_mut().append(&mut ms.buf);
719        let res: crate::Result<TrackInfo> = (|| {
720            self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
721            match ms.mime {
722                crate::file::MediaMime::Image(crate::file::MediaMimeImage::Jpeg) => {
723                    self.parse_jpeg_motion_photo(&mut ms.reader)
724                }
725                crate::file::MediaMime::Image(_) => Err(crate::Error::TrackNotFound),
726                crate::file::MediaMime::Track(mime_track) => {
727                    let skip = ms.skip_by_seek;
728                    Ok(self.load_and_parse(ms.reader.by_ref(), skip, |data, _| {
729                        crate::video::parse_track_info(data, mime_track)
730                            .map_err(|e| ParsingErrorState::new(e, None))
731                    })?)
732                }
733            }
734        })();
735        self.reset();
736        res
737    }
738
739    /// Read a JPEG to EOF, locate a Pixel-style Motion Photo MP4 trailer,
740    /// and parse it as track metadata. Returns
741    /// [`crate::Error::TrackNotFound`] if no Motion Photo signal is
742    /// present in the JPEG's XMP.
743    fn parse_jpeg_motion_photo<R: Read>(&mut self, reader: &mut R) -> crate::Result<TrackInfo> {
744        // Drain the rest of the JPEG into the parse buffer so we can
745        // address the trailing MP4 by its byte offset from EOF.
746        reader.read_to_end(self.buf_mut())?;
747        let buf = self.buf_mut();
748        let Some(offset) = crate::jpeg::find_motion_photo_offset(buf) else {
749            return Err(crate::Error::TrackNotFound);
750        };
751        let trailer_start = (buf.len() as u64)
752            .checked_sub(offset)
753            .ok_or(crate::Error::TrackNotFound)? as usize;
754        let trailer = &buf[trailer_start..];
755
756        // The trailer can be MP4 / MOV / 3gp depending on the source device;
757        // dispatch by sniffing it as a fresh ISO BMFF input.
758        let trailer_mime = crate::file::MediaMime::try_from(trailer)
759            .map_err(|_| crate::Error::TrackNotFound)?;
760        let mime_track = match trailer_mime {
761            crate::file::MediaMime::Track(t) => t,
762            crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
763        };
764        crate::video::parse_track_info(trailer, mime_track).map_err(|e| match e {
765            crate::error::ParsingError::Need(_)
766            | crate::error::ParsingError::ClearAndSkip(_) => crate::Error::UnexpectedEof {
767                context: "motion-photo trailer",
768            },
769            crate::error::ParsingError::Failed(msg) => crate::Error::Malformed {
770                kind: crate::error::MalformedKind::IsoBmffBox,
771                message: msg,
772            },
773        })
774    }
775
776    /// Parse Exif metadata from an in-memory byte payload built via
777    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::ExifNotFound` if the
778    /// payload is a `Track` (use [`Self::parse_track_from_bytes`] instead).
779    ///
780    /// Memory-mode parsing is **zero-copy**: the underlying `Bytes` is shared
781    /// with the returned [`ExifIter`] (and its sub-IFDs / CR3 CMT blocks) via
782    /// reference counting. No `Vec<u8>` is allocated for the parse buffer.
783    pub fn parse_exif_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<ExifIter> {
784        self.reset();
785        let memory = ms
786            .memory
787            .take()
788            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
789        self.state.set_memory(memory);
790        let res: crate::Result<ExifIter> = (|| {
791            if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
792                return Err(crate::Error::ExifNotFound);
793            }
794            // Placeholder reader: never read from in memory mode (fill_buf
795            // short-circuits; clear_and_skip uses AdvanceOnly).
796            let mut empty = std::io::empty();
797            crate::exif::parse_exif_iter(
798                self,
799                ms.mime.unwrap_image(),
800                &mut empty,
801                // Placeholder skip-by-seek: never invoked.
802                |_, _| Ok(false),
803            )
804        })();
805        self.reset();
806        res
807    }
808
809    /// Parse track info from an in-memory video/audio byte payload built via
810    /// [`MediaSource::<()>::from_bytes`]. Returns `Error::TrackNotFound` if the
811    /// payload is an `Image` (use [`Self::parse_exif_from_bytes`] instead).
812    ///
813    /// Like [`Self::parse_exif_from_bytes`], the parse is zero-copy with respect to
814    /// the user-supplied `Bytes`.
815    pub fn parse_track_from_bytes(&mut self, mut ms: MediaSource<()>) -> crate::Result<TrackInfo> {
816        self.reset();
817        let memory = ms
818            .memory
819            .take()
820            .expect("MediaSource<()> must have memory (only constructor is from_bytes)");
821        self.state.set_memory(memory);
822        let res: crate::Result<TrackInfo> = (|| {
823            let mime_track = match ms.mime {
824                crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
825                crate::file::MediaMime::Track(t) => t,
826            };
827            let mut empty = std::io::empty();
828            let out = self.load_and_parse(
829                &mut empty,
830                |_, _| Ok(false),
831                |data, _| {
832                    crate::video::parse_track_info(data, mime_track)
833                        .map_err(|e| ParsingErrorState::new(e, None))
834                },
835            )?;
836            Ok(out)
837        })();
838        self.reset();
839        res
840    }
841
842    fn reset(&mut self) {
843        self.state.reset();
844    }
845
846    fn buf_mut(&mut self) -> &mut Vec<u8> {
847        self.state.buf_mut()
848    }
849
850    fn acquire_buf(&mut self) {
851        self.state.acquire_buf();
852    }
853}
854
855#[cfg(feature = "tokio")]
856mod tokio_impl {
857    use super::*;
858    use crate::error::ParsingErrorState;
859    use crate::parser_async::{AsyncBufParser, AsyncMediaSource};
860    use tokio::io::{AsyncRead, AsyncReadExt};
861
862    impl AsyncBufParser for MediaParser {
863        async fn fill_buf<R: AsyncRead + Unpin>(
864            &mut self,
865            reader: &mut R,
866            size: usize,
867        ) -> std::io::Result<usize> {
868            if self.state.is_memory_mode() {
869                // Memory mode owns every byte it will ever have. Surface
870                // "walked off end of input" the same way the streaming path
871                // surfaces a 0-byte read.
872                return Err(std::io::ErrorKind::UnexpectedEof.into());
873            }
874            check_fill_size(self.state.buf().len(), size)?;
875            // Same rationale as the sync version: do not pre-allocate `size` bytes.
876            let n = reader
877                .take(size as u64)
878                .read_to_end(self.state.buf_mut())
879                .await?;
880            if n == 0 {
881                return Err(std::io::ErrorKind::UnexpectedEof.into());
882            }
883            Ok(n)
884        }
885    }
886
887    impl MediaParser {
888        /// Parse Exif metadata from an async image source. Returns
889        /// `Error::ExifNotFound` if the source is a `Track`.
890        pub async fn parse_exif_async<R: AsyncRead + Unpin + Send>(
891            &mut self,
892            mut ms: AsyncMediaSource<R>,
893        ) -> crate::Result<ExifIter> {
894            self.reset();
895            self.acquire_buf();
896            self.buf_mut().append(&mut ms.buf);
897            let res: crate::Result<ExifIter> = async {
898                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
899                if !matches!(ms.mime, crate::file::MediaMime::Image(_)) {
900                    return Err(crate::Error::ExifNotFound);
901                }
902                crate::exif::parse_exif_iter_async(
903                    self,
904                    ms.mime.unwrap_image(),
905                    &mut ms.reader,
906                    ms.skip_by_seek,
907                )
908                .await
909            }
910            .await;
911            self.reset();
912            res
913        }
914
915        /// Parse track info from an async video/audio source. Returns
916        /// `Error::TrackNotFound` if the source is an `Image`.
917        pub async fn parse_track_async<R: AsyncRead + Unpin + Send>(
918            &mut self,
919            mut ms: AsyncMediaSource<R>,
920        ) -> crate::Result<TrackInfo> {
921            self.reset();
922            self.acquire_buf();
923            self.buf_mut().append(&mut ms.buf);
924            let res: crate::Result<TrackInfo> = async {
925                <Self as AsyncBufParser>::fill_buf(self, &mut ms.reader, INIT_BUF_SIZE).await?;
926                let mime_track = match ms.mime {
927                    crate::file::MediaMime::Image(_) => return Err(crate::Error::TrackNotFound),
928                    crate::file::MediaMime::Track(t) => t,
929                };
930                let skip = ms.skip_by_seek;
931                let out = <Self as AsyncBufParser>::load_and_parse(
932                    self,
933                    &mut ms.reader,
934                    skip,
935                    |data, _| {
936                        crate::video::parse_track_info(data, mime_track)
937                            .map_err(|e| ParsingErrorState::new(e, None))
938                    },
939                )
940                .await?;
941                Ok(out)
942            }
943            .await;
944            self.reset();
945            res
946        }
947    }
948}
949
950#[cfg(test)]
951mod tests {
952    use std::sync::{LazyLock, Mutex, MutexGuard};
953
954    use super::*;
955    use test_case::case;
956
957    enum TrackExif {
958        Track,
959        Exif,
960        NoData,
961        Invalid,
962    }
963    use TrackExif::*;
964
965    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
966    fn parser() -> MutexGuard<'static, MediaParser> {
967        PARSER.lock().unwrap()
968    }
969
970    #[case("3gp_640x360.3gp", Track)]
971    #[case("broken.jpg", Exif)]
972    #[case("compatible-brands-fail.heic", Invalid)]
973    #[case("compatible-brands-fail.mov", Invalid)]
974    #[case("compatible-brands.heic", NoData)]
975    #[case("compatible-brands.mov", NoData)]
976    #[case("embedded-in-heic.mov", Track)]
977    #[case("exif.heic", Exif)]
978    #[case("exif.jpg", Exif)]
979    #[case("exif-no-tz.jpg", Exif)]
980    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
981    #[case("meta.mov", Track)]
982    #[case("meta.mp4", Track)]
983    #[case("mka.mka", Track)]
984    #[case("mkv_640x360.mkv", Track)]
985    #[case("exif-one-entry.heic", Exif)]
986    #[case("no-exif.jpg", NoData)]
987    #[case("tif.tif", Exif)]
988    #[case("ramdisk.img", Invalid)]
989    #[case("webm_480.webm", Track)]
990    fn parse_media(path: &str, te: TrackExif) {
991        let mut parser = parser();
992        let ms = MediaSource::open(Path::new("testdata").join(path));
993        match te {
994            Track => {
995                let ms = ms.unwrap();
996                assert_eq!(ms.kind(), MediaKind::Track);
997                let _: TrackInfo = parser.parse_track(ms).unwrap();
998            }
999            Exif => {
1000                let ms = ms.unwrap();
1001                assert_eq!(ms.kind(), MediaKind::Image);
1002                let mut it: ExifIter = parser.parse_exif(ms).unwrap();
1003                let _ = it.parse_gps();
1004
1005                if path.contains("one-entry") {
1006                    assert!(it.next().is_some());
1007                    assert!(it.next().is_none());
1008
1009                    let exif: crate::Exif = it.clone_rewound().into();
1010                    assert!(exif.get(ExifTag::Orientation).is_some());
1011                } else {
1012                    let _: crate::Exif = it.clone_rewound().into();
1013                }
1014            }
1015            NoData => {
1016                let ms = ms.unwrap();
1017                match ms.kind() {
1018                    MediaKind::Image => {
1019                        let res = parser.parse_exif(ms);
1020                        res.unwrap_err();
1021                    }
1022                    MediaKind::Track => {
1023                        let res = parser.parse_track(ms);
1024                        res.unwrap_err();
1025                    }
1026                }
1027            }
1028            Invalid => {
1029                ms.unwrap_err();
1030            }
1031        }
1032    }
1033
1034    use crate::testkit::open_sample;
1035    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
1036    use chrono::{DateTime, FixedOffset, NaiveDateTime};
1037    use test_case::test_case;
1038
1039    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
1040    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
1041    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, 
1042        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), 
1043            Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
1044    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal, 
1045        (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
1046    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
1047        let mut parser = parser();
1048
1049        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1050        assert_eq!(mf.kind(), MediaKind::Image);
1051        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1052        let exif: Exif = iter.into();
1053        assert_eq!(exif.get(tag).unwrap(), &v);
1054
1055        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1056        assert_eq!(mf.kind(), MediaKind::Image);
1057        let iter: ExifIter = parser.parse_exif(mf).unwrap();
1058        let exif: Exif = iter.into();
1059        assert_eq!(exif.get(tag).unwrap(), &v);
1060    }
1061
1062    use crate::video::TrackInfoTag::*;
1063
1064    #[test_case("mkv_640x360.mkv", Width, 640_u32.into())]
1065    #[test_case("mkv_640x360.mkv", Height, 360_u32.into())]
1066    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
1067    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
1068    #[test_case("meta.mov", Make, "Apple".into())]
1069    #[test_case("meta.mov", Model, "iPhone X".into())]
1070    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
1071    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
1072    #[test_case("meta.mp4", Width, 1920_u32.into())]
1073    #[test_case("meta.mp4", Height, 1080_u32.into())]
1074    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
1075    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
1076    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
1077    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
1078    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
1079    #[test_case("sony-a7-xavc.MP4", Width, 1920_u32.into())]
1080    #[test_case("sony-a7-xavc.MP4", Height, 1080_u32.into())]
1081    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
1082    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
1083    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
1084        let mut parser = parser();
1085
1086        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1087        let info: TrackInfo = parser.parse_track(mf).unwrap();
1088        assert_eq!(info.get(tag).unwrap(), &v);
1089
1090        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1091        let info: TrackInfo = parser.parse_track(mf).unwrap();
1092        assert_eq!(info.get(tag).unwrap(), &v);
1093    }
1094
1095    #[test_case("crash_moov-trak")]
1096    #[test_case("crash_skip_large")]
1097    #[test_case("crash_add_large")]
1098    fn parse_track_crash(path: &str) {
1099        let mut parser = parser();
1100
1101        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
1102        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1103
1104        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
1105        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1106    }
1107
1108    // Regression: a crafted ISOBMFF file declares an extended 64-bit box size
1109    // just under MAX_PARSE_BUF_SIZE (~1 GB). Pre-fix, the unseekable parser called
1110    // reserve_exact() with that size before reading, allocating ~1 GB even when
1111    // the actual stream contained only a few KB. See commit 81f9e8a.
1112    #[test]
1113    fn parse_oom_large_box() {
1114        let mut parser = parser();
1115
1116        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1117        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1118
1119        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1120        let _: Result<ExifIter, _> = parser.parse_exif(mf);
1121
1122        let mf = MediaSource::seekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1123        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1124
1125        let mf = MediaSource::unseekable(open_sample("oom_large_box.heic").unwrap()).unwrap();
1126        let _: TrackInfo = parser.parse_track(mf).unwrap_or_default();
1127    }
1128
1129    #[test]
1130    fn media_kind_classifies_image_and_track() {
1131        let img = MediaSource::open("testdata/exif.jpg").unwrap();
1132        assert_eq!(img.kind(), MediaKind::Image);
1133
1134        let trk = MediaSource::open("testdata/meta.mov").unwrap();
1135        assert_eq!(trk.kind(), MediaKind::Track);
1136    }
1137
1138    #[test]
1139    fn media_source_open() {
1140        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1141        assert_eq!(ms.kind(), MediaKind::Image);
1142    }
1143
1144    #[test]
1145    fn parse_exif_returns_exif_iter() {
1146        let mut parser = parser();
1147        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1148        let _: ExifIter = parser.parse_exif(ms).unwrap();
1149    }
1150
1151    #[test]
1152    fn parse_track_returns_track_info() {
1153        let mut parser = parser();
1154        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1155        let _: TrackInfo = parser.parse_track(ms).unwrap();
1156    }
1157
1158    #[test]
1159    fn parse_exif_on_track_returns_exif_not_found_v3() {
1160        let mut parser = parser();
1161        let ms = MediaSource::open("testdata/meta.mov").unwrap();
1162        let res = parser.parse_exif(ms);
1163        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1164    }
1165
1166    #[test]
1167    fn parse_track_on_image_returns_track_not_found_v3() {
1168        let mut parser = parser();
1169        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1170        let res = parser.parse_track(ms);
1171        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1172    }
1173
1174    #[cfg(feature = "tokio")]
1175    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1176    async fn media_parser_parse_exif_async() {
1177        use crate::parser_async::AsyncMediaSource;
1178        let mut parser = MediaParser::new();
1179        let ms = AsyncMediaSource::open("testdata/exif.jpg").await.unwrap();
1180        let _: ExifIter = parser.parse_exif_async(ms).await.unwrap();
1181    }
1182
1183    #[cfg(feature = "tokio")]
1184    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
1185    async fn media_parser_parse_track_async() {
1186        use crate::parser_async::AsyncMediaSource;
1187        let mut parser = MediaParser::new();
1188        let ms = AsyncMediaSource::open("testdata/meta.mov").await.unwrap();
1189        let _: TrackInfo = parser.parse_track_async(ms).await.unwrap();
1190    }
1191
1192    #[test]
1193    fn parser_recycles_alloc_when_exif_iter_dropped() {
1194        let mut parser = MediaParser::new();
1195
1196        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1197        let iter = parser.parse_exif(ms).unwrap();
1198        let exif: crate::Exif = iter.into();
1199        drop(exif);
1200        let ptr_after_first = parser.state.cached_ptr_for_test();
1201
1202        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1203        let iter = parser.parse_exif(ms).unwrap();
1204        let _exif: crate::Exif = iter.into();
1205        let ptr_after_second = parser.state.cached_ptr_for_test();
1206
1207        assert!(
1208            ptr_after_first.is_some() && ptr_after_first == ptr_after_second,
1209            "expected recycled allocation, got {:?} -> {:?}",
1210            ptr_after_first,
1211            ptr_after_second
1212        );
1213    }
1214
1215    #[test]
1216    fn parser_new_does_no_upfront_allocation() {
1217        let parser = MediaParser::new();
1218        assert!(parser.state.cached_ptr_for_test().is_none());
1219        assert!(parser.state.buf_is_none_for_test());
1220    }
1221
1222    #[test]
1223    fn buffered_state_memory_mode_sets_and_reads() {
1224        let mut s = BufferedParserState::new();
1225        s.set_memory(bytes::Bytes::from_static(b"abcdefgh"));
1226        assert!(s.is_memory_mode());
1227        assert_eq!(s.buffer(), b"abcdefgh");
1228        s.set_position(3);
1229        assert_eq!(s.buffer(), b"defgh");
1230    }
1231
1232    #[test]
1233    fn buffered_state_share_buf_memory_mode_is_zero_copy() {
1234        let original = bytes::Bytes::from_static(b"the parser owns nothing here");
1235        let original_ptr = original.as_ptr();
1236        let mut s = BufferedParserState::new();
1237        s.set_memory(original);
1238        let (shared, position) = s.share_buf();
1239        assert_eq!(position, 0);
1240        assert_eq!(
1241            shared.as_ptr(),
1242            original_ptr,
1243            "memory share must be a Bytes::clone, not a Vec round-trip"
1244        );
1245        // After share_buf, the parser's memory slot is taken — leaving the state
1246        // ready for the next `reset()` cycle.
1247        assert!(!s.is_memory_mode());
1248    }
1249
1250    #[test]
1251    fn buffered_state_reset_clears_memory() {
1252        let mut s = BufferedParserState::new();
1253        s.set_memory(bytes::Bytes::from_static(b"x"));
1254        s.reset();
1255        assert!(!s.is_memory_mode());
1256        assert_eq!(s.position, 0);
1257    }
1258
1259    #[test]
1260    fn buffered_state_acquire_buf_skips_in_memory_mode() {
1261        let mut s = BufferedParserState::new();
1262        s.set_memory(bytes::Bytes::from_static(b"data"));
1263        s.acquire_buf();
1264        // No streaming buf was allocated.
1265        assert!(s.buf.is_none());
1266        // Memory still readable.
1267        assert_eq!(s.buffer(), b"data");
1268    }
1269
1270    #[test]
1271    fn media_source_from_bytes_image_jpg() {
1272        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1273        let ms = MediaSource::from_bytes(raw).unwrap();
1274        assert_eq!(ms.kind(), MediaKind::Image);
1275        assert!(ms.memory.is_some());
1276    }
1277
1278    #[test]
1279    fn media_source_from_bytes_track_mov() {
1280        let raw = std::fs::read("testdata/meta.mov").unwrap();
1281        let ms = MediaSource::from_bytes(raw).unwrap();
1282        assert_eq!(ms.kind(), MediaKind::Track);
1283    }
1284
1285    #[test]
1286    fn media_source_from_bytes_static_slice() {
1287        // &'static [u8] should work via Into<Bytes> because the file is read
1288        // into a Vec at compile-time-friendly size; here we use include_bytes.
1289        let raw: &'static [u8] = include_bytes!("../testdata/exif.jpg");
1290        let ms = MediaSource::from_bytes(raw).unwrap();
1291        assert_eq!(ms.kind(), MediaKind::Image);
1292    }
1293
1294    #[test]
1295    fn media_source_from_bytes_rejects_too_short() {
1296        // Below the smallest mime signature length: should fail mime detection.
1297        let raw = vec![0u8; 4];
1298        let res = MediaSource::from_bytes(raw);
1299        assert!(res.is_err(), "expected mime-detection error");
1300    }
1301
1302    #[test]
1303    fn media_source_from_bytes_rejects_unknown_mime() {
1304        // Random bytes long enough to trigger detection but not match any
1305        // signature.
1306        let raw = vec![0xAAu8; 256];
1307        let res = MediaSource::from_bytes(raw);
1308        assert!(
1309            res.is_err(),
1310            "expected mime-detection error for unknown bytes"
1311        );
1312    }
1313
1314    #[test]
1315    fn p4_5_baseline_exif_jpg_full_dump() {
1316        // Lock down the post-refactor invariant: parsing testdata/exif.jpg through
1317        // the public API must yield the same set of (ifd, tag, value) triples
1318        // before and after P4.5. We capture them as a sorted, formatted string so
1319        // the assertion is a single literal comparison.
1320        let mut parser = MediaParser::new();
1321        let ms = MediaSource::open("testdata/exif.jpg").unwrap();
1322        let iter: ExifIter = parser.parse_exif(ms).unwrap();
1323
1324        let mut entries: Vec<String> = iter
1325            .map(|e| {
1326                let tag_name = match e.tag() {
1327                    crate::TagOrCode::Tag(t) => format!("{t}"),
1328                    crate::TagOrCode::Unknown(c) => format!("0x{c:04x}"),
1329                };
1330                let value_str = e
1331                    .value()
1332                    .map(|v| format!("{v}"))
1333                    .unwrap_or_else(|| "<err>".into());
1334                format!("{}.{}={:?}", e.ifd(), tag_name, value_str)
1335            })
1336            .collect();
1337        entries.sort();
1338        let snapshot = entries.join("\n");
1339
1340        // Sanity: should produce non-trivial content. Exact content is checked by
1341        // the existing parse_media tests; this one guards against accidental
1342        // re-ordering / dedup changes during the refactor.
1343        assert!(
1344            entries.len() > 5,
1345            "expected >5 entries, got {}",
1346            entries.len()
1347        );
1348        assert!(snapshot.contains("Make"), "expected Make tag in snapshot");
1349    }
1350
1351    #[test]
1352    fn parse_exif_from_bytes_jpg_basic() {
1353        let mut parser = MediaParser::new();
1354        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1355        let ms = MediaSource::from_bytes(raw).unwrap();
1356        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1357        let exif: crate::Exif = iter.into();
1358        assert!(exif.get(crate::ExifTag::Make).is_some());
1359    }
1360
1361    #[test]
1362    fn parse_exif_from_bytes_heic_basic() {
1363        let mut parser = MediaParser::new();
1364        let raw = std::fs::read("testdata/exif.heic").unwrap();
1365        let ms = MediaSource::from_bytes(raw).unwrap();
1366        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1367        let exif: crate::Exif = iter.into();
1368        assert_eq!(
1369            exif.get(crate::ExifTag::Make).and_then(|v| v.as_str()),
1370            Some("Apple")
1371        );
1372    }
1373
1374    #[test]
1375    fn parse_exif_from_bytes_zero_copy_shared_bytes() {
1376        // Build a Bytes whose pointer we can compare. The ExifIter's underlying
1377        // share must point to the same allocation — proving Bytes::clone path.
1378        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1379        let bytes = bytes::Bytes::from(raw);
1380        let original_ptr = bytes.as_ptr();
1381
1382        let mut parser = MediaParser::new();
1383        let ms = MediaSource::from_bytes(bytes).unwrap();
1384        let iter = parser.parse_exif_from_bytes(ms).unwrap();
1385
1386        // The cached pointer in parser state should be None in memory mode
1387        // (memory mode does not write to cache — the user owns the alloc).
1388        assert!(
1389            parser.state.cached_ptr_for_test().is_none(),
1390            "memory mode must not poison the recycle cache"
1391        );
1392
1393        // Drop the iter and confirm parser is clean for the next call.
1394        drop(iter);
1395
1396        // Build again; pointer identity proves we did not duplicate the alloc
1397        // anywhere along the parse path.
1398        let bytes2 = bytes::Bytes::from(std::fs::read("testdata/exif.jpg").unwrap());
1399        let ms2 = MediaSource::from_bytes(bytes2.clone()).unwrap();
1400        let _iter2 = parser.parse_exif_from_bytes(ms2).unwrap();
1401        // (We cannot assert pointer-equality across distinct user Bytes; the
1402        // assertion above on the first parse is the load-bearing one.)
1403        let _ = original_ptr; // explicit: original_ptr is the assertion target.
1404    }
1405
1406    #[test]
1407    fn parse_exif_from_bytes_on_track_returns_exif_not_found() {
1408        let mut parser = MediaParser::new();
1409        let raw = std::fs::read("testdata/meta.mov").unwrap();
1410        let ms = MediaSource::from_bytes(raw).unwrap();
1411        let res = parser.parse_exif_from_bytes(ms);
1412        assert!(matches!(res, Err(crate::Error::ExifNotFound)));
1413    }
1414
1415    #[test]
1416    fn parse_exif_from_bytes_on_truncated_returns_io_error() {
1417        // Truncate exif.jpg to just enough for mime detection but too short
1418        // for the full EXIF block. Memory-mode fill_buf must surface
1419        // UnexpectedEof when the parser walks off the end.
1420        let mut raw = std::fs::read("testdata/exif.jpg").unwrap();
1421        raw.truncate(200);
1422        let mut parser = MediaParser::new();
1423        let ms = MediaSource::from_bytes(raw).unwrap();
1424        let res = parser.parse_exif_from_bytes(ms);
1425        assert!(
1426            res.is_err(),
1427            "expected error on truncated bytes, got {:?}",
1428            res
1429        );
1430    }
1431
1432    #[test]
1433    fn parse_track_from_bytes_mov_basic() {
1434        let mut parser = MediaParser::new();
1435        let raw = std::fs::read("testdata/meta.mov").unwrap();
1436        let ms = MediaSource::from_bytes(raw).unwrap();
1437        let info = parser.parse_track_from_bytes(ms).unwrap();
1438        assert_eq!(info.get(crate::TrackInfoTag::Make), Some(&"Apple".into()));
1439        assert_eq!(
1440            info.get(crate::TrackInfoTag::Model),
1441            Some(&"iPhone X".into())
1442        );
1443    }
1444
1445    #[test]
1446    fn parse_track_from_bytes_mp4_basic() {
1447        let mut parser = MediaParser::new();
1448        let raw = std::fs::read("testdata/meta.mp4").unwrap();
1449        let ms = MediaSource::from_bytes(raw).unwrap();
1450        let info = parser.parse_track_from_bytes(ms).unwrap();
1451        assert!(info.get(crate::TrackInfoTag::CreateDate).is_some());
1452    }
1453
1454    #[test]
1455    fn parse_track_from_bytes_mkv_basic() {
1456        let mut parser = MediaParser::new();
1457        let raw = std::fs::read("testdata/mkv_640x360.mkv").unwrap();
1458        let ms = MediaSource::from_bytes(raw).unwrap();
1459        let info = parser.parse_track_from_bytes(ms).unwrap();
1460        assert_eq!(
1461            info.get(crate::TrackInfoTag::Width),
1462            Some(&(640_u32.into()))
1463        );
1464    }
1465
1466    #[test]
1467    fn parse_track_from_bytes_on_image_returns_track_not_found() {
1468        let mut parser = MediaParser::new();
1469        let raw = std::fs::read("testdata/exif.jpg").unwrap();
1470        let ms = MediaSource::from_bytes(raw).unwrap();
1471        let res = parser.parse_track_from_bytes(ms);
1472        assert!(matches!(res, Err(crate::Error::TrackNotFound)));
1473    }
1474}