1use std::{
2 cmp::{max, min},
3 fmt::{Debug, Display},
4 fs::File,
5 io::{self, Read, Seek},
6 marker::PhantomData,
7 net::TcpStream,
8 ops::Range,
9 path::Path,
10};
11
12use crate::{
13 buffer::Buffers,
14 error::{ParsedError, ParsingError, ParsingErrorState},
15 exif::{parse_exif_iter, TiffHeader},
16 file::Mime,
17 partial_vec::PartialVec,
18 skip::Skip,
19 video::parse_track_info,
20 ExifIter, Seekable, TrackInfo, Unseekable,
21};
22
/// A media input together with the header bytes already consumed from it.
///
/// `S` is a marker type (defaulting to [`Seekable`]) that is only carried through
/// [`PhantomData`]; no value of type `S` is stored.
pub struct MediaSource<R, S = Seekable> {
    /// The underlying reader; `build` has already read the header bytes from it.
    pub(crate) reader: R,
    /// Bytes read from `reader` by `build` (at most `HEADER_PARSE_BUF_SIZE`).
    pub(crate) buf: Vec<u8>,
    /// Media type detected from `buf` by `build`.
    pub(crate) mime: Mime,
    /// Zero-sized marker tying the source to `S`.
    phantom: PhantomData<S>,
}
50
51impl<R, S: Skip<R>> Debug for MediaSource<R, S> {
52 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53 f.debug_struct("MediaSource")
54 .field("mime", &self.mime)
56 .field("seekable", &S::debug())
57 .finish_non_exhaustive()
58 }
59}
60
/// Upper bound on the number of bytes `MediaSource::build` reads up front to detect the
/// media type.
const HEADER_PARSE_BUF_SIZE: usize = 128;
63
64impl<R: Read, S: Skip<R>> MediaSource<R, S> {
65 #[tracing::instrument(skip(reader))]
66 fn build(mut reader: R) -> crate::Result<Self> {
67 let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
69 reader
70 .by_ref()
71 .take(HEADER_PARSE_BUF_SIZE as u64)
72 .read_to_end(&mut buf)?;
73 let mime: Mime = buf.as_slice().try_into()?;
74 tracing::debug!(?mime);
75 Ok(Self {
76 reader,
77 buf,
78 mime,
79 phantom: PhantomData,
80 })
81 }
82
83 pub fn has_track(&self) -> bool {
84 match self.mime {
85 Mime::Image(_) => false,
86 Mime::Video(_) => true,
87 }
88 }
89
90 pub fn has_exif(&self) -> bool {
91 match self.mime {
92 Mime::Image(_) => true,
93 Mime::Video(_) => false,
94 }
95 }
96}
97
98impl<R: Read + Seek> MediaSource<R, Seekable> {
99 pub fn seekable(reader: R) -> crate::Result<Self> {
104 Self::build(reader)
105 }
106}
107
108impl<R: Read> MediaSource<R, Unseekable> {
109 pub fn unseekable(reader: R) -> crate::Result<Self> {
115 Self::build(reader)
116 }
117}
118
119impl MediaSource<File, Seekable> {
120 pub fn file_path<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
121 Self::seekable(File::open(path)?)
122 }
123
124 pub fn file(file: File) -> crate::Result<Self> {
125 Self::seekable(file)
126 }
127}
128
129impl MediaSource<TcpStream, Unseekable> {
130 pub fn tcp_stream(stream: TcpStream) -> crate::Result<Self> {
131 Self::unseekable(stream)
132 }
133}
134
/// Number of bytes `MediaParser::do_parse` asks `fill_buf` for before parsing starts.
pub(crate) const INIT_BUF_SIZE: usize = 4096;
/// Smallest read request issued by the `BufParser` default methods when more data is needed.
pub(crate) const MIN_GROW_SIZE: usize = 4096;
/// 1 GiB: `MediaParser::fill_buf` refuses requests that would grow the buffer beyond this,
/// and `clear_and_skip` caps each buffered skip chunk at this size.
pub(crate) const MAX_ALLOC_SIZE: usize = 1024 * 1024 * 1024;
142
/// Byte buffer with a movable read position, as consumed by `BufParser`.
pub(crate) trait Buf {
    /// Returns the bytes handed to parse callbacks (`MediaParser` returns the part of its
    /// backing buffer starting at the current position).
    fn buffer(&self) -> &[u8];
    /// Discards the buffered bytes (`MediaParser` empties its backing `Vec`).
    fn clear(&mut self);

    /// Sets the read position.
    fn set_position(&mut self, pos: usize);
    /// Returns the read position.
    #[allow(unused)]
    fn position(&self) -> usize;
}
151
/// State carried from one parse attempt to the next: `load_and_parse_with_offset` takes it
/// from the returned `ParsingErrorState` and passes it to the next call of the parse callback.
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// Carries a [`TiffHeader`].
    TiffHeader(TiffHeader),
    /// NOTE(review): presumably the Exif payload size found in a HEIF file — confirm.
    HeifExifSize(usize),
    /// NOTE(review): presumably the Exif payload size found in a CR3 file — confirm.
    Cr3ExifSize(usize),
}
158
159impl Display for ParsingState {
160 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
161 match self {
162 ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
163 ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
164 ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
165 }
166 }
167}
168
169pub(crate) trait BufParser: Buf + Debug {
172 fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
173 fn load_and_parse<R: Read, S: Skip<R>, P, O>(
174 &mut self,
175 reader: &mut R,
176 mut parse: P,
177 ) -> Result<O, ParsedError>
178 where
179 P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
180 {
181 self.load_and_parse_with_offset::<R, S, _, _>(
182 reader,
183 |data, _, state| parse(data, state),
184 0,
185 )
186 }
187
188 #[tracing::instrument(skip_all)]
189 fn load_and_parse_with_offset<R: Read, S: Skip<R>, P, O>(
190 &mut self,
191 reader: &mut R,
192 mut parse: P,
193 offset: usize,
194 ) -> Result<O, ParsedError>
195 where
196 P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
197 {
198 if offset >= self.buffer().len() {
199 self.fill_buf(reader, MIN_GROW_SIZE)?;
200 }
201
202 let mut parsing_state: Option<ParsingState> = None;
203 loop {
204 let res = parse(self.buffer(), offset, parsing_state.take());
205 match res {
206 Ok(o) => return Ok(o),
207 Err(es) => {
208 tracing::debug!(?es);
209 parsing_state = es.state;
210
211 match es.err {
212 ParsingError::ClearAndSkip(n) => {
213 self.clear_and_skip::<R, S>(reader, n)?;
214 }
215 ParsingError::Need(i) => {
216 tracing::debug!(need = i, "need more bytes");
217 let to_read = max(i, MIN_GROW_SIZE);
218 let n = self.fill_buf(reader, to_read)?;
221 if n == 0 {
222 return Err(ParsedError::NoEnoughBytes);
223 }
224 tracing::debug!(n, "actual read");
225 }
226 ParsingError::Failed(s) => return Err(ParsedError::Failed(s)),
227 }
228 }
229 }
230 }
231 }
232
233 #[tracing::instrument(skip(reader))]
234 fn clear_and_skip<R: Read, S: Skip<R>>(
235 &mut self,
236 reader: &mut R,
237 n: usize,
238 ) -> Result<(), ParsedError> {
239 tracing::debug!("ClearAndSkip");
240 if n <= self.buffer().len() {
241 tracing::debug!(n, "skip by set_position");
242 self.set_position(n);
243 return Ok(());
244 }
245
246 let skip_n = n - self.buffer().len();
247 tracing::debug!(skip_n, "clear and skip bytes");
248 self.clear();
249
250 let done = S::skip_by_seek(
251 reader,
252 skip_n
253 .try_into()
254 .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
255 )?;
256 if !done {
257 tracing::debug!(skip_n, "skip by using our buffer");
258 let mut skipped = 0;
259 while skipped < skip_n {
260 let mut to_skip = skip_n - skipped;
261 to_skip = min(to_skip, MAX_ALLOC_SIZE);
262 let n = self.fill_buf(reader, to_skip)?;
263 skipped += n;
264 if skipped <= skip_n {
265 self.clear();
266 } else {
267 let remain = skipped - skip_n;
268 self.set_position(self.buffer().len() - remain);
269 break;
270 }
271 }
272 } else {
273 tracing::debug!(skip_n, "skip with seek");
274 }
275
276 if self.buffer().is_empty() {
277 self.fill_buf(reader, MIN_GROW_SIZE)?;
278 }
279 Ok(())
280 }
281}
282
283impl BufParser for MediaParser {
284 #[tracing::instrument(skip(self, reader), fields(buf_len=self.buf().len()))]
285 fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
286 if size.saturating_add(self.buf().len()) > MAX_ALLOC_SIZE {
287 tracing::error!(?size, "the requested buffer size is too big");
288 return Err(io::ErrorKind::Unsupported.into());
289 }
290 self.buf_mut().reserve_exact(size);
291
292 let n = reader.take(size as u64).read_to_end(self.buf_mut())?;
293 if n == 0 {
294 tracing::error!(buf_len = self.buf().len(), "fill_buf: EOF");
295 return Err(std::io::ErrorKind::UnexpectedEof.into());
296 }
297
298 tracing::debug!(
299 ?size,
300 ?n,
301 buf_len = self.buf().len(),
302 "fill_buf: read bytes"
303 );
304
305 Ok(n)
306 }
307}
308
309impl Buf for MediaParser {
310 fn buffer(&self) -> &[u8] {
311 &self.buf()[self.position..]
312 }
313
314 fn clear(&mut self) {
315 self.buf_mut().clear();
316 }
317
318 fn set_position(&mut self, pos: usize) {
319 self.position = pos;
320 }
321
322 fn position(&self) -> usize {
323 self.position
324 }
325}
326
/// A result type that a [`MediaParser`] can produce from a [`MediaSource`].
pub trait ParseOutput<R, S>: Sized {
    /// Consumes `ms` and parses it into `Self`, using `parser` for buffering.
    fn parse(parser: &mut MediaParser, ms: MediaSource<R, S>) -> crate::Result<Self>;
}
330
331impl<R: Read, S: Skip<R>> ParseOutput<R, S> for ExifIter {
332 fn parse(parser: &mut MediaParser, mut ms: MediaSource<R, S>) -> crate::Result<Self> {
333 if !ms.has_exif() {
334 return Err(crate::Error::ParseFailed("no Exif data here".into()));
335 }
336 parse_exif_iter::<R, S>(parser, ms.mime.unwrap_image(), &mut ms.reader)
337 }
338}
339
340impl<R: Read, S: Skip<R>> ParseOutput<R, S> for TrackInfo {
341 fn parse(parser: &mut MediaParser, mut ms: MediaSource<R, S>) -> crate::Result<Self> {
342 if !ms.has_track() {
343 return Err(crate::Error::ParseFailed("no track info here".into()));
344 }
345 let out = parser.load_and_parse::<R, S, _, _>(ms.reader.by_ref(), |data, _| {
346 parse_track_info(data, ms.mime.unwrap_video())
347 .map_err(|e| ParsingErrorState::new(e, None))
348 })?;
349 Ok(out)
350 }
351}
352
/// Reusable parser that owns the working buffer used while parsing a [`MediaSource`].
pub struct MediaParser {
    /// Buffer provider: buffers are taken with `acquire` and handed back with `release`
    /// or `release_to_share`.
    bb: Buffers,
    /// Working buffer; acquired at the start of `parse` and given back in `reset` or
    /// `share_buf`.
    buf: Option<Vec<u8>>,
    /// Offset into `buf` at which `buffer()` starts.
    position: usize,
}
408
409impl Debug for MediaParser {
410 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
411 f.debug_struct("MediaParser")
412 .field("buffers", &self.bb)
413 .field("buf len", &self.buf.as_ref().map(|x| x.len()))
414 .field("position", &self.position)
415 .finish_non_exhaustive()
416 }
417}
418
419impl Default for MediaParser {
420 fn default() -> Self {
421 Self {
422 bb: Buffers::new(),
423 buf: None,
424 position: 0,
425 }
426 }
427}
428
/// Hands buffered data over as a [`PartialVec`] covering `range`.
pub(crate) trait ShareBuf {
    /// Returns a [`PartialVec`] for `range` (`MediaParser` gives up its working buffer and
    /// shifts `range` by its current position).
    fn share_buf(&mut self, range: Range<usize>) -> PartialVec;
}
432
433impl ShareBuf for MediaParser {
434 fn share_buf(&mut self, mut range: Range<usize>) -> PartialVec {
435 let buf = self.buf.take().unwrap();
436 let vec = self.bb.release_to_share(buf);
437 range.start += self.position;
438 range.end += self.position;
439 PartialVec::new(vec, range)
440 }
441}
442
443impl MediaParser {
444 pub fn new() -> Self {
445 Self::default()
446 }
447
448 pub fn parse<R: Read, S, O: ParseOutput<R, S>>(
468 &mut self,
469 mut ms: MediaSource<R, S>,
470 ) -> crate::Result<O> {
471 self.reset();
472 self.acquire_buf();
473
474 self.buf_mut().append(&mut ms.buf);
475 let res = self.do_parse(ms);
476
477 self.reset();
478 res
479 }
480
481 fn do_parse<R: Read, S, O: ParseOutput<R, S>>(
482 &mut self,
483 mut ms: MediaSource<R, S>,
484 ) -> Result<O, crate::Error> {
485 self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
486 let res = ParseOutput::parse(self, ms)?;
487 Ok(res)
488 }
489
490 fn reset(&mut self) {
491 if let Some(buf) = self.buf.take() {
493 self.bb.release(buf);
494 }
495
496 self.set_position(0);
498 }
499
500 pub(crate) fn buf(&self) -> &Vec<u8> {
501 match self.buf.as_ref() {
502 Some(b) => b,
503 None => panic!("no buf here"),
504 }
505 }
506
507 fn buf_mut(&mut self) -> &mut Vec<u8> {
508 match self.buf.as_mut() {
509 Some(b) => b,
510 None => panic!("no buf here"),
511 }
512 }
513
514 fn acquire_buf(&mut self) {
515 assert!(self.buf.is_none());
516 self.buf = Some(self.bb.acquire());
517 }
518}
519
#[cfg(test)]
mod tests {
    use std::sync::{LazyLock, Mutex, MutexGuard};

    use super::*;
    use test_case::case;

    /// What `parse_media` expects from a sample file.
    enum TrackExif {
        /// The source opens and parses into `TrackInfo`.
        Track,
        /// The source opens and parses into `ExifIter`.
        Exif,
        /// The source opens but parsing returns an error.
        NoData,
        /// The source cannot even be constructed.
        Invalid,
    }
    use TrackExif::*;

    /// One parser shared by all tests, so buffer reuse across parses is exercised too.
    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));

    /// Locks the shared parser.
    ///
    /// A test that panics while holding the guard poisons the mutex. The guard is recovered
    /// in that case so that one failing test does not make every other test fail with a
    /// `PoisonError`; `MediaParser::parse` resets the parser on entry, so reuse is safe.
    fn parser() -> MutexGuard<'static, MediaParser> {
        PARSER
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[case("3gp_640x360.3gp", Track)]
    #[case("broken.jpg", Exif)]
    #[case("compatible-brands-fail.heic", Invalid)]
    #[case("compatible-brands-fail.mov", Invalid)]
    #[case("compatible-brands.heic", NoData)]
    #[case("compatible-brands.mov", NoData)]
    #[case("embedded-in-heic.mov", Track)]
    #[case("exif.heic", Exif)]
    #[case("exif.jpg", Exif)]
    #[case("exif-no-tz.jpg", Exif)]
    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
    #[case("meta.mov", Track)]
    #[case("meta.mp4", Track)]
    #[case("mka.mka", Track)]
    #[case("mkv_640x360.mkv", Track)]
    #[case("exif-one-entry.heic", Exif)]
    #[case("no-exif.jpg", NoData)]
    #[case("tif.tif", Exif)]
    #[case("ramdisk.img", Invalid)]
    #[case("webm_480.webm", Track)]
    fn parse_media(path: &str, te: TrackExif) {
        let mut parser = parser();
        let ms = MediaSource::file_path(Path::new("testdata").join(path));
        match te {
            Track => {
                let ms = ms.unwrap();
                assert!(ms.has_track());
                let _: TrackInfo = parser.parse(ms).unwrap();
            }
            Exif => {
                let ms = ms.unwrap();
                assert!(ms.has_exif());
                let mut it: ExifIter = parser.parse(ms).unwrap();
                let _ = it.parse_gps_info();

                if path.contains("one-entry") {
                    // Exactly one entry is expected from this sample.
                    assert!(it.next().is_some());
                    assert!(it.next().is_none());

                    let exif: crate::Exif = it.clone_and_rewind().into();
                    assert!(exif.get(ExifTag::Orientation).is_some());
                } else {
                    let _: crate::Exif = it.clone_and_rewind().into();
                }
            }
            NoData => {
                let ms = ms.unwrap();
                if ms.has_exif() {
                    let res: Result<ExifIter, _> = parser.parse(ms);
                    res.unwrap_err();
                } else if ms.has_track() {
                    let res: Result<TrackInfo, _> = parser.parse(ms);
                    res.unwrap_err();
                }
            }
            Invalid => {
                ms.unwrap_err();
            }
        }
    }

    use crate::testkit::open_sample;
    use crate::{EntryValue, Exif, ExifTag, TrackInfoTag};
    use chrono::{DateTime, FixedOffset, NaiveDateTime};
    use test_case::test_case;

    /// Checks one Exif tag value, once through a seekable and once through an unseekable
    /// source of the same sample.
    #[test_case("exif.jpg", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2023-07-09T20:36:33+08:00", "%+").unwrap().into())]
    #[test_case("exif.heic", ExifTag::DateTimeOriginal, DateTime::parse_from_str("2022-07-22T21:26:32+08:00", "%+").unwrap().into())]
    #[test_case("exif.jpg", ExifTag::DateTimeOriginal,
                (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(),
                 Some(FixedOffset::east_opt(8*3600).unwrap())).into())]
    #[test_case("exif-no-tz.jpg", ExifTag::DateTimeOriginal,
                (NaiveDateTime::parse_from_str("2023-07-09T20:36:33", "%Y-%m-%dT%H:%M:%S").unwrap(), None).into())]
    fn parse_exif(path: &str, tag: ExifTag, v: EntryValue) {
        let mut parser = parser();

        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
        assert!(mf.has_exif());
        let iter: ExifIter = parser.parse(mf).unwrap();
        let exif: Exif = iter.into();
        assert_eq!(exif.get(tag).unwrap(), &v);

        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
        assert!(mf.has_exif());
        let iter: ExifIter = parser.parse(mf).unwrap();
        let exif: Exif = iter.into();
        assert_eq!(exif.get(tag).unwrap(), &v);
    }

    use crate::video::TrackInfoTag::*;

    /// Checks one track-info tag value, once through a seekable and once through an
    /// unseekable source of the same sample.
    #[test_case("mkv_640x360.mkv", ImageWidth, 640_u32.into())]
    #[test_case("mkv_640x360.mkv", ImageHeight, 360_u32.into())]
    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
    #[test_case("meta.mov", Make, "Apple".into())]
    #[test_case("meta.mov", Model, "iPhone X".into())]
    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
    #[test_case("meta.mov", CreateDate, DateTime::parse_from_str("2019-02-12T15:27:12+08:00", "%+").unwrap().into())]
    #[test_case("meta.mp4", ImageWidth, 1920_u32.into())]
    #[test_case("meta.mp4", ImageHeight, 1080_u32.into())]
    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
    #[test_case("sony-a7-xavc.MP4", ImageWidth, 1920_u32.into())]
    #[test_case("sony-a7-xavc.MP4", ImageHeight, 1080_u32.into())]
    #[test_case("sony-a7-xavc.MP4", DurationMs, 1440_u64.into())]
    #[test_case("sony-a7-xavc.MP4", CreateDate, DateTime::parse_from_str("2026-04-26T09:25:15+00:00", "%+").unwrap().into())]
    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
        let mut parser = parser();

        let mf = MediaSource::seekable(open_sample(path).unwrap()).unwrap();
        let info: TrackInfo = parser.parse(mf).unwrap();
        assert_eq!(info.get(tag).unwrap(), &v);

        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
        let info: TrackInfo = parser.parse(mf).unwrap();
        assert_eq!(info.get(tag).unwrap(), &v);
    }

    /// Regression inputs: parsing may fail, but it must not panic.
    #[test_case("crash_moov-trak")]
    #[test_case("crash_skip_large")]
    #[test_case("crash_add_large")]
    fn parse_track_crash(path: &str) {
        let mut parser = parser();

        let mf = MediaSource::file(open_sample(path).unwrap()).unwrap();
        let _: TrackInfo = parser.parse(mf).unwrap_or_default();

        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
        let _: TrackInfo = parser.parse(mf).unwrap_or_default();
    }
}