1use std::{
2 cmp::{max, min},
3 fmt::{Debug, Display},
4 fs::File,
5 io::{self, Read, Seek},
6 marker::PhantomData,
7 net::TcpStream,
8 ops::Range,
9 path::Path,
10};
11
12use crate::{
13 buffer::Buffers,
14 error::{ParsedError, ParsingError, ParsingErrorState},
15 exif::{parse_exif_iter, TiffHeader},
16 file::Mime,
17 partial_vec::PartialVec,
18 skip::Skip,
19 video::parse_track_info,
20 ExifIter, Seekable, TrackInfo, Unseekable,
21};
22
/// A media input whose MIME type has already been detected from its header.
///
/// `R` is the underlying reader. `S` is a marker type (defaulting to
/// [`Seekable`]) recorded only through `PhantomData`; it selects how bytes are
/// skipped via the `Skip<R>` implementation used by the parsing code.
pub struct MediaSource<R, S = Seekable> {
    /// The reader, positioned just after the header bytes stored in `buf`.
    pub(crate) reader: R,
    /// Header bytes consumed from `reader` while detecting the MIME type.
    pub(crate) buf: Vec<u8>,
    /// MIME type detected from `buf`.
    pub(crate) mime: Mime,
    /// Carries the `S` marker without storing a value of that type.
    phantom: PhantomData<S>,
}
49
50impl<R, S: Skip<R>> Debug for MediaSource<R, S> {
51 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52 f.debug_struct("MediaSource")
53 .field("mime", &self.mime)
55 .field("seekable", &S::debug())
56 .finish_non_exhaustive()
57 }
58}
59
/// Maximum number of bytes read from the reader up front in order to detect
/// the MIME type of a media source.
const HEADER_PARSE_BUF_SIZE: usize = 128;
62
63impl<R: Read, S: Skip<R>> MediaSource<R, S> {
64 #[tracing::instrument(skip(reader))]
65 fn build(mut reader: R) -> crate::Result<Self> {
66 let mut buf = Vec::with_capacity(HEADER_PARSE_BUF_SIZE);
68 reader
69 .by_ref()
70 .take(HEADER_PARSE_BUF_SIZE as u64)
71 .read_to_end(&mut buf)?;
72 let mime: Mime = buf.as_slice().try_into()?;
73 tracing::debug!(?mime);
74 Ok(Self {
75 reader,
76 buf,
77 mime,
78 phantom: PhantomData,
79 })
80 }
81
82 pub fn has_track(&self) -> bool {
83 match self.mime {
84 Mime::Image(_) => false,
85 Mime::Video(_) => true,
86 }
87 }
88
89 pub fn has_exif(&self) -> bool {
90 match self.mime {
91 Mime::Image(_) => true,
92 Mime::Video(_) => false,
93 }
94 }
95}
96
impl<R: Read + Seek> MediaSource<R, Seekable> {
    /// Creates a media source from a reader that also implements `Seek`.
    ///
    /// The MIME type is detected immediately from the first bytes of `reader`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `build` reports (header read failure or an
    /// unrecognised header).
    pub fn seekable(reader: R) -> crate::Result<Self> {
        Self::build(reader)
    }
}
102
impl<R: Read> MediaSource<R, Unseekable> {
    /// Creates a media source from a reader that is only required to
    /// implement `Read`.
    ///
    /// The MIME type is detected immediately from the first bytes of `reader`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `build` reports (header read failure or an
    /// unrecognised header).
    pub fn unseekable(reader: R) -> crate::Result<Self> {
        Self::build(reader)
    }
}
108
109impl MediaSource<File, Seekable> {
110 pub fn file_path<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
111 Self::seekable(File::open(path)?)
112 }
113
114 pub fn file(file: File) -> crate::Result<Self> {
115 Self::seekable(file)
116 }
117}
118
impl MediaSource<TcpStream, Unseekable> {
    /// Wraps a TCP stream as an unseekable media source.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`MediaSource::unseekable`] reports.
    pub fn tcp_stream(stream: TcpStream) -> crate::Result<Self> {
        Self::unseekable(stream)
    }
}
124
/// Number of bytes requested by the first buffer fill of a parse run.
pub(crate) const INIT_BUF_SIZE: usize = 4096;
/// Smallest number of bytes requested whenever the buffer has to grow.
pub(crate) const MIN_GROW_SIZE: usize = 4096;
/// Upper bound (1 GiB) for the parse buffer; it also caps how many bytes are
/// read per step when skipping by reading.
pub(crate) const MAX_ALLOC_SIZE: usize = 1024 * 1024 * 1024;
132
/// Access to a byte buffer together with a cursor position.
pub(crate) trait Buf {
    /// Returns the bytes currently available for parsing.
    fn buffer(&self) -> &[u8];
    /// Discards the buffered bytes.
    fn clear(&mut self);

    /// Sets the cursor position to `pos`.
    fn set_position(&mut self, pos: usize);
    /// Returns the current cursor position.
    // The lint allowance suggests this getter had no caller when it was added.
    #[allow(unused)]
    fn position(&self) -> usize;
}
141
/// Intermediate state a parse callback can hand back together with an error,
/// so that it is passed in again on the next invocation of the callback.
#[derive(Debug, Clone)]
pub(crate) enum ParsingState {
    /// An already parsed TIFF header.
    TiffHeader(TiffHeader),
    /// A size value for HEIF input — presumably the size of the Exif payload;
    /// confirm against the HEIF parser.
    HeifExifSize(usize),
    /// A size value for CR3 input — presumably the size of the Exif payload;
    /// confirm against the CR3 parser.
    Cr3ExifSize(usize),
}
148
149impl Display for ParsingState {
150 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
151 match self {
152 ParsingState::TiffHeader(h) => Display::fmt(&format!("ParsingState: {h:?})"), f),
153 ParsingState::HeifExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
154 ParsingState::Cr3ExifSize(n) => Display::fmt(&format!("ParsingState: {n}"), f),
155 }
156 }
157}
158
159pub(crate) trait BufParser: Buf + Debug {
160 fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize>;
161 fn load_and_parse<R: Read, S: Skip<R>, P, O>(
162 &mut self,
163 reader: &mut R,
164 mut parse: P,
165 ) -> Result<O, ParsedError>
166 where
167 P: FnMut(&[u8], Option<ParsingState>) -> Result<O, ParsingErrorState>,
168 {
169 self.load_and_parse_with_offset::<R, S, _, _>(
170 reader,
171 |data, _, state| parse(data, state),
172 0,
173 )
174 }
175
176 #[tracing::instrument(skip_all)]
177 fn load_and_parse_with_offset<R: Read, S: Skip<R>, P, O>(
178 &mut self,
179 reader: &mut R,
180 mut parse: P,
181 offset: usize,
182 ) -> Result<O, ParsedError>
183 where
184 P: FnMut(&[u8], usize, Option<ParsingState>) -> Result<O, ParsingErrorState>,
185 {
186 if offset >= self.buffer().len() {
187 self.fill_buf(reader, MIN_GROW_SIZE)?;
188 }
189
190 let mut parsing_state: Option<ParsingState> = None;
191 loop {
192 let res = parse(self.buffer(), offset, parsing_state.take());
193 match res {
194 Ok(o) => return Ok(o),
195 Err(es) => {
196 tracing::debug!(?es);
197 parsing_state = es.state;
198
199 match es.err {
200 ParsingError::ClearAndSkip(n) => {
201 self.clear_and_skip::<R, S>(reader, n)?;
202 }
203 ParsingError::Need(i) => {
204 tracing::debug!(need = i, "need more bytes");
205 let to_read = max(i, MIN_GROW_SIZE);
206 let n = self.fill_buf(reader, to_read)?;
209 if n == 0 {
210 return Err(ParsedError::NoEnoughBytes);
211 }
212 tracing::debug!(n, "actual read");
213 }
214 ParsingError::Failed(s) => return Err(ParsedError::Failed(s)),
215 }
216 }
217 }
218 }
219 }
220
221 #[tracing::instrument(skip(reader))]
222 fn clear_and_skip<R: Read, S: Skip<R>>(
223 &mut self,
224 reader: &mut R,
225 n: usize,
226 ) -> Result<(), ParsedError> {
227 tracing::debug!("ClearAndSkip");
228 if n <= self.buffer().len() {
229 tracing::debug!(n, "skip by set_position");
230 self.set_position(n);
231 return Ok(());
232 }
233
234 let skip_n = n - self.buffer().len();
235 tracing::debug!(skip_n, "clear and skip bytes");
236 self.clear();
237
238 let done = S::skip_by_seek(
239 reader,
240 skip_n
241 .try_into()
242 .map_err(|_| ParsedError::Failed("skip too many bytes".into()))?,
243 )?;
244 if !done {
245 tracing::debug!(skip_n, "skip by using our buffer");
246 let mut skipped = 0;
247 while skipped < skip_n {
248 let mut to_skip = skip_n - skipped;
249 to_skip = min(to_skip, MAX_ALLOC_SIZE);
250 let n = self.fill_buf(reader, to_skip)?;
251 skipped += n;
252 if skipped <= skip_n {
253 self.clear();
254 } else {
255 let remain = skipped - skip_n;
256 self.set_position(self.buffer().len() - remain);
257 break;
258 }
259 }
260 } else {
261 tracing::debug!(skip_n, "skip with seek");
262 }
263
264 if self.buffer().is_empty() {
265 self.fill_buf(reader, MIN_GROW_SIZE)?;
266 }
267 Ok(())
268 }
269}
270
271impl BufParser for MediaParser {
272 #[tracing::instrument(skip(self, reader), fields(buf_len=self.buf().len()))]
273 fn fill_buf<R: Read>(&mut self, reader: &mut R, size: usize) -> io::Result<usize> {
274 if size.saturating_add(self.buf().len()) > MAX_ALLOC_SIZE {
275 tracing::error!(?size, "the requested buffer size is too big");
276 return Err(io::ErrorKind::Unsupported.into());
277 }
278 self.buf_mut().reserve_exact(size);
279
280 let n = reader.take(size as u64).read_to_end(self.buf_mut())?;
281 if n == 0 {
282 tracing::error!(buf_len = self.buf().len(), "fill_buf: EOF");
283 return Err(std::io::ErrorKind::UnexpectedEof.into());
284 }
285
286 tracing::debug!(
287 ?size,
288 ?n,
289 buf_len = self.buf().len(),
290 "fill_buf: read bytes"
291 );
292
293 Ok(n)
294 }
295}
296
297impl Buf for MediaParser {
298 fn buffer(&self) -> &[u8] {
299 &self.buf()[self.position..]
300 }
301
302 fn clear(&mut self) {
303 self.buf_mut().clear();
304 }
305
306 fn set_position(&mut self, pos: usize) {
307 self.position = pos;
308 }
309
310 fn position(&self) -> usize {
311 self.position
312 }
313}
314
/// A value that can be produced by parsing a [`MediaSource`].
///
/// `R` and `S` are the reader and marker types of the media source.
pub trait ParseOutput<R, S>: Sized {
    /// Parses `ms` using `parser` and returns the resulting value.
    fn parse(parser: &mut MediaParser, ms: MediaSource<R, S>) -> crate::Result<Self>;
}
318
319impl<R: Read, S: Skip<R>> ParseOutput<R, S> for ExifIter {
320 fn parse(parser: &mut MediaParser, mut ms: MediaSource<R, S>) -> crate::Result<Self> {
321 if !ms.has_exif() {
322 return Err(crate::Error::ParseFailed("no Exif data here".into()));
323 }
324 parse_exif_iter::<R, S>(parser, ms.mime.unwrap_image(), &mut ms.reader)
325 }
326}
327
328impl<R: Read, S: Skip<R>> ParseOutput<R, S> for TrackInfo {
329 fn parse(parser: &mut MediaParser, mut ms: MediaSource<R, S>) -> crate::Result<Self> {
330 if !ms.has_track() {
331 return Err(crate::Error::ParseFailed("no track info here".into()));
332 }
333 let out = parser.load_and_parse::<R, S, _, _>(ms.reader.by_ref(), |data, _| {
334 parse_track_info(data, ms.mime.unwrap_video())
335 .map_err(|e| ParsingErrorState::new(e, None))
336 })?;
337 Ok(out)
338 }
339}
340
/// A reusable parser for media sources.
///
/// The parser owns the byte buffer that is filled from the source's reader
/// while parsing, and a cursor into that buffer.
pub struct MediaParser {
    /// Provider of byte buffers: buffers are acquired from it before a parse
    /// run and handed back to it afterwards.
    bb: Buffers,
    /// The buffer used by the parse run in progress; `None` between runs.
    buf: Option<Vec<u8>>,
    /// Index into `buf` at which the readable view starts.
    position: usize,
}
396
397impl Debug for MediaParser {
398 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
399 f.debug_struct("MediaParser")
400 .field("buffers", &self.bb)
401 .field("buf len", &self.buf.as_ref().map(|x| x.len()))
402 .field("position", &self.position)
403 .finish_non_exhaustive()
404 }
405}
406
407impl Default for MediaParser {
408 fn default() -> Self {
409 Self {
410 bb: Buffers::new(),
411 buf: None,
412 position: 0,
413 }
414 }
415}
416
/// Ability to hand out part of the parse buffer as a [`PartialVec`].
pub(crate) trait ShareBuf {
    /// Returns a [`PartialVec`] covering `range` of the buffer.
    fn share_buf(&mut self, range: Range<usize>) -> PartialVec;
}
420
421impl ShareBuf for MediaParser {
422 fn share_buf(&mut self, mut range: Range<usize>) -> PartialVec {
423 let buf = self.buf.take().unwrap();
424 let vec = self.bb.release_to_share(buf);
425 range.start += self.position;
426 range.end += self.position;
427 PartialVec::new(vec, range)
428 }
429}
430
431impl MediaParser {
432 pub fn new() -> Self {
433 Self::default()
434 }
435
436 pub fn parse<R: Read, S, O: ParseOutput<R, S>>(
456 &mut self,
457 mut ms: MediaSource<R, S>,
458 ) -> crate::Result<O> {
459 self.reset();
460 self.acquire_buf();
461
462 self.buf_mut().append(&mut ms.buf);
463 let res = self.do_parse(ms);
464
465 self.reset();
466 res
467 }
468
469 fn do_parse<R: Read, S, O: ParseOutput<R, S>>(
470 &mut self,
471 mut ms: MediaSource<R, S>,
472 ) -> Result<O, crate::Error> {
473 self.fill_buf(&mut ms.reader, INIT_BUF_SIZE)?;
474 let res = ParseOutput::parse(self, ms)?;
475 Ok(res)
476 }
477
478 fn reset(&mut self) {
479 if let Some(buf) = self.buf.take() {
481 self.bb.release(buf);
482 }
483
484 self.set_position(0);
486 }
487
488 pub(crate) fn buf(&self) -> &Vec<u8> {
489 match self.buf.as_ref() {
490 Some(b) => b,
491 None => panic!("no buf here"),
492 }
493 }
494
495 fn buf_mut(&mut self) -> &mut Vec<u8> {
496 match self.buf.as_mut() {
497 Some(b) => b,
498 None => panic!("no buf here"),
499 }
500 }
501
502 fn acquire_buf(&mut self) {
503 assert!(self.buf.is_none());
504 self.buf = Some(self.bb.acquire());
505 }
506}
507
#[cfg(test)]
mod tests {
    use std::sync::{LazyLock, Mutex, MutexGuard};

    use super::*;
    use test_case::case;

    /// What a sample file is expected to yield in `parse_media`.
    enum TrackExif {
        /// The file parses into a `TrackInfo`.
        Track,
        /// The file parses into an `ExifIter`.
        Exif,
        /// The source can be created, but parsing it must fail.
        NoData,
        /// Creating the media source itself must fail.
        Invalid,
    }
    use TrackExif::*;

    // One parser shared by all tests, so buffer reuse across runs is
    // exercised as well.
    // NOTE(review): a panic while the guard is held poisons the mutex, after
    // which every later `parser()` call panics too — confirm this is intended.
    static PARSER: LazyLock<Mutex<MediaParser>> = LazyLock::new(|| Mutex::new(MediaParser::new()));
    /// Locks and returns the shared parser.
    fn parser() -> MutexGuard<'static, MediaParser> {
        PARSER.lock().unwrap()
    }

    /// Checks for each sample under `testdata` that source creation and
    /// parsing behave as described by `te`.
    #[case("3gp_640x360.3gp", Track)]
    #[case("broken.jpg", Exif)]
    #[case("compatible-brands-fail.heic", Invalid)]
    #[case("compatible-brands-fail.mov", Invalid)]
    #[case("compatible-brands.heic", NoData)]
    #[case("compatible-brands.mov", NoData)]
    #[case("embedded-in-heic.mov", Track)]
    #[case("exif.heic", Exif)]
    #[case("exif.jpg", Exif)]
    #[case("exif-no-tz.jpg", Exif)]
    #[case("fujifilm_x_t1_01.raf.meta", Exif)]
    #[case("meta.mov", Track)]
    #[case("meta.mp4", Track)]
    #[case("mka.mka", Track)]
    #[case("mkv_640x360.mkv", Track)]
    #[case("exif-one-entry.heic", Exif)]
    #[case("no-exif.jpg", NoData)]
    #[case("tif.tif", Exif)]
    #[case("ramdisk.img", Invalid)]
    #[case("webm_480.webm", Track)]
    fn parse_media(path: &str, te: TrackExif) {
        let mut parser = parser();
        let ms = MediaSource::file_path(Path::new("testdata").join(path));
        match te {
            Track => {
                let ms = ms.unwrap();
                assert!(ms.has_track());
                let _: TrackInfo = parser.parse(ms).unwrap();
            }
            Exif => {
                let ms = ms.unwrap();
                assert!(ms.has_exif());
                let mut it: ExifIter = parser.parse(ms).unwrap();
                let _ = it.parse_gps_info();

                if path.contains("one-entry") {
                    // This sample must yield exactly one entry.
                    assert!(it.next().is_some());
                    assert!(it.next().is_none());

                    let exif: crate::Exif = it.clone_and_rewind().into();
                    assert!(exif.get(ExifTag::Orientation).is_some());
                } else {
                    // Only checks that the conversion completes.
                    let _: crate::Exif = it.clone_and_rewind().into();
                }
            }
            NoData => {
                let ms = ms.unwrap();
                if ms.has_exif() {
                    let res: Result<ExifIter, _> = parser.parse(ms);
                    res.unwrap_err();
                } else if ms.has_track() {
                    let res: Result<TrackInfo, _> = parser.parse(ms);
                    res.unwrap_err();
                }
            }
            Invalid => {
                ms.unwrap_err();
            }
        }
    }

    use crate::testkit::open_sample;
    use crate::{EntryValue, ExifTag, TrackInfoTag};
    use chrono::DateTime;
    use test_case::test_case;

    use crate::video::TrackInfoTag::*;

    /// Checks that a given tag of a sample has the expected value, both
    /// through a seekable and through an unseekable source.
    #[test_case("mkv_640x360.mkv", ImageWidth, 640_u32.into())]
    #[test_case("mkv_640x360.mkv", ImageHeight, 360_u32.into())]
    #[test_case("mkv_640x360.mkv", DurationMs, 13346_u64.into())]
    #[test_case("mkv_640x360.mkv", CreateDate, DateTime::parse_from_str("2008-08-08T08:08:08Z", "%+").unwrap().into())]
    #[test_case("meta.mov", Make, "Apple".into())]
    #[test_case("meta.mov", Model, "iPhone X".into())]
    #[test_case("meta.mov", GpsIso6709, "+27.1281+100.2508+000.000/".into())]
    #[test_case("meta.mp4", ImageWidth, 1920_u32.into())]
    #[test_case("meta.mp4", ImageHeight, 1080_u32.into())]
    #[test_case("meta.mp4", DurationMs, 1063_u64.into())]
    #[test_case("meta.mp4", GpsIso6709, "+27.2939+112.6932/".into())]
    #[test_case("meta.mp4", CreateDate, DateTime::parse_from_str("2024-02-03T07:05:38Z", "%+").unwrap().into())]
    #[test_case("udta.auth.mp4", Author, "ReplayKitRecording".into(); "udta author")]
    #[test_case("auth.mov", Author, "ReplayKitRecording".into(); "mov author")]
    fn parse_track_info(path: &str, tag: TrackInfoTag, v: EntryValue) {
        let mut parser = parser();

        let mf = MediaSource::file(open_sample(path).unwrap()).unwrap();
        let info: TrackInfo = parser.parse(mf).unwrap();
        assert_eq!(info.get(tag).unwrap(), &v);

        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
        let info: TrackInfo = parser.parse(mf).unwrap();
        assert_eq!(info.get(tag).unwrap(), &v);
    }

    /// Regression inputs: parsing may fail, but it must not panic, for either
    /// a seekable or an unseekable source.
    #[test_case("crash_moov-trak")]
    #[test_case("crash_skip_large")]
    #[test_case("crash_add_large")]
    fn parse_track_crash(path: &str) {
        let mut parser = parser();

        let mf = MediaSource::file(open_sample(path).unwrap()).unwrap();
        let _: TrackInfo = parser.parse(mf).unwrap_or_default();

        let mf = MediaSource::unseekable(open_sample(path).unwrap()).unwrap();
        let _: TrackInfo = parser.parse(mf).unwrap_or_default();
    }
}