1use std::collections::{HashMap, VecDeque};
9use std::convert::TryFrom;
10use std::io::{Seek, SeekFrom};
11
12use symphonia_core::audio::Layout;
13use symphonia_core::codecs::{CodecParameters, CODEC_TYPE_FLAC, CODEC_TYPE_VORBIS};
14use symphonia_core::errors::{
15 decode_error, end_of_stream_error, seek_error, unsupported_error, Error, Result, SeekErrorKind,
16};
17use symphonia_core::formats::{
18 Cue, FormatOptions, FormatReader, Packet, SeekMode, SeekTo, SeekedTo, Track,
19};
20use symphonia_core::io::{BufReader, MediaSource, MediaSourceStream, ReadBytes};
21use symphonia_core::meta::{Metadata, MetadataLog};
22use symphonia_core::probe::Instantiate;
23use symphonia_core::probe::{Descriptor, QueryDescriptor};
24use symphonia_core::sample::SampleFormat;
25use symphonia_core::support_format;
26use symphonia_core::units::TimeBase;
27use symphonia_utils_xiph::flac::metadata::{MetadataBlockHeader, MetadataBlockType};
28
29use crate::codecs::codec_id_to_type;
30use crate::ebml::{EbmlElement, ElementHeader, ElementIterator};
31use crate::element_ids::{ElementType, ELEMENTS};
32use crate::lacing::{extract_frames, read_xiph_sizes, Frame};
33use crate::segment::{
34 BlockGroupElement, ClusterElement, CuesElement, InfoElement, SeekHeadElement, TagsElement,
35 TracksElement,
36};
37
38#[allow(dead_code)]
39pub struct TrackState {
40 pub(crate) codec_params: CodecParameters,
42 track_num: u32,
44 pub(crate) default_frame_duration: Option<u64>,
46}
47
48pub struct MkvReader {
52 iter: ElementIterator<MediaSourceStream>,
54 tracks: Vec<Track>,
55 track_states: HashMap<u32, TrackState>,
56 current_cluster: Option<ClusterState>,
57 metadata: MetadataLog,
58 cues: Vec<Cue>,
59 frames: VecDeque<Frame>,
60 timestamp_scale: u64,
61 clusters: Vec<ClusterElement>,
62}
63
/// Bookkeeping for the cluster the reader is currently positioned in.
#[derive(Debug)]
struct ClusterState {
    /// Cluster timestamp; `None` until the cluster's `Timestamp` element has been read.
    timestamp: Option<u64>,
    /// Stream position at which the cluster ends, when known.
    end: Option<u64>,
}
69
70fn vorbis_extra_data_from_codec_private(extra: &[u8]) -> Result<Box<[u8]>> {
71 const VORBIS_PACKET_TYPE_IDENTIFICATION: u8 = 1;
72 const VORBIS_PACKET_TYPE_SETUP: u8 = 5;
73
74 let mut reader = BufReader::new(extra);
83 let packet_count = reader.read_byte()? as usize;
84 let packet_lengths = read_xiph_sizes(&mut reader, packet_count)?;
85
86 let mut packets = Vec::new();
87 for length in packet_lengths {
88 packets.push(reader.read_boxed_slice_exact(length as usize)?);
89 }
90
91 let last_packet_length = extra.len() - reader.pos() as usize;
92 packets.push(reader.read_boxed_slice_exact(last_packet_length)?);
93
94 let mut ident_header = None;
95 let mut setup_header = None;
96
97 for packet in packets {
98 match packet.first().copied() {
99 Some(VORBIS_PACKET_TYPE_IDENTIFICATION) => {
100 ident_header = Some(packet);
101 }
102 Some(VORBIS_PACKET_TYPE_SETUP) => {
103 setup_header = Some(packet);
104 }
105 _ => {
106 log::debug!("unsupported vorbis packet type");
107 }
108 }
109 }
110
111 Ok([
113 ident_header.ok_or(Error::DecodeError("mkv: missing vorbis identification packet"))?,
114 setup_header.ok_or(Error::DecodeError("mkv: missing vorbis setup packet"))?,
115 ]
116 .concat()
117 .into_boxed_slice())
118}
119
120fn flac_extra_data_from_codec_private(codec_private: &[u8]) -> Result<Box<[u8]>> {
121 let mut reader = BufReader::new(codec_private);
122
123 let marker = reader.read_quad_bytes()?;
124 if marker != *b"fLaC" {
125 return decode_error("mkv (flac): missing flac stream marker");
126 }
127
128 let header = MetadataBlockHeader::read(&mut reader)?;
129
130 loop {
131 match header.block_type {
132 MetadataBlockType::StreamInfo => {
133 break Ok(reader.read_boxed_slice_exact(header.block_len as usize)?);
134 }
135 _ => reader.ignore_bytes(u64::from(header.block_len))?,
136 }
137 }
138}
139
140impl MkvReader {
141 fn seek_track_by_ts_forward(&mut self, track_id: u32, ts: u64) -> Result<SeekedTo> {
142 let actual_ts = 'out: loop {
143 while let Some(frame) = self.frames.front() {
145 if frame.timestamp + frame.duration >= ts && frame.track == track_id {
146 break 'out frame.timestamp;
147 }
148 else {
149 self.frames.pop_front();
150 }
151 }
152 self.next_element()?
153 };
154
155 Ok(SeekedTo { track_id, required_ts: ts, actual_ts })
156 }
157
158 fn seek_track_by_ts(&mut self, track_id: u32, ts: u64) -> Result<SeekedTo> {
159 if self.clusters.is_empty() {
160 self.seek_track_by_ts_forward(track_id, ts)
161 }
162 else {
163 let mut target_cluster = None;
164 for cluster in &self.clusters {
165 if cluster.timestamp > ts {
166 break;
167 }
168 target_cluster = Some(cluster);
169 }
170 let cluster = target_cluster.ok_or(Error::SeekError(SeekErrorKind::OutOfRange))?;
171
172 let mut target_block = None;
173 for block in cluster.blocks.iter() {
174 if block.track as u32 != track_id {
175 continue;
176 }
177 if block.timestamp > ts {
178 break;
179 }
180 target_block = Some(block);
181 }
182
183 let pos = match target_block {
184 Some(block) => block.pos,
185 None => cluster.pos,
186 };
187 self.iter.seek(pos)?;
188
189 self.current_cluster =
191 Some(ClusterState { timestamp: Some(cluster.timestamp), end: cluster.end });
192
193 self.seek_track_by_ts_forward(track_id, ts)
195 }
196 }
197
198 fn next_element(&mut self) -> Result<()> {
199 if let Some(ClusterState { end: Some(end), .. }) = &self.current_cluster {
200 if self.iter.pos() >= *end {
202 self.current_cluster = None;
204 }
205 }
206
207 let header = match self.iter.read_child_header()? {
211 Some(header) => header,
212 None => {
213 return end_of_stream_error();
215 }
216 };
217
218 match header.etype {
219 ElementType::Cluster => {
220 self.current_cluster = Some(ClusterState { timestamp: None, end: header.end() });
221 }
222 ElementType::Timestamp => match self.current_cluster.as_mut() {
223 Some(cluster) => {
224 cluster.timestamp = Some(self.iter.read_u64()?);
225 }
226 None => {
227 self.iter.ignore_data()?;
228 log::warn!("timestamp element outside of a cluster");
229 return Ok(());
230 }
231 },
232 ElementType::SimpleBlock => {
233 let cluster_ts = match self.current_cluster.as_ref() {
234 Some(ClusterState { timestamp: Some(ts), .. }) => *ts,
235 Some(_) => {
236 self.iter.ignore_data()?;
237 log::warn!("missing cluster timestamp");
238 return Ok(());
239 }
240 None => {
241 self.iter.ignore_data()?;
242 log::warn!("simple block element outside of a cluster");
243 return Ok(());
244 }
245 };
246
247 let data = self.iter.read_boxed_slice()?;
248 extract_frames(
249 &data,
250 None,
251 &self.track_states,
252 cluster_ts,
253 self.timestamp_scale,
254 &mut self.frames,
255 )?;
256 }
257 ElementType::BlockGroup => {
258 let cluster_ts = match self.current_cluster.as_ref() {
259 Some(ClusterState { timestamp: Some(ts), .. }) => *ts,
260 Some(_) => {
261 self.iter.ignore_data()?;
262 log::warn!("missing cluster timestamp");
263 return Ok(());
264 }
265 None => {
266 self.iter.ignore_data()?;
267 log::warn!("block group element outside of a cluster");
268 return Ok(());
269 }
270 };
271
272 let group = self.iter.read_element_data::<BlockGroupElement>()?;
273 extract_frames(
274 &group.data,
275 group.duration,
276 &self.track_states,
277 cluster_ts,
278 self.timestamp_scale,
279 &mut self.frames,
280 )?;
281 }
282 ElementType::Tags => {
283 let tags = self.iter.read_element_data::<TagsElement>()?;
284 self.metadata.push(tags.to_metadata());
285 self.current_cluster = None;
286 }
287 _ if header.etype.is_top_level() => {
288 self.current_cluster = None;
289 }
290 other => {
291 log::debug!("ignored element {:?}", other);
292 self.iter.ignore_data()?;
293 }
294 }
295
296 Ok(())
297 }
298}
299
300impl FormatReader for MkvReader {
301 fn try_new(mut reader: MediaSourceStream, _options: &FormatOptions) -> Result<Self>
302 where
303 Self: Sized,
304 {
305 let is_seekable = reader.is_seekable();
306
307 let total_len = if is_seekable {
309 let pos = reader.pos();
310 let len = reader.seek(SeekFrom::End(0))?;
311 reader.seek(SeekFrom::Start(pos))?;
312 log::info!("stream is seekable with len={} bytes.", len);
313 Some(len)
314 }
315 else {
316 None
317 };
318
319 let mut it = ElementIterator::new(reader, total_len);
320 let ebml = it.read_element::<EbmlElement>()?;
321
322 if !matches!(ebml.header.doc_type.as_str(), "matroska" | "webm") {
323 return unsupported_error("mkv: not a matroska / webm file");
324 }
325
326 let segment_pos = match it.read_child_header()? {
327 Some(ElementHeader { etype: ElementType::Segment, data_pos, .. }) => data_pos,
328 _ => return unsupported_error("mkv: missing segment element"),
329 };
330
331 let mut segment_tracks = None;
332 let mut info = None;
333 let mut clusters = Vec::new();
334 let mut metadata = MetadataLog::default();
335 let mut current_cluster = None;
336
337 let mut seek_positions = Vec::new();
338 while let Ok(Some(header)) = it.read_child_header() {
339 match header.etype {
340 ElementType::SeekHead => {
341 let seek_head = it.read_element_data::<SeekHeadElement>()?;
342 for element in seek_head.seeks.into_vec() {
343 let tag = element.id as u32;
344 let etype = match ELEMENTS.get(&tag) {
345 Some((_, etype)) => *etype,
346 None => continue,
347 };
348 seek_positions.push((etype, segment_pos + element.position));
349 }
350 }
351 ElementType::Tracks => {
352 segment_tracks = Some(it.read_element_data::<TracksElement>()?);
353 }
354 ElementType::Info => {
355 info = Some(it.read_element_data::<InfoElement>()?);
356 }
357 ElementType::Cues => {
358 let cues = it.read_element_data::<CuesElement>()?;
359 for cue in cues.points.into_vec() {
360 clusters.push(ClusterElement {
361 timestamp: cue.time,
362 pos: segment_pos + cue.positions.cluster_position,
363 end: None,
364 blocks: Box::new([]),
365 });
366 }
367 }
368 ElementType::Tags => {
369 let tags = it.read_element_data::<TagsElement>()?;
370 metadata.push(tags.to_metadata());
371 }
372 ElementType::Cluster => {
373 current_cluster = Some(ClusterState { timestamp: None, end: header.end() });
375
376 break;
379 }
380 other => {
381 it.ignore_data()?;
382 log::debug!("ignored element {:?}", other);
383 }
384 }
385 }
386
387 if is_seekable {
388 seek_positions.sort_by_key(|sp| sp.1);
390
391 for (etype, pos) in seek_positions {
392 it.seek(pos)?;
393
394 match etype {
398 ElementType::Tracks => {
399 segment_tracks = Some(it.read_element::<TracksElement>()?);
400 }
401 ElementType::Info => {
402 info = Some(it.read_element::<InfoElement>()?);
403 }
404 ElementType::Tags => {
405 let tags = it.read_element::<TagsElement>()?;
406 metadata.push(tags.to_metadata());
407 }
408 ElementType::Cues => {
409 let cues = it.read_element::<CuesElement>()?;
410 for cue in cues.points.into_vec() {
411 clusters.push(ClusterElement {
412 timestamp: cue.time,
413 pos: segment_pos + cue.positions.cluster_position,
414 end: None,
415 blocks: Box::new([]),
416 });
417 }
418 }
419 _ => (),
420 }
421 }
422 }
423
424 let segment_tracks =
425 segment_tracks.ok_or(Error::DecodeError("mkv: missing Tracks element"))?;
426
427 if is_seekable {
428 let mut reader = it.into_inner();
429 reader.seek(SeekFrom::Start(segment_pos))?;
430 it = ElementIterator::new(reader, total_len);
431 }
432
433 let info = info.ok_or(Error::DecodeError("mkv: missing Info element"))?;
434
435 let time_base = TimeBase::new(u32::try_from(info.timestamp_scale).unwrap(), 1_000_000_000);
437
438 let mut tracks = Vec::new();
439 let mut states = HashMap::new();
440 for track in segment_tracks.tracks.into_vec() {
441 let codec_type = codec_id_to_type(&track);
442
443 let mut codec_params = CodecParameters::new();
444 codec_params.with_time_base(time_base);
445
446 if let Some(duration) = info.duration {
447 codec_params.with_n_frames(duration as u64);
448 }
449
450 if let Some(audio) = track.audio {
451 codec_params.with_sample_rate(audio.sampling_frequency.round() as u32);
452
453 let format = audio.bit_depth.and_then(|bits| match bits {
454 8 => Some(SampleFormat::S8),
455 16 => Some(SampleFormat::S16),
456 24 => Some(SampleFormat::S24),
457 32 => Some(SampleFormat::S32),
458 _ => None,
459 });
460
461 if let Some(format) = format {
462 codec_params.with_sample_format(format);
463 }
464
465 if let Some(bits) = audio.bit_depth {
466 codec_params.with_bits_per_sample(bits as u32);
467 }
468
469 let layout = match audio.channels {
470 1 => Some(Layout::Mono),
471 2 => Some(Layout::Stereo),
472 3 => Some(Layout::TwoPointOne),
473 6 => Some(Layout::FivePointOne),
474 other => {
475 log::warn!(
476 "track #{} has custom number of channels: {}",
477 track.number,
478 other
479 );
480 None
481 }
482 };
483
484 if let Some(layout) = layout {
485 codec_params.with_channel_layout(layout);
486 }
487
488 if let Some(codec_type) = codec_type {
489 codec_params.for_codec(codec_type);
490 if let Some(codec_private) = track.codec_private {
491 let extra_data = match codec_type {
492 CODEC_TYPE_VORBIS => {
493 vorbis_extra_data_from_codec_private(&codec_private)?
494 }
495 CODEC_TYPE_FLAC => flac_extra_data_from_codec_private(&codec_private)?,
496 _ => codec_private,
497 };
498 codec_params.with_extra_data(extra_data);
499 }
500 }
501 }
502
503 let track_id = track.number as u32;
504 tracks.push(Track {
505 id: track_id,
506 codec_params: codec_params.clone(),
507 language: track.language,
508 });
509
510 states.insert(
511 track_id,
512 TrackState {
513 codec_params,
514 track_num: track_id,
515 default_frame_duration: track.default_duration,
516 },
517 );
518 }
519
520 Ok(Self {
521 iter: it,
522 tracks,
523 track_states: states,
524 current_cluster,
525 metadata,
526 cues: Vec::new(),
527 frames: VecDeque::new(),
528 timestamp_scale: info.timestamp_scale,
529 clusters,
530 })
531 }
532
533 fn cues(&self) -> &[Cue] {
534 &self.cues
535 }
536
537 fn metadata(&mut self) -> Metadata<'_> {
538 self.metadata.metadata()
539 }
540
541 fn seek(&mut self, _mode: SeekMode, to: SeekTo) -> Result<SeekedTo> {
542 if self.tracks.is_empty() {
543 return seek_error(SeekErrorKind::Unseekable);
544 }
545
546 match to {
547 SeekTo::Time { time, track_id } => {
548 let track = match track_id {
549 Some(id) => self.tracks.iter().find(|track| track.id == id),
550 None => self.tracks.first(),
551 };
552 let track = track.ok_or(Error::SeekError(SeekErrorKind::InvalidTrack))?;
553 let tb = track.codec_params.time_base.unwrap();
554 let ts = tb.calc_timestamp(time);
555 let track_id = track.id;
556 self.seek_track_by_ts(track_id, ts)
557 }
558 SeekTo::TimeStamp { ts, track_id } => {
559 match self.tracks.iter().find(|t| t.id == track_id) {
560 Some(_) => self.seek_track_by_ts(track_id, ts),
561 None => seek_error(SeekErrorKind::InvalidTrack),
562 }
563 }
564 }
565 }
566
567 fn tracks(&self) -> &[Track] {
568 &self.tracks
569 }
570
571 fn next_packet(&mut self) -> Result<Packet> {
572 loop {
573 if let Some(frame) = self.frames.pop_front() {
574 return Ok(Packet::new_from_boxed_slice(
575 frame.track,
576 frame.timestamp,
577 frame.duration,
578 frame.data,
579 ));
580 }
581 self.next_element()?;
582 }
583 }
584
585 fn into_inner(self: Box<Self>) -> MediaSourceStream {
586 self.iter.into_inner()
587 }
588}
589
590impl QueryDescriptor for MkvReader {
591 fn query() -> &'static [Descriptor] {
592 &[support_format!(
593 "matroska",
594 "Matroska / WebM",
595 &["webm", "mkv"],
596 &["video/webm", "video/x-matroska"],
597 &[b"\x1A\x45\xDF\xA3"] )]
599 }
600
601 fn score(_context: &[u8]) -> u8 {
602 255
603 }
604}