use std::io::SeekFrom;
use symphonia_core::errors::{decode_error, seek_error, Error, Result, SeekErrorKind};
use symphonia_core::io::{MediaSource, ReadBytes};
use symphonia_core::util::bits::sign_extend_leq64_to_i64;
use crate::element_ids::{ElementType, Type, ELEMENTS};
use crate::segment::EbmlHeaderElement;
/// Reads an EBML element ID (tag) from `reader`.
///
/// Returns `(tag, tag_len, reset)`:
///
/// * `tag` is the raw ID, with the length-marker bit left in place.
/// * `tag_len` is the number of octets the ID is considered to occupy.
/// * `reset` is `true` when the first octet did not start a valid ID and the function had to
///   scan forward to recover.
///
/// An ID may be at most four octets long, i.e. its first octet may have at most three leading
/// zero bits. When that is not the case the stream is scanned one octet at a time until the
/// last four octets form the tag of a top-level element known to `ELEMENTS`.
///
/// # Errors
///
/// Propagates any error returned by the underlying reader (including end of stream while
/// scanning for the next top-level element).
pub(crate) fn read_tag<R: ReadBytes>(mut reader: R) -> Result<(u32, u32, bool)> {
    let first = reader.read_byte()?;
    let extra_octets = first.leading_zeros();

    if extra_octets <= 3 {
        // Well-formed ID: append the octets announced by the length marker.
        let mut id = u32::from(first);
        for _ in 0..extra_octets {
            id = (id << 8) | u32::from(reader.read_byte()?);
        }

        log::debug!("element with tag: {:X}", id);
        return Ok((id, extra_octets + 1, false));
    }

    // Not a valid ID: slide a four-octet window over the stream until it matches a known
    // top-level element.
    let mut window = 0u32;
    loop {
        if let Some((_, ty)) = ELEMENTS.get(&window) {
            if ty.is_top_level() {
                log::info!("found next supported tag {:08X} ({:?})", window, ty);
                return Ok((window, 4, true));
            }
        }
        window = (window << 8) | u32::from(reader.read_u8()?);
    }
}
/// Reads an element data size from `reader`.
///
/// Returns `Ok(None)` when the size is the reserved "unknown size" marker, i.e. when every
/// data bit of the variable-length integer is set. EBML reserves that pattern at every
/// encoded width (`0xFF`, `0x7FFF`, ... `0x01FF_FFFF_FFFF_FFFF`), not only for the one-octet
/// form. Otherwise returns `Ok(Some(size))`.
///
/// # Errors
///
/// Propagates any error produced while reading the variable-length integer.
pub(crate) fn read_size<R: ReadBytes>(reader: R) -> Result<Option<u64>> {
    let (size, len) = read_vint(reader)?;

    // `read_vint` reports the one-octet marker (0xFF) as `u64::MAX`.
    if size == u64::MAX && len == 1 {
        return Ok(None);
    }

    // For wider encodings the marker is a value with all `7 * len` data bits set. The width is
    // bounded to 8 octets so the shift below can never overflow.
    if (2..=8).contains(&len) && size == (1u64 << (7 * len)) - 1 {
        return Ok(None);
    }

    Ok(Some(size))
}
/// Reads an unsigned variable-length integer from `reader` and returns its value, discarding
/// the encoded width.
///
/// Note that the single octet `0xFF` is reported by `read_vint` as `u64::MAX`.
///
/// # Errors
///
/// Propagates any error produced while reading the variable-length integer.
pub(crate) fn read_unsigned_vint<R: ReadBytes>(reader: R) -> Result<u64> {
    let (value, _width) = read_vint(reader)?;
    Ok(value)
}
/// Reads a signed variable-length integer from `reader`.
///
/// The value is stored as an unsigned variable-length integer offset by a bias of
/// `2^(7 * width - 1) - 1`, where `width` is the number of encoded octets; the bias is
/// subtracted to recover the signed value.
///
/// # Errors
///
/// Propagates any error produced while reading the variable-length integer.
pub(crate) fn read_signed_vint<R: ReadBytes>(reader: R) -> Result<i64> {
    let (raw, width) = read_vint(reader)?;
    let bias = 2i64.pow(width * 7 - 1) - 1;
    Ok(raw as i64 - bias)
}
fn read_vint<R: ReadBytes>(mut reader: R) -> Result<(u64, u32)> {
let byte = reader.read_byte()?;
if byte == 0xFF {
return Ok((u64::MAX, 1));
}
let vint_width = byte.leading_zeros();
let mut vint = u64::from(byte);
vint ^= 1 << (7 - vint_width);
for _ in 0..vint_width {
let byte = reader.read_byte()?;
vint = (vint << 8) | u64::from(byte);
}
Ok((vint, vint_width + 1))
}
#[cfg(test)]
mod tests {
    use symphonia_core::io::BufReader;

    use super::{read_signed_vint, read_tag, read_unsigned_vint};

    /// Parses `bytes` as an element ID, panicking on failure.
    fn tag(bytes: &[u8]) -> (u32, u32, bool) {
        read_tag(BufReader::new(bytes)).unwrap()
    }

    /// Parses `bytes` as an unsigned variable-length integer, panicking on failure.
    fn unsigned(bytes: &[u8]) -> u64 {
        read_unsigned_vint(BufReader::new(bytes)).unwrap()
    }

    /// Parses `bytes` as a signed variable-length integer, panicking on failure.
    fn signed(bytes: &[u8]) -> i64 {
        read_signed_vint(BufReader::new(bytes)).unwrap()
    }

    // IDs of one to four octets keep their length-marker bit and never trigger a resync.
    #[test]
    fn element_tag_parsing() {
        assert_eq!(tag(&[0x82]), (0x82, 1, false));
        assert_eq!(tag(&[0x40, 0x02]), (0x4002, 2, false));
        assert_eq!(tag(&[0x20, 0x00, 0x02]), (0x200002, 3, false));
        assert_eq!(tag(&[0x10, 0x00, 0x00, 0x02]), (0x10000002, 4, false));
    }

    // The value 2 decodes identically at every encoded width from one to eight octets.
    #[test]
    fn variable_unsigned_integer_parsing() {
        assert_eq!(unsigned(&[0x82]), 2);
        assert_eq!(unsigned(&[0x40, 0x02]), 2);
        assert_eq!(unsigned(&[0x20, 0x00, 0x02]), 2);
        assert_eq!(unsigned(&[0x10, 0x00, 0x00, 0x02]), 2);
        assert_eq!(unsigned(&[0x08, 0x00, 0x00, 0x00, 0x02]), 2);
        assert_eq!(unsigned(&[0x04, 0x00, 0x00, 0x00, 0x00, 0x02]), 2);
        assert_eq!(unsigned(&[0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02]), 2);
        assert_eq!(unsigned(&[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02]), 2);
    }

    // An all-zero payload decodes to the most negative value of its width.
    #[test]
    fn variable_signed_integer_parsing() {
        assert_eq!(signed(&[0x80]), -63);
        assert_eq!(signed(&[0x40, 0x00]), -8191);
    }
}
/// The header of an EBML element: its ID, its resolved type and where the element and its
/// payload are located in the stream.
#[derive(Copy, Clone, Debug)]
pub struct ElementHeader {
/// The raw element ID as returned by `read_tag`.
pub tag: u32,
/// The element type looked up from `tag` in `ELEMENTS`, or `ElementType::Unknown` when the
/// tag is not listed there.
pub etype: ElementType,
/// Stream position of the first octet of the element ID.
pub pos: u64,
/// Total element length: header octets (ID and size) plus `data_len`.
pub len: u64,
/// Stream position of the first octet of the element payload.
pub data_pos: u64,
/// Payload length in octets. This is 0 when the size was reported as unknown.
pub data_len: u64,
}
impl ElementHeader {
    /// Creates an iterator over the child elements of this element.
    ///
    /// # Panics
    ///
    /// Panics if `reader` is not positioned at the start of this element's payload.
    pub(crate) fn children<R: ReadBytes>(&self, reader: R) -> ElementIterator<R> {
        let reader_pos = reader.pos();
        assert_eq!(reader_pos, self.data_pos, "unexpected position");
        ElementIterator::new_of(reader, *self)
    }

    /// Returns the stream position just past this element's payload, or `None` when the
    /// payload length is 0.
    pub(crate) fn end(&self) -> Option<u64> {
        match self.data_len {
            0 => None,
            data_len => Some(self.data_pos + data_len),
        }
    }
}
/// A typed EBML element that can be parsed from a byte stream.
pub trait Element: Sized {
/// The element type this implementation parses. `ElementIterator::read_element_data` asserts
/// that the current header's `etype` equals this value before calling `read`.
const ID: ElementType;
/// Parses the element described by `header` from `reader`.
fn read<B: ReadBytes>(reader: &mut B, header: ElementHeader) -> Result<Self>;
}
impl ElementHeader {
    /// Reads an element header (ID followed by data size) from `reader`.
    ///
    /// Returns the header together with the `reset` flag reported by `read_tag`, which is
    /// `true` when the ID was only found after scanning forward in the stream.
    ///
    /// A size reported as unknown is stored as a payload length of 0.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while reading the ID or the size.
    pub(crate) fn read<R: ReadBytes>(reader: &mut R) -> Result<(ElementHeader, bool)> {
        let (tag, tag_len, reset) = read_tag(&mut *reader)?;
        // The ID has just been consumed, so the element starts `tag_len` octets back.
        let pos = reader.pos() - u64::from(tag_len);

        let data_len = read_size(&mut *reader)?.unwrap_or(0);
        let data_pos = reader.pos();

        let etype = match ELEMENTS.get(&tag) {
            Some((_, etype)) => *etype,
            None => ElementType::Unknown,
        };

        let header =
            ElementHeader { tag, etype, pos, len: data_pos - pos + data_len, data_pos, data_len };

        Ok((header, reset))
    }
}
/// Wrapper element whose content is a parsed `EbmlHeaderElement`.
#[derive(Debug)]
pub(crate) struct EbmlElement {
/// The parsed EBML header.
pub(crate) header: EbmlHeaderElement,
}
impl Element for EbmlElement {
    const ID: ElementType = ElementType::Ebml;

    /// Parses the element by iterating over the children of `header` and reading an
    /// `EbmlHeaderElement` from them.
    fn read<B: ReadBytes>(reader: &mut B, header: ElementHeader) -> Result<Self> {
        header
            .children(reader)
            .read_element_data::<EbmlHeaderElement>()
            .map(|header| Self { header })
    }
}
/// A cursor over a sequence of EBML elements read from `reader`.
pub(crate) struct ElementIterator<R: ReadBytes> {
/// The underlying byte source.
reader: R,
/// The header most recently read, or the parent header when the iterator was created for the
/// children of an element. `None` after construction via `new` and after a seek.
current: Option<ElementHeader>,
/// Stream position at which the next element header is expected.
next_pos: u64,
/// Stream position at which iteration stops; `None` means there is no limit.
end: Option<u64>,
}
impl<R: ReadBytes> ElementIterator<R> {
pub(crate) fn new(reader: R, end: Option<u64>) -> Self {
let pos = reader.pos();
Self::new_at(reader, pos, end)
}
fn new_at(reader: R, start: u64, end: Option<u64>) -> Self {
Self { reader, current: None, next_pos: start, end }
}
fn new_of(reader: R, parent: ElementHeader) -> Self {
Self { reader, current: Some(parent), next_pos: parent.data_pos, end: parent.end() }
}
pub(crate) fn seek(&mut self, pos: u64) -> Result<()>
where
R: MediaSource,
{
let current_pos = self.pos();
self.current = None;
if self.reader.is_seekable() {
self.reader.seek(SeekFrom::Start(pos))?;
}
else if pos < current_pos {
return seek_error(SeekErrorKind::ForwardOnly);
}
else {
self.reader.ignore_bytes(pos - current_pos)?;
}
self.next_pos = pos;
Ok(())
}
pub(crate) fn into_inner(self) -> R {
self.reader
}
pub(crate) fn read_header(&mut self) -> Result<Option<ElementHeader>> {
let header = self.read_header_no_consume()?;
if let Some(header) = &header {
self.next_pos += header.len;
}
Ok(header)
}
pub(crate) fn read_child_header(&mut self) -> Result<Option<ElementHeader>> {
let header = self.read_header_no_consume()?;
if let Some(header) = &header {
match ELEMENTS.get(&header.tag).map(|it| it.0) {
Some(Type::Master) => {
self.next_pos = header.data_pos;
}
_ => {
self.next_pos += header.len;
}
}
}
Ok(header)
}
fn read_header_no_consume(&mut self) -> Result<Option<ElementHeader>> {
let pos = self.reader.pos();
if pos < self.next_pos {
self.reader.ignore_bytes(self.next_pos - pos)?;
}
assert_eq!(self.next_pos, self.reader.pos(), "invalid position");
if self.reader.pos() < self.end.unwrap_or(u64::MAX) {
let (header, reset) = ElementHeader::read(&mut self.reader)?;
if reset {
self.next_pos = self.reader.pos();
}
self.current = Some(header);
return Ok(Some(header));
}
Ok(None)
}
pub(crate) fn read_element<E: Element>(&mut self) -> Result<E> {
let _header = self.read_header()?;
self.read_element_data()
}
pub(crate) fn read_element_data<E: Element>(&mut self) -> Result<E> {
let header = self.current.expect("EBML header must be read before calling this function");
assert_eq!(
header.etype,
E::ID,
"EBML element type must be checked before calling this function"
);
let element = E::read(&mut self.reader, header)?;
self.next_pos = self.reader.pos();
Ok(element)
}
pub(crate) fn read_elements<E: Element>(&mut self) -> Result<Box<[E]>> {
let mut elements = vec![];
while let Some(header) = self.read_header()? {
if header.etype == ElementType::Crc32 {
continue;
}
if header.etype != E::ID {
log::warn!("found element with invalid type {:?}", header);
self.ignore_data()?;
continue;
}
elements.push(E::read(&mut self.reader, header)?);
}
Ok(elements.into_boxed_slice())
}
pub(crate) fn read_data(&mut self) -> Result<ElementData> {
let hdr = self.current.expect("not in an element");
let value = self
.try_read_data(hdr)?
.ok_or(Error::DecodeError("mkv: element has no primitive data"))?;
Ok(value)
}
pub(crate) fn read_u64(&mut self) -> Result<u64> {
match self.read_data()? {
ElementData::UnsignedInt(s) => Ok(s),
_ => Err(Error::DecodeError("mkv: expected an unsigned int")),
}
}
pub(crate) fn read_f64(&mut self) -> Result<f64> {
match self.read_data()? {
ElementData::Float(s) => Ok(s),
_ => Err(Error::DecodeError("mkv: expected a float")),
}
}
pub(crate) fn read_string(&mut self) -> Result<String> {
match self.read_data()? {
ElementData::String(s) => Ok(s),
_ => Err(Error::DecodeError("mkv: expected a string")),
}
}
pub(crate) fn read_boxed_slice(&mut self) -> Result<Box<[u8]>> {
match self.read_data()? {
ElementData::Binary(b) => Ok(b),
_ => Err(Error::DecodeError("mkv: expected binary data")),
}
}
pub(crate) fn try_read_data(&mut self, header: ElementHeader) -> Result<Option<ElementData>> {
Ok(match ELEMENTS.get(&header.tag) {
Some((ty, _)) => {
assert_eq!(header.data_pos, self.reader.pos(), "invalid stream position");
if let (Some(cur), Some(end)) = (self.current, self.end) {
if cur.pos + cur.len > end {
log::debug!("reading element data {:?}; parent end={}", cur, end);
return decode_error(
"mkv: attempt to read element data past master element ",
);
}
}
Some(match ty {
Type::Master => {
return Ok(None);
}
Type::Unsigned => {
if header.data_len > 8 {
self.ignore_data()?;
return decode_error("mkv: invalid unsigned integer length");
}
let mut buff = [0u8; 8];
let offset = 8 - header.data_len as usize;
self.reader.read_buf_exact(&mut buff[offset..])?;
let value = u64::from_be_bytes(buff);
ElementData::UnsignedInt(value)
}
Type::Signed | Type::Date => {
if header.data_len > 8 {
self.ignore_data()?;
return decode_error("mkv: invalid signed integer length");
}
let len = header.data_len as usize;
let mut buff = [0u8; 8];
self.reader.read_buf_exact(&mut buff[8 - len..])?;
let value = u64::from_be_bytes(buff);
let value = sign_extend_leq64_to_i64(value, (len as u32) * 8);
match ty {
Type::Signed => ElementData::SignedInt(value),
Type::Date => ElementData::Date(value),
_ => unreachable!(),
}
}
Type::Float => {
let value = match header.data_len {
0 => 0.0,
4 => self.reader.read_be_f32()? as f64,
8 => self.reader.read_be_f64()?,
_ => {
self.ignore_data()?;
return Err(Error::DecodeError("mkv: invalid float length"));
}
};
ElementData::Float(value)
}
Type::String => {
let data = self.reader.read_boxed_slice_exact(header.data_len as usize)?;
let bytes = data.split(|b| *b == 0).next().unwrap_or(&data);
ElementData::String(String::from_utf8_lossy(bytes).into_owned())
}
Type::Binary => ElementData::Binary(
self.reader.read_boxed_slice_exact(header.data_len as usize)?,
),
})
}
None => None,
})
}
pub(crate) fn ignore_data(&mut self) -> Result<()> {
if let Some(header) = self.current {
log::debug!("ignoring data of {:?} element", header.etype);
self.reader.ignore_bytes(header.data_len)?;
self.next_pos = header.data_pos + header.data_len;
}
Ok(())
}
pub(crate) fn pos(&self) -> u64 {
self.reader.pos()
}
}
/// The decoded payload of a primitive (non-master) element, as produced by
/// `ElementIterator::try_read_data`.
#[derive(Clone, Debug)]
pub(crate) enum ElementData {
/// Raw payload bytes.
Binary(Box<[u8]>),
/// Floating-point value; 4-octet payloads are widened to `f64`, empty payloads read as 0.0.
Float(f64),
/// Signed integer, sign-extended from its encoded width.
SignedInt(i64),
/// Text payload, cut at the first NUL byte and converted lossily from UTF-8.
String(String),
/// Unsigned big-endian integer of up to 8 octets.
UnsignedInt(u64),
/// Date payload, decoded exactly like a signed integer.
/// NOTE(review): epoch and unit are not visible in this file; confirm against the Matroska
/// specification.
Date(i64),
}