#![allow(dead_code)]
use bitflags::bitflags;
use thiserror::Error;
// Element IDs of the cluster-level elements this module knows about. The IDs
// are stored with their length-marker bits intact, which is the form
// `parse_element_id` returns.

/// Cluster timecode element; `parse_cluster_header` requires it to be present.
pub const EBML_ID_TIMECODE: u32 = 0xE7;
/// Cluster position element; optional in `parse_cluster_header`.
pub const EBML_ID_POSITION: u32 = 0xA7;
/// Previous-cluster-size element; optional in `parse_cluster_header`.
pub const EBML_ID_PREV_SIZE: u32 = 0xAB;
/// SimpleBlock element; `parse_cluster_header` stops scanning when it sees one.
pub const EBML_ID_SIMPLE_BLOCK: u32 = 0xA3;
/// BlockGroup element; `parse_cluster_header` stops scanning when it sees one.
pub const EBML_ID_BLOCK_GROUP: u32 = 0xA0;
/// Block element; `parse_cluster_header` stops scanning when it sees one.
pub const EBML_ID_BLOCK: u32 = 0xA1;
/// BlockDuration element ID (not referenced by the parsers in this part of the file).
pub const EBML_ID_BLOCK_DURATION: u32 = 0x9B;
/// ReferenceBlock element ID (not referenced by the parsers in this part of the file).
pub const EBML_ID_REFERENCE_BLOCK: u32 = 0xFB;
/// BlockAdditions element ID (not referenced by the parsers in this part of the file).
pub const EBML_ID_BLOCK_ADDITIONS: u32 = 0x75A1;
// All-ones data value of an N-octet EBML vint, i.e. `2^(7 * N) - 1`.
// EBML reserves this pattern in an element-size field to mean "unknown size".
// Each constant is the value `parse_vint` yields for the corresponding
// all-ones encoding (the length marker is already stripped).
const VINT_UNKNOWN_1: u64 = 0x7F;
const VINT_UNKNOWN_2: u64 = 0x3FFF;
const VINT_UNKNOWN_3: u64 = 0x1F_FFFF;
const VINT_UNKNOWN_4: u64 = 0x0FFF_FFFF;
const VINT_UNKNOWN_5: u64 = 0x07_FFFF_FFFF;
// 6 octets carry 42 data bits (the previous literal had 50 bits set).
const VINT_UNKNOWN_6: u64 = 0x03FF_FFFF_FFFF;
// 7 octets carry 49 data bits (the previous literal had 57 bits set).
const VINT_UNKNOWN_7: u64 = 0x0001_FFFF_FFFF_FFFF;
const VINT_UNKNOWN_8: u64 = 0x00FF_FFFF_FFFF_FFFF;
/// Errors reported by the cluster, block and lace parsers in this module.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ClusterError {
/// The input ended before a required field could be read.
#[error("unexpected end of data (need {need} bytes, have {have})")]
UnexpectedEof {
/// Byte count the failing read required. Some call sites report it
/// relative to the slice they were handed, others as an absolute end
/// offset into the enclosing buffer.
need: usize,
/// Length of the buffer the failing read was looking at.
have: usize,
},
/// The first octet of a vint was `0x00`, so no length marker was found
/// within eight octets (returned by `parse_vint`).
#[error("invalid EBML vint: leading byte is 0x00")]
InvalidVint,
/// A block declared track number 0 (returned by `parse_simple_block`).
#[error("block track number must be ≥ 1")]
TrackNumberZero,
/// Lace size data could not be decoded or is inconsistent with the payload.
#[error("invalid lace data: {reason}")]
InvalidLace {
/// Static description of what was wrong.
reason: &'static str,
},
/// Fixed-size lacing was requested but the payload does not split evenly.
#[error("fixed lace: data length {data_len} is not divisible by frame count {frame_count}")]
FixedLaceNotDivisible {
/// Payload length in bytes.
data_len: usize,
/// Number of frames the payload was supposed to be split into.
frame_count: usize,
},
/// Combining a cluster timecode with a block's relative timecode left the
/// `u64` range (either above `u64::MAX` or below zero).
#[error("timecode overflow: cluster={cluster}, block={block}")]
TimecodeOverflow {
/// Cluster timecode that was being adjusted.
cluster: u64,
/// Relative block timecode that was applied.
block: i16,
},
/// An element ID was not recognised. Not constructed anywhere in the
/// visible part of this file.
#[error("unknown cluster element ID: 0x{id:X}")]
UnknownElementId {
/// The offending element ID.
id: u32,
},
/// An element ID or size field could not be decoded. Also returned by
/// `parse_cluster_header` (with offset 0) when no timecode element was found.
#[error("malformed element size vint at offset {offset}")]
MalformedSize {
/// Byte offset of the field that failed to decode.
offset: usize,
},
/// A lace decoder was asked for zero frames.
#[error("lace frame count must be ≥ 1")]
ZeroFrameCount,
}
/// Lacing scheme used to pack several frames into one block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LaceType {
    /// The block holds exactly one frame.
    None,
    /// Frame sizes are stored as runs of bytes summed until a byte below 255.
    Xiph,
    /// All frames have the same size.
    Fixed,
    /// Frame sizes are stored as a vint followed by signed vint deltas.
    Ebml,
}

impl LaceType {
    /// Extracts the lacing scheme from a block flags byte.
    ///
    /// Only bits 1 and 2 (mask `0b0000_0110`) are inspected; every other bit
    /// is ignored.
    ///
    /// NOTE(review): the Matroska block-header description lists lace value
    /// `0b01` as Xiph and `0b10` as fixed-size, the reverse of the table
    /// below. The tests in this file pin the current mapping, so confirm
    /// against the specification and update both together if it is wrong.
    #[must_use]
    pub const fn from_flags_byte(flags: u8) -> Self {
        const LACE_MASK: u8 = 0b0000_0110;
        match flags & LACE_MASK {
            0b0000_0000 => Self::None,
            0b0000_0010 => Self::Fixed,
            0b0000_0100 => Self::Xiph,
            _ => Self::Ebml,
        }
    }
}
bitflags! {
/// Boolean flags carried in a block's flags byte. The two lacing bits of
/// the same byte are not part of this set; they are decoded into `LaceType`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
pub struct BlockFlags: u8 {
/// Bit `0x80` of the flags byte.
const KEYFRAME = 0b1000_0000;
/// Bit `0x08` of the flags byte.
const INVISIBLE = 0b0000_1000;
/// Bit `0x01` of the flags byte.
const DISCARDABLE = 0b0000_0001;
}
}
/// Decoded header of a block, as produced by `parse_simple_block`.
///
/// The frame payload itself is not copied; `data_offset` and `data_len`
/// locate it inside the buffer that was parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleBlock {
/// Track number read from the leading vint; `parse_simple_block` rejects 0.
pub track_number: u64,
/// Signed big-endian 16-bit timecode, relative to the enclosing cluster.
pub timecode: i16,
/// Keyframe / invisible / discardable bits from the flags byte.
pub flags: BlockFlags,
/// Lacing scheme decoded from the flags byte.
pub lace_type: LaceType,
/// Size in bytes of each frame, in order; a single entry when unlaced.
pub frame_sizes: Vec<usize>,
/// Offset into the parsed buffer at which the frame payload starts.
pub data_offset: usize,
/// Number of bytes from `data_offset` to the end of the parsed buffer.
pub data_len: usize,
}
impl SimpleBlock {
/// Returns this block's timecode added to `cluster_timecode`.
///
/// # Errors
///
/// Propagates `ClusterError::TimecodeOverflow` from
/// `compute_absolute_timecode` when the result does not fit in a `u64`.
pub fn absolute_timecode(&self, cluster_timecode: u64) -> Result<u64, ClusterError> {
compute_absolute_timecode(cluster_timecode, self.timecode)
}
}
/// One child value of a block group. Not constructed in the visible part of
/// this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BlockGroupEntry {
/// A signed reference-block value.
ReferenceBlock(i64),
/// An unsigned block-duration value.
BlockDuration(u64),
/// Raw bytes of a block-additions element.
BlockAdditions(Vec<u8>),
}
/// A block together with the optional values that accompany it in a group.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockGroup {
/// Header of the contained block.
pub block: SimpleBlock,
/// Block duration, when one was present.
pub duration: Option<u64>,
/// Reference-block values, in the order they were collected.
pub reference_blocks: Vec<i64>,
/// Raw block-additions bytes; presumably empty when none were present
/// (TODO confirm against the code that builds this struct).
pub additions: Vec<u8>,
}
/// Leading metadata of a cluster, as returned by `parse_cluster_header`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClusterHeader {
/// Cluster timecode; `parse_cluster_header` fails when it is absent.
pub timecode: u64,
/// Value of the position element, if one preceded the first block.
pub position: Option<u64>,
/// Value of the previous-size element, if one preceded the first block.
pub prev_size: Option<u64>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClusterBlock {
Simple(SimpleBlock),
Group(BlockGroup),
}
impl ClusterBlock {
#[must_use]
pub fn block(&self) -> &SimpleBlock {
match self {
Self::Simple(b) => b,
Self::Group(g) => &g.block,
}
}
pub fn absolute_timecode(&self, cluster_timecode: u64) -> Result<u64, ClusterError> {
self.block().absolute_timecode(cluster_timecode)
}
}
/// Decodes an EBML variable-length integer from the start of `data`.
///
/// The number of leading zero bits in the first octet gives the encoded width
/// (1 to 8 octets). The length-marker bit is removed and the remaining bits
/// are read big-endian.
///
/// Returns `(value, width)` where `width` is the number of octets consumed.
///
/// # Errors
///
/// * `ClusterError::UnexpectedEof` if `data` is empty or shorter than the
///   width announced by the first octet.
/// * `ClusterError::InvalidVint` if the first octet is `0x00`.
pub fn parse_vint(data: &[u8]) -> Result<(u64, usize), ClusterError> {
    let first = data
        .first()
        .copied()
        .ok_or(ClusterError::UnexpectedEof { need: 1, have: 0 })?;
    let width = first.leading_zeros() as usize + 1;
    if width > 8 {
        return Err(ClusterError::InvalidVint);
    }
    if data.len() < width {
        return Err(ClusterError::UnexpectedEof {
            need: width,
            have: data.len(),
        });
    }
    // The marker is the highest set bit of the first octet. Clearing it with
    // XOR works for every width; the former `0xFF_u8 >> width` mask overflowed
    // the shift when `width == 8` (panic in debug builds, marker left in the
    // value in release builds).
    let marker = 0x80_u8 >> (width - 1);
    let value = data[1..width]
        .iter()
        .fold(u64::from(first ^ marker), |acc, &byte| {
            (acc << 8) | u64::from(byte)
        });
    Ok((value, width))
}
/// Decodes a signed EBML vint, as used for EBML lace size deltas.
///
/// The unsigned value is read with `parse_vint` and then re-centred by
/// subtracting `2^(7 * width - 1) - 1`, so the middle of the unsigned range
/// maps to zero.
///
/// Returns `(value, width)` where `width` is the number of octets consumed.
///
/// # Errors
///
/// Propagates any error from `parse_vint`.
pub fn parse_vint_signed(data: &[u8]) -> Result<(i64, usize), ClusterError> {
    parse_vint(data).map(|(unsigned, octets)| {
        let data_bits = 7 * octets;
        let midpoint: u64 = (1_u64 << (data_bits - 1)).saturating_sub(1);
        ((unsigned as i64) - (midpoint as i64), octets)
    })
}
/// Splits a block flags byte into its boolean flags and its lacing scheme.
///
/// Bits `0x80`, `0x08` and `0x01` become `KEYFRAME`, `INVISIBLE` and
/// `DISCARDABLE`; the lacing bits are decoded by
/// `LaceType::from_flags_byte`. Every other bit is ignored.
#[must_use]
pub fn parse_block_flags(byte: u8) -> (BlockFlags, LaceType) {
    // The three flag constants sit on exactly the bits tested here, so
    // dropping every undeclared bit yields the same set as testing each bit.
    let flags = BlockFlags::from_bits_truncate(byte);
    (flags, LaceType::from_flags_byte(byte))
}
/// Decodes the frame sizes of a Xiph-laced block.
///
/// `data` starts at the first lace size byte. Each of the first
/// `frame_count - 1` sizes is stored as a run of bytes that are summed until
/// a byte below 255 ends the run. The last frame's size is not stored; it is
/// `total_data` minus the sum of the explicit sizes, so `total_data` must be
/// the combined size of all frames without the lace size bytes.
///
/// Returns the sizes of all `frame_count` frames and the number of lace size
/// bytes consumed from `data`.
///
/// # Errors
///
/// * `ClusterError::ZeroFrameCount` if `frame_count` is 0.
/// * `ClusterError::UnexpectedEof` if `data` ends inside a size run.
/// * `ClusterError::InvalidLace` if a size overflows `usize` or the explicit
///   sizes add up to more than `total_data`.
pub fn decode_xiph_lace_sizes(
    data: &[u8],
    frame_count: usize,
    total_data: usize,
) -> Result<(Vec<usize>, usize), ClusterError> {
    match frame_count {
        0 => return Err(ClusterError::ZeroFrameCount),
        // A single frame takes the whole payload and has no size bytes.
        1 => return Ok((vec![total_data], 0)),
        _ => {}
    }

    let mut frame_sizes = Vec::with_capacity(frame_count);
    let mut cursor = 0usize;
    while frame_sizes.len() < frame_count - 1 {
        let mut frame_len = 0usize;
        loop {
            let octet = match data.get(cursor) {
                Some(&b) => b,
                None => {
                    return Err(ClusterError::UnexpectedEof {
                        need: cursor + 1,
                        have: data.len(),
                    })
                }
            };
            cursor += 1;
            frame_len = match frame_len.checked_add(usize::from(octet)) {
                Some(total) => total,
                None => {
                    return Err(ClusterError::InvalidLace {
                        reason: "Xiph frame size overflowed usize",
                    })
                }
            };
            // 255 means "more to come"; anything smaller ends this size.
            if octet != 255 {
                break;
            }
        }
        frame_sizes.push(frame_len);
    }

    let explicit_total: usize = frame_sizes.iter().sum();
    match total_data.checked_sub(explicit_total) {
        Some(tail) => {
            frame_sizes.push(tail);
            Ok((frame_sizes, cursor))
        }
        None => Err(ClusterError::InvalidLace {
            reason: "Xiph explicit frame sizes exceed total data length",
        }),
    }
}
/// Decodes the frame sizes of an EBML-laced block.
///
/// `data` starts at the first lace size field. The first frame's size is an
/// unsigned vint; each following explicit size is the previous size plus a
/// signed vint delta. The last frame's size is not stored; it is `total_data`
/// minus the sum of the explicit sizes, so `total_data` must be the combined
/// size of all frames without the lace size fields.
///
/// Returns the sizes of all `frame_count` frames and the number of lace size
/// bytes consumed from `data`.
///
/// # Errors
///
/// * `ClusterError::ZeroFrameCount` if `frame_count` is 0.
/// * `ClusterError::InvalidLace` if a size field cannot be parsed, a size
///   becomes negative, the arithmetic overflows, a size does not fit in
///   `usize`, or the explicit sizes add up to more than `total_data`.
pub fn decode_ebml_lace_sizes(
    data: &[u8],
    frame_count: usize,
    total_data: usize,
) -> Result<(Vec<usize>, usize), ClusterError> {
    if frame_count == 0 {
        return Err(ClusterError::ZeroFrameCount);
    }
    if frame_count == 1 {
        // A single frame takes the whole payload and has no size fields.
        return Ok((vec![total_data], 0));
    }

    // Checked narrowing instead of `as usize`, which would silently truncate
    // on targets where `usize` is narrower than 64 bits. The trait path is
    // spelled out so it resolves regardless of the edition's prelude.
    let to_usize = |value: u64| {
        <usize as ::std::convert::TryFrom<u64>>::try_from(value).map_err(|_| {
            ClusterError::InvalidLace {
                reason: "EBML lace: frame size does not fit in usize",
            }
        })
    };

    let mut sizes = Vec::with_capacity(frame_count);
    let (first_size_raw, mut pos) = parse_vint(data).map_err(|_| ClusterError::InvalidLace {
        reason: "EBML lace: cannot parse first frame size vint",
    })?;
    let first_size = to_usize(first_size_raw)?;
    sizes.push(first_size);

    // Running sum of the explicit sizes. The sizes come from untrusted input,
    // so every addition is checked rather than summed at the end.
    let mut explicit_total = first_size;
    // A vint is at most 8 octets wide, so its value is far below `i64::MAX`.
    let mut prev_size = first_size_raw as i64;

    for _ in 1..(frame_count - 1) {
        // `pos` only ever advances by widths `parse_vint` validated against
        // the remaining input, so this slice cannot be out of bounds.
        let (delta, w) =
            parse_vint_signed(&data[pos..]).map_err(|_| ClusterError::InvalidLace {
                reason: "EBML lace: cannot parse frame size delta vint",
            })?;
        pos += w;
        let cur_size = prev_size
            .checked_add(delta)
            .ok_or(ClusterError::InvalidLace {
                reason: "EBML lace: frame size delta overflowed",
            })?;
        if cur_size < 0 {
            return Err(ClusterError::InvalidLace {
                reason: "EBML lace: frame size became negative after delta",
            });
        }
        let cur_len = to_usize(cur_size as u64)?;
        explicit_total = explicit_total
            .checked_add(cur_len)
            .ok_or(ClusterError::InvalidLace {
                reason: "EBML lace: explicit frame sizes overflowed usize",
            })?;
        sizes.push(cur_len);
        prev_size = cur_size;
    }

    let last_size = total_data
        .checked_sub(explicit_total)
        .ok_or(ClusterError::InvalidLace {
            reason: "EBML lace: explicit frame sizes exceed total data length",
        })?;
    sizes.push(last_size);
    Ok((sizes, pos))
}
/// Computes the frame sizes of a fixed-size-laced block.
///
/// `total_data` is split evenly across `frame_count` frames.
///
/// # Errors
///
/// * `ClusterError::ZeroFrameCount` if `frame_count` is 0.
/// * `ClusterError::FixedLaceNotDivisible` if `total_data` is not a multiple
///   of `frame_count`.
pub fn decode_fixed_lace_sizes(
    frame_count: usize,
    total_data: usize,
) -> Result<Vec<usize>, ClusterError> {
    match frame_count {
        0 => Err(ClusterError::ZeroFrameCount),
        frames if total_data % frames != 0 => Err(ClusterError::FixedLaceNotDivisible {
            data_len: total_data,
            frame_count: frames,
        }),
        frames => Ok(vec![total_data / frames; frames]),
    }
}
/// Parses the header of a block from its element payload.
///
/// Layout read from `data`: track number (vint), relative timecode
/// (big-endian `i16`), flags byte and, for laced blocks, a frame-count byte
/// (stored as count minus one) followed by the lace size fields.
///
/// The returned `frame_sizes` describe the bytes starting at `data_offset`;
/// for laced blocks they add up to `data_len`.
///
/// # Errors
///
/// * `ClusterError::UnexpectedEof` if `data` ends inside the header.
/// * `ClusterError::InvalidVint` if the track number vint is malformed.
/// * `ClusterError::TrackNumberZero` if the track number is 0.
/// * `ClusterError::InvalidLace` / `ClusterError::FixedLaceNotDivisible` if
///   the lace sizes are inconsistent with the payload.
pub fn parse_simple_block(data: &[u8]) -> Result<SimpleBlock, ClusterError> {
    let mut pos = 0usize;
    let (track_number, w) = parse_vint(data)?;
    pos += w;
    if track_number == 0 {
        return Err(ClusterError::TrackNumberZero);
    }

    if data.len() < pos + 2 {
        return Err(ClusterError::UnexpectedEof {
            need: pos + 2,
            have: data.len(),
        });
    }
    let timecode = i16::from_be_bytes([data[pos], data[pos + 1]]);
    pos += 2;

    let flags_byte = *data.get(pos).ok_or(ClusterError::UnexpectedEof {
        need: pos + 1,
        have: data.len(),
    })?;
    pos += 1;
    let (flags, lace_type) = parse_block_flags(flags_byte);

    let (frame_sizes, data_offset) = match lace_type {
        // Unlaced: everything after the flags byte is one frame.
        LaceType::None => (vec![data.len() - pos], pos),
        LaceType::Xiph | LaceType::Ebml | LaceType::Fixed => {
            let lace_count_byte = *data.get(pos).ok_or(ClusterError::UnexpectedEof {
                need: pos + 1,
                have: data.len(),
            })?;
            pos += 1;
            // The byte stores "number of frames minus one".
            let frame_count = usize::from(lace_count_byte) + 1;
            let lace_and_payload = &data[pos..];

            let (mut sizes, hdr_len) = match lace_type {
                LaceType::Xiph => {
                    decode_xiph_lace_sizes(lace_and_payload, frame_count, lace_and_payload.len())?
                }
                LaceType::Ebml => {
                    decode_ebml_lace_sizes(lace_and_payload, frame_count, lace_and_payload.len())?
                }
                // Fixed lacing has no size fields after the count byte.
                LaceType::Fixed => (
                    decode_fixed_lace_sizes(frame_count, lace_and_payload.len())?,
                    0,
                ),
                LaceType::None => unreachable!(),
            };

            // The decoders derive the last frame as "total minus explicit
            // sizes", and the total handed to them still contained the
            // `hdr_len` lace size bytes. Take those out of the last frame so
            // the sizes cover exactly the bytes that follow the lace header.
            if let Some(last) = sizes.last_mut() {
                *last = last
                    .checked_sub(hdr_len)
                    .ok_or(ClusterError::InvalidLace {
                        reason: "lace header and explicit frame sizes exceed block payload",
                    })?;
            }
            (sizes, pos + hdr_len)
        }
    };

    let data_len = data.len() - data_offset;
    Ok(SimpleBlock {
        track_number,
        timecode,
        flags,
        lace_type,
        frame_sizes,
        data_offset,
        data_len,
    })
}
/// Reads an EBML element ID starting at `data[pos]`.
///
/// The ID is 1 to 4 octets wide, as given by the leading zero bits of its
/// first octet. Unlike `parse_vint`, the length-marker bit is kept in the
/// returned value, which is the form the `EBML_ID_*` constants use.
///
/// Returns `(id, width)` where `width` is the number of octets consumed.
///
/// # Errors
///
/// * `ClusterError::UnexpectedEof` if `pos` is at or past the end of `data`,
///   or the ID is cut short.
/// * `ClusterError::MalformedSize` if the first octet announces more than 4
///   octets.
fn parse_element_id(data: &[u8], pos: usize) -> Result<(u32, usize), ClusterError> {
    let tail = match data.get(pos..) {
        Some(rest) => rest,
        None => {
            return Err(ClusterError::UnexpectedEof {
                need: pos + 1,
                have: data.len(),
            })
        }
    };
    let lead = match tail.first() {
        Some(&octet) => octet,
        None => return Err(ClusterError::UnexpectedEof { need: 1, have: 0 }),
    };

    let octets = lead.leading_zeros() as usize + 1;
    if octets > 4 {
        return Err(ClusterError::MalformedSize { offset: pos });
    }

    match tail.get(..octets) {
        Some(raw_id) => {
            let id = raw_id
                .iter()
                .fold(0u32, |acc, &octet| (acc << 8) | u32::from(octet));
            Ok((id, octets))
        }
        None => Err(ClusterError::UnexpectedEof {
            need: pos + octets,
            have: data.len(),
        }),
    }
}
/// Reads an element size vint starting at `data[pos]`.
///
/// Returns `(size, width)` where `width` is the number of octets consumed.
///
/// # Errors
///
/// * `ClusterError::UnexpectedEof` if `pos` is past the end of `data`.
/// * `ClusterError::MalformedSize` (carrying `pos`) for any failure reported
///   by `parse_vint`, including a truncated or empty size field.
fn parse_element_size(data: &[u8], pos: usize) -> Result<(u64, usize), ClusterError> {
    match data.get(pos..) {
        Some(rest) => match parse_vint(rest) {
            Ok(decoded) => Ok(decoded),
            Err(_) => Err(ClusterError::MalformedSize { offset: pos }),
        },
        None => Err(ClusterError::UnexpectedEof {
            need: pos + 1,
            have: data.len(),
        }),
    }
}
/// Reads the first `width` bytes of `data` as a big-endian unsigned integer.
///
/// A `width` of 0 yields 0. When `width` exceeds 8 the leading bytes are
/// shifted out and only the last 8 bytes contribute to the result.
///
/// # Errors
///
/// Returns `ClusterError::UnexpectedEof` if `data` holds fewer than `width`
/// bytes.
fn read_uint_be(data: &[u8], width: usize) -> Result<u64, ClusterError> {
    match data.get(..width) {
        Some(octets) => Ok(octets
            .iter()
            .fold(0u64, |acc, &octet| (acc << 8) | u64::from(octet))),
        None => Err(ClusterError::UnexpectedEof {
            need: width,
            have: data.len(),
        }),
    }
}
/// Reads the first `width` bytes of `data` as a big-endian two's-complement
/// signed integer.
///
/// A `width` of 0 yields 0. When `width` exceeds 8, only the last 8 bytes
/// contribute, matching how `read_uint_be` treats over-wide input.
///
/// # Errors
///
/// Returns `ClusterError::UnexpectedEof` if `data` holds fewer than `width`
/// bytes.
fn read_sint_be(data: &[u8], width: usize) -> Result<i64, ClusterError> {
    let raw = read_uint_be(data, width)?;
    if width == 0 {
        // No bytes to sign-extend; the shift below would be by 64 bits,
        // which overflows.
        return Ok(0);
    }
    // Move the value's sign bit up to bit 63, then shift back arithmetically
    // to sign-extend. `saturating_sub` keeps the shift at 0 for widths of 8
    // or more instead of underflowing.
    let shift = 8usize.saturating_sub(width) * 8;
    Ok(((raw << shift) as i64) >> shift)
}
pub fn parse_cluster_header(data: &[u8]) -> Result<ClusterHeader, ClusterError> {
let mut timecode: Option<u64> = None;
let mut position: Option<u64> = None;
let mut prev_size: Option<u64> = None;
let mut pos = 0usize;
while pos < data.len() {
let (id, id_len) = parse_element_id(data, pos)?;
pos += id_len;
let (elem_size, size_len) = parse_element_size(data, pos)?;
pos += size_len;
let elem_size = elem_size as usize;
if pos + elem_size > data.len() {
return Err(ClusterError::UnexpectedEof {
need: pos + elem_size,
have: data.len(),
});
}
let elem_data = &data[pos..pos + elem_size];
match id {
id if id == EBML_ID_TIMECODE => {
timecode = Some(read_uint_be(elem_data, elem_size)?);
}
id if id == EBML_ID_POSITION => {
position = Some(read_uint_be(elem_data, elem_size)?);
}
id if id == EBML_ID_PREV_SIZE => {
prev_size = Some(read_uint_be(elem_data, elem_size)?);
}
id if id == EBML_ID_SIMPLE_BLOCK
|| id == EBML_ID_BLOCK_GROUP
|| id == EBML_ID_BLOCK =>
{
break;
}
_ => {
}
}
pos += elem_size;
if timecode.is_some() && position.is_some() && prev_size.is_some() {
break;
}
}
let timecode = timecode.ok_or(ClusterError::MalformedSize { offset: 0 })?;
Ok(ClusterHeader {
timecode,
position,
prev_size,
})
}
/// Applies a block's relative timecode to its cluster's timecode.
///
/// # Errors
///
/// Returns `ClusterError::TimecodeOverflow` if the sum exceeds `u64::MAX` or
/// a negative `block_timecode` would take the result below zero.
pub fn compute_absolute_timecode(
    cluster_timecode: u64,
    block_timecode: i16,
) -> Result<u64, ClusterError> {
    let shifted = if block_timecode < 0 {
        let magnitude = block_timecode.unsigned_abs() as u64;
        cluster_timecode.checked_sub(magnitude)
    } else {
        cluster_timecode.checked_add(block_timecode as u64)
    };
    match shifted {
        Some(absolute) => Ok(absolute),
        None => Err(ClusterError::TimecodeOverflow {
            cluster: cluster_timecode,
            block: block_timecode,
        }),
    }
}
// Unit tests for the vint, flag, lace, block and cluster-header parsers.
#[cfg(test)]
mod tests {
use super::*;
// 0x85: marker in the top bit, data bits 0b000_0101.
#[test]
fn test_parse_vint_1byte() {
let (val, width) = parse_vint(&[0x85]).unwrap();
assert_eq!(width, 1);
assert_eq!(val, 5);
}
// 0x40: one leading zero bit, so the vint spans two octets.
#[test]
fn test_parse_vint_2byte() {
let (val, width) = parse_vint(&[0x40, 0x05]).unwrap();
assert_eq!(width, 2);
assert_eq!(val, 5);
}
// All data bits set in a one-octet vint.
#[test]
fn test_parse_vint_max_1byte() {
let (val, width) = parse_vint(&[0xFF]).unwrap();
assert_eq!(width, 1);
assert_eq!(val, VINT_UNKNOWN_1);
}
// 0x80 sets only the keyframe bit; lace bits are zero.
#[test]
fn test_parse_block_flags_keyframe_no_lace() {
let (flags, lace) = parse_block_flags(0x80);
assert!(flags.contains(BlockFlags::KEYFRAME));
assert!(!flags.contains(BlockFlags::INVISIBLE));
assert!(!flags.contains(BlockFlags::DISCARDABLE));
assert_eq!(lace, LaceType::None);
}
// 0x05 = discardable bit plus lace bits 0b10, which this file maps to Xiph.
#[test]
fn test_parse_block_flags_xiph_discardable() {
let (flags, lace) = parse_block_flags(0x05);
assert!(!flags.contains(BlockFlags::KEYFRAME));
assert!(flags.contains(BlockFlags::DISCARDABLE));
assert_eq!(lace, LaceType::Xiph);
}
// Two explicit sizes (100, 200); the third is what remains of 400.
#[test]
fn test_decode_xiph_lace_sizes_three_frames() {
let header = &[100u8, 200u8];
let total_data = 400usize;
let (sizes, hdr_consumed) = decode_xiph_lace_sizes(header, 3, total_data).unwrap();
assert_eq!(hdr_consumed, 2); assert_eq!(sizes, vec![100, 200, 100]);
}
// 400 bytes split evenly across 4 frames.
#[test]
fn test_decode_fixed_lace_sizes_success() {
let sizes = decode_fixed_lace_sizes(4, 400).unwrap();
assert_eq!(sizes, vec![100, 100, 100, 100]);
}
// 100 is not a multiple of 3, so fixed lacing must be rejected.
#[test]
fn test_decode_fixed_lace_sizes_not_divisible() {
let err = decode_fixed_lace_sizes(3, 100).unwrap_err();
assert!(matches!(
err,
ClusterError::FixedLaceNotDivisible {
data_len: 100,
frame_count: 3
}
));
}
// A frame count of zero is rejected before any division happens.
#[test]
fn test_decode_fixed_lace_sizes_zero_count() {
let err = decode_fixed_lace_sizes(0, 100).unwrap_err();
assert_eq!(err, ClusterError::ZeroFrameCount);
}
// Track 1, timecode 0, keyframe flag, then four payload bytes.
#[test]
fn test_parse_simple_block_no_lace() {
let raw: &[u8] = &[0x81, 0x00, 0x00, 0x80, 0xDE, 0xAD, 0xBE, 0xEF];
let block = parse_simple_block(raw).unwrap();
assert_eq!(block.track_number, 1);
assert_eq!(block.timecode, 0);
assert!(block.flags.contains(BlockFlags::KEYFRAME));
assert_eq!(block.lace_type, LaceType::None);
assert_eq!(block.frame_sizes, vec![4]);
assert_eq!(block.data_offset, 4); assert_eq!(block.data_len, 4);
}
// Adding to u64::MAX must report an error instead of wrapping.
#[test]
fn test_absolute_timecode_overflow() {
let err = compute_absolute_timecode(u64::MAX, 1).unwrap_err();
assert!(matches!(err, ClusterError::TimecodeOverflow { .. }));
}
// A negative offset larger than the cluster timecode is reported the same way.
#[test]
fn test_absolute_timecode_underflow() {
let err = compute_absolute_timecode(5, -10).unwrap_err();
assert!(matches!(err, ClusterError::TimecodeOverflow { .. }));
}
#[test]
fn test_absolute_timecode_ok_positive() {
let abs = compute_absolute_timecode(1000, 42).unwrap();
assert_eq!(abs, 1042);
}
#[test]
fn test_absolute_timecode_ok_negative() {
let abs = compute_absolute_timecode(1000, -100).unwrap();
assert_eq!(abs, 900);
}
// One timecode element: ID 0xE7, size 2, value 0x0064.
#[test]
fn test_parse_cluster_header_timecode_only() {
let data: &[u8] = &[0xE7, 0x82, 0x00, 0x64];
let header = parse_cluster_header(data).unwrap();
assert_eq!(header.timecode, 100);
assert!(header.position.is_none());
assert!(header.prev_size.is_none());
}
// Timecode element (0x00C8) followed by a position element (0x1000).
#[test]
fn test_parse_cluster_header_with_position() {
let data: &[u8] = &[
0xE7, 0x82, 0x00, 0xC8, 0xA7, 0x82, 0x10, 0x00, ];
let header = parse_cluster_header(data).unwrap();
assert_eq!(header.timecode, 200);
assert_eq!(header.position, Some(4096));
assert!(header.prev_size.is_none());
}
// 0x20: two leading zero bits, so the vint spans three octets.
#[test]
fn test_parse_vint_3byte() {
let (val, width) = parse_vint(&[0x20, 0x00, 0x10]).unwrap();
assert_eq!(width, 3);
assert_eq!(val, 16);
}
// The first octet announces two octets but only one is present.
#[test]
fn test_parse_vint_eof() {
let err = parse_vint(&[0x40]).unwrap_err();
assert!(matches!(
err,
ClusterError::UnexpectedEof { need: 2, have: 1 }
));
}
// 0xBF carries raw value 63, the one-octet bias, so the signed value is 0.
#[test]
fn test_parse_vint_signed_zero() {
let (val, w) = parse_vint_signed(&[0xBF]).unwrap();
assert_eq!(w, 1);
assert_eq!(val, 0);
}
// ClusterBlock forwards to the wrapped block's timecode.
#[test]
fn test_cluster_block_absolute_timecode() {
let block = SimpleBlock {
track_number: 1,
timecode: 50,
flags: BlockFlags::KEYFRAME,
lace_type: LaceType::None,
frame_sizes: vec![10],
data_offset: 4,
data_len: 10,
};
let cb = ClusterBlock::Simple(block);
assert_eq!(cb.absolute_timecode(1000).unwrap(), 1050);
}
// 0x80 encodes track number 0, which is rejected.
#[test]
fn test_parse_simple_block_track_zero() {
let raw: &[u8] = &[0x80, 0x00, 0x00, 0x00, 0x01, 0x02];
let err = parse_simple_block(raw).unwrap_err();
assert_eq!(err, ClusterError::TrackNumberZero);
}
// First size 100 (0xE4), delta 0 (0xBF), last frame is the remainder of 300.
#[test]
fn test_decode_ebml_lace_sizes_equal() {
let header = &[0xE4, 0xBF];
let (sizes, hdr_len) = decode_ebml_lace_sizes(header, 3, 300).unwrap();
assert_eq!(hdr_len, 2);
assert_eq!(sizes, vec![100, 100, 100]);
}
// Pins the lace-bit table used by `LaceType::from_flags_byte`.
#[test]
fn test_lace_type_from_flags() {
assert_eq!(LaceType::from_flags_byte(0x00), LaceType::None);
assert_eq!(LaceType::from_flags_byte(0x02), LaceType::Fixed);
assert_eq!(LaceType::from_flags_byte(0x04), LaceType::Xiph);
assert_eq!(LaceType::from_flags_byte(0x06), LaceType::Ebml);
}
}