use crate::vp8l_stream::{BitReader, BitReaderEof};
pub const NUM_CODE_LENGTH_CODES: usize = 19;
pub const CODE_LENGTH_CODE_ORDER: [usize; NUM_CODE_LENGTH_CODES] = [
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
];
pub const MAX_CODE_LENGTH: usize = 15;
const LOOKUP_BITS: usize = 8;
const LOOKUP_SIZE: usize = 1 << LOOKUP_BITS;
const MIN_LOOKUP_USED: usize = 32;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PrefixError {
Eof(BitReaderEof),
SymbolOutOfRange {
symbol: usize,
alphabet_size: usize,
},
MaxSymbolTooLarge {
max_symbol: usize,
alphabet_size: usize,
},
LengthRunOverflow,
OverSubscribed,
Incomplete,
LengthTooLarge {
length: usize,
},
NoMatchingSymbol,
}
impl From<BitReaderEof> for PrefixError {
fn from(e: BitReaderEof) -> Self {
Self::Eof(e)
}
}
impl core::fmt::Display for PrefixError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::Eof(e) => write!(f, "VP8L §6.2.1 prefix code: {e}"),
Self::SymbolOutOfRange {
symbol,
alphabet_size,
} => write!(
f,
"VP8L §6.2.1 prefix code: symbol {symbol} out of range for alphabet size {alphabet_size}"
),
Self::MaxSymbolTooLarge {
max_symbol,
alphabet_size,
} => write!(
f,
"VP8L §6.2.1 prefix code: max_symbol {max_symbol} exceeds alphabet size {alphabet_size}"
),
Self::LengthRunOverflow => {
f.write_str("VP8L §6.2.1 prefix code: code-length run overran the alphabet")
}
Self::OverSubscribed => {
f.write_str("VP8L §6.2.1 prefix code: over-subscribed (sum 2^-len > 1)")
}
Self::Incomplete => {
f.write_str("VP8L §6.2.1 prefix code: incomplete (sum 2^-len < 1)")
}
Self::LengthTooLarge { length } => write!(
f,
"VP8L §6.2.1 prefix code: code length {length} exceeds the {MAX_CODE_LENGTH}-bit ceiling"
),
Self::NoMatchingSymbol => {
f.write_str("VP8L §6.2.1 prefix code: bit pattern matched no symbol")
}
}
}
}
impl std::error::Error for PrefixError {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixCode {
code_lengths: Vec<u8>,
length_rows: Vec<LengthRow>,
sorted_symbols: Vec<u16>,
single_symbol: Option<u16>,
lookup: Vec<u32>,
len_to_row: Vec<u8>,
}
const NO_ROW: u8 = u8::MAX;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LengthRow {
length: u8,
first_code: u32,
first_symbol_index: usize,
count: usize,
}
impl PrefixCode {
pub fn from_code_lengths(code_lengths: Vec<u8>) -> Result<Self, PrefixError> {
let mut bl_count = [0usize; MAX_CODE_LENGTH + 1];
let mut used = 0usize;
let mut single_candidate: Option<u16> = None;
for (sym, &len) in code_lengths.iter().enumerate() {
let len = len as usize;
if len > MAX_CODE_LENGTH {
return Err(PrefixError::LengthTooLarge { length: len });
}
if len != 0 {
bl_count[len] += 1;
used += 1;
single_candidate = Some(sym as u16);
}
}
if used == 0 {
return Err(PrefixError::Incomplete);
}
if used == 1 {
let sym = single_candidate.expect("used == 1");
return Ok(Self {
code_lengths,
length_rows: Vec::new(),
sorted_symbols: vec![sym],
single_symbol: Some(sym),
lookup: Vec::new(),
len_to_row: Vec::new(),
});
}
let mut next_code = [0u32; MAX_CODE_LENGTH + 2];
let mut code: u32 = 0;
let mut kraft: u64 = 0;
for len in 1..=MAX_CODE_LENGTH {
code = (code + bl_count[len - 1] as u32) << 1;
next_code[len] = code;
if bl_count[len] > 0 {
kraft += (bl_count[len] as u64) << (MAX_CODE_LENGTH - len);
}
}
let full: u64 = 1u64 << MAX_CODE_LENGTH;
match kraft.cmp(&full) {
core::cmp::Ordering::Greater => return Err(PrefixError::OverSubscribed),
core::cmp::Ordering::Less => return Err(PrefixError::Incomplete),
core::cmp::Ordering::Equal => {}
}
let mut length_rows: Vec<LengthRow> = Vec::new();
let mut cursor = [0usize; MAX_CODE_LENGTH + 1];
let mut len_to_row = vec![NO_ROW; MAX_CODE_LENGTH + 1];
let mut first_symbol_index = 0usize;
for len in 1..=MAX_CODE_LENGTH {
if bl_count[len] == 0 {
continue;
}
cursor[len] = first_symbol_index;
len_to_row[len] = length_rows.len() as u8;
length_rows.push(LengthRow {
length: len as u8,
first_code: next_code[len],
first_symbol_index,
count: bl_count[len],
});
first_symbol_index += bl_count[len];
}
let mut sorted_symbols: Vec<u16> = vec![0; used];
for (sym, &slen) in code_lengths.iter().enumerate() {
if slen != 0 {
let slot = &mut cursor[slen as usize];
sorted_symbols[*slot] = sym as u16;
*slot += 1;
}
}
let mut lookup = Vec::new();
if used >= MIN_LOOKUP_USED {
lookup = vec![0u32; LOOKUP_SIZE];
for row in &length_rows {
let len = row.length as usize;
if len > LOOKUP_BITS {
break;
}
for k in 0..row.count {
let code = row.first_code + k as u32;
let symbol = sorted_symbols[row.first_symbol_index + k];
let base = (code.reverse_bits() >> (32 - len)) as usize;
let entry = ((len as u32) << 16) | symbol as u32;
let mut idx = base;
while idx < LOOKUP_SIZE {
lookup[idx] = entry;
idx += 1 << len;
}
}
}
}
Ok(Self {
code_lengths,
length_rows,
sorted_symbols,
single_symbol: None,
lookup,
len_to_row,
})
}
pub fn read_symbol(&self, reader: &mut BitReader<'_>) -> Result<u16, PrefixError> {
if let Some(sym) = self.single_symbol {
return Ok(sym);
}
if self.lookup.is_empty() {
return self.read_symbol_walk(reader);
}
let peeked = reader.peek_bits(LOOKUP_BITS);
let entry = self.lookup[peeked as usize];
let len = (entry >> 16) as usize;
let available = reader.bits_remaining();
if len != 0 {
if len <= available {
reader.advance_bits(len);
return Ok(entry as u16);
}
return self.read_symbol_walk(reader);
}
if available >= LOOKUP_BITS {
let code = peeked.reverse_bits() >> (32 - LOOKUP_BITS);
reader.advance_bits(LOOKUP_BITS);
return self.read_symbol_long(reader, code);
}
self.read_symbol_walk(reader)
}
fn read_symbol_long(
&self,
reader: &mut BitReader<'_>,
mut code: u32,
) -> Result<u16, PrefixError> {
let mut len = LOOKUP_BITS;
loop {
len += 1;
if len > MAX_CODE_LENGTH {
return Err(PrefixError::NoMatchingSymbol);
}
code = (code << 1) | reader.read_bits(1)?;
let ri = self.len_to_row[len];
if ri != NO_ROW {
let row = &self.length_rows[ri as usize];
let offset = code.wrapping_sub(row.first_code);
if (offset as usize) < row.count {
return Ok(self.sorted_symbols[row.first_symbol_index + offset as usize]);
}
}
}
}
#[doc(hidden)]
pub fn read_symbol_reference(&self, reader: &mut BitReader<'_>) -> Result<u16, PrefixError> {
if let Some(sym) = self.single_symbol {
return Ok(sym);
}
self.read_symbol_walk(reader)
}
fn read_symbol_walk(&self, reader: &mut BitReader<'_>) -> Result<u16, PrefixError> {
let mut code: u32 = 0;
let mut len: usize = 0;
loop {
len += 1;
if len > MAX_CODE_LENGTH {
return Err(PrefixError::NoMatchingSymbol);
}
code = (code << 1) | reader.read_bits(1)?;
let ri = self.len_to_row[len];
if ri != NO_ROW {
let row = &self.length_rows[ri as usize];
let offset = code.wrapping_sub(row.first_code);
if (offset as usize) < row.count {
return Ok(self.sorted_symbols[row.first_symbol_index + offset as usize]);
}
}
}
}
pub fn code_lengths(&self) -> &[u8] {
&self.code_lengths
}
pub fn single_symbol(&self) -> Option<u16> {
self.single_symbol
}
pub fn read(reader: &mut BitReader<'_>, alphabet_size: usize) -> Result<Self, PrefixError> {
let lengths = read_code_lengths(reader, alphabet_size)?;
Self::from_code_lengths(lengths)
}
}
pub fn read_code_lengths(
reader: &mut BitReader<'_>,
alphabet_size: usize,
) -> Result<Vec<u8>, PrefixError> {
if reader.read_bit()? {
read_simple_code_lengths(reader, alphabet_size)
} else {
read_normal_code_lengths(reader, alphabet_size)
}
}
fn read_simple_code_lengths(
reader: &mut BitReader<'_>,
alphabet_size: usize,
) -> Result<Vec<u8>, PrefixError> {
let mut lengths = vec![0u8; alphabet_size];
let num_symbols = reader.read_bits(1)? + 1;
let is_first_8bits = reader.read_bits(1)?;
let symbol0 = reader.read_bits(1 + 7 * is_first_8bits as usize)? as usize;
if symbol0 >= alphabet_size {
return Err(PrefixError::SymbolOutOfRange {
symbol: symbol0,
alphabet_size,
});
}
lengths[symbol0] = 1;
if num_symbols == 2 {
let symbol1 = reader.read_bits(8)? as usize;
if symbol1 >= alphabet_size {
return Err(PrefixError::SymbolOutOfRange {
symbol: symbol1,
alphabet_size,
});
}
lengths[symbol1] = 1;
}
Ok(lengths)
}
fn read_normal_code_lengths(
reader: &mut BitReader<'_>,
alphabet_size: usize,
) -> Result<Vec<u8>, PrefixError> {
let num_code_lengths = 4 + reader.read_bits(4)? as usize;
let mut clcl = [0u8; NUM_CODE_LENGTH_CODES];
for &pos in CODE_LENGTH_CODE_ORDER.iter().take(num_code_lengths) {
clcl[pos] = reader.read_bits(3)? as u8;
}
let cl_code = PrefixCode::from_code_lengths(clcl.to_vec())?;
let max_symbol = if reader.read_bits(1)? == 0 {
alphabet_size
} else {
let length_nbits = 2 + 2 * reader.read_bits(3)? as usize;
let ms = 2 + reader.read_bits(length_nbits)? as usize;
if ms > alphabet_size {
return Err(PrefixError::MaxSymbolTooLarge {
max_symbol: ms,
alphabet_size,
});
}
ms
};
let mut lengths = vec![0u8; alphabet_size];
let mut symbol = 0usize;
let mut prev_nonzero: u8 = 8; let mut emitted = 0usize;
while symbol < alphabet_size && emitted < max_symbol {
emitted += 1;
let code = cl_code.read_symbol(reader)? as usize;
match code {
0..=15 => {
lengths[symbol] = code as u8;
if code != 0 {
prev_nonzero = code as u8;
}
symbol += 1;
}
16 => {
let repeat = 3 + reader.read_bits(2)? as usize;
if symbol + repeat > alphabet_size {
return Err(PrefixError::LengthRunOverflow);
}
for _ in 0..repeat {
lengths[symbol] = prev_nonzero;
symbol += 1;
}
}
17 => {
let repeat = 3 + reader.read_bits(3)? as usize;
if symbol + repeat > alphabet_size {
return Err(PrefixError::LengthRunOverflow);
}
symbol += repeat;
}
18 => {
let repeat = 11 + reader.read_bits(7)? as usize;
if symbol + repeat > alphabet_size {
return Err(PrefixError::LengthRunOverflow);
}
symbol += repeat;
}
_ => return Err(PrefixError::LengthRunOverflow),
}
}
Ok(lengths)
}
#[cfg(test)]
mod tests {
use super::*;
struct BitWriter {
bytes: Vec<u8>,
bit_pos: usize,
}
impl BitWriter {
fn new() -> Self {
Self {
bytes: Vec::new(),
bit_pos: 0,
}
}
fn write_bits(&mut self, mut value: u32, n: usize) {
for _ in 0..n {
let byte_idx = self.bit_pos >> 3;
if byte_idx >= self.bytes.len() {
self.bytes.push(0);
}
let bit = (value & 1) as u8;
self.bytes[byte_idx] |= bit << (self.bit_pos & 7);
self.bit_pos += 1;
value >>= 1;
}
}
fn into_bytes(self) -> Vec<u8> {
self.bytes
}
}
#[test]
fn single_leaf_node_consumes_no_bits() {
let mut lengths = vec![0u8; 256];
lengths[42] = 1;
let code = PrefixCode::from_code_lengths(lengths).unwrap();
assert_eq!(code.single_symbol(), Some(42));
let data = [0u8; 0];
let mut r = BitReader::new(&data);
assert_eq!(code.read_symbol(&mut r).unwrap(), 42);
assert_eq!(r.bit_position(), 0);
}
#[test]
fn two_symbol_length_one_code_is_canonical() {
let mut lengths = vec![0u8; 4];
lengths[1] = 1;
lengths[3] = 1;
let code = PrefixCode::from_code_lengths(lengths).unwrap();
assert_eq!(code.single_symbol(), None);
let mut w = BitWriter::new();
w.write_bits(0, 1); w.write_bits(1, 1); let data = w.into_bytes();
let mut r = BitReader::new(&data);
assert_eq!(code.read_symbol(&mut r).unwrap(), 1);
assert_eq!(code.read_symbol(&mut r).unwrap(), 3);
}
#[test]
fn canonical_lengths_1_2_3_3_decode_in_value_order() {
let lengths = vec![1u8, 2, 3, 3];
let code = PrefixCode::from_code_lengths(lengths).unwrap();
let mut w = BitWriter::new();
w.write_bits(0, 1);
w.write_bits(1, 1);
w.write_bits(0, 1);
w.write_bits(1, 1);
w.write_bits(1, 1);
w.write_bits(0, 1);
w.write_bits(1, 1);
w.write_bits(1, 1);
w.write_bits(1, 1);
let data = w.into_bytes();
let mut r = BitReader::new(&data);
assert_eq!(code.read_symbol(&mut r).unwrap(), 0);
assert_eq!(code.read_symbol(&mut r).unwrap(), 1);
assert_eq!(code.read_symbol(&mut r).unwrap(), 2);
assert_eq!(code.read_symbol(&mut r).unwrap(), 3);
}
#[test]
fn over_subscribed_code_is_refused() {
let lengths = vec![1u8, 1, 1];
match PrefixCode::from_code_lengths(lengths) {
Err(PrefixError::OverSubscribed) => {}
other => panic!("expected OverSubscribed, got {other:?}"),
}
}
#[test]
fn incomplete_code_is_refused() {
let lengths = vec![2u8, 2, 0, 0];
match PrefixCode::from_code_lengths(lengths) {
Err(PrefixError::Incomplete) => {}
other => panic!("expected Incomplete, got {other:?}"),
}
}
#[test]
fn empty_table_is_refused() {
let lengths = vec![0u8; 8];
match PrefixCode::from_code_lengths(lengths) {
Err(PrefixError::Incomplete) => {}
other => panic!("expected Incomplete, got {other:?}"),
}
}
#[test]
fn length_too_large_is_refused() {
let mut lengths = vec![0u8; 4];
lengths[0] = (MAX_CODE_LENGTH + 1) as u8;
match PrefixCode::from_code_lengths(lengths) {
Err(PrefixError::LengthTooLarge { length }) => {
assert_eq!(length, MAX_CODE_LENGTH + 1);
}
other => panic!("expected LengthTooLarge, got {other:?}"),
}
}
#[test]
fn simple_one_symbol_8bit() {
let mut w = BitWriter::new();
w.write_bits(1, 1); w.write_bits(0, 1); w.write_bits(1, 1); w.write_bits(60, 8); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 256 + 24).unwrap();
assert_eq!(code.single_symbol(), Some(60));
}
#[test]
fn simple_one_symbol_1bit() {
let mut w = BitWriter::new();
w.write_bits(1, 1); w.write_bits(0, 1); w.write_bits(0, 1); w.write_bits(1, 1); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 256).unwrap();
assert_eq!(code.single_symbol(), Some(1));
}
#[test]
fn simple_two_symbols() {
let mut w = BitWriter::new();
w.write_bits(1, 1); w.write_bits(1, 1); w.write_bits(1, 1); w.write_bits(5, 8); w.write_bits(200, 8); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 256).unwrap();
assert_eq!(code.single_symbol(), None);
let mut w2 = BitWriter::new();
w2.write_bits(0, 1);
w2.write_bits(1, 1);
let d2 = w2.into_bytes();
let mut r2 = BitReader::new(&d2);
assert_eq!(code.read_symbol(&mut r2).unwrap(), 5);
assert_eq!(code.read_symbol(&mut r2).unwrap(), 200);
}
#[test]
fn simple_symbol_out_of_range_is_refused() {
let mut w = BitWriter::new();
w.write_bits(1, 1);
w.write_bits(0, 1);
w.write_bits(1, 1);
w.write_bits(200, 8);
let data = w.into_bytes();
let mut r = BitReader::new(&data);
match PrefixCode::read(&mut r, 40) {
Err(PrefixError::SymbolOutOfRange {
symbol,
alphabet_size,
}) => {
assert_eq!(symbol, 200);
assert_eq!(alphabet_size, 40);
}
other => panic!("expected SymbolOutOfRange, got {other:?}"),
}
}
#[test]
fn normal_two_symbol_alphabet_with_clc() {
let mut w = BitWriter::new();
w.write_bits(0, 1); w.write_bits(0, 4); w.write_bits(0, 3); w.write_bits(0, 3); w.write_bits(1, 3); w.write_bits(1, 3); w.write_bits(0, 1); w.write_bits(1, 1); w.write_bits(1, 1); w.write_bits(0, 1); w.write_bits(0, 1); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 4).unwrap();
assert_eq!(code.code_lengths(), &[1, 1, 0, 0]);
let mut w2 = BitWriter::new();
w2.write_bits(0, 1);
w2.write_bits(1, 1);
let d2 = w2.into_bytes();
let mut r2 = BitReader::new(&d2);
assert_eq!(code.read_symbol(&mut r2).unwrap(), 0);
assert_eq!(code.read_symbol(&mut r2).unwrap(), 1);
}
#[test]
fn normal_zero_run_code_18() {
let mut w = BitWriter::new();
w.write_bits(0, 1); w.write_bits(0, 4); w.write_bits(0, 3); w.write_bits(1, 3); w.write_bits(0, 3); w.write_bits(1, 3); w.write_bits(1, 1); w.write_bits(0, 3); w.write_bits(1, 2); w.write_bits(1, 1); w.write_bits(1, 7); w.write_bits(0, 1); w.write_bits(0, 1); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 16).unwrap();
let mut expected = [0u8; 16];
expected[12] = 1;
expected[13] = 1;
assert_eq!(code.code_lengths(), &expected[..]);
}
#[test]
fn normal_repeat_code_16() {
let mut w = BitWriter::new();
w.write_bits(0, 1); w.write_bits(5, 4); for pos in [17usize, 18, 0, 1, 2, 3, 4, 5, 16] {
let l = if pos == 2 || pos == 16 { 1 } else { 0 };
w.write_bits(l, 3);
}
w.write_bits(0, 1); w.write_bits(0, 1); w.write_bits(1, 1); w.write_bits(0, 2); let data = w.into_bytes();
let mut r = BitReader::new(&data);
let code = PrefixCode::read(&mut r, 4).unwrap();
assert_eq!(code.code_lengths(), &[2, 2, 2, 2]);
}
#[test]
fn normal_max_symbol_too_large_is_refused() {
let mut w = BitWriter::new();
w.write_bits(0, 1); w.write_bits(0, 4); for pos in [17usize, 18, 0, 1] {
let l = if pos == 0 || pos == 1 { 1 } else { 0 };
w.write_bits(l, 3);
}
w.write_bits(1, 1);
w.write_bits(0, 3);
w.write_bits(3, 2);
let data = w.into_bytes();
let mut r = BitReader::new(&data);
match PrefixCode::read(&mut r, 4) {
Err(PrefixError::MaxSymbolTooLarge {
max_symbol,
alphabet_size,
}) => {
assert_eq!(max_symbol, 5);
assert_eq!(alphabet_size, 4);
}
other => panic!("expected MaxSymbolTooLarge, got {other:?}"),
}
}
#[test]
fn read_symbol_reference_matches_fast_path_on_lut_built_code() {
let mut lengths = vec![10u8; 256];
let full: u64 = 1 << MAX_CODE_LENGTH;
let mut kraft: u64 = 256 << (MAX_CODE_LENGTH - 10);
let mut i = 0usize;
while kraft < full {
let l = lengths[i] as usize;
let gain = 1u64 << (MAX_CODE_LENGTH - l);
if kraft + gain <= full && l > 1 {
lengths[i] -= 1;
kraft += gain;
} else {
i += 1;
}
}
let code = PrefixCode::from_code_lengths(lengths).unwrap();
let mut bytes = Vec::with_capacity(257);
let mut state = 0x243f_6a88_85a3_08d3u64;
for _ in 0..257 {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
bytes.push((state >> 33) as u8);
}
let mut fast = BitReader::new(&bytes);
let mut reference = BitReader::new(&bytes);
loop {
let a = code.read_symbol(&mut fast);
let b = code.read_symbol_reference(&mut reference);
assert_eq!(a, b, "fast path and reference walk diverged");
assert_eq!(fast.bit_position(), reference.bit_position());
if a.is_err() {
break;
}
}
}
#[test]
fn truncated_prefix_code_reports_eof() {
let data = [0x00u8]; let mut r = BitReader::new(&data);
r.read_bits(8).unwrap(); match PrefixCode::read(&mut r, 256) {
Err(PrefixError::Eof(_)) => {}
other => panic!("expected Eof, got {other:?}"),
}
}
}