use super::super::blocks::sequence_section::ModeType;
use super::super::blocks::sequence_section::Sequence;
use super::super::blocks::sequence_section::SequencesHeader;
use super::scratch::FSEScratch;
use crate::bit_io::BitReaderReversed;
use crate::blocks::sequence_section::{
MAX_LITERAL_LENGTH_CODE, MAX_MATCH_LENGTH_CODE, MAX_OFFSET_CODE,
};
use crate::common::MAX_BLOCK_SIZE;
use crate::decoding::errors::{DecodeSequenceError, DecompressBlockError, ExecuteSequencesError};
use crate::decoding::sequence_execution::{do_offset_history, execute_sequences_fields};
use crate::fse::FSEDecoder;
use alloc::vec::Vec;
const ADVANCE: usize = 8;
const ADVANCE_MASK: usize = ADVANCE - 1;
const _: () = assert!(
ADVANCE.is_power_of_two(),
"ADVANCE must be a power of two; ring indexing uses `i & (ADVANCE - 1)` as `i % ADVANCE`"
);
pub fn decode_and_execute_sequences<B: super::buffer_backend::BufferBackend>(
section: &SequencesHeader,
source: &[u8],
fse: &mut FSEScratch,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
rle_fallback_sequences: &mut Vec<Sequence>,
) -> Result<(), DecompressBlockError> {
#[cfg(target_arch = "aarch64")]
use crate::cpu_kernel::NeonKernel;
#[cfg(all(target_arch = "aarch64", any(feature = "std", target_feature = "sve"),))]
use crate::cpu_kernel::SveKernel;
use crate::cpu_kernel::{CpuKernelTag, ScalarKernel, detect_cpu_kernel};
match detect_cpu_kernel() {
CpuKernelTag::Scalar => decode_and_execute_sequences_impl::<B, ScalarKernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
),
#[cfg(target_arch = "x86_64")]
CpuKernelTag::Bmi2 => {
unsafe {
decode_and_execute_sequences_bmi2::<B>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
}
#[cfg(target_arch = "x86_64")]
CpuKernelTag::Avx2 => {
unsafe {
decode_and_execute_sequences_avx2::<B>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
}
#[cfg(target_arch = "x86_64")]
CpuKernelTag::Vbmi2 => {
unsafe {
decode_and_execute_sequences_vbmi2::<B>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
}
#[cfg(target_arch = "aarch64")]
CpuKernelTag::Neon => decode_and_execute_sequences_impl::<B, NeonKernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
),
#[cfg(all(target_arch = "aarch64", any(feature = "std", target_feature = "sve"),))]
CpuKernelTag::Sve => decode_and_execute_sequences_impl::<B, SveKernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
),
}
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
unsafe fn decode_and_execute_sequences_bmi2<B: super::buffer_backend::BufferBackend>(
section: &SequencesHeader,
source: &[u8],
fse: &mut FSEScratch,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
rle_fallback_sequences: &mut Vec<Sequence>,
) -> Result<(), DecompressBlockError> {
decode_and_execute_sequences_impl::<B, crate::cpu_kernel::Bmi2Kernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2,avx2")]
unsafe fn decode_and_execute_sequences_avx2<B: super::buffer_backend::BufferBackend>(
section: &SequencesHeader,
source: &[u8],
fse: &mut FSEScratch,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
rle_fallback_sequences: &mut Vec<Sequence>,
) -> Result<(), DecompressBlockError> {
decode_and_execute_sequences_impl::<B, crate::cpu_kernel::Avx2Kernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2,avx2,avx512vbmi2,avx512f,avx512vl,avx512bw")]
unsafe fn decode_and_execute_sequences_vbmi2<B: super::buffer_backend::BufferBackend>(
section: &SequencesHeader,
source: &[u8],
fse: &mut FSEScratch,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
rle_fallback_sequences: &mut Vec<Sequence>,
) -> Result<(), DecompressBlockError> {
decode_and_execute_sequences_impl::<B, crate::cpu_kernel::Vbmi2Kernel>(
section,
source,
fse,
buffer,
offset_hist,
literals_buffer,
rle_fallback_sequences,
)
}
fn decode_and_execute_sequences_impl<
B: super::buffer_backend::BufferBackend,
K: crate::cpu_kernel::CpuKernel,
>(
section: &SequencesHeader,
source: &[u8],
fse: &mut FSEScratch,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
rle_fallback_sequences: &mut Vec<Sequence>,
) -> Result<(), DecompressBlockError> {
rle_fallback_sequences.clear();
let ddict_is_cold = fse.ddict_is_cold;
fse.ddict_is_cold = false;
let bytes_read = maybe_update_fse_tables(section, source, fse)?;
vprintln!("Updating tables used {} bytes", bytes_read);
let bit_stream = &source[bytes_read..];
let mut br = BitReaderReversed::<K>::new(bit_stream);
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1);
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
return Err(DecodeSequenceError::ExtraPadding { skipped_bits }.into());
}
if fse.ll_rle.is_some() || fse.ml_rle.is_some() || fse.of_rle.is_some() {
decode_sequences_with_rle(section, &mut br, fse, rle_fallback_sequences)?;
execute_sequences_fields(buffer, literals_buffer, offset_hist, rle_fallback_sequences)?;
return Ok(());
}
let mut ll_dec = FSEDecoder::new(&fse.literal_lengths);
let mut ml_dec = FSEDecoder::new(&fse.match_lengths);
let mut of_dec = FSEDecoder::new(&fse.offsets);
ll_dec
.init_state(&mut br)
.map_err(DecodeSequenceError::from)?;
of_dec
.init_state(&mut br)
.map_err(DecodeSequenceError::from)?;
ml_dec
.init_state(&mut br)
.map_err(DecodeSequenceError::from)?;
let max_update_bits = fse.literal_lengths.accuracy_log
+ fse.match_lengths.accuracy_log
+ fse.offsets.accuracy_log;
debug_assert!(
max_update_bits <= 56,
"sequence section update bits exceed 56-bit budget"
);
buffer.reserve(MAX_BLOCK_SIZE as usize);
let old_buffer_size = buffer.len();
let literals_buffer_len = literals_buffer.len();
let mut lit_cur: usize = 0;
let mut seq_sum: u32 = 0;
let buffer_checkpoint = buffer.checkpoint();
let saved_offset_hist = *offset_hist;
let num_sequences = section.num_sequences as usize;
#[cfg(target_pointer_width = "64")]
const MIN_LONG_OFFSET_SHARE: u32 = 7;
#[cfg(not(target_pointer_width = "64"))]
const MIN_LONG_OFFSET_SHARE: u32 = 20;
let use_long_pipeline = num_sequences >= ADVANCE * 2
&& (ddict_is_cold || fse.offsets_long_share >= MIN_LONG_OFFSET_SHARE);
if use_long_pipeline {
let pipeline_result = run_pipelined_sequence_loop(
&mut br,
&mut ll_dec,
&mut ml_dec,
&mut of_dec,
buffer,
offset_hist,
literals_buffer,
&mut lit_cur,
literals_buffer_len,
num_sequences,
old_buffer_size,
max_update_bits,
&mut seq_sum,
);
if let Err(e) = pipeline_result {
if buffer.try_restore_checkpoint(buffer_checkpoint) {
*offset_hist = saved_offset_hist;
}
return Err(e);
}
} else {
let mut shadow_hist = *offset_hist;
let mut fallback_err: Option<DecompressBlockError> = None;
for i in 0..num_sequences {
let seq = decode_one_sequence_inline(&mut ll_dec, &mut ml_dec, &mut of_dec, &mut br);
let resolved_offset = do_offset_history(seq.of, seq.ll, &mut shadow_hist);
if let Err(e) = execute_one_sequence_pipelined(
buffer,
literals_buffer,
&mut lit_cur,
literals_buffer_len,
seq,
resolved_offset,
) {
fallback_err = Some(e);
break;
}
seq_sum = seq_sum.wrapping_add(seq.ll).wrapping_add(seq.ml);
if i + 1 < num_sequences {
br.ensure_bits(max_update_bits);
ll_dec.update_state_fast(&mut br);
ml_dec.update_state_fast(&mut br);
of_dec.update_state_fast(&mut br);
}
}
if let Some(e) = fallback_err {
let _ = buffer.try_restore_checkpoint(buffer_checkpoint);
return Err(e);
}
*offset_hist = shadow_hist;
}
let remaining = br.bits_remaining();
if remaining != 0 {
if buffer.try_restore_checkpoint(buffer_checkpoint) {
*offset_hist = saved_offset_hist;
}
if remaining < 0 {
return Err(DecodeSequenceError::NotEnoughBytesForNumSequences.into());
}
return Err(DecodeSequenceError::ExtraBits {
bits_remaining: remaining,
}
.into());
}
if lit_cur < literals_buffer_len {
let rest = &literals_buffer[lit_cur..];
buffer.try_push(rest).map_err(ExecuteSequencesError::from)?;
seq_sum = seq_sum.wrapping_add(rest.len() as u32);
}
let diff = buffer.len() - old_buffer_size;
debug_assert_eq!(
seq_sum as usize, diff,
"seq_sum {seq_sum} != buffer growth {diff}"
);
Ok(())
}
#[allow(clippy::too_many_arguments)]
fn run_pipelined_sequence_loop<
B: super::buffer_backend::BufferBackend,
K: crate::cpu_kernel::CpuKernel,
>(
br: &mut BitReaderReversed<'_, K>,
ll_dec: &mut FSEDecoder<'_>,
ml_dec: &mut FSEDecoder<'_>,
of_dec: &mut FSEDecoder<'_>,
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
offset_hist: &mut [u32; 3],
literals_buffer: &[u8],
lit_cur: &mut usize,
literals_buffer_len: usize,
num_sequences: usize,
old_buffer_size: usize,
max_update_bits: u8,
seq_sum: &mut u32,
) -> Result<(), DecompressBlockError> {
let mut prefetch_pos: usize = old_buffer_size;
let mut shadow_hist: [u32; 3] = *offset_hist;
let mut ring: [(Sequence, u32); ADVANCE] = [(
Sequence {
ll: 0,
ml: 0,
of: 0,
},
0u32,
); ADVANCE];
for slot in ring.iter_mut() {
let seq = decode_one_sequence_inline(ll_dec, ml_dec, of_dec, br);
let actual_offset = do_offset_history(seq.of, seq.ll, &mut shadow_hist);
let match_start = prefetch_pos.wrapping_add(seq.ll as usize);
let source_idx = match_start.wrapping_sub(actual_offset as usize);
buffer.prefetch_lookahead_match_source(source_idx);
prefetch_pos = match_start.wrapping_add(seq.ml as usize);
*slot = (seq, actual_offset);
br.ensure_bits(max_update_bits);
ll_dec.update_state_fast(br);
ml_dec.update_state_fast(br);
of_dec.update_state_fast(br);
}
for i in ADVANCE..num_sequences {
let seq = decode_one_sequence_inline(ll_dec, ml_dec, of_dec, br);
let actual_offset = do_offset_history(seq.of, seq.ll, &mut shadow_hist);
let match_start = prefetch_pos.wrapping_add(seq.ll as usize);
let source_idx = match_start.wrapping_sub(actual_offset as usize);
buffer.prefetch_lookahead_match_source(source_idx);
prefetch_pos = match_start.wrapping_add(seq.ml as usize);
let slot = i & ADVANCE_MASK;
let (exec_seq, exec_offset) = ring[slot];
ring[slot] = (seq, actual_offset);
execute_one_sequence_pipelined(
buffer,
literals_buffer,
lit_cur,
literals_buffer_len,
exec_seq,
exec_offset,
)?;
*seq_sum = seq_sum.wrapping_add(exec_seq.ll).wrapping_add(exec_seq.ml);
if i + 1 < num_sequences {
br.ensure_bits(max_update_bits);
ll_dec.update_state_fast(br);
ml_dec.update_state_fast(br);
of_dec.update_state_fast(br);
}
}
for k in 0..ADVANCE {
let slot = (num_sequences + k) & ADVANCE_MASK;
let (exec_seq, exec_offset) = ring[slot];
execute_one_sequence_pipelined(
buffer,
literals_buffer,
lit_cur,
literals_buffer_len,
exec_seq,
exec_offset,
)?;
*seq_sum = seq_sum.wrapping_add(exec_seq.ll).wrapping_add(exec_seq.ml);
}
*offset_hist = shadow_hist;
Ok(())
}
#[inline(always)]
fn execute_one_sequence_pipelined<B: super::buffer_backend::BufferBackend>(
buffer: &mut super::decode_buffer::DecodeBuffer<B>,
literals: &[u8],
lit_cur: &mut usize,
lit_len: usize,
seq: Sequence,
resolved_offset: u32,
) -> Result<(), DecompressBlockError> {
let lit_cur_before = *lit_cur;
let high = lit_cur_before
.checked_add(seq.ll as usize)
.filter(|&h| h <= lit_len)
.ok_or(ExecuteSequencesError::NotEnoughBytesForSequence {
wanted: lit_cur_before.saturating_add(seq.ll as usize),
have: lit_len,
})?;
let lits = unsafe { literals.get_unchecked(lit_cur_before..high) };
*lit_cur = high;
if resolved_offset == 0 {
return Err(ExecuteSequencesError::ZeroOffset.into());
}
let inline_path_safe = B::SUPPORTS_INLINE_SEQUENCE_EXEC
&& lit_cur_before.checked_add(16).is_some_and(|b| b <= lit_len)
&& (seq.ll as usize <= 16
|| lit_cur_before
.checked_add((seq.ll as usize).next_multiple_of(16))
.is_some_and(|b| b <= lit_len));
if inline_path_safe {
let buf_len = buffer.len();
let offset = resolved_offset as usize;
let prefix_end = buf_len.checked_add(lits.len()).filter(|end| offset <= *end);
if prefix_end.is_none() {
buffer.try_push(lits).map_err(ExecuteSequencesError::from)?;
buffer
.repeat_lookahead_prefetched(offset, seq.ml as usize)
.map_err(ExecuteSequencesError::from)?;
return Ok(());
}
let lit_src = unsafe { literals.as_ptr().add(lit_cur_before) };
unsafe {
buffer
.buffer_mut()
.exec_sequence_inline(lit_src, seq.ll as usize, offset, seq.ml as usize)
.map_err(DecompressBlockError::ExecuteSequencesError)?;
}
return Ok(());
}
buffer.try_push(lits).map_err(ExecuteSequencesError::from)?;
buffer
.repeat_lookahead_prefetched(resolved_offset as usize, seq.ml as usize)
.map_err(ExecuteSequencesError::from)?;
Ok(())
}
#[inline(always)]
fn decode_one_sequence_inline<K: crate::cpu_kernel::CpuKernel>(
ll_dec: &mut FSEDecoder<'_>,
ml_dec: &mut FSEDecoder<'_>,
of_dec: &mut FSEDecoder<'_>,
br: &mut BitReaderReversed<'_, K>,
) -> Sequence {
let ll_state = ll_dec.state;
let ml_state = ml_dec.state;
let of_state = of_dec.state;
let ll_value = ll_state.base_value;
let ll_num_bits = ll_state.num_additional_bits;
let ml_value = ml_state.base_value;
let ml_num_bits = ml_state.num_additional_bits;
let of_num_bits = of_state.num_additional_bits;
let of_base = of_state.base_value;
debug_assert!(of_num_bits <= MAX_OFFSET_CODE);
let (obits, ml_add, ll_add) = br.get_bits_triple(of_num_bits, ml_num_bits, ll_num_bits);
let offset = obits as u32 + of_base;
debug_assert_ne!(offset, 0);
Sequence {
ll: ll_value + ll_add as u32,
ml: ml_value + ml_add as u32,
of: offset,
}
}
fn decode_sequences_with_rle<K: crate::cpu_kernel::CpuKernel>(
section: &SequencesHeader,
br: &mut BitReaderReversed<'_, K>,
scratch: &FSEScratch,
target: &mut Vec<Sequence>,
) -> Result<(), DecodeSequenceError> {
let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths);
let mut ml_dec = FSEDecoder::new(&scratch.match_lengths);
let mut of_dec = FSEDecoder::new(&scratch.offsets);
if scratch.ll_rle.is_none() {
ll_dec.init_state(br)?;
}
if scratch.of_rle.is_none() {
of_dec.init_state(br)?;
}
if scratch.ml_rle.is_none() {
ml_dec.init_state(br)?;
}
target.clear();
target.reserve(section.num_sequences as usize);
let max_update_bits = if scratch.ll_rle.is_none() {
scratch.literal_lengths.accuracy_log
} else {
0
} + if scratch.ml_rle.is_none() {
scratch.match_lengths.accuracy_log
} else {
0
} + if scratch.of_rle.is_none() {
scratch.offsets.accuracy_log
} else {
0
};
debug_assert!(
max_update_bits <= 56,
"sequence section update bits exceed 56-bit budget"
);
for _seq_idx in 0..section.num_sequences {
let ll_code = if let Some(ll_rle) = scratch.ll_rle {
ll_rle
} else {
ll_dec.decode_symbol()
};
let ml_code = if let Some(ml_rle) = scratch.ml_rle {
ml_rle
} else {
ml_dec.decode_symbol()
};
let of_code = if let Some(of_rle) = scratch.of_rle {
of_rle
} else {
of_dec.decode_symbol()
};
let (ll_value, ll_num_bits) = if scratch.ll_rle.is_some() {
lookup_ll_code(ll_code)
} else {
(ll_dec.state.base_value, ll_dec.state.num_additional_bits)
};
let (ml_value, ml_num_bits) = if scratch.ml_rle.is_some() {
lookup_ml_code(ml_code)
} else {
(ml_dec.state.base_value, ml_dec.state.num_additional_bits)
};
debug_assert!(of_code <= MAX_OFFSET_CODE);
let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits);
let offset = obits as u32 + (1u32 << of_code);
debug_assert_ne!(offset, 0);
target.push(Sequence {
ll: ll_value + ll_add as u32,
ml: ml_value + ml_add as u32,
of: offset,
});
if target.len() < section.num_sequences as usize {
if max_update_bits > 0 {
br.ensure_bits(max_update_bits);
}
if scratch.ll_rle.is_none() {
ll_dec.update_state_fast(br);
}
if scratch.ml_rle.is_none() {
ml_dec.update_state_fast(br);
}
if scratch.of_rle.is_none() {
of_dec.update_state_fast(br);
}
}
if br.bits_remaining() < 0 {
return Err(DecodeSequenceError::NotEnoughBytesForNumSequences);
}
}
if br.bits_remaining() > 0 {
Err(DecodeSequenceError::ExtraBits {
bits_remaining: br.bits_remaining(),
})
} else {
Ok(())
}
}
pub(crate) const LL_META: [u32; 36] = pack_code_meta(
&[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48,
64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536,
],
&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16,
],
);
pub(crate) const ML_META: [u32; 53] = pack_code_meta(
&[
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 131, 259, 515,
1027, 2051, 4099, 8195, 16387, 32771, 65539,
],
&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
],
);
const fn pack_code_meta<const N: usize>(bases: &[u32; N], extra_bits: &[u8; N]) -> [u32; N] {
let mut out = [0u32; N];
let mut i = 0;
while i < N {
assert!(bases[i] & 0xFF00_0000 == 0, "baseline must fit in 24 bits");
assert!(extra_bits[i] <= 16, "extra_bits exceeds zstd format limit");
out[i] = bases[i] | ((extra_bits[i] as u32) << 24);
i += 1;
}
out
}
#[inline(always)]
const fn unpack_code_meta(meta: u32) -> (u32, u8) {
(meta & 0x00FF_FFFF, (meta >> 24) as u8)
}
#[inline(always)]
fn lookup_ll_code(code: u8) -> (u32, u8) {
let idx = code as usize;
debug_assert!(
idx < LL_META.len(),
"Illegal literal length code was: {code}"
);
unpack_code_meta(unsafe { *LL_META.get_unchecked(idx) })
}
#[inline(always)]
fn lookup_ml_code(code: u8) -> (u32, u8) {
let idx = code as usize;
debug_assert!(idx < ML_META.len(), "Illegal match length code was: {code}");
unpack_code_meta(unsafe { *ML_META.get_unchecked(idx) })
}
pub const LL_MAX_LOG: u8 = 9;
pub const ML_MAX_LOG: u8 = 9;
pub const OF_MAX_LOG: u8 = 8;
pub(crate) fn compute_offsets_long_share(offsets: &crate::fse::FSETable) -> u32 {
const OFFSET_FSE_LOG: u32 = 8;
const LONG_OFFSET_CODE_THRESHOLD: u32 = 22;
let table_log = offsets.accuracy_log as u32;
let raw = offsets
.decode
.iter()
.filter(|entry| u32::from(entry.symbol) > LONG_OFFSET_CODE_THRESHOLD)
.count() as u32;
raw << OFFSET_FSE_LOG.saturating_sub(table_log)
}
fn maybe_update_fse_tables(
section: &SequencesHeader,
source: &[u8],
scratch: &mut FSEScratch,
) -> Result<usize, DecodeSequenceError> {
let modes = section
.modes
.ok_or(DecodeSequenceError::MissingCompressionMode)?;
let mut bytes_read = 0;
match modes.ll_mode() {
ModeType::FSECompressed => {
let bytes = scratch.literal_lengths.build_decoder(source, LL_MAX_LOG)?;
bytes_read += bytes;
scratch
.literal_lengths
.enrich_with_packed_seq_meta(&LL_META);
vprintln!("Updating ll table");
vprintln!("Used bytes: {}", bytes);
scratch.ll_rle = None;
}
ModeType::RLE => {
vprintln!("Use RLE ll table");
if source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleLlTable);
}
bytes_read += 1;
if source[0] > MAX_LITERAL_LENGTH_CODE {
return Err(DecodeSequenceError::MissingByteForRleMlTable);
}
scratch.ll_rle = Some(source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined ll table");
#[cfg(feature = "std")]
{
scratch.literal_lengths.reinit_from(predefined_ll_table());
}
#[cfg(not(feature = "std"))]
{
scratch.literal_lengths.build_from_probabilities(
LL_DEFAULT_ACC_LOG,
&LITERALS_LENGTH_DEFAULT_DISTRIBUTION,
)?;
scratch
.literal_lengths
.enrich_with_packed_seq_meta(&LL_META);
}
scratch.ll_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat ll table");
}
};
let of_source = &source[bytes_read..];
match modes.of_mode() {
ModeType::FSECompressed => {
let bytes = scratch.offsets.build_decoder(of_source, OF_MAX_LOG)?;
scratch.offsets.enrich_for_offsets();
vprintln!("Updating of table");
vprintln!("Used bytes: {}", bytes);
bytes_read += bytes;
scratch.of_rle = None;
scratch.offsets_long_share = compute_offsets_long_share(&scratch.offsets);
}
ModeType::RLE => {
vprintln!("Use RLE of table");
if of_source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleOfTable);
}
bytes_read += 1;
if of_source[0] > MAX_OFFSET_CODE {
return Err(DecodeSequenceError::MissingByteForRleMlTable);
}
scratch.of_rle = Some(of_source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined of table");
#[cfg(feature = "std")]
{
let (cached, long_share) = predefined_of_table();
scratch.offsets.reinit_from(cached);
scratch.offsets_long_share = long_share;
}
#[cfg(not(feature = "std"))]
{
scratch
.offsets
.build_from_probabilities(OF_DEFAULT_ACC_LOG, &OFFSET_DEFAULT_DISTRIBUTION)?;
scratch.offsets.enrich_for_offsets();
scratch.offsets_long_share = compute_offsets_long_share(&scratch.offsets);
}
scratch.of_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat of table");
}
};
let ml_source = &source[bytes_read..];
match modes.ml_mode() {
ModeType::FSECompressed => {
let bytes = scratch.match_lengths.build_decoder(ml_source, ML_MAX_LOG)?;
scratch.match_lengths.enrich_with_packed_seq_meta(&ML_META);
bytes_read += bytes;
vprintln!("Updating ml table");
vprintln!("Used bytes: {}", bytes);
scratch.ml_rle = None;
}
ModeType::RLE => {
vprintln!("Use RLE ml table");
if ml_source.is_empty() {
return Err(DecodeSequenceError::MissingByteForRleMlTable);
}
bytes_read += 1;
if ml_source[0] > MAX_MATCH_LENGTH_CODE {
return Err(DecodeSequenceError::MissingByteForRleMlTable);
}
scratch.ml_rle = Some(ml_source[0]);
}
ModeType::Predefined => {
vprintln!("Use predefined ml table");
#[cfg(feature = "std")]
{
scratch.match_lengths.reinit_from(predefined_ml_table());
}
#[cfg(not(feature = "std"))]
{
scratch.match_lengths.build_from_probabilities(
ML_DEFAULT_ACC_LOG,
&MATCH_LENGTH_DEFAULT_DISTRIBUTION,
)?;
scratch.match_lengths.enrich_with_packed_seq_meta(&ML_META);
}
scratch.ml_rle = None;
}
ModeType::Repeat => {
vprintln!("Repeat ml table");
}
};
Ok(bytes_read)
}
const LL_DEFAULT_ACC_LOG: u8 = 6;
const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
-1, -1, -1, -1,
];
#[cfg(feature = "std")]
fn predefined_ll_table() -> &'static crate::fse::FSETable {
use std::sync::OnceLock;
static CACHED: OnceLock<crate::fse::FSETable> = OnceLock::new();
CACHED.get_or_init(|| {
let mut t = crate::fse::FSETable::new(MAX_LITERAL_LENGTH_CODE);
t.build_from_probabilities(LL_DEFAULT_ACC_LOG, &LITERALS_LENGTH_DEFAULT_DISTRIBUTION)
.expect("LITERALS_LENGTH_DEFAULT_DISTRIBUTION is a static RFC 8878 constant");
t.enrich_with_packed_seq_meta(&LL_META);
t
})
}
#[cfg(feature = "std")]
fn predefined_ml_table() -> &'static crate::fse::FSETable {
use std::sync::OnceLock;
static CACHED: OnceLock<crate::fse::FSETable> = OnceLock::new();
CACHED.get_or_init(|| {
let mut t = crate::fse::FSETable::new(MAX_MATCH_LENGTH_CODE);
t.build_from_probabilities(ML_DEFAULT_ACC_LOG, &MATCH_LENGTH_DEFAULT_DISTRIBUTION)
.expect("MATCH_LENGTH_DEFAULT_DISTRIBUTION is a static RFC 8878 constant");
t.enrich_with_packed_seq_meta(&ML_META);
t
})
}
#[cfg(feature = "std")]
fn predefined_of_table() -> (&'static crate::fse::FSETable, u32) {
use std::sync::OnceLock;
static CACHED: OnceLock<(crate::fse::FSETable, u32)> = OnceLock::new();
let cache = CACHED.get_or_init(|| {
let mut t = crate::fse::FSETable::new(MAX_OFFSET_CODE);
t.build_from_probabilities(OF_DEFAULT_ACC_LOG, &OFFSET_DEFAULT_DISTRIBUTION)
.expect("OFFSET_DEFAULT_DISTRIBUTION is a static RFC 8878 constant");
t.enrich_for_offsets();
let share = compute_offsets_long_share(&t);
(t, share)
});
(&cache.0, cache.1)
}
const ML_DEFAULT_ACC_LOG: u8 = 6;
const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
];
const OF_DEFAULT_ACC_LOG: u8 = 5;
const OFFSET_DEFAULT_DISTRIBUTION: [i32; 29] = [
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
];
#[cfg(feature = "std")]
#[test]
fn predefined_fse_caches_match_rebuild_output() {
use crate::fse::FSETable;
let mut ll_rebuild = FSETable::new(MAX_LITERAL_LENGTH_CODE);
ll_rebuild
.build_from_probabilities(
LL_DEFAULT_ACC_LOG,
&Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
)
.unwrap();
ll_rebuild.enrich_with_packed_seq_meta(&LL_META);
let ll_cached = predefined_ll_table();
assert_eq!(ll_rebuild.accuracy_log, ll_cached.accuracy_log);
assert_eq!(ll_rebuild.decode.len(), ll_cached.decode.len());
for (i, (a, b)) in ll_rebuild
.decode
.iter()
.zip(ll_cached.decode.iter())
.enumerate()
{
assert_eq!(a.symbol, b.symbol, "LL entry {i} symbol mismatch");
assert_eq!(a.num_bits, b.num_bits, "LL entry {i} num_bits mismatch");
assert_eq!(a.new_state, b.new_state, "LL entry {i} new_state mismatch");
assert_eq!(
a.base_value, b.base_value,
"LL entry {i} base_value mismatch"
);
assert_eq!(
a.num_additional_bits, b.num_additional_bits,
"LL entry {i} num_additional_bits mismatch"
);
}
let mut ml_rebuild = FSETable::new(MAX_MATCH_LENGTH_CODE);
ml_rebuild
.build_from_probabilities(
ML_DEFAULT_ACC_LOG,
&Vec::from(&MATCH_LENGTH_DEFAULT_DISTRIBUTION[..]),
)
.unwrap();
ml_rebuild.enrich_with_packed_seq_meta(&ML_META);
let ml_cached = predefined_ml_table();
assert_eq!(ml_rebuild.accuracy_log, ml_cached.accuracy_log);
assert_eq!(ml_rebuild.decode.len(), ml_cached.decode.len());
for (i, (a, b)) in ml_rebuild
.decode
.iter()
.zip(ml_cached.decode.iter())
.enumerate()
{
assert_eq!(a.symbol, b.symbol, "ML entry {i} symbol mismatch");
assert_eq!(a.num_bits, b.num_bits, "ML entry {i} num_bits mismatch");
assert_eq!(a.new_state, b.new_state, "ML entry {i} new_state mismatch");
assert_eq!(
a.base_value, b.base_value,
"ML entry {i} base_value mismatch"
);
assert_eq!(
a.num_additional_bits, b.num_additional_bits,
"ML entry {i} num_additional_bits mismatch"
);
}
let mut of_rebuild = FSETable::new(MAX_OFFSET_CODE);
of_rebuild
.build_from_probabilities(
OF_DEFAULT_ACC_LOG,
&Vec::from(&OFFSET_DEFAULT_DISTRIBUTION[..]),
)
.unwrap();
of_rebuild.enrich_for_offsets();
let of_rebuild_share = compute_offsets_long_share(&of_rebuild);
let (of_cached, of_cached_share) = predefined_of_table();
assert_eq!(of_rebuild.accuracy_log, of_cached.accuracy_log);
assert_eq!(of_rebuild.decode.len(), of_cached.decode.len());
assert_eq!(
of_rebuild_share, of_cached_share,
"OF offsets_long_share mismatch"
);
for (i, (a, b)) in of_rebuild
.decode
.iter()
.zip(of_cached.decode.iter())
.enumerate()
{
assert_eq!(a.symbol, b.symbol, "OF entry {i} symbol mismatch");
assert_eq!(a.num_bits, b.num_bits, "OF entry {i} num_bits mismatch");
assert_eq!(a.new_state, b.new_state, "OF entry {i} new_state mismatch");
assert_eq!(
a.base_value, b.base_value,
"OF entry {i} base_value mismatch"
);
assert_eq!(
a.num_additional_bits, b.num_additional_bits,
"OF entry {i} num_additional_bits mismatch"
);
}
}
#[test]
fn test_ll_default() {
let mut table = crate::fse::FSETable::new(MAX_LITERAL_LENGTH_CODE);
table
.build_from_probabilities(
LL_DEFAULT_ACC_LOG,
&Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
)
.unwrap();
assert!(table.decode.len() == 64);
assert!(table.decode[0].symbol == 0);
assert!(table.decode[0].num_bits == 4);
assert!(table.decode[0].new_state == 0);
assert!(table.decode[19].symbol == 27);
assert!(table.decode[19].num_bits == 6);
assert!(table.decode[19].new_state == 0);
assert!(table.decode[39].symbol == 25);
assert!(table.decode[39].num_bits == 4);
assert!(table.decode[39].new_state == 16);
assert!(table.decode[60].symbol == 35);
assert!(table.decode[60].num_bits == 6);
assert!(table.decode[60].new_state == 0);
assert!(table.decode[59].symbol == 24);
assert!(table.decode[59].num_bits == 5);
assert!(table.decode[59].new_state == 32);
}
#[cfg(test)]
mod offsets_long_share_tests {
use super::compute_offsets_long_share;
use crate::fse::{Entry, FSETable};
fn synthetic_offsets_table(accuracy_log: u8, symbols: &[u8]) -> FSETable {
let size = 1usize << accuracy_log;
assert_eq!(
symbols.len(),
size,
"symbols.len() must equal 1 << accuracy_log"
);
let mut t = FSETable::new(31);
t.accuracy_log = accuracy_log;
t.decode = symbols
.iter()
.map(|&s| Entry {
new_state: 0,
symbol: s,
num_bits: 0,
base_value: 0,
num_additional_bits: 0,
})
.collect();
t
}
#[test]
fn zero_long_codes_returns_zero_share() {
for log in [3u8, 5, 6, 8] {
let size = 1usize << log;
let symbols: alloc::vec::Vec<u8> = (0..size).map(|i| (i as u8) % 22).collect();
let table = synthetic_offsets_table(log, &symbols);
assert_eq!(
compute_offsets_long_share(&table),
0,
"log={log}: pure short-offset table must score 0"
);
}
}
#[test]
fn long_codes_scale_to_offset_fse_log_reference() {
let mut symbols = [0u8; 32];
symbols[7] = 23;
let table = synthetic_offsets_table(5, &symbols);
assert_eq!(compute_offsets_long_share(&table), 8);
}
#[test]
fn raw_count_at_offset_fse_log_passes_through_unscaled() {
let mut symbols = [0u8; 256];
for sym in symbols.iter_mut().take(15) {
*sym = 25;
}
let table = synthetic_offsets_table(8, &symbols);
assert_eq!(compute_offsets_long_share(&table), 15);
}
#[test]
fn threshold_is_strict_greater_than() {
let mut symbols = [0u8; 256];
for sym in symbols.iter_mut().take(50) {
*sym = 22;
}
let table = synthetic_offsets_table(8, &symbols);
assert_eq!(compute_offsets_long_share(&table), 0);
symbols[0] = 23;
let table = synthetic_offsets_table(8, &symbols);
assert_eq!(compute_offsets_long_share(&table), 1);
}
}