pub mod errors;
mod frame_decoder;
mod streaming_decoder;
pub use dictionary::{Dictionary, DictionaryHandle};
pub use frame_decoder::{BlockDecodingStrategy, ContentChecksum, FrameDecoder};
#[cfg(feature = "lsm")]
pub use frame_decoder::{PartialDecode, ResumeInput, ResumeState};
pub use streaming_decoder::StreamingDecoder;
/// Decompressed size of a frame as reported by its header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FrameContentSize {
    /// The header declares the content size; the payload is that size in bytes.
    Known(u64),
    /// The header does not declare a content size.
    Unknown,
}
/// Reads the frame header at the start of `src` and reports the content size it declares.
///
/// The header is parsed in the regular (non-magicless) format. The result is
/// [`FrameContentSize::Known`] when the header declares a frame content size and
/// [`FrameContentSize::Unknown`] when it does not.
///
/// # Errors
///
/// Returns the [`errors::ReadFrameHeaderError`] produced by the header parser if `src` does not
/// start with a readable frame header.
pub fn read_frame_content_size(
    src: &[u8],
) -> Result<FrameContentSize, errors::ReadFrameHeaderError> {
    let (header, _header_len) = frame::read_frame_header_with_format(src, false)?;
    if !header.fcs_declared() {
        return Ok(FrameContentSize::Unknown);
    }
    Ok(FrameContentSize::Known(header.frame_content_size()))
}
/// Failure modes of the helpers that measure a frame without decoding it
/// ([`find_frame_compressed_size`] and [`frame_decompressed_bound`]).
#[derive(Debug)]
pub enum FrameSizeError {
    /// The frame header could not be parsed; carries the parser's error.
    Header(errors::ReadFrameHeaderError),
    /// The input ends before the frame does (missing block header, block payload,
    /// skippable-frame payload or checksum trailer).
    Truncated,
    /// A block header uses the reserved block type.
    ReservedBlock,
    /// A block header announces a size larger than the frame's maximum block size.
    OversizedBlock,
}
/// Returns the number of bytes the first frame in `src` occupies, without decoding it.
///
/// For a regular frame this is the header, every block up to and including the one flagged as
/// last, and the 4-byte checksum trailer when the header's content-checksum flag is set. For a
/// skippable frame it is the 8 bytes preceding the payload plus the declared payload length.
/// Bytes following the frame are ignored.
///
/// # Errors
///
/// * [`FrameSizeError::Header`] if the header cannot be parsed.
/// * [`FrameSizeError::Truncated`] if `src` ends before the frame does.
/// * [`FrameSizeError::ReservedBlock`] / [`FrameSizeError::OversizedBlock`] if a block header is
///   invalid.
pub fn find_frame_compressed_size(src: &[u8]) -> Result<usize, FrameSizeError> {
    let available = src.len();

    let (header, header_len) = match frame::read_frame_header_with_format(src, false) {
        Ok(parsed) => parsed,
        Err(errors::ReadFrameHeaderError::SkipFrame { length, .. }) => {
            // Skippable frame: 8 bytes of magic + length field, then `length` payload bytes.
            return match 8usize.checked_add(length as usize) {
                Some(total) if total <= available => Ok(total),
                _ => Err(FrameSizeError::Truncated),
            };
        }
        Err(other) => return Err(FrameSizeError::Header(other)),
    };

    let block_limit = frame_block_size_max(&header);
    let blocks_end = walk_blocks(src, header_len as usize, block_limit)?.end;

    if !header.descriptor.content_checksum_flag() {
        return Ok(blocks_end);
    }

    // A 4-byte checksum follows the last block and must be fully present.
    match blocks_end.checked_add(4) {
        Some(total) if total <= available => Ok(total),
        _ => Err(FrameSizeError::Truncated),
    }
}
/// Outcome of scanning a frame's block headers with `walk_blocks`.
struct BlockWalk {
    /// Offset in the input just past the last block's on-disk bytes.
    end: usize,
    /// Number of blocks visited, including the last one.
    count: u64,
}
/// Computes the largest block size permitted for a frame with the given header.
///
/// The limit is the frame's window size capped at 128 KiB. When the header carries a window
/// descriptor, the window size is decoded from it: the upper five bits are an exponent giving a
/// base of `2^(10 + exponent)`, and the lower three bits add that many eighths of the base.
/// Without a descriptor the header's frame content size is used as the window size.
fn frame_block_size_max(header: &frame::FrameHeader) -> usize {
    const BLOCK_SIZE_CAP: u64 = 128 * 1024;

    let window_size = if let Some(desc) = header.window_descriptor() {
        let exponent = u64::from(desc >> 3);
        let mantissa = u64::from(desc & 0x7);
        let window_base = 1u64 << (10 + exponent);
        let eighth = window_base / 8;
        window_base + eighth * mantissa
    } else {
        header.frame_content_size()
    };

    window_size.min(BLOCK_SIZE_CAP) as usize
}
/// Scans block headers starting at `start` until the block flagged as last, without decoding
/// any block contents.
///
/// Each block begins with a 3-byte little-endian header: bit 0 is the last-block flag, bits 1-2
/// the block type and the remaining 21 bits the block size. Block types 0 and 2 occupy
/// `block_size` bytes after the header, type 1 occupies a single byte, and type 3 is reserved.
///
/// Returns the offset just past the last block together with the number of blocks visited.
///
/// # Errors
///
/// * [`FrameSizeError::Truncated`] if a block header or block payload extends past the end of
///   `src` (including when `start` itself lies beyond `src`).
/// * [`FrameSizeError::ReservedBlock`] if a header uses block type 3.
/// * [`FrameSizeError::OversizedBlock`] if a header's size field exceeds `max_block_size`.
fn walk_blocks(
    src: &[u8],
    start: usize,
    max_block_size: usize,
) -> Result<BlockWalk, FrameSizeError> {
    let mut offset = start;
    let mut count = 0u64;
    loop {
        // Slice in two steps so no `offset + 3` is ever computed: an out-of-range offset is
        // reported as truncation instead of overflowing.
        let hdr = src
            .get(offset..)
            .and_then(|rest| rest.get(..3))
            .ok_or(FrameSizeError::Truncated)?;
        let raw = u32::from(hdr[0]) | (u32::from(hdr[1]) << 8) | (u32::from(hdr[2]) << 16);
        let last_block = (raw & 1) != 0;
        let block_type = (raw >> 1) & 0b11;
        let block_size = (raw >> 3) as usize;

        // Bytes the block occupies in the input after its header.
        let on_disk = match block_type {
            0 | 2 => block_size,
            1 => 1,
            _ => return Err(FrameSizeError::ReservedBlock),
        };
        // The size field is bounded for every block type, not only those stored in full.
        if block_size > max_block_size {
            return Err(FrameSizeError::OversizedBlock);
        }

        offset = offset
            .checked_add(3 + on_disk)
            .filter(|end| *end <= src.len())
            .ok_or(FrameSizeError::Truncated)?;
        count += 1;

        if last_block {
            return Ok(BlockWalk { end: offset, count });
        }
    }
}
pub fn frame_decompressed_bound(src: &[u8]) -> Result<u64, FrameSizeError> {
let (header, header_len) = match frame::read_frame_header_with_format(src, false) {
Ok(parsed) => parsed,
Err(errors::ReadFrameHeaderError::SkipFrame { length, .. }) => {
return 8usize
.checked_add(length as usize)
.filter(|end| *end <= src.len())
.map(|_| 0)
.ok_or(FrameSizeError::Truncated);
}
Err(e) => return Err(FrameSizeError::Header(e)),
};
let block_size_max = frame_block_size_max(&header);
let walk = walk_blocks(src, header_len as usize, block_size_max)?;
if header.descriptor.content_checksum_flag() {
walk.end
.checked_add(4)
.filter(|end| *end <= src.len())
.ok_or(FrameSizeError::Truncated)?;
}
if header.fcs_declared() {
return Ok(header.frame_content_size());
}
Ok(walk.count.saturating_mul(block_size_max as u64))
}
/// Summary of a frame header as produced by [`read_frame_header_info`].
#[derive(Debug, Clone, Copy)]
pub struct FrameHeaderInfo {
    /// Content size declared by the header, or [`FrameContentSize::Unknown`] if none is declared.
    pub content_size: FrameContentSize,
    /// Window size decoded from the window descriptor; when the header has no window
    /// descriptor this is the header's frame content size.
    pub window_size: u64,
    /// Dictionary id reported by the header, if any.
    pub dictionary_id: Option<u32>,
    /// Whether the header's content-checksum flag is set.
    pub content_checksum: bool,
    /// Number of bytes the header parser consumed.
    pub header_size: usize,
}
/// Returns how many bytes the frame header at the start of `src` occupies.
///
/// The header is parsed in the regular (non-magicless) format and the parser's consumed byte
/// count is returned.
///
/// # Errors
///
/// Returns the [`errors::ReadFrameHeaderError`] produced by the header parser.
pub fn frame_header_size(src: &[u8]) -> Result<usize, errors::ReadFrameHeaderError> {
    frame::read_frame_header_with_format(src, false).map(|(_header, consumed)| consumed as usize)
}
/// Parses the frame header at the start of `src` and summarises it as a [`FrameHeaderInfo`].
///
/// `magicless` is forwarded to the header parser to select the header format.
///
/// The window size is decoded from the window descriptor when one is present (base
/// `2^(10 + exponent)` plus `mantissa` eighths of the base); otherwise the header's frame
/// content size is reported as the window size.
///
/// # Errors
///
/// Returns the [`errors::ReadFrameHeaderError`] produced by the header parser.
pub fn read_frame_header_info(
    src: &[u8],
    magicless: bool,
) -> Result<FrameHeaderInfo, errors::ReadFrameHeaderError> {
    let (header, consumed) = frame::read_frame_header_with_format(src, magicless)?;

    let content_size = header
        .fcs_declared()
        .then(|| header.frame_content_size())
        .map_or(FrameContentSize::Unknown, FrameContentSize::Known);

    let window_size = match header.window_descriptor() {
        None => header.frame_content_size(),
        Some(desc) => {
            // Upper five bits: exponent; lower three bits: mantissa.
            let base = 1u64 << (10 + u64::from(desc >> 3));
            let eighths = u64::from(desc & 0x7);
            base + (base / 8) * eighths
        }
    };

    Ok(FrameHeaderInfo {
        content_size,
        window_size,
        dictionary_id: header.dictionary_id(),
        content_checksum: header.descriptor.content_checksum_flag(),
        header_size: consumed as usize,
    })
}
pub(crate) mod block_decoder;
pub(crate) mod buffer_backend;
pub(crate) mod decode_buffer;
pub(crate) mod dictionary;
pub(crate) mod exec_sequence_inline;
pub(crate) mod flat_buf;
pub(crate) mod frame;
pub(crate) mod literals_section_decoder;
pub(crate) mod prefetch;
mod ringbuffer;
#[allow(dead_code)]
pub(crate) mod scratch;
#[cfg(all(target_arch = "x86_64", feature = "kernel_avx2"))]
pub(crate) mod seq_decoder_avx2;
#[cfg(all(target_arch = "x86_64", feature = "kernel_bmi2"))]
pub(crate) mod seq_decoder_bmi2;
pub(crate) mod seq_decoder_scalar;
#[cfg(all(target_arch = "x86_64", feature = "kernel_vbmi2"))]
pub(crate) mod seq_decoder_vbmi2;
pub(crate) mod sequence_execution;
pub(crate) mod sequence_section_decoder;
pub(crate) mod simd_copy;
#[cfg(feature = "copy_shape_stats")]
pub use simd_copy::shape_stats;
pub(crate) mod user_slice_buf;
#[cfg(feature = "bench_internals")]
pub(crate) use self::simd_copy::copy_bytes_overshooting_for_bench;
#[cfg(test)]
mod frame_inspection_tests {
    use super::{
        FrameContentSize, FrameSizeError, find_frame_compressed_size, frame_decompressed_bound,
        frame_header_size, read_frame_content_size, read_frame_header_info,
    };
    use crate::encoding::{CompressionLevel, compress_slice_to_vec};
    use alloc::vec;
    use alloc::vec::Vec;

    /// Compresses `content` into a frame at the default compression level.
    fn frame(content: &[u8]) -> Vec<u8> {
        compress_slice_to_vec(content, CompressionLevel::Default)
    }

    /// Hand-built frame with no declared content size and no checksum: one last raw block of
    /// three bytes behind a window descriptor that decodes to 1024.
    fn no_fcs_frame() -> Vec<u8> {
        vec![
            // Frame magic.
            0x28, 0xB5, 0x2F, 0xFD,
            // Frame header descriptor, then window descriptor.
            0x00, 0x00,
            // Block header: last block, type 0, size 3.
            0x19, 0x00, 0x00,
            // Block payload.
            0xAA, 0xBB, 0xCC,
        ]
    }

    /// Same as [`no_fcs_frame`] but with the content-checksum flag set and a 4-byte trailer.
    fn no_fcs_checksum_frame() -> Vec<u8> {
        let mut bytes = no_fcs_frame();
        // Byte 4 is the frame header descriptor.
        bytes[4] = 0x04;
        bytes.extend_from_slice(&[0xDE, 0xAD, 0xBE, 0xEF]);
        bytes
    }

    /// Builds a skippable frame: magic, little-endian payload length, payload.
    fn skippable_frame(payload: &[u8]) -> Vec<u8> {
        let declared_len = (payload.len() as u32).to_le_bytes();
        let mut bytes = Vec::with_capacity(8 + payload.len());
        bytes.extend_from_slice(&[0x50, 0x2A, 0x4D, 0x18]);
        bytes.extend_from_slice(&declared_len);
        bytes.extend_from_slice(payload);
        bytes
    }

    #[test]
    fn read_frame_content_size_reports_declared_size() {
        let compressed = frame(&[42u8; 100]);
        let size = read_frame_content_size(&compressed).unwrap();
        assert_eq!(size, FrameContentSize::Known(100));
    }

    #[test]
    fn read_frame_content_size_reports_unknown_without_fcs() {
        let size = read_frame_content_size(&no_fcs_frame()).unwrap();
        assert_eq!(size, FrameContentSize::Unknown);
    }

    #[test]
    fn read_frame_content_size_errors_on_garbage() {
        let garbage = [0xABu8; 16];
        assert!(read_frame_content_size(&garbage).is_err());
    }

    #[test]
    fn find_frame_compressed_size_spans_one_frame_then_the_next() {
        let first = frame(&[5u8; 256]);
        let first_len = first.len();
        assert_eq!(find_frame_compressed_size(&first).unwrap(), first_len);

        // With a second frame appended, only the first one is measured.
        let second = frame(&[9u8; 50]);
        let mut both = first;
        both.extend_from_slice(&second);
        assert_eq!(find_frame_compressed_size(&both).unwrap(), first_len);
    }

    #[test]
    fn find_frame_compressed_size_measures_skippable_frame() {
        let skippable = skippable_frame(&[1, 2, 3, 4]);
        let measured = find_frame_compressed_size(&skippable).unwrap();
        assert_eq!(measured, skippable.len());
    }

    #[test]
    fn find_frame_compressed_size_rejects_truncation() {
        let compressed = frame(&[7u8; 512]);
        let cut = &compressed[..compressed.len() - 4];
        assert!(matches!(
            find_frame_compressed_size(cut),
            Err(FrameSizeError::Truncated)
        ));
    }

    #[test]
    fn frame_header_size_matches_first_block_offset() {
        let compressed = frame(&[3u8; 2048]);
        let header_len = frame_header_size(&compressed).unwrap();
        assert!((5..=18).contains(&header_len));

        let too_short = [0u8; 2];
        assert!(frame_header_size(&too_short).is_err());
    }

    #[test]
    fn read_frame_header_info_fills_declared_fields() {
        let compressed = frame(&[7u8; 512]);
        let info = read_frame_header_info(&compressed, false).unwrap();
        assert_eq!(info.content_size, FrameContentSize::Known(512));
        assert!(info.window_size >= 512);
        assert!(info.dictionary_id.is_none());
    }

    #[test]
    fn read_frame_header_info_derives_window_without_fcs() {
        let info = read_frame_header_info(&no_fcs_frame(), false).unwrap();
        assert_eq!(info.content_size, FrameContentSize::Unknown);
        assert_eq!(info.window_size, 1024);
    }

    #[test]
    fn frame_decompressed_bound_returns_declared_size() {
        let compressed = frame(&[4u8; 4096]);
        let bound = frame_decompressed_bound(&compressed).unwrap();
        assert_eq!(bound, 4096);
    }

    #[test]
    fn frame_decompressed_bound_uses_block_bound_without_fcs() {
        // One block times a 1024-byte maximum block size.
        let bound = frame_decompressed_bound(&no_fcs_frame()).unwrap();
        assert_eq!(bound, 1024);
    }

    #[test]
    fn frame_decompressed_bound_accepts_present_checksum_trailer() {
        let bound = frame_decompressed_bound(&no_fcs_checksum_frame()).unwrap();
        assert_eq!(bound, 1024);
    }

    #[test]
    fn frame_decompressed_bound_rejects_missing_checksum_trailer() {
        let mut bytes = no_fcs_checksum_frame();
        // Drop the 4-byte trailer while the header still announces it.
        bytes.truncate(bytes.len() - 4);
        assert!(matches!(
            frame_decompressed_bound(&bytes),
            Err(FrameSizeError::Truncated)
        ));
    }

    #[test]
    fn size_helpers_reject_oversized_block_header() {
        let block_size = 2000usize;
        // Last raw block whose size exceeds the 1024-byte window of this header.
        let raw = ((block_size as u32) << 3) | 1;
        let [b0, b1, b2, _] = raw.to_le_bytes();
        let mut bytes = vec![0x28, 0xB5, 0x2F, 0xFD, 0x00, 0x00, b0, b1, b2];
        bytes.resize(bytes.len() + block_size, 0xAB);

        assert!(matches!(
            find_frame_compressed_size(&bytes),
            Err(FrameSizeError::OversizedBlock)
        ));
        assert!(matches!(
            frame_decompressed_bound(&bytes),
            Err(FrameSizeError::OversizedBlock)
        ));
    }

    #[test]
    fn frame_decompressed_bound_handles_skippable_frame() {
        let complete = skippable_frame(&[0u8; 8]);
        assert_eq!(frame_decompressed_bound(&complete).unwrap(), 0);

        let mut short = skippable_frame(&[0u8; 8]);
        short.truncate(short.len() - 2);
        assert!(matches!(
            frame_decompressed_bound(&short),
            Err(FrameSizeError::Truncated)
        ));
    }

    #[test]
    fn frame_decompressed_bound_errors_on_garbage_header() {
        let garbage = [0xABu8; 16];
        assert!(frame_decompressed_bound(&garbage).is_err());
    }
}