#![allow(clippy::needless_range_loop)]
use std::io::{self, Write};
use std::sync::atomic::{AtomicUsize, Ordering};
/// Process-wide counters recording which decode path each dynamic block took.
///
/// Only compiled for tests or when the `counters` feature is enabled.
#[cfg(any(test, feature = "counters"))]
pub mod hot_counters {
    use std::sync::atomic::{AtomicU64, Ordering};

    static DYNAMIC_BLOCKS: AtomicU64 = AtomicU64::new(0);
    static MULTI_SYM_BLOCKS: AtomicU64 = AtomicU64::new(0);
    static STANDARD_BLOCKS: AtomicU64 = AtomicU64::new(0);

    /// Adds one to `counter` with relaxed ordering.
    #[inline(always)]
    fn bump(counter: &AtomicU64) {
        counter.fetch_add(1, Ordering::Relaxed);
    }

    /// Sets all three counters back to zero.
    pub fn reset() {
        for counter in [&DYNAMIC_BLOCKS, &MULTI_SYM_BLOCKS, &STANDARD_BLOCKS] {
            counter.store(0, Ordering::SeqCst);
        }
    }

    /// Returns the current `(dynamic, multi_sym, standard)` counter values.
    pub fn snapshot() -> (u64, u64, u64) {
        let dynamic = DYNAMIC_BLOCKS.load(Ordering::SeqCst);
        let multi_sym = MULTI_SYM_BLOCKS.load(Ordering::SeqCst);
        let standard = STANDARD_BLOCKS.load(Ordering::SeqCst);
        (dynamic, multi_sym, standard)
    }

    /// Counts one dynamic-Huffman block.
    #[inline(always)]
    pub fn inc_dynamic() {
        bump(&DYNAMIC_BLOCKS);
    }

    /// Counts one block decoded through the multi-symbol table.
    #[inline(always)]
    pub fn inc_multi_sym() {
        bump(&MULTI_SYM_BLOCKS);
    }

    /// Counts one block decoded through the standard Huffman path.
    #[inline(always)]
    pub fn inc_standard() {
        bump(&STANDARD_BLOCKS);
    }
}
use crate::decompress::combined_lut::CombinedLUT;
use crate::decompress::inflate_tables::CODE_LENGTH_ORDER;
use crate::decompress::packed_lut::PackedLUT;
#[allow(unused_imports)]
use crate::decompress::two_level_table::{FastBits, TurboBits, TwoLevelTable};
/// Counters collected while decoding, printed by [`DecodeTrace::print_summary`].
#[allow(dead_code)]
#[derive(Default)]
pub struct DecodeTrace {
    /// Number of literal bytes recorded.
    pub literals: u64,
    /// Number of matches recorded.
    pub matches: u64,
    /// Literals recorded with the `fast` flag set.
    pub fast_literals: u64,
    /// Matches reported as "fast" in the summary.
    pub fast_matches: u64,
    /// Matches reported as "slow" in the summary.
    pub slow_matches: u64,
    /// Number of slow-path literal/length decodes.
    pub slow_lit_len: u64,
    /// Sum of the lengths of all recorded matches.
    pub match_bytes: u64,
    /// Matches with distance 1.
    pub dist_1: u64,
    /// Matches with distance up to 7 that are not distance 1.
    pub dist_2_7: u64,
    /// Matches with distance 8 through 39.
    pub dist_8_39: u64,
    /// Matches with distance 40 or more.
    pub dist_40_plus: u64,
    /// Matches with length up to 8.
    pub len_1_8: u64,
    /// Matches with length 9 through 32.
    pub len_9_32: u64,
    /// Matches with length 33 or more.
    pub len_33_plus: u64,
    /// Value printed as "Bit refills".
    pub refills: u64,
    /// Value printed as "EOB count".
    pub eob_count: u64,
}

#[allow(dead_code)]
impl DecodeTrace {
    /// Prints a human-readable report of the counters to stderr.
    ///
    /// `output_bytes` is the number of bytes produced and `elapsed_ns` the
    /// decode time in nanoseconds. Every ratio falls back to `0` when its
    /// denominator is zero, so an empty or instantaneous run never prints
    /// `NaN` or `inf`.
    pub fn print_summary(&self, output_bytes: usize, elapsed_ns: u64) {
        /// `part` as a percentage of `whole`, or `0.0` when `whole` is zero.
        fn percent(part: u64, whole: u64) -> f64 {
            if whole > 0 {
                part as f64 / whole as f64 * 100.0
            } else {
                0.0
            }
        }

        let mb_per_sec = if elapsed_ns > 0 {
            output_bytes as f64 / (elapsed_ns as f64 / 1_000_000_000.0) / 1_000_000.0
        } else {
            0.0
        };
        let total_symbols = self.literals + self.matches;
        let ns_per_symbol = elapsed_ns.checked_div(total_symbols).unwrap_or(0);
        let symbols_per_byte = if output_bytes > 0 {
            total_symbols as f64 / output_bytes as f64
        } else {
            0.0
        };

        eprintln!("\n=== DECODE TRACE ===");
        eprintln!(
            "Output: {} bytes in {:.2}ms = {:.1} MB/s",
            output_bytes,
            elapsed_ns as f64 / 1_000_000.0,
            mb_per_sec
        );

        eprintln!("\nSymbols:");
        eprintln!(
            "  Literals:     {} ({} fast chain, {:.1}% fast)",
            self.literals,
            self.fast_literals,
            percent(self.fast_literals, self.literals)
        );
        eprintln!(
            "  Matches:      {} ({} fast, {} slow, {:.1}% fast)",
            self.matches,
            self.fast_matches,
            self.slow_matches,
            percent(self.fast_matches, self.matches)
        );
        eprintln!("  Slow lit/len: {}", self.slow_lit_len);

        eprintln!("\nDistance distribution:");
        let total_dist = self.dist_1 + self.dist_2_7 + self.dist_8_39 + self.dist_40_plus;
        if total_dist > 0 {
            eprintln!(
                "  d=1 (RLE):    {} ({:.1}%)",
                self.dist_1,
                percent(self.dist_1, total_dist)
            );
            eprintln!(
                "  d=2-7:        {} ({:.1}%)",
                self.dist_2_7,
                percent(self.dist_2_7, total_dist)
            );
            eprintln!(
                "  d=8-39:       {} ({:.1}%)",
                self.dist_8_39,
                percent(self.dist_8_39, total_dist)
            );
            eprintln!(
                "  d>=40:        {} ({:.1}%)",
                self.dist_40_plus,
                percent(self.dist_40_plus, total_dist)
            );
        }

        eprintln!("\nLength distribution:");
        let total_len = self.len_1_8 + self.len_9_32 + self.len_33_plus;
        if total_len > 0 {
            eprintln!(
                "  len 3-8:      {} ({:.1}%)",
                self.len_1_8,
                percent(self.len_1_8, total_len)
            );
            eprintln!(
                "  len 9-32:     {} ({:.1}%)",
                self.len_9_32,
                percent(self.len_9_32, total_len)
            );
            eprintln!(
                "  len 33+:      {} ({:.1}%)",
                self.len_33_plus,
                percent(self.len_33_plus, total_len)
            );
        }

        eprintln!("\nCopy stats:");
        eprintln!(
            "  Match bytes:  {} ({:.1} bytes/match avg)",
            self.match_bytes,
            if self.matches > 0 {
                self.match_bytes as f64 / self.matches as f64
            } else {
                0.0
            }
        );

        eprintln!("\nOverhead:");
        eprintln!("  Bit refills:  {}", self.refills);
        eprintln!("  EOB count:    {}", self.eob_count);
        eprintln!("  ns/symbol:    {}", ns_per_symbol);
        eprintln!("  symbols/byte: {:.2}", symbols_per_byte);
        eprintln!("====================\n");
    }
}
use std::cell::RefCell;
// Per-thread `DecodeTrace` accumulator. The `trace_*` helpers in this file
// add to it only when `tracing_enabled()` returns true; `reset_trace` and
// `take_trace` clear or extract it.
thread_local! {
static DECODE_TRACE: RefCell<DecodeTrace> = RefCell::new(DecodeTrace::default());
}
/// Reports whether decode tracing is switched on.
///
/// The `GZIPPY_TRACE` environment variable is inspected once; the answer is
/// cached for the rest of the process.
#[inline]
fn tracing_enabled() -> bool {
    use std::sync::OnceLock;

    static TRACE_FLAG: OnceLock<bool> = OnceLock::new();

    let flag = TRACE_FLAG.get_or_init(|| {
        let lookup = std::env::var("GZIPPY_TRACE");
        lookup.is_ok()
    });
    *flag
}
/// Replaces the current thread's trace counters with a zeroed `DecodeTrace`.
#[allow(dead_code)]
fn reset_trace() {
    DECODE_TRACE.with(|cell| {
        // The previous counters are dropped immediately.
        cell.replace(DecodeTrace::default());
    });
}
/// Returns the current thread's trace counters, leaving zeroed counters behind.
#[allow(dead_code)]
fn take_trace() -> DecodeTrace {
    DECODE_TRACE.with(|cell| cell.take())
}
/// Records one match in the per-thread trace when tracing is enabled.
///
/// The recording itself lives in a cold, never-inlined helper so the
/// disabled case costs only the `tracing_enabled()` check at the call site.
#[inline(always)]
fn trace_match(_distance: usize, _length: usize) {
    /// Updates the match count, byte total and the distance/length buckets.
    #[cold]
    #[inline(never)]
    fn record(distance: usize, length: usize) {
        DECODE_TRACE.with(|cell| {
            let mut stats = cell.borrow_mut();
            stats.matches += 1;
            stats.match_bytes += length as u64;

            // Distance 0 lands in the 2-7 bucket, exactly as any value <= 7
            // other than 1 does.
            match distance {
                1 => stats.dist_1 += 1,
                0 | 2..=7 => stats.dist_2_7 += 1,
                8..=39 => stats.dist_8_39 += 1,
                _ => stats.dist_40_plus += 1,
            }

            match length {
                0..=8 => stats.len_1_8 += 1,
                9..=32 => stats.len_9_32 += 1,
                _ => stats.len_33_plus += 1,
            }
        });
    }

    if !tracing_enabled() {
        return;
    }
    record(_distance, _length);
}
/// Adds `count` literals to the per-thread trace when tracing is enabled.
///
/// When `fast` is true the same amount is also added to `fast_literals`.
#[allow(dead_code)]
#[inline]
fn trace_literals(count: u64, fast: bool) {
    if tracing_enabled() {
        DECODE_TRACE.with(|cell| {
            let mut stats = cell.borrow_mut();
            stats.literals += count;
            let fast_share = if fast { count } else { 0 };
            stats.fast_literals += fast_share;
        });
    }
}
/// Counts one slow-path literal/length decode when tracing is enabled.
#[allow(dead_code)]
#[inline]
fn trace_slow_path() {
    if tracing_enabled() {
        DECODE_TRACE.with(|cell| {
            let mut stats = cell.borrow_mut();
            stats.slow_lit_len += 1;
        });
    }
}
/// Location and size metadata for one block found by `parse_bgzf_blocks`.
#[derive(Debug, Clone)]
struct BgzfBlock {
    /// Byte offset in the input at which the block's gzip header begins.
    start: usize,
    /// Total block size in bytes, as declared by the header's `GZ` extra subfield.
    length: usize,
    /// Little-endian `u32` stored in the last four bytes of the block.
    isize: u32,
    /// Sum of `isize` over all blocks that precede this one.
    output_offset: usize,
    /// Byte offset in the input just past the header (extra field plus any
    /// name, comment and header-CRC fields flagged in the header).
    deflate_start: usize,
}

/// Scans a gzip extra field for the `GZ` subfield and returns the block size it declares.
///
/// A 4-byte payload is read as the size itself (zero is rejected); a 2-byte
/// payload is read as size minus one. A `GZ` subfield with any other payload
/// length is skipped. Returns `None` when no usable subfield is found.
fn bgzf_block_size_from_extra(extra_field: &[u8]) -> Option<usize> {
    let mut pos = 0;
    while pos + 4 <= extra_field.len() {
        let subfield_id = &extra_field[pos..pos + 2];
        let subfield_len =
            u16::from_le_bytes([extra_field[pos + 2], extra_field[pos + 3]]) as usize;
        if subfield_id == b"GZ" {
            if subfield_len == 4 && pos + 8 <= extra_field.len() {
                let size = u32::from_le_bytes([
                    extra_field[pos + 4],
                    extra_field[pos + 5],
                    extra_field[pos + 6],
                    extra_field[pos + 7],
                ]) as usize;
                return (size > 0).then_some(size);
            } else if subfield_len == 2 && pos + 6 <= extra_field.len() {
                let size_minus_1 =
                    u16::from_le_bytes([extra_field[pos + 4], extra_field[pos + 5]]) as usize;
                return Some(size_minus_1 + 1);
            }
        }
        pos += 4 + subfield_len;
    }
    None
}

/// Walks `data` as a sequence of gzip members that each declare their own
/// size in a `GZ` extra subfield, and returns one `BgzfBlock` per member.
///
/// Parsing stops silently at the first member that is not a well-formed
/// block: wrong magic bytes, no extra field, no usable size subfield, a
/// declared size that runs past `data`, or a declared size too small to hold
/// the header plus the 8-byte trailer. Blocks parsed before that point are
/// still returned.
///
/// # Errors
/// Returns `InvalidData` when not even one block could be parsed.
fn parse_bgzf_blocks(data: &[u8]) -> io::Result<Vec<BgzfBlock>> {
    /// CRC32 + ISIZE at the end of every gzip member.
    const TRAILER_LEN: usize = 8;
    const FLAG_FHCRC: u8 = 0x02;
    const FLAG_FEXTRA: u8 = 0x04;
    const FLAG_FNAME: u8 = 0x08;
    const FLAG_FCOMMENT: u8 = 0x10;

    let mut blocks = Vec::new();
    let mut offset = 0;
    let mut output_offset = 0;

    // The loop condition guarantees at least 19 readable bytes at `offset`,
    // which covers the fixed header and the XLEN field.
    while offset + 18 < data.len() {
        if data[offset] != 0x1f || data[offset + 1] != 0x8b {
            break;
        }
        let flags = data[offset + 3];
        if flags & FLAG_FEXTRA == 0 {
            break;
        }

        let xlen = u16::from_le_bytes([data[offset + 10], data[offset + 11]]) as usize;
        let extra_start = offset + 12;
        if extra_start + xlen > data.len() {
            break;
        }
        let extra_field = &data[extra_start..extra_start + xlen];

        let length = match bgzf_block_size_from_extra(extra_field) {
            Some(l) if l > 0 && offset + l <= data.len() => l,
            _ => break,
        };
        let block_end = offset + length;

        // Skip the optional zero-terminated name and comment, then the header CRC.
        let mut deflate_start = extra_start + xlen;
        for flag in [FLAG_FNAME, FLAG_FCOMMENT] {
            if flags & flag != 0 {
                while deflate_start < block_end && data[deflate_start] != 0 {
                    deflate_start += 1;
                }
                deflate_start += 1;
            }
        }
        if flags & FLAG_FHCRC != 0 {
            deflate_start += 2;
        }

        // A declared size that cannot hold the header and the trailer would
        // leave `deflate_start` pointing outside the block; treat it as
        // malformed like every other header problem above.
        if deflate_start + TRAILER_LEN > block_end {
            break;
        }

        let isize = u32::from_le_bytes([
            data[block_end - 4],
            data[block_end - 3],
            data[block_end - 2],
            data[block_end - 1],
        ]);

        blocks.push(BgzfBlock {
            start: offset,
            length,
            isize,
            output_offset,
            deflate_start,
        });
        output_offset += isize as usize;
        offset = block_end;
    }

    if blocks.is_empty() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "No BGZF blocks found",
        ));
    }
    Ok(blocks)
}
/// Inflates `deflate_data` into `output` by delegating to
/// `inflate_into_libdeflate`, returning that call's result unchanged.
fn inflate_into(deflate_data: &[u8], output: &mut [u8]) -> io::Result<usize> {
inflate_into_libdeflate(deflate_data, output)
}
/// Decompresses `deflate_data` into `output` with a freshly allocated
/// libdeflate decompressor and returns the output byte count it reports.
///
/// # Errors
/// - `Other` when the decompressor cannot be allocated or libdeflate returns
///   an unrecognised status.
/// - `InvalidData` when libdeflate reports bad data.
/// - `WriteZero` when libdeflate reports that `output` is too small.
fn inflate_into_libdeflate(deflate_data: &[u8], output: &mut [u8]) -> io::Result<usize> {
    use std::ffi::c_void;

    // SAFETY: the allocator takes no arguments; the returned pointer is
    // checked for null before any use.
    let handle = unsafe { libdeflate_sys::libdeflate_alloc_decompressor() };
    if handle.is_null() {
        return Err(io::Error::other(
            "failed to allocate libdeflate decompressor",
        ));
    }

    let mut written = 0usize;
    // SAFETY: `handle` is non-null and came from the allocator above; every
    // pointer/length pair is taken from a live slice or local, and the handle
    // is freed exactly once, after its only use.
    let status = unsafe {
        let status = libdeflate_sys::libdeflate_deflate_decompress(
            handle,
            deflate_data.as_ptr().cast::<c_void>(),
            deflate_data.len(),
            output.as_mut_ptr().cast::<c_void>(),
            output.len(),
            &mut written,
        );
        libdeflate_sys::libdeflate_free_decompressor(handle);
        status
    };

    let failure = match status {
        libdeflate_sys::libdeflate_result_LIBDEFLATE_SUCCESS => return Ok(written),
        libdeflate_sys::libdeflate_result_LIBDEFLATE_BAD_DATA => {
            io::Error::new(io::ErrorKind::InvalidData, "invalid deflate data")
        }
        libdeflate_sys::libdeflate_result_LIBDEFLATE_INSUFFICIENT_SPACE => {
            io::Error::new(io::ErrorKind::WriteZero, "output buffer too small")
        }
        _ => io::Error::other("unknown libdeflate error"),
    };
    Err(failure)
}
/// Public entry point for `inflate_into`: inflates `deflate_data` into
/// `output` and returns that call's result unchanged.
pub fn inflate_into_pub(deflate_data: &[u8], output: &mut [u8]) -> io::Result<usize> {
inflate_into(deflate_data, output)
}
/// Copies one stored block from `bits` into `output`, starting at `out_pos`.
///
/// After aligning and refilling the reader, two 16-bit fields are read: the
/// byte count and its complement. The payload is then copied one byte at a
/// time. Returns the output position after the last byte written.
///
/// # Errors
/// - `InvalidData` when the two length fields are not complements.
/// - `WriteZero` when `output` fills up before the payload is exhausted;
///   bytes copied up to that point remain in `output`.
fn decode_stored_into(
    bits: &mut FastBits,
    output: &mut [u8],
    mut out_pos: usize,
) -> io::Result<usize> {
    bits.align();
    bits.refill();

    let len = bits.read(16) as usize;
    let nlen = bits.read(16) as usize;
    let expected_len = !nlen & 0xFFFF;
    if len != expected_len {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Stored block length mismatch",
        ));
    }

    let mut remaining = len;
    while remaining != 0 {
        let Some(slot) = output.get_mut(out_pos) else {
            return Err(io::Error::new(
                io::ErrorKind::WriteZero,
                "Output buffer full",
            ));
        };
        bits.ensure(8);
        *slot = bits.read(8) as u8;
        out_pos += 1;
        remaining -= 1;
    }
    Ok(out_pos)
}
/// Builds the 288 literal/length code lengths used for fixed-Huffman blocks:
/// 8 bits for symbols 0-143, 9 for 144-255, 7 for 256-279 and 8 for 280-287.
fn get_fixed_lit_len_lens() -> [u8; 288] {
    // Start from the most common length and overwrite the two other ranges.
    let mut table = [8u8; 288];
    table[144..256].fill(9);
    table[256..280].fill(7);
    table
}
/// Returns the lazily built, process-wide tables for fixed-Huffman blocks:
/// the literal/length table, the distance table and the combined LUT.
///
/// Each table is built on first use from the fixed code lengths (all 32
/// distance codes are 5 bits) and cached; a build failure panics.
fn get_fixed_tables() -> (
    &'static TwoLevelTable,
    &'static TwoLevelTable,
    &'static CombinedLUT,
) {
    use std::sync::OnceLock;

    static LIT_LEN_CELL: OnceLock<TwoLevelTable> = OnceLock::new();
    static DIST_CELL: OnceLock<TwoLevelTable> = OnceLock::new();
    static COMBINED_CELL: OnceLock<CombinedLUT> = OnceLock::new();

    (
        LIT_LEN_CELL.get_or_init(|| TwoLevelTable::build(&get_fixed_lit_len_lens()).unwrap()),
        DIST_CELL.get_or_init(|| TwoLevelTable::build(&[5u8; 32]).unwrap()),
        COMBINED_CELL.get_or_init(|| {
            let lit_lens = get_fixed_lit_len_lens();
            let dist_lens = vec![5u8; 32];
            CombinedLUT::build(&lit_lens, &dist_lens).unwrap()
        }),
    )
}
/// Returns the lazily built, process-wide tables for fixed-Huffman blocks in
/// the form the turbo decoder takes: the literal/length table, the distance
/// table and the packed LUT.
///
/// Each table is built on first use from the fixed code lengths (all 32
/// distance codes are 5 bits) and cached; a build failure panics.
#[allow(dead_code)]
fn get_fixed_tables_turbo() -> (
    &'static TwoLevelTable,
    &'static TwoLevelTable,
    &'static PackedLUT,
) {
    use std::sync::OnceLock;

    static LIT_LEN_CELL: OnceLock<TwoLevelTable> = OnceLock::new();
    static DIST_CELL: OnceLock<TwoLevelTable> = OnceLock::new();
    static PACKED_CELL: OnceLock<PackedLUT> = OnceLock::new();

    (
        LIT_LEN_CELL.get_or_init(|| TwoLevelTable::build(&get_fixed_lit_len_lens()).unwrap()),
        DIST_CELL.get_or_init(|| TwoLevelTable::build(&[5u8; 32]).unwrap()),
        PACKED_CELL.get_or_init(|| {
            let lit_lens = get_fixed_lit_len_lens();
            let dist_lens = vec![5u8; 32];
            PackedLUT::build(&lit_lens, &dist_lens).unwrap()
        }),
    )
}
/// Decodes one fixed-Huffman block from `bits` into `output` starting at
/// `out_pos`, using the cached tables from `get_fixed_tables`.
///
/// Returns whatever `decode_huffman_into` returns.
fn decode_fixed_into(bits: &mut FastBits, output: &mut [u8], out_pos: usize) -> io::Result<usize> {
    let (fixed_lit_len, fixed_dist, fixed_combined) = get_fixed_tables();
    decode_huffman_into(
        bits,
        output,
        out_pos,
        fixed_combined,
        fixed_lit_len,
        fixed_dist,
    )
}
/// Decodes one dynamic-Huffman block from `bits` into `output` starting at
/// `out_pos`.
///
/// The block header is read first: the three counts, the code-length code
/// lengths (stored in `CODE_LENGTH_ORDER`), and then the literal/length and
/// distance code lengths, including the repeat symbols 16, 17 and 18. Repeat
/// runs are clamped to the number of code lengths still missing.
///
/// When the longest literal/length code is between 1 and 6 bits and a
/// multi-symbol table can be built, the body is decoded by
/// `decode_huffman_multi_sym`; otherwise by `decode_huffman_into`.
///
/// # Errors
/// `InvalidData` for an undecodable code-length symbol, a repeat-previous
/// symbol with no previous length, or an out-of-range symbol; plus any error
/// from table construction or from the body decoder.
fn decode_dynamic_into(
    bits: &mut FastBits,
    output: &mut [u8],
    out_pos: usize,
) -> io::Result<usize> {
    let invalid = |msg: &'static str| io::Error::new(io::ErrorKind::InvalidData, msg);

    bits.ensure(16);
    let hlit = 257 + bits.read(5) as usize;
    let hdist = 1 + bits.read(5) as usize;
    let hclen = 4 + bits.read(4) as usize;

    let mut code_len_lens = [0u8; 19];
    for &slot in &CODE_LENGTH_ORDER[..hclen] {
        bits.ensure(8);
        code_len_lens[slot as usize] = bits.read(3) as u8;
    }
    let code_len_table = TwoLevelTable::build(&code_len_lens)?;

    let total_codes = hlit + hdist;
    let mut code_lens = vec![0u8; total_codes];
    let mut filled = 0;
    while filled < total_codes {
        bits.ensure(16);
        let (symbol, sym_len) = code_len_table.decode(bits.buffer());
        if sym_len == 0 {
            return Err(invalid("Invalid code length code"));
        }
        bits.consume(sym_len);

        let room = total_codes - filled;
        match symbol {
            0..=15 => {
                code_lens[filled] = symbol as u8;
                filled += 1;
            }
            16 => {
                // Repeat the previous length 3-6 times.
                if filled == 0 {
                    return Err(invalid("Invalid repeat"));
                }
                let run = (3 + bits.read(2) as usize).min(room);
                let previous = code_lens[filled - 1];
                code_lens[filled..filled + run].fill(previous);
                filled += run;
            }
            // 17 and 18 repeat a zero length; the buffer is already zeroed.
            17 => filled += (3 + bits.read(3) as usize).min(room),
            18 => filled += (11 + bits.read(7) as usize).min(room),
            _ => return Err(invalid("Invalid code")),
        }
    }

    let (lit_lens, dist_lens) = code_lens.split_at(hlit);
    let lit_len_table = TwoLevelTable::build(lit_lens)?;
    let dist_table = TwoLevelTable::build(dist_lens)?;
    let combined_lut = CombinedLUT::build(lit_lens, dist_lens)?;

    let longest_lit = lit_lens.iter().copied().max().unwrap_or(0);

    #[cfg(any(test, feature = "counters"))]
    hot_counters::inc_dynamic();

    if (1..=6).contains(&longest_lit) {
        if let Ok(multi_sym_table) =
            crate::decompress::simd_huffman::MultiSymTable::build(lit_lens)
        {
            #[cfg(any(test, feature = "counters"))]
            hot_counters::inc_multi_sym();
            return decode_huffman_multi_sym(
                bits,
                output,
                out_pos,
                &multi_sym_table,
                &combined_lut,
                &lit_len_table,
                &dist_table,
            );
        }
    }

    #[cfg(any(test, feature = "counters"))]
    hot_counters::inc_standard();
    decode_huffman_into(
        bits,
        output,
        out_pos,
        &combined_lut,
        &lit_len_table,
        &dist_table,
    )
}
/// Decodes one Huffman block body using a multi-symbol lookup table, writing
/// into `output` from `out_pos` and returning the position after the block.
///
/// Each table lookup yields one of three cases:
/// - a run of up to four literals, written in one step;
/// - a single non-literal symbol (end-of-block or a length code);
/// - no entry (`total_bits == 0`), in which case the symbol is decoded from
///   `lit_len_table` instead.
///
/// `_combined_lut` is accepted for signature parity and is not used.
///
/// # Errors
/// - `WriteZero` when a literal does not fit in `output`.
/// - `InvalidData` for an undecodable code, a length index >= 29, a distance
///   symbol >= 30, or a distance of zero or beyond the bytes written so far.
fn decode_huffman_multi_sym(
    bits: &mut FastBits,
    output: &mut [u8],
    mut out_pos: usize,
    multi_sym_table: &crate::decompress::simd_huffman::MultiSymTable,
    _combined_lut: &CombinedLUT,
    lit_len_table: &TwoLevelTable,
    dist_table: &TwoLevelTable,
) -> io::Result<usize> {
    use crate::decompress::inflate_tables::{
        DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
    };

    fn buffer_full() -> io::Error {
        io::Error::new(io::ErrorKind::WriteZero, "Output buffer full")
    }

    fn bad_data(msg: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    /// Reads the length extra bits, the distance code and its extra bits,
    /// then performs the copy. `len_idx` is the length symbol minus 257.
    #[inline(always)]
    fn finish_match(
        bits: &mut FastBits,
        output: &mut [u8],
        out_pos: usize,
        len_idx: usize,
        dist_table: &TwoLevelTable,
    ) -> io::Result<usize> {
        if len_idx >= 29 {
            return Err(bad_data("Invalid length code"));
        }
        bits.ensure(16);
        let length =
            LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;

        let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
        if dist_len == 0 || dist_sym >= 30 {
            return Err(bad_data("Invalid distance code"));
        }
        bits.consume(dist_len);
        bits.ensure(16);
        let distance = DIST_START[dist_sym as usize] as usize
            + bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
        if distance > out_pos || distance == 0 {
            return Err(bad_data("Invalid distance"));
        }
        Ok(copy_match_into(output, out_pos, distance, length))
    }

    loop {
        bits.ensure(32);
        let entry = multi_sym_table.lookup(bits.buffer());

        if entry.total_bits > 0 {
            bits.consume(entry.total_bits as u32);

            if entry.sym_count > 0 {
                // Literal run. Counts above four consume bits but write nothing.
                let count = entry.sym_count as usize;
                if count <= 4 {
                    let end = out_pos + count;
                    if end > output.len() {
                        return Err(buffer_full());
                    }
                    let literals = [entry.sym1, entry.sym2, entry.sym3, entry.sym4];
                    output[out_pos..end].copy_from_slice(&literals[..count]);
                    out_pos = end;
                }
                continue;
            }

            let symbol = entry.symbol();
            if symbol == 256 {
                break;
            }
            let len_idx = (symbol - 257) as usize;
            out_pos = finish_match(bits, output, out_pos, len_idx, dist_table)?;
        } else {
            // No multi-symbol entry: decode a single symbol the ordinary way.
            let (symbol, code_len) = lit_len_table.decode(bits.buffer());
            if code_len == 0 {
                return Err(bad_data("Invalid Huffman code"));
            }
            bits.consume(code_len);

            match symbol {
                256 => break,
                lit if lit < 256 => {
                    if out_pos >= output.len() {
                        return Err(buffer_full());
                    }
                    output[out_pos] = lit as u8;
                    out_pos += 1;
                }
                len_sym => {
                    let len_idx = (len_sym - 257) as usize;
                    out_pos = finish_match(bits, output, out_pos, len_idx, dist_table)?;
                }
            }
        }
    }
    Ok(out_pos)
}
/// Decodes one Huffman block body from `bits` into `output`, starting at
/// `out_pos`, and returns the output position after the end-of-block symbol.
///
/// Each iteration first looks the next code up in `combined_lut`. An entry
/// with `bits_to_skip == 0` means the LUT has no answer, and the symbol is
/// decoded from `lit_len_table` / `dist_table` instead. Otherwise the entry's
/// `distance` field selects the action: a literal, end of block, a match whose
/// distance must still be decoded, or a match with the distance already in
/// the entry.
///
/// # Errors
/// - `WriteZero` when a literal does not fit in `output`.
/// - `InvalidData` for an undecodable code, a length index >= 29, a distance
///   symbol >= 30, or a distance of zero or beyond the bytes written so far.
fn decode_huffman_into(
bits: &mut FastBits,
output: &mut [u8],
mut out_pos: usize,
combined_lut: &CombinedLUT,
lit_len_table: &TwoLevelTable,
dist_table: &TwoLevelTable,
) -> io::Result<usize> {
use crate::decompress::combined_lut::{DIST_END_OF_BLOCK, DIST_LITERAL, DIST_SLOW_PATH};
use crate::decompress::inflate_tables::{
DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
};
// Branch-hint helpers: the unexpected side of each condition calls an empty
// `#[cold]` function; the condition's value is returned unchanged.
#[cold]
#[inline(never)]
fn cold_path() {}
#[inline(always)]
fn likely(b: bool) -> bool {
if !b {
cold_path();
}
b
}
#[inline(always)]
fn unlikely(b: bool) -> bool {
if b {
cold_path();
}
b
}
// On x86_64, issues a prefetch for the output byte 64 positions ahead of
// `pos` when that address is inside `output`; compiles to nothing elsewhere.
#[inline(always)]
#[allow(unused_variables)]
fn prefetch_output(output: &[u8], pos: usize) {
#[cfg(target_arch = "x86_64")]
if pos + 64 < output.len() {
unsafe {
std::arch::x86_64::_mm_prefetch(
output.as_ptr().add(pos + 64) as *const i8,
std::arch::x86_64::_MM_HINT_T0,
);
}
}
}
loop {
bits.ensure(32);
prefetch_output(output, out_pos);
let entry = combined_lut.decode(bits.buffer());
// Fallback: the combined LUT has no entry for these bits, so decode the
// literal/length symbol (and, for a match, the distance) from the
// two-level tables.
if unlikely(entry.bits_to_skip == 0) {
let (symbol, code_len) = lit_len_table.decode(bits.buffer());
if code_len == 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid Huffman code",
));
}
bits.consume(code_len);
if likely(symbol < 256) {
if unlikely(out_pos >= output.len()) {
return Err(io::Error::new(
io::ErrorKind::WriteZero,
"Output buffer full",
));
}
output[out_pos] = symbol as u8;
out_pos += 1;
continue;
}
if unlikely(symbol == 256) {
break;
}
// Symbols from 257 up are length codes; the index selects base and extra bits.
let len_idx = (symbol - 257) as usize;
if unlikely(len_idx >= 29) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid length code",
));
}
bits.ensure(16);
let length =
LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if unlikely(dist_len == 0 || dist_sym >= 30) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid distance code",
));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
// A match may only reach back into bytes already written.
if unlikely(distance > out_pos || distance == 0) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid distance",
));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
bits.consume(entry.bits_to_skip as u32);
match entry.distance {
DIST_LITERAL => {
if unlikely(out_pos >= output.len()) {
return Err(io::Error::new(
io::ErrorKind::WriteZero,
"Output buffer full",
));
}
output[out_pos] = entry.symbol_or_length;
out_pos += 1;
// Keep emitting literals without a refill while at least 12 bits remain
// and there is room for 8 more output bytes; stop at the first entry
// that is not a LUT literal and leave it for the outer loop.
while likely(bits.bits_available() >= 12 && out_pos + 8 <= output.len()) {
let e = combined_lut.decode(bits.buffer());
if e.bits_to_skip == 0 || e.distance != DIST_LITERAL {
break;
}
bits.consume(e.bits_to_skip as u32);
output[out_pos] = e.symbol_or_length;
out_pos += 1;
}
}
DIST_END_OF_BLOCK => break,
DIST_SLOW_PATH => {
// The entry carries the length (stored minus 3); the distance still has
// to be decoded from `dist_table`.
let length = entry.symbol_or_length as usize + 3;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 || dist_sym >= 30 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid distance code",
));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if unlikely(distance > out_pos || distance == 0) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid distance",
));
}
out_pos = copy_match_into(output, out_pos, distance, length);
}
// Any other value is the match distance itself, taken from the entry.
distance => {
let length = entry.length();
let dist = distance as usize;
if dist > out_pos || dist == 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid distance",
));
}
out_pos = copy_match_into(output, out_pos, dist, length);
}
}
}
Ok(out_pos)
}
/// Decodes one Huffman block body using "consume-first" tables, writing into
/// `output` from `out_pos` and returning the position after end-of-block.
///
/// Decoding runs in two phases:
/// - a fast loop, active while `out_pos` is more than 320 bytes from the end
///   of `output`, which writes literals without per-byte space checks and
///   tries to chain several literals per refill;
/// - a tail loop that checks the remaining space before every write.
///
/// Every match, on every path, goes through one helper that validates the
/// length symbol (257..=285), the distance symbol (< 30) and the distance
/// before copying, so corrupt input yields an error instead of an
/// out-of-bounds table index.
///
/// # Errors
/// - `InvalidData` for an invalid length symbol, distance symbol or distance,
///   or when the iteration guard (twice the output size, at least 100,000)
///   is exceeded.
/// - `WriteZero` when the tail loop would write past the end of `output`.
#[allow(dead_code)]
#[inline(never)]
fn decode_huffman_consume_first(
    bits: &mut crate::decompress::two_level_table::TurboBits,
    output: &mut [u8],
    mut out_pos: usize,
    lit_table: &crate::decompress::inflate::consume_first_table::ConsumeFirstTable,
    dist_table: &crate::decompress::inflate::consume_first_table::ConsumeFirstTable,
) -> io::Result<usize> {
    use crate::decompress::inflate::consume_first_table::CFEntry;
    use crate::decompress::inflate_tables::{
        DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
    };

    let out_end = output.len();
    let fastloop_end = out_end.saturating_sub(320);
    let mut iterations = 0u64;
    let max_iterations = (out_end as u64 * 2).max(100_000);

    fn invalid(msg: String) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    /// Looks up the next code, consuming its bits; follows one subtable hop
    /// when the main entry points at a subtable.
    #[inline(always)]
    fn resolve_entry(
        bits: &mut crate::decompress::two_level_table::TurboBits,
        table: &crate::decompress::inflate::consume_first_table::ConsumeFirstTable,
    ) -> CFEntry {
        let entry = table.lookup_main(bits.buffer());
        bits.consume(entry.bits());
        if entry.is_subtable() {
            let sub_entry = table.lookup_sub(entry, bits.buffer());
            bits.consume(sub_entry.bits());
            sub_entry
        } else {
            entry
        }
    }

    /// Completes a match whose length symbol is held by `len_entry`: validates
    /// the symbol, reads the extra bits, decodes and validates the distance,
    /// and copies. With `check_space` set, the copy is refused when it would
    /// run past the end of `output`.
    #[inline(always)]
    fn copy_length_match(
        bits: &mut crate::decompress::two_level_table::TurboBits,
        output: &mut [u8],
        out_pos: usize,
        len_entry: &CFEntry,
        dist_table: &crate::decompress::inflate::consume_first_table::ConsumeFirstTable,
        check_space: bool,
    ) -> io::Result<usize> {
        let len_sym = len_entry.symbol();
        if !(257..=285).contains(&len_sym) {
            return Err(invalid(format!(
                "Invalid length symbol: {} at out_pos={}",
                len_sym, out_pos
            )));
        }
        let len_idx = (len_sym - 257) as usize;
        bits.ensure(16);
        let length =
            LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;

        let d = resolve_entry(bits, dist_table);
        let dist_sym = d.symbol();
        if dist_sym >= 30 {
            return Err(invalid(format!(
                "Invalid distance symbol: {} at out_pos={}",
                dist_sym, out_pos
            )));
        }
        bits.ensure(16);
        let distance = DIST_START[dist_sym as usize] as usize
            + bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
        if distance == 0 || distance > out_pos {
            return Err(invalid(format!(
                "Invalid distance: {} at out_pos={}",
                distance, out_pos
            )));
        }
        if check_space && out_pos + length > output.len() {
            return Err(io::Error::new(io::ErrorKind::WriteZero, "Output full"));
        }
        Ok(copy_match_into(output, out_pos, distance, length))
    }

    // Fast loop: far enough from the end of `output` that literals are
    // written without a per-byte space check.
    while out_pos < fastloop_end {
        iterations += 1;
        if iterations > max_iterations {
            return Err(invalid(format!(
                "Infinite loop detected at out_pos={}",
                out_pos
            )));
        }
        bits.ensure(56);
        let entry = resolve_entry(bits, lit_table);

        if entry.is_literal() {
            output[out_pos] = entry.symbol() as u8;
            out_pos += 1;
            bits.ensure(32);

            let e2 = resolve_entry(bits, lit_table);
            if e2.is_literal() {
                output[out_pos] = e2.symbol() as u8;
                out_pos += 1;

                let e3 = resolve_entry(bits, lit_table);
                if e3.is_literal() {
                    output[out_pos] = e3.symbol() as u8;
                    out_pos += 1;

                    // Keep going on the bits already buffered; any
                    // non-literal ends the run.
                    while bits.has_bits(24) {
                        let e = resolve_entry(bits, lit_table);
                        if e.is_literal() {
                            output[out_pos] = e.symbol() as u8;
                            out_pos += 1;
                        } else if e.is_eob() {
                            return Ok(out_pos);
                        } else {
                            if e.is_length() {
                                out_pos = copy_length_match(
                                    bits, output, out_pos, &e, dist_table, false,
                                )?;
                            }
                            break;
                        }
                    }
                    continue;
                }
                if e3.is_eob() {
                    return Ok(out_pos);
                }
                if e3.is_length() {
                    out_pos = copy_length_match(bits, output, out_pos, &e3, dist_table, false)?;
                }
                continue;
            }
            if e2.is_eob() {
                return Ok(out_pos);
            }
            if e2.is_length() {
                out_pos = copy_length_match(bits, output, out_pos, &e2, dist_table, false)?;
            }
            continue;
        }

        if entry.is_eob() {
            return Ok(out_pos);
        }
        if entry.is_length() {
            out_pos = copy_length_match(bits, output, out_pos, &entry, dist_table, false)?;
        }
    }

    // Tail loop: every write is checked against the end of `output`.
    loop {
        iterations += 1;
        if iterations > max_iterations {
            return Err(invalid(format!(
                "Infinite loop in generic at out_pos={}",
                out_pos
            )));
        }
        bits.ensure(32);
        let entry = resolve_entry(bits, lit_table);

        if entry.is_literal() {
            if out_pos >= out_end {
                return Err(io::Error::new(io::ErrorKind::WriteZero, "Output full"));
            }
            output[out_pos] = entry.symbol() as u8;
            out_pos += 1;
            continue;
        }
        if entry.is_eob() {
            return Ok(out_pos);
        }
        // Anything else is treated as a length code; the helper rejects
        // symbols outside the valid range.
        out_pos = copy_length_match(bits, output, out_pos, &entry, dist_table, true)?;
    }
}
#[allow(dead_code)]
#[inline(never)]
fn decode_huffman_turbo(
bits: &mut crate::decompress::two_level_table::TurboBits,
output: &mut [u8],
mut out_pos: usize,
packed_lut: &crate::decompress::packed_lut::PackedLUT,
lit_len_table: &TwoLevelTable,
dist_table: &TwoLevelTable,
) -> io::Result<usize> {
use crate::decompress::inflate_tables::{
DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
};
const BITS_MASK: u32 = 0xFF;
const SYMBOL_SHIFT: u32 = 23;
const DIST_SHIFT: u32 = 8;
const DIST_MASK: u32 = 0x7FFF << DIST_SHIFT;
const DIST_EOB: u32 = 0x7FFF << DIST_SHIFT;
const DIST_SLOW: u32 = 0x7FFE << DIST_SHIFT;
const LUT_MASK: u64 = 0xFFF;
let out_end = output.len();
let fastloop_end = out_end.saturating_sub(320);
let table = &packed_lut.table;
while out_pos < fastloop_end {
bits.ensure(56);
let mut entry = table[(bits.buffer() & LUT_MASK) as usize].0;
if (entry as i32) < 0 && (entry & BITS_MASK) != 0 {
bits.consume_entry(entry);
let lit1 = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
entry = table[(bits.buffer() & LUT_MASK) as usize].0;
output[out_pos] = lit1;
out_pos += 1;
if (entry as i32) < 0 && (entry & BITS_MASK) != 0 {
bits.consume_entry(entry);
let lit2 = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
entry = table[(bits.buffer() & LUT_MASK) as usize].0;
output[out_pos] = lit2;
out_pos += 1;
if (entry as i32) < 0 && (entry & BITS_MASK) != 0 {
bits.consume_entry(entry);
let lit3 = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
output[out_pos] = lit3;
out_pos += 1;
let mut e = table[(bits.buffer() & LUT_MASK) as usize].0;
while bits.has_bits(24) && (e as i32) < 0 && (e & BITS_MASK) != 0 {
bits.consume_entry(e);
let lit = ((e >> SYMBOL_SHIFT) & 0xFF) as u8;
e = table[(bits.buffer() & LUT_MASK) as usize].0;
output[out_pos] = lit;
out_pos += 1;
}
continue;
}
} else {
}
if entry & BITS_MASK == 0 {
let (symbol, code_len) = lit_len_table.decode(bits.buffer());
if code_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid code"));
}
bits.consume(code_len);
if symbol == 256 {
return Ok(out_pos);
}
if symbol < 256 {
output[out_pos] = symbol as u8;
out_pos += 1;
continue;
}
let len_idx = (symbol - 257) as usize;
bits.ensure(16);
let length = LEN_START[len_idx] as usize
+ bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
bits.consume_entry(entry);
let dist_field = entry & DIST_MASK;
if dist_field == DIST_EOB {
return Ok(out_pos);
}
if dist_field == DIST_SLOW {
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let distance = (dist_field >> DIST_SHIFT) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
if entry & BITS_MASK == 0 {
let (symbol, code_len) = lit_len_table.decode(bits.buffer());
if code_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid code"));
}
bits.consume(code_len);
if symbol < 256 {
output[out_pos] = symbol as u8;
out_pos += 1;
continue;
}
if symbol == 256 {
return Ok(out_pos);
}
let len_idx = (symbol - 257) as usize;
bits.ensure(16);
let length =
LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
bits.consume_entry(entry);
let dist_field = entry & DIST_MASK;
if dist_field == DIST_EOB {
return Ok(out_pos);
}
if dist_field == DIST_SLOW {
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let next_entry_preload = table[(bits.buffer() & LUT_MASK) as usize].0;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
let _ = next_entry_preload;
continue;
}
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let distance = (dist_field >> DIST_SHIFT) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
let src_start = out_pos - distance;
if distance >= 8 {
let mut copied = 0;
while copied < 40 && copied < length {
let src = src_start + copied;
let dst = out_pos + copied;
if dst + 8 <= output.len() && src + 8 <= output.len() {
unsafe {
let word = (output.as_ptr().add(src) as *const u64).read_unaligned();
(output.as_mut_ptr().add(dst) as *mut u64).write_unaligned(word);
}
}
copied += 8;
}
for i in 40.min(length)..length {
output[out_pos + i] = output[src_start + i];
}
} else if distance == 1 {
let byte = output[src_start];
for i in 0..length {
output[out_pos + i] = byte;
}
} else {
for i in 0..length {
output[out_pos + i] = output[src_start + i];
}
}
out_pos += length;
}
loop {
bits.ensure(32);
let entry = table[(bits.buffer() & LUT_MASK) as usize].0;
if entry & BITS_MASK == 0 {
let (symbol, code_len) = lit_len_table.decode(bits.buffer());
if code_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid code"));
}
bits.consume(code_len);
if symbol < 256 {
if out_pos >= out_end {
return Err(io::Error::new(io::ErrorKind::WriteZero, "Output full"));
}
output[out_pos] = symbol as u8;
out_pos += 1;
continue;
}
if symbol == 256 {
return Ok(out_pos);
}
let len_idx = (symbol - 257) as usize;
bits.ensure(16);
let length =
LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
bits.consume_entry(entry);
if (entry as i32) < 0 {
if out_pos >= out_end {
return Err(io::Error::new(io::ErrorKind::WriteZero, "Output full"));
}
output[out_pos] = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
out_pos += 1;
continue;
}
let dist_field = entry & DIST_MASK;
if dist_field == DIST_EOB {
return Ok(out_pos);
}
if dist_field == DIST_SLOW {
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
if dist_len == 0 {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid dist"));
}
bits.consume(dist_len);
bits.ensure(16);
let distance = DIST_START[dist_sym as usize] as usize
+ bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
continue;
}
let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
let distance = (dist_field >> DIST_SHIFT) as usize;
if distance > out_pos {
return Err(io::Error::new(io::ErrorKind::InvalidData, "Bad dist"));
}
out_pos = copy_match_into(output, out_pos, distance, length);
}
}
/// Experimental fast-path Huffman decoder driven by the 12-bit packed lookup table.
///
/// Keeps a private 64-bit bit buffer over `compressed` (starting at byte 0) and writes
/// decoded bytes into `output` from `out_pos` onwards. Packed entries are interpreted as:
/// low 8 bits = number of input bits consumed, sign bit set = literal whose byte sits at
/// bit 23, otherwise bits 8..=22 hold a distance field where `0x7FFF` marks end-of-block
/// and `0x7FFE` requests a distance lookup through `dist_table`.
///
/// The loop stops (returning `Ok`) when fewer than 320 bytes of output remain, when fewer
/// than 12 input bits are available, or when the table entry cannot be handled here
/// (zero bit count).
///
/// NOTE(review): the bit-reader state (`pos`, `bitbuf`, `bits`) is local and is not handed
/// back, so a caller cannot resume decoding where this function stopped — confirm how this
/// is meant to be integrated before enabling it.
///
/// # Errors
///
/// * `InvalidData` for an undecodable distance code or a distance that reaches before the
///   start of `output`.
/// * `UnexpectedEof` if the input ends in the middle of a distance code.
///
/// # Safety
///
/// The function is compiled with the `bmi2` target feature; the caller must ensure the
/// running CPU supports it.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "bmi2")]
#[allow(dead_code)]
#[inline(never)]
unsafe fn decode_huffman_asm_x64(
    compressed: &[u8],
    output: &mut [u8],
    mut out_pos: usize,
    packed_lut: &crate::decompress::packed_lut::PackedLUT,
    dist_table: &TwoLevelTable,
) -> io::Result<usize> {
    use crate::decompress::inflate_tables::{DIST_EXTRA_BITS, DIST_START};
    const LUT_MASK: u64 = 0xFFF;
    const BITS_MASK: u32 = 0xFF;
    const SYMBOL_SHIFT: u32 = 23;
    const DIST_SHIFT: u32 = 8;
    const DIST_MASK: u32 = 0x7FFF << DIST_SHIFT;
    const DIST_EOB: u32 = 0x7FFF << DIST_SHIFT;
    const DIST_SLOW: u32 = 0x7FFE << DIST_SHIFT;

    /// Appends whole input bytes to the bit buffer until it holds more than 56 bits or
    /// the input is exhausted. `bits` always equals the exact number of valid bits, and
    /// `pos` always points at the first byte that has not been loaded yet.
    #[inline(always)]
    fn refill(compressed: &[u8], pos: &mut usize, bitbuf: &mut u64, bits: &mut u32) {
        while *bits <= 56 && *pos < compressed.len() {
            *bitbuf |= u64::from(compressed[*pos]) << *bits;
            *pos += 1;
            *bits += 8;
        }
    }

    #[inline(always)]
    fn truncated() -> io::Error {
        io::Error::new(io::ErrorKind::UnexpectedEof, "Truncated distance code")
    }

    let out_end = output.len();
    // Leave a margin so a maximal match can never run past the end of `output`.
    let fastloop_end = out_end.saturating_sub(320);
    let table = packed_lut.table.as_ptr();
    let mut pos: usize = 0;
    let mut bitbuf: u64 = 0;
    let mut bits: u32 = 0;
    refill(compressed, &mut pos, &mut bitbuf, &mut bits);

    'main: while out_pos < fastloop_end {
        if bits < 32 {
            refill(compressed, &mut pos, &mut bitbuf, &mut bits);
        }
        if bits < 12 {
            // Not enough input left for a full table index.
            break;
        }
        // SAFETY: the index is masked to 0..=0xFFF.
        // NOTE(review): assumes `packed_lut.table` has at least 4096 entries — confirm.
        let entry = unsafe { (*table.add((bitbuf & LUT_MASK) as usize)).0 };
        let entry_bits = entry & BITS_MASK;
        if entry_bits == 0 || entry_bits > bits {
            break;
        }

        if (entry as i32) < 0 {
            // Literal: emit it, then keep emitting literals while the buffer allows.
            output[out_pos] = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
            out_pos += 1;
            bitbuf >>= entry_bits;
            bits -= entry_bits;
            while bits >= 12 && out_pos < fastloop_end {
                // SAFETY: same masked index as above.
                let e = unsafe { (*table.add((bitbuf & LUT_MASK) as usize)).0 };
                let e_bits = e & BITS_MASK;
                if (e as i32) >= 0 || e_bits == 0 || e_bits > bits {
                    break;
                }
                output[out_pos] = ((e >> SYMBOL_SHIFT) & 0xFF) as u8;
                out_pos += 1;
                bitbuf >>= e_bits;
                bits -= e_bits;
            }
            continue 'main;
        }

        bitbuf >>= entry_bits;
        bits -= entry_bits;
        let dist_field = entry & DIST_MASK;
        if dist_field == DIST_EOB {
            return Ok(out_pos);
        }
        let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;

        if dist_field == DIST_SLOW {
            // The distance was not packed into the entry: decode it separately.
            if bits < 32 {
                refill(compressed, &mut pos, &mut bitbuf, &mut bits);
            }
            let (dist_sym, dist_len) = dist_table.decode(bitbuf);
            if dist_len == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Invalid distance code",
                ));
            }
            if dist_len > bits {
                return Err(truncated());
            }
            bitbuf >>= dist_len;
            bits -= dist_len;

            let extra = DIST_EXTRA_BITS[dist_sym as usize] as u32;
            if bits < extra {
                refill(compressed, &mut pos, &mut bitbuf, &mut bits);
                if bits < extra {
                    return Err(truncated());
                }
            }
            let extra_val = (bitbuf & ((1u64 << extra) - 1)) as usize;
            bitbuf >>= extra;
            bits -= extra;

            let distance = DIST_START[dist_sym as usize] as usize + extra_val;
            if distance > out_pos {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Invalid distance",
                ));
            }
            out_pos = copy_match_asm(output, out_pos, distance, length);
            continue 'main;
        }

        let distance = (dist_field >> DIST_SHIFT) as usize;
        if distance == 0 || distance > out_pos {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Invalid distance",
            ));
        }
        out_pos = copy_match_asm(output, out_pos, distance, length);
    }
    Ok(out_pos)
}
/// Copies an LZ77 match of `length` bytes located `distance` bytes behind `out_pos`
/// to `out_pos` and returns the position just past the copied bytes.
///
/// A distance of 1 is a byte fill, distances of 8 or more are copied in 8-byte words
/// (a word read never overlaps the word being written), and the remaining short
/// distances are copied byte by byte so the pattern replicates correctly.
///
/// Callers are expected to have verified `distance <= out_pos`.
///
/// # Panics
///
/// Panics if `out_pos + length` exceeds `output.len()`.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn copy_match_asm(output: &mut [u8], out_pos: usize, distance: usize, length: usize) -> usize {
    let match_start = out_pos - distance;
    let match_end = out_pos + length;
    assert!(
        match_end <= output.len(),
        "output buffer overflow: out_pos={} length={} cap={}",
        out_pos,
        length,
        output.len()
    );
    let base = output.as_mut_ptr();
    // SAFETY: the assertion above keeps every write inside `output`; every read lies at
    // or before the corresponding write position, hence inside `output` as well.
    unsafe {
        let from = base.add(match_start) as *const u8;
        let to = base.add(out_pos);
        match distance {
            1 => std::ptr::write_bytes(to, *from, length),
            0..=7 => {
                for offset in 0..length {
                    *to.add(offset) = *from.add(offset);
                }
            }
            _ => {
                // Whole 8-byte words first, then the leftover bytes one at a time.
                let word_bytes = length & !7;
                let mut offset = 0usize;
                while offset < word_bytes {
                    let word = (from.add(offset) as *const u64).read_unaligned();
                    (to.add(offset) as *mut u64).write_unaligned(word);
                    offset += 8;
                }
                for offset in word_bytes..length {
                    *to.add(offset) = *from.add(offset);
                }
            }
        }
    }
    match_end
}
/// Fallback used on targets other than x86_64.
///
/// Performs no decoding at all: every argument except `out_pos` is ignored and
/// `out_pos` is returned unchanged inside `Ok`.
///
/// # Safety
///
/// Declared `unsafe` to mirror the signature of the x86_64 implementation; this
/// fallback body itself performs no unsafe operation.
#[cfg(not(target_arch = "x86_64"))]
#[allow(dead_code)]
unsafe fn decode_huffman_asm_x64(
_compressed: &[u8],
_output: &mut [u8],
out_pos: usize,
_packed_lut: &crate::decompress::packed_lut::PackedLUT,
_dist_table: &TwoLevelTable,
) -> io::Result<usize> {
// Nothing was decoded, so the output position does not move.
Ok(out_pos)
}
/// Fallback used on targets other than x86_64: forwards to [`copy_match_into`] and
/// returns its result (the output position after the copied match).
#[cfg(not(target_arch = "x86_64"))]
#[inline(always)]
#[allow(dead_code)]
fn copy_match_asm(output: &mut [u8], out_pos: usize, distance: usize, length: usize) -> usize {
copy_match_into(output, out_pos, distance, length)
}
/// Decodes one Huffman-coded DEFLATE block body into `output`, starting at `out_pos`,
/// using the 12-bit packed lookup table with the two-level tables as fallback.
///
/// Packed entries are interpreted as: low 8 bits = number of input bits consumed
/// (0 means "use `lit_len_table`"), sign bit set = literal whose byte sits at bit 23,
/// otherwise bits 8..=22 hold a distance field where `0x7FFF` marks end-of-block and
/// `0x7FFE` requests a distance lookup through `dist_table`.
///
/// Decoding runs in two phases: a fast loop while at least 300 bytes of output remain
/// (so no per-symbol space checks are needed), then a careful loop that checks the
/// remaining space before every write.
///
/// Returns the output position after the end-of-block symbol.
///
/// # Errors
///
/// * `InvalidData` for an undecodable code, an out-of-range length or distance symbol,
///   or a distance that is zero or reaches before the start of `output`.
/// * `WriteZero` if a literal or a match does not fit into `output`.
#[allow(dead_code)]
#[inline(never)]
fn decode_huffman_ultra(
    bits: &mut FastBits,
    output: &mut [u8],
    mut out_pos: usize,
    packed_lut: &crate::decompress::packed_lut::PackedLUT,
    lit_len_table: &TwoLevelTable,
    dist_table: &TwoLevelTable,
) -> io::Result<usize> {
    use crate::decompress::inflate_tables::{
        DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
    };
    const BITS_MASK: u32 = 0xFF;
    const SYMBOL_SHIFT: u32 = 23;
    const DIST_SHIFT: u32 = 8;
    const DIST_MASK: u32 = 0x7FFF << DIST_SHIFT;
    const DIST_EOB: u32 = 0x7FFF << DIST_SHIFT;
    const DIST_SLOW: u32 = 0x7FFE << DIST_SHIFT;
    const LUT_MASK: u64 = 0xFFF;
    const FASTLOOP_MARGIN: usize = 300;

    #[inline(always)]
    fn bad_data(msg: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    #[inline(always)]
    fn output_full() -> io::Error {
        io::Error::new(io::ErrorKind::WriteZero, "Output full")
    }

    /// Turns a length symbol index (symbol - 257) into a match length, reading its
    /// extra bits. Indices 29 and above are not valid DEFLATE length codes.
    #[inline(always)]
    fn read_length(bits: &mut FastBits, len_idx: usize) -> io::Result<usize> {
        if len_idx >= 29 {
            return Err(bad_data("Invalid length code"));
        }
        bits.ensure(16);
        Ok(LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize)
    }

    /// Decodes a distance symbol through the two-level table and adds its extra bits.
    /// Symbols 30 and above are not valid DEFLATE distance codes.
    #[inline(always)]
    fn read_distance(bits: &mut FastBits, dist_table: &TwoLevelTable) -> io::Result<usize> {
        let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
        if dist_len == 0 || dist_sym >= 30 {
            return Err(bad_data("Invalid dist"));
        }
        bits.consume(dist_len);
        bits.ensure(16);
        Ok(DIST_START[dist_sym as usize] as usize
            + bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize)
    }

    let out_end = output.len();
    let fastloop_end = out_end.saturating_sub(FASTLOOP_MARGIN);
    let table = &packed_lut.table;

    // Fast loop: the margin guarantees room for one literal run or one maximal match.
    while out_pos < fastloop_end {
        bits.ensure(56);
        let entry = table[(bits.buffer() & LUT_MASK) as usize].0;
        let entry_bits = entry & BITS_MASK;

        if entry_bits == 0 {
            // Code not covered by the packed table: go through the two-level tables.
            let (symbol, code_len) = lit_len_table.decode(bits.buffer());
            if code_len == 0 {
                return Err(bad_data("Invalid code"));
            }
            bits.consume(code_len);
            if symbol < 256 {
                output[out_pos] = symbol as u8;
                out_pos += 1;
                continue;
            }
            if symbol == 256 {
                return Ok(out_pos);
            }
            let length = read_length(bits, (symbol - 257) as usize)?;
            let distance = read_distance(bits, dist_table)?;
            if distance == 0 || distance > out_pos {
                return Err(bad_data("Bad dist"));
            }
            out_pos = copy_match_into(output, out_pos, distance, length);
            continue;
        }

        bits.consume(entry_bits);
        if (entry as i32) < 0 {
            output[out_pos] = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
            out_pos += 1;
            // Drain further literals while a full table index is still buffered.
            while bits.bits_available() >= 12 {
                let e = table[(bits.buffer() & LUT_MASK) as usize].0;
                if (e as i32) >= 0 || (e & BITS_MASK) == 0 {
                    break;
                }
                bits.consume(e & BITS_MASK);
                output[out_pos] = ((e >> SYMBOL_SHIFT) & 0xFF) as u8;
                out_pos += 1;
            }
            continue;
        }

        let dist_field = entry & DIST_MASK;
        if dist_field == DIST_EOB {
            return Ok(out_pos);
        }
        let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
        let distance = if dist_field == DIST_SLOW {
            read_distance(bits, dist_table)?
        } else {
            (dist_field >> DIST_SHIFT) as usize
        };
        if distance == 0 || distance > out_pos {
            return Err(bad_data("Bad dist"));
        }
        out_pos = copy_match_into(output, out_pos, distance, length);
    }

    // Careful loop: every write is checked against the remaining output space.
    loop {
        bits.ensure(32);
        let entry = table[(bits.buffer() & LUT_MASK) as usize].0;
        let entry_bits = entry & BITS_MASK;

        let (length, distance) = if entry_bits == 0 {
            let (symbol, code_len) = lit_len_table.decode(bits.buffer());
            if code_len == 0 {
                return Err(bad_data("Invalid code"));
            }
            bits.consume(code_len);
            if symbol < 256 {
                if out_pos >= out_end {
                    return Err(output_full());
                }
                output[out_pos] = symbol as u8;
                out_pos += 1;
                continue;
            }
            if symbol == 256 {
                return Ok(out_pos);
            }
            let length = read_length(bits, (symbol - 257) as usize)?;
            (length, read_distance(bits, dist_table)?)
        } else {
            bits.consume(entry_bits);
            if (entry as i32) < 0 {
                if out_pos >= out_end {
                    return Err(output_full());
                }
                output[out_pos] = ((entry >> SYMBOL_SHIFT) & 0xFF) as u8;
                out_pos += 1;
                continue;
            }
            let dist_field = entry & DIST_MASK;
            if dist_field == DIST_EOB {
                return Ok(out_pos);
            }
            let length = ((entry >> SYMBOL_SHIFT) & 0xFF) as usize + 3;
            let distance = if dist_field == DIST_SLOW {
                read_distance(bits, dist_table)?
            } else {
                (dist_field >> DIST_SHIFT) as usize
            };
            (length, distance)
        };

        if distance == 0 || distance > out_pos {
            return Err(bad_data("Bad dist"));
        }
        // Report an oversized match as an error instead of tripping the bounds
        // assertion inside `copy_match_into`.
        if length > out_end.saturating_sub(out_pos) {
            return Err(output_full());
        }
        out_pos = copy_match_into(output, out_pos, distance, length);
    }
}
/// Decodes one Huffman-coded DEFLATE block body into `output`, starting at `out_pos`,
/// using `packed_lut` entries through their accessor methods and falling back to the
/// two-level tables when an entry reports zero bits.
///
/// Decoding runs in two phases: a fast loop while at least `FASTLOOP_MARGIN` bytes of
/// output remain (no per-symbol space checks), then a careful loop that checks the
/// remaining space before every write.
///
/// Returns the output position after the end-of-block symbol.
///
/// # Errors
///
/// * `InvalidData` for an undecodable code, an out-of-range length or distance symbol,
///   or a distance that is zero or reaches before the start of `output`.
/// * `WriteZero` if a literal or a match does not fit into `output`.
#[allow(dead_code)]
fn decode_huffman_packed(
    bits: &mut FastBits,
    output: &mut [u8],
    mut out_pos: usize,
    packed_lut: &crate::decompress::packed_lut::PackedLUT,
    lit_len_table: &TwoLevelTable,
    dist_table: &TwoLevelTable,
) -> io::Result<usize> {
    use crate::decompress::inflate_tables::{
        DIST_EXTRA_BITS, DIST_START, LEN_EXTRA_BITS, LEN_START,
    };
    const FASTLOOP_MARGIN: usize = 300;

    #[inline(always)]
    fn invalid(msg: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    #[inline(always)]
    fn output_full() -> io::Error {
        io::Error::new(io::ErrorKind::WriteZero, "Output buffer full")
    }

    /// Turns a length symbol index (symbol - 257) into a match length, reading its
    /// extra bits.
    #[inline(always)]
    fn read_length(bits: &mut FastBits, len_idx: usize) -> io::Result<usize> {
        if len_idx >= 29 {
            return Err(invalid("Invalid length code"));
        }
        bits.ensure(16);
        Ok(LEN_START[len_idx] as usize + bits.read(LEN_EXTRA_BITS[len_idx] as u32) as usize)
    }

    /// Decodes a distance symbol through the two-level table and adds its extra bits.
    #[inline(always)]
    fn read_distance(bits: &mut FastBits, dist_table: &TwoLevelTable) -> io::Result<usize> {
        let (dist_sym, dist_len) = dist_table.decode(bits.buffer());
        if dist_len == 0 || dist_sym >= 30 {
            return Err(invalid("Invalid distance code"));
        }
        bits.consume(dist_len);
        bits.ensure(16);
        Ok(DIST_START[dist_sym as usize] as usize
            + bits.read(DIST_EXTRA_BITS[dist_sym as usize] as u32) as usize)
    }

    /// Rejects distances that are zero or point before the start of the output.
    #[inline(always)]
    fn check_distance(distance: usize, out_pos: usize) -> io::Result<usize> {
        if distance > out_pos || distance == 0 {
            return Err(invalid("Invalid distance"));
        }
        Ok(distance)
    }

    let out_end = output.len();
    let fastloop_end = out_end.saturating_sub(FASTLOOP_MARGIN);

    // Fast loop: the margin guarantees room for one literal run or one maximal match.
    while out_pos < fastloop_end {
        bits.ensure(56);
        let entry = packed_lut.decode(bits.buffer());

        if entry.bits() == 0 {
            // Code not covered by the packed table: go through the two-level tables.
            let (symbol, code_len) = lit_len_table.decode(bits.buffer());
            if code_len == 0 {
                return Err(invalid("Invalid Huffman code"));
            }
            bits.consume(code_len);
            if symbol < 256 {
                output[out_pos] = symbol as u8;
                out_pos += 1;
                continue;
            }
            if symbol == 256 {
                return Ok(out_pos);
            }
            let length = read_length(bits, (symbol - 257) as usize)?;
            let distance = check_distance(read_distance(bits, dist_table)?, out_pos)?;
            out_pos = copy_match_into(output, out_pos, distance, length);
            continue;
        }

        bits.consume(entry.bits());
        if entry.is_literal() {
            output[out_pos] = entry.symbol();
            out_pos += 1;
            // Drain further literals while a full table index is still buffered.
            while bits.bits_available() >= 12 {
                let e = packed_lut.decode(bits.buffer());
                if !e.is_literal() || e.bits() == 0 {
                    break;
                }
                bits.consume(e.bits());
                output[out_pos] = e.symbol();
                out_pos += 1;
            }
            continue;
        }
        if entry.is_eob() {
            return Ok(out_pos);
        }

        let length = entry.length();
        let distance = if entry.is_slow_path() {
            read_distance(bits, dist_table)?
        } else {
            entry.distance()
        };
        let distance = check_distance(distance, out_pos)?;
        out_pos = copy_match_into(output, out_pos, distance, length);
    }

    // Careful loop: every write is checked against the remaining output space.
    loop {
        bits.ensure(32);
        let entry = packed_lut.decode(bits.buffer());

        let (length, distance) = if entry.bits() == 0 {
            let (symbol, code_len) = lit_len_table.decode(bits.buffer());
            if code_len == 0 {
                return Err(invalid("Invalid Huffman code"));
            }
            bits.consume(code_len);
            if symbol < 256 {
                if out_pos >= out_end {
                    return Err(output_full());
                }
                output[out_pos] = symbol as u8;
                out_pos += 1;
                continue;
            }
            if symbol == 256 {
                return Ok(out_pos);
            }
            let length = read_length(bits, (symbol - 257) as usize)?;
            (length, read_distance(bits, dist_table)?)
        } else {
            bits.consume(entry.bits());
            if entry.is_literal() {
                if out_pos >= out_end {
                    return Err(output_full());
                }
                output[out_pos] = entry.symbol();
                out_pos += 1;
                continue;
            }
            if entry.is_eob() {
                return Ok(out_pos);
            }
            let length = entry.length();
            let distance = if entry.is_slow_path() {
                read_distance(bits, dist_table)?
            } else {
                entry.distance()
            };
            (length, distance)
        };

        let distance = check_distance(distance, out_pos)?;
        // Report an oversized match as an error instead of tripping the bounds
        // assertion inside `copy_match_into`.
        if length > out_end.saturating_sub(out_pos) {
            return Err(output_full());
        }
        out_pos = copy_match_into(output, out_pos, distance, length);
    }
}
/// Copies `length` bytes from `src` to `dst`, moving 64 bytes per step with unaligned
/// AVX-512 loads/stores and finishing the remainder with `copy_nonoverlapping`.
///
/// # Safety
///
/// `src` must be readable and `dst` writable for `length` bytes, and the two ranges
/// must not overlap (the tail is copied with `copy_nonoverlapping`).
#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
#[inline(always)]
unsafe fn copy_large_avx512(src: *const u8, dst: *mut u8, length: usize) {
    use std::arch::x86_64::*;
    // Number of bytes covered by whole 64-byte vectors.
    let vector_bytes = length & !63;
    let mut offset = 0usize;
    while offset < vector_bytes {
        let lanes = _mm512_loadu_si512(src.add(offset) as *const __m512i);
        _mm512_storeu_si512(dst.add(offset) as *mut __m512i, lanes);
        offset += 64;
    }
    let tail = length - vector_bytes;
    if tail != 0 {
        std::ptr::copy_nonoverlapping(src.add(vector_bytes), dst.add(vector_bytes), tail);
    }
}
/// Copies an LZ77 match of `length` bytes located `distance` bytes behind `out_pos`
/// to `out_pos` and returns `out_pos + length`.
///
/// Overlapping matches (`distance < length`) replicate the `distance`-byte pattern, as
/// DEFLATE requires. A `distance` of zero copies nothing and only advances the position.
///
/// Strategy by case:
/// * `distance == 1`: byte fill.
/// * `distance >= length`: plain non-overlapping copy.
/// * `distance >= 8`: 8-byte words (a word read never overlaps the word being written).
/// * `distance` in 2..=7 and `length >= 16`: 8-byte writes advancing by `distance`. These
///   writes spill up to 7 bytes past the match, so this path is only taken when at least
///   8 spare bytes follow the match inside `output`.
/// * otherwise: byte-wise pattern copy.
///
/// # Panics
///
/// Panics if `distance > out_pos` or if `out_pos + length` exceeds `output.len()`.
#[inline(always)]
pub fn copy_match_into(output: &mut [u8], out_pos: usize, distance: usize, length: usize) -> usize {
    trace_match(distance, length);
    assert!(
        distance <= out_pos,
        "match distance out of range: distance={} out_pos={}",
        distance,
        out_pos
    );
    let end_pos = match out_pos.checked_add(length) {
        Some(end) if end <= output.len() => end,
        _ => panic!(
            "output buffer overflow: out_pos={} length={} cap={}",
            out_pos,
            length,
            output.len()
        ),
    };
    if distance == 0 {
        // Nothing to replicate; without this the stride loop below would never advance.
        return end_pos;
    }
    // Bytes of `output` available after the match for the stride path's overshoot.
    let spare_after_match = output.len() - end_pos;
    let base = output.as_mut_ptr();
    // SAFETY: the checks above keep `out_pos - distance .. end_pos` inside `output`;
    // the only path that touches bytes past `end_pos` requires 8 spare bytes there.
    unsafe {
        let dst = base.add(out_pos);
        let src = base.add(out_pos - distance) as *const u8;
        // Prefetch hints may point past the buffer, so the addresses are formed with
        // wrapping arithmetic; they are never dereferenced by this code.
        #[cfg(target_arch = "x86_64")]
        if length >= 32 {
            use std::arch::x86_64::*;
            _mm_prefetch(src.wrapping_add(64) as *const i8, _MM_HINT_T0);
            _mm_prefetch(dst.wrapping_add(64) as *const i8, _MM_HINT_T0);
        }
        #[cfg(target_arch = "aarch64")]
        if length >= 32 {
            core::arch::asm!(
                "prfm pldl1keep, [{0}]",
                "prfm pstl1keep, [{1}]",
                in(reg) src.wrapping_add(64),
                in(reg) dst.wrapping_add(64),
                options(nostack, preserves_flags)
            );
        }
        if distance == 1 {
            let byte = *src;
            std::ptr::write_bytes(dst, byte, length);
        } else if distance >= length {
            #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
            {
                if length >= 64 {
                    copy_large_avx512(src, dst, length);
                } else {
                    std::ptr::copy_nonoverlapping(src, dst, length);
                }
            }
            #[cfg(not(all(target_arch = "x86_64", target_feature = "avx512f")))]
            {
                std::ptr::copy_nonoverlapping(src, dst, length);
            }
        } else if distance >= 8 {
            let mut remaining = length;
            let mut d = dst;
            let mut s = src;
            while remaining >= 8 {
                let chunk = (s as *const u64).read_unaligned();
                (d as *mut u64).write_unaligned(chunk);
                d = d.add(8);
                s = s.add(8);
                remaining -= 8;
            }
            for i in 0..remaining {
                *d.add(i) = *s.add(i);
            }
        } else if length >= 16 && spare_after_match >= 8 {
            // Each 8-byte write finalises `distance` more bytes of the pattern; the
            // rest of the word is overwritten by the following iterations.
            let mut d = dst;
            let mut s = src;
            let end = dst.add(length);
            while d < end {
                (d as *mut u64).write_unaligned((s as *const u64).read_unaligned());
                s = s.add(distance);
                d = d.add(distance);
            }
        } else {
            for i in 0..length {
                *dst.add(i) = *src.add(i % distance);
            }
        }
    }
    end_pos
}
/// Decompresses every BGZF block of `data` in parallel and returns the concatenated
/// output.
///
/// The output buffer is sized from the blocks' `isize` fields; worker threads claim
/// block indices from a shared counter and inflate each block with libdeflate directly
/// into that block's region (`output_offset .. output_offset + isize`) of the buffer.
/// At least one worker is always started, even if `num_threads` is 0.
///
/// # Errors
///
/// Propagates errors from `parse_bgzf_blocks`. Returns `InvalidData` if a decompressor
/// cannot be allocated, a block's byte ranges are inconsistent, libdeflate reports a
/// failure, or a block inflates to a size different from its `isize`.
pub fn decompress_bgzf_parallel_to_vec(data: &[u8], num_threads: usize) -> io::Result<Vec<u8>> {
    /// Base pointer of the output buffer, shared read-only between the workers.
    struct OutputBase(*mut u8);
    // SAFETY: the pointer is only used to form per-block regions that are
    // bounds-checked against the buffer length before use.
    // NOTE(review): soundness also relies on the blocks' output regions being disjoint —
    // presumably guaranteed by `parse_bgzf_blocks`; confirm.
    unsafe impl Sync for OutputBase {}
    impl OutputBase {
        fn ptr(&self) -> *mut u8 {
            self.0
        }
    }

    let blocks = parse_bgzf_blocks(data)?;
    if blocks.is_empty() {
        return Ok(Vec::new());
    }
    let total_output: usize = blocks.iter().map(|b| b.isize as usize).sum();
    let mut output = vec![0u8; total_output];
    let num_blocks = blocks.len();
    let next_block = AtomicUsize::new(0);
    let had_error = std::sync::atomic::AtomicBool::new(false);
    // Taken once, before any worker runs, so no thread ever needs `&mut Vec`.
    let base = OutputBase(output.as_mut_ptr());
    // Zero requested threads must not silently produce an all-zero result.
    let workers = num_threads.max(1).min(num_blocks);

    std::thread::scope(|scope| {
        for _ in 0..workers {
            let blocks_ref = &blocks;
            let next_ref = &next_block;
            let base_ref = &base;
            let error_ref = &had_error;
            scope.spawn(move || {
                let decompressor = unsafe { libdeflate_sys::libdeflate_alloc_decompressor() };
                if decompressor.is_null() {
                    error_ref.store(true, Ordering::Relaxed);
                    return;
                }
                loop {
                    let idx = next_ref.fetch_add(1, Ordering::Relaxed);
                    if idx >= num_blocks {
                        break;
                    }
                    let block = &blocks_ref[idx];
                    let out_size = block.isize as usize;
                    if out_size == 0 {
                        continue;
                    }
                    // Compressed payload: everything up to the 8-byte trailer.
                    let deflate_data = block
                        .start
                        .checked_add(block.length)
                        .and_then(|end| end.checked_sub(8))
                        .and_then(|end| data.get(block.deflate_start..end));
                    let out_start = block.output_offset;
                    let region_ok = out_start
                        .checked_add(out_size)
                        .map_or(false, |end| end <= total_output);
                    let deflate_data = match deflate_data {
                        Some(bytes) if region_ok => bytes,
                        _ => {
                            error_ref.store(true, Ordering::Relaxed);
                            continue;
                        }
                    };
                    let mut actual_out = 0usize;
                    // SAFETY: `out_start + out_size <= total_output` was checked above,
                    // so the destination region lies inside the output buffer.
                    let ret = unsafe {
                        libdeflate_sys::libdeflate_deflate_decompress(
                            decompressor,
                            deflate_data.as_ptr() as *const std::ffi::c_void,
                            deflate_data.len(),
                            base_ref.ptr().add(out_start) as *mut std::ffi::c_void,
                            out_size,
                            &mut actual_out,
                        )
                    };
                    if ret != 0 || actual_out != out_size {
                        error_ref.store(true, Ordering::Relaxed);
                    }
                }
                unsafe { libdeflate_sys::libdeflate_free_decompressor(decompressor) };
            });
        }
    });

    if had_error.load(Ordering::Relaxed) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "CRC32 or size mismatch in BGZF block",
        ));
    }
    Ok(output)
}
/// Decompresses BGZF `data` into `writer` and returns the number of bytes written.
///
/// With zero or one thread the blocks are inflated sequentially; otherwise the
/// pipelined multi-threaded path is used with `num_threads` workers.
pub fn decompress_bgzf_parallel<W: Write>(
    data: &[u8],
    writer: &mut W,
    num_threads: usize,
) -> io::Result<u64> {
    match num_threads {
        0 | 1 => decompress_bgzf_streaming(data, writer),
        _ => decompress_bgzf_pipelined(data, writer, num_threads),
    }
}
/// Decompresses BGZF `data` with worker threads and writes the blocks to `writer` in
/// their original order, returning the number of bytes successfully written.
///
/// Workers claim block indices from a shared counter, inflate each block with
/// libdeflate into a private buffer, and send `(index, bytes)` over a bounded channel.
/// The calling thread reorders the results and writes them out. After a write failure
/// the channel is still drained (so workers never block forever) but nothing more is
/// written.
///
/// # Errors
///
/// Propagates errors from `parse_bgzf_blocks`. Returns `InvalidData` if a decompressor
/// cannot be allocated, a block's byte range is inconsistent, libdeflate reports a
/// failure, or a block inflates to a size different from its `isize`. Otherwise returns
/// the first error reported by `writer`.
fn decompress_bgzf_pipelined<W: Write>(
    data: &[u8],
    writer: &mut W,
    num_threads: usize,
) -> io::Result<u64> {
    let blocks = parse_bgzf_blocks(data)?;
    if blocks.is_empty() {
        return Ok(0);
    }
    let num_blocks = blocks.len();
    let max_block_output = blocks.iter().map(|b| b.isize as usize).max().unwrap_or(0);
    let channel_cap = num_threads * 2 + 2;
    let (done_tx, done_rx) = std::sync::mpsc::sync_channel::<(usize, Vec<u8>)>(channel_cap);
    let next_block = AtomicUsize::new(0);
    let had_error = std::sync::atomic::AtomicBool::new(false);
    let mut total = 0u64;
    // Declared outside the scope so a write failure can be reported to the caller.
    let mut write_error: Option<io::Error> = None;
    let workers = num_threads.max(1).min(num_blocks);

    std::thread::scope(|scope| {
        for _ in 0..workers {
            let done_tx = done_tx.clone();
            let blocks_ref = &blocks;
            let next_ref = &next_block;
            let error_ref = &had_error;
            scope.spawn(move || {
                let decompressor = unsafe { libdeflate_sys::libdeflate_alloc_decompressor() };
                if decompressor.is_null() {
                    error_ref.store(true, Ordering::Relaxed);
                    return;
                }
                let mut buf = vec![0u8; max_block_output];
                loop {
                    let idx = next_ref.fetch_add(1, Ordering::Relaxed);
                    if idx >= num_blocks {
                        break;
                    }
                    let block = &blocks_ref[idx];
                    let out_size = block.isize as usize;
                    if out_size == 0 {
                        let _ = done_tx.send((idx, Vec::new()));
                        continue;
                    }
                    // Compressed payload: everything up to the 8-byte trailer.
                    let deflate_data = block
                        .start
                        .checked_add(block.length)
                        .and_then(|end| end.checked_sub(8))
                        .and_then(|end| data.get(block.deflate_start..end));
                    let deflate_data = match deflate_data {
                        Some(bytes) => bytes,
                        None => {
                            error_ref.store(true, Ordering::Relaxed);
                            let _ = done_tx.send((idx, Vec::new()));
                            continue;
                        }
                    };
                    if buf.len() < out_size {
                        buf.resize(out_size, 0);
                    }
                    let mut actual_out = 0usize;
                    // SAFETY: `buf` holds at least `out_size` writable bytes.
                    let ret = unsafe {
                        libdeflate_sys::libdeflate_deflate_decompress(
                            decompressor,
                            deflate_data.as_ptr() as *const std::ffi::c_void,
                            deflate_data.len(),
                            buf.as_mut_ptr() as *mut std::ffi::c_void,
                            out_size,
                            &mut actual_out,
                        )
                    };
                    if ret != 0 || actual_out != out_size {
                        error_ref.store(true, Ordering::Relaxed);
                    }
                    buf.truncate(actual_out);
                    // Hand the filled buffer to the writer and start a fresh one.
                    let send_buf =
                        std::mem::replace(&mut buf, Vec::with_capacity(max_block_output));
                    let _ = done_tx.send((idx, send_buf));
                }
                unsafe { libdeflate_sys::libdeflate_free_decompressor(decompressor) };
            });
        }
        // Drop the original sender so the receive loop ends once all workers finish.
        drop(done_tx);

        let mut next_to_write = 0usize;
        let mut pending = std::collections::BTreeMap::<usize, Vec<u8>>::new();
        for (idx, data_vec) in &done_rx {
            pending.insert(idx, data_vec);
            while let Some(block_data) = pending.remove(&next_to_write) {
                next_to_write += 1;
                if write_error.is_some() || block_data.is_empty() {
                    continue;
                }
                match writer.write_all(&block_data) {
                    // Only bytes that actually reached the writer are counted.
                    Ok(()) => total += block_data.len() as u64,
                    Err(e) => write_error = Some(e),
                }
            }
        }
    });

    if had_error.load(Ordering::Relaxed) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "CRC32 or size mismatch in BGZF block",
        ));
    }
    if let Some(e) = write_error {
        return Err(e);
    }
    Ok(total)
}
/// Decompresses BGZF `data` block by block on the calling thread, writing each block
/// to `writer`, and returns the total number of bytes written.
///
/// A single libdeflate decompressor and a single buffer (sized for the largest block)
/// are reused for all blocks; the decompressor is freed on every exit path.
///
/// # Errors
///
/// Propagates errors from `parse_bgzf_blocks` and from `writer`. Returns an error if
/// the decompressor cannot be allocated, and `InvalidData` if a block's byte range is
/// inconsistent, libdeflate reports a failure, or a block inflates to a size different
/// from its `isize` (matching the checks of the parallel paths).
fn decompress_bgzf_streaming<W: Write>(data: &[u8], writer: &mut W) -> io::Result<u64> {
    let blocks = parse_bgzf_blocks(data)?;
    if blocks.is_empty() {
        return Ok(0);
    }
    let max_block_output = blocks.iter().map(|b| b.isize as usize).max().unwrap_or(0);
    let mut buf = vec![0u8; max_block_output];
    let decompressor = unsafe { libdeflate_sys::libdeflate_alloc_decompressor() };
    if decompressor.is_null() {
        return Err(io::Error::other(
            "failed to allocate libdeflate decompressor",
        ));
    }
    // Run the loop in a closure so the decompressor is released on every exit path.
    let result = (|| -> io::Result<u64> {
        let mut total = 0u64;
        for block in &blocks {
            let out_size = block.isize as usize;
            if out_size == 0 {
                continue;
            }
            if out_size > buf.len() {
                buf.resize(out_size, 0);
            }
            // Compressed payload: everything up to the 8-byte trailer.
            let deflate_data = block
                .start
                .checked_add(block.length)
                .and_then(|end| end.checked_sub(8))
                .and_then(|end| data.get(block.deflate_start..end))
                .ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        "inconsistent BGZF block bounds",
                    )
                })?;
            let mut actual_out = 0usize;
            // SAFETY: `buf` holds at least `out_size` writable bytes.
            let ret = unsafe {
                libdeflate_sys::libdeflate_deflate_decompress(
                    decompressor,
                    deflate_data.as_ptr() as *const std::ffi::c_void,
                    deflate_data.len(),
                    buf.as_mut_ptr() as *mut std::ffi::c_void,
                    out_size,
                    &mut actual_out,
                )
            };
            if ret != libdeflate_sys::libdeflate_result_LIBDEFLATE_SUCCESS {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "deflate decompression failed in BGZF block",
                ));
            }
            if actual_out != out_size {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "size mismatch in BGZF block",
                ));
            }
            writer.write_all(&buf[..actual_out])?;
            total += actual_out as u64;
        }
        Ok(total)
    })();
    unsafe { libdeflate_sys::libdeflate_free_decompressor(decompressor) };
    result
}
/// Locates the start of the payload of the gzip member occupying `data[offset..end]`.
///
/// Skips the 10-byte fixed header and, depending on the FLG byte, the optional FEXTRA
/// field, the zero-terminated FNAME and FCOMMENT strings, and the 2-byte FHCRC. The
/// magic bytes and compression method are not validated here.
///
/// Returns the absolute index (into `data`) of the first byte after the header, or
/// `None` if `offset..end` is not a valid range of `data`, the member is shorter than
/// the fixed header, an optional field is truncated, or no byte remains after the
/// header.
fn parse_gzip_header(data: &[u8], offset: usize, end: usize) -> Option<usize> {
    const FHCRC: u8 = 0x02;
    const FEXTRA: u8 = 0x04;
    const FNAME: u8 = 0x08;
    const FCOMMENT: u8 = 0x10;
    const FIXED_HEADER_LEN: usize = 10;

    /// Returns the index just past the NUL terminator of the string starting at `from`.
    fn skip_zero_terminated(member: &[u8], from: usize) -> Option<usize> {
        let nul = member.get(from..)?.iter().position(|&b| b == 0)?;
        Some(from + nul + 1)
    }

    // `get` rejects inverted ranges and ranges that extend past `data`.
    let member = data.get(offset..end)?;
    if member.len() < FIXED_HEADER_LEN {
        return None;
    }
    let flags = member[3];
    let mut cursor = FIXED_HEADER_LEN;

    if flags & FEXTRA != 0 {
        let xlen = member.get(cursor..cursor + 2)?;
        cursor += 2 + usize::from(u16::from_le_bytes([xlen[0], xlen[1]]));
    }
    if flags & FNAME != 0 {
        cursor = skip_zero_terminated(member, cursor)?;
    }
    if flags & FCOMMENT != 0 {
        cursor = skip_zero_terminated(member, cursor)?;
    }
    if flags & FHCRC != 0 {
        cursor += 2;
    }

    // At least one payload byte must follow the header.
    if cursor < member.len() {
        Some(offset + cursor)
    } else {
        None
    }
}
/// Heuristically splits `data` into gzip members by scanning for header signatures.
///
/// A position is taken as the start of a new member when it holds the gzip magic
/// (`1f 8b`), compression method 8, a FLG byte with the three top bits clear, and the
/// four bytes before it (the would-be ISIZE of the previous member) decode to a value
/// in `1..=1 GiB`. Each member's uncompressed size is read from its last four bytes.
///
/// Returns `None` when `data` does not start with a gzip header, fewer than two members
/// are found, a member is shorter than 18 bytes, a member's header cannot be parsed or
/// leaves no room for the 8-byte trailer, or the claimed total output exceeds 100 times
/// the input size.
fn scan_member_boundaries_fast(data: &[u8]) -> Option<Vec<BgzfBlock>> {
    const MAX_MEMBER_ISIZE: u32 = 1_073_741_824;
    const MIN_MEMBER_LEN: usize = 18;
    const TRAILER_LEN: usize = 8;

    if data.len() < MIN_MEMBER_LEN || data[0] != 0x1f || data[1] != 0x8b || data[2] != 0x08 {
        return None;
    }
    let header_size = crate::decompress::format::parse_gzip_header_size(data).unwrap_or(10);

    // Candidate member starts; the first member always begins at offset 0.
    let mut starts = vec![0usize];
    let mut pos = header_size + 1;
    while pos + 10 < data.len() {
        let looks_like_header = data[pos] == 0x1f
            && data[pos + 1] == 0x8b
            && data[pos + 2] == 0x08
            && data[pos + 3] & 0xE0 == 0;
        if looks_like_header && pos >= 4 {
            let prev_isize = u32::from_le_bytes([
                data[pos - 4],
                data[pos - 3],
                data[pos - 2],
                data[pos - 1],
            ]);
            if prev_isize > 0 && prev_isize <= MAX_MEMBER_ISIZE {
                starts.push(pos);
            }
        }
        pos += 1;
    }
    if starts.len() < 2 {
        return None;
    }

    let mut members = Vec::with_capacity(starts.len());
    let mut output_offset = 0usize;
    for (i, &start) in starts.iter().enumerate() {
        let end = starts.get(i + 1).copied().unwrap_or(data.len());
        let length = end - start;
        if length < MIN_MEMBER_LEN {
            return None;
        }
        let isize_val =
            u32::from_le_bytes([data[end - 4], data[end - 3], data[end - 2], data[end - 1]]);
        let deflate_start = parse_gzip_header(data, start, end)?;
        // Consumers slice `deflate_start..end - 8`; reject members whose header runs
        // into the trailer instead of letting that slice panic later.
        if deflate_start + TRAILER_LEN > end {
            return None;
        }
        members.push(BgzfBlock {
            start,
            length,
            isize: isize_val,
            output_offset,
            deflate_start,
        });
        output_offset = output_offset.checked_add(isize_val as usize)?;
    }

    // Implausible expansion ratio: most likely a false-positive boundary.
    if output_offset > data.len().saturating_mul(100) {
        return None;
    }
    Some(members)
}
/// Decompresses a multi-member gzip file in parallel and returns the concatenated
/// output.
///
/// Member boundaries come from `scan_member_boundaries_fast`. The output buffer is
/// sized from the members' `isize` fields; worker threads claim member indices from a
/// shared counter and inflate each member with libdeflate directly into its region
/// (`output_offset .. output_offset + isize`) of the buffer. At least one worker is
/// always started, even if `num_threads` is 0.
///
/// # Errors
///
/// Returns `InvalidData` if the boundary scan fails, a decompressor cannot be
/// allocated, a member's byte ranges are inconsistent, libdeflate reports a failure,
/// or a member inflates to a size different from its `isize`.
pub fn decompress_multi_member_parallel_to_vec(
    data: &[u8],
    num_threads: usize,
) -> io::Result<Vec<u8>> {
    /// Base pointer of the output buffer, shared read-only between the workers.
    struct OutputBase(*mut u8);
    // SAFETY: the pointer is only used to form per-member regions that are
    // bounds-checked against the buffer length before use; the regions handed out by
    // `scan_member_boundaries_fast` are consecutive and therefore disjoint.
    unsafe impl Sync for OutputBase {}
    impl OutputBase {
        fn ptr(&self) -> *mut u8 {
            self.0
        }
    }

    let members = scan_member_boundaries_fast(data).ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "Not a multi-member gzip file or boundary scan failed",
        )
    })?;
    let total_output: usize = members.iter().map(|m| m.isize as usize).sum();
    let mut output = vec![0u8; total_output];
    let num_members = members.len();
    let next_member = AtomicUsize::new(0);
    let had_error = std::sync::atomic::AtomicBool::new(false);
    // Taken once, before any worker runs, so no thread ever needs `&mut Vec`.
    let base = OutputBase(output.as_mut_ptr());
    // Zero requested threads must not silently produce an all-zero result.
    let workers = num_threads.max(1).min(num_members);

    std::thread::scope(|scope| {
        for _ in 0..workers {
            let members_ref = &members;
            let next_ref = &next_member;
            let base_ref = &base;
            let error_ref = &had_error;
            scope.spawn(move || {
                let decompressor = unsafe { libdeflate_sys::libdeflate_alloc_decompressor() };
                if decompressor.is_null() {
                    error_ref.store(true, Ordering::Relaxed);
                    return;
                }
                loop {
                    let idx = next_ref.fetch_add(1, Ordering::Relaxed);
                    if idx >= num_members {
                        break;
                    }
                    let member = &members_ref[idx];
                    // Compressed payload: everything up to the 8-byte trailer.
                    let deflate_data = member
                        .start
                        .checked_add(member.length)
                        .and_then(|end| end.checked_sub(8))
                        .and_then(|end| data.get(member.deflate_start..end));
                    let out_start = member.output_offset;
                    let out_size = member.isize as usize;
                    let region_ok = out_start
                        .checked_add(out_size)
                        .map_or(false, |end| end <= total_output);
                    let deflate_data = match deflate_data {
                        Some(bytes) if region_ok => bytes,
                        _ => {
                            error_ref.store(true, Ordering::Relaxed);
                            continue;
                        }
                    };
                    let mut actual_out = 0usize;
                    // SAFETY: `out_start + out_size <= total_output` was checked above,
                    // so the destination region lies inside the output buffer.
                    let ret = unsafe {
                        libdeflate_sys::libdeflate_deflate_decompress(
                            decompressor,
                            deflate_data.as_ptr() as *const std::ffi::c_void,
                            deflate_data.len(),
                            base_ref.ptr().add(out_start) as *mut std::ffi::c_void,
                            out_size,
                            &mut actual_out,
                        )
                    };
                    if ret != 0 || actual_out != out_size {
                        error_ref.store(true, Ordering::Relaxed);
                    }
                }
                unsafe { libdeflate_sys::libdeflate_free_decompressor(decompressor) };
            });
        }
    });

    if had_error.load(Ordering::Relaxed) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Decompression error in multi-member parallel",
        ));
    }
    Ok(output)
}
/// Decompresses a multi-member gzip buffer in parallel and writes the result to `writer`.
///
/// The input must be at least 18 bytes long and start with the bytes
/// `0x1f 0x8b`; otherwise an `InvalidData` error is returned before any work
/// is done. Decompression itself is delegated to
/// `decompress_multi_member_parallel_to_vec`.
///
/// Returns the number of decompressed bytes written.
pub fn decompress_multi_member_parallel<W: Write>(
    data: &[u8],
    writer: &mut W,
    num_threads: usize,
) -> io::Result<u64> {
    let looks_like_gzip = data.len() >= 18 && data.starts_with(&[0x1f, 0x8b]);
    if !looks_like_gzip {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "Not a gzip file",
        ));
    }

    let decoded = decompress_multi_member_parallel_to_vec(data, num_threads)?;
    writer.write_all(&decoded)?;
    Ok(decoded.len() as u64)
}
/// Decompresses `data` with `DecompressorEx::gzip_decompress_ex` and writes the output to `writer`.
///
/// The scratch buffer is sized from the last four bytes of the input
/// (read as a little-endian `u32`) when that value is non-zero and below
/// 1 GiB; otherwise it starts at four times the input size, with a 64 KiB
/// floor. The buffer is doubled each time the decompressor reports
/// insufficient space.
///
/// Returns the number of bytes written, or `InvalidData` when the
/// decompressor reports bad data.
fn decompress_single_member<W: Write>(data: &[u8], writer: &mut W) -> io::Result<u64> {
    use crate::backends::libdeflate::{DecompressError, DecompressorEx};

    const ONE_GIB: usize = 1024 * 1024 * 1024;

    let size_hint = if data.len() < 8 {
        data.len() * 4
    } else {
        let tail = &data[data.len() - 4..];
        u32::from_le_bytes([tail[0], tail[1], tail[2], tail[3]]) as usize
    };
    let starting_capacity = if (1..ONE_GIB).contains(&size_hint) {
        // Small slack on top of the hinted size.
        size_hint + 1024
    } else {
        data.len().saturating_mul(4).max(64 * 1024)
    };

    let mut decompressor = DecompressorEx::new();
    let mut scratch = vec![0u8; starting_capacity];
    let written = loop {
        match decompressor.gzip_decompress_ex(data, &mut scratch) {
            Ok(result) => break result.output_size,
            Err(DecompressError::InsufficientSpace) => {
                let doubled = scratch.len() * 2;
                scratch.resize(doubled, 0);
            }
            Err(DecompressError::BadData) => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Invalid gzip data",
                ));
            }
        }
    };

    writer.write_all(&scratch[..written])?;
    Ok(written as u64)
}
/// Record describing one chunk of a deflate stream.
///
/// NOTE(review): nothing in the visible code constructs or reads this type
/// (hence `allow(dead_code)`); the field notes below are inferred from the
/// field names only and should be confirmed against the intended design.
#[allow(dead_code)]
#[derive(Debug, Clone)]
struct ChunkBoundary {
// Presumably the bit offset in the deflate stream where the chunk starts — TODO confirm.
deflate_bit_start: usize,
// Presumably the bit offset in the deflate stream where the chunk ends — TODO confirm.
deflate_bit_end: usize,
// Presumably the position of the chunk's first byte in the decompressed output — TODO confirm.
output_offset: usize,
// Presumably the number of decompressed bytes the chunk produces — TODO confirm.
output_size: usize,
// Presumably the preceding output bytes needed to resolve back-references — TODO confirm.
window: Vec<u8>,
}
/// Decompresses a single gzip member, recording chunk boundaries along the way.
///
/// When the size hint taken from the last four bytes of `data` is below
/// 20 MiB, or `num_threads <= 1`, this simply forwards to
/// `decompress_single_member`. Otherwise the deflate stream is decoded block
/// by block into one buffer. Each time a block crosses a 4 MiB output
/// boundary, the (up to) 32 KiB of output preceding that boundary is
/// recorded. The recorded windows are currently only counted for the
/// `GZIPPY_DEBUG` diagnostic; decoding itself is sequential.
///
/// Returns the number of decompressed bytes written to `writer`.
///
/// # Errors
///
/// Returns `InvalidData` for a reserved block type or when the header end
/// reported by `skip_gzip_header` lies beyond the start of the 8-byte
/// trailer. Errors from the header parser, the block decoders and the writer
/// are propagated.
#[allow(dead_code)]
pub fn decompress_single_member_parallel<W: Write>(
    data: &[u8],
    writer: &mut W,
    num_threads: usize,
) -> io::Result<u64> {
    const MIN_SIZE_FOR_PARALLEL: usize = 20 * 1024 * 1024;
    const CHUNK_SIZE: usize = 4 * 1024 * 1024;
    const WINDOW_SIZE: usize = 32 * 1024;

    let isize_hint = if data.len() >= 8 {
        u32::from_le_bytes([
            data[data.len() - 4],
            data[data.len() - 3],
            data[data.len() - 2],
            data[data.len() - 1],
        ]) as usize
    } else {
        data.len() * 4
    };
    if isize_hint < MIN_SIZE_FOR_PARALLEL || num_threads <= 1 {
        return decompress_single_member(data, writer);
    }

    let header_size = crate::decompress::parallel::marker_decode::skip_gzip_header(data)?;
    // A plain range index would panic if the header end lies past the
    // trailer start (truncated or malformed input); report it instead.
    let deflate_data = data
        .get(header_size..data.len().saturating_sub(8))
        .ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "Truncated gzip member",
            )
        })?;

    let mut output = vec![0u8; isize_hint.max(1024)];
    let mut chunk_windows: Vec<(usize, Vec<u8>)> = Vec::new();
    let mut bits = FastBits::new(deflate_data);
    let mut out_pos = 0;
    loop {
        bits.refill();
        let bfinal = bits.read(1);
        let btype = bits.read(2);
        let start_out_pos = out_pos;
        match btype {
            0 => out_pos = decode_stored_into(&mut bits, &mut output, out_pos)?,
            1 => out_pos = decode_fixed_into(&mut bits, &mut output, out_pos)?,
            2 => out_pos = decode_dynamic_into(&mut bits, &mut output, out_pos)?,
            3 => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Reserved block type",
                ))
            }
            _ => unreachable!(),
        }
        // Record a window whenever this block moved the output position into
        // a later chunk than the one it started in.
        let chunk_before = start_out_pos / CHUNK_SIZE;
        let chunk_after = out_pos / CHUNK_SIZE;
        if chunk_after > chunk_before && out_pos >= WINDOW_SIZE {
            let boundary_pos = chunk_after * CHUNK_SIZE;
            let window_start = boundary_pos.saturating_sub(WINDOW_SIZE);
            let window = output[window_start..boundary_pos.min(out_pos)].to_vec();
            chunk_windows.push((boundary_pos, window));
        }
        if bfinal == 1 {
            break;
        }
    }
    output.truncate(out_pos);

    if chunk_windows.len() >= 2 && std::env::var("GZIPPY_DEBUG").is_ok() {
        eprintln!(
            "[gzippy] Single-member parallel: {} bytes, {} chunk boundaries found",
            out_pos,
            chunk_windows.len()
        );
    }
    writer.write_all(&output)?;
    Ok(out_pos as u64)
}
/// Reports whether `data` appears to contain a second gzip member after the first.
///
/// The first member is decompressed with `DecompressorEx::gzip_decompress_ex`
/// (growing the scratch buffer on demand). The result is `true` when at
/// least 18 bytes remain after the consumed input and they start with
/// `0x1f 0x8b`. Inputs shorter than 36 bytes and inputs the decompressor
/// rejects yield `false`.
#[allow(dead_code)]
pub fn is_multi_member(data: &[u8]) -> bool {
    use crate::backends::libdeflate::{DecompressError, DecompressorEx};

    if data.len() < 36 {
        return false;
    }

    let mut decompressor = DecompressorEx::new();
    let mut scratch = vec![0u8; 256 * 1024];
    let consumed = loop {
        match decompressor.gzip_decompress_ex(data, &mut scratch) {
            Ok(result) => break result.input_consumed,
            Err(DecompressError::InsufficientSpace) => {
                let doubled = scratch.len() * 2;
                scratch.resize(doubled, 0);
            }
            Err(DecompressError::BadData) => return false,
        }
    };

    match data.get(consumed..) {
        Some(rest) => rest.len() >= 18 && rest[0] == 0x1f && rest[1] == 0x8b,
        None => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::assert_slices_eq;
/// Panics with a detailed report when `actual` and `expected` differ.
///
/// The report names `context`, both lengths and, when the common prefix
/// contains a differing byte, the index and values of the first difference
/// plus a printable rendering of the surrounding bytes (10 before, 20 after,
/// clamped to both slices). Returns normally when the slices are equal.
fn assert_bytes_eq(actual: &[u8], expected: &[u8], context: &str) {
    if actual == expected {
        return;
    }

    // Printable ASCII and spaces are shown verbatim, everything else as '.'.
    fn printable(bytes: &[u8]) -> String {
        bytes
            .iter()
            .map(|&b| match b {
                b' ' => ' ',
                _ if b.is_ascii_graphic() => b as char,
                _ => '.',
            })
            .collect()
    }

    let mut report = format!("\n{} - byte mismatch:\n", context);
    report += &format!(
        " lengths: actual={}, expected={}\n",
        actual.len(),
        expected.len()
    );

    let mismatch_at = actual.iter().zip(expected).position(|(a, b)| a != b);
    if let Some(pos) = mismatch_at {
        report += &format!(" first diff at byte {}\n", pos);
        report += &format!(
            " actual[{}]={:#04x}, expected[{}]={:#04x}\n",
            pos, actual[pos], pos, expected[pos]
        );
        let lo = pos.saturating_sub(10);
        let hi = (pos + 20).min(actual.len()).min(expected.len());
        if lo < hi {
            report += &format!(
                " actual [{}..{}]: \"{}\"\n",
                lo,
                hi,
                printable(&actual[lo..hi])
            );
            report += &format!(
                " expected[{}..{}]: \"{}\"\n",
                lo,
                hi,
                printable(&expected[lo..hi])
            );
        }
    }
    panic!("{}", report);
}
/// `TurboBits` should hand back the first two input bytes, in order, when
/// read eight bits at a time.
#[test]
fn test_turbo_bits_basic() {
    let input = [0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0];
    let mut reader = TurboBits::new(&input);

    assert!(reader.has_bits(8), "Should have at least 8 bits");
    let first = reader.read(8);
    assert_eq!(first, 0x12, "First byte should be 0x12");

    reader.ensure(8);
    let second = reader.read(8);
    assert_eq!(second, 0x34, "Second byte should be 0x34");
}
/// After consuming three bits and calling `align`, the next eight bits read
/// should be the second input byte.
#[test]
fn test_turbo_bits_align() {
    let input = [0xFF, 0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE];
    let mut reader = TurboBits::new(&input);

    // Leave the reader in the middle of the first byte.
    let _ = reader.read(3);
    reader.align();

    reader.ensure(8);
    let aligned = reader.read(8);
    assert_eq!(aligned, 0x12, "After align, should read 0x12");
}
/// Round-trips a 26-byte alphabet through flate2 (level 1) and
/// `inflate_into_pub`, decoding twice and checking both results against the
/// original.
#[test]
fn test_turbo_inflate_literals() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let original = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::new(1));
        enc.write_all(original).unwrap();
        enc.finish().unwrap()
    };
    eprintln!(
        "Original: {} bytes, Compressed: {} bytes",
        original.len(),
        compressed.len()
    );
    let preview_len = compressed.len().min(32);
    eprintln!("Compressed hex: {:02x?}", &compressed[..preview_len]);

    // First decode, labelled "standard".
    let mut std_buf = vec![0u8; original.len() + 100];
    let std_len = inflate_into_pub(&compressed, &mut std_buf).unwrap();
    assert_eq!(&std_buf[..std_len], &original[..], "Standard path failed");
    eprintln!("Standard decoded: {} bytes", std_len);

    // Second decode, labelled "turbo".
    let mut turbo_buf = vec![0u8; original.len() + 100];
    let turbo_len = inflate_into_pub(&compressed, &mut turbo_buf).unwrap();
    eprintln!("Turbo decoded: {} bytes", turbo_len);
    eprintln!(
        "Turbo output: {:?}",
        String::from_utf8_lossy(&turbo_buf[..turbo_len])
    );
    eprintln!("Expected: {:?}", String::from_utf8_lossy(original));

    assert_eq!(
        turbo_len, std_len,
        "Turbo size mismatch: {} vs {}",
        turbo_len, std_len
    );
    assert_eq!(
        &turbo_buf[..turbo_len],
        &original[..],
        "Turbo content mismatch"
    );
}
/// Round-trips 1000 identical bytes through flate2 and `inflate_into_pub`,
/// decoding twice and checking both results against the original.
#[test]
fn test_turbo_inflate_rle() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let original = vec![b'X'; 1000];
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };

    let mut std_buf = vec![0u8; original.len() + 100];
    let std_len = inflate_into_pub(&compressed, &mut std_buf).unwrap();
    assert_eq!(&std_buf[..std_len], &original[..], "Standard path failed");

    let mut turbo_buf = vec![0u8; original.len() + 100];
    let turbo_len = inflate_into_pub(&compressed, &mut turbo_buf).unwrap();
    assert_eq!(
        turbo_len, std_len,
        "Turbo size mismatch: {} vs {}",
        turbo_len, std_len
    );
    assert_eq!(
        &turbo_buf[..turbo_len],
        &original[..],
        "Turbo content mismatch"
    );
}
/// Round-trips 500 bytes of a repeating sentence through flate2 and
/// `inflate_into_pub`, decoding twice and comparing with `assert_bytes_eq`.
#[test]
fn test_turbo_inflate_mixed() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let sentence = b"The quick brown fox jumps over the lazy dog. ";
    let original: Vec<u8> = sentence.iter().cycle().take(500).copied().collect();
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };

    let mut std_buf = vec![0u8; original.len() + 100];
    let std_len = inflate_into_pub(&compressed, &mut std_buf).unwrap();
    assert_bytes_eq(&std_buf[..std_len], &original[..], "standard path");

    let mut turbo_buf = vec![0u8; original.len() + 100];
    let turbo_len = inflate_into_pub(&compressed, &mut turbo_buf).unwrap();
    assert_eq!(
        turbo_len, std_len,
        "Turbo size mismatch: {} vs {}",
        turbo_len, std_len
    );
    assert_bytes_eq(
        &turbo_buf[..turbo_len],
        &original[..],
        "turbo_inflate_mixed",
    );
}
/// Decodes a short flate2-compressed message with `decode_huffman_turbo`
/// using the fixed tables, provided the encoder actually emitted a block of
/// type 1; any other block type is reported and skipped.
#[test]
fn test_decode_huffman_turbo_fixed() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let original = b"Hello, World!";
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::fast());
        enc.write_all(original).unwrap();
        enc.finish().unwrap()
    };
    let (lit_len_table, dist_table, packed_lut) = get_fixed_tables_turbo();

    // Read the three header bits of the first block by hand.
    let mut bits = TurboBits::new(&compressed);
    bits.ensure(16);
    let bfinal = bits.read(1);
    let btype = bits.read(2);
    eprintln!("Block: bfinal={}, btype={}", bfinal, btype);

    if btype != 1 {
        eprintln!("Skipping - not a fixed Huffman block (btype={})", btype);
        return;
    }

    let mut output = vec![0u8; original.len() + 100];
    let decoded = decode_huffman_turbo(
        &mut bits,
        &mut output,
        0,
        packed_lut,
        lit_len_table,
        dist_table,
    );
    match decoded {
        Err(e) => {
            panic!("decode_huffman_turbo failed: {}", e);
        }
        Ok(size) => {
            eprintln!("Decoded {} bytes", size);
            assert_eq!(size, original.len(), "Size mismatch");
            assert_slices_eq!(&output[..size], &original[..], "Content mismatch");
        }
    }
}
/// Builds a `PackedLUT` from the fixed literal/length code lengths and
/// all-5-bit distance lengths, tallies its entries by kind and requires at
/// least one literal and one end-of-block entry.
#[test]
fn test_packed_lut_entries() {
    use crate::decompress::packed_lut::PackedLUT;

    let lit_len_lens = get_fixed_lit_len_lens();
    let dist_lens = vec![5u8; 32];

    eprintln!("Code lengths for A-Z:");
    for letter in b'A'..=b'Z' {
        eprintln!(
            " '{}' ({}) = {} bits",
            letter as char, letter, lit_len_lens[letter as usize]
        );
    }

    let packed_lut = PackedLUT::build(&lit_len_lens, &dist_lens).unwrap();

    let (mut literals, mut eobs, mut slow_paths, mut invalid) = (0, 0, 0, 0);
    for entry in packed_lut.table.iter() {
        if !entry.is_valid() {
            invalid += 1;
            continue;
        }
        if entry.is_literal() {
            literals += 1;
        } else if entry.is_eob() {
            eobs += 1;
        } else if entry.is_slow_path() {
            slow_paths += 1;
        }
    }

    eprintln!(
        "PackedLUT entries: literals={}, eobs={}, slow_paths={}, invalid={}",
        literals, eobs, slow_paths, invalid
    );
    assert!(literals > 0, "Should have literal entries");
    assert!(eobs > 0, "Should have EOB entries");
}
/// For several small input sizes, decodes the same compressed stream twice
/// with `inflate_into_pub` and, if the two results disagree, dumps a
/// byte-by-byte comparison before failing.
#[test]
fn test_turbo_decode_trace() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    for size in [8, 10, 12, 16, 20, 24, 26] {
        let original: Vec<u8> = (b'A'..).take(size).collect();
        let compressed = {
            let mut enc = DeflateEncoder::new(Vec::new(), Compression::fast());
            enc.write_all(&original).unwrap();
            enc.finish().unwrap()
        };

        let mut output_std = vec![0u8; 100];
        let size_std = inflate_into_pub(&compressed, &mut output_std).unwrap();
        let mut output_turbo = vec![0u8; 100];
        let size_turbo = inflate_into_pub(&compressed, &mut output_turbo).unwrap();

        let agree =
            size_turbo == size_std && output_turbo[..size_turbo] == output_std[..size_std];
        if agree {
            eprintln!("Size {}: OK", size);
            continue;
        }

        eprintln!("\n=== MISMATCH at size {} ===", size);
        eprintln!("Original: {:?}", String::from_utf8_lossy(&original));
        eprintln!(
            "Compressed: {} bytes, hex: {:02x?}",
            compressed.len(),
            &compressed
        );
        eprintln!(
            "Standard: {} bytes, output: {:?}",
            size_std,
            String::from_utf8_lossy(&output_std[..size_std])
        );
        eprintln!(
            "Turbo: {} bytes, output: {:?}",
            size_turbo,
            String::from_utf8_lossy(&output_turbo[..size_turbo])
        );
        // Positions past the end of either output compare as zero.
        for i in 0..size_std.max(size_turbo) {
            let std_byte = output_std[..size_std].get(i).copied().unwrap_or(0);
            let turbo_byte = output_turbo[..size_turbo].get(i).copied().unwrap_or(0);
            if std_byte != turbo_byte {
                eprintln!(
                    " Position {}: std='{}' (0x{:02x}) vs turbo='{}' (0x{:02x})",
                    i, std_byte as char, std_byte, turbo_byte as char, turbo_byte
                );
            }
        }
        panic!("Turbo mismatch at size {}", size);
    }
}
/// `inflate_into` should reproduce a short sentence exactly when given an
/// output buffer of precisely the original length.
#[test]
fn test_inflate_into() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let original = b"Hello, World! This is a test of the BGZF inflate_into function.";
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(original).unwrap();
        enc.finish().unwrap()
    };

    let mut output = vec![0u8; original.len()];
    let actual_size = inflate_into(&compressed, &mut output).unwrap();

    assert_eq!(actual_size, original.len());
    assert_slices_eq!(&output[..actual_size], &original[..]);
}
/// Feeds four differently shaped streams to `decode_huffman_asm_x64` with
/// fixed-code tables (logging the outcome without asserting on it), then
/// verifies that `inflate_into` decodes each stream back to its original.
#[test]
fn test_decode_huffman_asm_x64() {
    use crate::decompress::packed_lut::PackedLUT;
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write as IoWrite;

    let deflate_with = |payload: &[u8], level: Compression| -> Vec<u8> {
        let mut enc = DeflateEncoder::new(Vec::new(), level);
        enc.write_all(payload).unwrap();
        enc.finish().unwrap()
    };

    // 1: unique literals.
    let original1 = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    let compressed1 = deflate_with(original1, Compression::fast());

    // 2: one byte repeated.
    let original2 = vec![b'X'; 1000];
    let compressed2 = deflate_with(&original2, Compression::default());

    // 3: a repeating sentence.
    let pattern = b"The quick brown fox jumps over the lazy dog. ";
    let original3: Vec<u8> = pattern.iter().cycle().take(2000).copied().collect();
    let compressed3 = deflate_with(&original3, Compression::default());

    // 4: increasing bytes with a marker string after every tenth value.
    let mut original4 = Vec::new();
    for i in 0u8..200 {
        original4.push(i);
        if i % 10 == 0 {
            original4.extend(b"REPEAT");
        }
    }
    let compressed4 = deflate_with(&original4, Compression::default());

    // Code lengths 8/9/7/8 over the four literal/length symbol ranges.
    let lit_len_lens = {
        let mut lens = vec![0u8; 288];
        lens[..144].fill(8);
        lens[144..256].fill(9);
        lens[256..280].fill(7);
        lens[280..].fill(8);
        lens
    };
    let dist_lens = vec![5u8; 32];
    let packed_lut = PackedLUT::build(&lit_len_lens, &dist_lens).unwrap();
    let dist_table = TwoLevelTable::build(&dist_lens).unwrap();

    let test_stream = |compressed: &[u8], expected: &[u8], name: &str| {
        let mut output = vec![0u8; expected.len() + 1000];
        let outcome = unsafe {
            decode_huffman_asm_x64(compressed, &mut output, 0, &packed_lut, &dist_table)
        };
        match outcome {
            Err(e) => {
                eprintln!("{}: error (expected for dynamic blocks): {}", name, e);
            }
            Ok(size) => {
                if size > 0 {
                    eprintln!("{}: decoded {} bytes", name, size);
                }
            }
        }
    };
    test_stream(&compressed1, original1, "literals");
    test_stream(&compressed2, &original2, "rle");
    test_stream(&compressed3, &original3, "repeated");
    test_stream(&compressed4, &original4, "mixed");

    let cases = [
        (&compressed1[..], &original1[..], "literals"),
        (&compressed2[..], &original2[..], "rle"),
        (&compressed3[..], &original3[..], "repeated"),
        (&compressed4[..], &original4[..], "mixed"),
    ];
    for (compressed, original, name) in cases {
        let mut output = vec![0u8; original.len() + 1000];
        let size = inflate_into(compressed, &mut output).unwrap();
        assert_slices_eq!(&output[..size], original, format!("{} mismatch", name));
    }
}
/// Round-trips three inputs (a byte ramp, a repeated short string, and runs
/// mixed with a marker string) through flate2 and `inflate_into`, checking
/// size and content each time.
#[test]
fn test_multi_literal_correctness() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write as IoWrite;

    // Compress `input` at `level`, inflate it again and compare.
    let roundtrip = |input: &[u8], level: Compression, size_msg: &str| {
        let mut enc = DeflateEncoder::new(Vec::new(), level);
        enc.write_all(input).unwrap();
        let compressed = enc.finish().unwrap();
        let mut output = vec![0u8; input.len() + 1000];
        let size = inflate_into(&compressed, &mut output).unwrap();
        assert_eq!(size, input.len(), "{}", size_msg);
        assert_eq!(&output[..size], input, "Content mismatch");
    };

    let original1: Vec<u8> = (0..10_000).map(|i| (i % 256) as u8).collect();
    roundtrip(
        &original1,
        Compression::fast(),
        "Size mismatch for literals-only",
    );

    let original2: Vec<u8> = "ABCDEFGHIJ".repeat(1000).into_bytes();
    roundtrip(
        &original2,
        Compression::default(),
        "Size mismatch for repetitive",
    );

    let mut original3 = Vec::new();
    for i in 0..100 {
        original3.extend_from_slice(&[(i * 7) as u8; 50]);
        original3.extend_from_slice(b"REPEAT_THIS_STRING_");
    }
    roundtrip(&original3, Compression::default(), "Size mismatch for mixed");
}
/// Times five million `ensure`/`decode`/`consume` steps of a `CombinedLUT`
/// built from the 8/9/7/8 code-length layout, over an 8 MB synthetic input,
/// and prints the rate in millions of operations per second.
#[test]
fn microbench_decode_loop() {
    use crate::decompress::two_level_table::FastBits;

    let input: Vec<u8> = (0..8_000_000u64).map(|i| (i * 7 % 256) as u8).collect();
    let lens: Vec<u8> = (0..288u16)
        .map(|sym| match sym {
            0..=143 => 8,
            144..=255 => 9,
            256..=279 => 7,
            _ => 8,
        })
        .collect();
    let lut = crate::decompress::combined_lut::CombinedLUT::build(&lens, &[5u8; 32]).unwrap();

    let iterations = 5_000_000u64;
    // Accumulated so the decoded values are observably used.
    let mut sum = 0u64;
    let mut bits = FastBits::new(&input);

    let started = std::time::Instant::now();
    for _ in 0..iterations {
        bits.ensure(12);
        let entry = lut.decode(bits.buffer());
        bits.consume(entry.bits_to_skip as u32);
        sum += entry.symbol_or_length as u64;
    }
    let elapsed = started.elapsed();

    let ops_per_sec = iterations as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!("\n=== Decode Loop Micro-Benchmark ===");
    eprintln!("Tight loop (no branching): {:.1} M/s", ops_per_sec);
    eprintln!("Sum (prevent optimization): {}", sum);
}
/// Compares the throughput of `inflate_into` with `libdeflater` on a 1 MB
/// synthetic input (3 warm-up runs, 50 timed runs each), prints the figures
/// and finally checks that `inflate_into` yields the original length.
#[test]
fn benchmark_inflate_into() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let original: Vec<u8> = (0..1_000_000)
        .map(|i| ((i * 7 + i / 100) % 256) as u8)
        .collect();
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };
    let mut output = vec![0u8; original.len() + 1000];
    let iterations = 50;
    let mb_per_sec = |elapsed: std::time::Duration| {
        original.len() as f64 * iterations as f64 / elapsed.as_secs_f64() / 1_000_000.0
    };

    // Warm-up.
    for _ in 0..3 {
        let _ = inflate_into(&compressed, &mut output);
    }

    let started = std::time::Instant::now();
    for _ in 0..iterations {
        let _ = inflate_into(&compressed, &mut output);
    }
    let our_time = started.elapsed();
    let our_speed = mb_per_sec(our_time);

    let mut libdeflate = libdeflater::Decompressor::new();
    let mut ld_output = vec![0u8; original.len() + 1000];
    let started = std::time::Instant::now();
    for _ in 0..iterations {
        let _ = libdeflate.deflate_decompress(&compressed, &mut ld_output);
    }
    let ld_time = started.elapsed();
    let ld_speed = mb_per_sec(ld_time);

    let ratio = our_time.as_secs_f64() / ld_time.as_secs_f64();
    eprintln!("\n=== inflate_into vs libdeflate ===");
    eprintln!("Our inflate_into: {:.1} MB/s", our_speed);
    eprintln!("libdeflate: {:.1} MB/s", ld_speed);
    eprintln!("Ratio: {:.2}x slower than libdeflate", ratio);
    eprintln!("Gap to close: {:.0}%", (ratio - 1.0) * 100.0);

    let size = inflate_into(&compressed, &mut output).unwrap();
    assert_eq!(size, original.len());
}
/// Times 100 runs of `inflate_into` (after 5 warm-up runs) on a synthetic
/// input of byte pairs with a short run of `A`s after every hundredth pair,
/// and prints the throughput.
#[test]
fn benchmark_packed_vs_combined() {
    use flate2::{write::DeflateEncoder, Compression};
    use std::io::Write as IoWrite;

    let mut original = Vec::with_capacity(1_000_000);
    for i in 0..100_000 {
        original.push(((i * 7) % 256) as u8);
        original.push((i % 256) as u8);
        if i % 100 == 0 {
            original.extend(std::iter::repeat_n(b'A', 10));
        }
    }
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::fast());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };
    let mut output = vec![0u8; original.len() + 10000];

    // Warm-up.
    for _ in 0..5 {
        let _ = inflate_into(&compressed, &mut output);
    }

    let iterations = 100;
    let started = std::time::Instant::now();
    for _ in 0..iterations {
        let _ = inflate_into(&compressed, &mut output);
    }
    let elapsed = started.elapsed();

    let speed = original.len() as f64 * iterations as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!("\n=== inflate_into (CombinedLUT) Benchmark ===");
    eprintln!("Output size: {} bytes", original.len());
    eprintln!("Iterations: {}", iterations);
    eprintln!("Speed: {:.1} MB/s", speed);
}
/// Times `decode_huffman_turbo` (with tables built from the 8/9/7/8
/// code-length layout) against `inflate_into` and `libdeflater` on the same
/// compressed input, 100 runs each, and prints the three throughputs and
/// their ratios. Decode results are discarded; nothing is asserted.
#[test]
fn benchmark_turbo_decoder() {
    use crate::decompress::packed_lut::PackedLUT;
    use crate::decompress::two_level_table::TurboBits;
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write as IoWrite;

    let mut original = Vec::with_capacity(1_000_000);
    for i in 0..100_000 {
        original.push(((i * 7) % 256) as u8);
        original.push((i % 256) as u8);
        if i % 100 == 0 {
            original.extend(std::iter::repeat_n(b'A', 10));
        }
    }
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::fast());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };

    let lit_len_lens = {
        let mut lens = vec![0u8; 288];
        lens[..144].fill(8);
        lens[144..256].fill(9);
        lens[256..280].fill(7);
        lens[280..].fill(8);
        lens
    };
    let dist_lens = vec![5u8; 32];
    let packed_lut = PackedLUT::build(&lit_len_lens, &dist_lens).unwrap();
    let lit_len_table = TwoLevelTable::build(&lit_len_lens).unwrap();
    let dist_table = TwoLevelTable::build(&dist_lens).unwrap();
    let mut output = vec![0u8; original.len() + 10000];
    let payload_len = original.len();
    let iterations = 100;
    let mb_per_sec = |elapsed: std::time::Duration| {
        payload_len as f64 * iterations as f64 / elapsed.as_secs_f64() / 1_000_000.0
    };

    // One turbo decode: skip the three block-header bits, then decode.
    let mut turbo_pass = || {
        let mut bits = TurboBits::new(&compressed);
        bits.ensure(16);
        let _ = bits.read(3);
        let _ = decode_huffman_turbo(
            &mut bits,
            &mut output,
            0,
            &packed_lut,
            &lit_len_table,
            &dist_table,
        );
    };

    // Warm-up.
    for _ in 0..5 {
        turbo_pass();
    }

    let started = std::time::Instant::now();
    for _ in 0..iterations {
        turbo_pass();
    }
    let turbo_speed = mb_per_sec(started.elapsed());

    let started = std::time::Instant::now();
    for _ in 0..iterations {
        let _ = inflate_into(&compressed, &mut output);
    }
    let standard_speed = mb_per_sec(started.elapsed());

    let mut decompressor = libdeflater::Decompressor::new();
    let started = std::time::Instant::now();
    for _ in 0..iterations {
        let _ = decompressor.deflate_decompress(&compressed, &mut output);
    }
    let libdeflate_speed = mb_per_sec(started.elapsed());

    eprintln!("\n=== Phase 1 Turbo Decoder Benchmark ===");
    eprintln!("Data size: {} bytes", original.len());
    eprintln!("Turbo (Phase 1): {:.1} MB/s", turbo_speed);
    eprintln!("Standard: {:.1} MB/s", standard_speed);
    eprintln!("libdeflate: {:.1} MB/s", libdeflate_speed);
    eprintln!("Turbo vs standard: {:.2}x", turbo_speed / standard_speed);
    eprintln!(
        "Turbo vs libdeflate: {:.0}%",
        turbo_speed / libdeflate_speed * 100.0
    );
}
/// Decompresses the gzippy benchmark file with `decompress_bgzf_parallel`
/// (8 threads) and compares it with flate2's `MultiGzDecoder` output.
/// Skipped when the file is missing.
#[test]
fn test_bgzf_parallel() {
    let Ok(data) = std::fs::read("benchmark_data/test-gzippy-l1-t14.gz") else {
        eprintln!("Skipping test - no gzippy test file");
        return;
    };

    use std::io::Read;
    let mut expected = Vec::new();
    flate2::read::MultiGzDecoder::new(&data[..])
        .read_to_end(&mut expected)
        .unwrap();

    let mut output = Vec::new();
    decompress_bgzf_parallel(&data, &mut output, 8).unwrap();
    assert_eq!(output.len(), expected.len(), "Size mismatch");

    let first_mismatch = output.iter().zip(&expected).position(|(a, b)| a != b);
    if let Some(i) = first_mismatch {
        let start = i.saturating_sub(10);
        let end = (i + 20).min(output.len());
        eprintln!(
            "First mismatch at byte {}: got {:02x} expected {:02x}",
            i, output[i], expected[i]
        );
        eprintln!("Context - ours: {:02x?}", &output[start..end]);
        eprintln!("Context - expected: {:02x?}", &expected[start..end]);
        panic!("Content mismatch at byte {}", i);
    }
}
/// Measures `decompress_bgzf_parallel` with 8 threads on the gzippy
/// benchmark file (3 warm-up runs, 5 timed runs) and prints the average
/// throughput. Skipped when the file is missing.
#[test]
fn benchmark_bgzf_parallel() {
    let Ok(data) = std::fs::read("benchmark_data/test-gzippy-l1-t14.gz") else {
        eprintln!("Skipping benchmark - no test file");
        return;
    };

    // The reference decode is only used to learn the uncompressed size.
    use std::io::Read;
    let mut expected = Vec::new();
    flate2::read::MultiGzDecoder::new(&data[..])
        .read_to_end(&mut expected)
        .unwrap();
    let expected_size = expected.len();

    for _ in 0..3 {
        let mut sink = Vec::new();
        decompress_bgzf_parallel(&data, &mut sink, 8).unwrap();
    }

    let started = std::time::Instant::now();
    let iterations = 5;
    for _ in 0..iterations {
        let mut sink = Vec::new();
        decompress_bgzf_parallel(&data, &mut sink, 8).unwrap();
    }
    let per_run = started.elapsed() / iterations;

    let speed = expected_size as f64 / per_run.as_secs_f64() / 1_000_000.0;
    eprintln!("BGZF parallel (8 threads): {:.1} MB/s", speed);
}
/// Concatenates three independently gzip-compressed 100 000-byte parts,
/// checks `is_multi_member` detects the result, and verifies
/// `decompress_multi_member_parallel` (4 threads) reproduces the
/// concatenated originals.
#[test]
fn test_multi_member_parallel() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write as IoWrite;

    let gzip = |payload: &[u8]| -> Vec<u8> {
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(payload).unwrap();
        enc.finish().unwrap()
    };

    let part1: Vec<u8> = (0..100_000).map(|i| (i % 256) as u8).collect();
    let part2: Vec<u8> = (0..100_000).map(|i| ((i + 50) % 256) as u8).collect();
    let part3: Vec<u8> = (0..100_000).map(|i| ((i + 100) % 256) as u8).collect();

    let multi = [gzip(&part1), gzip(&part2), gzip(&part3)].concat();
    assert!(is_multi_member(&multi), "Should detect multi-member");

    let expected = [part1, part2, part3].concat();

    let mut output = Vec::new();
    decompress_multi_member_parallel(&multi, &mut output, 4).unwrap();
    assert_eq!(output.len(), expected.len(), "Size mismatch");
    assert_slices_eq!(output, expected, "Content mismatch");
}
/// Builds a ten-member gzip stream of 50 000-byte parts and verifies that
/// `decompress_multi_member_parallel` (8 threads) reproduces the
/// concatenated originals.
#[test]
fn test_multi_member_large() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write as IoWrite;

    let num_members = 10;
    let mut multi = Vec::new();
    let mut expected = Vec::new();
    for i in 0..num_members {
        let part: Vec<u8> = (0..50_000).map(|j| ((i * 17 + j) % 256) as u8).collect();
        let member = {
            let mut enc = GzEncoder::new(Vec::new(), Compression::default());
            enc.write_all(&part).unwrap();
            enc.finish().unwrap()
        };
        multi.extend_from_slice(&member);
        expected.extend_from_slice(&part);
    }
    assert!(is_multi_member(&multi), "Should detect multi-member");

    let mut output = Vec::new();
    decompress_multi_member_parallel(&multi, &mut output, 8).unwrap();
    assert_eq!(output.len(), expected.len(), "Size mismatch");
    assert_slices_eq!(output, expected, "Content mismatch");
}
/// Benchmarks `inflate_into_pub` against a fresh `libdeflater::Decompressor`
/// per call on each available dataset file and prints the throughputs.
///
/// The gzip header of each file is skipped by hand according to its flag
/// byte, and the last four bytes are taken as the uncompressed size. The
/// number of timed runs comes from the `BENCH_RUNS` environment variable
/// (default 10). Missing files are reported and skipped.
#[test]
fn bench_production_inflate() {
    let _ = crate::tests::datasets::prepare_datasets();
    let datasets = [
        (
            "silesia",
            "benchmark_data/silesia-gzip.tar.gz",
            "mixed content",
        ),
        (
            "software",
            "benchmark_data/software.archive.gz",
            "source code",
        ),
        ("logs", "benchmark_data/logs.txt.gz", "repetitive logs"),
    ];
    const WARMUP: usize = 3;
    let iterations: usize = std::env::var("BENCH_RUNS")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(10);

    eprintln!("\n╔══════════════════════════════════════════════════════════════╗");
    eprintln!("║ GZIPPY DECOMPRESSION BENCHMARK ║");
    eprintln!("╠══════════════════════════════════════════════════════════════╣");
    eprintln!(
        "║ Warmup: {} iterations, Measured: {} iterations ║",
        WARMUP, iterations
    );
    eprintln!("╚══════════════════════════════════════════════════════════════╝\n");

    for &(name, path, desc) in &datasets {
        let Ok(gz) = std::fs::read(path) else {
            eprintln!("⚠ Skipping {} - file not found: {}", name, path);
            continue;
        };

        // Walk past the optional header fields selected by the flag byte.
        let flags = gz[3];
        let mut cursor = 10;
        if flags & 0x04 != 0 {
            let xlen = u16::from_le_bytes([gz[cursor], gz[cursor + 1]]) as usize;
            cursor += 2 + xlen;
        }
        // Flags 0x08 and 0x10 each announce one zero-terminated field.
        for zero_terminated in [0x08u8, 0x10] {
            if flags & zero_terminated != 0 {
                while cursor < gz.len() && gz[cursor] != 0 {
                    cursor += 1;
                }
                cursor += 1;
            }
        }
        if flags & 0x02 != 0 {
            cursor += 2;
        }

        let deflate = &gz[cursor..gz.len() - 8];
        let size_field = &gz[gz.len() - 4..];
        let expected_size =
            u32::from_le_bytes([size_field[0], size_field[1], size_field[2], size_field[3]])
                as usize;
        let mut output = vec![0u8; expected_size + 1024];
        let mb_per_sec = |started: std::time::Instant| {
            (expected_size * iterations) as f64 / started.elapsed().as_secs_f64() / 1_000_000.0
        };

        eprintln!(
            "┌─ {} ({}) ─────────────────────────────",
            name.to_uppercase(),
            desc
        );
        eprintln!(
            "│ Size: {:.1} MB uncompressed",
            expected_size as f64 / 1_000_000.0
        );

        for _ in 0..WARMUP {
            let _ = inflate_into_pub(deflate, &mut output);
        }
        let started = std::time::Instant::now();
        for _ in 0..iterations {
            let _ = inflate_into_pub(deflate, &mut output);
        }
        let production_speed = mb_per_sec(started);

        for _ in 0..WARMUP {
            libdeflater::Decompressor::new()
                .deflate_decompress(deflate, &mut output)
                .unwrap();
        }
        let started = std::time::Instant::now();
        for _ in 0..iterations {
            libdeflater::Decompressor::new()
                .deflate_decompress(deflate, &mut output)
                .unwrap();
        }
        let direct_libdeflate_speed = mb_per_sec(started);

        let overhead_pct = (1.0 - production_speed / direct_libdeflate_speed) * 100.0;
        eprintln!(
            "│ production (inflate_into_pub): {:>8.1} MB/s",
            production_speed
        );
        eprintln!(
            "│ direct libdeflater (reference): {:>8.1} MB/s (wrapper overhead: {:.1}%)",
            direct_libdeflate_speed, overhead_pct
        );
        eprintln!("│ NOTE: These are raw deflate numbers. CLI throughput is lower due to");
        eprintln!("│ gzip header parsing, mmap, routing, CRC32, and write I/O.");
        eprintln!("└────────────────────────────────────────────────\n");
    }
}
/// Prints a per-dataset decode analysis: block-type mix, table-cache hit rate,
/// specialized-decoder usage and single-pass throughput.
///
/// Each dataset is a gzip file under `benchmark_data/`. A dataset is skipped with
/// a message when the file cannot be read, or when it is too short / its header
/// fields overrun the trailer, so a missing or truncated download never aborts
/// the whole report with an index panic.
#[test]
fn bench_analyze() {
    use crate::decompress::inflate::consume_first_decode::{
        get_block_stats, get_cache_stats, get_spec_cache_stats, get_spec_stats,
        get_table_cache_size, reset_cache_stats,
    };

    /// Splits a gzip file into `(deflate payload, ISIZE trailer value)`.
    ///
    /// Walks the optional header fields selected by the FLG byte and treats the
    /// final 8 bytes as the CRC32 + ISIZE trailer. Returns `None` when the buffer
    /// is shorter than the fixed header plus trailer, or when the optional header
    /// fields extend into the trailer.
    fn split_gzip(gz: &[u8]) -> Option<(&[u8], usize)> {
        const FIXED_HEADER: usize = 10;
        const TRAILER: usize = 8;
        if gz.len() < FIXED_HEADER + TRAILER {
            return None;
        }
        let flg = gz[3];
        let mut pos = FIXED_HEADER;
        if (flg & 0x04) != 0 {
            // FEXTRA: 2-byte little-endian length, then that many bytes.
            // In bounds: pos + 1 == 11 and the buffer holds at least 18 bytes.
            let xlen = u16::from_le_bytes([gz[pos], gz[pos + 1]]) as usize;
            pos += 2 + xlen;
        }
        // FNAME (0x08) and FCOMMENT (0x10) are NUL-terminated strings.
        for &flag in &[0x08u8, 0x10] {
            if (flg & flag) != 0 {
                while pos < gz.len() && gz[pos] != 0 {
                    pos += 1;
                }
                pos += 1;
            }
        }
        if (flg & 0x02) != 0 {
            // FHCRC: 2-byte header checksum.
            pos += 2;
        }
        let payload_end = gz.len() - TRAILER;
        if pos > payload_end {
            return None;
        }
        let tail = &gz[gz.len() - 4..];
        let isize_field = u32::from_le_bytes([tail[0], tail[1], tail[2], tail[3]]) as usize;
        Some((&gz[pos..payload_end], isize_field))
    }

    let _ = crate::tests::datasets::prepare_datasets();
    let datasets = [
        (
            "silesia",
            "benchmark_data/silesia-gzip.tar.gz",
            "mixed content",
        ),
        (
            "software",
            "benchmark_data/software.archive.gz",
            "source code",
        ),
        ("logs", "benchmark_data/logs.txt.gz", "repetitive logs"),
    ];
    eprintln!("\n╔══════════════════════════════════════════════════════════════╗");
    eprintln!("║ GZIPPY DECOMPRESSION ANALYSIS ║");
    eprintln!("╠══════════════════════════════════════════════════════════════╣");
    eprintln!("║ Block types, cache stats, path usage ║");
    eprintln!("╚══════════════════════════════════════════════════════════════╝\n");
    for (name, path, desc) in &datasets {
        let gz = match std::fs::read(path) {
            Ok(d) => d,
            Err(_) => {
                eprintln!(" Skipping {} - file not found: {}", name, path);
                continue;
            }
        };
        let (deflate, expected_size) = match split_gzip(&gz) {
            Some(parts) => parts,
            None => {
                eprintln!(
                    " Skipping {} - truncated or malformed gzip file: {}",
                    name, path
                );
                continue;
            }
        };
        let mut output = vec![0u8; expected_size + 1024];
        reset_cache_stats();
        let start = std::time::Instant::now();
        // The decode result is deliberately ignored: this is a best-effort report.
        let _ = inflate_into_pub(deflate, &mut output);
        let elapsed = start.elapsed();
        let block_stats = get_block_stats();
        let (cache_hits, cache_misses, cache_rate) = get_cache_stats();
        let (spec_used, spec_fallback) = get_spec_stats();
        let speed = expected_size as f64 / elapsed.as_secs_f64() / 1_000_000.0;
        eprintln!(
            "┌─ {} ({}) ─────────────────────────────",
            name.to_uppercase(),
            desc
        );
        eprintln!("│");
        eprintln!(
            "│ Size: {:.2} MB compressed → {:.2} MB uncompressed",
            deflate.len() as f64 / 1_000_000.0,
            expected_size as f64 / 1_000_000.0
        );
        eprintln!("│ Speed: {:.1} MB/s", speed);
        eprintln!("│");

        // Block-type breakdown; a row is printed only for types that occurred.
        let total_blocks = block_stats.total_blocks();
        eprintln!("│ BLOCK TYPES ({} total):", total_blocks);
        if block_stats.stored_blocks > 0 {
            let pct = block_stats.stored_blocks as f64 / total_blocks as f64 * 100.0;
            let bytes_pct = block_stats.stored_bytes as f64 / expected_size as f64 * 100.0;
            eprintln!(
                "│ Stored: {:>5} blocks ({:>5.1}%) → {:>10} bytes ({:>5.1}%)",
                block_stats.stored_blocks, pct, block_stats.stored_bytes, bytes_pct
            );
        }
        if block_stats.fixed_blocks > 0 {
            let pct = block_stats.fixed_blocks as f64 / total_blocks as f64 * 100.0;
            let bytes_pct = block_stats.fixed_bytes as f64 / expected_size as f64 * 100.0;
            eprintln!(
                "│ Fixed: {:>5} blocks ({:>5.1}%) → {:>10} bytes ({:>5.1}%)",
                block_stats.fixed_blocks, pct, block_stats.fixed_bytes, bytes_pct
            );
        }
        if block_stats.dynamic_blocks > 0 {
            let pct = block_stats.dynamic_blocks as f64 / total_blocks as f64 * 100.0;
            let bytes_pct = block_stats.dynamic_bytes as f64 / expected_size as f64 * 100.0;
            eprintln!(
                "│ Dynamic: {:>5} blocks ({:>5.1}%) → {:>10} bytes ({:>5.1}%)",
                block_stats.dynamic_blocks, pct, block_stats.dynamic_bytes, bytes_pct
            );
        }
        eprintln!("│");

        // Table-cache section, shown only if the cache was consulted at all.
        let total_cache = cache_hits + cache_misses;
        let cache_size = get_table_cache_size();
        if total_cache > 0 {
            eprintln!("│ TABLE CACHE:");
            eprintln!(
                "│ Hits: {:>5} ({:.1}%)",
                cache_hits,
                cache_rate * 100.0
            );
            eprintln!("│ Misses: {:>5}", cache_misses);
            eprintln!("│ Unique: {:>5} fingerprints", cache_size);
            eprintln!("│");
        }

        // Specialized-vs-generic decode path section.
        let total_spec = spec_used + spec_fallback;
        let (spec_decoders, spec_failed, spec_total_uses, spec_max_uses) =
            get_spec_cache_stats();
        if total_spec > 0 {
            let spec_rate = spec_used as f64 / total_spec as f64 * 100.0;
            eprintln!("│ DECODE PATH:");
            eprintln!("│ Specialized: {:>5} ({:.1}%)", spec_used, spec_rate);
            eprintln!("│ Generic: {:>5}", spec_fallback);
            if spec_decoders > 0 || spec_failed > 0 {
                eprintln!("│ SPEC CACHE:");
                eprintln!("│ Decoders: {:>5} unique tables", spec_decoders);
                eprintln!("│ Failed: {:>5} (tables too complex)", spec_failed);
                if spec_decoders > 0 {
                    let avg_uses = spec_total_uses as f64 / spec_decoders as f64;
                    eprintln!(
                        "│ Reuse: {:>5.1}x avg, {}x max",
                        avg_uses, spec_max_uses
                    );
                }
            }
            eprintln!("│");
        }

        // The dominant type is chosen by output bytes; ties fall through to the
        // later branches ("fixed", then "stored").
        let dominant_type = if block_stats.dynamic_bytes > block_stats.fixed_bytes
            && block_stats.dynamic_bytes > block_stats.stored_bytes
        {
            "dynamic"
        } else if block_stats.fixed_bytes > block_stats.stored_bytes {
            "fixed"
        } else {
            "stored"
        };
        eprintln!("│ CHARACTERISTICS:");
        eprintln!("│ Dominant block type: {}", dominant_type);
        let compression_ratio = deflate.len() as f64 / expected_size as f64;
        eprintln!(
            "│ Compression ratio: {:.2}x ({:.1}% of original)",
            1.0 / compression_ratio,
            compression_ratio * 100.0
        );
        eprintln!("└────────────────────────────────────────────────\n");
    }
    eprintln!("╔══════════════════════════════════════════════════════════════╗");
    eprintln!("║ OPTIMIZATION NOTES ║");
    eprintln!("╠══════════════════════════════════════════════════════════════╣");
    eprintln!("║ - Dynamic blocks: libdeflate-style decode (fastest) ║");
    eprintln!("║ - Fixed blocks: need optimization (currently slower) ║");
    eprintln!("║ - High cache hit rate: table reuse working ║");
    eprintln!("║ - Low cache hit rate: consider fingerprint tuning ║");
    eprintln!("╚══════════════════════════════════════════════════════════════╝\n");
}
/// Prints, per dataset, how one decode pass splits between table building,
/// block decoding and everything else, using the counters returned by
/// `get_timing_stats`.
///
/// Each dataset is a gzip file under `benchmark_data/`. A dataset is skipped with
/// a message when the file cannot be read, or when it is too short / its header
/// fields overrun the trailer, instead of panicking on an out-of-range index.
#[test]
fn bench_profile() {
    use crate::decompress::inflate::consume_first_decode::{
        get_timing_stats, reset_cache_stats,
    };

    /// Splits a gzip file into `(deflate payload, ISIZE trailer value)`.
    ///
    /// Walks the optional header fields selected by the FLG byte and treats the
    /// final 8 bytes as the CRC32 + ISIZE trailer. Returns `None` when the buffer
    /// is shorter than the fixed header plus trailer, or when the optional header
    /// fields extend into the trailer.
    fn split_gzip(gz: &[u8]) -> Option<(&[u8], usize)> {
        const FIXED_HEADER: usize = 10;
        const TRAILER: usize = 8;
        if gz.len() < FIXED_HEADER + TRAILER {
            return None;
        }
        let flg = gz[3];
        let mut pos = FIXED_HEADER;
        if (flg & 0x04) != 0 {
            // FEXTRA: 2-byte little-endian length, then that many bytes.
            // In bounds: pos + 1 == 11 and the buffer holds at least 18 bytes.
            let xlen = u16::from_le_bytes([gz[pos], gz[pos + 1]]) as usize;
            pos += 2 + xlen;
        }
        // FNAME (0x08) and FCOMMENT (0x10) are NUL-terminated strings.
        for &flag in &[0x08u8, 0x10] {
            if (flg & flag) != 0 {
                while pos < gz.len() && gz[pos] != 0 {
                    pos += 1;
                }
                pos += 1;
            }
        }
        if (flg & 0x02) != 0 {
            // FHCRC: 2-byte header checksum.
            pos += 2;
        }
        let payload_end = gz.len() - TRAILER;
        if pos > payload_end {
            return None;
        }
        let tail = &gz[gz.len() - 4..];
        let isize_field = u32::from_le_bytes([tail[0], tail[1], tail[2], tail[3]]) as usize;
        Some((&gz[pos..payload_end], isize_field))
    }

    let _ = crate::tests::datasets::prepare_datasets();
    let datasets = [
        (
            "silesia",
            "benchmark_data/silesia-gzip.tar.gz",
            "mixed content",
        ),
        (
            "software",
            "benchmark_data/software.archive.gz",
            "source code",
        ),
        ("logs", "benchmark_data/logs.txt.gz", "repetitive logs"),
    ];
    eprintln!("\n╔══════════════════════════════════════════════════════════════╗");
    eprintln!("║ GZIPPY TIMING PROFILE ║");
    eprintln!("╠══════════════════════════════════════════════════════════════╣");
    eprintln!("║ Breakdown of table building vs decode time ║");
    eprintln!("╚══════════════════════════════════════════════════════════════╝\n");
    for (name, path, desc) in &datasets {
        let gz = match std::fs::read(path) {
            Ok(d) => d,
            Err(_) => {
                eprintln!("⚠ Skipping {} - file not found: {}", name, path);
                continue;
            }
        };
        let (deflate, expected_size) = match split_gzip(&gz) {
            Some(parts) => parts,
            None => {
                eprintln!(
                    "⚠ Skipping {} - truncated or malformed gzip file: {}",
                    name, path
                );
                continue;
            }
        };
        let mut output = vec![0u8; expected_size + 1024];
        reset_cache_stats();
        let start = std::time::Instant::now();
        // The decode result is deliberately ignored: this is a best-effort report.
        let _ = inflate_into_pub(deflate, &mut output);
        let total_time = start.elapsed();
        let timing = get_timing_stats();

        // Percentages are relative to the wall-clock time of the whole call;
        // "other" is whatever the two counters do not account for.
        let total_nanos = total_time.as_nanos() as f64;
        let table_pct = timing.table_build_nanos as f64 / total_nanos * 100.0;
        let decode_pct = timing.decode_nanos as f64 / total_nanos * 100.0;
        let other_pct = 100.0 - table_pct - decode_pct;
        let avg_table_us = if timing.table_build_count > 0 {
            timing.table_build_nanos as f64 / timing.table_build_count as f64 / 1000.0
        } else {
            0.0
        };
        let avg_decode_us = if timing.decode_count > 0 {
            timing.decode_nanos as f64 / timing.decode_count as f64 / 1000.0
        } else {
            0.0
        };
        eprintln!(
            "┌─ {} ({}) ─────────────────────────────",
            name.to_uppercase(),
            desc
        );
        eprintln!(
            "│ Size: {:.1} MB, Total time: {:.1}ms",
            expected_size as f64 / 1_000_000.0,
            total_time.as_secs_f64() * 1000.0
        );
        eprintln!("│");
        eprintln!("│ TIME BREAKDOWN:");
        eprintln!(
            "│ Table building: {:>6.1}ms ({:>5.1}%) - {} tables, {:.1}µs avg",
            timing.table_build_nanos as f64 / 1_000_000.0,
            table_pct,
            timing.table_build_count,
            avg_table_us
        );
        eprintln!(
            "│ Decoding: {:>6.1}ms ({:>5.1}%) - {} blocks, {:.1}µs avg",
            timing.decode_nanos as f64 / 1_000_000.0,
            decode_pct,
            timing.decode_count,
            avg_decode_us
        );
        eprintln!(
            "│ Other/overhead: {:>6.1}ms ({:>5.1}%)",
            (total_nanos - timing.table_build_nanos as f64 - timing.decode_nanos as f64)
                / 1_000_000.0,
            other_pct
        );
        eprintln!(
            "│ Speed: {:.1} MB/s",
            expected_size as f64 / total_time.as_secs_f64() / 1_000_000.0
        );
        eprintln!("└────────────────────────────────────────────────\n");
    }
}
}
#[cfg(test)]
mod optimization_tests {
use super::*;
/// Panics with a diagnostic report unless `actual` and `expected` are equal.
///
/// The report always lists both lengths. If a differing byte exists inside the
/// common prefix, it also gives the offset of the first one, both byte values,
/// and a printable window (10 bytes before to 20 bytes after, clamped to the
/// shorter slice). If only the lengths differ, it says so instead.
/// `context` is echoed on the first line of the report.
fn assert_bytes_eq(actual: &[u8], expected: &[u8], context: &str) {
    if actual == expected {
        return;
    }

    // Renders bytes for display; anything that is not printable ASCII becomes '.'.
    let printable = |bytes: &[u8]| -> String {
        bytes
            .iter()
            .map(|&b| match b {
                b' ' => ' ',
                _ if b.is_ascii_graphic() => b as char,
                _ => '.',
            })
            .collect()
    };

    let mut report = format!("\n{} - byte mismatch:\n", context);
    report += &format!(
        " lengths: actual={}, expected={}\n",
        actual.len(),
        expected.len()
    );

    match actual.iter().zip(expected).position(|(a, b)| a != b) {
        Some(pos) => {
            report += &format!(" first diff at byte {}\n", pos);
            report += &format!(
                " actual[{}]={:#04x}, expected[{}]={:#04x}\n",
                pos, actual[pos], pos, expected[pos]
            );
            let lo = pos.saturating_sub(10);
            let hi = (pos + 20).min(actual.len()).min(expected.len());
            if hi > lo {
                report += &format!(
                    " actual [{}..{}]: \"{}\"\n",
                    lo,
                    hi,
                    printable(&actual[lo..hi])
                );
                report += &format!(
                    " expected[{}..{}]: \"{}\"\n",
                    lo,
                    hi,
                    printable(&expected[lo..hi])
                );
            }
        }
        None if actual.len() != expected.len() => {
            report.push_str(" content matches up to shorter length\n");
        }
        None => {}
    }

    panic!("{}", report);
}
/// Extra bits must be recoverable from a bit-buffer snapshot taken before the
/// codeword is consumed.
///
/// The snapshot is shifted past an 8-bit "codeword" and masked to 5 bits; the
/// expected value is the low five bits of the second input byte.
#[test]
fn test_saved_bitbuf_extra_bits() {
    const CODEWORD_BITS: u32 = 8;
    const EXTRA_BITS: u32 = 5;

    let input: [u8; 16] = [
        0b11010101, 0b10101010, 0b11001100, 0b00110011, 0xFF, 0x00, 0xAA, 0x55, 0x12, 0x34,
        0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0,
    ];
    let mut reader = TurboBits::new(&input);
    reader.ensure(32);

    // Snapshot first, then consume: the snapshot still holds the extra bits.
    let snapshot = reader.buffer();
    reader.consume(CODEWORD_BITS);

    let mask = (1u64 << EXTRA_BITS) - 1;
    let extra = ((snapshot >> CODEWORD_BITS) & mask) as u32;
    let expected = 0b10101010u64 & 0b11111;
    assert_eq!(
        extra, expected as u32,
        "saved_bitbuf extra extraction failed: got {:05b}, expected {:05b}",
        extra, expected
    );
}
/// Decodes a match length from a saved bit buffer: a 7-bit codeword followed by
/// one extra bit, added to a base length of 7, must yield 8.
#[test]
fn test_saved_bitbuf_length_decode() {
    let input: [u8; 8] = [0b10101011, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF];
    let (codeword_len, extra_count, length_base) = (7u32, 1u32, 7u32);

    let mut reader = TurboBits::new(&input);
    reader.ensure(16);
    let snapshot = reader.buffer();

    // Codeword and extra bits are consumed in one step; the extra bit is then
    // read back from the snapshot rather than from the live buffer.
    reader.consume(codeword_len + extra_count);
    let extra = (snapshot >> codeword_len) & ((1 << extra_count) - 1);

    assert_eq!(
        length_base + extra as u32,
        8,
        "Length decode with saved_bitbuf failed"
    );
}
/// A 5-byte match at distance 100 written at offset 100 must reproduce the
/// first five bytes of the buffer and return the advanced position (105).
#[test]
fn test_unconditional_copy_short_match() {
    let mut buf = vec![0u8; 1024];
    // Seed bytes 0..8 with 1..=8 so the copied region is distinguishable from
    // the zero fill.
    for (slot, value) in buf.iter_mut().zip(1u8..=8) {
        *slot = value;
    }

    let end = copy_match_into(&mut buf, 100, 100, 5);
    assert_eq!(end, 105);

    for offset in 0..5 {
        assert_eq!(
            buf[100 + offset],
            buf[offset],
            "Mismatch at byte {}",
            offset
        );
    }
}
/// A 35-byte match whose source (offset 10) and destination (offset 200) do not
/// overlap must be copied verbatim, and the returned position must be the end
/// of the destination range.
#[test]
fn test_unconditional_copy_non_overlapping() {
    const SRC: usize = 10;
    const DST: usize = 200;
    const LEN: usize = 35;

    let mut buf = vec![0u8; 1024];
    for (i, slot) in buf.iter_mut().take(100).enumerate() {
        *slot = (i as u8).wrapping_mul(7);
    }

    let end = copy_match_into(&mut buf, DST, DST - SRC, LEN);
    assert_eq!(end, DST + LEN);

    let copied = &buf[DST..DST + LEN];
    let source = &buf[SRC..SRC + LEN];
    for (offset, (got, want)) in copied.iter().zip(source).enumerate() {
        assert_eq!(*got, *want, "Copy mismatch at offset {}", offset);
    }
}
/// A distance-1 match of length 100 must replicate the single preceding byte
/// (0xAA at offset 50) across offsets 51..151.
#[test]
fn test_rle_optimization() {
    let mut buf = vec![0u8; 1024];
    buf[50] = 0xAA;

    let end = copy_match_into(&mut buf, 51, 1, 100);
    assert_eq!(end, 151);

    for (pos, &byte) in buf.iter().enumerate().skip(51).take(100) {
        assert_eq!(byte, 0xAA, "RLE mismatch at position {}", pos);
    }
}
/// Two equal code-length vectors must hash to the same `DefaultHasher` digest.
///
/// The fingerprint is computed locally in this test with the standard hasher.
#[test]
fn test_table_fingerprint_identical() {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = |lens: &[u8]| -> u64 {
        let mut state = DefaultHasher::new();
        lens.hash(&mut state);
        state.finish()
    };

    let first: Vec<u8> = vec![8, 8, 8, 8, 7, 7, 7, 7, 6, 6];
    let second: Vec<u8> = vec![8, 8, 8, 8, 7, 7, 7, 7, 6, 6];

    assert_eq!(
        digest(&first),
        digest(&second),
        "Identical code lengths should have same fingerprint"
    );
}
/// Two code-length vectors that differ in one position must hash to different
/// `DefaultHasher` digests.
///
/// The fingerprint is computed locally in this test with the standard hasher.
#[test]
fn test_table_fingerprint_different() {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = |lens: &[u8]| -> u64 {
        let mut state = DefaultHasher::new();
        lens.hash(&mut state);
        state.finish()
    };

    let first: Vec<u8> = vec![8, 8, 8, 8, 7, 7, 7, 7];
    let second: Vec<u8> = vec![8, 8, 8, 7, 7, 7, 7, 7];

    assert_ne!(
        digest(&first),
        digest(&second),
        "Different code lengths should have different fingerprints"
    );
}
/// On x86_64 hosts that report BMI2 at runtime, a gzip round trip through
/// `inflate_into_pub` must reproduce the input exactly. Returns early with a
/// message when BMI2 is not detected.
#[test]
#[cfg(target_arch = "x86_64")]
fn test_bmi2_equivalence() {
    use crate::assert_slices_eq;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    if !is_x86_feature_detected!("bmi2") {
        eprintln!("BMI2 not available, skipping test");
        return;
    }

    let original: Vec<u8> = (0..10000).map(|i| ((i * 7) % 256) as u8).collect();

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::default());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };

    // Strip the gzip header and the 8-byte trailer to get the raw deflate stream.
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let payload = &compressed[payload_start..compressed.len() - 8];

    let mut output = vec![0u8; original.len() + 1024];
    let written = inflate_into_pub(payload, &mut output).unwrap();
    output.truncate(written);
    assert_slices_eq!(output, original, "BMI2 path produced different output");
}
/// For every shift amount in 0..64, the plain `>>` operator and `wrapping_shr`
/// must agree on a fixed 64-bit pattern.
#[test]
fn test_variable_shift() {
    const PATTERN: u64 = 0xDEADBEEFCAFEBABE;

    (0..64u32).for_each(|shift| {
        let plain = PATTERN >> shift;
        let wrapped = PATTERN.wrapping_shr(shift);
        assert_eq!(plain, wrapped, "Shift mismatch for shift={}", shift);
    });
}
/// Builds a `TwoLevelTable` from a 288-entry length set that contains 12-bit
/// codes and checks that decoding an all-ones bit pattern yields a non-zero
/// code length and an in-range symbol.
#[test]
fn test_11bit_table_with_subtable() {
    // Symbols 0..=255 and 257..=279 use 8 bits, 256 uses 7, 280..=287 use 12.
    let mut lens = vec![8u8; 288];
    lens[256] = 7;
    lens[280..].fill(12);

    let table = TwoLevelTable::build(&lens).unwrap();

    let all_ones: u64 = 0b111111111111;
    let (symbol, code_len) = table.decode(all_ones);
    assert!(code_len > 0, "Should decode valid code");
    assert!(symbol < 288, "Symbol should be in range");
}
/// Checks the bit-availability bookkeeping of `TurboBits`: at least 56 bits
/// right after construction, at least 16 after consuming 40, and at least 20
/// after an explicit `ensure(20)`.
#[test]
fn test_minimal_refill() {
    let input: Vec<u8> = (0..1000).map(|i| (i % 256) as u8).collect();
    let mut reader = TurboBits::new(&input);

    assert!(reader.has_bits(56), "Initial refill should give 56+ bits");

    reader.consume(40);
    assert!(
        reader.has_bits(16),
        "Should have 16+ bits after consuming 40"
    );

    reader.ensure(20);
    assert!(reader.has_bits(20), "Ensure should work");
}
/// The 12 bits that follow the current 12-bit entry, peeked from the buffer
/// before `consume(12)`, must equal the low 12 bits of the buffer afterwards.
#[test]
fn test_preload_before_consume() {
    const ENTRY_BITS: u32 = 12;
    const ENTRY_MASK: u64 = 0xFFF;

    let input: Vec<u8> = (0..1000).map(|i| (i % 256) as u8).collect();
    let mut reader = TurboBits::new(&input);
    reader.ensure(56);

    // Peek past the current entry without consuming anything.
    let preview = reader.buffer() >> 12;

    reader.consume(ENTRY_BITS);
    let after_consume = reader.buffer() & ENTRY_MASK;

    assert_eq!(
        preview & ENTRY_MASK,
        after_consume,
        "Preload should match actual next bits"
    );
}
/// Round-trips 100,000 bytes of mixed content (pseudo-random, constant 0xAA,
/// slowly varying, and zero runs, repeating every 100 bytes) through gzip at
/// best compression and `inflate_into_pub`, then checks length and content.
/// On failure the first ten mismatching offsets are reported.
#[test]
fn test_optimized_decode_correctness() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    let original: Vec<u8> = (0..100_000)
        .map(|i| match i % 100 {
            0..=30 => (i * 17 % 256) as u8,
            31..=60 => 0xAA,
            61..=80 => ((i / 100) % 256) as u8,
            _ => 0x00,
        })
        .collect();

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::best());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };

    // Strip the gzip header and the 8-byte trailer to get the raw deflate stream.
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let payload = &compressed[payload_start..compressed.len() - 8];

    let mut decoded = vec![0u8; original.len() + 1024];
    let written = inflate_into_pub(payload, &mut decoded).unwrap();
    decoded.truncate(written);
    assert_eq!(decoded.len(), original.len(), "Size mismatch");

    let mut first_mismatches: Vec<usize> = Vec::new();
    for (offset, (want, got)) in original.iter().zip(decoded.iter()).enumerate() {
        if want != got {
            first_mismatches.push(offset);
            if first_mismatches.len() == 10 {
                break;
            }
        }
    }
    assert!(
        first_mismatches.is_empty(),
        "Content mismatch at positions: {:?}",
        first_mismatches
    );
}
/// Times `inflate_into_pub` on 50,000 bytes of a non-repeating arithmetic byte
/// sequence compressed with gzip's fast level: 3 warm-up runs, then 50 timed
/// runs. Prints total time, throughput and compression ratio. Decode results
/// are ignored.
#[test]
fn bench_pure_literals() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::time::Instant;

    const WARMUP_RUNS: usize = 3;
    const TIMED_RUNS: usize = 50;

    let original: Vec<u8> = (0..50_000).map(|i| ((i * 17 + 31) % 256) as u8).collect();

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::fast());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let deflate_data = &compressed[payload_start..compressed.len() - 8];
    let mut output = vec![0u8; original.len() + 1024];

    for _ in 0..WARMUP_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }

    let timer = Instant::now();
    for _ in 0..TIMED_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }
    let elapsed = timer.elapsed();

    let bytes_per_run = original.len();
    let throughput_mb_s =
        (bytes_per_run * TIMED_RUNS) as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!(
        "\n[BENCH] Pure Literals: {} iterations, {} bytes each",
        TIMED_RUNS, bytes_per_run
    );
    eprintln!(
        "[BENCH] Time: {:.2}ms total, {:.1} MB/s",
        elapsed.as_secs_f64() * 1000.0,
        throughput_mb_s
    );
    eprintln!(
        "[BENCH] Compression ratio: {:.2}x",
        original.len() as f64 / deflate_data.len() as f64
    );
}
/// Times `inflate_into_pub` on 100 runs of 500 identical bytes each (50,000
/// bytes total) compressed with gzip's best level: 3 warm-up runs, then 100
/// timed runs. Prints total time, throughput and compression ratio. Decode
/// results are ignored.
#[test]
fn bench_rle_matches() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::time::Instant;

    const WARMUP_RUNS: usize = 3;
    const TIMED_RUNS: usize = 100;

    let mut original: Vec<u8> = Vec::with_capacity(50_000);
    for i in 0..100 {
        original.extend(std::iter::repeat((i % 256) as u8).take(500));
    }

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::best());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let deflate_data = &compressed[payload_start..compressed.len() - 8];
    let mut output = vec![0u8; original.len() + 1024];

    for _ in 0..WARMUP_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }

    let timer = Instant::now();
    for _ in 0..TIMED_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }
    let elapsed = timer.elapsed();

    let throughput_mb_s =
        (original.len() * TIMED_RUNS) as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!("\n[BENCH] RLE Matches (d=1): {} iterations", TIMED_RUNS);
    eprintln!(
        "[BENCH] Time: {:.2}ms total, {:.1} MB/s",
        elapsed.as_secs_f64() * 1000.0,
        throughput_mb_s
    );
    eprintln!(
        "[BENCH] Compression ratio: {:.2}x",
        original.len() as f64 / deflate_data.len() as f64
    );
}
/// Times `inflate_into_pub` on 50,000 bytes made of a repeating 4-byte pattern
/// compressed with gzip's best level: 3 warm-up runs, then 100 timed runs.
/// Prints total time, throughput and compression ratio. Decode results are
/// ignored.
#[test]
fn bench_short_distance_matches() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::time::Instant;

    const PATTERN: [u8; 4] = [0xDE, 0xAD, 0xBE, 0xEF];
    const WARMUP_RUNS: usize = 3;
    const TIMED_RUNS: usize = 100;

    let original: Vec<u8> = (0..50_000).map(|i| PATTERN[i % PATTERN.len()]).collect();

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::best());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let deflate_data = &compressed[payload_start..compressed.len() - 8];
    let mut output = vec![0u8; original.len() + 1024];

    for _ in 0..WARMUP_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }

    let timer = Instant::now();
    for _ in 0..TIMED_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }
    let elapsed = timer.elapsed();

    let throughput_mb_s =
        (original.len() * TIMED_RUNS) as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!(
        "\n[BENCH] Short Distance Matches (d=2-7): {} iterations",
        TIMED_RUNS
    );
    eprintln!(
        "[BENCH] Time: {:.2}ms total, {:.1} MB/s",
        elapsed.as_secs_f64() * 1000.0,
        throughput_mb_s
    );
    eprintln!(
        "[BENCH] Compression ratio: {:.2}x",
        original.len() as f64 / deflate_data.len() as f64
    );
}
/// Times `inflate_into_pub` on a 500-byte block repeated 100 times (50,000
/// bytes) compressed with gzip's best level: 3 warm-up runs, then 100 timed
/// runs. Prints total time, throughput and compression ratio. Decode results
/// are ignored.
#[test]
fn bench_long_distance_matches() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::time::Instant;

    const WARMUP_RUNS: usize = 3;
    const TIMED_RUNS: usize = 100;

    let block: Vec<u8> = (0..500).map(|i| (i * 7 % 256) as u8).collect();
    let original: Vec<u8> = block.repeat(100);

    let compressed = {
        let mut gz = GzEncoder::new(Vec::new(), Compression::best());
        gz.write_all(&original).unwrap();
        gz.finish().unwrap()
    };
    let payload_start =
        crate::decompress::parallel::marker_decode::skip_gzip_header(&compressed).unwrap();
    let deflate_data = &compressed[payload_start..compressed.len() - 8];
    let mut output = vec![0u8; original.len() + 1024];

    for _ in 0..WARMUP_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }

    let timer = Instant::now();
    for _ in 0..TIMED_RUNS {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }
    let elapsed = timer.elapsed();

    let throughput_mb_s =
        (original.len() * TIMED_RUNS) as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!(
        "\n[BENCH] Long Distance Matches (d>=40): {} iterations",
        TIMED_RUNS
    );
    eprintln!(
        "[BENCH] Time: {:.2}ms total, {:.1} MB/s",
        elapsed.as_secs_f64() * 1000.0,
        throughput_mb_s
    );
    eprintln!(
        "[BENCH] Compression ratio: {:.2}x",
        original.len() as f64 / deflate_data.len() as f64
    );
}
/// Micro-benchmark: 1000 passes over 100,000 precomputed indices into a
/// 4096-entry `u32` table, summing the looked-up values. Prints millions of
/// lookups per second and the checksum (so the work cannot be discarded).
#[test]
fn bench_table_lookup_speed() {
    use std::time::Instant;

    const PASSES: usize = 1000;

    let table: Vec<u32> = (0..4096).map(|i| i as u32 * 0x12345).collect();
    let indices: Vec<u64> = (0..100_000).map(|i| (i * 7919) % 4096).collect();

    let timer = Instant::now();
    let mut checksum = 0u64;
    for _ in 0..PASSES {
        for &raw in &indices {
            checksum = checksum.wrapping_add(table[(raw & 0xFFF) as usize] as u64);
        }
    }
    let elapsed = timer.elapsed();

    let total_lookups = indices.len() * PASSES;
    let mega_lookups_per_sec = total_lookups as f64 / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!(
        "\n[BENCH] Table Lookup: {:.1} M lookups/sec",
        mega_lookups_per_sec
    );
    eprintln!("[BENCH] (sum={} to prevent optimization)", checksum % 1000);
}
/// Micro-benchmark comparing two orderings of the same per-entry work over
/// 100,000 synthetic table entries, repeated 1000 times each:
///
/// 1. "check-first": branch on the entry's top bit, then add the low byte to
///    the bit counter inside each arm;
/// 2. "consume-first": add the low byte to the bit counter unconditionally,
///    then branch on the top bit.
///
/// Prints both timings, the relative speedup of the second form, and the
/// accumulated sums so the loops have an observable result.
#[test]
fn bench_branch_pattern() {
// Entries whose `(i * 7919) % 10` is below 7 get the top bit set, which is
// what `(entry as i32) < 0` tests below. Every entry carries 8 in its low byte.
let entries: Vec<u32> = (0..100_000)
.map(|i| {
if (i * 7919) % 10 < 7 {
0x8000_0008 | ((i as u32 & 0xFF) << 16) } else {
0x0000_0008 | ((i as u32 & 0xFF) << 16) }
})
.collect();
let iterations = 1000;
// Variant 1: test the top bit first; each arm does its own bit accounting.
let start = std::time::Instant::now();
let mut sum = 0u64;
let mut bits_consumed = 0u64;
for _ in 0..iterations {
for &entry in &entries {
if (entry as i32) < 0 {
bits_consumed += (entry & 0xFF) as u64;
sum += ((entry >> 16) & 0xFF) as u64;
} else {
bits_consumed += (entry & 0xFF) as u64;
sum += entry as u64;
}
}
}
let elapsed1 = start.elapsed();
// Variant 2: do the bit accounting unconditionally, then branch.
let start = std::time::Instant::now();
let mut sum2 = 0u64;
let mut bits_consumed2 = 0u64;
for _ in 0..iterations {
for &entry in &entries {
bits_consumed2 += (entry & 0xFF) as u64;
if (entry as i32) < 0 {
sum2 += ((entry >> 16) & 0xFF) as u64;
} else {
sum2 += entry as u64;
}
}
}
let elapsed2 = start.elapsed();
eprintln!("\n[BENCH] Branch Pattern Comparison:");
eprintln!(
"[BENCH] Check-first (ours): {:.2}ms",
elapsed1.as_secs_f64() * 1000.0
);
eprintln!(
"[BENCH] Consume-first (libdeflate): {:.2}ms",
elapsed2.as_secs_f64() * 1000.0
);
// Speedup is variant 1 time over variant 2 time, minus one, as a percentage.
eprintln!(
"[BENCH] Consume-first speedup: {:.0}%",
(elapsed1.as_secs_f64() / elapsed2.as_secs_f64() - 1.0) * 100.0
);
// Printing the accumulators gives both loops an observable result.
eprintln!(
"[BENCH] (sums: {} {} bits: {} {})",
sum % 1000,
sum2 % 1000,
bits_consumed % 1000,
bits_consumed2 % 1000
);
}
/// Micro-benchmark comparing ways of extracting the low `count` bits (1..=15)
/// of a 64-bit word over 100,000 word/count pairs, repeated 500 times:
///
/// * shift-and-subtract mask,
/// * the same mask built with `wrapping_shl` / `wrapping_sub`,
/// * the BMI2 `bzhi` intrinsic, only when the crate is compiled for x86_64
///   with the `bmi2` target feature statically enabled.
///
/// Prints the timing of each variant plus checksums of the first two.
#[test]
fn bench_bit_extraction() {
    let words: Vec<u64> = (0..100_000).map(|i| i * 0x12345678ABCD).collect();
    let counts: Vec<u32> = (0..100_000).map(|i| (i % 15 + 1) as u32).collect();
    let iterations = 500;

    let start = std::time::Instant::now();
    let mut sum = 0u64;
    for _ in 0..iterations {
        for (&word, &count) in words.iter().zip(counts.iter()) {
            sum = sum.wrapping_add(word & ((1u64 << count) - 1));
        }
    }
    let elapsed_mask = start.elapsed();

    let start = std::time::Instant::now();
    let mut sum2 = 0u64;
    for _ in 0..iterations {
        for (&word, &count) in words.iter().zip(counts.iter()) {
            sum2 = sum2.wrapping_add(word & (1u64.wrapping_shl(count).wrapping_sub(1)));
        }
    }
    let elapsed_wrap = start.elapsed();

    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    let elapsed_bmi2 = {
        use std::arch::x86_64::_bzhi_u64;
        let start = std::time::Instant::now();
        let mut sum3 = 0u64;
        for _ in 0..iterations {
            for (&word, &count) in words.iter().zip(counts.iter()) {
                // SAFETY: this block is compiled only when the `bmi2` target
                // feature is statically enabled, so the instruction is
                // available on every CPU this binary is built to run on.
                sum3 = sum3.wrapping_add(unsafe { _bzhi_u64(word, count) });
            }
        }
        let elapsed = start.elapsed();
        // Unlike `sum` and `sum2`, this checksum is never printed. Route it
        // through `black_box` so the optimizer cannot delete the timed loop.
        std::hint::black_box(sum3);
        elapsed
    };

    eprintln!("\n[BENCH] Bit Extraction Methods:");
    eprintln!(
        "[BENCH] Mask (standard): {:.2}ms",
        elapsed_mask.as_secs_f64() * 1000.0
    );
    eprintln!(
        "[BENCH] Wrapping ops: {:.2}ms",
        elapsed_wrap.as_secs_f64() * 1000.0
    );
    #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
    eprintln!(
        "[BENCH] BMI2 bzhi: {:.2}ms",
        elapsed_bmi2.as_secs_f64() * 1000.0
    );
    // The gate is a compile-time target feature, not a runtime CPU probe, so
    // the message must not claim anything about the CPU.
    #[cfg(not(all(target_arch = "x86_64", target_feature = "bmi2")))]
    eprintln!("[BENCH] BMI2 bzhi: (not compiled in; build with -C target-feature=+bmi2)");
    eprintln!(
        "[BENCH] (sums: {} {} to prevent optimization)",
        sum % 1000,
        sum2 % 1000
    );
}
/// Micro-benchmark comparing two ways of handling main-table entries that are
/// flagged as long codes (every 20th of 4096 entries):
///
/// * "fallback": on a flagged entry, take a stand-in slow path that derives the
///   result from the raw bits;
/// * "subtable": on a flagged entry, index a 256-entry secondary table using the
///   bits above the 12-bit main index.
///
/// Both loops detect long codes through the same `0x4000` flag bit. The fallback
/// loop previously tested `entry & 0xFF == 0`, which no synthetic entry
/// satisfies (the low byte is always 8 or 12), so its slow path never ran and
/// the printed ratio compared a bare lookup against the subtable path.
#[test]
fn bench_subtable_vs_fallback() {
    /// Marks a main-table entry as referring to a long code.
    const LONG_CODE_FLAG: u32 = 0x4000;

    let main_table: Vec<u32> = (0..4096)
        .map(|i| {
            if i % 20 == 0 {
                // Long code: subtable base in the high half, 4 extra index bits
                // in bits 8..14, 12 in the low byte.
                LONG_CODE_FLAG | ((i as u32) << 16) | (4 << 8) | 12
            } else {
                0x8000_0008 | ((i as u32 & 0xFF) << 16)
            }
        })
        .collect();
    let subtable: Vec<u32> = (0..256).map(|i| 0x8000_0004 | (i << 16)).collect();
    let bits_sequence: Vec<u64> = (0..100_000)
        .map(|i| (i * 0x12345678) % 0xFFFFFFFF)
        .collect();
    let iterations = 500;

    let start = std::time::Instant::now();
    let mut sum = 0u64;
    for _ in 0..iterations {
        for &bits in &bits_sequence {
            let idx = (bits & 0xFFF) as usize;
            let entry = main_table[idx];
            if entry & LONG_CODE_FLAG != 0 {
                // Stand-in for the slow path taken on long codes.
                sum = sum.wrapping_add(bits & 0xFF);
            } else {
                sum = sum.wrapping_add(entry as u64);
            }
        }
    }
    let elapsed_fallback = start.elapsed();

    let start = std::time::Instant::now();
    let mut sum2 = 0u64;
    for _ in 0..iterations {
        for &bits in &bits_sequence {
            let idx = (bits & 0xFFF) as usize;
            let entry = main_table[idx];
            if entry & LONG_CODE_FLAG != 0 {
                let subtable_idx = (entry >> 16) as usize;
                let extra_bits = (entry >> 8) & 0x3F;
                let sub_idx = (bits >> 12) & ((1 << extra_bits) - 1);
                // The modulo keeps the synthetic base + offset inside the table.
                let sub_entry = subtable[(subtable_idx + sub_idx as usize) % subtable.len()];
                sum2 = sum2.wrapping_add(sub_entry as u64);
            } else {
                sum2 = sum2.wrapping_add(entry as u64);
            }
        }
    }
    let elapsed_subtable = start.elapsed();

    eprintln!("\n[BENCH] Subtable vs Fallback (5% long codes):");
    eprintln!(
        "[BENCH] Fallback (current): {:.2}ms",
        elapsed_fallback.as_secs_f64() * 1000.0
    );
    eprintln!(
        "[BENCH] Subtable (libdeflate): {:.2}ms",
        elapsed_subtable.as_secs_f64() * 1000.0
    );
    eprintln!(
        "[BENCH] Ratio: {:.2}x",
        elapsed_fallback.as_secs_f64() / elapsed_subtable.as_secs_f64()
    );
    eprintln!(
        "[BENCH] (sums: {} {} to prevent opt)",
        sum % 1000,
        sum2 % 1000
    );
}
/// Micro-benchmark comparing "recompute every time" against "look up by
/// fingerprint in a `HashMap`" for 10 code-length patterns queried 10,000 times
/// per round over 50 rounds.
///
/// The stand-in for table building is summing the 288 code lengths; the
/// fingerprint is an XOR fold of each length shifted by its index modulo 8.
/// Prints both timings, their ratio and the checksums.
#[test]
fn bench_jit_table_cache() {
    use std::collections::HashMap;
    use std::time::Instant;

    let code_lengths: Vec<Vec<u8>> = (0..100)
        .map(|seed| {
            (0..288)
                .map(|i| {
                    let base = (seed * 7 + i * 3) % 15;
                    base.clamp(1, 15) as u8
                })
                .collect()
        })
        .collect();
    // Only the first ten generated patterns are queried.
    let hot_patterns: Vec<&Vec<u8>> = code_lengths.iter().take(10).collect();
    let query_order: Vec<usize> = (0..10000).map(|i| i % 10).collect();
    let rounds = 50;

    // Baseline: redo the "build" (sum of lengths) on every query.
    let timer = Instant::now();
    let mut rebuild_checksum = 0u64;
    for _ in 0..rounds {
        for &slot in &query_order {
            let lens = &hot_patterns[slot];
            let built: u64 = lens.iter().map(|&b| b as u64).sum();
            rebuild_checksum = rebuild_checksum.wrapping_add(built);
        }
    }
    let elapsed_rebuild = timer.elapsed();

    // Cached: fingerprint the pattern, build only on a cache miss.
    let timer = Instant::now();
    let mut cached_checksum = 0u64;
    let mut cache: HashMap<u64, u64> = HashMap::new();
    for _ in 0..rounds {
        for &slot in &query_order {
            let lens = &hot_patterns[slot];
            let fingerprint: u64 = lens
                .iter()
                .enumerate()
                .map(|(i, &b)| (b as u64) << (i % 8))
                .fold(0, |acc, term| acc ^ term);
            let built = *cache
                .entry(fingerprint)
                .or_insert_with(|| lens.iter().map(|&b| b as u64).sum());
            cached_checksum = cached_checksum.wrapping_add(built);
        }
    }
    let elapsed_cached = timer.elapsed();

    eprintln!("\n[BENCH] JIT Table Cache (10 unique patterns):");
    eprintln!(
        "[BENCH] Always rebuild: {:.2}ms",
        elapsed_rebuild.as_secs_f64() * 1000.0
    );
    eprintln!(
        "[BENCH] Fingerprint cache: {:.2}ms",
        elapsed_cached.as_secs_f64() * 1000.0
    );
    eprintln!(
        "[BENCH] Speedup: {:.1}x",
        elapsed_rebuild.as_secs_f64() / elapsed_cached.as_secs_f64()
    );
    eprintln!(
        "[BENCH] (sums: {} {} to prevent opt)",
        rebuild_checksum % 1000,
        cached_checksum % 1000
    );
}
/// Micro-benchmark comparing a one-symbol-per-entry `u32` table against a
/// `u64` table in which 6 of every 10 entries pack a second symbol (signalled
/// by bit 8). Each table is probed 100,000 times per pass for 500 passes.
/// Prints both timings, symbols decoded, symbols per millisecond and checksums.
#[test]
fn bench_multi_symbol_decode() {
    use std::time::Instant;

    let single_table: Vec<u32> = (0..4096).map(|i| ((i & 0xFF) as u32) << 16 | 8).collect();
    let multi_table: Vec<u64> = (0..4096)
        .map(|i| {
            // First symbol in the top byte, followed by its 4-bit length field.
            let sym1 = (i & 0xFF) as u64;
            let first = (sym1 << 56) | (8 << 52);
            if i % 10 < 6 {
                let sym2 = ((i >> 4) & 0xFF) as u64;
                first | (sym2 << 44) | (8 << 40) | (16) | (1 << 8)
            } else {
                first | 8
            }
        })
        .collect();
    let indices: Vec<usize> = (0..100_000).map(|i| (i * 7919) % 4096).collect();
    let passes = 500;

    let timer = Instant::now();
    let mut single_sum = 0u64;
    let mut single_decoded = 0u64;
    for _ in 0..passes {
        for &idx in &indices {
            let entry = single_table[idx];
            single_sum = single_sum.wrapping_add((entry >> 16) as u64);
            single_decoded += 1;
        }
    }
    let elapsed_single = timer.elapsed();

    let timer = Instant::now();
    let mut multi_sum = 0u64;
    let mut multi_decoded = 0u64;
    for _ in 0..passes {
        for &idx in &indices {
            let entry = multi_table[idx];
            let sym1 = entry >> 56;
            multi_sum = multi_sum.wrapping_add(sym1);
            multi_decoded += 1;
            // Bit 8 signals that a second symbol is packed in the entry.
            if entry & (1 << 8) != 0 {
                let sym2 = (entry >> 44) & 0xFF;
                multi_sum = multi_sum.wrapping_add(sym2);
                multi_decoded += 1;
            }
        }
    }
    let elapsed_multi = timer.elapsed();

    eprintln!("\n[BENCH] Multi-Symbol Decode (60% doubles):");
    eprintln!(
        "[BENCH] Single symbol: {:.2}ms ({} symbols)",
        elapsed_single.as_secs_f64() * 1000.0,
        single_decoded
    );
    eprintln!(
        "[BENCH] Multi symbol: {:.2}ms ({} symbols)",
        elapsed_multi.as_secs_f64() * 1000.0,
        multi_decoded
    );
    eprintln!(
        "[BENCH] Symbols/ms single: {:.0}",
        single_decoded as f64 / elapsed_single.as_secs_f64() / 1000.0
    );
    eprintln!(
        "[BENCH] Symbols/ms multi: {:.0}",
        multi_decoded as f64 / elapsed_multi.as_secs_f64() / 1000.0
    );
    eprintln!(
        "[BENCH] (sums: {} {} to prevent opt)",
        single_sum % 1000,
        multi_sum % 1000
    );
}
/// Compresses a repetitive text buffer with flate2's raw-deflate encoder,
/// decodes it with libdeflater and checks that the bytes match the input.
///
/// NOTE(review): despite its name, this test only exercises the libdeflate
/// reference path; it does not call this crate's own decoder.
#[test]
fn test_consume_first_integration() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let original = b"Hello World! This is a test of the consume-first decoder. ".repeat(50);

    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };

    let mut reference_out = vec![0u8; original.len()];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&compressed, &mut reference_out)
        .expect("libdeflate failed");

    eprintln!("\n[TEST] Consume-first integration test:");
    eprintln!("[TEST] Original: {} bytes", original.len());
    eprintln!("[TEST] Compressed: {} bytes", compressed.len());
    eprintln!("[TEST] libdeflate output: {} bytes", reference_len);

    assert_eq!(&reference_out[..reference_len], &original[..]);
    eprintln!("[TEST] ✓ libdeflate matches original");
}
/// Diagnostic test: builds a `ConsumeFirstTable` from a 288-entry literal/length
/// code-length set and prints how the 2048 main-table patterns classify
/// (literal / length / end-of-block / subtable), followed by a dump of the
/// first 16 entries. It makes no assertions beyond the build succeeding.
#[test]
fn test_consume_first_entry_debug() {
    use crate::decompress::inflate::consume_first_table::ConsumeFirstTable;

    // Lengths: 0..144 -> 8 bits, 144..256 -> 9, 256..280 -> 7, 280..288 -> 8.
    let mut lit_len_lengths = vec![0u8; 288];
    lit_len_lengths[..144].fill(8);
    lit_len_lengths[144..256].fill(9);
    lit_len_lengths[256..280].fill(7);
    lit_len_lengths[280..288].fill(8);
    let lit_table = ConsumeFirstTable::build(&lit_len_lengths).unwrap();

    eprintln!("\n[DEBUG] ConsumeFirstTable entry types:");
    let (mut literals, mut lengths, mut eobs, mut subtables) = (0, 0, 0, 0);
    for pattern in 0..2048u64 {
        let entry = lit_table.lookup_main(pattern);
        // Classification order matters: an entry is counted under the first
        // predicate that matches.
        if entry.is_literal() {
            literals += 1;
        } else if entry.is_length() {
            lengths += 1;
        } else if entry.is_eob() {
            eobs += 1;
        } else if entry.is_subtable() {
            subtables += 1;
        }
    }
    eprintln!("[DEBUG] Literals: {}", literals);
    eprintln!("[DEBUG] Lengths: {}", lengths);
    eprintln!("[DEBUG] EOB: {}", eobs);
    eprintln!("[DEBUG] Subtables: {}", subtables);

    for pattern in 0u64..16 {
        let entry = lit_table.lookup_main(pattern);
        eprintln!(
            "[DEBUG] Pattern {:3}: sym={:3}, bits={}, lit={}, len={}, eob={}, sub={}",
            pattern,
            entry.symbol(),
            entry.bits(),
            entry.is_literal(),
            entry.is_length(),
            entry.is_eob(),
            entry.is_subtable()
        );
    }
}
/// Times 100 decodes of the same raw-deflate stream (50,000 bytes of a
/// repeating 0..=255 ramp) with this crate's `inflate_into_pub` and with
/// libdeflater, then prints both timings, throughputs and their ratio.
/// Each iteration allocates a fresh output buffer inside the timed region, for
/// both decoders alike.
#[test]
fn bench_consume_first_vs_turbo() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;
    use std::time::Instant;

    let original: Vec<u8> = (0..50_000).map(|i| (i % 256) as u8).collect();
    let compressed = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&original).unwrap();
        enc.finish().unwrap()
    };
    let runs = 100;

    let timer = Instant::now();
    for _ in 0..runs {
        let mut out = vec![0u8; original.len()];
        super::inflate_into_pub(&compressed, &mut out).unwrap();
    }
    let elapsed_turbo = timer.elapsed();

    let timer = Instant::now();
    for _ in 0..runs {
        let mut out = vec![0u8; original.len()];
        libdeflater::Decompressor::new()
            .deflate_decompress(&compressed, &mut out)
            .unwrap();
    }
    let elapsed_libdeflate = timer.elapsed();

    let total_bytes = (original.len() * runs) as f64;
    let turbo_mbs = total_bytes / elapsed_turbo.as_secs_f64() / 1_000_000.0;
    let libdeflate_mbs = total_bytes / elapsed_libdeflate.as_secs_f64() / 1_000_000.0;

    eprintln!("\n[BENCH] Consume-First Integration Benchmark:");
    eprintln!(
        "[BENCH] Current turbo: {:.2}ms ({:.1} MB/s)",
        elapsed_turbo.as_secs_f64() * 1000.0,
        turbo_mbs
    );
    eprintln!(
        "[BENCH] libdeflate: {:.2}ms ({:.1} MB/s)",
        elapsed_libdeflate.as_secs_f64() * 1000.0,
        libdeflate_mbs
    );
    eprintln!(
        "[BENCH] Ratio: {:.1}%",
        turbo_mbs / libdeflate_mbs * 100.0
    );
}
#[test]
/// Round-trips the four bytes `ABCD` through flate2's raw-deflate encoder and
/// checks that libdeflate and `inflate_into_pub` both reproduce the input.
fn test_cf_small_literals_only() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"ABCD".to_vec();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Small literals only:");
    eprintln!("[CF-TEST] Original: {:?}", input);
    eprintln!("[CF-TEST] Compressed: {:?}", deflated);

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    eprintln!("[CF-TEST] libdeflate output: {} bytes", reference_len);
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    eprintln!("[CF-TEST] turbo output: {} bytes", decoded_len);
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips ten identical `A` bytes through flate2's raw-deflate encoder and
/// checks that libdeflate and `inflate_into_pub` both reproduce the input.
fn test_cf_rle_pattern() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"AAAAAAAAAA".to_vec();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] RLE pattern:");
    eprintln!(
        "[CF-TEST] Original: {:?} ({} bytes)",
        String::from_utf8_lossy(&input),
        input.len()
    );
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips the repetitive text `Hello Hello Hello` through flate2's
/// raw-deflate encoder and checks that libdeflate and `inflate_into_pub` both
/// reproduce the input.
fn test_cf_with_backrefs() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"Hello Hello Hello".to_vec();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] With back-references:");
    eprintln!(
        "[CF-TEST] Original: {:?}",
        String::from_utf8_lossy(&input)
    );
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips ten repetitions of a pangram sentence through flate2's
/// raw-deflate encoder and checks, via `assert_bytes_eq`, that libdeflate and
/// `inflate_into_pub` both reproduce the input.
fn test_cf_complex_pattern() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"The quick brown fox jumps over the lazy dog. ".repeat(10);
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Complex pattern:");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());
    eprintln!(
        "[CF-TEST] Ratio: {:.1}%",
        deflated.len() as f64 / input.len() as f64 * 100.0
    );

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_bytes_eq(&reference[..reference_len], &input[..], "libdeflate");

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_bytes_eq(&decoded[..decoded_len], &input[..], "cf_complex_pattern");
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips the 256 byte values 0..=255 (each once, in order) through
/// flate2's raw-deflate encoder and checks that libdeflate and
/// `inflate_into_pub` both reproduce the input.
fn test_cf_binary_data() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input: Vec<u8> = (0u8..=255).collect();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Binary data (0-255):");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Compresses an empty input with flate2's raw-deflate encoder and checks that
/// libdeflate and `inflate_into_pub` both report zero decoded bytes.
fn test_cf_empty_data() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input: Vec<u8> = Vec::new();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Empty data:");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());
    eprintln!("[CF-TEST] Compressed bytes: {:?}", deflated);

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(reference_len, 0);

    let mut decoded = vec![0u8; 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(decoded_len, 0);
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips 128 repetitions of `ABCDEFGH` (1024 bytes) through flate2's
/// raw-deflate encoder and checks that libdeflate and `inflate_into_pub` both
/// reproduce the input.
fn test_cf_medium_repetitive() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"ABCDEFGH".repeat(128);
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Medium repetitive (1KB):");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips 10 240 bytes generated by `(i * 7 + 13) % 256` through flate2's
/// raw-deflate encoder and checks that libdeflate and `inflate_into_pub` both
/// reproduce the input.
fn test_cf_large_data() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input: Vec<u8> = (0..10240).map(|i| ((i * 7 + 13) % 256) as u8).collect();
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Large data (10KB):");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips the first 100 000 bytes of `benchmark_data/dickens`, compressed
/// at flate2's best level, through libdeflate and `inflate_into_pub`.
///
/// Returns early (test skipped) when the corpus file cannot be read. On a
/// content mismatch the first differing offset is printed before panicking.
/// The decoded length is also checked explicitly, because a byte-wise `zip`
/// comparison alone cannot notice truncated or over-long output.
fn test_cf_dickens_max() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let original = match std::fs::read("benchmark_data/dickens") {
        Ok(d) => d[..100_000.min(d.len())].to_vec(),
        Err(_) => {
            eprintln!("[CF-TEST] Skipping - no dickens file");
            return;
        }
    };
    eprintln!("\n[CF-TEST] Dickens MAX compression test:");
    eprintln!("[CF-TEST] Original: {} bytes", original.len());

    let mut encoder = DeflateEncoder::new(Vec::new(), Compression::best());
    encoder.write_all(&original).unwrap();
    let compressed = encoder.finish().unwrap();
    eprintln!("[CF-TEST] Compressed: {} bytes", compressed.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut libdeflate_out = vec![0u8; original.len() + 100];
    let libdeflate_size = libdeflater::Decompressor::new()
        .deflate_decompress(&compressed, &mut libdeflate_out)
        .expect("libdeflate failed");
    assert_eq!(&libdeflate_out[..libdeflate_size], &original[..]);

    let mut turbo_out = vec![0u8; original.len() + 100];
    let turbo_size = match super::inflate_into_pub(&compressed, &mut turbo_out) {
        Ok(size) => size,
        Err(e) => panic!("Decompression failed: {:?}", e),
    };

    // Report the first differing byte, if any, within the common prefix.
    let first_mismatch = turbo_out[..turbo_size]
        .iter()
        .zip(original.iter())
        .enumerate()
        .find(|(_, (a, b))| a != b);
    if let Some((pos, (got, exp))) = first_mismatch {
        eprintln!(
            "[CF-TEST] First mismatch at byte {}: got {} exp {}",
            pos, got, exp
        );
        panic!("Content mismatch at byte {}", pos);
    }

    // `zip` stops at the shorter side, so output of the wrong length whose
    // common prefix matches must be rejected here rather than reported as a pass.
    assert_eq!(
        turbo_size,
        original.len(),
        "Output length mismatch: got {} bytes, expected {}",
        turbo_size,
        original.len()
    );
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips a larger input, ten alternations of repeated text and repeated
/// 0..=255 byte ramps, compressed at flate2's best level. Checks that
/// libdeflate and `inflate_into_pub` both reproduce it and that the decoded
/// length equals the input length.
fn test_cf_multi_block() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let text_run = b"The quick brown fox jumps over the lazy dog. ".repeat(200);
    let ramp_run = (0u8..=255).collect::<Vec<_>>().repeat(30);
    let mut input = Vec::new();
    for _ in 0..10 {
        input.extend_from_slice(&text_run);
        input.extend_from_slice(&ramp_run);
    }
    eprintln!("\n[CF-TEST] Multi-block test:");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());

    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::best());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_bytes_eq(&reference[..reference_len], &input[..], "libdeflate");

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_eq!(decoded_len, input.len());
    assert_bytes_eq(&decoded[..decoded_len], &input[..], "cf_multi_block");
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Round-trips 1 000 000 generated bytes through flate2's gzip encoder and
/// checks that libdeflate's gzip decoder and `inflate_gzip_preallocated` both
/// reproduce the input, including the reported output size.
fn test_cf_gzip_format() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input: Vec<u8> = (0..1_000_000)
        .map(|i| ((i * 17 + 13) % 256) as u8)
        .collect();
    let gzipped = {
        let mut enc = GzEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Gzip format:");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", gzipped.len());

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .gzip_decompress(&gzipped, &mut reference)
        .expect("libdeflate failed");
    assert_eq!(&reference[..reference_len], input.as_slice());

    // The decoder under test receives an empty Vec as its output argument.
    let mut decoded = Vec::new();
    let decoded_len = crate::decompress::parallel::ultra_fast_inflate::inflate_gzip_preallocated(
        &gzipped,
        &mut decoded,
    )
    .expect("our inflate failed");
    assert_eq!(decoded_len, input.len());
    assert_eq!(&decoded[..decoded_len], input.as_slice());
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
/// Cross-checks `ConsumeFirstTable` against `TwoLevelTable` for the same code
/// lengths (8/9/7/8 bits over 286 symbols) across all 2048 eleven-bit patterns.
///
/// Patterns that hit a subtable entry in the consume-first table, or for which
/// the two-level table reports a zero length, are skipped. The first ten
/// disagreements are printed; any disagreement fails the test.
fn test_cf_table_comparison() {
    use crate::decompress::inflate::consume_first_table::ConsumeFirstTable;
    use crate::decompress::two_level_table::TwoLevelTable;

    let lit_len_lens: Vec<u8> = (0..286usize)
        .map(|symbol| match symbol {
            0..=143 => 8,
            144..=255 => 9,
            256..=279 => 7,
            _ => 8,
        })
        .collect();
    let cf_table = ConsumeFirstTable::build(&lit_len_lens).expect("CF build failed");
    let tl_table = TwoLevelTable::build(&lit_len_lens).expect("TL build failed");

    let mut mismatches = 0;
    for bits in 0u64..2048 {
        let cf_entry = cf_table.lookup_main(bits);
        let (tl_sym, tl_len) = tl_table.decode(bits);
        if cf_entry.is_subtable() || tl_len == 0 {
            continue;
        }
        // End-of-block entries are compared as symbol 256.
        let cf_sym = if cf_entry.is_eob() {
            256
        } else {
            cf_entry.symbol()
        };
        let cf_len = cf_entry.bits() as u16;
        let tables_agree = cf_sym == tl_sym && cf_len as u32 == tl_len;
        if tables_agree {
            continue;
        }
        if mismatches < 10 {
            eprintln!("[TABLE-CMP] bits={:#06x} CF(sym={}, len={}, lit={}, eob={}, length={}) TL(sym={}, len={})",
                bits, cf_sym, cf_len, cf_entry.is_literal(), cf_entry.is_eob(), cf_entry.is_length(), tl_sym, tl_len);
        }
        mismatches += 1;
    }
    assert!(mismatches == 0, "Found {} table mismatches!", mismatches);
    eprintln!("[CF-TEST] Tables match for all 2048 patterns");
}
#[test]
/// Checks `ConsumeFirstTable` construction for a code with 256 twelve-bit
/// literals and a seven-bit end-of-block symbol.
///
/// The expected codes are derived independently with the canonical Huffman
/// assignment (count codes per length, then compute the first code of each
/// length), bit-reversed, and looked up:
/// - symbol 256 must resolve in the main table as end-of-block using 7 bits;
/// - literals 0 and 100 must resolve to the right symbol using 12 bits in
///   total, either through a subtable or directly from the main table.
///
/// The subtable index is derived from `CF_TABLE_BITS` rather than a literal
/// shift, so the test stays valid if the main-table width changes.
fn test_cf_subtable_construction() {
    use crate::decompress::inflate::consume_first_table::{ConsumeFirstTable, CF_TABLE_BITS};

    let mut code_lens = vec![0u8; 286];
    code_lens[..256].fill(12);
    code_lens[256] = 7;
    let cf_table = ConsumeFirstTable::build(&code_lens).expect("CF build failed");

    // Reverses the low `n` bits of `val`, matching how the tests in this file
    // feed canonical codes to `lookup_main`.
    fn reverse_bits(mut val: u32, n: u32) -> u32 {
        let mut result = 0;
        for _ in 0..n {
            result = (result << 1) | (val & 1);
            val >>= 1;
        }
        result
    }

    // Number of symbols per code length.
    let mut bl_count = [0u32; 16];
    for &len in &code_lens {
        if len > 0 {
            bl_count[len as usize] += 1;
        }
    }
    // First canonical code of each length.
    let mut next_code = [0u32; 16];
    let mut code = 0u32;
    for bits in 1..=15 {
        code = (code + bl_count[bits - 1]) << 1;
        next_code[bits] = code;
    }

    eprintln!("\n[CF-SUBTABLE-TEST] Testing 12-bit codes with subtables:");
    eprintln!(
        "[CF-SUBTABLE-TEST] 12-bit codes: {} symbols",
        bl_count[12]
    );
    eprintln!("[CF-SUBTABLE-TEST] 7-bit codes: {} symbols", bl_count[7]);
    eprintln!(
        "[CF-SUBTABLE-TEST] CF subtable size: {} entries",
        cf_table.sub.len()
    );

    let mut errors = 0u32;

    // End-of-block is the only 7-bit symbol, so it owns the first 7-bit code.
    {
        let code = next_code[7];
        let reversed = reverse_bits(code, 7);
        let cf_entry = cf_table.lookup_main(reversed as u64);
        eprintln!(
            "[CF-SUBTABLE-TEST] EOB (sym=256): code={:#b}, reversed={:#b}",
            code, reversed
        );
        eprintln!(
            "[CF-SUBTABLE-TEST] CF: sym={}, bits={}, is_eob={}",
            cf_entry.symbol(),
            cf_entry.bits(),
            cf_entry.is_eob()
        );
        if !cf_entry.is_eob() {
            eprintln!("[CF-SUBTABLE-TEST] ERROR: EOB mismatch - not marked as EOB!");
            errors += 1;
        }
        if cf_entry.bits() != 7 {
            eprintln!("[CF-SUBTABLE-TEST] ERROR: EOB should consume 7 bits!");
            errors += 1;
        }
    }

    // Looks up one 12-bit literal and returns how many checks failed.
    let check_literal = |sym: u16, code: u32| -> u32 {
        let mut failures = 0;
        let reversed = reverse_bits(code, 12);
        let main_entry = cf_table.lookup_main(reversed as u64);
        eprintln!(
            "[CF-SUBTABLE-TEST] Literal (sym={}): code={:#b}, reversed={:#b}",
            sym, code, reversed
        );
        eprintln!(
            "[CF-SUBTABLE-TEST] Main entry: sym={}, bits={}, is_subtable={}",
            main_entry.symbol(),
            main_entry.bits(),
            main_entry.is_subtable()
        );
        if main_entry.is_subtable() {
            // The code bits above the main-table index are passed to the
            // subtable lookup, as in test_cf_subtable_mixed.
            let remaining_bits = reversed >> CF_TABLE_BITS;
            let sub_entry = cf_table.lookup_sub(main_entry, remaining_bits as u64);
            eprintln!(
                "[CF-SUBTABLE-TEST] Sub entry: sym={}, bits={}, is_literal={}",
                sub_entry.symbol(),
                sub_entry.bits(),
                sub_entry.is_literal()
            );
            let total_bits = main_entry.bits() + sub_entry.bits();
            eprintln!(
                "[CF-SUBTABLE-TEST] Total bits: {} (main={}, sub={})",
                total_bits,
                main_entry.bits(),
                sub_entry.bits()
            );
            if !sub_entry.is_literal() || sub_entry.symbol() != sym {
                eprintln!("[CF-SUBTABLE-TEST] ERROR: Symbol mismatch!");
                failures += 1;
            }
            if total_bits != 12 {
                eprintln!("[CF-SUBTABLE-TEST] ERROR: Total bits should be 12!");
                failures += 1;
            }
        } else if !main_entry.is_literal() || main_entry.symbol() != sym {
            eprintln!("[CF-SUBTABLE-TEST] ERROR: Direct entry mismatch!");
            failures += 1;
        }
        failures
    };

    // Literals 0..=255 are the only 12-bit symbols, so literal `s` gets the
    // canonical code `next_code[12] + s`.
    errors += check_literal(0, next_code[12]);
    errors += check_literal(100, next_code[12] + 100);

    assert_eq!(errors, 0, "Found {} subtable construction errors", errors);
    eprintln!("[CF-SUBTABLE-TEST] PASSED");
}
#[test]
// Builds a ConsumeFirstTable from mixed code lengths (8, 9, 10 and 12 bits for
// literals, 7 bits for symbol 256, 8 bits for symbols 257..286) and checks it
// two ways:
//   1. a direct lookup of the 12-bit literal 230;
//   2. decoding a packed three-symbol bitstream (65, 230, 256) via TurboBits.
// Expectations branch on CF_TABLE_BITS: with a main table of 15 bits or more
// no subtable may be involved; otherwise the 12-bit code must use one.
// Failures are counted in `errors` and asserted to be zero at the end.
fn test_cf_subtable_mixed() {
use crate::decompress::inflate::consume_first_table::{ConsumeFirstTable, CF_TABLE_BITS};
use crate::decompress::two_level_table::TurboBits;
let mut code_lens = vec![0u8; 286];
for i in 0..128 {
code_lens[i] = 8;
}
for i in 128..192 {
code_lens[i] = 9;
}
for i in 192..224 {
code_lens[i] = 10;
}
for i in 224..256 {
code_lens[i] = 12;
}
code_lens[256] = 7;
for i in 257..286 {
code_lens[i] = 8;
}
let cf_table = ConsumeFirstTable::build(&code_lens).expect("CF build failed");
eprintln!("\n[CF-MIXED-TEST] Mixed code lengths with subtables:");
eprintln!(
"[CF-MIXED-TEST] Subtable size: {} entries",
cf_table.sub.len()
);
// Reverses the low `n` bits of `val`.
fn reverse_bits(mut val: u32, n: u32) -> u32 {
let mut result = 0;
for _ in 0..n {
result = (result << 1) | (val & 1);
val >>= 1;
}
result
}
// Canonical Huffman assignment: count symbols per code length, then derive
// the first code value of each length.
let mut bl_count = [0u32; 16];
for &len in &code_lens {
if len > 0 {
bl_count[len as usize] += 1;
}
}
let mut next_code = [0u32; 16];
let mut code = 0u32;
for bits in 1..=15 {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = code;
}
// symbol_codes[sym] = (bit-reversed code, code length); unused symbols get (0, 0).
let mut symbol_codes: Vec<(u32, u8)> = Vec::with_capacity(286);
let mut next_code_temp = next_code;
for &len in code_lens.iter() {
if len > 0 {
let c = next_code_temp[len as usize];
next_code_temp[len as usize] += 1;
symbol_codes.push((reverse_bits(c, len as u32), len));
} else {
symbol_codes.push((0, 0));
}
}
let mut errors = 0;
// Part 1: direct lookup of a 12-bit literal.
let sym = 230;
let (reversed, len) = symbol_codes[sym];
eprintln!(
"[CF-MIXED-TEST] Testing 12-bit symbol {}: code_len={}, reversed={:#b} (CF_TABLE_BITS={})",
sym, len, reversed, CF_TABLE_BITS
);
let main_entry = cf_table.lookup_main(reversed as u64);
eprintln!(
"[CF-MIXED-TEST] Main entry: sym={}, bits={}, is_subtable={}",
main_entry.symbol(),
main_entry.bits(),
main_entry.is_subtable()
);
if CF_TABLE_BITS >= 15 {
if main_entry.is_subtable() {
eprintln!("[CF-MIXED-TEST] ERROR: 12-bit code should NOT need subtable with {}-bit table!", CF_TABLE_BITS);
errors += 1;
} else if !main_entry.is_literal() || main_entry.symbol() != sym as u16 {
eprintln!("[CF-MIXED-TEST] ERROR: Symbol mismatch!");
errors += 1;
}
} else {
if main_entry.is_subtable() {
// The code bits above the main-table index are handed to the subtable lookup.
let remaining_bits = reversed >> CF_TABLE_BITS;
let sub_entry = cf_table.lookup_sub(main_entry, remaining_bits as u64);
eprintln!(
"[CF-MIXED-TEST] Sub entry: sym={}, bits={}, is_literal={}",
sub_entry.symbol(),
sub_entry.bits(),
sub_entry.is_literal()
);
let total_bits = main_entry.bits() + sub_entry.bits();
eprintln!("[CF-MIXED-TEST] Total bits: {}", total_bits);
if !sub_entry.is_literal() || sub_entry.symbol() != sym as u16 {
eprintln!("[CF-MIXED-TEST] ERROR: Symbol mismatch!");
errors += 1;
}
} else {
eprintln!("[CF-MIXED-TEST] ERROR: Expected subtable for 12-bit code with {}-bit table!", CF_TABLE_BITS);
errors += 1;
}
}
// Part 2: pack the codes for 65, 230 and 256 back to back, lowest bits
// first, into one u64 and decode them in sequence.
let sym_a = 65;
let sym_rare = 230;
let (code_a, len_a) = symbol_codes[sym_a];
let (code_rare, len_rare) = symbol_codes[sym_rare];
let (code_eob, len_eob) = symbol_codes[256];
let mut bitstream: u64 = 0;
let mut bit_pos = 0;
bitstream |= code_a as u64;
bit_pos += len_a as u32;
bitstream |= (code_rare as u64) << bit_pos;
bit_pos += len_rare as u32;
bitstream |= (code_eob as u64) << bit_pos;
eprintln!(
"[CF-MIXED-TEST] Decoding sequence: A({} bits), 230({} bits), EOB({} bits)",
len_a, len_rare, len_eob
);
eprintln!("[CF-MIXED-TEST] Bitstream: {:#066b}", bitstream);
let buf = bitstream.to_le_bytes();
let mut bits = TurboBits::new(&buf);
bits.refill_branchless();
// First symbol: the entry's bits are consumed before the entry is inspected.
let e1 = cf_table.lookup_main(bits.buffer());
bits.consume(e1.bits());
if e1.is_subtable() {
eprintln!("[CF-MIXED-TEST] ERROR: 'A' should not need subtable!");
errors += 1;
} else if e1.symbol() != sym_a as u16 {
eprintln!(
"[CF-MIXED-TEST] ERROR: Expected 'A' ({}), got {}!",
sym_a,
e1.symbol()
);
errors += 1;
} else {
eprintln!("[CF-MIXED-TEST] Decoded 'A' correctly");
}
// Second symbol: the 12-bit literal; same CF_TABLE_BITS split as in part 1.
let e2_main = cf_table.lookup_main(bits.buffer());
bits.consume(e2_main.bits());
if CF_TABLE_BITS >= 15 {
if e2_main.is_subtable() {
eprintln!("[CF-MIXED-TEST] ERROR: Symbol 230 should NOT need subtable with {}-bit table!", CF_TABLE_BITS);
errors += 1;
} else if e2_main.symbol() != sym_rare as u16 {
eprintln!(
"[CF-MIXED-TEST] ERROR: Expected {}, got {}!",
sym_rare,
e2_main.symbol()
);
errors += 1;
} else {
eprintln!("[CF-MIXED-TEST] Decoded {} correctly", sym_rare);
}
} else {
if e2_main.is_subtable() {
// The subtable lookup reads the buffer after the main entry's bits were consumed.
let e2_sub = cf_table.lookup_sub(e2_main, bits.buffer());
bits.consume(e2_sub.bits());
if e2_sub.symbol() != sym_rare as u16 {
eprintln!(
"[CF-MIXED-TEST] ERROR: Expected {}, got {}!",
sym_rare,
e2_sub.symbol()
);
errors += 1;
} else {
eprintln!("[CF-MIXED-TEST] Decoded {} correctly", sym_rare);
}
} else {
eprintln!(
"[CF-MIXED-TEST] ERROR: Symbol 230 should need subtable with {}-bit table!",
CF_TABLE_BITS
);
errors += 1;
}
}
// Third symbol: end-of-block, accepted from either the main table or a subtable.
let e3 = cf_table.lookup_main(bits.buffer());
bits.consume(e3.bits());
if e3.is_subtable() {
let e3_sub = cf_table.lookup_sub(e3, bits.buffer());
bits.consume(e3_sub.bits());
if !e3_sub.is_eob() {
eprintln!("[CF-MIXED-TEST] ERROR: Expected EOB!");
errors += 1;
} else {
eprintln!("[CF-MIXED-TEST] Decoded EOB correctly");
}
} else if !e3.is_eob() {
eprintln!("[CF-MIXED-TEST] ERROR: Expected EOB!");
errors += 1;
} else {
eprintln!("[CF-MIXED-TEST] Decoded EOB correctly");
}
assert_eq!(errors, 0, "Found {} errors in mixed subtable test", errors);
eprintln!("[CF-MIXED-TEST] PASSED");
}
#[test]
/// Builds a distance table from thirty 5-bit codes and checks that the first
/// ten symbols each decode from their bit-reversed 5-bit code as a length-type
/// entry carrying the right symbol and consuming 5 bits.
fn test_cf_distance_table() {
    use crate::decompress::inflate::consume_first_table::ConsumeFirstTable;

    let dist_lens = vec![5u8; 30];
    let table =
        ConsumeFirstTable::build_distance(&dist_lens).expect("Failed to build distance table");

    // Reverses the low `n` bits of `val`, taking bit `i` of the input on step `i`.
    let bit_reverse =
        |val: u32, n: u32| -> u32 { (0..n).fold(0, |acc, i| (acc << 1) | ((val >> i) & 1)) };

    for sym in 0..10 {
        // All codes share one length, so symbol `sym` has canonical code `sym`.
        let pattern = bit_reverse(sym as u32, 5);
        let entry = table.lookup_main(pattern as u64);
        assert!(
            entry.is_length(),
            "Distance symbol {} should be length type",
            sym
        );
        assert_eq!(
            entry.symbol(),
            sym,
            "Distance symbol {} decoded incorrectly",
            sym
        );
        assert_eq!(
            entry.bits(),
            5,
            "Distance symbol {} should consume 5 bits",
            sym
        );
    }
    eprintln!("[CF-TEST] Distance table builds correctly");
}
#[test]
/// Decodes the raw deflate stream inside `benchmark_data/silesia-gzip.tar.gz`
/// with libdeflate and with `inflate_into_pub`, then compares the results.
///
/// Returns early (test skipped) when the corpus file cannot be read. The gzip
/// header is skipped by hand and the last 8 bytes (trailer) are dropped. The
/// first 100 000 decoded bytes are compared byte-for-byte, and the total
/// decoded size must equal libdeflate's, so truncated or over-long output no
/// longer passes.
fn test_cf_silesia_raw() {
    let gzip_data = match std::fs::read("benchmark_data/silesia-gzip.tar.gz") {
        Ok(d) => d,
        Err(_) => {
            eprintln!("[CF-TEST] Skipping - no silesia file");
            return;
        }
    };
    eprintln!("\n[CF-TEST] Silesia raw deflate:");
    eprintln!("[CF-TEST] Gzip file: {} bytes", gzip_data.len());

    // Walk the optional gzip header fields in RFC 1952 order: the fixed
    // 10-byte header, FEXTRA (0x04), FNAME (0x08), FCOMMENT (0x10), FHCRC (0x02).
    let flags = gzip_data[3];
    let mut pos = 10;
    if flags & 0x04 != 0 {
        let xlen = u16::from_le_bytes([gzip_data[pos], gzip_data[pos + 1]]) as usize;
        pos += 2 + xlen;
    }
    // FNAME and FCOMMENT are both NUL-terminated strings.
    for &string_flag in &[0x08u8, 0x10] {
        if flags & string_flag != 0 {
            while gzip_data[pos] != 0 {
                pos += 1;
            }
            pos += 1;
        }
    }
    if flags & 0x02 != 0 {
        pos += 2;
    }
    let deflate_start = pos;
    // The trailer (CRC32 + ISIZE) occupies the final 8 bytes.
    let deflate_end = gzip_data.len() - 8;
    let deflate_data = &gzip_data[deflate_start..deflate_end];
    eprintln!(
        "[CF-TEST] Deflate data: {} bytes (start at {})",
        deflate_data.len(),
        deflate_start
    );

    // Fixed-size reference buffer; libdeflate fails if the output does not fit.
    let mut libdeflate_out = vec![0u8; 212_000_000];
    let libdeflate_size = libdeflater::Decompressor::new()
        .deflate_decompress(deflate_data, &mut libdeflate_out)
        .expect("libdeflate failed");
    eprintln!("[CF-TEST] Expected output: {} bytes", libdeflate_size);

    let test_size = 100_000.min(libdeflate_size);
    let mut turbo_out = vec![0u8; libdeflate_size + 1000];
    match super::inflate_into_pub(deflate_data, &mut turbo_out) {
        Ok(size) => {
            eprintln!("[CF-TEST] Our output: {} bytes", size);
            let first_mismatch = turbo_out[..test_size]
                .iter()
                .zip(libdeflate_out[..test_size].iter())
                .enumerate()
                .find(|(_, (a, b))| a != b);
            if let Some((pos, (got, exp))) = first_mismatch {
                eprintln!(
                    "[CF-TEST] FIRST MISMATCH at byte {}: got {} expected {}",
                    pos, got, exp
                );
                panic!("Mismatch at byte {}", pos);
            }
            eprintln!("[CF-TEST] ✓ First {} bytes match", test_size);
            // The prefix comparison alone would accept output that stops
            // early (or runs long) anywhere past `test_size`.
            assert_eq!(
                size, libdeflate_size,
                "Output size mismatch: got {} bytes, expected {}",
                size, libdeflate_size
            );
        }
        Err(e) => {
            // Only the bytes libdeflate actually produced are meaningful;
            // both buffers are longer than that and zero-padded beyond it.
            let cmp_size = libdeflate_size;
            let first_mismatch = turbo_out[..cmp_size]
                .iter()
                .zip(libdeflate_out[..cmp_size].iter())
                .enumerate()
                .find(|(_, (a, b))| a != b);
            if let Some((pos, _)) = first_mismatch {
                eprintln!("[CF-TEST] First mismatch at byte {}", pos);
            }
            panic!("Decompression failed: {:?}", e);
        }
    }
}
#[test]
/// Decodes `benchmark_data/silesia-gzip.tar.gz` with
/// `inflate_gzip_preallocated` and compares the first 10 000 000 output bytes
/// (or fewer, if the file is smaller) against flate2's `GzDecoder`.
///
/// Returns early (test skipped) when the corpus file cannot be read. On a
/// decode error, the output produced so far is compared with the reference and
/// the first mismatch, with some context, is printed before panicking.
///
/// The file is read once; the "compressed chunk" size printed below is just
/// the file length capped at 1 000 000.
fn test_cf_silesia_small() {
    use std::io::Read;

    let full_data = match std::fs::read("benchmark_data/silesia-gzip.tar.gz") {
        Ok(d) => d,
        Err(_) => {
            eprintln!("[CF-TEST] Skipping - no silesia file");
            return;
        }
    };
    eprintln!("\n[CF-TEST] Silesia (first 1MB compressed):");
    eprintln!(
        "[CF-TEST] Compressed chunk: {} bytes",
        full_data.len().min(1_000_000)
    );

    // Reference output from flate2.
    let mut flate2_dec = flate2::read::GzDecoder::new(&full_data[..]);
    let mut expected = Vec::new();
    flate2_dec.read_to_end(&mut expected).unwrap();
    let check_size = 10_000_000.min(expected.len());
    eprintln!(
        "[CF-TEST] Expected total: {} bytes, checking first {} bytes",
        expected.len(),
        check_size
    );

    let mut our_out = Vec::new();
    let result = crate::decompress::parallel::ultra_fast_inflate::inflate_gzip_preallocated(
        &full_data,
        &mut our_out,
    );
    match result {
        Ok(our_size) => {
            eprintln!("[CF-TEST] Our output: {} bytes", our_size);
            if our_size >= check_size {
                assert_eq!(
                    &our_out[..check_size],
                    &expected[..check_size],
                    "Mismatch in first {} bytes",
                    check_size
                );
                eprintln!("[CF-TEST] ✓ First {} bytes match", check_size);
            } else {
                panic!(
                    "Output too small: {} vs expected {}",
                    our_size,
                    expected.len()
                );
            }
        }
        Err(e) => {
            eprintln!("[CF-TEST] Error: {:?}", e);
            eprintln!("[CF-TEST] Output so far: {} bytes", our_out.len());
            let cmp_size = our_out.len().min(expected.len());
            if cmp_size > 0 {
                let first_mismatch = our_out[..cmp_size]
                    .iter()
                    .zip(expected[..cmp_size].iter())
                    .enumerate()
                    .find(|(_, (a, b))| a != b);
                if let Some((pos, _)) = first_mismatch {
                    // Up to 20 bytes from the mismatch onward, then the 10 bytes before it.
                    let window_end = pos.saturating_add(20).min(cmp_size);
                    eprintln!("[CF-TEST] FIRST MISMATCH at byte {}:", pos);
                    eprintln!("[CF-TEST] Got: {:?}", &our_out[pos..window_end]);
                    eprintln!("[CF-TEST] Exp: {:?}", &expected[pos..window_end]);
                    let ctx_start = pos.saturating_sub(10);
                    eprintln!("[CF-TEST] Context before ({}-{}):", ctx_start, pos);
                    eprintln!(
                        "[CF-TEST] Got: {:?}",
                        String::from_utf8_lossy(&our_out[ctx_start..pos])
                    );
                    eprintln!(
                        "[CF-TEST] Exp: {:?}",
                        String::from_utf8_lossy(&expected[ctx_start..pos])
                    );
                } else {
                    eprintln!("[CF-TEST] First {} bytes match perfectly", cmp_size);
                }
            }
            panic!("Decompression failed: {:?}", e);
        }
    }
}
#[test]
/// Round-trips 2000 repetitions of a pangram sentence through flate2's
/// raw-deflate encoder and checks, via `assert_bytes_eq`, that libdeflate and
/// `inflate_into_pub` both reproduce the input.
fn test_cf_very_large_repetitive() {
    use flate2::write::DeflateEncoder;
    use flate2::Compression;
    use std::io::Write;

    let input = b"The quick brown fox jumps over the lazy dog. ".repeat(2000);
    let deflated = {
        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&input).unwrap();
        enc.finish().unwrap()
    };
    eprintln!("\n[CF-TEST] Very large repetitive (100KB):");
    eprintln!("[CF-TEST] Original: {} bytes", input.len());
    eprintln!("[CF-TEST] Compressed: {} bytes", deflated.len());
    eprintln!(
        "[CF-TEST] Ratio: {:.1}%",
        deflated.len() as f64 / input.len() as f64 * 100.0
    );

    // libdeflate decodes the same stream first and serves as the reference.
    let mut reference = vec![0u8; input.len() + 100];
    let reference_len = libdeflater::Decompressor::new()
        .deflate_decompress(&deflated, &mut reference)
        .expect("libdeflate failed");
    assert_bytes_eq(&reference[..reference_len], &input[..], "libdeflate");

    let mut decoded = vec![0u8; input.len() + 100];
    let decoded_len = super::inflate_into_pub(&deflated, &mut decoded).expect("turbo failed");
    assert_bytes_eq(
        &decoded[..decoded_len],
        &input[..],
        "cf_very_large_repetitive",
    );
    eprintln!("[CF-TEST] ✓ Passed");
}
#[test]
// Micro-benchmark of a "consume first, then dispatch" lookup loop over a
// synthetic 4096-entry table. Entry layout used here: the top two bits are a
// type tag, the low 8 bits are the number of bits to consume, and a payload is
// shifted in above bit 8. Prints elapsed time, throughput and the per-type
// counts; nothing is asserted.
fn bench_valid_entries_consume_first() {
const TYPE_SUBTABLE: u32 = 0b00 << 30;
const TYPE_LITERAL: u32 = 0b01 << 30;
const TYPE_LENGTH: u32 = 0b10 << 30;
const TYPE_EOB: u32 = 0b11 << 30;
// Table mix by index: every 100th entry is a subtable (12 bits), other
// multiples of 50 are EOB (7 bits), other multiples of 20 are lengths
// (10 bits), and everything else is a literal (8 bits).
let table: Vec<u32> = (0..4096)
.map(|i| {
if i % 100 == 0 {
TYPE_SUBTABLE | ((i & 0x3FFFFF) << 8) | 12 } else if i % 50 == 0 {
TYPE_EOB | 7 } else if i % 20 == 0 {
TYPE_LENGTH | ((i & 0x1F) << 8) | 10 } else {
TYPE_LITERAL | ((i & 0xFF) << 8) | 8 }
})
.collect();
// Deterministic pseudo-input: one 64-bit "bit buffer" value per lookup.
let bits_sequence: Vec<u64> = (0..100_000).map(|i| i * 0x1234567).collect();
let iterations = 500;
let start = std::time::Instant::now();
let mut literals = 0u64;
let mut lengths = 0u64;
let mut subtables = 0u64;
let mut eobs = 0u64;
// XOR of every post-consume buffer; printed at the end so the shifts are observable.
let mut bitbuf_accum = 0u64;
for _ in 0..iterations {
for &bits in &bits_sequence {
let mut bitbuf = bits;
// Index with the low 12 bits, consume unconditionally, then classify by tag.
let entry = table[(bitbuf & 0xFFF) as usize];
let bits_to_skip = entry & 0xFF;
bitbuf >>= bits_to_skip;
bitbuf_accum ^= bitbuf;
match entry >> 30 {
0b01 => literals += 1, 0b10 => lengths += 1, 0b11 => eobs += 1, _ => subtables += 1, }
}
}
let elapsed = start.elapsed();
eprintln!("\n[BENCH] Valid-Entries Consume-First:");
eprintln!("[BENCH] Time: {:.2}ms", elapsed.as_secs_f64() * 1000.0);
eprintln!(
"[BENCH] Throughput: {:.1} M entries/sec",
(iterations * bits_sequence.len()) as f64 / elapsed.as_secs_f64() / 1_000_000.0
);
eprintln!(
"[BENCH] Distribution: {} lit, {} len, {} eob, {} sub (accum {})",
literals,
lengths,
eobs,
subtables,
bitbuf_accum % 1000
);
}
#[test]
// Micro-benchmark comparing two orderings of the same table-lookup step over a
// synthetic 4096-entry table:
//   - "check-first": test the literal flag, and only then shift the buffer;
//   - "consume-first": shift the buffer unconditionally, then test the flag.
// Prints both timings and the relative speedup; nothing is asserted.
fn bench_consume_first_simulation() {
// Entries whose index is not a multiple of 20 (95% of the table) are literals:
// bit 31 set, symbol byte at bits 16..24, 8 bits to consume. The remaining
// entries carry 0x4000_0000 and a 10-bit count.
let table: Vec<u32> = (0..4096)
.map(|i| {
if i % 20 != 0 {
0x8000_0000 | ((i & 0xFF) << 16) | 8 } else {
0x4000_0000 | 10 }
})
.collect();
// Deterministic pseudo-input: one 64-bit "bit buffer" value per lookup.
let bits_sequence: Vec<u64> = (0..100_000).map(|i| i * 0x1234567).collect();
let iterations = 500;
// Variant 1: check-first. Non-literal entries neither shift nor touch `sum`.
let start = std::time::Instant::now();
let mut sum = 0u64;
for _ in 0..iterations {
for &bits in &bits_sequence {
let mut bitbuf = bits;
let entry = table[(bitbuf & 0xFFF) as usize];
if entry & 0x8000_0000 != 0 {
let bits_to_skip = entry & 0xFF;
bitbuf >>= bits_to_skip;
sum = sum.wrapping_add((entry >> 16) as u64 & 0xFF);
sum ^= bitbuf; }
}
}
let elapsed_check_first = start.elapsed();
// Variant 2: consume-first. Every entry shifts and is XORed into `sum2`;
// only the symbol accumulation is conditional.
let start = std::time::Instant::now();
let mut sum2 = 0u64;
for _ in 0..iterations {
for &bits in &bits_sequence {
let mut bitbuf = bits;
let entry = table[(bitbuf & 0xFFF) as usize];
let bits_to_skip = entry & 0xFF;
bitbuf >>= bits_to_skip;
if entry & 0x8000_0000 != 0 {
sum2 = sum2.wrapping_add((entry >> 16) as u64 & 0xFF);
}
sum2 ^= bitbuf; }
}
let elapsed_consume_first = start.elapsed();
eprintln!("\n[BENCH] Consume-First vs Check-First (95% literals):");
eprintln!(
"[BENCH] Check-first: {:.2}ms",
elapsed_check_first.as_secs_f64() * 1000.0
);
eprintln!(
"[BENCH] Consume-first: {:.2}ms",
elapsed_consume_first.as_secs_f64() * 1000.0
);
// Speedup is check-first time relative to consume-first time, minus one, as a percentage.
eprintln!(
"[BENCH] Speedup: {:.1}%",
(elapsed_check_first.as_secs_f64() / elapsed_consume_first.as_secs_f64() - 1.0) * 100.0
);
// Printing the sums keeps both accumulators observable.
eprintln!("[BENCH] (sums: {}, {} to prevent opt)", sum, sum2);
}
/// Throughput benchmark over `benchmark_data/silesia.tar.gz`.
///
/// Skips (returns early with a message) when the file is missing or
/// `skip_gzip_header` rejects it. Otherwise it runs two untimed warm-up decodes
/// followed by three timed decodes through `inflate_into_pub` and prints MB/s
/// based on the size recorded in the last four bytes of the file.
#[test]
fn bench_with_path_counts() {
    let Ok(data) = std::fs::read("benchmark_data/silesia.tar.gz") else {
        eprintln!("Skipping - no silesia.tar.gz");
        return;
    };
    let Ok(header_size) = crate::decompress::parallel::marker_decode::skip_gzip_header(&data)
    else {
        eprintln!("Skipping - not a valid gzip file");
        return;
    };

    // Everything between the header and the final 8 bytes is treated as the
    // deflate stream.
    let trailer_start = data.len().saturating_sub(8);
    let deflate_data = &data[header_size..trailer_start];

    // Little-endian u32 stored in the last four bytes of the file.
    let size_field = &data[data.len() - 4..];
    let uncompressed_len =
        u32::from_le_bytes([size_field[0], size_field[1], size_field[2], size_field[3]]) as usize;
    let mut output = vec![0u8; uncompressed_len + 1024];

    // Warm-up passes; their results are intentionally ignored.
    for _warmup in 0..2 {
        let _ = inflate_into_pub(deflate_data, &mut output);
    }

    eprintln!("\n[BENCH] Real-world data (silesia) with GZIPPY_TRACE=1:");
    eprintln!("[BENCH] Set GZIPPY_TRACE=1 to see detailed path counts");

    let iterations = 3;
    let timer = std::time::Instant::now();
    for _ in 0..iterations {
        let produced = inflate_into_pub(deflate_data, &mut output).unwrap();
        assert!(produced > 0);
    }
    let elapsed = timer.elapsed();

    let total_bytes = (uncompressed_len * iterations) as f64;
    let mb_per_sec = total_bytes / elapsed.as_secs_f64() / 1_000_000.0;
    eprintln!("[BENCH] Output size: {} bytes", uncompressed_len);
    eprintln!("[BENCH] Speed: {:.1} MB/s", mb_per_sec);
}
/// Compares decode throughput of `inflate_into_pub` against libdeflate on
/// `benchmark_data/silesia-gzip.tar.gz`.
///
/// Skips (returns early with a message) when the file is missing, when
/// `skip_gzip_header` rejects it, or when it is too short to hold a header plus
/// the 8-byte trailer. The header is located with the same helper the sibling
/// silesia benchmark uses, so optional header fields are not mis-parsed.
#[test]
fn bench_turbo_vs_libdeflate() {
    let gzip_data = match std::fs::read("benchmark_data/silesia-gzip.tar.gz") {
        Ok(d) => d,
        Err(_) => {
            eprintln!("[BENCH] Skipping - no silesia file");
            return;
        }
    };
    let deflate_start =
        match crate::decompress::parallel::marker_decode::skip_gzip_header(&gzip_data) {
            Ok(n) => n,
            Err(_) => {
                eprintln!("[BENCH] Skipping - not a valid gzip file");
                return;
            }
        };
    // The last 8 bytes are the trailer; refuse files where the header would
    // overlap it instead of panicking on an out-of-range slice.
    let deflate_end = match gzip_data.len().checked_sub(8) {
        Some(end) if end >= deflate_start => end,
        _ => {
            eprintln!("[BENCH] Skipping - truncated gzip file");
            return;
        }
    };
    let deflate_data = &gzip_data[deflate_start..deflate_end];

    // Little-endian u32 stored in the last four bytes of the file.
    let isize_bytes = &gzip_data[gzip_data.len() - 4..];
    let isize = u32::from_le_bytes([
        isize_bytes[0],
        isize_bytes[1],
        isize_bytes[2],
        isize_bytes[3],
    ]) as usize;
    let mut output = vec![0u8; isize + 1000];

    // Untimed warm-up of both decoders; results are intentionally ignored.
    let _ = inflate_into_pub(deflate_data, &mut output);
    let _ = libdeflater::Decompressor::new().deflate_decompress(deflate_data, &mut output);

    eprintln!("\n=== Turbo Path vs libdeflate (silesia) ===");
    let iterations = 5;

    let start = std::time::Instant::now();
    for _ in 0..iterations {
        let size = libdeflater::Decompressor::new()
            .deflate_decompress(deflate_data, &mut output)
            .unwrap();
        assert!(size > 0);
    }
    let libdeflate_time = start.elapsed();
    let libdeflate_speed =
        (isize * iterations) as f64 / libdeflate_time.as_secs_f64() / 1_000_000.0;

    let start = std::time::Instant::now();
    for _ in 0..iterations {
        let size = inflate_into_pub(deflate_data, &mut output).unwrap();
        assert!(size > 0);
    }
    let turbo_time = start.elapsed();
    let turbo_speed = (isize * iterations) as f64 / turbo_time.as_secs_f64() / 1_000_000.0;

    let ratio = turbo_speed / libdeflate_speed * 100.0;
    eprintln!(
        "libdeflate (C): {:>8.1?} = {:>7.1} MB/s",
        libdeflate_time / iterations as u32,
        libdeflate_speed
    );
    eprintln!(
        "Turbo (Rust): {:>8.1?} = {:>7.1} MB/s",
        turbo_time / iterations as u32,
        turbo_speed
    );
    eprintln!("Ratio: Turbo is {:.1}% of libdeflate", ratio);
}
/// Round-trips a four-part synthetic payload (generated source text,
/// pseudo-random bytes, a long run of `A`s, more generated text) through
/// flate2's fastest gzip level and `inflate_consume_first`.
///
/// On a byte mismatch it prints which part the first mismatch falls in plus a
/// hex/ASCII window around it, then panics. It also panics on a size mismatch
/// or a decode error.
#[test]
fn test_tarball_l1_diagnostic() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    /// Returns the byte as a char when it is printable ASCII, `.` otherwise.
    fn printable(byte: u8) -> char {
        if (32..127).contains(&byte) {
            byte as char
        } else {
            '.'
        }
    }

    /// Prints `bytes[start..end]` as a hex row and an ASCII row, putting
    /// brackets around the element at index `marked`.
    fn dump_window(bytes: &[u8], start: usize, end: usize, marked: usize) {
        eprint!(" Hex: ");
        for (offset, &byte) in bytes[start..end].iter().enumerate() {
            if start + offset == marked {
                eprint!("[{:02x}]", byte);
            } else {
                eprint!(" {:02x} ", byte);
            }
        }
        eprintln!();
        eprint!(" ASCII: ");
        for (offset, &byte) in bytes[start..end].iter().enumerate() {
            if start + offset == marked {
                eprint!("[{}]", printable(byte));
            } else {
                eprint!(" {} ", printable(byte));
            }
        }
        eprintln!();
    }

    eprintln!("\n{}", "=".repeat(70));
    eprintln!("DIAGNOSTIC: Tarball L1 Decompression Test");
    eprintln!("{}\n", "=".repeat(70));

    // Part 1: repetitive source-like text.
    let mut original = Vec::new();
    for i in 0..1000 {
        let line = format!(
            " fn function_{}(arg: u32) -> Result<String, Error> {{\n \
             let value = arg * {} + {};\n Ok(format!(\"result: {{}}\", value))\n }}\n\n",
            i,
            i % 17,
            i % 31
        );
        original.extend_from_slice(line.as_bytes());
    }
    eprintln!("Part 1 (source code): {} bytes", original.len());

    // Part 2: pseudo-random bytes. The multiplication is explicitly wrapping:
    // with an inferred i32 the product exceeds i32::MAX from i = 113 and would
    // panic when overflow checks are enabled. Only the low byte is kept, so the
    // wrapping u32 product yields the same data as a wrapping i32 one.
    let binary_start = original.len();
    original.extend((0u32..50_000).map(|i| (i.wrapping_mul(0x1234567) ^ (i >> 3)) as u8));
    eprintln!(
        "Part 2 (binary): {} bytes (starts at {})",
        original.len() - binary_start,
        binary_start
    );

    // Part 3: one long run of a single byte value.
    let rle_start = original.len();
    for _ in 0..10000 {
        original.extend_from_slice(b"AAAAAAAAAAAAAAAA");
    }
    eprintln!(
        "Part 3 (repetitive): {} bytes (starts at {})",
        original.len() - rle_start,
        rle_start
    );

    // Part 4: a second block of generated text.
    let code2_start = original.len();
    for i in 0..500 {
        let line = format!(
            "// Comment line {} with some text\nconst VALUE_{}: u64 = {};\n",
            i,
            i,
            i * 12345
        );
        original.extend_from_slice(line.as_bytes());
    }
    eprintln!(
        "Part 4 (more code): {} bytes (starts at {})",
        original.len() - code2_start,
        code2_start
    );
    eprintln!("\nTotal original size: {} bytes", original.len());

    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
    encoder.write_all(&original).unwrap();
    let compressed = encoder.finish().unwrap();
    eprintln!(
        "Compressed size: {} bytes (ratio {:.1}%)",
        compressed.len(),
        compressed.len() as f64 / original.len() as f64 * 100.0
    );

    // Walk the gzip header: 10 fixed bytes, then the optional extra field
    // (0x04), file name (0x08), comment (0x10) and header CRC (0x02).
    assert!(compressed[0] == 0x1f && compressed[1] == 0x8b, "Not gzip");
    let flags = compressed[3];
    let mut header_size = 10;
    if flags & 0x04 != 0 {
        let xlen = u16::from_le_bytes([compressed[10], compressed[11]]) as usize;
        header_size = 12 + xlen;
    }
    // File name and comment are both NUL-terminated strings.
    for string_flag in [0x08u8, 0x10] {
        if flags & string_flag != 0 {
            while header_size < compressed.len() && compressed[header_size] != 0 {
                header_size += 1;
            }
            header_size += 1;
        }
    }
    if flags & 0x02 != 0 {
        header_size += 2;
    }
    let deflate_data = &compressed[header_size..compressed.len() - 8];
    eprintln!(
        "Deflate data: {} bytes (header={}, trailer=8)",
        deflate_data.len(),
        header_size
    );

    let mut output = vec![0u8; original.len() + 1024];
    eprintln!("\nDecompressing with consume_first_decode...");
    let result = crate::decompress::inflate::consume_first_decode::inflate_consume_first(
        deflate_data,
        &mut output,
    );
    let decompressed_size = match result {
        Ok(size) => size,
        Err(e) => {
            eprintln!("\n*** DECOMPRESSION FAILED: {} ***", e);
            panic!("Decompression failed: {}", e);
        }
    };

    eprintln!("Decompression succeeded: {} bytes", decompressed_size);
    if decompressed_size != original.len() {
        eprintln!(
            "\n*** SIZE MISMATCH: expected {}, got {} ***",
            original.len(),
            decompressed_size
        );
    }

    // Compare only the overlap so a size mismatch still yields a byte report.
    let check_len = decompressed_size.min(original.len());
    let mismatch_pos = output[..check_len]
        .iter()
        .zip(&original[..check_len])
        .position(|(got, want)| got != want);

    let Some(pos) = mismatch_pos else {
        if decompressed_size == original.len() {
            eprintln!("\n*** SUCCESS: Output matches original exactly! ***");
            return;
        }
        panic!(
            "Size mismatch but no byte mismatch in overlap - expected {}, got {}",
            original.len(),
            decompressed_size
        );
    };

    eprintln!("\n{}", "*".repeat(70));
    eprintln!("*** FIRST MISMATCH AT POSITION {} ***", pos);
    eprintln!("{}", "*".repeat(70));
    let section = if pos < binary_start {
        "Part 1 (source code)"
    } else if pos < rle_start {
        "Part 2 (binary)"
    } else if pos < code2_start {
        "Part 3 (repetitive)"
    } else {
        "Part 4 (more code)"
    };
    eprintln!("Section: {}", section);

    let ctx_start = pos.saturating_sub(32);
    let ctx_end = (pos + 64).min(check_len);
    eprintln!("\nExpected bytes around position {}:", pos);
    eprintln!(" Offset: {:6} to {:6}", ctx_start, ctx_end);
    dump_window(&original, ctx_start, ctx_end, pos);
    eprintln!("\nActual bytes (our output):");
    dump_window(&output, ctx_start, ctx_end, pos);

    eprintln!(
        "\nAt position {}: expected 0x{:02x} ('{}'), got 0x{:02x} ('{}')",
        pos,
        original[pos],
        printable(original[pos]),
        output[pos],
        printable(output[pos])
    );
    let total_mismatches = output[..check_len]
        .iter()
        .zip(&original[..check_len])
        .filter(|(got, want)| got != want)
        .count();
    eprintln!(
        "\nTotal mismatches: {} out of {} bytes ({:.2}%)",
        total_mismatches,
        check_len,
        total_mismatches as f64 / check_len as f64 * 100.0
    );
    panic!(
        "Decompression mismatch at position {} (expected 0x{:02x}, got 0x{:02x})",
        pos, original[pos], output[pos]
    );
}
/// Compresses a synthetic payload with the system `gzip -1` binary and checks
/// that `inflate_consume_first` reproduces it byte for byte.
///
/// Skips (returns early with a message) when the `gzip` executable cannot be
/// spawned. Panics if gzip fails, on a decode error, on a size mismatch, or on
/// the first differing byte (after printing a hex window around it).
#[test]
fn test_with_system_gzip() {
    use crate::decompress::inflate::consume_first_decode::inflate_consume_first;
    use std::process::Command;

    if Command::new("gzip").arg("--version").output().is_err() {
        eprintln!("gzip not available, skipping system gzip test");
        return;
    }
    eprintln!("\n{}", "=".repeat(70));
    eprintln!("Testing with SYSTEM GZIP binary");
    eprintln!("{}\n", "=".repeat(70));

    // Payload: generated source-like text, pseudo-random bytes, then a
    // repeated 12-byte pattern.
    let mut original = Vec::new();
    for i in 0..500 {
        let line = format!(
            "pub fn func_{}(x: i32) -> i32 {{ x * {} + {} }}\n",
            i,
            i % 17,
            i % 31
        );
        original.extend_from_slice(line.as_bytes());
    }
    // Explicitly wrapping multiply: with an inferred i32 the product exceeds
    // i32::MAX from i = 113 and would panic when overflow checks are enabled.
    // Only the low byte is kept, so the generated data matches what a wrapping
    // i32 computation produces.
    original.extend((0u32..20_000).map(|i| (i.wrapping_mul(0x1234567) ^ (i >> 3)) as u8));
    for _ in 0..5000 {
        original.extend_from_slice(b"REPEATREPEAT");
    }
    eprintln!("Original data: {} bytes", original.len());

    let tmp_dir = std::env::temp_dir();
    std::fs::create_dir_all(&tmp_dir).ok();
    let input_path = tmp_dir.join("gzip_test_input.bin");
    let compressed_path = tmp_dir.join("gzip_test_input.bin.gz");
    std::fs::write(&input_path, &original).expect("Failed to write input");

    // Flags passed to gzip: -1, -f, -k, followed by the input path.
    let output = Command::new("gzip")
        .arg("-1")
        .arg("-f")
        .arg("-k")
        .arg(&input_path)
        .output()
        .expect("Failed to run gzip");
    if !output.status.success() {
        eprintln!("gzip failed: {}", String::from_utf8_lossy(&output.stderr));
        panic!("gzip compression failed");
    }
    // Fall back to an alternative output name if the expected one is absent.
    let compressed_path = if compressed_path.exists() {
        compressed_path
    } else {
        tmp_dir.join("gzip_test_input.gz")
    };
    let compressed = std::fs::read(&compressed_path).expect("Failed to read compressed file");
    eprintln!(
        "Compressed with gzip -1: {} bytes ({:.1}%)",
        compressed.len(),
        compressed.len() as f64 / original.len() as f64 * 100.0
    );

    // Walk the gzip header: 10 fixed bytes, then the optional extra field
    // (0x04), file name (0x08), comment (0x10) and header CRC (0x02).
    assert!(
        compressed[0] == 0x1f && compressed[1] == 0x8b,
        "Not gzip format"
    );
    let flags = compressed[3];
    let mut header_size = 10;
    if flags & 0x04 != 0 {
        let xlen = u16::from_le_bytes([compressed[10], compressed[11]]) as usize;
        header_size = 12 + xlen;
    }
    // File name and comment are both NUL-terminated strings.
    for string_flag in [0x08u8, 0x10] {
        if flags & string_flag != 0 {
            while header_size < compressed.len() && compressed[header_size] != 0 {
                header_size += 1;
            }
            header_size += 1;
        }
    }
    if flags & 0x02 != 0 {
        header_size += 2;
    }
    let deflate_data = &compressed[header_size..compressed.len() - 8];
    eprintln!("Deflate data: {} bytes", deflate_data.len());

    let mut output_buf = vec![0u8; original.len() + 1024];
    eprintln!("Decompressing...");
    let result = inflate_consume_first(deflate_data, &mut output_buf);

    // Best-effort cleanup before any assertion can panic.
    let _ = std::fs::remove_file(&input_path);
    let _ = std::fs::remove_file(&compressed_path);

    let size = match result {
        Ok(size) => size,
        Err(e) => panic!("Decompression failed: {}", e),
    };
    eprintln!("Decompressed: {} bytes", size);
    if size != original.len() {
        panic!("Size mismatch: expected {}, got {}", original.len(), size);
    }

    let first_mismatch = output_buf[..original.len()]
        .iter()
        .zip(&original)
        .position(|(got, want)| got != want);
    if let Some(i) = first_mismatch {
        eprintln!("\n{}", "*".repeat(70));
        eprintln!("MISMATCH at position {}", i);
        eprintln!("{}", "*".repeat(70));
        let start = i.saturating_sub(32);
        let end = (i + 64).min(original.len());

        eprintln!("\nExpected around {}:", i);
        eprint!(" ");
        for j in start..end {
            if j == i {
                eprint!("[{:02x}]", original[j]);
            } else {
                eprint!("{:02x} ", original[j]);
            }
        }
        eprintln!();

        eprintln!("\nGot:");
        eprint!(" ");
        for j in start..end {
            if j == i {
                eprint!("[{:02x}]", output_buf[j]);
            } else {
                eprint!("{:02x} ", output_buf[j]);
            }
        }
        eprintln!();

        panic!(
            "Mismatch at {}: expected 0x{:02x}, got 0x{:02x}",
            i, original[i], output_buf[i]
        );
    }
    eprintln!("\n*** SUCCESS: Output matches original ***");
}
/// `copy_match_into` must panic with "output buffer overflow" for this call on
/// a fully populated 10-byte buffer.
// NOTE(review): the trailing arguments are presumably (position, distance,
// length), i.e. 8 + 5 exceeds the 10-byte buffer - confirm against the
// function's definition.
#[test]
#[should_panic(expected = "output buffer overflow")]
fn test_copy_match_into_overflow_panics() {
    let mut buffer = b"ABCDEFGHIJ".to_vec();
    copy_match_into(&mut buffer, 8, 4, 5);
}
/// Checks the position returned by `copy_match_into` for a call on a 12-byte
/// buffer and that the first six bytes of the buffer are left untouched.
#[test]
fn test_copy_match_into_overflow_does_not_corrupt_previous() {
    let mut buffer = b"ABCDEFGHIJKL".to_vec();
    let end_pos = copy_match_into(&mut buffer, 6, 6, 4);
    assert_eq!(end_pos, 10);
    assert_eq!(&buffer[..6], b"ABCDEF");
}
/// Round-trips several hand-built payloads (long-distance copy, short
/// overlapping copy, byte runs, short-period repeats, alternating bytes, mixed
/// binary blocks) through flate2's fastest gzip level and
/// `inflate_consume_first`, panicking on the first size or byte mismatch.
#[test]
fn test_copy_match_edge_cases() {
    use crate::decompress::inflate::consume_first_decode::inflate_consume_first;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    /// Returns the bytes between the gzip header (fixed 10 bytes plus the
    /// optional extra, name, comment and header-CRC fields) and the 8-byte
    /// trailer.
    fn deflate_payload(gz: &[u8]) -> &[u8] {
        assert!(gz[0] == 0x1f && gz[1] == 0x8b);
        let flags = gz[3];
        let mut header_size = 10;
        if flags & 0x04 != 0 {
            let xlen = u16::from_le_bytes([gz[10], gz[11]]) as usize;
            header_size = 12 + xlen;
        }
        // File name (0x08) and comment (0x10) are NUL-terminated strings.
        for string_flag in [0x08u8, 0x10] {
            if flags & string_flag != 0 {
                while header_size < gz.len() && gz[header_size] != 0 {
                    header_size += 1;
                }
                header_size += 1;
            }
        }
        if flags & 0x02 != 0 {
            header_size += 2;
        }
        &gz[header_size..gz.len() - 8]
    }

    let test_cases: Vec<(&str, Vec<u8>)> = vec![
        ("large_dist_len", {
            let mut data = vec![0u8; 10000];
            for i in 0..100 {
                data[i] = (i * 7) as u8;
            }
            // Repeat the 100-byte pattern 4900 bytes later. The copy must read
            // from the patterned prefix; reading from data[100..200] would
            // only copy zeros onto zeros and leave no back-reference to test.
            for i in 4900..5000 {
                data[i] = data[i - 4900];
            }
            data
        }),
        ("medium_dist", {
            let mut data = vec![0u8; 1000];
            for i in 0..100 {
                data[i] = (i * 3) as u8;
            }
            // Extend the pattern with a distance-10 self-overlapping copy that
            // starts right after it, so the source bytes are non-zero.
            for i in 100..200 {
                data[i] = data[i - 10];
            }
            data
        }),
        ("rle_pattern", {
            let mut data = Vec::new();
            data.extend_from_slice(b"Hello");
            data.extend(vec![b'A'; 1000]);
            data.extend_from_slice(b"World");
            data.extend(vec![b'B'; 500]);
            data
        }),
        ("small_dist", {
            let mut data = Vec::new();
            data.extend_from_slice(b"ABCDEFGH");
            for _ in 0..100 {
                data.extend_from_slice(b"ABC");
            }
            data
        }),
        ("long_rle", vec![0x42u8; 100_000]),
        ("alternating", {
            let mut data = Vec::new();
            for i in 0..10000 {
                data.push(if i % 2 == 0 { 0xAA } else { 0x55 });
            }
            data
        }),
        ("binary_matches", {
            let mut data = vec![0u8; 50000];
            // First half of every 100-byte block gets a block-specific pattern.
            for i in 0..500 {
                let base = i * 100;
                for j in 0..50 {
                    data[base + j] = ((i * 7 + j * 13) & 0xFF) as u8;
                }
            }
            // Copy 50-byte windows from the lower half into the upper half.
            for i in 0..200 {
                let src = (i * 50) % 25000;
                let dst = 25000 + i * 100;
                for j in 0..50 {
                    if dst + j < data.len() {
                        data[dst + j] = data[src + j];
                    }
                }
            }
            data
        }),
    ];

    for (name, original) in test_cases {
        eprintln!("\n--- Testing: {} ({} bytes) ---", name, original.len());
        let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
        encoder.write_all(&original).unwrap();
        let compressed = encoder.finish().unwrap();
        eprintln!(
            " Compressed: {} bytes ({:.1}%)",
            compressed.len(),
            compressed.len() as f64 / original.len() as f64 * 100.0
        );

        let deflate_data = deflate_payload(&compressed);
        let mut output = vec![0u8; original.len() + 1024];
        let size = inflate_consume_first(deflate_data, &mut output)
            .unwrap_or_else(|e| panic!("Decompression failed for {}: {}", name, e));
        assert_eq!(size, original.len(), "{}: size mismatch", name);

        let first_mismatch = output[..original.len()]
            .iter()
            .zip(&original)
            .position(|(got, want)| got != want);
        if let Some(i) = first_mismatch {
            eprintln!(
                " MISMATCH at {}: expected 0x{:02x}, got 0x{:02x}",
                i, original[i], output[i]
            );
            let start = i.saturating_sub(16);
            let end = (i + 16).min(original.len());
            eprintln!(" Expected: {:?}", &original[start..end]);
            eprintln!(" Got: {:?}", &output[start..end]);
            panic!("{}: mismatch at position {}", name, i);
        }
        eprintln!(" OK!");
    }
}
/// Builds 1 MiB of mostly pseudo-random data with occasional short byte runs,
/// compresses it in 128 KiB chunks with `compress_block_bgzf_libdeflate`, and
/// verifies the result twice: member-by-member through `gzip_decompress_ex`,
/// then through `decompress_bgzf_parallel_to_vec` with a thread argument of 4.
#[test]
fn test_bgzf_roundtrip_barely_compressible() {
    use crate::compress::parallel::GzipHeaderInfo;

    const TOTAL_BYTES: usize = 1024 * 1024;
    const BLOCK_BYTES: usize = 128 * 1024;

    // Linear-congruential generator output, with a short run of one repeated
    // byte appended whenever the low state byte is below 30.
    let mut data = Vec::with_capacity(TOTAL_BYTES);
    let mut lcg: u64 = 0xdeadbeef;
    while data.len() < TOTAL_BYTES {
        lcg = lcg
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        data.extend_from_slice(&lcg.to_le_bytes());
        if (lcg & 0xff) < 30 {
            let run_len = ((lcg >> 8) & 0x3f) as usize + 1;
            let run_byte = (lcg >> 16) as u8;
            data.resize(data.len() + run_len, run_byte);
        }
    }
    data.truncate(TOTAL_BYTES);

    let header_info = GzipHeaderInfo {
        filename: Some(String::from("test.tar")),
        mtime: 1700000000,
        comment: None,
    };

    // Compress each chunk separately and concatenate the results.
    let mut compressed = Vec::new();
    for chunk in data.chunks(BLOCK_BYTES) {
        let mut block = Vec::new();
        crate::compress::parallel::compress_block_bgzf_libdeflate(
            &mut block,
            chunk,
            1,
            &header_info,
        );
        compressed.extend_from_slice(&block);
    }

    // Sequential reference decode, one gzip member at a time.
    let mut decompressor = crate::backends::libdeflate::DecompressorEx::new();
    let mut verify_buf = vec![0u8; data.len() + 1024];
    let mut written = 0usize;
    let mut consumed = 0usize;
    while consumed < compressed.len() {
        let at_gzip_magic = compressed[consumed] == 0x1f && compressed[consumed + 1] == 0x8b;
        if !at_gzip_magic {
            break;
        }
        let step = decompressor
            .gzip_decompress_ex(&compressed[consumed..], &mut verify_buf[written..])
            .expect("sequential gzip_decompress_ex should succeed");
        written += step.output_size;
        consumed += step.input_consumed;
    }
    assert_eq!(
        written,
        data.len(),
        "sequential decompress should produce correct size"
    );
    assert_eq!(
        &verify_buf[..written],
        &data[..],
        "sequential decompress should match original"
    );

    assert!(
        crate::decompress::format::has_bgzf_markers(&compressed),
        "compressed data should have BGZF markers"
    );

    // Parallel decode of the same stream.
    let parallel_output =
        decompress_bgzf_parallel_to_vec(&compressed, 4).expect("BGZF parallel should succeed");
    assert_eq!(
        parallel_output.len(),
        data.len(),
        "BGZF parallel output size mismatch: expected={} got={} delta={}",
        data.len(),
        parallel_output.len(),
        parallel_output.len() as i64 - data.len() as i64
    );
    if parallel_output != data {
        // Report only the first differing offset rather than both buffers.
        let first_diff = parallel_output
            .iter()
            .zip(data.iter())
            .position(|(got, want)| got != want)
            .unwrap_or(data.len().min(parallel_output.len()));
        panic!(
            "BGZF parallel output content mismatch: first_diff_at={} of {}",
            first_diff,
            data.len()
        );
    }
}
/// 10 MiB variant of the barely-compressible BGZF round trip: the data is
/// compressed in 128 KiB chunks with `compress_block_bgzf_libdeflate` and
/// decoded with `decompress_bgzf_parallel_to_vec` (thread argument 4); size and
/// content must match the input.
#[test]
fn test_bgzf_roundtrip_large_barely_compressible() {
    use crate::compress::parallel::GzipHeaderInfo;

    const TOTAL_BYTES: usize = 10 * 1024 * 1024;
    const BLOCK_BYTES: usize = 128 * 1024;

    // Linear-congruential generator output, with a short run of one repeated
    // byte appended whenever the low state byte is below 30.
    let mut data = Vec::with_capacity(TOTAL_BYTES);
    let mut lcg: u64 = 0xcafebabe;
    while data.len() < TOTAL_BYTES {
        lcg = lcg
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        data.extend_from_slice(&lcg.to_le_bytes());
        if (lcg & 0xff) < 30 {
            let run_len = ((lcg >> 8) & 0x3f) as usize + 1;
            let run_byte = (lcg >> 16) as u8;
            data.resize(data.len() + run_len, run_byte);
        }
    }
    data.truncate(TOTAL_BYTES);

    let header_info = GzipHeaderInfo {
        filename: Some(String::from("test.tar")),
        mtime: 1700000000,
        comment: None,
    };

    // Compress each chunk separately and concatenate the results.
    let mut compressed = Vec::new();
    for chunk in data.chunks(BLOCK_BYTES) {
        let mut block = Vec::new();
        crate::compress::parallel::compress_block_bgzf_libdeflate(
            &mut block,
            chunk,
            1,
            &header_info,
        );
        compressed.extend_from_slice(&block);
    }
    assert!(crate::decompress::format::has_bgzf_markers(&compressed));

    let parallel_output =
        decompress_bgzf_parallel_to_vec(&compressed, 4).expect("BGZF parallel should succeed");
    let (expected_len, actual_len) = (data.len(), parallel_output.len());
    assert_eq!(
        actual_len,
        expected_len,
        "Large BGZF parallel output size: expected={} got={} delta={}",
        expected_len,
        actual_len,
        actual_len as i64 - expected_len as i64
    );
    assert_eq!(
        parallel_output, data,
        "Large BGZF parallel output content mismatch"
    );
}
/// A single gzip member whose payload itself contains a complete gzip stream
/// must decode to exactly the original payload.
///
/// `decompress_multi_member_parallel` is tried first (thread argument 4); if
/// it returns an error the test falls back to `decompress_single_member`.
#[test]
fn test_single_member_with_embedded_gzip() {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    // Inner gzip stream that will be embedded verbatim in the outer payload.
    let inner_data = b"This is the inner gzip content that should not appear as extra output";
    let mut inner_encoder = GzEncoder::new(Vec::new(), Compression::default());
    inner_encoder.write_all(inner_data).unwrap();
    let inner_gz = inner_encoder.finish().unwrap();

    // Payload: 19 bytes of text + 493 zero bytes (512 in total), the inner
    // gzip stream, then a counting pattern up to 256 KiB.
    let mut payload = Vec::with_capacity(256 * 1024);
    payload.extend_from_slice(b"TAR HEADER PADDING ");
    payload.extend_from_slice(&[0u8; 493]);
    payload.extend_from_slice(&inner_gz);
    while payload.len() < 256 * 1024 {
        payload.push((payload.len() % 256) as u8);
    }

    let mut outer_encoder = GzEncoder::new(Vec::new(), Compression::fast());
    outer_encoder.write_all(&payload).unwrap();
    let compressed = outer_encoder.finish().unwrap();
    assert_eq!(compressed[0], 0x1f);
    assert_eq!(compressed[1], 0x8b);

    let mut output = Vec::new();
    let result = decompress_multi_member_parallel(&compressed, &mut output, 4);
    match result {
        Ok(size) => {
            let produced = size as usize;
            // The delta is signed: an unsigned `produced - payload.len()`
            // would itself overflow when the output is too short and hide the
            // real assertion message.
            assert_eq!(
                produced,
                payload.len(),
                "Multi-member parallel should produce exact original size: expected={} got={} delta={}",
                payload.len(),
                produced,
                produced as i64 - payload.len() as i64
            );
            assert_eq!(output, payload);
        }
        Err(_) => {
            // The parallel path returned an error: verify the sequential path.
            output.clear();
            decompress_single_member(&compressed, &mut output).expect("sequential should work");
            assert_eq!(output.len(), payload.len());
            assert_eq!(output, payload);
        }
    }
}
}