use crate::bits::BitWriter;
use crate::deflate::tables::{encode_distance, encode_length};
use crate::deflate::tokens::LZ77Token;
use crate::huffman::HuffmanEncoder;
const MAX_DISTANCE: usize = 32768;
pub struct BoundaryResolver {
decode_buf: Vec<u8>,
tail_len: usize,
current_len: usize,
position: u64,
refs_resolved: u64,
refs_preserved: u64,
}
impl BoundaryResolver {
pub fn new() -> Self {
Self {
decode_buf: Vec::with_capacity(MAX_DISTANCE + 72 * 1024),
tail_len: 0,
current_len: 0,
position: 0,
refs_resolved: 0,
refs_preserved: 0,
}
}
#[inline(always)]
fn push_byte(&mut self, byte: u8) {
self.decode_buf.push(byte);
self.current_len += 1;
}
#[inline]
fn copy_from_back(&mut self, distance: u16, length: u16) {
debug_assert!(distance > 0);
let dist = distance as usize;
let len = length as usize;
let src_start = self.decode_buf.len() - dist;
if dist >= len {
self.decode_buf.extend_from_within(src_start..src_start + len);
} else {
self.decode_buf.reserve(len);
for i in 0..len {
let byte = self.decode_buf[src_start + (i % dist)];
self.decode_buf.push(byte);
}
}
self.current_len += len;
}
fn rotate_tail(&mut self) {
let total = self.decode_buf.len();
let keep = total.min(MAX_DISTANCE);
if keep < total {
self.decode_buf.copy_within(total - keep..total, 0);
self.decode_buf.truncate(keep);
}
self.tail_len = keep;
self.current_len = 0;
}
pub fn resolve_block(
&mut self,
_block_start: u64,
tokens: &[LZ77Token],
) -> (Vec<LZ77Token>, u32, u32) {
let mut output = Vec::with_capacity(tokens.len());
for token in tokens {
match token {
LZ77Token::Literal(byte) => {
self.push_byte(*byte);
self.position += 1;
output.push(LZ77Token::Literal(*byte));
}
LZ77Token::Copy { length, distance } => {
let dist = *distance as usize;
let len = *length as usize;
if dist > self.current_len {
let src_start = self.decode_buf.len() - dist;
for i in 0..len {
let byte = self.decode_buf[src_start + (i % dist)];
self.push_byte(byte);
output.push(LZ77Token::Literal(byte));
}
self.refs_resolved += 1;
} else {
self.copy_from_back(*distance, *length);
output.push(LZ77Token::Copy { length: *length, distance: *distance });
self.refs_preserved += 1;
}
self.position += *length as u64;
}
LZ77Token::EndOfBlock => {}
}
}
let (crc, uncompressed_size) = self.finish_block();
(output, crc, uncompressed_size)
}
fn finish_block(&mut self) -> (u32, u32) {
let block_bytes = &self.decode_buf[self.tail_len..self.tail_len + self.current_len];
let crc = crc32fast::hash(block_bytes);
let uncompressed_size = self.current_len as u32;
self.rotate_tail();
(crc, uncompressed_size)
}
pub fn resolve_and_encode_fixed(
&mut self,
_block_start: u64,
tokens: &[LZ77Token],
encoder: &HuffmanEncoder,
) -> (Vec<u8>, u32, u32) {
let mut writer = BitWriter::with_capacity(tokens.len() * 2);
writer.write_bit(true); writer.write_bits(1, 2);
let lit_codes = encoder.fixed_lit_codes();
let dist_codes = encoder.fixed_dist_codes();
for token in tokens {
match token {
LZ77Token::Literal(byte) => {
self.push_byte(*byte);
self.position += 1;
let (code, len) = lit_codes[*byte as usize];
writer.write_bits(code, len);
}
LZ77Token::Copy { length, distance } => {
let dist = *distance as usize;
let len = *length as usize;
if dist > self.current_len {
let src_start = self.decode_buf.len() - dist;
for i in 0..len {
let byte = self.decode_buf[src_start + (i % dist)];
self.push_byte(byte);
let (code, code_len) = lit_codes[byte as usize];
writer.write_bits(code, code_len);
}
self.refs_resolved += 1;
} else {
self.copy_from_back(*distance, *length);
if let Some((len_code, extra_val, extra_bits)) = encode_length(*length) {
let (code, code_len) = lit_codes[len_code as usize];
writer.write_bits(code, code_len);
if extra_bits > 0 {
writer.write_bits(extra_val as u32, extra_bits);
}
}
if let Some((dist_code, extra_val, extra_bits)) = encode_distance(*distance)
{
let (code, code_len) = dist_codes[dist_code as usize];
writer.write_bits(code, code_len);
if extra_bits > 0 {
writer.write_bits(extra_val as u32, extra_bits);
}
}
self.refs_preserved += 1;
}
self.position += *length as u64;
}
LZ77Token::EndOfBlock => {}
}
}
let (code, len) = lit_codes[256];
writer.write_bits(code, len);
let deflate_data = writer.finish();
let (crc, uncompressed_size) = self.finish_block();
(deflate_data, crc, uncompressed_size)
}
pub fn position(&self) -> u64 {
self.position
}
pub fn stats(&self) -> (u64, u64) {
(self.refs_resolved, self.refs_preserved)
}
pub fn reset(&mut self) {
self.decode_buf.clear();
self.tail_len = 0;
self.current_len = 0;
self.position = 0;
self.refs_resolved = 0;
self.refs_preserved = 0;
}
}
impl Default for BoundaryResolver {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_literals_only() {
let mut resolver = BoundaryResolver::new();
let tokens = vec![LZ77Token::Literal(b'H'), LZ77Token::Literal(b'i')];
let (resolved, crc, size) = resolver.resolve_block(0, &tokens);
assert_eq!(resolved.len(), 2);
assert_eq!(resolved[0], LZ77Token::Literal(b'H'));
assert_eq!(resolved[1], LZ77Token::Literal(b'i'));
assert_eq!(resolver.position(), 2);
assert_eq!(size, 2);
assert_eq!(crc, crc32fast::hash(b"Hi"));
}
#[test]
fn test_copy_within_block() {
let mut resolver = BoundaryResolver::new();
let tokens = vec![
LZ77Token::Literal(b'A'),
LZ77Token::Literal(b'B'),
LZ77Token::Copy { length: 2, distance: 2 }, ];
let (resolved, crc, size) = resolver.resolve_block(0, &tokens);
assert_eq!(resolved.len(), 3);
assert!(matches!(resolved[2], LZ77Token::Copy { .. }));
assert_eq!(size, 4);
assert_eq!(crc, crc32fast::hash(b"ABAB"));
let (refs_resolved, refs_preserved) = resolver.stats();
assert_eq!(refs_resolved, 0);
assert_eq!(refs_preserved, 1);
}
#[test]
fn test_copy_crosses_boundary() {
let mut resolver = BoundaryResolver::new();
let tokens1 = vec![
LZ77Token::Literal(b'A'),
LZ77Token::Literal(b'B'),
LZ77Token::Literal(b'C'),
LZ77Token::Literal(b'D'),
];
let (_, crc1, size1) = resolver.resolve_block(0, &tokens1);
assert_eq!(resolver.position(), 4);
assert_eq!(size1, 4);
assert_eq!(crc1, crc32fast::hash(b"ABCD"));
let tokens2 = vec![
LZ77Token::Literal(b'E'),
LZ77Token::Copy { length: 2, distance: 5 }, ];
let (resolved, crc2, size2) = resolver.resolve_block(4, &tokens2);
assert_eq!(resolved.len(), 3);
assert_eq!(resolved[0], LZ77Token::Literal(b'E'));
assert_eq!(resolved[1], LZ77Token::Literal(b'A'));
assert_eq!(resolved[2], LZ77Token::Literal(b'B'));
assert_eq!(size2, 3);
assert_eq!(crc2, crc32fast::hash(b"EAB"));
let (refs_resolved, refs_preserved) = resolver.stats();
assert_eq!(refs_resolved, 1);
assert_eq!(refs_preserved, 0);
}
#[test]
fn test_mixed_copies() {
let mut resolver = BoundaryResolver::new();
let tokens1 = vec![
LZ77Token::Literal(b'A'),
LZ77Token::Literal(b'B'),
LZ77Token::Literal(b'C'),
LZ77Token::Literal(b'D'),
];
let _ = resolver.resolve_block(0, &tokens1);
let tokens2 = vec![
LZ77Token::Literal(b'E'),
LZ77Token::Copy { length: 2, distance: 5 }, LZ77Token::Copy { length: 2, distance: 1 }, ];
let (resolved, crc, size) = resolver.resolve_block(4, &tokens2);
assert_eq!(resolved.len(), 4);
assert_eq!(resolved[0], LZ77Token::Literal(b'E'));
assert_eq!(resolved[1], LZ77Token::Literal(b'A'));
assert_eq!(resolved[2], LZ77Token::Literal(b'B'));
assert!(matches!(resolved[3], LZ77Token::Copy { length: 2, distance: 1 }));
assert_eq!(size, 5);
assert_eq!(crc, crc32fast::hash(b"EABBB")); }
#[test]
fn test_large_history_cross_boundary_rle() {
let mut resolver = BoundaryResolver::new();
let mut tokens1 = Vec::new();
for i in 0..40000u32 {
tokens1.push(LZ77Token::Literal((i & 0xFF) as u8));
}
let (_, _, size1) = resolver.resolve_block(0, &tokens1);
assert_eq!(size1, 40000);
let tokens2 = vec![
LZ77Token::Literal(b'X'),
LZ77Token::Literal(b'Y'),
LZ77Token::Literal(b'Z'),
LZ77Token::Copy { length: 5, distance: 100 }, LZ77Token::Copy { length: 6, distance: 3 }, ];
let (resolved, crc, size2) = resolver.resolve_block(40000, &tokens2);
assert_eq!(size2, 3 + 5 + 6);
assert_eq!(resolved[0], LZ77Token::Literal(b'X'));
assert_eq!(resolved[1], LZ77Token::Literal(b'Y'));
assert_eq!(resolved[2], LZ77Token::Literal(b'Z'));
for token in &resolved[3..8] {
assert!(matches!(token, LZ77Token::Literal(_)));
}
assert!(matches!(resolved[8], LZ77Token::Copy { length: 6, distance: 3 }));
let mut expected = vec![b'X', b'Y', b'Z'];
for i in 0..5u32 {
expected.push(((39903 + i) & 0xFF) as u8);
}
let pattern: Vec<u8> = expected[expected.len() - 3..].to_vec();
for i in 0..6 {
expected.push(pattern[i % 3]);
}
assert_eq!(crc, crc32fast::hash(&expected));
let (refs_resolved, refs_preserved) = resolver.stats();
assert_eq!(refs_resolved, 1);
assert_eq!(refs_preserved, 1);
}
#[test]
fn test_fused_encode_parity() {
use crate::huffman::HuffmanEncoder;
let mut encoder = HuffmanEncoder::new(true);
let mut tokens1 = Vec::new();
for i in 0..1000u32 {
tokens1.push(LZ77Token::Literal((i & 0xFF) as u8));
}
let tokens2 = vec![
LZ77Token::Literal(b'A'),
LZ77Token::Literal(b'B'),
LZ77Token::Copy { length: 4, distance: 500 }, LZ77Token::Literal(b'C'),
LZ77Token::Copy { length: 3, distance: 3 }, LZ77Token::Copy { length: 5, distance: 2 }, ];
let mut resolver_2pass = BoundaryResolver::new();
resolver_2pass.resolve_block(0, &tokens1);
let (resolved, crc_2pass, size_2pass) = resolver_2pass.resolve_block(1000, &tokens2);
let deflate_2pass = encoder.encode(&resolved, true).unwrap();
let mut resolver_fused = BoundaryResolver::new();
resolver_fused.resolve_block(0, &tokens1);
let (deflate_fused, crc_fused, size_fused) =
resolver_fused.resolve_and_encode_fixed(1000, &tokens2, &encoder);
assert_eq!(deflate_fused, deflate_2pass, "DEFLATE output must match");
assert_eq!(crc_fused, crc_2pass, "CRC must match");
assert_eq!(size_fused, size_2pass, "Uncompressed size must match");
}
}