/// Hash-table driven LZ77-style compressor.
///
/// The compressor scans the input, looks up each four-byte window in a
/// 0x8000-slot hash table of previously seen positions and, when the window
/// repeats, emits a back-reference (match) instead of the raw bytes. Bytes
/// that are not covered by a match are emitted as literal runs, and every
/// stream ends with the three bytes `0x11 0x00 0x00`.
///
/// NOTE(review): the opcode layout appears to follow the compression scheme
/// used by DWG 2004 (AC18) sections, as the type name suggests — confirm
/// against the decoder used by this project.
pub struct DwgLZ77AC18Compressor {
    /// Maps a window hash to the most recent source position that produced
    /// it. Empty slots hold `-1`.
    block: Vec<i32>,
}

impl DwgLZ77AC18Compressor {
    /// Number of slots in the position hash table.
    const TABLE_SIZE: usize = 0x8000;
    /// Marker stored in hash-table slots that hold no position yet.
    const EMPTY_SLOT: i32 = -1;
    /// Largest back-reference distance the encoder will use.
    const MAX_DISTANCE: usize = 0xBFFF;
    /// Distances up to this value may use the compact two-byte match form and
    /// may be only three bytes long.
    const NEAR_DISTANCE: usize = 0x400;
    /// Distances up to this value use the `0x20` opcode family; larger ones
    /// use the `0x10` family.
    const MID_DISTANCE: usize = 0x4000;
    /// Value XOR-ed into the (masked) primary slot to get the secondary slot.
    const SECONDARY_SLOT_XOR: usize = 0x401F;
    /// Bytes appended to every compressed stream.
    const TERMINATOR: [u8; 3] = [0x11, 0x00, 0x00];

    /// Creates a compressor with an empty hash table.
    pub fn new() -> Self {
        Self {
            block: vec![Self::EMPTY_SLOT; Self::TABLE_SIZE],
        }
    }

    /// Clears the hash table so that no position from an earlier call can be
    /// used as a match candidate.
    fn restart_block(&mut self) {
        self.block.fill(Self::EMPTY_SLOT);
    }

    /// Compresses `source[offset..offset + total_size]` and returns the
    /// encoded bytes, terminated by `0x11 0x00 0x00`.
    ///
    /// The compressor may be reused: the hash table is reset on every call.
    /// An empty range (`total_size == 0`) yields only the terminator.
    ///
    /// NOTE(review): for ranges of 1 to 3 bytes no literal-length opcode is
    /// written (such short runs are only representable in the low bits of a
    /// preceding match), so the stream starts with the raw bytes — confirm
    /// that callers never compress ranges that short.
    ///
    /// # Panics
    ///
    /// Panics if `total_size > 0` and `offset + total_size` overflows or
    /// exceeds `source.len()`.
    pub fn compress(&mut self, source: &[u8], offset: usize, total_size: usize) -> Vec<u8> {
        if total_size == 0 {
            return Self::TERMINATOR.to_vec();
        }

        // Validate once, up front, so an invalid range fails immediately with
        // a clear message instead of an index panic in the middle of encoding.
        let total_offset = match offset.checked_add(total_size) {
            Some(end) if end <= source.len() => end,
            _ => panic!(
                "compress: invalid range: offset {} + total_size {} exceeds source length {}",
                offset,
                total_size,
                source.len()
            ),
        };

        self.restart_block();
        let mut dest: Vec<u8> = Vec::with_capacity(total_size);

        // Start of the literal run that has not been written yet.
        let mut curr_offset = offset;
        // The first four bytes are always literals, so the first literal run
        // is long enough to get its own length opcode.
        let mut curr_position = offset + 4;
        // Matching stops 0x13 bytes before the end; the tail is emitted as
        // literals.
        let scan_end = total_offset.saturating_sub(0x13);
        // Last match found as `(distance, length)`. It is written lazily
        // because its encoding also carries the length of the literal run
        // that follows it.
        let mut pending: Option<(usize, usize)> = None;

        while curr_position < scan_end {
            let (length, distance) =
                match self.compress_chunk(source, offset, total_offset, curr_position) {
                    Some(hit) => hit,
                    None => {
                        curr_position += 1;
                        continue;
                    }
                };

            let literal_length = curr_position - curr_offset;
            if let Some((prev_distance, prev_length)) = pending {
                Self::apply_mask(&mut dest, prev_distance, prev_length, literal_length);
            }
            Self::write_literal_length(&mut dest, source, curr_offset, literal_length);

            curr_position += length;
            curr_offset = curr_position;
            pending = Some((distance, length));
        }

        let literal_length = total_offset - curr_offset;
        if let Some((prev_distance, prev_length)) = pending {
            Self::apply_mask(&mut dest, prev_distance, prev_length, literal_length);
        }
        Self::write_literal_length(&mut dest, source, curr_offset, literal_length);

        dest.extend_from_slice(&Self::TERMINATOR);
        dest
    }

    /// Writes the extended part of a length: one `0x00` byte for every `0xFF`
    /// that is subtracted from `len`, followed by the remainder
    /// (`1..=0xFF`).
    fn write_len(dest: &mut Vec<u8>, mut len: usize) {
        debug_assert!(len > 0);
        while len > 0xFF {
            len -= 0xFF;
            dest.push(0);
        }
        dest.push(len as u8);
    }

    /// Writes an opcode together with a length.
    ///
    /// When `compression_offset <= threshold` the value
    /// `compression_offset - 2` is OR-ed into the opcode byte; otherwise the
    /// bare opcode is written and `compression_offset - threshold` follows in
    /// the extended form produced by [`Self::write_len`].
    fn write_opcode(
        dest: &mut Vec<u8>,
        opcode: u8,
        compression_offset: usize,
        threshold: usize,
    ) {
        // The inline form stores `compression_offset - 2`, so anything below
        // 2 cannot be encoded.
        debug_assert!(compression_offset >= 2);
        if compression_offset <= threshold {
            dest.push(opcode | (compression_offset as u8 - 2));
        } else {
            dest.push(opcode);
            Self::write_len(dest, compression_offset - threshold);
        }
    }

    /// Writes the literal run `source[start..start + length]`.
    ///
    /// Runs longer than three bytes are preceded by a literal-length opcode.
    /// Shorter runs are written bare because their length is stored in the
    /// low two bits of the preceding match (see [`Self::apply_mask`]).
    fn write_literal_length(dest: &mut Vec<u8>, source: &[u8], start: usize, length: usize) {
        if length == 0 {
            return;
        }
        if length > 3 {
            Self::write_opcode(dest, 0, length - 1, 0x11);
        }
        dest.extend_from_slice(&source[start..start + length]);
    }

    /// Writes a match of `compression_offset` bytes located `match_position`
    /// bytes back, followed by the two offset bytes.
    ///
    /// `mask` is the length of the literal run that follows the match; when
    /// it is below 4 it is stored in the low two bits of the first offset
    /// byte.
    fn apply_mask(
        dest: &mut Vec<u8>,
        match_position: usize,
        compression_offset: usize,
        mask: usize,
    ) {
        let long_form =
            compression_offset >= 0x0F || match_position > Self::NEAR_DISTANCE;

        let (mut first, second) = if long_form {
            let distance = if match_position <= Self::MID_DISTANCE {
                Self::write_opcode(dest, 0x20, compression_offset, 0x21);
                match_position - 1
            } else {
                let far = match_position - Self::MID_DISTANCE;
                // Bit 14 of the reduced distance is carried in bit 3 of the
                // opcode.
                Self::write_opcode(
                    dest,
                    0x10 | ((far >> 11) as u8 & 8),
                    compression_offset,
                    0x09,
                );
                far
            };
            // The casts deliberately keep only the low eight bits.
            (((distance & 0xFF) << 2) as u8, (distance >> 6) as u8)
        } else {
            // Compact form: length and the two low distance bits share the
            // first byte.
            let distance = match_position - 1;
            (
                ((compression_offset + 1) << 4) as u8 | ((distance & 0x03) << 2) as u8,
                (distance >> 2) as u8,
            )
        };

        if mask < 4 {
            first |= mask as u8;
        }
        dest.push(first);
        dest.push(second);
    }

    /// Reads hash-table slot `slot` and returns the stored position together
    /// with its distance from `curr_position` (`usize::MAX` for an empty
    /// slot).
    fn probe(&self, slot: usize, curr_position: usize) -> (i32, usize) {
        let value = self.block[slot];
        let distance = if value >= 0 {
            curr_position.wrapping_sub(value as usize)
        } else {
            usize::MAX
        };
        (value, distance)
    }

    /// Looks for a match for the bytes at `curr_position` and records
    /// `curr_position` in the hash table.
    ///
    /// Returns `Some((length, distance))` when at least three bytes match a
    /// previously recorded position that lies at or after `initial_offset`
    /// and at most `0xBFFF` bytes back; the match is extended up to
    /// `total_offset`. Returns `None` otherwise.
    ///
    /// The caller must guarantee `curr_position + 3 < total_offset` and
    /// `total_offset <= source.len()`.
    fn compress_chunk(
        &mut self,
        source: &[u8],
        initial_offset: usize,
        total_offset: usize,
        curr_position: usize,
    ) -> Option<(usize, usize)> {
        let window = &source[curr_position..curr_position + 4];
        let fourth = window[3];

        // Hash of the four-byte window, reduced to a table index.
        let v1 = (fourth as i32) << 6;
        let v2 = v1 ^ window[2] as i32;
        let v3 = (v2 << 5) ^ window[1] as i32;
        let v4 = (v3 << 5) ^ window[0] as i32;
        let mut slot = ((v4.wrapping_add(v4 >> 5)) & 0x7FFF) as usize;

        let lowest = initial_offset as i32;
        let (mut value, mut distance) = self.probe(slot, curr_position);
        let mut found = None;

        if value >= lowest && distance <= Self::MAX_DISTANCE {
            // Beyond the near range the fourth byte must match as well, which
            // guarantees such matches are at least four bytes long. If it
            // does not, try the secondary slot once.
            if distance > Self::NEAR_DISTANCE && fourth != source[value as usize + 3] {
                slot = (slot & 0x7FF) ^ Self::SECONDARY_SLOT_XOR;
                let second_try = self.probe(slot, curr_position);
                value = second_try.0;
                distance = second_try.1;

                if value < lowest
                    || distance > Self::MAX_DISTANCE
                    || (distance > Self::NEAR_DISTANCE
                        && fourth != source[value as usize + 3])
                {
                    self.block[slot] = curr_position as i32;
                    return None;
                }
            }

            let candidate = value as usize;
            if source[curr_position..curr_position + 3] == source[candidate..candidate + 3] {
                // Extend the match byte by byte until a mismatch or the end
                // of the range being compressed.
                let extra = source[candidate + 3..]
                    .iter()
                    .zip(source[curr_position + 3..total_offset].iter())
                    .take_while(|&(earlier, current)| earlier == current)
                    .count();
                found = Some((3 + extra, distance));
            }
        }

        self.block[slot] = curr_position as i32;
        found
    }
}

impl Default for DwgLZ77AC18Compressor {
    /// Equivalent to [`DwgLZ77AC18Compressor::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Three bytes expected at the very end of every compressed stream.
    const TERMINATOR: [u8; 3] = [0x11, 0x00, 0x00];

    /// Returns the final three bytes of `bytes`.
    fn last_three(bytes: &[u8]) -> &[u8] {
        let len = bytes.len();
        &bytes[len - 3..]
    }

    /// Despite its name this feeds four zero bytes, the smallest input used
    /// by this suite, and checks that the stream is terminated.
    #[test]
    fn test_compress_empty() {
        let data = [0u8; 4];
        let result = DwgLZ77AC18Compressor::new().compress(&data, 0, data.len());

        assert!(result.len() >= 3);
        assert_eq!(last_three(&result), &TERMINATOR);
    }

    /// A short text without useful repetition still ends with the terminator.
    #[test]
    fn test_compress_small_data() {
        let text = b"Hello, World! This is a test.";
        let packed = DwgLZ77AC18Compressor::new().compress(text, 0, text.len());

        assert_eq!(last_three(&packed), &TERMINATOR);
    }

    /// A periodic input must come out smaller than it went in.
    #[test]
    fn test_compress_repetitive_data() {
        let data = b"ABCDEFGH".repeat(100);
        let result = DwgLZ77AC18Compressor::new().compress(&data, 0, data.len());

        assert!(
            result.len() < data.len(),
            "Compressed {} >= original {}",
            result.len(),
            data.len()
        );
    }

    /// A long run of zeros must shrink to less than half its size.
    #[test]
    fn test_compress_zeros() {
        let data = [0u8; 1000];
        let result = DwgLZ77AC18Compressor::new().compress(&data, 0, data.len());

        assert!(result.len() < data.len() / 2);
    }

    /// Compressing a sub-range that starts after a ten-byte prefix still
    /// yields a terminated stream.
    #[test]
    fn test_compress_with_offset() {
        let buffer = [vec![0xFFu8; 10], vec![0u8; 100]].concat();
        let packed = DwgLZ77AC18Compressor::new().compress(&buffer, 10, 100);

        assert_eq!(last_three(&packed), &TERMINATOR);
    }

    /// One compressor instance, reused for several input sizes, terminates
    /// every stream it produces.
    #[test]
    fn test_terminator_always_present() {
        let mut comp = DwgLZ77AC18Compressor::new();

        for &size in &[4usize, 10, 50, 100, 500] {
            let input = vec![42u8; size];
            let packed = comp.compress(&input, 0, input.len());

            assert_eq!(
                last_three(&packed),
                &TERMINATOR,
                "Missing terminator for size {}",
                size
            );
        }
    }
}