use crate::error::{ArchiveError, Result};
use bitstream_io::{BitRead, BitReader, LittleEndian};
const TABLE_SIZE: usize = 8192;
const RESERVED_ENTRIES: usize = 257;
#[derive(Clone, Copy, Default)]
struct LzwEntry {
parent: i32,
byte: u8,
}
struct CrushedState {
table: Vec<LzwEntry>,
usage: Vec<u8>,
table_size: usize,
code_bits: u32,
next_bump: usize,
literal_mode: bool,
ring_buffer: [bool; 500],
ring_pos: usize,
string_count: usize,
usage_pos: usize,
prev_sym: Option<usize>,
}
impl CrushedState {
fn new() -> Self {
let mut table = vec![LzwEntry::default(); TABLE_SIZE];
for (i, entry) in table.iter_mut().enumerate().take(256) {
*entry = LzwEntry { parent: -1, byte: i as u8 };
}
table[256] = LzwEntry { parent: -1, byte: 0 };
let mut usage = vec![0u8; TABLE_SIZE];
for item in usage.iter_mut().take(RESERVED_ENTRIES) {
*item = 4; }
Self {
table,
usage,
table_size: RESERVED_ENTRIES,
code_bits: 1,
next_bump: 2,
literal_mode: true,
ring_buffer: [false; 500],
ring_pos: 0,
string_count: 0,
usage_pos: RESERVED_ENTRIES,
prev_sym: None,
}
}
#[inline]
fn table_full(&self) -> bool {
self.table_size >= TABLE_SIZE
}
fn first_byte(&self, mut sym: usize) -> u8 {
let mut iterations = 0usize;
while sym < TABLE_SIZE && self.table[sym].parent >= 0 {
iterations += 1;
if iterations > TABLE_SIZE {
break; }
sym = self.table[sym].parent as usize;
}
self.table.get(sym).map(|e| e.byte).unwrap_or(0)
}
fn mark_used(&mut self, mut sym: usize) {
let mut iterations = 0usize;
while sym < TABLE_SIZE {
iterations += 1;
if iterations > TABLE_SIZE {
break; }
self.usage[sym] = 4;
let parent = self.table[sym].parent;
if parent < 0 {
break;
}
sym = parent as usize;
}
}
fn update_mode(&mut self, is_string: bool) {
if self.ring_buffer[self.ring_pos] {
self.string_count = self.string_count.saturating_sub(1);
}
self.ring_buffer[self.ring_pos] = is_string;
if is_string {
self.string_count += 1;
}
self.ring_pos = (self.ring_pos + 1) % 500;
let use_literal_mode = self.string_count < 375;
if use_literal_mode != self.literal_mode {
self.literal_mode = use_literal_mode;
self.next_bump = 1usize << self.code_bits;
if !self.literal_mode {
self.next_bump = self.next_bump.saturating_sub(0x100);
}
}
}
fn check_code_size(&mut self) {
let added = self.table_size.saturating_sub(RESERVED_ENTRIES);
if added >= self.next_bump && self.code_bits < 13 {
self.code_bits += 1;
self.next_bump = 1usize << self.code_bits;
if !self.literal_mode {
self.next_bump = self.next_bump.saturating_sub(0x100);
}
}
}
fn read_symbol<R: BitRead>(&self, reader: &mut R) -> Result<usize> {
let sym = if self.literal_mode {
let is_string = reader
.read_bit()
.map_err(|e| ArchiveError::decompression_failed("Crushed", format!("read error: {e}")))?;
if is_string {
let code: u16 = reader
.read_var(self.code_bits)
.map_err(|e| ArchiveError::decompression_failed("Crushed", format!("read error: {e}")))?;
code as usize + 256
} else {
reader
.read::<8, u8>()
.map_err(|e| ArchiveError::decompression_failed("Crushed", format!("read error: {e}")))? as usize
}
} else {
let code: u16 = reader
.read_var(self.code_bits)
.map_err(|e| ArchiveError::decompression_failed("Crushed", format!("read error: {e}")))?;
let code = code as usize;
if code < 0x100 {
code ^ 0xFF
} else {
code
}
};
Ok(sym)
}
fn decode_string(&self, sym: usize, out: &mut Vec<u8>) -> Result<()> {
out.clear();
if sym == self.table_size {
let prev = self
.prev_sym
.ok_or_else(|| ArchiveError::decompression_failed("Crushed", "KwKwK without previous symbol"))?;
let mut s = prev;
let mut iterations = 0usize;
while s < TABLE_SIZE {
iterations += 1;
if iterations > TABLE_SIZE {
return Err(ArchiveError::decompression_failed(
"Crushed",
"infinite loop detected (corrupt data or wrong password)",
));
}
let entry = &self.table[s];
out.push(entry.byte);
if entry.parent < 0 {
break;
}
s = entry.parent as usize;
}
let first = *out.last().unwrap_or(&0);
out.insert(0, first);
return Ok(());
}
if sym >= TABLE_SIZE {
return Err(ArchiveError::decompression_failed("Crushed", format!("invalid symbol {sym}")));
}
let mut s = sym;
let mut iterations = 0usize;
while s < TABLE_SIZE {
iterations += 1;
if iterations > TABLE_SIZE {
return Err(ArchiveError::decompression_failed(
"Crushed",
"infinite loop detected (corrupt data or wrong password)",
));
}
let entry = &self.table[s];
out.push(entry.byte);
if entry.parent < 0 {
break;
}
s = entry.parent as usize;
}
Ok(())
}
fn add_entry(&mut self, sym: usize) {
let prev = match self.prev_sym {
None => {
self.prev_sym = Some(sym);
return;
}
Some(p) => p,
};
let first = if sym == self.table_size {
self.first_byte(prev)
} else {
self.first_byte(sym)
};
if !self.table_full() {
self.table[self.table_size] = LzwEntry {
parent: prev as i32,
byte: first,
};
self.usage[self.table_size] = 2;
self.table_size += 1;
} else {
let mut min_idx = RESERVED_ENTRIES;
let mut min_use = u8::MAX;
let mut idx = self.usage_pos;
loop {
idx += 1;
if idx >= TABLE_SIZE {
idx = RESERVED_ENTRIES;
}
if self.usage[idx] < min_use {
min_idx = idx;
min_use = self.usage[idx];
}
if self.usage[idx] > 0 {
self.usage[idx] -= 1;
}
if self.usage[idx] == 0 || idx == self.usage_pos {
break;
}
}
self.usage_pos = idx;
self.table[min_idx] = LzwEntry {
parent: prev as i32,
byte: first,
};
self.usage[min_idx] = 2;
}
self.prev_sym = Some(sym);
}
}
pub fn decompress(input: &[u8]) -> Result<Vec<u8>> {
if input.is_empty() {
return Ok(Vec::new());
}
let mut state = CrushedState::new();
let mut reader = BitReader::endian(input, LittleEndian);
let mut output = Vec::new();
let mut buffer = Vec::with_capacity(256);
loop {
let sym = state.read_symbol(&mut reader)?;
if sym == 0x100 {
break;
}
if sym >= TABLE_SIZE {
return Err(ArchiveError::decompression_failed("Crushed", format!("invalid symbol {sym}")));
}
if sym > state.table_size {
return Err(ArchiveError::decompression_failed(
"Crushed",
format!("symbol {sym} > table_size {}", state.table_size),
));
}
if sym < state.table_size {
state.mark_used(sym);
} else if let Some(prev) = state.prev_sym {
state.mark_used(prev);
}
state.update_mode(sym >= 256);
state.decode_string(sym, &mut buffer)?;
output.reserve(buffer.len());
for &b in buffer.iter().rev() {
output.push(b);
}
state.add_entry(sym);
state.check_code_size();
}
Ok(output)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_input() {
assert_eq!(decompress(&[]).unwrap(), Vec::<u8>::new());
}
}