use std::{fmt, mem};
pub(crate) use bitstream::Bitstream;
pub(crate) use block::{Block, Decoded, Kind as BlockKind};
pub(crate) use tree::{CanonicalTree, Tree};
use window::Window;
pub use window::WindowSize;
mod bitstream;
mod block;
mod tree;
mod window;
pub const MAX_CHUNK_SIZE: usize = 32 * 1024;
pub(crate) struct DecoderState {
window_size: WindowSize,
main_tree: CanonicalTree,
length_tree: CanonicalTree,
}
struct PostProcessState {
e8_translation_size: i32,
data_chunk: Box<[u8]>,
}
pub struct Lzxd {
window: Window,
state: DecoderState,
r: [u32; 3],
chunk_offset: usize,
first_chunk_read: bool,
current_block: Block,
postprocess: Option<PostProcessState>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeFailed {
OverreadBlock,
UnexpectedEof,
InvalidBlock(u8),
InvalidBlockSize(u32),
InvalidPretreeElement(u16),
InvalidPretreeRle,
InvalidPathLengths,
EmptyTree,
WindowTooSmall,
ChunkTooLong,
}
impl fmt::Display for DecodeFailed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use DecodeFailed::*;
match self {
OverreadBlock => write!(
f,
"read more items than available in the block in a single step"
),
UnexpectedEof => write!(f, "reached end of chunk without fully decoding it"),
InvalidBlock(kind) => write!(f, "block type {} is invalid", kind),
InvalidBlockSize(size) => write!(f, "block size {} is invalid", size),
InvalidPretreeElement(elem) => write!(f, "found invalid pretree element {}", elem),
InvalidPretreeRle => write!(f, "found invalid pretree rle element"),
InvalidPathLengths => write!(f, "encountered invalid path lengths"),
EmptyTree => write!(f, "encountered empty decode tree"),
WindowTooSmall => write!(f, "decode window was too small"),
ChunkTooLong => write!(
f,
"tried reading a chunk longer than {} bytes",
MAX_CHUNK_SIZE
),
}
}
}
impl std::error::Error for DecodeFailed {}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DecompressError(DecodeFailed);
impl fmt::Display for DecompressError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl std::error::Error for DecompressError {}
impl From<DecodeFailed> for DecompressError {
fn from(value: DecodeFailed) -> Self {
Self(value)
}
}
impl Lzxd {
pub fn new(window_size: WindowSize) -> Self {
let main_tree = CanonicalTree::new(256 + 8 * window_size.position_slots());
let length_tree = CanonicalTree::new(249);
Self {
window: window_size.create_buffer(),
state: DecoderState {
window_size,
main_tree,
length_tree,
},
r: [1, 1, 1],
first_chunk_read: false,
chunk_offset: 0,
postprocess: None,
current_block: Block {
remaining: 0,
size: 0,
kind: BlockKind::Uncompressed { r: [1, 1, 1] },
},
}
}
fn try_read_first_chunk(&mut self, bitstream: &mut Bitstream) -> Result<(), DecodeFailed> {
if !self.first_chunk_read {
self.first_chunk_read = true;
let e8_translation = bitstream.read_bit()? != 0;
self.postprocess = if e8_translation {
Some(PostProcessState {
data_chunk: vec![0; MAX_CHUNK_SIZE].into_boxed_slice(),
e8_translation_size: bitstream.read_bits(32)? as i32,
})
} else {
None
};
}
Ok(())
}
fn postprocess(
translation_size: i32,
chunk_offset: usize,
idata: &mut [u8],
) -> Result<&[u8], DecodeFailed> {
let mut processed = 0usize;
while let Some(pos) = idata[processed..]
.iter()
.position(|&e| e == 0xE8)
.map(|pos| processed + pos)
{
if idata.len() - pos <= 10 {
break;
}
let current_pointer = chunk_offset + pos;
let abs_val = i32::from_le_bytes([
idata[pos + 1],
idata[pos + 2],
idata[pos + 3],
idata[pos + 4],
]);
if (abs_val >= -(current_pointer as i32)) && abs_val < translation_size {
let rel_val = if abs_val.is_positive() {
abs_val.wrapping_sub(current_pointer as i32)
} else {
abs_val.wrapping_add(translation_size)
};
idata[pos + 1..pos + 5].copy_from_slice(&rel_val.to_le_bytes());
}
processed = pos + 5;
}
Ok(idata)
}
pub fn decompress_next(
&mut self,
chunk: &[u8],
output_len: usize,
) -> Result<&[u8], DecompressError> {
let mut bitstream = Bitstream::new(chunk);
self.try_read_first_chunk(&mut bitstream)?;
let mut decoded_len = 0;
while decoded_len != output_len {
if self.current_block.remaining == 0 {
if matches!(self.current_block.kind, BlockKind::Uncompressed { .. })
&& self.current_block.size % 2 != 0
{
bitstream.read_byte();
}
self.current_block = Block::read(&mut bitstream, &mut self.state)?;
assert_ne!(self.current_block.remaining, 0);
}
let decoded = self
.current_block
.decode_element(&mut bitstream, &mut self.r)?;
let advance = match decoded {
Decoded::Single(value) => {
self.window.push(value);
1
}
Decoded::Match { offset, length } => {
self.window.copy_from_self(offset, length);
length
}
Decoded::Read(length) => {
let length = usize::min(bitstream.remaining_bytes(), length);
self.window.copy_from_bitstream(&mut bitstream, length)?;
length
}
};
assert_ne!(advance, 0);
decoded_len += advance;
if let Some(value) = self.current_block.remaining.checked_sub(advance as u32) {
self.current_block.remaining = value;
} else {
return Err(DecodeFailed::OverreadBlock.into());
}
}
let chunk_offset = self.chunk_offset;
self.chunk_offset += decoded_len;
let view = self.window.past_view(decoded_len)?;
if let Some(postprocess) = self.postprocess.as_mut() {
if chunk_offset >= 0x4000_0000 || decoded_len <= 10 {
Ok(view)
} else {
let postprocess_buf = &mut postprocess.data_chunk[..decoded_len];
postprocess_buf.copy_from_slice(view);
let view = Self::postprocess(
postprocess.e8_translation_size,
chunk_offset,
postprocess_buf,
)?;
Ok(view)
}
} else {
Ok(view)
}
}
pub fn reset(&mut self) {
let this = Self::new(self.state.window_size);
let _ = mem::replace(self, this);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn check_uncompressed() {
let data = [
0x00, 0x30, 0x30, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, b'a', b'b', b'c', 0x00,
];
let mut lzxd = Lzxd::new(WindowSize::KB32); let res = lzxd.decompress_next(&data, 3);
assert_eq!(res.unwrap(), [b'a', b'b', b'c']);
}
#[test]
fn reset() {
let data = [
0x00, 0x30, 0x30, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, b'a', b'b', b'c', 0x00,
];
let mut lzxd = Lzxd::new(WindowSize::KB32); let res = lzxd.decompress_next(&data, 3);
assert_eq!(res.unwrap(), [b'a', b'b', b'c']);
lzxd.reset();
let res = lzxd.decompress_next(&data, 3);
assert_eq!(res.unwrap(), [b'a', b'b', b'c']);
}
#[test]
fn check_e8() {
let data = [
0x5B, 0x80, 0x80, 0x8D, 0x00, 0x30, 0x80, 0x0A, 0x18, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x54, 0x68, 0x69, 0x73, 0x20, 0x66, 0x69, 0x6C,
0x65, 0x20, 0x68, 0x61, 0x73, 0x20, 0x61, 0x6E, 0x20, 0x45, 0x38, 0x20, 0x62, 0x79,
0x74, 0x65, 0x20, 0x74, 0x6F, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x45, 0x38, 0x20,
0x74, 0x72, 0x61, 0x6E, 0x73, 0x6C, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x2C, 0x20, 0x58,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0xE8, 0x7B,
0x00, 0x00, 0x00, 0xE8, 0x7B, 0x00, 0x00, 0x00, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
];
let mut lzxd = Lzxd::new(WindowSize::KB32);
let res = lzxd.decompress_next(&data, 168);
assert_eq!(
res.unwrap(),
b"This file has an E8 byte to test E8 translation, Xdddddddddddddddd\
dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\
dddddddddddddd\xE8\xE9\xFF\xFF\xFF\xE8\xE4\xFF\xFF\xFFdddddddddddd"
);
}
}