use std::io::{self, Read, Write};
use super::{Decoder, Encoder, method};
// Magic number expected, as a little-endian u32, in the first four bytes of a frame.
const LIZARD_MAGIC: u32 = 0x184D2206;
// Bits tested in the header byte of a compressed sub-block. When any of the
// four below is set, the decoder reports the sub-block as Huffman-compressed
// and refuses to decode it.
const LIZARD_FLAG_LITERALS: u8 = 1;
const LIZARD_FLAG_FLAGS: u8 = 2;
const LIZARD_FLAG_OFFSET16: u8 = 4;
const LIZARD_FLAG_OFFSET24: u8 = 8;
// Sub-block header byte announcing stored data: a 3-byte little-endian length
// followed by that many raw bytes.
const LIZARD_FLAG_UNCOMPRESSED: u8 = 128;
// Mask for the literal-length field of a token (low 3 bits). A field equal to
// the mask means an additional length is taken from the lengths stream.
const MAX_SHORT_LITLEN: usize = 7;
// Mask for the match-length field of a token (bits 3..=6), with the same
// "field equals mask" escape to the lengths stream.
const MAX_SHORT_MATCHLEN: usize = 15;
// Tokens below 32 describe matches that use a 24-bit offset. Values below 31
// carry the length in the token itself; the value 31 takes it from the
// lengths stream instead.
const LIZARD_LAST_LONG_OFF: u8 = 31;
// Base added to the length of every 24-bit-offset match.
const MM_LONGOFF: usize = 16;
// Maximum block size in bytes, indexed by the 3-bit block size ID of the frame
// header. Index 0 is a placeholder: the decoder rejects ID 0 as invalid.
const BLOCK_SIZES: [usize; 8] = [
0, 128 * 1024, 256 * 1024, 1024 * 1024, 4 * 1024 * 1024, 16 * 1024 * 1024, 64 * 1024 * 1024, 256 * 1024 * 1024, ];
/// Streaming decoder for a Lizard frame, exposed through `std::io::Read`.
///
/// The frame header is parsed lazily on the first `read` call; blocks are then
/// decoded one at a time into `buffer` and handed out from there.
pub struct LizardDecoder<R: Read> {
/// Source of the compressed frame.
inner: R,
/// Decoded bytes. Cleared before each block when blocks are independent,
/// otherwise it keeps growing so that matches can reach earlier output.
buffer: Vec<u8>,
/// Index into `buffer` of the next byte to hand to the caller.
read_pos: usize,
/// Set once the end mark (a zero block header) has been read.
finished: bool,
/// Whether the frame header has been consumed yet.
header_parsed: bool,
/// FLG bit 0x20 of the frame header.
block_independent: bool,
/// Upper bound for a block's stored size, taken from the header's block size ID.
max_block_size: usize,
/// FLG bit 0x10: four checksum bytes follow every block (read, not verified).
block_checksum: bool,
/// FLG bit 0x04: four checksum bytes follow the end mark (read, not verified).
content_checksum: bool,
/// Content size from the header when FLG bit 0x08 is set; stored but not used.
#[allow(dead_code)] content_size: Option<u64>,
/// Negated distance of the most recent match (zero until a match is seen);
/// reused by tokens that request the previous offset.
last_offset: isize,
}
impl<R: Read> std::fmt::Debug for LizardDecoder<R> {
    /// Prints a compact summary of the decoder state. The reader itself and
    /// the buffer contents are left out; only the buffer length is shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let buffered = self.buffer.len();
        let mut summary = f.debug_struct("LizardDecoder");
        summary.field("buffer_len", &buffered);
        summary.field("read_pos", &self.read_pos);
        summary.field("finished", &self.finished);
        summary.field("max_block_size", &self.max_block_size);
        summary.finish()
    }
}
impl<R: Read + Send> LizardDecoder<R> {
/// Wraps `input` in a decoder.
///
/// Nothing is read from `input` here; the frame header is parsed on the
/// first `read` call.
pub fn new(input: R) -> Self {
    // Frame parameters are provisional: `parse_header` replaces the block
    // size and the three frame flags once the real header has been read.
    let provisional_block_size = 4 * 1024 * 1024;
    Self {
        inner: input,
        buffer: Vec::new(),
        read_pos: 0,
        last_offset: 0,
        finished: false,
        header_parsed: false,
        content_size: None,
        max_block_size: provisional_block_size,
        block_independent: true,
        block_checksum: false,
        content_checksum: false,
    }
}
/// Reads and validates the frame header from the underlying reader.
///
/// Consumed in order: the 4-byte little-endian magic, the FLG byte, the BD
/// byte, an 8-byte little-endian content size when FLG bit 0x08 is set, and
/// one header checksum byte, which is skipped without verification.
///
/// # Errors
/// Returns `InvalidData` for a wrong magic number, a version field other
/// than 1, or an invalid block size ID, and forwards any I/O error from the
/// reader.
fn parse_header(&mut self) -> io::Result<()> {
    let mut quad = [0u8; 4];
    self.inner.read_exact(&mut quad)?;
    let magic = u32::from_le_bytes(quad);
    if magic != LIZARD_MAGIC {
        let message = format!(
            "Invalid Lizard magic number: 0x{:08X}, expected 0x{:08X}",
            magic, LIZARD_MAGIC
        );
        return Err(io::Error::new(io::ErrorKind::InvalidData, message));
    }

    // FLG byte: version in the two top bits, feature flags below.
    let mut single = [0u8; 1];
    self.inner.read_exact(&mut single)?;
    let [flags] = single;
    let version = (flags >> 6) & 0x03;
    if version != 0x01 {
        let message = format!("Unsupported Lizard version: {}", version);
        return Err(io::Error::new(io::ErrorKind::InvalidData, message));
    }
    let flag_set = |mask: u8| flags & mask != 0;
    self.block_independent = flag_set(0x20);
    self.block_checksum = flag_set(0x10);
    let has_content_size = flag_set(0x08);
    self.content_checksum = flag_set(0x04);

    // BD byte: bits 4..=6 select the maximum block size.
    self.inner.read_exact(&mut single)?;
    let [descriptor] = single;
    let block_size_id = usize::from((descriptor >> 4) & 0x07);
    self.max_block_size = match BLOCK_SIZES.get(block_size_id) {
        Some(&size) if block_size_id != 0 => size,
        _ => {
            let message = format!("Invalid block size ID: {}", block_size_id);
            return Err(io::Error::new(io::ErrorKind::InvalidData, message));
        }
    };

    if has_content_size {
        let mut raw_size = [0u8; 8];
        self.inner.read_exact(&mut raw_size)?;
        self.content_size = Some(u64::from_le_bytes(raw_size));
    }

    // Header checksum byte: consumed so the stream stays aligned, not checked.
    self.inner.read_exact(&mut single)?;

    self.buffer.reserve(self.max_block_size);
    self.header_parsed = true;
    Ok(())
}
/// Reads the next block of the frame and appends its decoded bytes to the
/// output buffer.
///
/// Returns `Ok(false)` when the end mark (a zero block header) is reached
/// and `Ok(true)` after a block has been processed. Block and content
/// checksums are consumed when the header announced them, but never
/// verified.
///
/// # Errors
/// `InvalidData` when the stored block size exceeds the frame's maximum,
/// any error from decoding the block body, and any I/O error from the
/// reader.
fn read_block(&mut self) -> io::Result<bool> {
    // High bit of the block header: the body is raw data, not a Lizard block.
    const RAW_BLOCK_BIT: u32 = 0x80000000;

    let mut word = [0u8; 4];
    self.inner.read_exact(&mut word)?;
    let header = u32::from_le_bytes(word);

    if header == 0 {
        if self.content_checksum {
            self.inner.read_exact(&mut word)?;
        }
        return Ok(false);
    }

    let stored_raw = header & RAW_BLOCK_BIT != 0;
    let payload_len = (header & !RAW_BLOCK_BIT) as usize;
    if payload_len > self.max_block_size {
        let message = format!(
            "Block size {} exceeds maximum {}",
            payload_len, self.max_block_size
        );
        return Err(io::Error::new(io::ErrorKind::InvalidData, message));
    }

    let mut payload = vec![0u8; payload_len];
    self.inner.read_exact(&mut payload)?;
    if self.block_checksum {
        self.inner.read_exact(&mut word)?;
    }

    // Independent blocks never reference earlier output, so it can be dropped.
    if self.block_independent {
        self.read_pos = 0;
        self.buffer.clear();
    }

    if stored_raw {
        self.buffer.extend_from_slice(&payload);
    } else {
        self.decompress_lizard_block(&payload)?;
    }
    Ok(true)
}
/// Decodes the body of one Lizard block into the output buffer.
///
/// The first byte (the compression level) is skipped. What follows is a
/// sequence of sub-blocks: a header byte equal to `LIZARD_FLAG_UNCOMPRESSED`
/// introduces a 3-byte length plus raw bytes, and any other header byte is
/// passed to `decompress_compressed_block`.
///
/// # Errors
/// `UnexpectedEof` when a stored sub-block is truncated, plus whatever the
/// compressed sub-block decoder reports.
fn decompress_lizard_block(&mut self, data: &[u8]) -> io::Result<()> {
    // Start after the level byte; an empty or one-byte block has no sub-blocks.
    let mut cursor = 1;
    while let Some(&marker) = data.get(cursor) {
        if marker != LIZARD_FLAG_UNCOMPRESSED {
            // The compressed decoder re-reads the header byte itself.
            cursor = self.decompress_compressed_block(data, cursor)?;
            continue;
        }

        let body_start = cursor + 4;
        if body_start > data.len() {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "Incomplete uncompressed block header",
            ));
        }
        let body_len = read_le24(&data[cursor + 1..]);
        let body_end = body_start + body_len;
        if body_end > data.len() {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "Incomplete uncompressed block data",
            ));
        }
        self.buffer.extend_from_slice(&data[body_start..body_end]);
        cursor = body_end;
    }
    Ok(())
}
/// Decodes one compressed sub-block whose header byte sits at `data[start]`
/// and returns the position just past the sub-block.
///
/// After the header byte come five length-prefixed streams, in this order:
/// lengths, 16-bit offsets, 24-bit offsets, flags (tokens) and literals.
/// Each token either copies literals and then a match addressed through
/// the 16-bit offset stream or the previous offset (token >= 32), or copies
/// a match addressed through the 24-bit offset stream (token < 32).
/// Literals left over after the last token are appended verbatim.
///
/// # Errors
/// `Unsupported` when the header byte has one of the Huffman flag bits set,
/// `UnexpectedEof` when a stream header or stream body is truncated, and
/// `InvalidData` when a token reads past the end of one of the streams or
/// refers to an invalid match offset.
fn decompress_compressed_block(&mut self, data: &[u8], start: usize) -> io::Result<usize> {
    // Error for a sub-block that ends before its streams do.
    fn truncated(message: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::UnexpectedEof, message)
    }

    // Error for a token that consumes more than a stream holds.
    fn exhausted(message: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, message)
    }

    // Reads the 3-byte length prefix at `*cursor`, moves `*cursor` past the
    // stream body and returns the body's index range within `data`.
    fn take_stream(
        data: &[u8],
        cursor: &mut usize,
        incomplete: &'static str,
        overflow: &'static str,
    ) -> io::Result<std::ops::Range<usize>> {
        if *cursor + 3 > data.len() {
            return Err(truncated(incomplete));
        }
        let body_start = *cursor + 3;
        let body_end = body_start + read_le24(&data[*cursor..]);
        if body_end > data.len() {
            return Err(truncated(overflow));
        }
        *cursor = body_end;
        Ok(body_start..body_end)
    }

    let header_byte = match data.get(start) {
        Some(&byte) => byte,
        None => return Err(truncated("Missing block header")),
    };
    let huffman_bits =
        LIZARD_FLAG_LITERALS | LIZARD_FLAG_FLAGS | LIZARD_FLAG_OFFSET16 | LIZARD_FLAG_OFFSET24;
    if header_byte & huffman_bits != 0 {
        return Err(io::Error::new(
            io::ErrorKind::Unsupported,
            "Huffman-compressed Lizard streams are not yet supported",
        ));
    }

    let mut pos = start + 1;
    let lengths = take_stream(
        data,
        &mut pos,
        "Incomplete lengths stream",
        "Lengths stream overflow",
    )?;
    let offsets16 = take_stream(
        data,
        &mut pos,
        "Incomplete offset16 stream",
        "Offset16 stream overflow",
    )?;
    let offsets24 = take_stream(
        data,
        &mut pos,
        "Incomplete offset24 stream",
        "Offset24 stream overflow",
    )?;
    let tokens = take_stream(
        data,
        &mut pos,
        "Incomplete flags stream",
        "Flags stream overflow",
    )?;
    let literals = take_stream(
        data,
        &mut pos,
        "Incomplete literals stream",
        "Literals stream overflow",
    )?;

    let mut len_ptr = lengths.start;
    let mut off16_ptr = offsets16.start;
    let mut off24_ptr = offsets24.start;
    let mut lit_ptr = literals.start;

    for &token in &data[tokens.start..tokens.end] {
        if token < 32 {
            // Match with a 24-bit offset. Token 31 fetches its length from
            // the lengths stream before the offset is read.
            let base_len = if token < LIZARD_LAST_LONG_OFF {
                token as usize
            } else {
                self.read_length(data, &mut len_ptr, lengths.end)?
                    + LIZARD_LAST_LONG_OFF as usize
            };
            let match_len = base_len + MM_LONGOFF;

            if off24_ptr + 3 > offsets24.end {
                return Err(exhausted("Offset24 overflow"));
            }
            let distance = read_le24(&data[off24_ptr..]) as isize;
            off24_ptr += 3;
            self.last_offset = -distance;
            self.copy_match(match_len)?;
            continue;
        }

        // Token >= 32: literal run, then a match that either reads a new
        // 16-bit offset or reuses the previous one (bit 7 set).
        let reuse_offset = token & 0x80 != 0;
        let mut literal_len = usize::from(token & MAX_SHORT_LITLEN as u8);
        let mut match_len = usize::from((token >> 3) & MAX_SHORT_MATCHLEN as u8);

        if literal_len == MAX_SHORT_LITLEN {
            literal_len += self.read_length(data, &mut len_ptr, lengths.end)?;
        }
        if literal_len > 0 {
            let run_end = lit_ptr + literal_len;
            if run_end > literals.end {
                return Err(exhausted("Literal overflow"));
            }
            self.buffer.extend_from_slice(&data[lit_ptr..run_end]);
            lit_ptr = run_end;
        }

        if !reuse_offset {
            if off16_ptr + 2 > offsets16.end {
                return Err(exhausted("Offset16 overflow"));
            }
            let distance = u16::from_le_bytes([data[off16_ptr], data[off16_ptr + 1]]) as isize;
            off16_ptr += 2;
            self.last_offset = -distance;
            // Matches with a fresh offset are at least four bytes long.
            match_len = match_len.max(4);
        }
        if match_len == MAX_SHORT_MATCHLEN {
            match_len += self.read_length(data, &mut len_ptr, lengths.end)?;
        }
        self.copy_match(match_len)?;
    }

    // Literals not claimed by any token form the tail of the sub-block.
    if lit_ptr < literals.end {
        self.buffer.extend_from_slice(&data[lit_ptr..literals.end]);
    }
    Ok(pos)
}
/// Reads one variable-width length from `data` at `*ptr`, never reading at
/// or beyond `end`, and advances `*ptr` past the bytes consumed.
///
/// A first byte below 254 is the length itself, 254 is followed by a
/// 2-byte little-endian length and 255 by a 3-byte little-endian length.
///
/// # Errors
/// `InvalidData` when the stream ends before the length is complete.
fn read_length(&self, data: &[u8], ptr: &mut usize, end: usize) -> io::Result<usize> {
    let underflow = |message: &'static str| io::Error::new(io::ErrorKind::InvalidData, message);

    if *ptr >= end {
        return Err(underflow("Length stream underflow"));
    }
    let marker = data[*ptr];
    *ptr += 1;

    match marker {
        254 => {
            if *ptr + 2 > end {
                return Err(underflow("Length stream underflow (2-byte)"));
            }
            let value = u16::from_le_bytes([data[*ptr], data[*ptr + 1]]);
            *ptr += 2;
            Ok(usize::from(value))
        }
        255 => {
            if *ptr + 3 > end {
                return Err(underflow("Length stream underflow (3-byte)"));
            }
            let value = read_le24(&data[*ptr..]);
            *ptr += 3;
            Ok(value)
        }
        short => Ok(usize::from(short)),
    }
}
/// Appends `length` bytes to the output buffer by repeating the bytes that
/// start `-last_offset` positions before its current end.
///
/// When `length` exceeds the distance, the source range overlaps the bytes
/// being written, so the pattern repeats periodically.
///
/// # Errors
/// `InvalidData` when the distance is zero or reaches before the start of
/// the buffer.
fn copy_match(&mut self, length: usize) -> io::Result<()> {
    let distance = (-self.last_offset) as usize;
    let produced = self.buffer.len();
    if distance == 0 || distance > produced {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "Invalid match offset {} (buffer size {})",
                distance, produced
            ),
        ));
    }

    let origin = produced - distance;
    let mut remaining = length;
    while remaining > 0 {
        // Everything from `origin` to the end is a whole number of pattern
        // periods, so copying from `origin` always continues the pattern.
        let ready = self.buffer.len() - origin;
        let step = remaining.min(ready);
        self.buffer.extend_from_within(origin..origin + step);
        remaining -= step;
    }
    Ok(())
}
}
/// Decodes the first three bytes of `data` as a little-endian 24-bit integer.
///
/// # Panics
/// Panics when `data` holds fewer than three bytes.
fn read_le24(data: &[u8]) -> usize {
    let padded = [data[0], data[1], data[2], 0];
    u32::from_le_bytes(padded) as usize
}
impl<R: Read + Send> Read for LizardDecoder<R> {
    /// Copies decoded bytes into `buf`, decoding further blocks when the
    /// internal buffer has been fully handed out.
    ///
    /// The frame header is parsed on the first call. Returns `Ok(0)` once
    /// the end mark of the frame has been read, and on every call after that.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.finished {
            return Ok(0);
        }
        if !self.header_parsed {
            self.parse_header()?;
        }

        loop {
            // Serve pending output first; fetch another block only when dry.
            if self.read_pos < self.buffer.len() {
                let pending = &self.buffer[self.read_pos..];
                let count = pending.len().min(buf.len());
                buf[..count].copy_from_slice(&pending[..count]);
                self.read_pos += count;
                return Ok(count);
            }

            if !self.read_block()? {
                self.finished = true;
                return Ok(0);
            }
        }
    }
}
// Hooks the decoder into the `Decoder` trait imported from the parent module.
impl<R: Read + Send> Decoder for LizardDecoder<R> {
/// Returns the method identifier constant `method::LIZARD`.
fn method_id(&self) -> &'static [u8] {
method::LIZARD
}
}
/// Settings for the Lizard encoder.
///
/// The fields are public, so they can hold values the builder methods would
/// have clamped; `max_block_size` tolerates that.
#[derive(Debug, Clone)]
pub struct LizardEncoderOptions {
    /// Level byte stored at the start of every encoded block. The `level`
    /// builder keeps it within `10..=49`.
    pub level: i32,
    /// Index into the block size table. The `block_size_id` builder keeps it
    /// within `1..=7`.
    pub block_size_id: u8,
    /// Whether the frame header declares blocks as independent.
    pub block_independent: bool,
    /// Whether a content checksum is requested for the frame.
    pub content_checksum: bool,
    /// Whether per-block checksums are requested for the frame.
    pub block_checksum: bool,
}

impl Default for LizardEncoderOptions {
    /// Level 17, block size ID 4 (4 MiB), independent blocks, no checksums.
    fn default() -> Self {
        Self {
            level: 17,
            block_size_id: 4,
            block_independent: true,
            content_checksum: false,
            block_checksum: false,
        }
    }
}

impl LizardEncoderOptions {
    /// Creates options with the default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the level, clamped to `10..=49`.
    pub fn level(mut self, level: i32) -> Self {
        self.level = level.clamp(10, 49);
        self
    }

    /// Sets the block size ID, clamped to `1..=7`.
    pub fn block_size_id(mut self, id: u8) -> Self {
        self.block_size_id = id.clamp(1, 7);
        self
    }

    /// Chooses whether blocks are declared independent of each other.
    pub fn block_independent(mut self, independent: bool) -> Self {
        self.block_independent = independent;
        self
    }

    /// Returns the maximum block size in bytes for the configured ID.
    ///
    /// `block_size_id` is a public field and may have been assigned a value
    /// outside `1..=7`. Such a value is clamped here exactly as the builder
    /// would have done: indexing the table with it would either panic (IDs
    /// above 7) or yield a block size of zero (ID 0), which an encoder cannot
    /// work with.
    pub fn max_block_size(&self) -> usize {
        let id = usize::from(self.block_size_id.clamp(1, 7));
        BLOCK_SIZES[id]
    }
}
/// Writer that wraps the bytes written to it in a Lizard frame.
///
/// Input is collected in `buffer` and emitted block by block; the block
/// bodies produced by `compress_block` store the data without compressing it.
pub struct LizardEncoder<W: Write> {
/// Destination of the encoded frame.
inner: W,
/// Settings the encoder was created with.
options: LizardEncoderOptions,
/// Input bytes not yet emitted as a block.
buffer: Vec<u8>,
/// Number of input bytes that make up one full block.
max_block_size: usize,
/// Whether the frame header has been written already.
header_written: bool,
}
impl<W: Write> std::fmt::Debug for LizardEncoder<W> {
    /// Prints the options, the number of buffered bytes and the block size.
    /// The writer and the remaining state are elided, which the output marks
    /// with a trailing `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pending = self.buffer.len();
        let mut summary = f.debug_struct("LizardEncoder");
        summary.field("options", &self.options);
        summary.field("buffer_len", &pending);
        summary.field("max_block_size", &self.max_block_size);
        summary.finish_non_exhaustive()
    }
}
impl<W: Write + Send> LizardEncoder<W> {
/// Creates an encoder that writes its frame to `output`.
///
/// Nothing is written yet; the frame header goes out with the first
/// `write` call or when the encoder is finished. The input buffer is
/// allocated up front with room for one full block.
pub fn new(output: W, options: LizardEncoderOptions) -> Self {
    let block_capacity = options.max_block_size();
    let staging = Vec::with_capacity(block_capacity);
    Self {
        inner: output,
        options,
        buffer: staging,
        max_block_size: block_capacity,
        header_written: false,
    }
}
/// Writes the frame header: magic number, FLG byte, BD byte and the header
/// checksum byte, then marks the header as written.
///
/// Differences from a naive header:
/// * The checksum byte is `(XXH32(FLG ‖ BD, seed 0) >> 8) & 0xFF`, the
///   value the frame format prescribes, so that decoders which verify it
///   accept the frame.
/// * The block-checksum and content-checksum bits are never set. This
///   encoder does not emit any checksum bytes, and a header that announces
///   them makes decoders consume the following block header or run past the
///   end of the stream. The corresponding options are therefore ignored
///   until checksum output exists.
/// * The block size ID is clamped to `1..=7`; masking alone could write
///   the invalid ID 0 for an out-of-range option value.
///
/// # Errors
/// Forwards any I/O error from the underlying writer.
fn write_header(&mut self) -> io::Result<()> {
    // XXH32 with seed 0, restricted to inputs shorter than 16 bytes (the
    // only case needed for a frame descriptor), where the algorithm skips
    // its four-lane main loop.
    fn xxh32_short(input: &[u8]) -> u32 {
        const PRIME32_1: u32 = 0x9E37_79B1;
        const PRIME32_2: u32 = 0x85EB_CA77;
        const PRIME32_3: u32 = 0xC2B2_AE3D;
        const PRIME32_4: u32 = 0x27D4_EB2F;
        const PRIME32_5: u32 = 0x1656_67B1;
        debug_assert!(input.len() < 16);

        let mut hash = PRIME32_5.wrapping_add(input.len() as u32);
        let mut words = input.chunks_exact(4);
        for word in &mut words {
            let lane = u32::from_le_bytes([word[0], word[1], word[2], word[3]]);
            hash = hash
                .wrapping_add(lane.wrapping_mul(PRIME32_3))
                .rotate_left(17)
                .wrapping_mul(PRIME32_4);
        }
        for &byte in words.remainder() {
            hash = hash
                .wrapping_add(u32::from(byte).wrapping_mul(PRIME32_5))
                .rotate_left(11)
                .wrapping_mul(PRIME32_1);
        }
        hash ^= hash >> 15;
        hash = hash.wrapping_mul(PRIME32_2);
        hash ^= hash >> 13;
        hash = hash.wrapping_mul(PRIME32_3);
        hash ^ (hash >> 16)
    }

    self.inner.write_all(&LIZARD_MAGIC.to_le_bytes())?;

    // FLG: version 01 in the two top bits, then the block-independence bit.
    let mut flg: u8 = 0x40;
    if self.options.block_independent {
        flg |= 0x20;
    }
    // Bits 0x10 (block checksum) and 0x04 (content checksum) stay clear on
    // purpose: no checksum bytes are written anywhere in this encoder.

    // BD: block size ID in bits 4..=6.
    let bd: u8 = self.options.block_size_id.clamp(1, 7) << 4;

    let hc = (xxh32_short(&[flg, bd]) >> 8) as u8;
    self.inner.write_all(&[flg, bd, hc])?;

    self.header_written = true;
    Ok(())
}
/// Encodes `data` as the body of one Lizard block without compressing it.
///
/// The body starts with the level byte and continues with stored
/// sub-blocks, each made of the `LIZARD_FLAG_UNCOMPRESSED` marker, a 3-byte
/// little-endian length and the raw bytes. Input longer than `0xFFFFFF`
/// bytes is split across several sub-blocks. Empty input yields an empty
/// body.
fn compress_block(&mut self, data: &[u8]) -> io::Result<Vec<u8>> {
    // Largest length that fits the 3-byte sub-block length field.
    const SUBBLOCK_LIMIT: usize = 0xFFFFFF;

    if data.is_empty() {
        return Ok(Vec::new());
    }

    let mut body = Vec::with_capacity(data.len() + 5);
    body.push(self.options.level as u8);
    for piece in data.chunks(SUBBLOCK_LIMIT) {
        body.push(LIZARD_FLAG_UNCOMPRESSED);
        write_le24(&mut body, piece.len());
        body.extend_from_slice(piece);
    }
    Ok(body)
}
/// Emits the buffered input as one frame block and leaves the buffer empty.
/// Does nothing when no input is buffered.
///
/// The block goes out in whichever form is smaller. When the encoded body
/// is not shorter than the input, the input is written as a raw frame
/// block, whose 4-byte header has the high bit set and whose body is the
/// unmodified data. This keeps the stored size from exceeding the input
/// size, and with it the frame's maximum block size, which decoders
/// enforce.
///
/// # Errors
/// Forwards any I/O error from the underlying writer.
fn flush_block(&mut self) -> io::Result<()> {
    // High bit of a block header: the body is raw, not a Lizard block.
    const RAW_BLOCK_FLAG: u32 = 0x8000_0000;

    if self.buffer.is_empty() {
        return Ok(());
    }
    let data = std::mem::take(&mut self.buffer);
    let encoded = self.compress_block(&data)?;

    let (payload, flag) = if encoded.len() < data.len() {
        (&encoded, 0)
    } else {
        (&data, RAW_BLOCK_FLAG)
    };
    // Block sizes are at most 256 MiB, so the length fits the low 31 bits.
    let header = payload.len() as u32 | flag;
    self.inner.write_all(&header.to_le_bytes())?;
    self.inner.write_all(payload)?;
    Ok(())
}
/// Completes the frame and returns the underlying writer.
///
/// Writes the header if no data was ever written, emits any buffered input
/// as a final block, then writes the end mark (a zero block header). The
/// writer is returned without being flushed.
///
/// # Errors
/// Forwards any I/O error from the underlying writer.
pub fn try_finish(mut self) -> io::Result<W> {
// An encoder that never received data still has to produce a valid frame.
if !self.header_written {
self.write_header()?;
}
self.flush_block()?;
// End mark: a block header of zero.
self.inner.write_all(&0u32.to_le_bytes())?;
Ok(self.inner)
}
}
/// Appends the low 24 bits of `value` to `output` as three little-endian
/// bytes. Higher bits of `value` are discarded.
fn write_le24(output: &mut Vec<u8>, value: usize) {
    let bytes = (value as u32).to_le_bytes();
    output.extend_from_slice(&bytes[..3]);
}
impl<W: Write + Send> Write for LizardEncoder<W> {
    /// Buffers `buf` and emits every full block that has accumulated.
    ///
    /// The frame header is written on the first call. Always reports the
    /// whole of `buf` as consumed; bytes that do not fill a block stay
    /// buffered until the encoder is finished.
    ///
    /// Each block goes out in whichever form is smaller. When the encoded
    /// body is not shorter than the input, the input is written as a raw
    /// frame block, whose 4-byte header has the high bit set. Without this,
    /// a full block plus the stored encoding's overhead would be larger than
    /// the frame's maximum block size, and decoders would reject it.
    ///
    /// # Errors
    /// Forwards any I/O error from the underlying writer.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // High bit of a block header: the body is raw, not a Lizard block.
        const RAW_BLOCK_FLAG: u32 = 0x8000_0000;

        if !self.header_written {
            self.write_header()?;
        }
        self.buffer.extend_from_slice(buf);

        while self.buffer.len() >= self.max_block_size {
            let block: Vec<u8> = self.buffer.drain(..self.max_block_size).collect();
            let encoded = self.compress_block(&block)?;

            let (payload, flag) = if encoded.len() < block.len() {
                (&encoded, 0)
            } else {
                (&block, RAW_BLOCK_FLAG)
            };
            // Block sizes are at most 256 MiB, so the length fits the low 31 bits.
            let header = payload.len() as u32 | flag;
            self.inner.write_all(&header.to_le_bytes())?;
            self.inner.write_all(payload)?;
        }
        Ok(buf.len())
    }

    /// Flushes the underlying writer. Input that has not yet filled a block
    /// remains buffered; it is emitted when the encoder is finished.
    fn flush(&mut self) -> io::Result<()> {
        self.inner.flush()
    }
}
// Hooks the encoder into the `Encoder` trait imported from the parent module.
impl<W: Write + Send> Encoder for LizardEncoder<W> {
/// Returns the method identifier constant `method::LIZARD`.
fn method_id(&self) -> &'static [u8] {
method::LIZARD
}
/// Completes the frame and flushes the underlying writer.
///
/// Performs the same steps as `try_finish` (header if still missing, final
/// block, end mark) and then flushes instead of returning the writer.
fn finish(mut self: Box<Self>) -> io::Result<()> {
// An encoder that never received data still has to produce a valid frame.
if !self.header_written {
self.write_header()?;
}
self.flush_block()?;
// End mark: a block header of zero.
self.inner.write_all(&0u32.to_le_bytes())?;
self.inner.flush()
}
}
// Unit tests for the frame decoder, the store-only encoder and the 24-bit helpers.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
// The decoder reports the Lizard method ID; no input is read for that.
#[test]
fn test_lizard_decoder_method_id() {
let data = vec![0u8; 16];
let decoder = LizardDecoder::new(Cursor::new(data));
assert_eq!(decoder.method_id(), method::LIZARD);
}
// The encoder reports the Lizard method ID.
#[test]
fn test_lizard_encoder_method_id() {
let output = Vec::new();
let encoder = LizardEncoder::new(output, LizardEncoderOptions::default());
assert_eq!(encoder.method_id(), method::LIZARD);
}
// Builder methods clamp the level to 10..=49 and map block size IDs to byte counts.
#[test]
fn test_lizard_encoder_options() {
let opts = LizardEncoderOptions::new().level(30);
assert_eq!(opts.level, 30);
let opts_low = LizardEncoderOptions::new().level(5);
assert_eq!(opts_low.level, 10);
let opts_high = LizardEncoderOptions::new().level(100);
assert_eq!(opts_high.level, 49);
let opts_bs = LizardEncoderOptions::new().block_size_id(3);
assert_eq!(opts_bs.max_block_size(), 1024 * 1024); }
// A stream that does not start with the magic number makes the first read fail.
#[test]
fn test_lizard_invalid_magic() {
let data = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
let mut decoder = LizardDecoder::new(Cursor::new(data));
let mut buf = [0u8; 10];
let result = decoder.read(&mut buf);
assert!(result.is_err());
}
// Three bytes are combined least-significant byte first.
#[test]
fn test_read_le24() {
assert_eq!(read_le24(&[0x01, 0x02, 0x03]), 0x030201);
assert_eq!(read_le24(&[0xFF, 0xFF, 0xFF]), 0xFFFFFF);
assert_eq!(read_le24(&[0x00, 0x00, 0x00]), 0x000000);
}
// Values are appended least-significant byte first.
#[test]
fn test_write_le24() {
let mut output = Vec::new();
write_le24(&mut output, 0x030201);
assert_eq!(output, vec![0x01, 0x02, 0x03]);
output.clear();
write_le24(&mut output, 0xFFFFFF);
assert_eq!(output, vec![0xFF, 0xFF, 0xFF]);
}
// Builds a frame by hand: magic, FLG 0x60 (version 1, independent blocks),
// BD 0x40 (block size ID 4), a zero header checksum byte (the decoder does not
// verify it), one raw block (high bit of the block header set) and the end mark.
fn create_minimal_lizard_frame(uncompressed_data: &[u8]) -> Vec<u8> {
let mut frame = Vec::new();
frame.extend_from_slice(&LIZARD_MAGIC.to_le_bytes());
frame.push(0x60);
frame.push(0x40);
frame.push(0x00);
let block_size = uncompressed_data.len() as u32 | 0x80000000;
frame.extend_from_slice(&block_size.to_le_bytes());
frame.extend_from_slice(uncompressed_data);
frame.extend_from_slice(&0u32.to_le_bytes());
frame
}
// A hand-built frame with one raw block decodes to the original bytes.
#[test]
fn test_lizard_uncompressed_frame() {
let original = b"Hello, World! This is a test of uncompressed Lizard data.";
let frame = create_minimal_lizard_frame(original);
let mut decoder = LizardDecoder::new(Cursor::new(frame));
let mut output = Vec::new();
decoder
.read_to_end(&mut output)
.expect("Failed to decompress");
assert_eq!(output, original);
}
// Encoder output for a short input decodes back to the same bytes.
#[test]
fn test_lizard_encoder_roundtrip_small() {
let original = b"Hello, World!";
let mut compressed = Vec::new();
{
let mut encoder = LizardEncoder::new(&mut compressed, LizardEncoderOptions::default());
encoder.write_all(original).unwrap();
encoder.try_finish().unwrap();
}
let mut decoder = LizardDecoder::new(Cursor::new(&compressed));
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed).unwrap();
assert_eq!(decompressed, original);
}
// Round trip of 1600 bytes of repeated text, still well below one block.
#[test]
fn test_lizard_encoder_roundtrip_medium() {
let original: Vec<u8> = b"ABCDEFGHIJKLMNOP".repeat(100);
let mut compressed = Vec::new();
{
let mut encoder = LizardEncoder::new(&mut compressed, LizardEncoderOptions::default());
encoder.write_all(&original).unwrap();
encoder.try_finish().unwrap();
}
let mut decoder = LizardDecoder::new(Cursor::new(&compressed));
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed).unwrap();
assert_eq!(decompressed, original);
}
// Finishing without writing anything still yields a frame (header plus end
// mark) that decodes to empty output.
#[test]
fn test_lizard_encoder_empty_input() {
let mut compressed = Vec::new();
{
let encoder = LizardEncoder::new(&mut compressed, LizardEncoderOptions::default());
encoder.try_finish().unwrap();
}
assert!(!compressed.is_empty());
let mut decoder = LizardDecoder::new(Cursor::new(&compressed));
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed).unwrap();
assert!(decompressed.is_empty());
}
}