#![cfg_attr(docsrs, doc(cfg(feature = "lz5")))]
use alloc::vec::Vec;
use crate::error::Error;
use crate::traits::{Algorithm, RawDecoder, RawEncoder, RawProgress};
mod block;
mod xxh32;
pub use block::Lz4ModeDecoder;
/// Frame magic number. The encoder writes it little-endian as the first four
/// bytes of every frame, and the decoder rejects any header whose first four
/// bytes do not decode to this value.
pub const MAGIC: u32 = 0x184D2206;
/// Number of input bytes the encoder gathers before emitting a block; also the
/// decoder's per-block limit until a frame header has been parsed.
pub const DEFAULT_BLOCK_SIZE: usize = 128 * 1024;
/// Hard ceiling on the payload length of any block the decoder accepts,
/// independent of the limit announced by the frame header.
const MAX_BLOCK_SIZE: usize = 256 * 1024 * 1024;
/// Zero-sized marker type that exposes this module's codec (named `"lz5"`)
/// through the [`Algorithm`] trait.
#[derive(Debug, Clone, Copy, Default)]
pub struct Lz5;
/// Registers the `lz5` codec with the generic [`Algorithm`] interface.
///
/// Neither direction is configurable, so both config types are `()`.
impl Algorithm for Lz5 {
    const NAME: &'static str = "lz5";

    type Encoder = Encoder;
    type Decoder = Decoder;
    type EncoderConfig = ();
    type DecoderConfig = ();

    /// Builds a fresh encoder; the unit config carries no information.
    fn encoder_with(_config: Self::EncoderConfig) -> Self::Encoder {
        Encoder::new()
    }

    /// Builds a fresh decoder; the unit config carries no information.
    fn decoder_with(_config: Self::DecoderConfig) -> Self::Decoder {
        Decoder::new()
    }
}
/// Bits of the FLG header byte that hold the frame format version.
const FLG_VERSION_MASK: u8 = 0b1100_0000;
/// The version value the encoder writes and the only one the decoder accepts.
const FLG_VERSION_01: u8 = 0b0100_0000;
/// FLG bit the encoder sets on every frame. The decoder in this file never
/// inspects it.
const FLG_BLOCK_INDEPENDENCE: u8 = 0b0010_0000;
/// FLG bit announcing per-block checksums; the decoder rejects such frames
/// with `Error::Unsupported`.
const FLG_BLOCK_CHECKSUM: u8 = 0b0001_0000;
/// FLG bit announcing an 8-byte content-size field after the BD byte, which
/// grows the header from 7 to 15 bytes.
const FLG_CONTENT_SIZE: u8 = 0b0000_1000;
/// FLG bit announcing 4 extra bytes after the end mark.
const FLG_CONTENT_CHECKSUM: u8 = 0b0000_0100;
/// FLG bits that must be zero for the header to be accepted.
const FLG_RESERVED_MASK: u8 = 0b0000_0011;
/// BD bits that must be zero for the header to be accepted.
const BD_RESERVED_MASK: u8 = 0b1000_1111;
/// Bit offset of the 3-bit maximum-block-size code inside the BD byte.
const BD_BLOCK_MAXSIZE_SHIFT: u32 = 4;
/// High bit of the 32-bit block-size word: set when the payload is stored
/// uncompressed.
const BLOCK_UNCOMPRESSED_FLAG: u32 = 0x8000_0000;
/// Low 31 bits of the block-size word: the payload length in bytes.
const BLOCK_SIZE_MASK: u32 = 0x7FFF_FFFF;
/// Translates the 3-bit block-size code from the BD header byte into the
/// maximum block size in bytes.
///
/// Codes `1..=7` are valid; any other value yields `None`.
const fn block_size_for_bd_code(code: u8) -> Option<usize> {
    const KIB: usize = 1024;
    const MIB: usize = 1024 * KIB;
    // Entry `i` holds the size for code `i + 1`.
    const SIZES: [usize; 7] = [
        128 * KIB,
        256 * KIB,
        MIB,
        4 * MIB,
        16 * MIB,
        64 * MIB,
        256 * MIB,
    ];

    let slot = code as usize;
    if slot == 0 || slot > SIZES.len() {
        return None;
    }
    Some(SIZES[slot - 1])
}
/// Position of the encoder's state machine within the frame it is producing.
#[derive(Clone, Copy, PartialEq, Eq)]
enum EncPhase {
/// The prebuilt frame header is still being copied to the output.
Header,
/// Input is being accumulated into the current block.
Buffering,
/// A completed block (size word plus payload) is being copied to the output.
Flushing,
/// The four zero bytes that terminate the frame are being written.
EndMark,
/// The frame is complete; nothing further will be written.
Done,
}
/// Streaming frame encoder.
///
/// Input is gathered into blocks of `DEFAULT_BLOCK_SIZE` bytes and every block
/// is emitted with the "uncompressed" flag set, i.e. the payload is stored
/// verbatim.
pub struct Encoder {
/// Input bytes accumulated for the block currently being filled.
raw: Vec<u8>,
/// A finished block (4-byte size word followed by the payload) waiting to be
/// copied to the output.
staged: Vec<u8>,
/// Number of bytes of `staged` already written out.
staged_idx: usize,
/// Prebuilt 7-byte frame header: magic, FLG, BD, header checksum byte.
header: [u8; 7],
/// Number of bytes of `header` already written out.
header_idx: u8,
/// Number of bytes of the 4-byte end mark already written out.
endmark_idx: u8,
/// Current state-machine position.
phase: EncPhase,
}
impl Encoder {
pub fn new() -> Self {
let mut enc = Self {
raw: Vec::with_capacity(DEFAULT_BLOCK_SIZE),
staged: Vec::with_capacity(DEFAULT_BLOCK_SIZE + 4),
staged_idx: 0,
header: [0; 7],
header_idx: 0,
endmark_idx: 0,
phase: EncPhase::Header,
};
enc.build_header();
enc
}
fn build_header(&mut self) {
let magic = MAGIC.to_le_bytes();
let flg = FLG_VERSION_01 | FLG_BLOCK_INDEPENDENCE;
let bd = (1u8) << BD_BLOCK_MAXSIZE_SHIFT; self.header[0..4].copy_from_slice(&magic);
self.header[4] = flg;
self.header[5] = bd;
let hc = (xxh32::xxh32(&[flg, bd], 0) >> 8) as u8;
self.header[6] = hc;
}
fn build_block(&mut self) {
if self.raw.is_empty() {
return;
}
let size = self.raw.len() as u32 | BLOCK_UNCOMPRESSED_FLAG;
self.staged.clear();
self.staged.extend_from_slice(&size.to_le_bytes());
self.staged.extend_from_slice(&self.raw);
self.raw.clear();
self.staged_idx = 0;
self.phase = EncPhase::Flushing;
}
fn drain_header(&mut self, output: &mut [u8], written: &mut usize) {
while (self.header_idx as usize) < self.header.len() && *written < output.len() {
output[*written] = self.header[self.header_idx as usize];
self.header_idx += 1;
*written += 1;
}
if (self.header_idx as usize) == self.header.len() {
self.phase = EncPhase::Buffering;
}
}
fn drain_staged(&mut self, output: &mut [u8], written: &mut usize) {
let avail = self.staged.len() - self.staged_idx;
let space = output.len() - *written;
let n = avail.min(space);
if n > 0 {
output[*written..*written + n]
.copy_from_slice(&self.staged[self.staged_idx..self.staged_idx + n]);
self.staged_idx += n;
*written += n;
}
if self.staged_idx == self.staged.len() {
self.staged.clear();
self.staged_idx = 0;
self.phase = EncPhase::Buffering;
}
}
fn drain_endmark(&mut self, output: &mut [u8], written: &mut usize) {
while self.endmark_idx < 4 && *written < output.len() {
output[*written] = 0;
*written += 1;
self.endmark_idx += 1;
}
if self.endmark_idx == 4 {
self.phase = EncPhase::Done;
}
}
}
impl Default for Encoder {
fn default() -> Self {
Self::new()
}
}
impl RawEncoder for Encoder {
    /// Consumes bytes from `input` and writes frame bytes to `output`.
    ///
    /// Pending header or block bytes are flushed first; input is then buffered
    /// until a full block of `DEFAULT_BLOCK_SIZE` bytes is available. The call
    /// returns as soon as `output` is full, `input` is exhausted, or the
    /// encoder has already begun finishing. The result always has
    /// `done: false`; only [`raw_finish`](Self::raw_finish) completes a frame.
    fn raw_encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
        let mut consumed = 0usize;
        let mut written = 0usize;

        loop {
            match self.phase {
                EncPhase::Header => {
                    self.drain_header(output, &mut written);
                    if self.phase == EncPhase::Header {
                        // Output is full and the header is still incomplete.
                        break;
                    }
                }
                EncPhase::Flushing => {
                    self.drain_staged(output, &mut written);
                    if self.phase == EncPhase::Flushing {
                        // Output is full and the block is still incomplete.
                        break;
                    }
                }
                EncPhase::Buffering => {
                    let pending = &input[consumed..];
                    if pending.is_empty() {
                        break;
                    }
                    let room = DEFAULT_BLOCK_SIZE - self.raw.len();
                    let chunk = &pending[..pending.len().min(room)];
                    self.raw.extend_from_slice(chunk);
                    consumed += chunk.len();
                    if self.raw.len() != DEFAULT_BLOCK_SIZE {
                        // Block not full yet, so all input has been absorbed.
                        break;
                    }
                    self.build_block();
                }
                // Finishing has started; no further input is accepted.
                EncPhase::EndMark | EncPhase::Done => break,
            }
        }

        Ok(RawProgress {
            consumed,
            written,
            done: false,
        })
    }

    /// Flushes everything still pending (header, final partial block, end
    /// mark) into `output`.
    ///
    /// Returns `done: false` while `output` was too small to hold everything;
    /// call again with more space until `done` is `true`.
    fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
        let mut written = 0usize;

        let done = loop {
            match self.phase {
                EncPhase::Header => {
                    self.drain_header(output, &mut written);
                    if self.phase == EncPhase::Header {
                        break false;
                    }
                }
                EncPhase::Buffering => {
                    if self.raw.is_empty() {
                        self.phase = EncPhase::EndMark;
                    } else {
                        // Emit whatever is buffered as a final, shorter block.
                        self.build_block();
                    }
                }
                EncPhase::Flushing => {
                    self.drain_staged(output, &mut written);
                    if self.phase == EncPhase::Flushing {
                        break false;
                    }
                }
                EncPhase::EndMark => {
                    self.drain_endmark(output, &mut written);
                    if self.phase == EncPhase::EndMark {
                        break false;
                    }
                }
                EncPhase::Done => break true,
            }
        };

        Ok(RawProgress {
            consumed: 0,
            written,
            done,
        })
    }

    /// Discards all buffered data and rewinds to the start of a new frame.
    /// The prebuilt header bytes are reused as they are.
    fn raw_reset(&mut self) {
        self.phase = EncPhase::Header;
        self.header_idx = 0;
        self.endmark_idx = 0;
        self.staged_idx = 0;
        self.raw.clear();
        self.staged.clear();
    }
}
/// Position of the decoder's state machine within the frame being read.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum DecPhase {
/// Gathering the 7- or 15-byte frame header.
Header,
/// Gathering the 4-byte block-size word that precedes every block.
BlockSize,
/// Copying an uncompressed block's payload; `remaining` bytes are still
/// expected from the input.
RawBlock { remaining: usize },
/// Gathering a compressed block's payload: `gathered` of `block_len` bytes
/// have been received so far.
CompressedBlock { block_len: usize, gathered: usize },
/// Copying the current decoded block to the caller's output buffer.
Draining,
/// Consuming the 4 bytes that follow the end mark; `idx` counts how many
/// have been consumed so far.
ContentChecksum { idx: u8 },
/// The end of the frame has been reached.
Done,
}
/// Streaming frame decoder.
///
/// Once any call returns an error the decoder is marked as poisoned and keeps
/// failing until it is reset.
pub struct Decoder {
/// Raw header bytes gathered so far (7 bytes, or 15 with a content size).
header_buf: [u8; 15],
/// Number of header bytes gathered.
header_idx: u8,
/// Total header length expected: 7, raised to 15 when the FLG byte
/// announces a content-size field.
header_expected: u8,
/// Bytes of the current 4-byte block-size word.
bs_buf: [u8; 4],
/// Number of block-size bytes gathered.
bs_idx: u8,
/// Maximum decoded size of a single block, taken from the header's BD code.
max_block_raw: usize,
/// Whether the header announced 4 extra bytes after the end mark.
expect_content_checksum: bool,
/// Payload of the compressed block currently being gathered.
block_buf: Vec<u8>,
/// Decoded bytes of the current block, waiting to be copied to the output.
decoded: Vec<u8>,
/// Number of bytes of `decoded` already copied to the output.
decoded_idx: usize,
/// Running (saturating) total of decoded bytes over all blocks, counted when
/// each block finishes decoding.
total_emitted: u64,
/// Current state-machine position.
phase: DecPhase,
/// Set when an error has been returned; cleared only by a reset.
poisoned: bool,
}
impl Decoder {
pub fn new() -> Self {
Self {
header_buf: [0; 15],
header_idx: 0,
header_expected: 7,
bs_buf: [0; 4],
bs_idx: 0,
max_block_raw: DEFAULT_BLOCK_SIZE,
expect_content_checksum: false,
block_buf: Vec::new(),
decoded: Vec::new(),
decoded_idx: 0,
total_emitted: 0,
phase: DecPhase::Header,
poisoned: false,
}
}
fn finish_header(&mut self) -> Result<(), Error> {
let magic = u32::from_le_bytes([
self.header_buf[0],
self.header_buf[1],
self.header_buf[2],
self.header_buf[3],
]);
if magic != MAGIC {
return Err(Error::BadHeader);
}
let flg = self.header_buf[4];
let bd = self.header_buf[5];
if flg & FLG_VERSION_MASK != FLG_VERSION_01 {
return Err(Error::BadHeader);
}
if flg & FLG_RESERVED_MASK != 0 {
return Err(Error::BadHeader);
}
if flg & FLG_BLOCK_CHECKSUM != 0 {
return Err(Error::Unsupported);
}
if bd & BD_RESERVED_MASK != 0 {
return Err(Error::BadHeader);
}
let bd_code = (bd >> BD_BLOCK_MAXSIZE_SHIFT) & 0b0111;
let max_block = block_size_for_bd_code(bd_code).ok_or(Error::BadHeader)?;
self.max_block_raw = max_block;
self.expect_content_checksum = (flg & FLG_CONTENT_CHECKSUM) != 0;
let hc_offset = if flg & FLG_CONTENT_SIZE != 0 { 14 } else { 6 };
let descriptor = &self.header_buf[4..hc_offset];
let expected_hc = (xxh32::xxh32(descriptor, 0) >> 8) as u8;
if self.header_buf[hc_offset] != expected_hc {
return Err(Error::ChecksumMismatch);
}
Ok(())
}
fn drain_decoded(&mut self, output: &mut [u8], written: &mut usize) {
let avail = self.decoded.len() - self.decoded_idx;
let space = output.len() - *written;
let n = avail.min(space);
if n > 0 {
output[*written..*written + n]
.copy_from_slice(&self.decoded[self.decoded_idx..self.decoded_idx + n]);
self.decoded_idx += n;
*written += n;
}
if self.decoded_idx == self.decoded.len() {
self.decoded.clear();
self.decoded_idx = 0;
self.phase = DecPhase::BlockSize;
self.bs_idx = 0;
}
}
}
impl Default for Decoder {
fn default() -> Self {
Self::new()
}
}
impl RawDecoder for Decoder {
fn raw_decode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
if self.poisoned {
return Err(Error::Corrupt);
}
let mut consumed = 0usize;
let mut written = 0usize;
loop {
match self.phase {
DecPhase::Header => {
while (self.header_idx as usize) < self.header_expected as usize
&& consumed < input.len()
{
self.header_buf[self.header_idx as usize] = input[consumed];
self.header_idx += 1;
consumed += 1;
if self.header_idx == 5 {
let flg = self.header_buf[4];
if flg & FLG_VERSION_MASK != FLG_VERSION_01
|| flg & FLG_RESERVED_MASK != 0
{
self.poisoned = true;
return Err(Error::BadHeader);
}
self.header_expected = if flg & FLG_CONTENT_SIZE != 0 { 15 } else { 7 };
}
}
if (self.header_idx as usize) < self.header_expected as usize {
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
if let Err(e) = self.finish_header() {
self.poisoned = true;
return Err(e);
}
self.phase = DecPhase::BlockSize;
self.bs_idx = 0;
}
DecPhase::BlockSize => {
while self.bs_idx < 4 && consumed < input.len() {
self.bs_buf[self.bs_idx as usize] = input[consumed];
self.bs_idx += 1;
consumed += 1;
}
if self.bs_idx < 4 {
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
let bs = u32::from_le_bytes(self.bs_buf);
if bs == 0 {
if self.expect_content_checksum {
self.phase = DecPhase::ContentChecksum { idx: 0 };
} else {
self.phase = DecPhase::Done;
return Ok(RawProgress {
consumed,
written,
done: true,
});
}
continue;
}
let payload_len = (bs & BLOCK_SIZE_MASK) as usize;
let uncompressed = (bs & BLOCK_UNCOMPRESSED_FLAG) != 0;
if payload_len > self.max_block_raw && !uncompressed {
if payload_len > self.max_block_raw.saturating_mul(2) + 32 {
self.poisoned = true;
return Err(Error::Corrupt);
}
}
if payload_len > MAX_BLOCK_SIZE {
self.poisoned = true;
return Err(Error::Corrupt);
}
if uncompressed {
if payload_len > self.max_block_raw {
self.poisoned = true;
return Err(Error::Corrupt);
}
self.decoded.clear();
self.decoded.reserve(payload_len);
self.decoded_idx = 0;
self.phase = DecPhase::RawBlock {
remaining: payload_len,
};
} else {
self.block_buf.clear();
self.block_buf.reserve(payload_len);
self.phase = DecPhase::CompressedBlock {
block_len: payload_len,
gathered: 0,
};
}
}
DecPhase::RawBlock { mut remaining } => {
let avail = input.len() - consumed;
let take = remaining.min(avail);
if take > 0 {
self.decoded
.extend_from_slice(&input[consumed..consumed + take]);
consumed += take;
remaining -= take;
}
if remaining > 0 {
self.phase = DecPhase::RawBlock { remaining };
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
self.total_emitted =
self.total_emitted.saturating_add(self.decoded.len() as u64);
self.decoded_idx = 0;
self.phase = DecPhase::Draining;
}
DecPhase::CompressedBlock {
block_len,
mut gathered,
} => {
let need = block_len - gathered;
let avail = input.len() - consumed;
let take = need.min(avail);
if take > 0 {
self.block_buf
.extend_from_slice(&input[consumed..consumed + take]);
consumed += take;
gathered += take;
}
if gathered < block_len {
self.phase = DecPhase::CompressedBlock {
block_len,
gathered,
};
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
self.decoded.clear();
if let Err(e) = block::decode_compressed_block(
&self.block_buf,
&mut self.decoded,
self.max_block_raw,
) {
self.poisoned = true;
return Err(e);
}
if self.decoded.len() > self.max_block_raw {
self.poisoned = true;
return Err(Error::Corrupt);
}
self.total_emitted =
self.total_emitted.saturating_add(self.decoded.len() as u64);
self.decoded_idx = 0;
self.phase = DecPhase::Draining;
}
DecPhase::Draining => {
self.drain_decoded(output, &mut written);
if self.phase == DecPhase::Draining {
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
}
DecPhase::ContentChecksum { mut idx } => {
while idx < 4 && consumed < input.len() {
consumed += 1;
idx += 1;
}
if idx < 4 {
self.phase = DecPhase::ContentChecksum { idx };
return Ok(RawProgress {
consumed,
written,
done: false,
});
}
self.phase = DecPhase::Done;
return Ok(RawProgress {
consumed,
written,
done: true,
});
}
DecPhase::Done => {
return Ok(RawProgress {
consumed,
written,
done: true,
});
}
}
}
}
fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
if self.poisoned {
return Err(Error::Corrupt);
}
let mut written = 0usize;
if self.phase == DecPhase::Draining {
self.drain_decoded(output, &mut written);
if self.phase == DecPhase::Draining {
return Ok(RawProgress {
consumed: 0,
written,
done: false,
});
}
}
match self.phase {
DecPhase::Done => Ok(RawProgress {
consumed: 0,
written,
done: true,
}),
DecPhase::BlockSize if self.bs_idx == 0 => {
Ok(RawProgress {
consumed: 0,
written,
done: true,
})
}
_ => Err(Error::UnexpectedEnd),
}
}
fn raw_reset(&mut self) {
self.header_buf = [0; 15];
self.header_idx = 0;
self.header_expected = 7;
self.bs_buf = [0; 4];
self.bs_idx = 0;
self.max_block_raw = DEFAULT_BLOCK_SIZE;
self.expect_content_checksum = false;
self.block_buf.clear();
self.decoded.clear();
self.decoded_idx = 0;
self.total_emitted = 0;
self.phase = DecPhase::Header;
self.poisoned = false;
}
}