#[cfg(feature = "std")]
use crate::error::StreamResult;
use crate::error::{BufferResult, LzwError, LzwStatus, VectorResult};
use crate::{BitOrder, Code, StreamBuf, MAX_CODESIZE, MAX_ENTRIES, STREAM_BUF_SIZE};
use crate::alloc::{boxed::Box, vec, vec::Vec};
#[cfg(feature = "std")]
use std::io::{self, BufRead, Write};
pub struct Decoder {
state: Box<dyn Stateful + Send + 'static>,
}
#[cfg_attr(
not(feature = "std"),
deprecated = "This type is only useful with the `std` feature."
)]
#[cfg_attr(not(feature = "std"), allow(dead_code))]
pub struct IntoStream<'d, W> {
decoder: &'d mut Decoder,
writer: W,
buffer: Option<StreamBuf<'d>>,
default_size: usize,
}
#[cfg(feature = "async")]
pub struct IntoAsync<'d, W> {
decoder: &'d mut Decoder,
writer: W,
buffer: Option<StreamBuf<'d>>,
default_size: usize,
}
pub struct IntoVec<'d> {
decoder: &'d mut Decoder,
vector: &'d mut Vec<u8>,
}
trait Stateful {
fn advance(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult;
fn has_ended(&self) -> bool;
fn restart(&mut self);
fn reset(&mut self);
}
#[derive(Clone, Copy, Default)]
#[repr(C, packed)]
struct Link {
prev: u16,
first: u8,
}
#[derive(Clone)]
struct DerivationBase {
code: Code,
first: u8,
}
#[derive(Default)]
struct MsbBuffer {
bit_buffer: u64,
code_mask: u16,
code_size: u8,
bits: u8,
}
#[derive(Default)]
struct LsbBuffer {
bit_buffer: u64,
code_mask: u16,
code_size: u8,
bits: u8,
}
trait CodeBuffer {
fn new(min_size: u8) -> Self;
fn reset(&mut self, min_size: u8);
fn bump_code_size(&mut self);
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code>;
fn refill_bits(&mut self, inp: &mut &[u8]);
fn peek_bits(&self, code: &mut [Code; BURST]) -> usize;
fn consume_bits(&mut self, code_cnt: u8);
fn max_code(&self) -> Code;
fn code_size(&self) -> u8;
}
trait CodegenConstants {
const YIELD_ON_FULL: bool;
}
pub(crate) struct NoYield;
pub(crate) struct YieldOnFull;
impl CodegenConstants for NoYield {
const YIELD_ON_FULL: bool = false;
}
impl CodegenConstants for YieldOnFull {
const YIELD_ON_FULL: bool = true;
}
struct DecodeState<CodeBuffer, Constants: CodegenConstants> {
min_size: u8,
table: Table,
buffer: Buffer,
last: Option<DerivationBase>,
next_code: Code,
clear_code: Code,
end_code: Code,
has_ended: bool,
is_tiff: bool,
implicit_reset: bool,
code_buffer: CodeBuffer,
#[allow(dead_code)]
constants: core::marker::PhantomData<Constants>,
}
const BURST: usize = 6;
struct Buffer {
bytes: Box<[u8]>,
read_mark: usize,
write_mark: usize,
pub(crate) reconstructed_another_code: bool,
}
const MASK: usize = MAX_ENTRIES - 1;
const STREAMING_Q: usize = 8;
struct Table {
suffixes: Box<[[u8; STREAMING_Q]; MAX_ENTRIES]>,
chain: Box<[Link; MAX_ENTRIES]>,
depths: Box<[u16; MAX_ENTRIES]>,
len: usize,
}
#[derive(Clone, Debug)]
pub struct Configuration {
order: BitOrder,
size: u8,
tiff: bool,
yield_on_full: bool,
}
impl Configuration {
pub fn new(order: BitOrder, size: u8) -> Self {
super::assert_decode_size(size);
Configuration {
order,
size,
tiff: false,
yield_on_full: false,
}
}
pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
super::assert_decode_size(size);
Configuration {
order,
size,
tiff: true,
yield_on_full: false,
}
}
pub fn with_yield_on_full_buffer(self, do_yield: bool) -> Self {
Configuration {
yield_on_full: do_yield,
..self
}
}
pub fn build(self) -> Decoder {
Decoder {
state: Decoder::from_configuration(&self),
}
}
}
impl Decoder {
pub fn new(order: BitOrder, size: u8) -> Self {
Configuration::new(order, size).build()
}
pub fn with_tiff_size_switch(order: BitOrder, size: u8) -> Self {
Configuration::with_tiff_size_switch(order, size).build()
}
fn from_configuration(configuration: &Configuration) -> Box<dyn Stateful + Send + 'static> {
macro_rules! make_state {
($buf:ty, $cgc:ty) => {{
let mut state = Box::new(DecodeState::<$buf, $cgc>::new(configuration.size));
state.is_tiff = configuration.tiff;
state as Box<dyn Stateful + Send + 'static>
}};
}
match (configuration.order, configuration.yield_on_full) {
(BitOrder::Lsb, false) => {
make_state!(LsbBuffer, NoYield)
}
(BitOrder::Lsb, true) => {
make_state!(LsbBuffer, YieldOnFull)
}
(BitOrder::Msb, false) => {
make_state!(MsbBuffer, NoYield)
}
(BitOrder::Msb, true) => {
make_state!(MsbBuffer, YieldOnFull)
}
}
}
pub fn decode_bytes(&mut self, inp: &[u8], out: &mut [u8]) -> BufferResult {
self.state.advance(inp, out)
}
pub fn decode(&mut self, data: &[u8]) -> Result<Vec<u8>, LzwError> {
let mut output = vec![];
self.into_vec(&mut output).decode_all(data).status?;
Ok(output)
}
#[cfg(feature = "std")]
pub fn into_stream<W: Write>(&mut self, writer: W) -> IntoStream<'_, W> {
IntoStream {
decoder: self,
writer,
buffer: None,
default_size: STREAM_BUF_SIZE,
}
}
#[cfg(feature = "async")]
pub fn into_async<W: futures::io::AsyncWrite>(&mut self, writer: W) -> IntoAsync<'_, W> {
IntoAsync {
decoder: self,
writer,
buffer: None,
default_size: STREAM_BUF_SIZE,
}
}
pub fn into_vec<'lt>(&'lt mut self, vec: &'lt mut Vec<u8>) -> IntoVec<'lt> {
IntoVec {
decoder: self,
vector: vec,
}
}
pub fn has_ended(&self) -> bool {
self.state.has_ended()
}
#[allow(dead_code)]
pub(crate) fn restart(&mut self) {
self.state.restart();
}
pub fn reset(&mut self) {
self.state.reset();
}
}
#[cfg(feature = "std")]
impl<'d, W: Write> IntoStream<'d, W> {
pub fn decode(&mut self, read: impl BufRead) -> StreamResult {
self.decode_part(read, false)
}
pub fn decode_all(mut self, read: impl BufRead) -> StreamResult {
self.decode_part(read, true)
}
pub fn set_buffer_size(&mut self, size: usize) {
assert_ne!(size, 0, "Attempted to set empty buffer");
self.default_size = size;
}
pub fn set_buffer(&mut self, buffer: &'d mut [u8]) {
assert_ne!(buffer.len(), 0, "Attempted to set empty buffer");
self.buffer = Some(StreamBuf::Borrowed(buffer));
}
fn decode_part(&mut self, mut read: impl BufRead, must_finish: bool) -> StreamResult {
let IntoStream {
decoder,
writer,
buffer,
default_size,
} = self;
enum Progress {
Ok,
Done,
}
let mut bytes_read = 0;
let mut bytes_written = 0;
let read_bytes = &mut bytes_read;
let write_bytes = &mut bytes_written;
let outbuf: &mut [u8] =
match buffer.get_or_insert_with(|| StreamBuf::Owned(vec![0u8; *default_size])) {
StreamBuf::Borrowed(slice) => slice,
StreamBuf::Owned(vec) => &mut *vec,
};
assert!(!outbuf.is_empty());
let once = move || {
let data = read.fill_buf()?;
let result = decoder.decode_bytes(data, &mut outbuf[..]);
*read_bytes += result.consumed_in;
*write_bytes += result.consumed_out;
read.consume(result.consumed_in);
let done = result.status.map_err(|err| {
io::Error::new(io::ErrorKind::InvalidData, &*format!("{:?}", err))
})?;
if let LzwStatus::NoProgress = done {
debug_assert_eq!(
result.consumed_out, 0,
"No progress means we have not decoded any data"
);
if must_finish {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"No more data but no end marker detected",
));
} else {
return Ok(Progress::Done);
}
}
writer.write_all(&outbuf[..result.consumed_out])?;
Ok(if let LzwStatus::Done = done {
Progress::Done
} else {
Progress::Ok
})
};
let status = core::iter::repeat_with(once)
.scan((), |(), result| match result {
Ok(Progress::Ok) => Some(Ok(())),
Err(err) => Some(Err(err)),
Ok(Progress::Done) => None,
})
.fuse()
.collect();
StreamResult {
bytes_read,
bytes_written,
status,
}
}
}
impl IntoVec<'_> {
pub fn decode(&mut self, read: &[u8]) -> VectorResult {
self.decode_part(read, false)
}
pub fn decode_all(mut self, read: &[u8]) -> VectorResult {
self.decode_part(read, true)
}
fn grab_buffer(&mut self) -> (&mut [u8], &mut Decoder) {
const CHUNK_SIZE: usize = 1 << 12;
let decoder = &mut self.decoder;
let length = self.vector.len();
self.vector.reserve(CHUNK_SIZE);
self.vector.resize(length + CHUNK_SIZE, 0u8);
(&mut self.vector[length..], decoder)
}
fn decode_part(&mut self, part: &[u8], must_finish: bool) -> VectorResult {
let mut result = VectorResult {
consumed_in: 0,
consumed_out: 0,
status: Ok(LzwStatus::Ok),
};
enum Progress {
Ok,
Done,
}
let read_bytes = &mut result.consumed_in;
let write_bytes = &mut result.consumed_out;
let mut data = part;
let once = move || {
let (outbuf, decoder) = self.grab_buffer();
let result = decoder.decode_bytes(data, &mut outbuf[..]);
*read_bytes += result.consumed_in;
*write_bytes += result.consumed_out;
data = &data[result.consumed_in..];
let unfilled = outbuf.len() - result.consumed_out;
let filled = self.vector.len() - unfilled;
self.vector.truncate(filled);
match result.status {
Err(err) => Err(err),
Ok(LzwStatus::NoProgress) if must_finish => Err(LzwError::InvalidCode),
Ok(LzwStatus::NoProgress) | Ok(LzwStatus::Done) => Ok(Progress::Done),
Ok(LzwStatus::Ok) => Ok(Progress::Ok),
}
};
let status: Result<(), _> = core::iter::repeat_with(once)
.scan((), |(), result| match result {
Ok(Progress::Ok) => Some(Ok(())),
Err(err) => Some(Err(err)),
Ok(Progress::Done) => None,
})
.fuse()
.collect();
if let Err(err) = status {
result.status = Err(err);
}
result
}
}
#[cfg(feature = "async")]
#[path = "decode_into_async.rs"]
mod impl_decode_into_async;
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
fn new(min_size: u8) -> Self {
let mut pre_state = DecodeState {
min_size,
table: Table::new(),
buffer: Buffer::new(),
last: None,
clear_code: 1 << min_size,
end_code: (1 << min_size) + 1,
next_code: (1 << min_size) + 2,
has_ended: false,
is_tiff: false,
implicit_reset: true,
code_buffer: C::new(min_size),
constants: core::marker::PhantomData,
};
pre_state.bump_initial_code_size();
pre_state
}
fn init_tables(&mut self) {
self.code_buffer.reset(self.min_size);
self.next_code = (1 << self.min_size) + 2;
self.table.init(self.min_size);
self.bump_initial_code_size();
}
fn reset_tables(&mut self) {
self.code_buffer.reset(self.min_size);
self.next_code = (1 << self.min_size) + 2;
self.table.clear(self.min_size);
self.bump_initial_code_size();
}
fn bump_initial_code_size(&mut self) {
if self.end_code > self.code_buffer.max_code() - Code::from(self.is_tiff)
&& self.code_buffer.code_size() < MAX_CODESIZE
{
self.code_buffer.bump_code_size();
}
}
fn bump_post_initial_code_size(&mut self) {
if self.next_code > self.code_buffer.max_code()
&& self.code_buffer.code_size() < MAX_CODESIZE
{
self.code_buffer.bump_code_size();
}
}
}
impl<C: CodeBuffer, CgC: CodegenConstants> Stateful for DecodeState<C, CgC> {
fn has_ended(&self) -> bool {
self.has_ended
}
fn restart(&mut self) {
self.has_ended = false;
}
fn reset(&mut self) {
self.table.init(self.min_size);
self.next_code = (1 << self.min_size) + 2;
self.buffer.read_mark = 0;
self.buffer.write_mark = 0;
self.last = None;
self.restart();
self.code_buffer = CodeBuffer::new(self.min_size);
if self.next_code > self.code_buffer.max_code()
&& self.code_buffer.code_size() < MAX_CODESIZE
{
self.code_buffer.bump_code_size();
}
}
fn advance(&mut self, mut inp: &[u8], mut out: &mut [u8]) -> BufferResult {
if self.has_ended {
return BufferResult {
consumed_in: 0,
consumed_out: 0,
status: Ok(LzwStatus::Done),
};
}
let o_in = inp.len();
let o_out = out.len();
let mut code_link = None;
let mut status = Ok(LzwStatus::Ok);
self.buffer.reconstructed_another_code = false;
match self.last.take() {
None => {
match self.next_symbol(&mut inp) {
Some(code) if code > self.next_code => status = Err(LzwError::InvalidCode),
Some(code) if code == self.next_code => status = Err(LzwError::InvalidCode),
None => status = Ok(LzwStatus::NoProgress),
Some(init_code) => {
if init_code == self.clear_code {
self.init_tables();
} else if init_code == self.end_code {
self.has_ended = true;
status = Ok(LzwStatus::Done);
} else {
if self.table.is_empty() && self.implicit_reset {
self.init_tables();
} else if self.table.is_empty() {
status = Err(LzwError::InvalidCode);
}
if !self.table.is_empty() {
self.buffer.fill_reconstruct(&self.table, init_code);
self.bump_post_initial_code_size();
code_link = Some(DerivationBase {
code: init_code,
first: self.table.first_of(init_code),
});
}
}
}
}
}
Some(tup) => code_link = Some(tup),
};
let mut have_yet_to_decode_data = false;
if code_link.is_some() {
let remain = self.buffer.buffer();
if remain.len() > out.len() {
if out.is_empty() {
status = Ok(LzwStatus::NoProgress);
have_yet_to_decode_data = remain.is_empty();
} else {
out.copy_from_slice(&remain[..out.len()]);
self.buffer.consume(out.len());
out = &mut [];
}
} else if remain.is_empty() {
have_yet_to_decode_data = true;
} else {
let consumed = remain.len();
out[..consumed].copy_from_slice(remain);
self.buffer.consume(consumed);
out = &mut out[consumed..];
have_yet_to_decode_data = false;
}
}
let mut last_decoded: Option<&[u8]> = None;
if self.buffer.buffer().is_empty() {
let mut burst = [0; BURST];
let mut burst_byte = [0u8; BURST];
loop {
if CgC::YIELD_ON_FULL && out.is_empty() {
break;
}
let mut deriv = match code_link.take() {
Some(link) => link,
None => {
break;
}
};
self.refill_bits(&mut inp);
let cnt = self.code_buffer.peek_bits(&mut burst);
if cnt == 0 {
if have_yet_to_decode_data {
status = Ok(LzwStatus::NoProgress);
}
code_link = Some(deriv);
break;
}
debug_assert!(
self.table.len >= MAX_ENTRIES - usize::from(self.is_tiff)
|| self.code_buffer.max_code() - Code::from(self.is_tiff) >= self.next_code,
"Table: {}, code_size: {}, next_code: {}, table_condition: {}",
self.table.is_full(),
self.code_buffer.code_size(),
self.next_code,
self.code_buffer.max_code() - Code::from(self.is_tiff),
);
let mut burst_size = 0;
let size_switch_at = self.code_buffer.max_code() - Code::from(self.is_tiff);
let left_before_size_switch = size_switch_at.wrapping_sub(self.next_code);
let clear_code = self.clear_code;
let next_code = self.next_code;
let mut last_decoded_bytes = None;
let codes = &burst[..cnt];
let mut code_iter = codes.iter();
let mut broken_burst = true;
loop {
let Some(&read_code) = code_iter.next() else {
broken_burst = false;
break;
};
burst_size += 1;
if burst_size > usize::from(left_before_size_switch) {
break;
}
if read_code.wrapping_sub(clear_code) < 2 || read_code >= next_code {
break;
}
let len = self.table.code_len(read_code);
if out.len() < usize::from(len) {
break;
}
if CgC::YIELD_ON_FULL {
if out.len() == usize::from(len) {
break;
}
}
if read_code < clear_code {
let target = out.split_off_first_mut().unwrap();
burst_byte[burst_size - 1] = read_code as u8;
*target = burst_byte[burst_size - 1];
debug_assert_eq!(1, self.table.depths[read_code as usize]);
last_decoded_bytes = Some(core::slice::from_mut(target));
} else {
debug_assert_eq!(len, self.table.depths[read_code as usize]);
let chunk_len = len.div_ceil(8);
let slack = out.len() - usize::from(len);
let chunked = out.as_chunks_mut::<8>().0;
let target;
if chunked.len() >= usize::from(chunk_len) && slack >= 8 {
let relaxed = &mut chunked[..usize::from(chunk_len)];
burst_byte[burst_size - 1] =
self.table.reconstruct_simple(read_code, relaxed);
target = out.split_off_mut(..usize::from(len)).unwrap();
} else {
target = out.split_off_mut(..usize::from(len)).unwrap();
burst_byte[burst_size - 1] = self.table.reconstruct(read_code, target);
}
last_decoded_bytes = Some(target);
}
}
self.code_buffer.consume_bits(burst_size as u8);
debug_assert!(burst_size > 0);
have_yet_to_decode_data = false;
if !broken_burst {
if !self.table.is_full() {
self.next_code += cnt as u16;
self.table.derive_burst(&mut deriv, codes, &burst_byte[..]);
}
debug_assert!(self.table.is_full() || self.next_code <= size_switch_at);
code_link = Some(DerivationBase {
code: burst[cnt - 1],
first: burst_byte[cnt - 1],
});
last_decoded = last_decoded_bytes.map(|x| &*x);
continue;
}
let (&new_code, burst) = burst[..burst_size].split_last().unwrap();
if !self.table.is_full() {
self.next_code += burst_size as u16 - 1;
self.table.derive_burst(&mut deriv, burst, &burst_byte[..]);
}
if new_code == self.clear_code {
self.reset_tables();
last_decoded = None;
break;
}
if new_code == self.end_code {
self.has_ended = true;
status = Ok(LzwStatus::Done);
last_decoded = None;
break;
}
if new_code > self.next_code {
status = Err(LzwError::InvalidCode);
last_decoded = None;
break;
}
let have_next_code = new_code == self.next_code;
let required_len = if have_next_code {
self.table.code_len(deriv.code) + 1
} else {
self.table.code_len(new_code)
};
if have_next_code {
if last_decoded_bytes.is_some() {
last_decoded = last_decoded_bytes.map(|x| &*x);
}
}
let cha;
let is_in_buffer = usize::from(required_len) > out.len();
if is_in_buffer {
if have_next_code {
if let Some(last) = last_decoded.take() {
self.buffer.bytes[..last.len()].copy_from_slice(last);
self.buffer.write_mark = last.len();
self.buffer.read_mark = last.len();
}
cha = self.buffer.fill_cscsc();
} else {
last_decoded = None;
cha = self.buffer.fill_reconstruct(&self.table, new_code);
}
} else {
let (target, tail) = out.split_at_mut(usize::from(required_len));
out = tail;
if have_next_code {
let source = match last_decoded.take() {
Some(last) => last,
None => &self.buffer.bytes[..self.buffer.write_mark],
};
cha = source.first().copied().unwrap_or(0);
target[..source.len()].copy_from_slice(source);
target[source.len()..][0] = cha;
} else {
cha = self.table.reconstruct(new_code, target);
}
last_decoded = Some(target);
}
if !self.table.is_full() {
self.table.derive(&deriv, cha);
if self.next_code >= self.code_buffer.max_code() - Code::from(self.is_tiff)
&& self.code_buffer.code_size() < MAX_CODESIZE
{
self.bump_code_size();
}
self.next_code += 1;
}
code_link = Some(DerivationBase {
code: new_code,
first: cha,
});
if is_in_buffer || out.is_empty() {
break;
}
}
}
if let Some(tail) = last_decoded {
self.buffer.bytes[..tail.len()].copy_from_slice(tail);
self.buffer.write_mark = tail.len();
self.buffer.read_mark = tail.len();
}
if o_in > inp.len() || self.buffer.reconstructed_another_code {
if let Ok(LzwStatus::NoProgress) = status {
status = Ok(LzwStatus::Ok);
}
}
self.last = code_link;
BufferResult {
consumed_in: o_in.wrapping_sub(inp.len()),
consumed_out: o_out.wrapping_sub(out.len()),
status,
}
}
}
impl<C: CodeBuffer, CgC: CodegenConstants> DecodeState<C, CgC> {
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
self.code_buffer.next_symbol(inp)
}
fn bump_code_size(&mut self) {
self.code_buffer.bump_code_size()
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
self.code_buffer.refill_bits(inp)
}
}
impl CodeBuffer for MsbBuffer {
fn new(min_size: u8) -> Self {
MsbBuffer {
code_size: min_size + 1,
code_mask: (1u16 << (min_size + 1)) - 1,
bit_buffer: 0,
bits: 0,
}
}
fn reset(&mut self, min_size: u8) {
self.code_size = min_size + 1;
self.code_mask = (1 << self.code_size) - 1;
}
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
if self.bits < self.code_size {
self.refill_bits(inp);
}
if self.bits < self.code_size {
return None;
}
let mask = u64::from(self.code_mask);
let rotbuf = self.bit_buffer.rotate_left(self.code_size.into());
self.bit_buffer = rotbuf & !mask;
self.bits -= self.code_size;
Some((rotbuf & mask) as u16)
}
fn bump_code_size(&mut self) {
self.code_size += 1;
self.code_mask = (self.code_mask << 1) | 1;
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
let wish_count = (64 - self.bits) / 8;
let mut buffer = [0u8; 8];
let new_bits = match inp.get(..usize::from(wish_count)) {
Some(bytes) => {
buffer[..usize::from(wish_count)].copy_from_slice(bytes);
*inp = &inp[usize::from(wish_count)..];
wish_count * 8
}
None => {
let new_bits = inp.len() * 8;
buffer[..inp.len()].copy_from_slice(inp);
*inp = &[];
new_bits as u8
}
};
self.bit_buffer |= u64::from_be_bytes(buffer) >> self.bits;
self.bits += new_bits;
}
fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
let mut bit_buffer = self.bit_buffer;
let mask = u64::from(self.code_mask);
let mut consumed = 0;
let mut cnt = 0;
for b in code {
let consumed_after = consumed + self.code_size;
if consumed_after > self.bits {
break;
}
cnt += 1;
consumed = consumed_after;
let rotbuf = bit_buffer.rotate_left(self.code_size.into());
*b = (rotbuf & mask) as u16;
bit_buffer = rotbuf;
}
cnt
}
fn consume_bits(&mut self, code_cnt: u8) {
let bits = self.code_size * code_cnt;
debug_assert!(bits <= self.bits);
if bits >= self.bits {
self.bit_buffer = 0;
} else {
self.bit_buffer <<= bits;
}
self.bits = self.bits.wrapping_sub(bits);
}
fn max_code(&self) -> Code {
self.code_mask
}
fn code_size(&self) -> u8 {
self.code_size
}
}
impl CodeBuffer for LsbBuffer {
fn new(min_size: u8) -> Self {
LsbBuffer {
code_size: min_size + 1,
code_mask: (1u16 << (min_size + 1)) - 1,
bit_buffer: 0,
bits: 0,
}
}
fn reset(&mut self, min_size: u8) {
self.code_size = min_size + 1;
self.code_mask = (1 << self.code_size) - 1;
}
fn next_symbol(&mut self, inp: &mut &[u8]) -> Option<Code> {
if self.bits < self.code_size {
self.refill_bits(inp);
}
if self.bits < self.code_size {
return None;
}
let mask = u64::from(self.code_mask);
let code = self.bit_buffer & mask;
self.bit_buffer >>= self.code_size;
self.bits -= self.code_size;
Some(code as u16)
}
fn bump_code_size(&mut self) {
self.code_size += 1;
self.code_mask = (self.code_mask << 1) | 1;
}
fn refill_bits(&mut self, inp: &mut &[u8]) {
let wish_count = (64 - self.bits) / 8;
let mut buffer = [0u8; 8];
let new_bits = match inp.get(..usize::from(wish_count)) {
Some(bytes) => {
buffer[..usize::from(wish_count)].copy_from_slice(bytes);
*inp = &inp[usize::from(wish_count)..];
wish_count * 8
}
None => {
let new_bits = inp.len() * 8;
buffer[..inp.len()].copy_from_slice(inp);
*inp = &[];
new_bits as u8
}
};
self.bit_buffer |= u64::from_le_bytes(buffer) << self.bits;
self.bits += new_bits;
}
fn peek_bits(&self, code: &mut [Code; BURST]) -> usize {
let mut bit_buffer = self.bit_buffer;
let mask = u64::from(self.code_mask);
let mut consumed = 0;
let mut cnt = 0;
for b in code {
let consumed_after = consumed + self.code_size;
if consumed_after > self.bits {
break;
}
cnt += 1;
consumed = consumed_after;
*b = (bit_buffer & mask) as u16;
bit_buffer >>= self.code_size;
}
cnt
}
fn consume_bits(&mut self, code_cnt: u8) {
let bits = self.code_size * code_cnt;
debug_assert!(bits <= self.bits);
if bits >= self.bits {
self.bit_buffer = 0;
} else {
self.bit_buffer >>= bits;
}
self.bits = self.bits.wrapping_sub(bits);
}
fn max_code(&self) -> Code {
self.code_mask
}
fn code_size(&self) -> u8 {
self.code_size
}
}
impl Buffer {
fn new() -> Self {
Buffer {
bytes: vec![0; MAX_ENTRIES].into_boxed_slice(),
read_mark: 0,
write_mark: 0,
reconstructed_another_code: false,
}
}
fn fill_cscsc(&mut self) -> u8 {
self.bytes[self.write_mark] = self.bytes[0];
self.reconstructed_another_code = true;
self.write_mark += 1;
self.read_mark = 0;
self.bytes[0]
}
fn fill_reconstruct(&mut self, table: &Table, code: Code) -> u8 {
self.write_mark = 0;
self.read_mark = 0;
let depth = table.code_len(code);
let out = &mut self.bytes[..usize::from(depth)];
let last = table.reconstruct(code, out);
self.reconstructed_another_code = true;
self.write_mark = usize::from(depth);
last
}
fn buffer(&self) -> &[u8] {
&self.bytes[self.read_mark..self.write_mark]
}
fn consume(&mut self, amt: usize) {
self.read_mark += amt;
}
}
impl Table {
fn new() -> Self {
Table {
suffixes: boxed_arr(),
chain: boxed_arr(),
depths: boxed_arr(),
len: 0,
}
}
fn clear(&mut self, min_size: u8) {
self.len = usize::from(1u16 << u16::from(min_size)) + 2;
}
fn init(&mut self, min_size: u8) {
self.len = 0;
for i in 0..(1u16 << u16::from(min_size)) {
let idx = self.len & MASK;
self.suffixes[idx] = [i as u8, 0, 0, 0, 0, 0, 0, 0];
self.chain[idx] = Link::base(i as u8);
self.depths[idx] = 1;
self.len += 1;
}
for _ in 0..2 {
if self.len < MAX_ENTRIES {
let idx = self.len & MASK;
self.chain[idx] = Link::base(0);
self.depths[idx] = 0;
}
self.len += 1;
}
}
fn first_of(&self, code: Code) -> u8 {
self.chain[usize::from(code) & MASK].first
}
fn code_len(&self, code: Code) -> u16 {
self.depths[usize::from(code) & MASK]
}
fn is_empty(&self) -> bool {
self.len == 0
}
fn is_full(&self) -> bool {
self.len >= MAX_ENTRIES
}
fn derive(&mut self, from: &DerivationBase, byte: u8) {
debug_assert!(self.len < MAX_ENTRIES);
let idx = self.len & MASK;
let parent = usize::from(from.code) & MASK;
let parent_depth = self.depths[parent];
let pos = parent_depth as usize & (STREAMING_Q - 1);
let mut link = from.derive();
if pos > 0 {
self.suffixes[idx] = self.suffixes[parent];
self.suffixes[idx][pos] = byte;
link.prev = self.chain[parent].prev;
} else {
self.suffixes[idx] = [0u8; STREAMING_Q];
self.suffixes[idx][0] = byte;
}
self.depths[idx] = parent_depth + 1;
self.chain[idx] = link;
self.len += 1;
}
fn derive_burst(&mut self, from: &mut DerivationBase, burst: &[Code], first: &[u8]) {
let max = MAX_ENTRIES - self.len;
for (&code, &first_byte) in burst.iter().zip(first.iter()).take(max) {
self.derive(from, first_byte);
from.code = code;
from.first = first_byte;
}
}
fn reconstruct(&self, code: Code, out: &mut [u8]) -> u8 {
let o = out.len();
let code_index = usize::from(code) & MASK;
let suffix = self.suffixes[code_index];
if o <= STREAMING_Q {
Self::non_memcpy(out, suffix);
return self.chain[code_index].first;
}
let tail_len = ((o - 1) & (STREAMING_Q - 1)) + 1;
let tail_start = o - tail_len;
Self::non_memcpy(&mut out[tail_start..], suffix);
let first = self.chain[code_index].first;
let mut c = self.chain[code_index].previous_code();
for chunk in out[..tail_start].chunks_exact_mut(STREAMING_Q).rev() {
let code_index = usize::from(c) & MASK;
chunk.copy_from_slice(&self.suffixes[code_index]);
c = self.chain[code_index].previous_code();
}
first
}
fn reconstruct_simple(&self, code: Code, out: &mut [[u8; 8]]) -> u8 {
let code_index = usize::from(code) & MASK;
let suffix = self.suffixes[code_index];
let Some((last, prefix)) = out.split_last_mut() else {
return 0;
};
*last = suffix;
if prefix.is_empty() {
return self.chain[code_index].first;
}
let first = self.chain[code_index].first;
let mut c = self.chain[code_index].previous_code();
for chunk in prefix.iter_mut().rev() {
let code_index = usize::from(c) & MASK;
*chunk = self.suffixes[code_index];
c = self.chain[code_index].previous_code();
}
first
}
fn non_memcpy(out: &mut [u8], from: [u8; STREAMING_Q]) {
match out.len() {
0 => {}
1 => out[0] = from[0],
2 => out[..2].copy_from_slice(&from[..2]),
3 => out[..3].copy_from_slice(&from[..3]),
4 => out[..4].copy_from_slice(&from[..4]),
5 => out[..5].copy_from_slice(&from[..5]),
6 => out[..6].copy_from_slice(&from[..6]),
7 => out[..7].copy_from_slice(&from[..7]),
8 => out[..8].copy_from_slice(&from[..8]),
_ => unreachable!(),
}
}
}
fn boxed_arr<T: Clone + Default, const N: usize>() -> Box<[T; N]> {
use core::convert::TryInto;
vec![T::default(); N]
.into_boxed_slice()
.try_into()
.ok()
.unwrap()
}
impl Link {
fn previous_code(&self) -> u16 {
self.prev
}
fn base(byte: u8) -> Self {
Link {
prev: 0u16,
first: byte,
}
}
}
impl DerivationBase {
fn derive(&self) -> Link {
Link {
prev: self.code,
first: self.first,
}
}
}
#[cfg(test)]
mod tests {
use crate::alloc::vec::Vec;
#[cfg(feature = "std")]
use crate::StreamBuf;
use crate::{decode::Decoder, BitOrder};
#[test]
fn invalid_code_size_low() {
let _ = Decoder::new(BitOrder::Msb, 0);
let _ = Decoder::new(BitOrder::Msb, 1);
}
#[test]
#[should_panic]
fn invalid_code_size_high() {
let _ = Decoder::new(BitOrder::Msb, 14);
}
fn make_encoded() -> Vec<u8> {
const FILE: &'static [u8] = include_bytes!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/benches/binary-8-msb.lzw"
));
return Vec::from(FILE);
}
#[test]
#[cfg(feature = "std")]
fn into_stream_buffer_no_alloc() {
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut output = vec![];
let mut buffer = [0; 512];
let mut istream = decoder.into_stream(&mut output);
istream.set_buffer(&mut buffer[..]);
istream.decode(&encoded[..]).status.unwrap();
match istream.buffer {
Some(StreamBuf::Borrowed(_)) => {}
None => panic!("Decoded without buffer??"),
Some(StreamBuf::Owned(_)) => panic!("Unexpected buffer allocation"),
}
}
#[test]
#[cfg(feature = "std")]
fn into_stream_buffer_small_alloc() {
struct WriteTap<W: std::io::Write>(W);
const BUF_SIZE: usize = 512;
impl<W: std::io::Write> std::io::Write for WriteTap<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
assert!(buf.len() <= BUF_SIZE);
self.0.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.0.flush()
}
}
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut output = vec![];
let mut istream = decoder.into_stream(WriteTap(&mut output));
istream.set_buffer_size(512);
istream.decode(&encoded[..]).status.unwrap();
match istream.buffer {
Some(StreamBuf::Owned(vec)) => assert!(vec.len() <= BUF_SIZE),
Some(StreamBuf::Borrowed(_)) => panic!("Unexpected borrowed buffer, where from?"),
None => panic!("Decoded without buffer??"),
}
}
#[test]
#[cfg(feature = "std")]
fn reset() {
let encoded = make_encoded();
let mut decoder = Decoder::new(BitOrder::Msb, 8);
let mut reference = None;
for _ in 0..2 {
let mut output = vec![];
let mut buffer = [0; 512];
let mut istream = decoder.into_stream(&mut output);
istream.set_buffer(&mut buffer[..]);
istream.decode_all(&encoded[..]).status.unwrap();
decoder.reset();
if let Some(reference) = &reference {
assert_eq!(output, *reference);
} else {
reference = Some(output);
}
}
}
#[test]
fn table_derive() {
let mut table = super::Table::new();
table.init(8);
let mut base = super::DerivationBase {
code: 1,
first: 0x1,
};
for i in 0..16 {
table.derive(&base, i);
base.code = (table.len - 1) as u16;
}
let last = (table.len - 1) as u16;
assert_eq!(table.first_of(last), 1);
assert_eq!(table.code_len(last), 17);
assert_eq!(table.suffixes[last as usize], [15, 0, 0, 0, 0, 0, 0, 0]);
assert_eq!(table.chain[last as usize].previous_code(), last - 1);
assert_eq!(
table.suffixes[last as usize - 1],
[7, 8, 9, 10, 11, 12, 13, 14]
);
assert_eq!(table.chain[last as usize - 1].previous_code(), last - 9);
assert_eq!(table.suffixes[last as usize - 9], [1, 0, 1, 2, 3, 4, 5, 6]);
assert_eq!(table.chain[last as usize - 9].previous_code(), 0);
let mut out = [0; 17];
table.reconstruct(last, &mut out);
assert_eq!(out[..8], [1, 0, 1, 2, 3, 4, 5, 6]);
assert_eq!(out[8..16], [7, 8, 9, 10, 11, 12, 13, 14]);
assert_eq!(out[16..], [15]);
out.fill(0x42);
table.reconstruct(last - 1, &mut out[..16]);
assert_eq!(out[..8], [1, 0, 1, 2, 3, 4, 5, 6]);
assert_eq!(out[8..16], [7, 8, 9, 10, 11, 12, 13, 14]);
}
}