#![cfg_attr(docsrs, doc(cfg(feature = "bwt")))]
extern crate alloc;
use alloc::vec::Vec;
use crate::error::Error;
use crate::traits::{Algorithm, RawDecoder, RawEncoder, RawProgress};
mod transform;
#[cfg(test)]
mod tests;
/// Block size, in bytes, used by [`EncoderConfig::default`] (256 KiB).
pub const DEFAULT_BLOCK_SIZE: usize = 256 * 1024;
/// Smallest block size, in bytes, that [`Encoder::new`] will use; smaller requests are clamped up.
pub const MIN_BLOCK_SIZE: usize = 1;
/// Largest block size, in bytes, that [`Encoder::new`] will use (64 MiB); larger requests are
/// clamped down.
pub const MAX_BLOCK_SIZE: usize = 64 * 1024 * 1024;
/// Zero-sized marker type for the `"bwt"` algorithm.
///
/// Its [`Algorithm`] implementation selects [`Encoder`] and [`Decoder`] as the codec pair and
/// [`EncoderConfig`] as the encoder configuration.
#[derive(Debug, Clone, Copy, Default)]
pub struct Bwt;
/// Configuration for the `bwt` [`Encoder`].
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build it with a struct
/// literal; use [`EncoderConfig::new`] / [`Default`] together with the `with_*` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct EncoderConfig {
/// Requested number of input bytes per transformed block.
///
/// The value is stored as given; [`Encoder::new`] clamps it to
/// [`MIN_BLOCK_SIZE`]`..=`[`MAX_BLOCK_SIZE`] when the encoder is built.
pub block_size: usize,
}
/// The default configuration requests [`DEFAULT_BLOCK_SIZE`] bytes per block.
impl Default for EncoderConfig {
    fn default() -> Self {
        let block_size = DEFAULT_BLOCK_SIZE;
        Self { block_size }
    }
}
impl EncoderConfig {
    /// Creates a configuration with the default settings.
    ///
    /// Equivalent to [`EncoderConfig::default`].
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Returns a copy of this configuration with `block_size` replaced.
    ///
    /// The value is stored unmodified; no range checking happens here.
    #[must_use]
    pub fn with_block_size(self, block_size: usize) -> Self {
        let mut updated = self;
        updated.block_size = block_size;
        updated
    }
}
/// Registers [`Bwt`] under the name `"bwt"` and ties it to its encoder/decoder pair.
impl Algorithm for Bwt {
    const NAME: &'static str = "bwt";

    type Encoder = Encoder;
    type Decoder = Decoder;
    type EncoderConfig = EncoderConfig;
    type DecoderConfig = ();

    /// Builds an encoder using the block size requested in `cfg`.
    fn encoder_with(cfg: Self::EncoderConfig) -> Self::Encoder {
        let block_size = cfg.block_size;
        Encoder::new(block_size)
    }

    /// Builds a decoder; the decoder takes no configuration.
    fn decoder_with((): Self::DecoderConfig) -> Self::Decoder {
        Decoder::new()
    }
}
/// Buffering encoder for the `bwt` algorithm.
///
/// All input is accumulated in memory; the transform runs once, when the stream is finished,
/// and the encoded bytes are then handed out from an internal buffer.
#[derive(Debug)]
pub struct Encoder {
/// Number of input bytes per transformed block (already clamped by [`Encoder::new`]).
block_size: usize,
/// Every byte received through `raw_encode` so far.
input: Vec<u8>,
/// Encoded stream produced by `finalize`.
output: Vec<u8>,
/// Number of bytes of `output` that have already been copied out to the caller.
out_cursor: usize,
/// Set once `finalize` has run, so the transform is performed only once per stream.
finalized: bool,
}
impl Encoder {
    /// Creates an encoder that transforms its input in blocks of `block_size` bytes.
    ///
    /// `block_size` is clamped to [`MIN_BLOCK_SIZE`]`..=`[`MAX_BLOCK_SIZE`], so a value of `0`
    /// or an oversized value is accepted and silently adjusted.
    pub fn new(block_size: usize) -> Self {
        Self {
            block_size: block_size.clamp(MIN_BLOCK_SIZE, MAX_BLOCK_SIZE),
            input: Vec::new(),
            output: Vec::new(),
            out_cursor: 0,
            finalized: false,
        }
    }

    /// Transforms all buffered input and appends the encoded blocks to `self.output`.
    ///
    /// The input is split into chunks of at most `block_size` bytes. Each chunk is written as:
    ///
    /// 1. the chunk length as a little-endian `u32`,
    /// 2. the primary index returned by `transform::forward` as a little-endian `u32`,
    /// 3. the transformed bytes returned by `transform::forward`.
    ///
    /// Empty input produces no blocks at all.
    fn finalize(&mut self) {
        /// Bytes of framing (length + primary index) written in front of every block.
        const HEADER_LEN: usize = 8;

        let blocks = self.input.chunks(self.block_size);
        // The final size is known up front (payload plus one header per block), so grow the
        // output buffer once instead of re-allocating while blocks are appended.
        let framing = blocks.len().saturating_mul(HEADER_LEN);
        self.output
            .reserve(self.input.len().saturating_add(framing));

        for block in blocks {
            let (last_col, primary) = transform::forward(block);
            // Lossless: a chunk is never longer than `block_size`, which `new` clamps to
            // `MAX_BLOCK_SIZE` (64 MiB), well inside the `u32` range.
            let len = block.len() as u32;
            self.output.extend_from_slice(&len.to_le_bytes());
            // NOTE(review): assumes `primary` indexes into the block and therefore also
            // fits in a `u32` — confirm against `transform::forward`.
            self.output
                .extend_from_slice(&(primary as u32).to_le_bytes());
            self.output.extend_from_slice(&last_col);
        }
    }
}
impl RawEncoder for Encoder {
    /// Buffers `input` in full; nothing is written to `_output` at this stage.
    ///
    /// Always reports every input byte as consumed, zero bytes written and `done == false`.
    ///
    /// NOTE(review): bytes supplied after `raw_finish` has already run are still buffered and
    /// reported as consumed, but the transform is not run again, so they are never encoded —
    /// confirm callers never feed input after finishing.
    fn raw_encode(&mut self, input: &[u8], _output: &mut [u8]) -> Result<RawProgress, Error> {
        let consumed = input.len();
        self.input.extend_from_slice(input);
        Ok(RawProgress {
            consumed,
            written: 0,
            done: false,
        })
    }

    /// Encodes the buffered input on the first call, then copies as much of the encoded stream
    /// as fits into `output`.
    ///
    /// Call repeatedly until the returned progress has `done == true`; each call continues
    /// where the previous one stopped.
    fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
        // The transform runs exactly once per stream; later calls only drain.
        if !self.finalized {
            self.finalize();
            self.finalized = true;
        }

        let pending = &self.output[self.out_cursor..];
        let written = pending.len().min(output.len());
        output[..written].copy_from_slice(&pending[..written]);
        self.out_cursor += written;

        let done = self.out_cursor >= self.output.len();
        Ok(RawProgress {
            consumed: 0,
            written,
            done,
        })
    }

    /// Discards all buffered input and output so the encoder can start a new stream.
    ///
    /// The configured block size is kept.
    fn raw_reset(&mut self) {
        self.finalized = false;
        self.out_cursor = 0;
        self.output.clear();
        self.input.clear();
    }
}
/// Buffering decoder for the `bwt` algorithm.
///
/// The whole encoded stream is accumulated in memory and decoded in one pass when the stream
/// is finished; the decoded bytes are then handed out from an internal buffer.
#[derive(Debug)]
pub struct Decoder {
/// Encoded bytes received so far.
input: Vec<u8>,
/// Decoded bytes produced by `decode_all`.
output: Vec<u8>,
/// Number of bytes of `output` that have already been copied out to the caller.
out_cursor: usize,
/// Set once `decode_all` has succeeded; from then on the decoder only drains `output`.
decoded: bool,
}
impl Default for Decoder {
fn default() -> Self {
Self::new()
}
}
impl Decoder {
    /// Creates a decoder with empty buffers that has not decoded anything yet.
    pub fn new() -> Self {
        Self {
            input: Vec::new(),
            output: Vec::new(),
            out_cursor: 0,
            decoded: false,
        }
    }

    /// Decodes every block in the buffered input into `self.output`.
    ///
    /// The input is read as a sequence of blocks, each consisting of a little-endian `u32`
    /// length, a little-endian `u32` primary index and then `length` payload bytes, which are
    /// passed to `transform::inverse`. Empty input is valid and decodes to nothing.
    ///
    /// Once decoding has succeeded, further calls return `Ok(())` without doing any work.
    ///
    /// # Errors
    ///
    /// * [`Error::UnexpectedEnd`] if the input stops inside a block header or payload.
    /// * [`Error::Corrupt`] if a block declares length `0` or a primary index that is not
    ///   smaller than its length.
    /// * Any error returned by `transform::inverse`.
    ///
    /// After an error the decoder stays in the "not decoded" state, so more input may be
    /// buffered and decoding attempted again.
    fn decode_all(&mut self) -> Result<(), Error> {
        /// Bytes of framing (length + primary index) in front of every block.
        const HEADER_LEN: usize = 8;

        if self.decoded {
            return Ok(());
        }

        // A previous attempt may have failed part-way through and left the blocks it managed
        // to decode in `output`. Every attempt restarts from the beginning of `input`, so
        // drop that partial result first; otherwise a retry would emit those blocks twice.
        self.output.clear();

        let mut rest = &self.input[..];
        while !rest.is_empty() {
            if rest.len() < HEADER_LEN {
                return Err(Error::UnexpectedEnd);
            }
            let (hdr, body) = rest.split_at(HEADER_LEN);
            let len = u32::from_le_bytes([hdr[0], hdr[1], hdr[2], hdr[3]]) as usize;
            let primary = u32::from_le_bytes([hdr[4], hdr[5], hdr[6], hdr[7]]) as usize;

            if len == 0 || primary >= len {
                return Err(Error::Corrupt);
            }
            if body.len() < len {
                return Err(Error::UnexpectedEnd);
            }

            let (last_col, tail) = body.split_at(len);
            transform::inverse(last_col, primary, &mut self.output)?;
            rest = tail;
        }

        self.decoded = true;
        Ok(())
    }

    /// Copies as many not-yet-delivered decoded bytes as fit into `output`.
    ///
    /// Returns progress with `consumed == 0`, `written` set to the number of bytes copied and
    /// `done == true` once every decoded byte has been handed out.
    fn drain(&mut self, output: &mut [u8]) -> RawProgress {
        let pending = &self.output[self.out_cursor..];
        let take = pending.len().min(output.len());
        output[..take].copy_from_slice(&pending[..take]);
        self.out_cursor += take;
        RawProgress {
            consumed: 0,
            written: take,
            done: self.out_cursor >= self.output.len(),
        }
    }
}
impl RawDecoder for Decoder {
    /// Buffers encoded input, or drains decoded output once decoding has happened.
    ///
    /// Before the stream has been decoded, `input` is stored in full and reported as consumed;
    /// nothing is written. After decoding, `input` is ignored (`consumed == 0`) and the call
    /// only copies pending decoded bytes into `output`.
    fn raw_decode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
        if self.decoded {
            // `drain` already reports `consumed: 0`, so its progress can be returned as is.
            return Ok(self.drain(output));
        }

        let consumed = input.len();
        self.input.extend_from_slice(input);
        Ok(RawProgress {
            consumed,
            written: 0,
            done: false,
        })
    }

    /// Decodes the buffered stream (on the first successful call) and copies as much decoded
    /// data as fits into `output`.
    ///
    /// Call repeatedly until the returned progress has `done == true`. Decoding errors are
    /// returned unchanged and nothing is written in that case.
    fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
        self.decode_all().map(|()| self.drain(output))
    }

    /// Discards all buffered input and output so the decoder can start a new stream.
    fn raw_reset(&mut self) {
        self.decoded = false;
        self.out_cursor = 0;
        self.output.clear();
        self.input.clear();
    }
}