use std::io::{self, Cursor, Read};
use crate::burrows_wheeler::BwtEncoded;
mod burrows_wheeler;
mod file_format;
mod huffman;
mod move_to_front;
mod rle1;
mod rle2;
#[derive(Debug, thiserror::Error)]
pub enum CompressError {
#[error("I/O error: {0}")]
IOError(io::Error),
}
impl From<io::Error> for CompressError {
fn from(value: io::Error) -> Self {
CompressError::IOError(value)
}
}
#[derive(Debug, thiserror::Error)]
pub enum DecompressError {
#[error("I/O error: {0}")]
IOError(io::Error),
#[error("Unable to parse the bzip2 stream: {0}")]
Parse(#[from] file_format::DecodeError),
#[error("Failed to decode at a runlength step")]
RunLengthDecode,
#[error("Failed to decode at a burrows-wheeler step")]
BurrowsWheelerDecode(#[from] burrows_wheeler::DecodeError),
#[error("Failed to decode at a huffman code step")]
HuffmanDecode,
}
impl From<io::Error> for DecompressError {
fn from(value: io::Error) -> Self {
DecompressError::IOError(value)
}
}
impl From<rle1::Error> for DecompressError {
fn from(value: rle1::Error) -> Self {
match value {
rle1::Error::RunLengthInvalid(_) => DecompressError::RunLengthDecode,
rle1::Error::RunLengthTruncated => DecompressError::RunLengthDecode,
}
}
}
impl From<huffman::Error> for DecompressError {
fn from(_value: huffman::Error) -> Self {
Self::HuffmanDecode
}
}
pub fn compress<R>(mut data: R) -> Result<impl Read, CompressError>
where
R: Read,
{
let mut all_data = vec![];
data.read_to_end(&mut all_data)?;
let rle_data = rle1::encode(&all_data);
let burrows_wheeler_data = burrows_wheeler::encode(&rle_data);
let move_to_front_data = move_to_front::encode(&burrows_wheeler_data.data);
let rle2_data = rle2::encode(&move_to_front_data);
let _huffman_data = huffman::encode(&rle2_data);
let file_data = Vec::new();
let cursor = Cursor::new(file_data);
Ok(cursor)
}
pub fn decompress<R>(mut data: R) -> Result<impl Read, DecompressError>
where
R: Read,
{
let mut all_data = vec![];
let mut decompressed_data = vec![];
data.read_to_end(&mut all_data)?;
let un_file_data = file_format::decode(&all_data)?;
for block in &un_file_data {
let un_huffman_data = huffman::decode(block.symbols());
let un_rle2 = rle2::decode(&un_huffman_data);
let un_move_to_front_data = move_to_front::decode(&un_rle2, block.symbol_stack());
let un_burrows_wheeler_data = burrows_wheeler::decode(&BwtEncoded::new(
un_move_to_front_data,
block.origin_pointer(),
))?;
let mut un_rle_data = rle1::decode(&un_burrows_wheeler_data)?;
decompressed_data.append(&mut un_rle_data);
}
let cursor = Cursor::new(decompressed_data);
Ok(cursor)
}
#[cfg(test)]
mod tests {
use std::{
io::Write as _,
process::{Command, Stdio},
};
use super::*;
#[test]
fn can_we_read() {
let peter_piper = "If Peter Piper picked a peck of pickled peppers, where's the peck of pickled peppers Peter Piper picked?????";
for level in 1..=9 {
let mut child = Command::new("bzip2")
.arg("-c")
.arg(format!("-{level}"))
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.unwrap();
{
let mut stdin = child.stdin.take().unwrap();
stdin.write_all(peter_piper.as_bytes()).unwrap();
}
let bytes = child.wait_with_output().unwrap().stdout;
let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
let mut buffer = vec![];
let _num_bytes = data
.read_to_end(&mut buffer)
.expect("Cannot read decompressed data");
assert_eq!(std::str::from_utf8(&buffer).unwrap(), peter_piper);
}
}
#[test]
fn four_bytes() {
let sample = [0x04, 0x00, 0x00, 0x00];
let mut child = Command::new("bzip2")
.arg("-c")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.unwrap();
{
let mut stdin = child.stdin.take().unwrap();
stdin.write_all(&sample).unwrap();
}
let bytes = child.wait_with_output().unwrap().stdout;
let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
let mut buffer = vec![];
let _bytes = data
.read_to_end(&mut buffer)
.expect("Cannot read decompressed data");
assert_eq!(buffer, sample);
}
#[test]
fn parsing_selectors() {
let sample = [
0, 255, 255, 72, 255, 255, 255, 255, 0, 5, 2, 255, 7, 255, 0, 0, 0, 0, 1, 0, 2, 255, 0,
255, 255, 255, 255, 196, 251, 255, 183, 0, 0, 0, 182, 5, 0, 0, 255, 183, 0, 0, 0, 182,
5, 0, 0, 0,
];
let mut child = Command::new("bzip2")
.arg("-c")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.unwrap();
{
let mut stdin = child.stdin.take().unwrap();
stdin.write_all(&sample).unwrap();
}
let bytes = child.wait_with_output().unwrap().stdout;
let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
let mut buffer = vec![];
let _bytes = data
.read_to_end(&mut buffer)
.expect("Cannot read decompressed data");
assert_eq!(buffer, sample);
}
}