1use std::io::{self, Cursor, Read};
9
10use crate::burrows_wheeler::BwtEncoded;
11
12mod burrows_wheeler;
13mod file_format;
14mod huffman;
15mod move_to_front;
16mod rle1;
17mod rle2;
18
19#[derive(Debug, thiserror::Error)]
21pub enum CompressError {
22 #[error("I/O error: {0}")]
24 IOError(io::Error),
25}
26
27impl From<io::Error> for CompressError {
28 fn from(value: io::Error) -> Self {
29 CompressError::IOError(value)
30 }
31}
32
33#[derive(Debug, thiserror::Error)]
35pub enum DecompressError {
36 #[error("I/O error: {0}")]
38 IOError(io::Error),
39 #[error("Unable to parse the bzip2 stream: {0}")]
41 Parse(#[from] file_format::DecodeError),
42 #[error("Failed to decode at a runlength step")]
44 RunLengthDecode,
45 #[error("Failed to decode at a burrows-wheeler step")]
47 BurrowsWheelerDecode(#[from] burrows_wheeler::DecodeError),
48 #[error("Failed to decode at a huffman code step")]
50 HuffmanDecode,
51}
52
53impl From<io::Error> for DecompressError {
54 fn from(value: io::Error) -> Self {
55 DecompressError::IOError(value)
56 }
57}
58
59impl From<rle1::Error> for DecompressError {
60 fn from(value: rle1::Error) -> Self {
61 match value {
62 rle1::Error::RunLengthInvalid(_) => DecompressError::RunLengthDecode,
63 rle1::Error::RunLengthTruncated => DecompressError::RunLengthDecode,
64 }
65 }
66}
67
68impl From<huffman::Error> for DecompressError {
69 fn from(_value: huffman::Error) -> Self {
70 Self::HuffmanDecode
71 }
72}
73
74pub fn compress<R>(mut data: R) -> Result<impl Read, CompressError>
76where
77 R: Read,
78{
79 let mut all_data = vec![];
80 data.read_to_end(&mut all_data)?;
81
82 let rle_data = rle1::encode(&all_data);
83 let burrows_wheeler_data = burrows_wheeler::encode(&rle_data);
86 let move_to_front_data = move_to_front::encode(&burrows_wheeler_data.data);
87 let rle2_data = rle2::encode(&move_to_front_data);
88 let _huffman_data = huffman::encode(&rle2_data);
89 let file_data = Vec::new();
92
93 let cursor = Cursor::new(file_data);
94
95 Ok(cursor)
96}
97
98pub fn decompress<R>(mut data: R) -> Result<impl Read, DecompressError>
104where
105 R: Read,
106{
107 let mut all_data = vec![];
108 let mut decompressed_data = vec![];
109 data.read_to_end(&mut all_data)?;
110
111 let un_file_data = file_format::decode(&all_data)?;
112
113 for block in &un_file_data {
114 let un_huffman_data = huffman::decode(block.symbols());
115 let un_rle2 = rle2::decode(&un_huffman_data);
116 let un_move_to_front_data = move_to_front::decode(&un_rle2, block.symbol_stack());
117 let un_burrows_wheeler_data = burrows_wheeler::decode(&BwtEncoded::new(
118 un_move_to_front_data,
119 block.origin_pointer(),
120 ))?;
121 let mut un_rle_data = rle1::decode(&un_burrows_wheeler_data)?;
122 decompressed_data.append(&mut un_rle_data);
123 }
124
125 let cursor = Cursor::new(decompressed_data);
126
127 Ok(cursor)
128}
129
130#[cfg(test)]
131mod tests {
132 use std::{
133 io::Write as _,
134 process::{Command, Stdio},
135 };
136
137 use super::*;
138
139 #[test]
140 fn can_we_read() {
141 let peter_piper = "If Peter Piper picked a peck of pickled peppers, where's the peck of pickled peppers Peter Piper picked?????";
142
143 for level in 1..=9 {
144 let mut child = Command::new("bzip2")
145 .arg("-c")
146 .arg(format!("-{level}"))
147 .stdin(Stdio::piped())
148 .stdout(Stdio::piped())
149 .spawn()
150 .unwrap();
151 {
152 let mut stdin = child.stdin.take().unwrap();
153 stdin.write_all(peter_piper.as_bytes()).unwrap();
154 }
155 let bytes = child.wait_with_output().unwrap().stdout;
156
157 let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
158
159 let mut buffer = vec![];
160 let _num_bytes = data
161 .read_to_end(&mut buffer)
162 .expect("Cannot read decompressed data");
163 assert_eq!(std::str::from_utf8(&buffer).unwrap(), peter_piper);
164 }
165 }
166
167 #[test]
169 fn four_bytes() {
170 let sample = [0x04, 0x00, 0x00, 0x00];
171
172 let mut child = Command::new("bzip2")
173 .arg("-c")
174 .stdin(Stdio::piped())
175 .stdout(Stdio::piped())
176 .spawn()
177 .unwrap();
178 {
179 let mut stdin = child.stdin.take().unwrap();
180 stdin.write_all(&sample).unwrap();
181 }
182 let bytes = child.wait_with_output().unwrap().stdout;
183
184 let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
185
186 let mut buffer = vec![];
187 let _bytes = data
188 .read_to_end(&mut buffer)
189 .expect("Cannot read decompressed data");
190 assert_eq!(buffer, sample);
191 }
192
193 #[test]
198 fn parsing_selectors() {
199 let sample = [
200 0, 255, 255, 72, 255, 255, 255, 255, 0, 5, 2, 255, 7, 255, 0, 0, 0, 0, 1, 0, 2, 255, 0,
201 255, 255, 255, 255, 196, 251, 255, 183, 0, 0, 0, 182, 5, 0, 0, 255, 183, 0, 0, 0, 182,
202 5, 0, 0, 0,
203 ];
204
205 let mut child = Command::new("bzip2")
206 .arg("-c")
207 .stdin(Stdio::piped())
208 .stdout(Stdio::piped())
209 .spawn()
210 .unwrap();
211 {
212 let mut stdin = child.stdin.take().unwrap();
213 stdin.write_all(&sample).unwrap();
214 }
215 let bytes = child.wait_with_output().unwrap().stdout;
216
217 let mut data = decompress(&bytes[..]).expect("Cannot decompress test data");
218
219 let mut buffer = vec![];
220 let _bytes = data
221 .read_to_end(&mut buffer)
222 .expect("Cannot read decompressed data");
223 assert_eq!(buffer, sample);
224 }
225}