1use huff_coding::prelude::{
2 compress_with_tree,
3 ByteWeights,
4 HuffTree,
5};
6
7use super::{
8 utils,
9 error::{
10 Error,
11 ErrorKind
12 }
13};
14
15use std::{
16 fs::File,
17 convert::TryInto,
18 path::PathBuf,
19 io::{
20 BufReader,
21 BufWriter,
22 Read,
23 Write,
24 Seek,
25 SeekFrom,
26 },
27};
28
29pub fn read_compress_write(src_path: &PathBuf, dst_path: &PathBuf, block_size: usize) -> Result<(), Error>{
33 let src = File::open(src_path)?;
35 let mut src_bytes_left = src.metadata().unwrap().len() as usize;
36 let mut reader = BufReader::new(src);
37
38 let dst = File::create(dst_path)?;
40 let mut writer = BufWriter::new(dst);
41
42 let mut buf = vec![0; block_size];
44
45 let tree = huff_tree_from_reader(&mut reader, &mut src_bytes_left.clone(), &mut buf);
47 let tree_bin = tree.as_bin();
48 let tree_bin_padding = utils::calc_padding_bits(tree_bin.len());
49 let tree_bin_bytes = tree_bin.into_vec();
50
51 reader.seek(SeekFrom::Start(0))?;
53
54 writer.write_all(&[0])?;
56 writer.write_all(&(tree_bin_bytes.len() as u32).to_be_bytes())?;
58 writer.write_all(&tree_bin_bytes)?;
60 let comp_padding =
62 compress_to_writer(
63 &mut reader, &mut writer,
64 &mut src_bytes_left, &mut buf,
65 tree
66 )?;
67
68 writer.seek(SeekFrom::Start(0))?;
70 writer.write_all(&[(tree_bin_padding << 4) + comp_padding])?;
71
72 writer.flush()?;
73 Ok(())
74}
75
76pub fn read_decompress_write(src_path: &PathBuf, dst_path: &PathBuf, block_size: usize) -> Result<(), Error>{
80 let src = File::open(src_path)?;
82 let mut src_bytes_left = src.metadata().unwrap().len() as usize;
83 let reader = BufReader::new(src);
84
85 let dst = File::create(dst_path)?;
87 let mut writer = BufWriter::new(dst);
88
89 let mut buf = vec![0; block_size];
91
92 let mut reader = reader.take(5);
94 let bytes_read = reader.read(&mut buf)?;
95 if bytes_read < 5{
96 return Err(Error::new(
97 format!("{:?} too short to decompress, missing header information", src_path),
98 ErrorKind::MissingHeaderInfo
99 ))
100 }
101 src_bytes_left -= 5;
102
103 let padding = buf[0];
105 let tree_padding_bits = padding >> 4;
106 let data_padding_bits = padding & 0b0000_1111;
107 if tree_padding_bits > 7 || data_padding_bits > 7{
108 return Err(Error::new(
109 format!("{:?} stores invalid header information", src_path),
110 ErrorKind::InvalidHeaderInfo
111 ))
112 }
113 let tree_len = u32::from_be_bytes(
115 buf[1..5]
116 .try_into()
117 .unwrap()
118 ) as usize;
119
120 reader.set_limit(tree_len as u64);
122 let bytes_read = reader.read(&mut buf)?;
123 if bytes_read < tree_len{
124 return Err(Error::new(
125 format!("{:?} too short to decompress, missing header information", src_path),
126 ErrorKind::MissingHeaderInfo
127 ))
128 }
129 src_bytes_left -= tree_len;
130
131 let tree = match huff_coding::prelude::HuffTree::<u8>::try_from_bin({
133 let mut b = huff_coding::bitvec::prelude::BitVec::from_vec(
134 buf[..tree_len]
135 .to_vec()
136 );
137 for _ in 0..tree_padding_bits{b.pop();}
138 b
139 }){
140 Ok(tree) => tree,
141 Err(_) => return Err(Error::new(
142 format!("{:?} stores invalid header information", src_path),
143 ErrorKind::InvalidHeaderInfo
144 ))
145 };
146
147 let mut reader = reader.into_inner();
149 decompress_to_writer(
150 &mut reader, &mut writer,
151 &mut src_bytes_left, &mut buf,
152 tree, data_padding_bits
153 )?;
154
155 writer.flush()?;
156 Ok(())
157}
158
159pub fn huff_tree_from_reader<R: Read>(reader: &mut R, reader_bytes_left: &mut usize, buf: &mut [u8]) -> HuffTree<u8>{
162 let mut bw = ByteWeights::new();
163 while reader.read_exact(buf).is_ok(){
164 bw += ByteWeights::threaded_from_bytes(&buf, 12);
165 *reader_bytes_left -= buf.len();
166 }
167 if *reader_bytes_left > 0{
168 bw += ByteWeights::threaded_from_bytes(&buf[..*reader_bytes_left], 12);
169 }
170
171 HuffTree::from_weights(bw)
172}
173
174fn compress_to_writer<R: Read, W: Write + Seek>(
178 reader: &mut R, writer: &mut W,
179 reader_bytes_left: &mut usize, buf: &mut [u8],
180 tree: HuffTree<u8>) -> Result<u8, Error>{
181 let mut tree = tree;
182
183 let mut prev_byte = 0;
184 let mut prev_padding = 0;
185 macro_rules! comp_data_from {
188 ($buf:expr) => {{
189 let (mut comp_bytes, padding_bits, huff_tree) =
191 compress_with_tree($buf, tree.clone())
192 .unwrap()
193 .into_inner();
194 if prev_padding != 0{
197 writer.seek(SeekFrom::Current(-1)).unwrap();
198
199 comp_bytes = utils::offset_bytes(&comp_bytes, prev_padding as usize);
200 comp_bytes[0] |= prev_byte
201 }
202
203 (comp_bytes, padding_bits, huff_tree)
204 }};
205 }
206 while reader.read_exact(buf).is_ok(){
208 let (comp_bytes, padding_bits, huff_tree) = comp_data_from!(&buf);
209 writer.write_all(&comp_bytes)?;
210
211 prev_padding = padding_bits;
212 prev_byte = comp_bytes[comp_bytes.len() - 1];
213 tree = huff_tree;
214
215 *reader_bytes_left -= buf.len();
216 }
217 if *reader_bytes_left > 0{
219 let (comp_bytes, padding_bits, _) = comp_data_from!(&buf[..*reader_bytes_left]);
220 writer.write_all(&comp_bytes)?;
221
222 prev_padding = padding_bits;
223 }
224
225 Ok(prev_padding)
227}
228
229fn decompress_to_writer<R: Read, W: Write>(
233 reader: &mut R, writer: &mut W,
234 reader_bytes_left: &mut usize, buf: &mut [u8],
235 tree: HuffTree<u8>, padding_bits: u8) -> Result<(), Error>{
236
237 let mut decomp_buf = Vec::new();
240 let mut current_branch = tree.root();
241 macro_rules! read_codes_in_byte {
242 ($byte: expr;[$bitrange:expr]) => {
243 for bit_ptr in $bitrange{
244 if current_branch.has_children(){
245 match ($byte >> (7 - bit_ptr)) & 1 == 1{
246 true =>{
247 current_branch = current_branch.right_child().unwrap();
248 }
249 false =>{
250 current_branch = current_branch.left_child().unwrap();
251 }
252 }
253 }
254 if !current_branch.has_children(){
255 decomp_buf.push(current_branch.leaf().letter().unwrap().clone());
256 current_branch = tree.root();
257 }
258 }
259 };
260 }
261 while reader.read_exact(buf).is_ok(){
263 for byte in &buf[..]{
264 read_codes_in_byte!(byte;[0..8]);
265 }
266 writer.write_all(&decomp_buf)?;
267 decomp_buf.clear();
268 *reader_bytes_left -= buf.len();
269 }
270 if *reader_bytes_left > 0{
273 for byte in &buf[..*reader_bytes_left - 1]{
274 read_codes_in_byte!(byte;[0..8]);
275 }
276 read_codes_in_byte!(buf[*reader_bytes_left - 1];[0..8 - padding_bits]);
277 writer.write_all(&decomp_buf)?;
278 }
279 Ok(())
280}