huff/
comp.rs

1use huff_coding::prelude::{
2    compress_with_tree, 
3    ByteWeights, 
4    HuffTree,
5};
6
7use super::{
8    utils,
9    error::{
10        Error,
11        ErrorKind
12    }
13};
14
15use std::{
16    fs::File,
17    convert::TryInto,
18    path::PathBuf,
19    io::{
20        BufReader,
21        BufWriter,
22        Read,
23        Write,
24        Seek,
25        SeekFrom,
26    },
27};
28
29/// Read the the src file, compress it, and write the compressed data into dst file.
30/// 
31/// Chunk size means how many bytes will be read from src file at one time
32pub fn read_compress_write(src_path: &PathBuf, dst_path: &PathBuf, block_size: usize) -> Result<(), Error>{
33    // read from src file
34    let src = File::open(src_path)?;
35    let mut src_bytes_left = src.metadata().unwrap().len() as usize;
36    let mut reader = BufReader::new(src);
37
38    // write to dst file
39    let dst = File::create(dst_path)?;
40    let mut writer = BufWriter::new(dst);
41
42    // allocate a u8 buffer of size == block_size
43    let mut buf = vec![0; block_size];
44
45    // create a HuffTree from the src file bytes
46    let tree = huff_tree_from_reader(&mut reader, &mut src_bytes_left.clone(), &mut buf);
47    let tree_bin = tree.as_bin();
48    let tree_bin_padding = utils::calc_padding_bits(tree_bin.len());
49    let tree_bin_bytes = tree_bin.into_vec();
50
51    // return reader to start
52    reader.seek(SeekFrom::Start(0))?;
53
54    // write an empty byte, later to be filled by padding data
55    writer.write_all(&[0])?;
56    // write the tree_bin_bytes lenght as a 4 byte num
57    writer.write_all(&(tree_bin_bytes.len() as u32).to_be_bytes())?;
58    // write the HuffTree represented as bytes
59    writer.write_all(&tree_bin_bytes)?;
60    // compress and write compressed bytes, returning the number of bits used as padding
61    let comp_padding = 
62        compress_to_writer(
63            &mut reader, &mut writer, 
64            &mut src_bytes_left, &mut buf, 
65            tree
66        )?;
67
68    // return to the start of the file and set the padding bits
69    writer.seek(SeekFrom::Start(0))?;
70    writer.write_all(&[(tree_bin_padding << 4) + comp_padding])?;
71
72    writer.flush()?;
73    Ok(())
74}
75
76/// Read the src file, decompress it, and write the decompressed data into dst file.
77/// 
78/// Chunk size means how many bytes will be read from src file at one time
79pub fn read_decompress_write(src_path: &PathBuf, dst_path: &PathBuf, block_size: usize) -> Result<(), Error>{
80    // read from src file
81    let src = File::open(src_path)?;
82    let mut src_bytes_left = src.metadata().unwrap().len() as usize;
83    let reader = BufReader::new(src);
84
85    // write to dst file
86    let dst = File::create(dst_path)?;
87    let mut writer = BufWriter::new(dst);
88
89    // allocate a u8 buffer of size == block_size
90    let mut buf = vec![0; block_size];
91
92    // read only first 5 bytes
93    let mut reader = reader.take(5);
94    let bytes_read = reader.read(&mut buf)?;
95    if bytes_read < 5{
96        return Err(Error::new(
97            format!("{:?} too short to decompress, missing header information", src_path),
98            ErrorKind::MissingHeaderInfo
99        ))
100    }
101    src_bytes_left -= 5;
102
103    // read padding info from the first byte
104    let padding = buf[0];
105    let tree_padding_bits =  padding >> 4;
106    let data_padding_bits = padding & 0b0000_1111;
107    if tree_padding_bits > 7 || data_padding_bits > 7{
108        return Err(Error::new(
109            format!("{:?} stores invalid header information", src_path),
110            ErrorKind::InvalidHeaderInfo
111        ))
112    }
113    // read tree_bin's length
114    let tree_len = u32::from_be_bytes(
115        buf[1..5]
116        .try_into()
117        .unwrap()
118    ) as usize;
119    
120    // read only next tree_len bytes
121    reader.set_limit(tree_len as u64);
122    let bytes_read = reader.read(&mut buf)?;
123    if bytes_read < tree_len{
124        return Err(Error::new(
125            format!("{:?} too short to decompress, missing header information", src_path),
126            ErrorKind::MissingHeaderInfo
127        ))
128    }
129    src_bytes_left -= tree_len;
130
131    // read the HuffTree
132    let tree = match huff_coding::prelude::HuffTree::<u8>::try_from_bin({
133        let mut b = huff_coding::bitvec::prelude::BitVec::from_vec(
134            buf[..tree_len]
135            .to_vec()
136        );
137        for _ in 0..tree_padding_bits{b.pop();}
138        b
139    }){
140        Ok(tree) => tree,
141        Err(_) => return Err(Error::new(
142            format!("{:?} stores invalid header information", src_path), 
143            ErrorKind::InvalidHeaderInfo
144        ))
145    };
146
147    // decompress the remaining bytes
148    let mut reader = reader.into_inner();
149    decompress_to_writer(
150        &mut reader, &mut writer, 
151        &mut src_bytes_left, &mut buf,
152        tree, data_padding_bits
153    )?;
154
155    writer.flush()?;
156    Ok(())
157}
158
159/// Read bytes from reader, loading at most buf.len() bytes
160/// from it at one time, building a HuffTree from them
161pub fn huff_tree_from_reader<R: Read>(reader: &mut R, reader_bytes_left: &mut usize, buf: &mut [u8]) -> HuffTree<u8>{
162    let mut bw = ByteWeights::new();
163    while reader.read_exact(buf).is_ok(){
164        bw += ByteWeights::threaded_from_bytes(&buf, 12);
165        *reader_bytes_left -= buf.len();
166    }
167    if *reader_bytes_left > 0{
168        bw += ByteWeights::threaded_from_bytes(&buf[..*reader_bytes_left], 12);
169    }
170
171    HuffTree::from_weights(bw)
172}
173
174/// Read bytes from reader, loading at most buf.len() bytes
175/// from it at one time, compress them with the provided tree, 
176/// and write them to writer
177fn compress_to_writer<R: Read, W: Write + Seek>(
178    reader: &mut R, writer: &mut W, 
179    reader_bytes_left: &mut usize, buf: &mut [u8], 
180    tree: HuffTree<u8>) -> Result<u8, Error>{
181    let mut tree = tree;
182
183    let mut prev_byte = 0;
184    let mut prev_padding = 0;
185    /// compress the buffer into CompressData, combining it with
186    /// the prev_byte if the prev_padding != 0
187    macro_rules! comp_data_from {
188        ($buf:expr) => {{
189            // get and own the compress data
190            let (mut comp_bytes, padding_bits, huff_tree) = 
191                compress_with_tree($buf, tree.clone())
192                .unwrap()
193                .into_inner();
194            // if the previous compress data's padding isn't 0
195            // write the comp_bytes minding the padding
196            if prev_padding != 0{
197                writer.seek(SeekFrom::Current(-1)).unwrap();
198
199                comp_bytes = utils::offset_bytes(&comp_bytes, prev_padding as usize);
200                comp_bytes[0] |= prev_byte
201            }
202
203            (comp_bytes, padding_bits, huff_tree)
204        }};
205    }
206    // try to read exactly buf.len() bytes, compressing them and repeating
207    while reader.read_exact(buf).is_ok(){
208        let (comp_bytes, padding_bits, huff_tree) =  comp_data_from!(&buf);
209        writer.write_all(&comp_bytes)?;
210        
211        prev_padding = padding_bits;
212        prev_byte = comp_bytes[comp_bytes.len() - 1];
213        tree = huff_tree;
214
215        *reader_bytes_left -= buf.len();
216    }
217    // if couldn't read exactly buf.len() bytes and there are some bytes left, compress them
218    if *reader_bytes_left > 0{
219        let (comp_bytes, padding_bits, _) =  comp_data_from!(&buf[..*reader_bytes_left]);
220        writer.write_all(&comp_bytes)?;
221
222        prev_padding = padding_bits;
223    }
224
225    // return the written compressed data's padding bits
226    Ok(prev_padding)
227}
228
229/// Read bytes from reader, loading at most buf.len() bytes
230/// from it at one time, decompress them with the provided tree, 
231/// and write them to writer
232fn decompress_to_writer<R: Read, W: Write>(
233    reader: &mut R, writer: &mut W, 
234    reader_bytes_left: &mut usize, buf: &mut [u8],
235    tree: HuffTree<u8>, padding_bits: u8) -> Result<(), Error>{
236
237    // do pretty much the same thing as in huff_coding::comp::decompress
238    // see it's docs for an explanation
239    let mut decomp_buf = Vec::new();
240    let mut current_branch = tree.root();
241    macro_rules! read_codes_in_byte {
242        ($byte: expr;[$bitrange:expr]) => {
243            for bit_ptr in $bitrange{
244                if current_branch.has_children(){
245                    match ($byte >> (7 - bit_ptr)) & 1 == 1{
246                        true =>{
247                            current_branch = current_branch.right_child().unwrap();
248                        }
249                        false =>{
250                            current_branch = current_branch.left_child().unwrap();
251                        }
252                    }
253                }
254                if !current_branch.has_children(){
255                    decomp_buf.push(current_branch.leaf().letter().unwrap().clone());
256                    current_branch = tree.root();
257                }
258            }
259        };
260    }
261    // try to read exactly buf.len() bytes, decompressing them and writing
262    while reader.read_exact(buf).is_ok(){
263        for byte in &buf[..]{
264            read_codes_in_byte!(byte;[0..8]);
265        }
266        writer.write_all(&decomp_buf)?;
267        decomp_buf.clear();
268        *reader_bytes_left -= buf.len();
269    }
270    // if couldn't read exactly buf.len() bytes and there are some bytes left, 
271    // decompress them minding the padding bits
272    if *reader_bytes_left > 0{
273        for byte in &buf[..*reader_bytes_left - 1]{
274            read_codes_in_byte!(byte;[0..8]);
275        }
276        read_codes_in_byte!(buf[*reader_bytes_left - 1];[0..8 - padding_bits]);
277        writer.write_all(&decomp_buf)?;
278    }
279    Ok(())
280}