streaming_libdeflate_rs/
lib.rs

1// #![cfg_attr(debug_assertions, deny(warnings))]
2pub mod bitstream;
3
4pub(crate) mod block_finder;
5pub mod decode_blocks;
6pub mod decompress_deflate;
7pub mod decompress_gzip;
8mod decompress_utils;
9mod deflate_constants;
10mod gzip_constants;
11pub mod streams;
12pub mod unchecked;
13
14#[macro_use]
15extern crate static_assertions;
16
17use crate::decompress_deflate::{
18    HuffmanDecodeStruct, OutStreamResult, FAST_TABLESIZE, LITLEN_SUBTABLESIZE, LITLEN_TABLESIZE,
19    OFFSET_SUBTABLESIZE, OFFSET_TABLESIZE,
20};
21use crate::decompress_gzip::libdeflate_gzip_decompress;
22use crate::decompress_utils::fast_decode_entry::FastDecodeEntry;
23use crate::deflate_constants::DEFLATE_MAX_NUM_SYMS;
24use crate::streams::deflate_chunked_buffer_input::DeflateChunkedBufferInput;
25use crate::streams::deflate_chunked_buffer_output::DeflateChunkedBufferOutput;
26use crate::unchecked::UncheckedArray;
27use std::fs::File;
28use std::io::Read;
29use std::mem::size_of;
30use std::path::Path;
31
32/*
33 * The main DEFLATE decompressor structure.  Since this implementation only
34 * supports full buffer decompression, this structure does not store the entire
35 * decompression state, but rather only some arrays that are too large to
36 * comfortably allocate on the stack.
37 */
38pub struct LibdeflateDecodeTables {
39    pub(crate) huffman_decode: HuffmanDecodeStruct,
40    pub(crate) litlen_decode_table: UncheckedArray<FastDecodeEntry, LITLEN_TABLESIZE>,
41
42    pub(crate) offset_decode_table: UncheckedArray<FastDecodeEntry, OFFSET_TABLESIZE>,
43
44    pub(crate) fast_decode_table: UncheckedArray<FastDecodeEntry, FAST_TABLESIZE>,
45
46    pub(crate) litlen_decode_subtable: UncheckedArray<FastDecodeEntry, LITLEN_SUBTABLESIZE>,
47    pub(crate) offset_decode_subtable: UncheckedArray<FastDecodeEntry, OFFSET_SUBTABLESIZE>,
48
49    /* used only during build_decode_table() */
50    pub(crate) sorted_syms: UncheckedArray<u16, DEFLATE_MAX_NUM_SYMS>,
51    pub(crate) static_codes_loaded: bool,
52}
53
/// Error reported when decompression fails.
///
/// This is the error type of [`decompress_file_buffered`]. The numeric
/// discriminants are explicit and must stay stable.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it
/// can be propagated with `?` into `Box<dyn std::error::Error>` and similar
/// error containers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LibdeflateError {
    /// Decompression failed because the compressed data was invalid, corrupt,
    /// or otherwise unsupported.
    BadData = 1,

    /// The data would have decompressed to fewer bytes than the caller
    /// expected, and the caller did not ask for the actual size.
    ShortOutput = 2,

    /// The data would have decompressed to more bytes than the available
    /// output space.
    InsufficientSpace = 3,
}

impl std::fmt::Display for LibdeflateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match *self {
            LibdeflateError::BadData => "compressed data is invalid, corrupt, or unsupported",
            LibdeflateError::ShortOutput => {
                "decompressed data is shorter than the expected output size"
            }
            LibdeflateError::InsufficientSpace => {
                "decompressed data does not fit in the available output space"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for LibdeflateError {}
72
73pub trait DeflateInput {
74    const MAX_LOOK_BACK: usize = size_of::<usize>() * 2;
75    const MAX_OVERREAD: usize = size_of::<usize>() * 2;
76
77    unsafe fn get_le_word_no_advance(&mut self) -> usize;
78    fn move_stream_pos<const REFILL: bool>(&mut self, amount: isize);
79    fn get_stream_pos_mut(&mut self) -> &mut usize;
80    fn tell_stream_pos(&self) -> usize;
81    fn read<const REFILL: bool>(&mut self, out_data: &mut [u8]) -> usize;
82    // Ensure that the current buffer has at least `Self::MAX_OVERREAD` elements. this function must never fail
83    fn ensure_overread_length(&mut self);
84    // Check if the stream buffer has at least Self::MAX_OVERREAD bytes remaining with either valid data or eof data
85    fn has_readable_overread(&self) -> bool;
86    fn has_valid_bytes_slow(&mut self) -> bool;
87    fn read_exact_into<O: DeflateOutput>(&mut self, out_stream: &mut O, length: usize) -> bool;
88
89    #[inline(always)]
90    fn read_byte<const REFILL: bool>(&mut self) -> u8 {
91        let mut byte = [0];
92        self.read::<REFILL>(&mut byte);
93        byte[0]
94    }
95
96    #[inline(always)]
97    fn read_le_u16<const REFILL: bool>(&mut self) -> u16 {
98        let mut bytes = [0, 0];
99        self.read::<REFILL>(&mut bytes);
100        u16::from_le_bytes(bytes)
101    }
102
103    #[inline(always)]
104    fn read_le_u32<const REFILL: bool>(&mut self) -> u32 {
105        let mut bytes = [0, 0, 0, 0];
106        self.read::<REFILL>(&mut bytes);
107        u32::from_le_bytes(bytes)
108    }
109}
110
111pub trait DeflateOutput {
112    const MAX_LOOK_BACK: usize = 32768;
113    const OVERWRITE_MAX: usize = 16;
114
115    fn has_writable_length(&mut self, length: usize) -> bool;
116    fn flush_ensure_length(&mut self, length: usize) -> bool;
117
118    fn get_output_ptr(&mut self) -> *mut u8;
119    unsafe fn set_output_ptr(&mut self, ptr: *mut u8);
120
121    fn final_flush(&mut self) -> Result<OutStreamResult, ()>;
122}
123
124pub fn libdeflate_alloc_decode_tables() -> LibdeflateDecodeTables {
125    LibdeflateDecodeTables {
126        huffman_decode: HuffmanDecodeStruct {
127            lens: UncheckedArray::default(),
128            precode_lens: UncheckedArray::default(),
129            precode_decode_table: UncheckedArray::default(),
130            fast_temp_litlen: Vec::with_capacity(FAST_TABLESIZE),
131        },
132        litlen_decode_table: UncheckedArray::default(),
133        offset_decode_table: UncheckedArray::default(),
134        fast_decode_table: UncheckedArray::default(),
135
136        litlen_decode_subtable: UncheckedArray::default(),
137        offset_decode_subtable: UncheckedArray::default(),
138
139        sorted_syms: UncheckedArray::default(),
140        static_codes_loaded: false,
141    }
142}
143
144pub fn decompress_file_buffered(
145    file: impl AsRef<Path>,
146    func: impl FnMut(&[u8]) -> Result<(), ()>,
147    buf_size: usize,
148) -> Result<(), LibdeflateError> {
149    let mut read_file = File::open(file).unwrap();
150    let mut input_stream =
151        DeflateChunkedBufferInput::new(|buf| read_file.read(buf).unwrap_or(0), buf_size);
152
153    let mut output_stream = DeflateChunkedBufferOutput::new(func, buf_size);
154
155    let mut decompressor = libdeflate_alloc_decode_tables();
156
157    while {
158        input_stream.ensure_overread_length();
159        input_stream.has_valid_bytes_slow()
160    } {
161        libdeflate_gzip_decompress(&mut decompressor, &mut input_stream, &mut output_stream)?;
162    }
163    Ok(())
164}
165
#[cfg(test)]
mod tests {
    use crate::decompress_file_buffered;
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;
    use std::time::Instant;

    /// Timing run: decompresses up to 10000 files from the `strains-test`
    /// directory in parallel and prints the elapsed time.
    ///
    /// Panics if the directory cannot be listed. Files that fail to
    /// decompress are only printed, not asserted on.
    #[test]
    fn decompression_speed() {
        const PATH: &str = "strains-test";

        // Shared sink for the byte sums, so every decompressed byte is read.
        let context = Arc::new(AtomicUsize::new(0));

        let mut paths_vec: Vec<_> = std::fs::read_dir(PATH)
            .unwrap()
            .map(|entry| entry.unwrap().path())
            .collect();

        paths_vec.sort();
        paths_vec.truncate(10000);
        let start = Instant::now();

        paths_vec.into_par_iter().for_each(|file| {
            let context = context.clone();

            let outcome = decompress_file_buffered(
                &file,
                |data| {
                    let rem: usize = data.iter().map(|&byte| byte as usize).sum();
                    context.fetch_add(rem, Ordering::Relaxed);
                    Ok(())
                },
                1024 * 512,
            );

            if let Err(_error) = outcome {
                println!("Error: {}", file.display());
            }
        });

        println!("Bench duration: {:.2}", start.elapsed().as_secs_f32());
    }
}