streaming_libdeflate_rs/
lib.rs1pub mod bitstream;
3
4pub(crate) mod block_finder;
5pub mod decode_blocks;
6pub mod decompress_deflate;
7pub mod decompress_gzip;
8mod decompress_utils;
9mod deflate_constants;
10mod gzip_constants;
11pub mod streams;
12pub mod unchecked;
13
14#[macro_use]
15extern crate static_assertions;
16
17use crate::decompress_deflate::{
18 HuffmanDecodeStruct, OutStreamResult, FAST_TABLESIZE, LITLEN_SUBTABLESIZE, LITLEN_TABLESIZE,
19 OFFSET_SUBTABLESIZE, OFFSET_TABLESIZE,
20};
21use crate::decompress_gzip::libdeflate_gzip_decompress;
22use crate::decompress_utils::fast_decode_entry::FastDecodeEntry;
23use crate::deflate_constants::DEFLATE_MAX_NUM_SYMS;
24use crate::streams::deflate_chunked_buffer_input::DeflateChunkedBufferInput;
25use crate::streams::deflate_chunked_buffer_output::DeflateChunkedBufferOutput;
26use crate::unchecked::UncheckedArray;
27use std::fs::File;
28use std::io::Read;
29use std::mem::size_of;
30use std::path::Path;
31
32pub struct LibdeflateDecodeTables {
39 pub(crate) huffman_decode: HuffmanDecodeStruct,
40 pub(crate) litlen_decode_table: UncheckedArray<FastDecodeEntry, LITLEN_TABLESIZE>,
41
42 pub(crate) offset_decode_table: UncheckedArray<FastDecodeEntry, OFFSET_TABLESIZE>,
43
44 pub(crate) fast_decode_table: UncheckedArray<FastDecodeEntry, FAST_TABLESIZE>,
45
46 pub(crate) litlen_decode_subtable: UncheckedArray<FastDecodeEntry, LITLEN_SUBTABLESIZE>,
47 pub(crate) offset_decode_subtable: UncheckedArray<FastDecodeEntry, OFFSET_SUBTABLESIZE>,
48
49 pub(crate) sorted_syms: UncheckedArray<u16, DEFLATE_MAX_NUM_SYMS>,
51 pub(crate) static_codes_loaded: bool,
52}
53
/// Failure codes returned by the decompression entry points.
///
/// The explicit discriminants are part of the public interface and must stay
/// stable.
///
/// NOTE(review): names and values match libdeflate's `enum libdeflate_result`
/// (1 = bad data, 2 = short output, 3 = insufficient space); the variant
/// descriptions below follow that header — confirm they hold for this port.
///
/// The type is `Copy`, comparable, and implements [`std::error::Error`], so it
/// can be propagated with `?` into `Box<dyn Error>` and similar error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LibdeflateError {
    /// The compressed stream is invalid, corrupt, or unsupported.
    BadData = 1,

    /// The stream decompressed to fewer bytes than the caller expected.
    ShortOutput = 2,

    /// The output did not have enough room for the decompressed data.
    InsufficientSpace = 3,
}

impl std::fmt::Display for LibdeflateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            LibdeflateError::BadData => "compressed data is invalid or corrupt",
            LibdeflateError::ShortOutput => "decompressed output is shorter than expected",
            LibdeflateError::InsufficientSpace => "insufficient space for decompressed output",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LibdeflateError {}
72
73pub trait DeflateInput {
74 const MAX_LOOK_BACK: usize = size_of::<usize>() * 2;
75 const MAX_OVERREAD: usize = size_of::<usize>() * 2;
76
77 unsafe fn get_le_word_no_advance(&mut self) -> usize;
78 fn move_stream_pos<const REFILL: bool>(&mut self, amount: isize);
79 fn get_stream_pos_mut(&mut self) -> &mut usize;
80 fn tell_stream_pos(&self) -> usize;
81 fn read<const REFILL: bool>(&mut self, out_data: &mut [u8]) -> usize;
82 fn ensure_overread_length(&mut self);
84 fn has_readable_overread(&self) -> bool;
86 fn has_valid_bytes_slow(&mut self) -> bool;
87 fn read_exact_into<O: DeflateOutput>(&mut self, out_stream: &mut O, length: usize) -> bool;
88
89 #[inline(always)]
90 fn read_byte<const REFILL: bool>(&mut self) -> u8 {
91 let mut byte = [0];
92 self.read::<REFILL>(&mut byte);
93 byte[0]
94 }
95
96 #[inline(always)]
97 fn read_le_u16<const REFILL: bool>(&mut self) -> u16 {
98 let mut bytes = [0, 0];
99 self.read::<REFILL>(&mut bytes);
100 u16::from_le_bytes(bytes)
101 }
102
103 #[inline(always)]
104 fn read_le_u32<const REFILL: bool>(&mut self) -> u32 {
105 let mut bytes = [0, 0, 0, 0];
106 self.read::<REFILL>(&mut bytes);
107 u32::from_le_bytes(bytes)
108 }
109}
110
111pub trait DeflateOutput {
112 const MAX_LOOK_BACK: usize = 32768;
113 const OVERWRITE_MAX: usize = 16;
114
115 fn has_writable_length(&mut self, length: usize) -> bool;
116 fn flush_ensure_length(&mut self, length: usize) -> bool;
117
118 fn get_output_ptr(&mut self) -> *mut u8;
119 unsafe fn set_output_ptr(&mut self, ptr: *mut u8);
120
121 fn final_flush(&mut self) -> Result<OutStreamResult, ()>;
122}
123
124pub fn libdeflate_alloc_decode_tables() -> LibdeflateDecodeTables {
125 LibdeflateDecodeTables {
126 huffman_decode: HuffmanDecodeStruct {
127 lens: UncheckedArray::default(),
128 precode_lens: UncheckedArray::default(),
129 precode_decode_table: UncheckedArray::default(),
130 fast_temp_litlen: Vec::with_capacity(FAST_TABLESIZE),
131 },
132 litlen_decode_table: UncheckedArray::default(),
133 offset_decode_table: UncheckedArray::default(),
134 fast_decode_table: UncheckedArray::default(),
135
136 litlen_decode_subtable: UncheckedArray::default(),
137 offset_decode_subtable: UncheckedArray::default(),
138
139 sorted_syms: UncheckedArray::default(),
140 static_codes_loaded: false,
141 }
142}
143
144pub fn decompress_file_buffered(
145 file: impl AsRef<Path>,
146 func: impl FnMut(&[u8]) -> Result<(), ()>,
147 buf_size: usize,
148) -> Result<(), LibdeflateError> {
149 let mut read_file = File::open(file).unwrap();
150 let mut input_stream =
151 DeflateChunkedBufferInput::new(|buf| read_file.read(buf).unwrap_or(0), buf_size);
152
153 let mut output_stream = DeflateChunkedBufferOutput::new(func, buf_size);
154
155 let mut decompressor = libdeflate_alloc_decode_tables();
156
157 while {
158 input_stream.ensure_overread_length();
159 input_stream.has_valid_bytes_slow()
160 } {
161 libdeflate_gzip_decompress(&mut decompressor, &mut input_stream, &mut output_stream)?;
162 }
163 Ok(())
164}
165
#[cfg(test)]
mod tests {
    use crate::decompress_file_buffered;
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::Instant;

    /// Timing run: decompresses up to 10000 files from the `strains-test`
    /// directory in parallel and prints the elapsed time.
    ///
    /// Panics if the directory cannot be read. A file that fails to decompress
    /// is only reported on stdout; it does not fail the test.
    #[test]
    fn decompression_speed() {
        const PATH: &str = "strains-test";

        // Sum of all output bytes; gives the output callback something to do
        // with the data. The total is never read back.
        let byte_sum = AtomicUsize::new(0);

        let mut inputs: Vec<_> = std::fs::read_dir(PATH)
            .unwrap()
            .map(|entry| entry.unwrap().path())
            .collect();
        inputs.sort();
        inputs.truncate(10000);

        let timer = Instant::now();

        inputs.into_par_iter().for_each(|file| {
            let outcome = decompress_file_buffered(
                &file,
                |data| {
                    let chunk_sum: usize = data.iter().map(|&byte| byte as usize).sum();
                    byte_sum.fetch_add(chunk_sum, Ordering::Relaxed);
                    Ok(())
                },
                1024 * 512,
            );

            if outcome.is_err() {
                println!("Error: {}", file.display());
            }
        });

        println!("Bench duration: {:.2}", timer.elapsed().as_secs_f32());
    }
}