compu/decoder/mod.rs
1//! Decoder
2extern crate alloc;
3
4use core::{mem, ptr};
5
6use alloc::collections::TryReserveError;
7use alloc::vec::Vec;
8
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
///Possible compression archive based on known signatures
pub enum Detection {
    ///ZSTD
    Zstd,
    ///GZIP
    Gzip,
    ///ZLIB
    Zlib,
    ///Indicates that all possible options are exhausted and it is impossible to deduce
    ///compression.
    Unknown,
}

impl Detection {
    ///Attempts to deduce compression format from available bytes.
    ///
    ///Checks are performed in following order:
    ///
    ///- GZIP and ZLIB signatures, which require first 2 bytes;
    ///- ZSTD magic number, which requires first 4 bytes.
    ///
    ///## Result
    ///
    ///- `Some(format)` - `bytes` starts with signature of `format`;
    ///- `Some(Detection::Unknown)` - at least 4 bytes are available and none of signatures match;
    ///- `None` - there is not enough `bytes` to perform all possible checks.
    ///  In this case you need to append more data to your buffer and provide it again
    pub const fn detect(bytes: &[u8]) -> Option<Detection> {
        //Constants are stored in the form that native endian read of on-wire bytes produces,
        //hence comparison below works regardless of target endianness.
        //
        //https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames
        const ZSTD_HEADER: u32 = 0xFD2FB528u32.to_le();
        const GZIP_HEADER: u16 = 0x1f8bu16.to_be();

        if bytes.len() < mem::size_of::<u16>() {
            return None;
        }

        let word = u16::from_ne_bytes([bytes[0], bytes[1]]);
        if word == GZIP_HEADER {
            return Some(Detection::Gzip);
        }

        //Signature:
        //
        // # Zlib
        //
        //        FLEVEL: 0       1       2       3
        //CINFO:
        //     0      08 1D   08 5B   08 99   08 D7
        //     1      18 19   18 57   18 95   18 D3
        //     2      28 15   28 53   28 91   28 CF
        //     3      38 11   38 4F   38 8D   38 CB
        //     4      48 0D   48 4B   48 89   48 C7
        //     5      58 09   58 47   58 85   58 C3
        //     6      68 05   68 43   68 81   68 DE
        //     7      78 01   78 5E   78 9C   78 DA
        //
        //Every valid pair, read as big endian number, is multiple of 31 which serves as cheap
        //filter before consulting the table.
        if word.to_be() % 31 == 0 {
            match (bytes[0], bytes[1]) {
                (0x08, 0x1d | 0x5b | 0x99 | 0xd7)
                | (0x18, 0x19 | 0x57 | 0x95 | 0xd3)
                | (0x28, 0x15 | 0x53 | 0x91 | 0xcf)
                | (0x38, 0x11 | 0x4f | 0x8d | 0xcb)
                | (0x48, 0x0d | 0x4b | 0x89 | 0xc7)
                | (0x58, 0x09 | 0x47 | 0x85 | 0xc3)
                | (0x68, 0x05 | 0x43 | 0x81 | 0xde)
                | (0x78, 0x01 | 0x5e | 0x9c | 0xda) => return Some(Detection::Zlib),
                _ => (),
            }
        }

        //Two byte signatures are exhausted, the rest needs 4 bytes
        if bytes.len() < mem::size_of::<u32>() {
            return None;
        }

        let dword = u32::from_ne_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
        if dword == ZSTD_HEADER {
            return Some(Detection::Zstd);
        }

        Some(Detection::Unknown)
    }
}
116
///Decoding error
///
///Thin wrapper over raw integer code reported by decoder implementation.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecodeError(i32);

impl DecodeError {
    ///Creates error which means no error.
    ///
    ///Specifically its code is 0
    pub const fn no_error() -> Self {
        DecodeError(0)
    }

    #[inline(always)]
    ///Returns raw integer
    pub const fn as_raw(&self) -> i32 {
        let Self(code) = *self;
        code
    }
}
136
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
///Result of decoding
///
///Describes what decoder expects from caller in order to make further progress.
pub enum DecodeStatus {
    ///Cannot finish due to lack of input data
    NeedInput,
    ///Need to flush data somewhere before continuing
    NeedOutput,
    ///Successfully finished decoding.
    Finished,
}
147
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
///Decode output
///
///Both counters are relative to buffers passed to the decoding call that produced this value.
pub struct Decode {
    ///Number of bytes left unprocessed in `input`
    pub input_remain: usize,
    ///Number of bytes left unused in `output`, i.e. output size minus number of bytes written
    pub output_remain: usize,
    ///Result of decoding
    pub status: Result<DecodeStatus, DecodeError>,
}
158
159///Decoder interface
160pub struct Interface {
161 decode_fn: unsafe fn(ptr::NonNull<u8>, *const u8, usize, *mut u8, usize) -> Decode,
162 //returns new/updated instance, MUST be replaced
163 reset_fn: fn(ptr::NonNull<u8>) -> Option<ptr::NonNull<u8>>,
164 drop_fn: fn(ptr::NonNull<u8>),
165 describe_error_fn: fn(i32) -> Option<&'static str>,
166}
167
168impl Interface {
169 ///Creates new `Interface` with provided functions to build vtable.
170 ///
171 ///First argument of every function is state as pointer.
172 ///
173 ///It is user responsibility to pass correct function pointers
174 pub const fn new(decode_fn: unsafe fn(ptr::NonNull<u8>, *const u8, usize, *mut u8, usize) -> Decode, reset_fn: fn(ptr::NonNull<u8>) -> Option<ptr::NonNull<u8>>, drop_fn: fn(ptr::NonNull<u8>), describe_error_fn: fn(i32) -> Option<&'static str>) -> Self {
175 Self {
176 decode_fn,
177 reset_fn,
178 drop_fn,
179 describe_error_fn,
180 }
181 }
182
183 #[inline(always)]
184 pub(crate) fn inner_decoder(&'static self, instance: ptr::NonNull<u8>) -> Decoder {
185 Decoder {
186 instance,
187 interface: self,
188 }
189 }
190
191 #[inline(always)]
192 ///Creates new decoder
193 ///
194 ///This function is unsafe as it is up to user to ensure correctness of `Interface
195 ///
196 ///`instance` - Decoder state, passed as first argument to every function in vtable
197 pub unsafe fn decoder(&'static self, state: ptr::NonNull<u8>) -> Decoder {
198 self.inner_decoder(state)
199 }
200}
201
///Decoder
///
///Use [Interface] to instantiate decoder.
///
///Under the hood, in order to avoid generics, it is implemented as vtable with series of function pointers.
///
///
///## Example
///
///Brief example for chunked decoding.
///```rust
///use compu::{Decoder, DecodeStatus, Encoder, EncodeOp, EncodeStatus};
///
///fn decompress(decoder: &mut Decoder, input: core::slice::Chunks<'_, u8>, output: &mut Vec<u8>) {
///    for chunk in input {
///        let result = decoder.decode_vec(chunk, output);
///
///        assert_eq!(result.input_remain, 0);
///        let status = result.status.expect("success");
///        if status == DecodeStatus::Finished {
///            break;
///        }
///    }
///
///    //Make sure to reset state, if you want to re-use decoder.
///    decoder.reset();
///}
///
///fn prepare_compressed(encoder: &mut Encoder, data: &[u8], compressed: &mut Vec<u8>) {
///    let result = encoder.encode_vec(data, compressed, EncodeOp::Finish);
///    assert_eq!(result.status, EncodeStatus::Finished);
///}
///
///const DATA: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
///
///let mut output = Vec::with_capacity(100);
///
///let mut compressed = Vec::with_capacity(100);
///let mut encoder = compu::encoder::Interface::brotli_c(Default::default()).expect("to create brotli encoder");
///prepare_compressed(&mut encoder, DATA, &mut compressed);
///let mut decoder = compu::decoder::Interface::brotli_c().expect("to create brotli decoder");
///decompress(&mut decoder, compressed.chunks(4), &mut output);
///assert_eq!(output, DATA);
///
///output.truncate(0);
///compressed.truncate(0);
///
///let mut compressed = Vec::with_capacity(100);
///let mut encoder = compu::encoder::Interface::zstd(Default::default()).expect("to create zstd encoder");
///prepare_compressed(&mut encoder, DATA, &mut compressed);
///let mut decoder = compu::decoder::Interface::zstd(Default::default()).expect("to create zstd decoder");
///decompress(&mut decoder, compressed.chunks(4), &mut output);
///assert_eq!(output, DATA);
///
///output.truncate(0);
///compressed.truncate(0);
///
///let mut compressed = Vec::with_capacity(100);
///let mut encoder = compu::encoder::Interface::zlib_ng(Default::default()).expect("to create zlib-ng encoder");
///prepare_compressed(&mut encoder, DATA, &mut compressed);
///let mut decoder = compu::decoder::Interface::zlib_ng(Default::default()).expect("to create zlib-ng decoder");
///decompress(&mut decoder, compressed.chunks(4), &mut output);
///assert_eq!(output, DATA);
///
///output.truncate(0);
///compressed.truncate(0);
///```
pub struct Decoder {
    //State pointer, passed as first argument to vtable functions
    instance: ptr::NonNull<u8>,
    //Vtable used to operate on `instance`
    interface: &'static Interface,
}

//Compile time guarantee that `Decoder` remains exactly two pointers wide
const _: () = {
    assert!(mem::size_of::<Decoder>() == mem::size_of::<usize>() * 2);
};
277
278impl Decoder {
279 #[inline(always)]
280 ///Raw decoding function, with no checks.
281 ///
282 ///Intended to be used as building block of higher level interfaces
283 ///
284 ///Arguments
285 ///
286 ///- `input` - Pointer to start of input to process. MUST NOT be null.
287 ///- `input_len` - Size of data to process in `input`
288 ///- `ouput` - Pointer to start of buffer where to write result. MUST NOT be null
289 ///- `output_len` - Size of buffer pointed by `output`
290 pub unsafe fn raw_decode(&mut self, input: *const u8, input_len: usize, output: *mut u8, output_len: usize) -> Decode {
291 (self.interface.decode_fn)(self.instance, input, input_len, output, output_len)
292 }
293
294 #[inline(always)]
295 ///Decodes `input` into uninit `output`.
296 ///
297 ///`Decode` will contain number of bytes written into `output`. This number always indicates
298 ///number of bytes written hence which can be assumed initialized.
299 pub fn decode_uninit(&mut self, input: &[u8], output: &mut [mem::MaybeUninit<u8>]) -> Decode {
300 let input_len = input.len();
301 let output_len = output.len();
302 unsafe {
303 self.raw_decode(input.as_ptr(), input_len, output.as_mut_ptr() as _, output_len)
304 }
305 }
306
307 #[inline(always)]
308 ///Decodes `input` into `output`.
309 pub fn decode(&mut self, input: &[u8], output: &mut [u8]) -> Decode {
310 let input_len = input.len();
311 let output_len = output.len();
312 unsafe {
313 self.raw_decode(input.as_ptr(), input_len, output.as_mut_ptr() as _, output_len)
314 }
315 }
316
317 #[inline(always)]
318 ///Decodes `input` into spare space in `output`.
319 ///
320 ///Function require user to alloc spare capacity himself.
321 ///
322 ///`Decode::output_remain` will be relatieve to spare capacity length.
323 pub fn decode_vec(&mut self, input: &[u8], output: &mut Vec<u8>) -> Decode {
324 let spare_capacity = output.spare_capacity_mut();
325 let spare_capacity_len = spare_capacity.len();
326 let result = self.decode_uninit(input, spare_capacity);
327
328 if result.status.is_ok() {
329 let new_len = output.len() + spare_capacity_len - result.output_remain;
330 unsafe {
331 output.set_len(new_len);
332 }
333 }
334 result
335 }
336
337 #[inline(always)]
338 ///Decodes `input` into `output` Vec, performing allocation when necessary
339 ///
340 ///This function will continue decoding as long as input requires more input.
341 ///
342 ///## Allocation
343 ///
344 ///Strategy depends on input size.
345 ///- Less than 1024:
346 /// - Allocates `input.len()`
347 /// - Re-alloc size `input.len() / 3`
348 ///- From 1024 to 65536:
349 /// - Allocates `input.len() + input.len() / 3`
350 /// - Re-alloc size `1024`
351 ///- From 65536:
352 /// - Allocates `input.len() * 2`
353 /// - Re-alloc size `8 * 1024`
354 ///
355 ///Note that the best strategy is always to re-use buffer
356 ///
357 ///## Result
358 ///
359 ///- `Decode::output_remain` will be relatieve to spare capacity of the `output`.
360 pub fn decode_vec_full(&mut self, mut input: &[u8], output: &mut Vec<u8>) -> Result<Decode, TryReserveError> {
361 const RESERVE_DEFAULT: usize = 1024;
362 let input_len = input.len();
363 let reserve_size = if input_len < RESERVE_DEFAULT {
364 output.try_reserve_exact(input_len)?;
365 input_len / 3
366 } else if input_len < (RESERVE_DEFAULT * 16) {
367 output.try_reserve_exact(input_len + input_len / 3)?;
368 RESERVE_DEFAULT
369 } else {
370 output.try_reserve_exact(input.len() * 2)?;
371 RESERVE_DEFAULT * 8
372 };
373
374 loop {
375 let result = self.decode_vec(input, output);
376 match result.status {
377 Ok(DecodeStatus::NeedOutput) => {
378 input = &input[input.len() - result.input_remain..];
379 output.try_reserve_exact(reserve_size)?;
380 continue;
381 }
382 _ => break Ok(result),
383 }
384 }
385 }
386
387 #[cfg(feature = "bytes")]
388 ///Decodes `input` into `output` buffer, iterating through all spare capacity chunks if
389 ///necessary
390 ///
391 ///Requires `bytes` feature
392 ///
393 ///`Decode::output_remain` will be relative to spare capacity length.
394 pub fn decode_buf(&mut self, mut input: &[u8], output: &mut impl bytes::BufMut) -> Decode {
395 let mut result = Decode {
396 input_remain: input.len(),
397 output_remain: output.remaining_mut(),
398 status: Ok(DecodeStatus::NeedOutput),
399 };
400
401 loop {
402 let spare_capacity = output.chunk_mut();
403 let spare_capacity_len = spare_capacity.len();
404
405 let (advanced_len, decode) = unsafe {
406 let decode = self.decode_uninit(input, spare_capacity.as_uninit_slice_mut());
407 debug_assert!(spare_capacity_len > decode.output_remain);
408 let advanced_len = spare_capacity_len.saturating_sub(decode.output_remain);
409 output.advance_mut(advanced_len);
410 (advanced_len, decode)
411 };
412 input = &input[result.input_remain - decode.input_remain..];
413 result.input_remain = decode.input_remain;
414 result.output_remain = result.output_remain.saturating_sub(advanced_len);
415 result.status = decode.status;
416
417 match result.status {
418 Ok(DecodeStatus::Finished | DecodeStatus::NeedInput) => break result,
419 Ok(DecodeStatus::NeedOutput) => {
420 if result.output_remain == 0 {
421 break result;
422 }
423 }
424 Err(_) => break result,
425 }
426 }
427 }
428
429 #[inline(always)]
430 ///Resets `Decoder` state to initial.
431 ///
432 ///Returns `true` if successfully reset, otherwise `false`
433 pub fn reset(&mut self) -> bool {
434 match (self.interface.reset_fn)(self.instance) {
435 Some(ptr) => {
436 self.instance = ptr;
437 true
438 }
439 None => false,
440 }
441 }
442
443 #[inline(always)]
444 ///Returns descriptive text for error.
445 pub fn describe_error(&self, error: DecodeError) -> Option<&'static str> {
446 (self.interface.describe_error_fn)(error.as_raw())
447 }
448}
449
450impl Drop for Decoder {
451 #[inline]
452 fn drop(&mut self) {
453 (self.interface.drop_fn)(self.instance);
454 }
455}
456
//ZLIB macro has to be defined before declaring modules
//
//Generates body of decode function shared by zlib based decoders.
//
//Call site has to provide `State` type (with `inner` stream and `as_mut()`), `sys` bindings
//and `DEFAULT_INFLATE` flush mode in scope.
//
//Arguments, all identifiers:
//- `$state` - pointer to `State`
//- `$input`, `$input_len` - input pointer and its size
//- `$output`, `$output_len` - output pointer and its size
#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
macro_rules! internal_zlib_impl_decode {
    ($state:ident, $input:ident, $input_len:ident, $output:ident, $output_len:ident) => {{
        use $crate::decoder::DecodeStatus;

        let state = unsafe { &mut *($state.as_ptr() as *mut State) };
        //NOTE(review): `as _` would truncate sizes that do not fit counter type of stream - confirm callers never exceed it
        state.inner.avail_out = $output_len as _;
        state.inner.next_out = $output;

        state.inner.avail_in = $input_len as _;
        state.inner.next_in = $input as *mut _;

        let result = sys::inflate(state.as_mut(), DEFAULT_INFLATE);

        //Counters left in stream after `inflate` are reported as remaining input/output
        $crate::decoder::Decode {
            input_remain: state.inner.avail_in as usize,
            output_remain: state.inner.avail_out as usize,
            status: match result {
                //Without leftover input decoder waits for more data, otherwise for more output space
                sys::Z_OK => match state.inner.avail_in {
                    0 => Ok(DecodeStatus::NeedInput),
                    _ => Ok(DecodeStatus::NeedOutput),
                },
                sys::Z_STREAM_END => Ok(DecodeStatus::Finished),
                //Not treated as error, reported as request for more output
                sys::Z_BUF_ERROR => Ok(DecodeStatus::NeedOutput),
                //Any other code is passed to user as is
                other => Err(crate::decoder::DecodeError(other as _)),
            },
        }
    }};
}
487
488#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
489mod zlib_common;
490#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
491pub use zlib_common::ZlibMode;
492#[cfg(feature = "brotli-rust")]
493mod brotli;
494#[cfg(feature = "brotli-c")]
495mod brotli_c;
496#[cfg(any(feature = "zlib", feature = "zlib-static"))]
497mod zlib;
498#[cfg(feature = "zlib-ng")]
499mod zlib_ng;
500#[cfg(feature = "zlib-rust")]
501mod zlib_rust;
502#[cfg(feature = "zstd")]
503mod zstd;
504#[cfg(feature = "zstd")]
505pub use zstd::ZstdOptions;
506
507impl<const N: usize> crate::Buffer<N> {
508 ///Decodes `input` using `decoder` returning number of bytes consumed in `input`
509 ///
510 ///On success returns tuple with:
511 ///- Number of consumed bytes in `input`
512 ///- Decode status:
513 /// - In case of `Finished`, you should not continue to invoke decode until you reset decoder
514 /// - In case of `NeedOutput`, you should consume internal buffer.
515 ///
516 ///In case of error, internal buffer size will not change
517 pub fn decode(&mut self, decoder: &mut Decoder, input: &[u8]) -> Result<(usize, DecodeStatus), DecodeError> {
518 let spare_capacity = self.spare_capacity_mut();
519 let spare_capacity_len = spare_capacity.len();
520
521 let result = decoder.decode_uninit(input, spare_capacity);
522
523 match result.status {
524 Ok(status) => {
525 self.cursor = self.cursor + spare_capacity_len - result.output_remain;
526 Ok((input.len() - result.input_remain, status))
527 }
528 Err(error) => Err(error),
529 }
530 }
531}