Skip to main content

compu/decoder/
mod.rs

1//! Decoder
2extern crate alloc;
3
4use core::{mem, ptr};
5
6use alloc::collections::TryReserveError;
7use alloc::vec::Vec;
8
///Possible compression archive based on known signatures
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Detection {
    ///ZSTD
    Zstd,
    ///GZIP
    Gzip,
    ///ZLIB
    Zlib,
    ///Indicates that all possible options are exhausted and it is impossible to deduce
    ///compression.
    Unknown,
}

impl Detection {
    ///Checks whether `cmf` and `flg` (first two bytes of stream) form known zlib header.
    ///
    ///Only headers without preset dictionary are recognized:
    ///
    ///```text
    ///      FLEVEL: 0       1       2       3
    ///CINFO:
    ///     0      08 1D   08 5B   08 99   08 D7
    ///     1      18 19   18 57   18 95   18 D3
    ///     2      28 15   28 53   28 91   28 CF
    ///     3      38 11   38 4F   38 8D   38 CB
    ///     4      48 0D   48 4B   48 89   48 C7
    ///     5      58 09   58 47   58 85   58 C3
    ///     6      68 05   68 43   68 81   68 DE
    ///     7      78 01   78 5E   78 9C   78 DA
    ///```
    const fn is_zlib_header(cmf: u8, flg: u8) -> bool {
        //Header, read as big endian 16 bit integer, is always multiple of 31
        let header = ((cmf as u16) << 8) | (flg as u16);
        if header % 31 != 0 {
            return false;
        }

        match cmf {
            0x08 => matches!(flg, 0x1d | 0x5b | 0x99 | 0xd7),
            0x18 => matches!(flg, 0x19 | 0x57 | 0x95 | 0xd3),
            0x28 => matches!(flg, 0x15 | 0x53 | 0x91 | 0xcf),
            0x38 => matches!(flg, 0x11 | 0x4f | 0x8d | 0xcb),
            0x48 => matches!(flg, 0x0d | 0x4b | 0x89 | 0xc7),
            0x58 => matches!(flg, 0x09 | 0x47 | 0x85 | 0xc3),
            0x68 => matches!(flg, 0x05 | 0x43 | 0x81 | 0xde),
            0x78 => matches!(flg, 0x01 | 0x5e | 0x9c | 0xda),
            _ => false,
        }
    }

    ///Attempts to deduce compression format from available bytes.
    ///
    ///Checks are performed in following order: gzip, zlib (both need 2 bytes), zstd (needs 4
    ///bytes).
    ///
    ///Returns `None` if there is not enough `bytes` to perform all possible checks.
    ///In this case you need to append more data to your buffer and provide it again
    ///
    ///Returns `Some(Detection::Unknown)` when at least 4 bytes are available and none of
    ///signatures match.
    pub const fn detect(bytes: &[u8]) -> Option<Detection> {
        //https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames
        //Magic number is stored in little endian order
        const ZSTD_MAGIC: [u8; 4] = 0xFD2FB528u32.to_le_bytes();
        const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

        if bytes.len() < GZIP_MAGIC.len() {
            return None;
        }

        if bytes[0] == GZIP_MAGIC[0] && bytes[1] == GZIP_MAGIC[1] {
            return Some(Detection::Gzip);
        }

        if Self::is_zlib_header(bytes[0], bytes[1]) {
            return Some(Detection::Zlib);
        }

        if bytes.len() < ZSTD_MAGIC.len() {
            //Two byte signatures are ruled out, but zstd cannot be checked yet
            return None;
        }

        //Slice comparison is not available in const context, hence compare byte by byte
        if bytes[0] == ZSTD_MAGIC[0] && bytes[1] == ZSTD_MAGIC[1] && bytes[2] == ZSTD_MAGIC[2] && bytes[3] == ZSTD_MAGIC[3] {
            Some(Detection::Zstd)
        } else {
            Some(Detection::Unknown)
        }
    }
}
116
///Decoding error
///
///Wraps raw integer error code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct DecodeError(i32);

impl DecodeError {
    ///Creates error which means no error.
    ///
    ///Specifically its code is 0
    pub const fn no_error() -> Self {
        DecodeError(0)
    }

    ///Returns raw integer
    #[inline(always)]
    pub const fn as_raw(&self) -> i32 {
        let Self(code) = *self;
        code
    }
}
136
///Result of decoding
///
///Describes what decoder expects from caller in order to continue.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeStatus {
    ///Cannot finish due to lack of input data
    NeedInput,
    ///Need to flush data somewhere before continuing
    NeedOutput,
    ///Successfully finished decoding.
    Finished,
}
147
148#[derive(Debug, Clone, Copy, PartialEq, Eq)]
149///Decode output
150pub struct Decode {
151    ///Number of bytes left unprocessed in `input`
152    pub input_remain: usize,
153    ///Number of bytes left unprocessed in `output`
154    pub output_remain: usize,
155    ///Result of decoding
156    pub status: Result<DecodeStatus, DecodeError>,
157}
158
159///Decoder interface
160pub struct Interface {
161    decode_fn: unsafe fn(ptr::NonNull<u8>, *const u8, usize, *mut u8, usize) -> Decode,
162    //returns new/updated instance, MUST be replaced
163    reset_fn: fn(ptr::NonNull<u8>) -> Option<ptr::NonNull<u8>>,
164    drop_fn: fn(ptr::NonNull<u8>),
165    describe_error_fn: fn(i32) -> Option<&'static str>,
166}
167
168impl Interface {
169    ///Creates new `Interface` with provided functions to build vtable.
170    ///
171    ///First argument of every function is state as pointer.
172    ///
173    ///It is user responsibility to pass correct function pointers
174    pub const fn new(decode_fn: unsafe fn(ptr::NonNull<u8>, *const u8, usize, *mut u8, usize) -> Decode, reset_fn: fn(ptr::NonNull<u8>) -> Option<ptr::NonNull<u8>>, drop_fn: fn(ptr::NonNull<u8>), describe_error_fn: fn(i32) -> Option<&'static str>) -> Self {
175        Self {
176            decode_fn,
177            reset_fn,
178            drop_fn,
179            describe_error_fn,
180        }
181    }
182
183    #[inline(always)]
184    pub(crate) fn inner_decoder(&'static self, instance: ptr::NonNull<u8>) -> Decoder {
185        Decoder {
186            instance,
187            interface: self,
188        }
189    }
190
191    #[inline(always)]
192    ///Creates new decoder
193    ///
194    ///This function is unsafe as it is up to user to ensure correctness of `Interface
195    ///
196    ///`instance` - Decoder state, passed as first argument to every function in vtable
197    pub unsafe fn decoder(&'static self, state: ptr::NonNull<u8>) -> Decoder {
198        self.inner_decoder(state)
199    }
200}
201
202///Decoder
203///
204///Use [Interface] to instantiate decoder.
205///
206///Under hood, in order to avoid generics, implemented as vtable with series of function pointers.
207///
208///
209///## Example
210///
211///Brief example for chunked decoding.
212///```rust
213///use compu::{Decoder, DecodeStatus, Encoder, EncodeOp, EncodeStatus};
214///
215///fn decompress(decoder: &mut Decoder, input: core::slice::Chunks<'_, u8>, output: &mut Vec<u8>) {
216///   for chunk in input {
217///     let result = decoder.decode_vec(chunk, output);
218///
219///     assert_eq!(result.input_remain, 0);
220///     let status = result.status.expect("success");
221///     if status == DecodeStatus::Finished {
222///         break;
223///     }
224///   }
225///
226///   //Make sure to reset state, if you want to re-use decoder.
227///   decoder.reset();
228///}
229///
230///fn prepare_compressed(encoder: &mut Encoder, data: &[u8], compressed: &mut Vec<u8>) {
231///    let result = encoder.encode_vec(DATA, compressed, EncodeOp::Finish);
232///    assert_eq!(result.status, EncodeStatus::Finished);
233///}
234///
235///const DATA: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
236///
237///let mut output = Vec::with_capacity(100);
238///
239///let mut compressed = Vec::with_capacity(100);
240///let mut encoder = compu::encoder::Interface::brotli_c(Default::default()).expect("to create brotli encoder");
241///prepare_compressed(&mut encoder, DATA, &mut compressed);
242///let mut decoder = compu::decoder::Interface::brotli_c().expect("to create brotli decoder");
243///decompress(&mut decoder, compressed.chunks(4), &mut output);
244///assert_eq!(output, DATA);
245///
246///output.truncate(0);
247///compressed.truncate(0);
248///
249///let mut compressed = Vec::with_capacity(100);
250///let mut encoder = compu::encoder::Interface::zstd(Default::default()).expect("to create zstd encoder");
251///prepare_compressed(&mut encoder, DATA, &mut compressed);
252///let mut decoder = compu::decoder::Interface::zstd(Default::default()).expect("to create zstd decoder");
253///decompress(&mut decoder, compressed.chunks(4), &mut output);
254///assert_eq!(output, DATA);
255///
256///output.truncate(0);
257///compressed.truncate(0);
258///
259///let mut compressed = Vec::with_capacity(100);
260///let mut encoder = compu::encoder::Interface::zlib_ng(Default::default()).expect("to create zlib-ng encoder");
261///prepare_compressed(&mut encoder, DATA, &mut compressed);
262///let mut decoder = compu::decoder::Interface::zlib_ng(Default::default()).expect("to create zlib-ng decoder");
263///decompress(&mut decoder, compressed.chunks(4), &mut output);
264///assert_eq!(output, DATA);
265///
266///output.truncate(0);
267///compressed.truncate(0);
268///```
269pub struct Decoder {
270    instance: ptr::NonNull<u8>,
271    interface: &'static Interface,
272}
273
274const _: () = {
275    assert!(mem::size_of::<Decoder>() == mem::size_of::<usize>() * 2);
276};
277
278impl Decoder {
279    #[inline(always)]
280    ///Raw decoding function, with no checks.
281    ///
282    ///Intended to be used as building block of higher level interfaces
283    ///
284    ///Arguments
285    ///
286    ///- `input` - Pointer to start of input to process. MUST NOT be null.
287    ///- `input_len` - Size of data to process in `input`
288    ///- `ouput` - Pointer to start of buffer where to write result. MUST NOT be null
289    ///- `output_len` - Size of buffer pointed by `output`
290    pub unsafe fn raw_decode(&mut self, input: *const u8, input_len: usize, output: *mut u8, output_len: usize) -> Decode {
291        (self.interface.decode_fn)(self.instance, input, input_len, output, output_len)
292    }
293
294    #[inline(always)]
295    ///Decodes `input` into uninit `output`.
296    ///
297    ///`Decode` will contain number of bytes written into `output`. This number always indicates
298    ///number of bytes written hence which can be assumed initialized.
299    pub fn decode_uninit(&mut self, input: &[u8], output: &mut [mem::MaybeUninit<u8>]) -> Decode {
300        let input_len = input.len();
301        let output_len = output.len();
302        unsafe {
303            self.raw_decode(input.as_ptr(), input_len, output.as_mut_ptr() as _, output_len)
304        }
305    }
306
307    #[inline(always)]
308    ///Decodes `input` into `output`.
309    pub fn decode(&mut self, input: &[u8], output: &mut [u8]) -> Decode {
310        let input_len = input.len();
311        let output_len = output.len();
312        unsafe {
313            self.raw_decode(input.as_ptr(), input_len, output.as_mut_ptr() as _, output_len)
314        }
315    }
316
317    #[inline(always)]
318    ///Decodes `input` into spare space in `output`.
319    ///
320    ///Function require user to alloc spare capacity himself.
321    ///
322    ///`Decode::output_remain` will be relatieve to spare capacity length.
323    pub fn decode_vec(&mut self, input: &[u8], output: &mut Vec<u8>) -> Decode {
324        let spare_capacity = output.spare_capacity_mut();
325        let spare_capacity_len = spare_capacity.len();
326        let result = self.decode_uninit(input, spare_capacity);
327
328        if result.status.is_ok() {
329            let new_len = output.len() + spare_capacity_len - result.output_remain;
330            unsafe {
331                output.set_len(new_len);
332            }
333        }
334        result
335    }
336
337    #[inline(always)]
338    ///Decodes `input` into `output` Vec, performing allocation when necessary
339    ///
340    ///This function will continue decoding as long as input requires more input.
341    ///
342    ///## Allocation
343    ///
344    ///Strategy depends on input size.
345    ///- Less than 1024:
346    ///   - Allocates `input.len()`
347    ///   - Re-alloc size `input.len() / 3`
348    ///- From 1024 to 65536:
349    ///   - Allocates `input.len() + input.len() / 3`
350    ///   - Re-alloc size `1024`
351    ///- From 65536:
352    ///   - Allocates `input.len() * 2`
353    ///   - Re-alloc size `8 * 1024`
354    ///
355    ///Note that the best strategy is always to re-use buffer
356    ///
357    ///## Result
358    ///
359    ///- `Decode::output_remain` will be relatieve to spare capacity of the `output`.
360    pub fn decode_vec_full(&mut self, mut input: &[u8], output: &mut Vec<u8>) -> Result<Decode, TryReserveError> {
361        const RESERVE_DEFAULT: usize = 1024;
362        let input_len = input.len();
363        let reserve_size = if input_len < RESERVE_DEFAULT {
364            output.try_reserve_exact(input_len)?;
365            input_len / 3
366        } else if input_len < (RESERVE_DEFAULT * 16) {
367            output.try_reserve_exact(input_len + input_len / 3)?;
368            RESERVE_DEFAULT
369        } else {
370            output.try_reserve_exact(input.len() * 2)?;
371            RESERVE_DEFAULT * 8
372        };
373
374        loop {
375            let result = self.decode_vec(input, output);
376            match result.status {
377                Ok(DecodeStatus::NeedOutput) => {
378                    input = &input[input.len() - result.input_remain..];
379                    output.try_reserve_exact(reserve_size)?;
380                    continue;
381                }
382                _ => break Ok(result),
383            }
384        }
385    }
386
387    #[cfg(feature = "bytes")]
388    ///Decodes `input` into `output` buffer, iterating through all spare capacity chunks if
389    ///necessary
390    ///
391    ///Requires `bytes` feature
392    ///
393    ///`Decode::output_remain` will be relative to spare capacity length.
394    pub fn decode_buf(&mut self, mut input: &[u8], output: &mut impl bytes::BufMut) -> Decode {
395        let mut result = Decode {
396            input_remain: input.len(),
397            output_remain: output.remaining_mut(),
398            status: Ok(DecodeStatus::NeedOutput),
399        };
400
401        loop {
402            let spare_capacity = output.chunk_mut();
403            let spare_capacity_len = spare_capacity.len();
404
405            let (advanced_len, decode) = unsafe {
406                let decode = self.decode_uninit(input, spare_capacity.as_uninit_slice_mut());
407                debug_assert!(spare_capacity_len > decode.output_remain);
408                let advanced_len = spare_capacity_len.saturating_sub(decode.output_remain);
409                output.advance_mut(advanced_len);
410                (advanced_len, decode)
411            };
412            input = &input[result.input_remain - decode.input_remain..];
413            result.input_remain = decode.input_remain;
414            result.output_remain = result.output_remain.saturating_sub(advanced_len);
415            result.status = decode.status;
416
417            match result.status {
418                Ok(DecodeStatus::Finished | DecodeStatus::NeedInput) => break result,
419                Ok(DecodeStatus::NeedOutput) => {
420                    if result.output_remain == 0 {
421                        break result;
422                    }
423                }
424                Err(_) => break result,
425            }
426        }
427    }
428
429    #[inline(always)]
430    ///Resets `Decoder` state to initial.
431    ///
432    ///Returns `true` if successfully reset, otherwise `false`
433    pub fn reset(&mut self) -> bool {
434        match (self.interface.reset_fn)(self.instance) {
435            Some(ptr) => {
436                self.instance = ptr;
437                true
438            }
439            None => false,
440        }
441    }
442
443    #[inline(always)]
444    ///Returns descriptive text for error.
445    pub fn describe_error(&self, error: DecodeError) -> Option<&'static str> {
446        (self.interface.describe_error_fn)(error.as_raw())
447    }
448}
449
450impl Drop for Decoder {
451    #[inline]
452    fn drop(&mut self) {
453        (self.interface.drop_fn)(self.instance);
454    }
455}
456
//ZLIB macro has to be defined before declaring modules
//
//Expects `State`, `sys` and `DEFAULT_INFLATE` to be available at place of invocation.
#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
macro_rules! internal_zlib_impl_decode {
    ($state:ident, $input:ident, $input_len:ident, $output:ident, $output_len:ident) => {{
        use $crate::decoder::DecodeStatus;

        let zstate = unsafe { &mut *($state.as_ptr() as *mut State) };

        //Point stream at caller's buffers
        zstate.inner.avail_in = $input_len as _;
        zstate.inner.next_in = $input as *mut _;
        zstate.inner.avail_out = $output_len as _;
        zstate.inner.next_out = $output;

        let code = sys::inflate(zstate.as_mut(), DEFAULT_INFLATE);

        let status = match code {
            //Everything is consumed, hence more input is required to continue
            sys::Z_OK if zstate.inner.avail_in == 0 => Ok(DecodeStatus::NeedInput),
            sys::Z_OK => Ok(DecodeStatus::NeedOutput),
            sys::Z_STREAM_END => Ok(DecodeStatus::Finished),
            sys::Z_BUF_ERROR => Ok(DecodeStatus::NeedOutput),
            other => Err(crate::decoder::DecodeError(other as _)),
        };

        $crate::decoder::Decode {
            input_remain: zstate.inner.avail_in as usize,
            output_remain: zstate.inner.avail_out as usize,
            status,
        }
    }};
}
487
488#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
489mod zlib_common;
490#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-ng", feature = "zlib-rust"))]
491pub use zlib_common::ZlibMode;
492#[cfg(feature = "brotli-rust")]
493mod brotli;
494#[cfg(feature = "brotli-c")]
495mod brotli_c;
496#[cfg(any(feature = "zlib", feature = "zlib-static"))]
497mod zlib;
498#[cfg(feature = "zlib-ng")]
499mod zlib_ng;
500#[cfg(feature = "zlib-rust")]
501mod zlib_rust;
502#[cfg(feature = "zstd")]
503mod zstd;
504#[cfg(feature = "zstd")]
505pub use zstd::ZstdOptions;
506
507impl<const N: usize> crate::Buffer<N> {
508    ///Decodes `input` using `decoder` returning number of bytes consumed in `input`
509    ///
510    ///On success returns tuple with:
511    ///- Number of consumed bytes in `input`
512    ///- Decode status:
513    ///    - In case of `Finished`, you should not continue to invoke decode until you reset decoder
514    ///    - In case of `NeedOutput`, you should consume internal buffer.
515    ///
516    ///In case of error, internal buffer size will not change
517    pub fn decode(&mut self, decoder: &mut Decoder, input: &[u8]) -> Result<(usize, DecodeStatus), DecodeError> {
518        let spare_capacity = self.spare_capacity_mut();
519        let spare_capacity_len = spare_capacity.len();
520
521        let result = decoder.decode_uninit(input, spare_capacity);
522
523        match result.status {
524            Ok(status) => {
525                self.cursor = self.cursor + spare_capacity_len - result.output_remain;
526                Ok((input.len() - result.input_remain, status))
527            }
528            Err(error) => Err(error),
529        }
530    }
531}