Skip to main content

structured_zstd/decoding/
frame_decoder.rs

//! [`FrameDecoder`] is the main low-level struct users interact with to decode zstd frames.
//!
//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
//! decompressed independently of other frames. This module contains structures
//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::block_decoder::BlockDecoder;
10use crate::decoding::decode_buffer::DecodeBuffer;
11use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
12use crate::decoding::errors::{DecodeBlockContentError, FrameDecoderError};
13use crate::decoding::flat_buf::FlatBuf;
14use crate::decoding::ringbuffer::RingBuffer;
15use crate::decoding::scratch::DecoderScratch;
16use crate::io::{Error, Read, Write};
17use alloc::collections::BTreeMap;
18use alloc::vec::Vec;
19use core::convert::TryInto;
20
21use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
22
23/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
24///
25/// This decoder is able to decode frames only partially and gives control
26/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
27/// It reads bytes as needed from a provided source and can be read from to collect partial results.
28///
29/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
30/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
31///
32/// Workflow is as follows:
33/// ```
34/// use structured_zstd::decoding::BlockDecodingStrategy;
35///
36/// # #[cfg(feature = "std")]
37/// use std::io::{Read, Write};
38///
39/// // no_std environments can use the crate's own Read traits
40/// # #[cfg(not(feature = "std"))]
41/// use structured_zstd::io::{Read, Write};
42///
43/// fn decode_this(mut file: impl Read) {
44///     //Create a new decoder
45///     let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
46///     let mut result = Vec::new();
47///
48///     // Use reset or init to make the decoder ready to decode the frame from the io::Read
49///     frame_dec.reset(&mut file).unwrap();
50///
51///     // Loop until the frame has been decoded completely
52///     while !frame_dec.is_finished() {
53///         // decode (roughly) batch_size many bytes
54///         frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
55///
56///         // read from the decoder to collect bytes from the internal buffer
57///         let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
58///
59///         // then do something with it
60///         do_something(&result[0..bytes_read]);
61///     }
62///
63///     // handle the last chunk of data
64///     while frame_dec.can_collect() > 0 {
65///         let x = frame_dec.read(result.as_mut_slice()).unwrap();
66///
67///         do_something(&result[0..x]);
68///     }
69/// }
70///
71/// fn do_something(data: &[u8]) {
72/// # #[cfg(feature = "std")]
73///     std::io::stdout().write_all(data).unwrap();
74/// }
75/// ```
76pub struct FrameDecoder {
77    state: Option<FrameDecoderState>,
78    owned_dicts: BTreeMap<u32, Dictionary>,
79    #[cfg(target_has_atomic = "ptr")]
80    shared_dicts: BTreeMap<u32, DictionaryHandle>,
81    #[cfg(not(target_has_atomic = "ptr"))]
82    shared_dicts: (),
83    /// `ZSTD_f_zstd1_magicless` — when true, [`init`] / [`reset`]
84    /// expect frames without the 4-byte magic number prefix.
85    /// Default false (standard zstd format).
86    magicless: bool,
87}
88
89/// Backend-tagged decode scratch — chosen at frame-reset time based
90/// on the parsed `FrameHeader.descriptor.single_segment_flag()` and
91/// kept stable through the lifetime of the frame. The match in each
92/// helper below dispatches **once per call** (e.g. once per block in
93/// `decode_block_content`, once per drain in `drain_to_writer`) —
94/// never inside the hot push/repeat loop, which is fully
95/// monomorphised through the `DecoderScratch<B>` generic.
96enum DecoderScratchKind {
97    Ring(DecoderScratch<RingBuffer>),
98    Flat(DecoderScratch<FlatBuf>),
99}
100
101impl DecoderScratchKind {
102    fn new_ring(window_size: usize) -> Self {
103        let mut s = DecoderScratch::<RingBuffer>::new(window_size);
104        s.buffer.reserve(window_size);
105        Self::Ring(s)
106    }
107
108    /// Construct a flat-backed scratch sized for a single-segment
109    /// frame. `frame_content_size` is the upcoming output size in
110    /// bytes (== `window_size` when the flag is set).
111    fn new_flat(frame_content_size: usize) -> Self {
112        let flat = FlatBuf::with_capacity(frame_content_size);
113        // DecoderScratch's default ctor would discard the pre-sized
114        // FlatBuf — go through from_backend so the buffer carries the
115        // capacity the constructor wants.
116        let mut s = DecoderScratch::<FlatBuf>::new(frame_content_size);
117        s.buffer = DecodeBuffer::from_backend(flat, frame_content_size);
118        Self::Flat(s)
119    }
120
121    /// Reset (or transition between) backends for a new frame.
122    /// Reuses the existing `DecoderScratch` allocations (FSE / HUF
123    /// tables, sequence vec, etc.) when the backend kind is unchanged
124    /// — only the underlying buffer is re-sized for the new frame.
125    /// Building a fresh `DecoderScratch` on every frame would
126    /// re-allocate everything and was measured at +255 % vs ring on
127    /// small frames; reusing it keeps the small-frame cost flat.
128    fn reset(&mut self, frame: &frame::FrameHeader, window_size: usize) {
129        if frame.descriptor.single_segment_flag() {
130            match self {
131                Self::Flat(s) => {
132                    s.reset(window_size);
133                    // DecodeBuffer::reset clears + reserves
134                    // window_size; FlatBuf's reserve grows the
135                    // backing Vec if the new FCS is larger than
136                    // what's already allocated. No alloc when the
137                    // previous flat frame had >= this capacity.
138                }
139                Self::Ring(_) => *self = Self::new_flat(window_size),
140            }
141        } else {
142            match self {
143                Self::Ring(s) => s.reset(window_size),
144                Self::Flat(_) => *self = Self::new_ring(window_size),
145            }
146        }
147    }
148
149    fn init_from_dict(&mut self, dict: &Dictionary) {
150        match self {
151            Self::Ring(s) => s.init_from_dict(dict),
152            Self::Flat(s) => s.init_from_dict(dict),
153        }
154    }
155
156    #[inline]
157    fn buffer_len(&self) -> usize {
158        match self {
159            Self::Ring(s) => s.buffer.len(),
160            Self::Flat(s) => s.buffer.len(),
161        }
162    }
163
164    fn buffer_drain(&mut self) -> Vec<u8> {
165        match self {
166            Self::Ring(s) => s.buffer.drain(),
167            Self::Flat(s) => s.buffer.drain(),
168        }
169    }
170
171    fn buffer_drain_to_window_size(&mut self) -> Option<Vec<u8>> {
172        match self {
173            Self::Ring(s) => s.buffer.drain_to_window_size(),
174            Self::Flat(s) => s.buffer.drain_to_window_size(),
175        }
176    }
177
178    fn buffer_drain_to_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
179        match self {
180            Self::Ring(s) => s.buffer.drain_to_writer(sink),
181            Self::Flat(s) => s.buffer.drain_to_writer(sink),
182        }
183    }
184
185    fn buffer_drain_to_window_size_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
186        match self {
187            Self::Ring(s) => s.buffer.drain_to_window_size_writer(sink),
188            Self::Flat(s) => s.buffer.drain_to_window_size_writer(sink),
189        }
190    }
191
192    fn buffer_can_drain(&self) -> usize {
193        match self {
194            Self::Ring(s) => s.buffer.can_drain(),
195            Self::Flat(s) => s.buffer.can_drain(),
196        }
197    }
198
199    fn buffer_can_drain_to_window_size(&self) -> Option<usize> {
200        match self {
201            Self::Ring(s) => s.buffer.can_drain_to_window_size(),
202            Self::Flat(s) => s.buffer.can_drain_to_window_size(),
203        }
204    }
205
206    fn buffer_read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
207        match self {
208            Self::Ring(s) => s.buffer.read(target),
209            Self::Flat(s) => s.buffer.read(target),
210        }
211    }
212
213    fn buffer_read_all(&mut self, target: &mut [u8]) -> Result<usize, Error> {
214        match self {
215            Self::Ring(s) => s.buffer.read_all(target),
216            Self::Flat(s) => s.buffer.read_all(target),
217        }
218    }
219
220    fn decode_block_content<R: Read>(
221        &mut self,
222        decoder: &mut BlockDecoder,
223        header: &crate::blocks::block::BlockHeader,
224        source: R,
225    ) -> Result<u64, DecodeBlockContentError> {
226        match self {
227            Self::Ring(s) => decoder.decode_block_content(header, s, source),
228            Self::Flat(s) => decoder.decode_block_content(header, s, source),
229        }
230    }
231
232    #[cfg(feature = "hash")]
233    fn hash_finish(&self) -> u64 {
234        use core::hash::Hasher;
235        match self {
236            Self::Ring(s) => s.buffer.hash.finish(),
237            Self::Flat(s) => s.buffer.hash.finish(),
238        }
239    }
240}
241
242struct FrameDecoderState {
243    pub frame_header: frame::FrameHeader,
244    decoder_scratch: DecoderScratchKind,
245    frame_finished: bool,
246    block_counter: usize,
247    bytes_read_counter: u64,
248    check_sum: Option<u32>,
249    using_dict: Option<u32>,
250}
251
/// Tells [`FrameDecoder::decode_blocks`] how much work to do in one call.
///
/// Whatever the strategy, decoding stops at the frame's last block.
pub enum BlockDecodingStrategy {
    /// Keep decoding until the last block of the frame has been decoded.
    All,
    /// Return once at least this many blocks have been decoded in this call.
    UptoBlocks(usize),
    /// Return once the decode buffer has grown by at least this many bytes
    /// in this call. Checked after each block, so the limit can be overshot.
    UptoBytes(usize),
}
257
258impl FrameDecoderState {
259    /// Construct a new frame decoder state, reading the frame header
260    /// from `source`. When `magicless` is `true`, the 4-byte magic
261    /// number prefix is NOT consumed (donor `ZSTD_f_zstd1_magicless`).
262    /// Crate-internal — reached only via `FrameDecoder::init` /
263    /// `FrameDecoder::init_with_dict_handle`. Pre-allocates the
264    /// decode buffer to `window_size` so the first block does not
265    /// trigger incremental growth from zero capacity.
266    pub(crate) fn new_with_format(
267        source: impl Read,
268        magicless: bool,
269    ) -> Result<FrameDecoderState, FrameDecoderError> {
270        let (frame, header_size) = frame::read_frame_header_with_format(source, magicless)?;
271        let window_size = frame.window_size()?;
272
273        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
274            return Err(FrameDecoderError::WindowSizeTooBig {
275                requested: window_size,
276            });
277        }
278
279        let decoder_scratch = if frame.descriptor.single_segment_flag() {
280            DecoderScratchKind::new_flat(window_size as usize)
281        } else {
282            DecoderScratchKind::new_ring(window_size as usize)
283        };
284        Ok(FrameDecoderState {
285            frame_header: frame,
286            frame_finished: false,
287            block_counter: 0,
288            decoder_scratch,
289            bytes_read_counter: u64::from(header_size),
290            check_sum: None,
291            using_dict: None,
292        })
293    }
294
295    /// Reset this state for a new frame read from `source`, reusing
296    /// existing allocations. When `magicless` is `true`, the frame
297    /// header is read WITHOUT expecting a magic-number prefix
298    /// (donor `ZSTD_f_zstd1_magicless`). Crate-internal — reached
299    /// only via `FrameDecoder::reset`.
300    ///
301    /// `DecodeBuffer::reset` reserves `window_size` internally, so
302    /// no additional frame-level reservation is needed here.
303    /// Further buffer growth during decoding is performed on demand
304    /// by the active block path.
305    pub(crate) fn reset_with_format(
306        &mut self,
307        source: impl Read,
308        magicless: bool,
309    ) -> Result<(), FrameDecoderError> {
310        let (frame_header, header_size) = frame::read_frame_header_with_format(source, magicless)?;
311        let window_size = frame_header.window_size()?;
312
313        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
314            return Err(FrameDecoderError::WindowSizeTooBig {
315                requested: window_size,
316            });
317        }
318
319        self.decoder_scratch
320            .reset(&frame_header, window_size as usize);
321        self.frame_header = frame_header;
322        self.frame_finished = false;
323        self.block_counter = 0;
324        self.bytes_read_counter = u64::from(header_size);
325        self.check_sum = None;
326        self.using_dict = None;
327        Ok(())
328    }
329}
330
331impl Default for FrameDecoder {
332    fn default() -> Self {
333        Self::new()
334    }
335}
336
337impl FrameDecoder {
338    /// This will create a new decoder without allocating anything yet.
339    /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
340    /// else they just reset these buffers with not further allocations
341    pub fn new() -> FrameDecoder {
342        FrameDecoder {
343            state: None,
344            owned_dicts: BTreeMap::new(),
345            #[cfg(target_has_atomic = "ptr")]
346            shared_dicts: BTreeMap::new(),
347            #[cfg(not(target_has_atomic = "ptr"))]
348            shared_dicts: (),
349            magicless: false,
350        }
351    }
352
353    /// Enable or disable magicless frame format
354    /// (`ZSTD_f_zstd1_magicless`). When set to `true`, subsequent
355    /// [`init`] / [`reset`] calls expect the frame header to begin
356    /// directly with the frame-header descriptor — no 4-byte magic
357    /// number prefix. Default false. Must match the encoder's
358    /// magicless setting; the format is unambiguous only when the
359    /// caller knows it out-of-band.
360    ///
361    /// Note: magicless mode also disables skippable-frame detection.
362    /// The `0x184D2A50..=0x184D2A5F` skippable-frame magic range is
363    /// only recognised when the 4-byte magic prefix is consumed, so
364    /// `decode_all` / `init` / `reset` will treat a skippable frame
365    /// at the head of a magicless stream as a malformed frame header
366    /// (bad descriptor / window-size error) instead of skipping it.
367    /// Mixed-format streams that interleave skippable frames must be
368    /// pre-split by the caller; `set_magicless(true)` is only safe
369    /// when the entire stream is known to be magicless zstd frames.
370    pub fn set_magicless(&mut self, magicless: bool) {
371        self.magicless = magicless;
372    }
373
374    #[cfg(target_has_atomic = "ptr")]
375    fn shared_dict_exists(&self, dict_id: u32) -> bool {
376        self.shared_dicts.contains_key(&dict_id)
377    }
378
379    #[cfg(not(target_has_atomic = "ptr"))]
380    fn shared_dict_exists(&self, _dict_id: u32) -> bool {
381        false
382    }
383
384    fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
385        use crate::decoding::errors::DictionaryDecodeError as dict_err;
386
387        if dict.id == 0 {
388            return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
389        }
390        if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
391            return Err(FrameDecoderError::from(
392                dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
393            ));
394        }
395        Ok(())
396    }
397
398    /// init() will allocate all needed buffers if it is the first time this decoder is used
399    /// else they just reset these buffers with not further allocations
400    ///
401    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
402    ///
403    /// equivalent to reset()
404    pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
405        self.reset(source)
406    }
407
408    /// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
409    ///
410    /// If the frame header has a dictionary ID, this validates it against
411    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
412    ///
413    /// If the header omits the optional dictionary ID, this still applies the
414    /// provided dictionary handle.
415    ///
416    /// # Warning
417    ///
418    /// This method always applies `dict` unless the frame header contains a
419    /// non-matching dictionary ID. Callers must only use this API when they
420    /// already know the frame was encoded with the provided dictionary, even if
421    /// the frame header omits the dictionary ID or encodes an explicit
422    /// dictionary ID of `0`.
423    ///
424    /// Passing a dictionary for a frame that was not encoded with it can
425    /// silently corrupt the decoded output.
426    pub fn init_with_dict_handle(
427        &mut self,
428        source: impl Read,
429        dict: &DictionaryHandle,
430    ) -> Result<(), FrameDecoderError> {
431        self.reset_with_dict_handle(source, dict)
432    }
433
434    /// reset() will allocate all needed buffers if it is the first time this decoder is used
435    /// else they just reset these buffers with not further allocations
436    ///
437    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
438    ///
439    /// equivalent to init()
440    pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
441        use FrameDecoderError as err;
442        let magicless = self.magicless;
443        let dict_id = match &mut self.state {
444            Some(s) => {
445                s.reset_with_format(source, magicless)?;
446                s.frame_header.dictionary_id()
447            }
448            None => {
449                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
450                self.state
451                    .as_ref()
452                    .and_then(|state| state.frame_header.dictionary_id())
453            }
454        };
455        if let Some(dict_id) = dict_id {
456            let state = self.state.as_mut().expect("state initialized");
457            let owned_dicts = &self.owned_dicts;
458            #[cfg(target_has_atomic = "ptr")]
459            let shared_dicts = &self.shared_dicts;
460            let dict = owned_dicts
461                .get(&dict_id)
462                .or_else(|| {
463                    #[cfg(target_has_atomic = "ptr")]
464                    {
465                        shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
466                    }
467                    #[cfg(not(target_has_atomic = "ptr"))]
468                    {
469                        None
470                    }
471                })
472                .ok_or(err::DictNotProvided { dict_id })?;
473            state.decoder_scratch.init_from_dict(dict);
474            state.using_dict = Some(dict_id);
475        }
476        Ok(())
477    }
478
479    /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
480    ///
481    /// If the frame header has a dictionary ID, this validates it against
482    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
483    ///
484    /// If the header omits the optional dictionary ID, this still applies the
485    /// provided dictionary handle.
486    ///
487    /// # Warning
488    ///
489    /// This method always applies `dict` unless the frame header contains a
490    /// non-matching dictionary ID. Callers must only use this API when they
491    /// already know the frame was encoded with the provided dictionary, even if
492    /// the frame header omits the dictionary ID or encodes an explicit
493    /// dictionary ID of `0`.
494    ///
495    /// Passing a dictionary for a frame that was not encoded with it can
496    /// silently corrupt the decoded output.
497    pub fn reset_with_dict_handle(
498        &mut self,
499        source: impl Read,
500        dict: &DictionaryHandle,
501    ) -> Result<(), FrameDecoderError> {
502        use FrameDecoderError as err;
503        Self::validate_registered_dictionary(dict.as_dict())?;
504        let magicless = self.magicless;
505        let state = match &mut self.state {
506            Some(s) => {
507                s.reset_with_format(source, magicless)?;
508                s
509            }
510            None => {
511                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
512                self.state.as_mut().unwrap()
513            }
514        };
515        if let Some(dict_id) = state.frame_header.dictionary_id()
516            && dict_id != dict.id()
517        {
518            return Err(err::DictIdMismatch {
519                expected: dict_id,
520                provided: dict.id(),
521            });
522        }
523        state.decoder_scratch.init_from_dict(dict.as_dict());
524        state.using_dict = Some(dict.id());
525        Ok(())
526    }
527
528    /// Add a dictionary that can be selected dynamically by frame dictionary ID.
529    ///
530    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
531    /// registered (either as owned or shared).
532    pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
533        Self::validate_registered_dictionary(&dict)?;
534        let dict_id = dict.id;
535        if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
536            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
537        }
538        self.owned_dicts.insert(dict_id, dict);
539        Ok(())
540    }
541
542    /// Parse and add a serialized dictionary blob.
543    pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
544        let dict = Dictionary::decode_dict(raw_dictionary)?;
545        self.add_dict(dict)
546    }
547
548    /// Add a pre-parsed dictionary handle for reuse across decoders.
549    ///
550    /// This API is available on targets with pointer-width atomics
551    /// (`target_has_atomic = "ptr"`).
552    ///
553    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
554    /// registered (either as owned or shared).
555    #[cfg(target_has_atomic = "ptr")]
556    pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
557        Self::validate_registered_dictionary(dict.as_dict())?;
558        let dict_id = dict.id();
559        if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
560            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
561        }
562        self.shared_dicts.insert(dict_id, dict);
563        Ok(())
564    }
565
566    pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
567        use FrameDecoderError as err;
568        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
569        let owned_dicts = &self.owned_dicts;
570        #[cfg(target_has_atomic = "ptr")]
571        let shared_dicts = &self.shared_dicts;
572
573        let dict = owned_dicts
574            .get(&dict_id)
575            .or_else(|| {
576                #[cfg(target_has_atomic = "ptr")]
577                {
578                    shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
579                }
580                #[cfg(not(target_has_atomic = "ptr"))]
581                {
582                    None
583                }
584            })
585            .ok_or(err::DictNotProvided { dict_id })?;
586        state.decoder_scratch.init_from_dict(dict);
587        state.using_dict = Some(dict_id);
588
589        Ok(())
590    }
591
592    /// Returns how many bytes the frame contains after decompression
593    pub fn content_size(&self) -> u64 {
594        match &self.state {
595            None => 0,
596            Some(s) => s.frame_header.frame_content_size(),
597        }
598    }
599
600    /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
601    pub fn get_checksum_from_data(&self) -> Option<u32> {
602        let state = self.state.as_ref()?;
603
604        state.check_sum
605    }
606
/// Checksum calculated while decoding, or `None` when the decoder is
/// uninitialized.
///
/// The value is only meaningful after all decoded bytes have been
/// collected/read from the FrameDecoder.
#[cfg(feature = "hash")]
pub fn get_calculated_checksum(&self) -> Option<u32> {
    self.state.as_ref().map(|state| {
        // Only the lower 32 bits of the 64-bit hash are used.
        state.decoder_scratch.hash_finish() as u32
    })
}
616
617    /// Counter for how many bytes have been consumed while decoding the frame
618    pub fn bytes_read_from_source(&self) -> u64 {
619        let state = match &self.state {
620            None => return 0,
621            Some(s) => s,
622        };
623        state.bytes_read_counter
624    }
625
626    /// Whether the current frames last block has been decoded yet
627    /// If this returns true you can call the drain* functions to get all content
628    /// (the read() function will drain automatically if this returns true)
629    pub fn is_finished(&self) -> bool {
630        let state = match &self.state {
631            None => return true,
632            Some(s) => s,
633        };
634        if state.frame_header.descriptor.content_checksum_flag() {
635            state.frame_finished && state.check_sum.is_some()
636        } else {
637            state.frame_finished
638        }
639    }
640
641    /// Counter for how many blocks have already been decoded
642    pub fn blocks_decoded(&self) -> usize {
643        let state = match &self.state {
644            None => return 0,
645            Some(s) => s,
646        };
647        state.block_counter
648    }
649
650    /// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
651    /// The Strategy influences how many blocks will be decoded before the function returns
652    /// This is important if you want to manage memory consumption carefully. If you don't care
653    /// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
654    pub fn decode_blocks(
655        &mut self,
656        mut source: impl Read,
657        strat: BlockDecodingStrategy,
658    ) -> Result<bool, FrameDecoderError> {
659        use FrameDecoderError as err;
660        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
661
662        let mut block_dec = decoding::block_decoder::new();
663
664        let buffer_size_before = state.decoder_scratch.buffer_len();
665        let block_counter_before = state.block_counter;
666        loop {
667            vprintln!("################");
668            vprintln!("Next Block: {}", state.block_counter);
669            vprintln!("################");
670            let (block_header, block_header_size) = block_dec
671                .read_block_header(&mut source)
672                .map_err(err::FailedToReadBlockHeader)?;
673            state.bytes_read_counter += u64::from(block_header_size);
674
675            vprintln!();
676            vprintln!(
677                "Found {} block with size: {}, which will be of size: {}",
678                block_header.block_type,
679                block_header.content_size,
680                block_header.decompressed_size
681            );
682
683            let bytes_read_in_block_body = state
684                .decoder_scratch
685                .decode_block_content(&mut block_dec, &block_header, &mut source)
686                .map_err(err::FailedToReadBlockBody)?;
687            state.bytes_read_counter += bytes_read_in_block_body;
688
689            state.block_counter += 1;
690
691            vprintln!("Output: {}", state.decoder_scratch.buffer_len());
692
693            if block_header.last_block {
694                state.frame_finished = true;
695                if state.frame_header.descriptor.content_checksum_flag() {
696                    let mut chksum = [0u8; 4];
697                    source
698                        .read_exact(&mut chksum)
699                        .map_err(err::FailedToReadChecksum)?;
700                    state.bytes_read_counter += 4;
701                    let chksum = u32::from_le_bytes(chksum);
702                    state.check_sum = Some(chksum);
703                }
704                break;
705            }
706
707            match strat {
708                BlockDecodingStrategy::All => { /* keep going */ }
709                BlockDecodingStrategy::UptoBlocks(n) => {
710                    if state.block_counter - block_counter_before >= n {
711                        break;
712                    }
713                }
714                BlockDecodingStrategy::UptoBytes(n) => {
715                    if state.decoder_scratch.buffer_len() - buffer_size_before >= n {
716                        break;
717                    }
718                }
719            }
720        }
721
722        Ok(state.frame_finished)
723    }
724
725    /// Collect bytes and retain window_size bytes while decoding is still going on.
726    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
727    pub fn collect(&mut self) -> Option<Vec<u8>> {
728        let finished = self.is_finished();
729        let state = self.state.as_mut()?;
730        if finished {
731            Some(state.decoder_scratch.buffer_drain())
732        } else {
733            state.decoder_scratch.buffer_drain_to_window_size()
734        }
735    }
736
737    /// Collect bytes and retain window_size bytes while decoding is still going on.
738    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
739    pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
740        let finished = self.is_finished();
741        let state = match &mut self.state {
742            None => return Ok(0),
743            Some(s) => s,
744        };
745        if finished {
746            state.decoder_scratch.buffer_drain_to_writer(w)
747        } else {
748            state.decoder_scratch.buffer_drain_to_window_size_writer(w)
749        }
750    }
751
752    /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
753    /// because window_size bytes need to be retained for decoding.
754    /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
755    pub fn can_collect(&self) -> usize {
756        let finished = self.is_finished();
757        let state = match &self.state {
758            None => return 0,
759            Some(s) => s,
760        };
761        if finished {
762            state.decoder_scratch.buffer_can_drain()
763        } else {
764            state
765                .decoder_scratch
766                .buffer_can_drain_to_window_size()
767                .unwrap_or(0)
768        }
769    }
770
771    /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
772    /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
773    ///
774    /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
775    /// which try to serve an old-style c api
776    ///
777    /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
778    /// input will not make any progress!
779    ///
780    /// Note that no kind of block can be bigger than 128kb.
781    /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
782    ///
783    /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
784    pub fn decode_from_to(
785        &mut self,
786        source: &[u8],
787        target: &mut [u8],
788    ) -> Result<(usize, usize), FrameDecoderError> {
789        use FrameDecoderError as err;
790        let bytes_read_at_start = match &self.state {
791            Some(s) => s.bytes_read_counter,
792            None => 0,
793        };
794
795        if !self.is_finished() || self.state.is_none() {
796            let mut mt_source = source;
797
798            if self.state.is_none() {
799                self.init(&mut mt_source)?;
800            }
801
802            //pseudo block to scope "state" so we can borrow self again after the block
803            {
804                let state = match &mut self.state {
805                    Some(s) => s,
806                    None => panic!("Bug in library"),
807                };
808                let mut block_dec = decoding::block_decoder::new();
809
810                if state.frame_header.descriptor.content_checksum_flag()
811                    && state.frame_finished
812                    && state.check_sum.is_none()
813                {
814                    //this block is needed if the checksum were the only 4 bytes that were not included in the last decode_from_to call for a frame
815                    if mt_source.len() >= 4 {
816                        let chksum = mt_source[..4].try_into().expect("optimized away");
817                        state.bytes_read_counter += 4;
818                        let chksum = u32::from_le_bytes(chksum);
819                        state.check_sum = Some(chksum);
820                    }
821                    return Ok((4, 0));
822                }
823
824                loop {
825                    //check if there are enough bytes for the next header
826                    if mt_source.len() < 3 {
827                        break;
828                    }
829                    let (block_header, block_header_size) = block_dec
830                        .read_block_header(&mut mt_source)
831                        .map_err(err::FailedToReadBlockHeader)?;
832
833                    // check the needed size for the block before updating counters.
834                    // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
835                    if mt_source.len() < block_header.content_size as usize {
836                        break;
837                    }
838                    state.bytes_read_counter += u64::from(block_header_size);
839
840                    let bytes_read_in_block_body = state
841                        .decoder_scratch
842                        .decode_block_content(&mut block_dec, &block_header, &mut mt_source)
843                        .map_err(err::FailedToReadBlockBody)?;
844                    state.bytes_read_counter += bytes_read_in_block_body;
845                    state.block_counter += 1;
846
847                    if block_header.last_block {
848                        state.frame_finished = true;
849                        if state.frame_header.descriptor.content_checksum_flag() {
850                            //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
851                            if mt_source.len() >= 4 {
852                                let chksum = mt_source[..4].try_into().expect("optimized away");
853                                state.bytes_read_counter += 4;
854                                let chksum = u32::from_le_bytes(chksum);
855                                state.check_sum = Some(chksum);
856                            }
857                        }
858                        break;
859                    }
860                }
861            }
862        }
863
864        let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
865        let bytes_read_at_end = match &mut self.state {
866            Some(s) => s.bytes_read_counter,
867            None => panic!("Bug in library"),
868        };
869        let read_len = bytes_read_at_end - bytes_read_at_start;
870        Ok((read_len as usize, result_len))
871    }
872
873    /// Decode multiple frames into the output slice.
874    ///
875    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
876    /// skipped during decode.
877    ///
878    /// `output` must be large enough to hold the decompressed data. If you don't know
879    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
880    ///
881    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
882    ///
883    /// Returns the number of bytes written to `output`.
884    pub fn decode_all(
885        &mut self,
886        input: &[u8],
887        output: &mut [u8],
888    ) -> Result<usize, FrameDecoderError> {
889        self.decode_all_impl(input, output, |this, src| this.init(src))
890    }
891
892    /// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
893    ///
894    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
895    /// skipped during decode.
896    ///
897    /// `output` must be large enough to hold the decompressed data. If you don't know
898    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
899    ///
900    /// This calls [`FrameDecoder::init_with_dict_handle`], and all bytes currently in the
901    /// decoder will be lost.
902    ///
903    /// # Warning
904    ///
905    /// Each decoded frame is initialized with `dict`, even when a frame header
906    /// omits the optional dictionary ID. Callers must only use this API when
907    /// they already know the input frames were encoded with the provided
908    /// dictionary; otherwise decoded output can be silently corrupted.
909    pub fn decode_all_with_dict_handle(
910        &mut self,
911        input: &[u8],
912        output: &mut [u8],
913        dict: &DictionaryHandle,
914    ) -> Result<usize, FrameDecoderError> {
915        self.decode_all_impl(input, output, |this, src| {
916            this.init_with_dict_handle(src, dict)
917        })
918    }
919
920    fn decode_all_impl(
921        &mut self,
922        mut input: &[u8],
923        mut output: &mut [u8],
924        mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
925    ) -> Result<usize, FrameDecoderError> {
926        let mut total_bytes_written = 0;
927        while !input.is_empty() {
928            match init_frame(self, &mut input) {
929                Ok(_) => {}
930                Err(FrameDecoderError::ReadFrameHeaderError(
931                    crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
932                )) => {
933                    input = input
934                        .get(length as usize..)
935                        .ok_or(FrameDecoderError::FailedToSkipFrame)?;
936                    continue;
937                }
938                Err(e) => return Err(e),
939            };
940            loop {
941                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
942                let bytes_written = self
943                    .read(output)
944                    .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
945                output = &mut output[bytes_written..];
946                total_bytes_written += bytes_written;
947                if self.can_collect() != 0 {
948                    return Err(FrameDecoderError::TargetTooSmall);
949                }
950                if self.is_finished() {
951                    break;
952                }
953            }
954        }
955
956        Ok(total_bytes_written)
957    }
958
959    /// Decode multiple frames into the output slice using a serialized dictionary.
960    ///
961    /// # Warning
962    ///
963    /// Each decoded frame is initialized with the parsed dictionary, even when a
964    /// frame header omits the optional dictionary ID. Callers must only use this
965    /// API when they already know the input frames were encoded with that
966    /// dictionary; otherwise decoded output can be silently corrupted.
967    pub fn decode_all_with_dict_bytes(
968        &mut self,
969        input: &[u8],
970        output: &mut [u8],
971        raw_dictionary: &[u8],
972    ) -> Result<usize, FrameDecoderError> {
973        let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
974        self.decode_all_with_dict_handle(input, output, &dict)
975    }
976
977    /// Decode multiple frames into the extra capacity of the output vector.
978    ///
979    /// `input` must contain an exact number of frames.
980    ///
981    /// `output` must have enough extra capacity to hold the decompressed data.
982    /// This function will not reallocate or grow the vector. If you don't know
983    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
984    ///
985    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
986    ///
987    /// The length of the output vector is updated to include the decompressed data.
988    /// The length is not changed if an error occurs.
989    pub fn decode_all_to_vec(
990        &mut self,
991        input: &[u8],
992        output: &mut Vec<u8>,
993    ) -> Result<(), FrameDecoderError> {
994        let len = output.len();
995        let cap = output.capacity();
996        output.resize(cap, 0);
997        match self.decode_all(input, &mut output[len..]) {
998            Ok(bytes_written) => {
999                let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
1000                output.resize(new_len, 0);
1001                Ok(())
1002            }
1003            Err(e) => {
1004                output.resize(len, 0);
1005                Err(e)
1006            }
1007        }
1008    }
1009}
1010
1011/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
1012/// this will retain window_size bytes, else it will drain it completely
1013impl Read for FrameDecoder {
1014    fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
1015        let state = match &mut self.state {
1016            None => return Ok(0),
1017            Some(s) => s,
1018        };
1019        if state.frame_finished {
1020            state.decoder_scratch.buffer_read_all(target)
1021        } else {
1022            state.decoder_scratch.buffer_read(target)
1023        }
1024    }
1025}
1026
#[cfg(test)]
mod tests {
    extern crate std;

    use super::{DictionaryHandle, FrameDecoder};
    use crate::encoding::{CompressionLevel, FrameCompressor};
    use alloc::vec::Vec;

    /// Resetting with a dictionary handle must record the dictionary as in use even when the
    /// frame header itself carries no dictionary ID.
    #[test]
    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
        // Compress a small payload without involving any dictionary.
        let plain = b"reset-without-dict-id";
        let mut frame_bytes = Vec::new();
        let mut encoder = FrameCompressor::new(CompressionLevel::Default);
        encoder.set_source(plain.as_slice());
        encoder.set_drain(&mut frame_bytes);
        encoder.compress();

        let handle = DictionaryHandle::decode_dict(include_bytes!("../../dict_tests/dictionary"))
            .expect("dictionary should parse");

        let mut frame_decoder = FrameDecoder::new();
        frame_decoder
            .reset_with_dict_handle(frame_bytes.as_slice(), &handle)
            .expect("reset should succeed");

        let decoder_state = frame_decoder
            .state
            .as_ref()
            .expect("state should be initialized");
        assert!(decoder_state.frame_header.dictionary_id().is_none());
        assert_eq!(decoder_state.using_dict, Some(handle.id()));
    }
}