Skip to main content

structured_zstd/decoding/
frame_decoder.rs

//! [`FrameDecoder`] is the main low-level struct users interact with to decode zstd frames.
//!
//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
//! decompressed independently of other frames. This module contains structures
//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
10use crate::decoding::errors::FrameDecoderError;
11use crate::decoding::scratch::DecoderScratch;
12use crate::io::{Error, Read, Write};
13use alloc::collections::BTreeMap;
14use alloc::vec::Vec;
15use core::convert::TryInto;
16
17use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
18
/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
///
/// This decoder is able to decode frames only partially and gives control
/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
/// It reads bytes as needed from a provided source and can be read from to collect partial results.
///
/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
/// you can use the provided [crate::decoding::StreamingDecoder] which wraps this FrameDecoder.
///
/// Workflow is as follows:
/// ```
/// use structured_zstd::decoding::BlockDecodingStrategy;
///
/// # #[cfg(feature = "std")]
/// use std::io::{Read, Write};
///
/// // no_std environments can use the crate's own Read traits
/// # #[cfg(not(feature = "std"))]
/// use structured_zstd::io::{Read, Write};
///
/// fn decode_this(mut file: impl Read) {
///     //Create a new decoder
///     let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
///     // The buffer must have a non-zero length: an empty Vec would hand `read`
///     // a zero-length slice and no bytes could ever be collected.
///     let mut result = vec![0u8; 4096];
///
///     // Use reset or init to make the decoder ready to decode the frame from the io::Read
///     frame_dec.reset(&mut file).unwrap();
///
///     // Loop until the frame has been decoded completely
///     while !frame_dec.is_finished() {
///         // decode (roughly) batch_size many bytes
///         frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
///
///         // read from the decoder to collect bytes from the internal buffer
///         let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
///
///         // then do something with it
///         do_something(&result[0..bytes_read]);
///     }
///
///     // handle the last chunk of data
///     while frame_dec.can_collect() > 0 {
///         let x = frame_dec.read(result.as_mut_slice()).unwrap();
///
///         do_something(&result[0..x]);
///     }
/// }
///
/// fn do_something(data: &[u8]) {
/// # #[cfg(feature = "std")]
///     std::io::stdout().write_all(data).unwrap();
/// }
/// ```
pub struct FrameDecoder {
    // `None` until the first successful init()/reset(); reused across frames afterwards.
    state: Option<FrameDecoderState>,
    // Dictionaries registered by value through add_dict(), keyed by dictionary ID.
    owned_dicts: BTreeMap<u32, Dictionary>,
    // Dictionary handles registered through add_dict_handle(), keyed by dictionary ID.
    #[cfg(target_has_atomic = "ptr")]
    shared_dicts: BTreeMap<u32, DictionaryHandle>,
    // Placeholder on targets without pointer-width atomics, where no handles can be registered.
    #[cfg(not(target_has_atomic = "ptr"))]
    shared_dicts: (),
}
80
/// Per-frame decoding state owned by a [`FrameDecoder`].
///
/// Created by the first init()/reset() call and reset in place for every following frame.
struct FrameDecoderState {
    /// Header of the frame currently being decoded.
    pub frame_header: frame::FrameHeader,
    /// Scratch space (including the decode buffer) handed to the block decoder.
    decoder_scratch: DecoderScratch,
    /// Set once the block flagged as the last block has been decoded.
    frame_finished: bool,
    /// Number of blocks decoded so far in this frame.
    block_counter: usize,
    /// Bytes consumed from the source for this frame: frame header, block headers,
    /// block bodies and (once read) the 4 checksum bytes.
    bytes_read_counter: u64,
    /// Checksum read from the source after the last block, if the frame carries one.
    check_sum: Option<u32>,
    /// ID of the dictionary that was applied to the scratch for this frame, if any.
    using_dict: Option<u32>,
}
90
/// Controls how much work a single [`FrameDecoder::decode_blocks`] call performs.
///
/// The limit is checked after each decoded block, so every call decodes at least
/// one block, and decoding always stops after the last block of the frame.
pub enum BlockDecodingStrategy {
    /// Keep decoding until the last block of the frame has been decoded.
    All,
    /// Stop once at least this many blocks have been decoded by the current call.
    UptoBlocks(usize),
    /// Stop once the decode buffer has grown by at least this many bytes during the current call.
    UptoBytes(usize),
}
96
97impl FrameDecoderState {
98    /// Read the frame header from `source` and create a new decoder state.
99    ///
100    /// Pre-allocates the decode buffer to `window_size` so the first block
101    /// does not trigger incremental growth from zero capacity.
102    pub fn new(source: impl Read) -> Result<FrameDecoderState, FrameDecoderError> {
103        let (frame, header_size) = frame::read_frame_header(source)?;
104        let window_size = frame.window_size()?;
105
106        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
107            return Err(FrameDecoderError::WindowSizeTooBig {
108                requested: window_size,
109            });
110        }
111
112        let mut decoder_scratch = DecoderScratch::new(window_size as usize);
113        decoder_scratch.buffer.reserve(window_size as usize);
114        Ok(FrameDecoderState {
115            frame_header: frame,
116            frame_finished: false,
117            block_counter: 0,
118            decoder_scratch,
119            bytes_read_counter: u64::from(header_size),
120            check_sum: None,
121            using_dict: None,
122        })
123    }
124
125    /// Reset this state for a new frame read from `source`, reusing existing allocations.
126    ///
127    /// `DecodeBuffer::reset` reserves `window_size` internally, so no
128    /// additional frame-level reservation is needed here. Further buffer
129    /// growth during decoding is performed on demand by the active block path.
130    pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
131        let (frame_header, header_size) = frame::read_frame_header(source)?;
132        let window_size = frame_header.window_size()?;
133
134        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
135            return Err(FrameDecoderError::WindowSizeTooBig {
136                requested: window_size,
137            });
138        }
139
140        self.frame_header = frame_header;
141        self.frame_finished = false;
142        self.block_counter = 0;
143        self.decoder_scratch.reset(window_size as usize);
144        self.bytes_read_counter = u64::from(header_size);
145        self.check_sum = None;
146        self.using_dict = None;
147        Ok(())
148    }
149}
150
151impl Default for FrameDecoder {
152    fn default() -> Self {
153        Self::new()
154    }
155}
156
157impl FrameDecoder {
158    /// This will create a new decoder without allocating anything yet.
159    /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
160    /// else they just reset these buffers with not further allocations
161    pub fn new() -> FrameDecoder {
162        FrameDecoder {
163            state: None,
164            owned_dicts: BTreeMap::new(),
165            #[cfg(target_has_atomic = "ptr")]
166            shared_dicts: BTreeMap::new(),
167            #[cfg(not(target_has_atomic = "ptr"))]
168            shared_dicts: (),
169        }
170    }
171
172    #[cfg(target_has_atomic = "ptr")]
173    fn shared_dict_exists(&self, dict_id: u32) -> bool {
174        self.shared_dicts.contains_key(&dict_id)
175    }
176
177    #[cfg(not(target_has_atomic = "ptr"))]
178    fn shared_dict_exists(&self, _dict_id: u32) -> bool {
179        false
180    }
181
182    fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
183        use crate::decoding::errors::DictionaryDecodeError as dict_err;
184
185        if dict.id == 0 {
186            return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
187        }
188        if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
189            return Err(FrameDecoderError::from(
190                dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
191            ));
192        }
193        Ok(())
194    }
195
196    /// init() will allocate all needed buffers if it is the first time this decoder is used
197    /// else they just reset these buffers with not further allocations
198    ///
199    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
200    ///
201    /// equivalent to reset()
202    pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
203        self.reset(source)
204    }
205
206    /// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
207    ///
208    /// If the frame header has a dictionary ID, this validates it against
209    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
210    ///
211    /// If the header omits the optional dictionary ID, this still applies the
212    /// provided dictionary handle.
213    ///
214    /// # Warning
215    ///
216    /// This method always applies `dict` unless the frame header contains a
217    /// non-matching dictionary ID. Callers must only use this API when they
218    /// already know the frame was encoded with the provided dictionary, even if
219    /// the frame header omits the dictionary ID or encodes an explicit
220    /// dictionary ID of `0`.
221    ///
222    /// Passing a dictionary for a frame that was not encoded with it can
223    /// silently corrupt the decoded output.
224    pub fn init_with_dict_handle(
225        &mut self,
226        source: impl Read,
227        dict: &DictionaryHandle,
228    ) -> Result<(), FrameDecoderError> {
229        self.reset_with_dict_handle(source, dict)
230    }
231
232    /// reset() will allocate all needed buffers if it is the first time this decoder is used
233    /// else they just reset these buffers with not further allocations
234    ///
235    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
236    ///
237    /// equivalent to init()
238    pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
239        use FrameDecoderError as err;
240        let dict_id = match &mut self.state {
241            Some(s) => {
242                s.reset(source)?;
243                s.frame_header.dictionary_id()
244            }
245            None => {
246                self.state = Some(FrameDecoderState::new(source)?);
247                self.state
248                    .as_ref()
249                    .and_then(|state| state.frame_header.dictionary_id())
250            }
251        };
252        if let Some(dict_id) = dict_id {
253            let state = self.state.as_mut().expect("state initialized");
254            let owned_dicts = &self.owned_dicts;
255            #[cfg(target_has_atomic = "ptr")]
256            let shared_dicts = &self.shared_dicts;
257            let dict = owned_dicts
258                .get(&dict_id)
259                .or_else(|| {
260                    #[cfg(target_has_atomic = "ptr")]
261                    {
262                        shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
263                    }
264                    #[cfg(not(target_has_atomic = "ptr"))]
265                    {
266                        None
267                    }
268                })
269                .ok_or(err::DictNotProvided { dict_id })?;
270            state.decoder_scratch.init_from_dict(dict);
271            state.using_dict = Some(dict_id);
272        }
273        Ok(())
274    }
275
276    /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
277    ///
278    /// If the frame header has a dictionary ID, this validates it against
279    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
280    ///
281    /// If the header omits the optional dictionary ID, this still applies the
282    /// provided dictionary handle.
283    ///
284    /// # Warning
285    ///
286    /// This method always applies `dict` unless the frame header contains a
287    /// non-matching dictionary ID. Callers must only use this API when they
288    /// already know the frame was encoded with the provided dictionary, even if
289    /// the frame header omits the dictionary ID or encodes an explicit
290    /// dictionary ID of `0`.
291    ///
292    /// Passing a dictionary for a frame that was not encoded with it can
293    /// silently corrupt the decoded output.
294    pub fn reset_with_dict_handle(
295        &mut self,
296        source: impl Read,
297        dict: &DictionaryHandle,
298    ) -> Result<(), FrameDecoderError> {
299        use FrameDecoderError as err;
300        Self::validate_registered_dictionary(dict.as_dict())?;
301        let state = match &mut self.state {
302            Some(s) => {
303                s.reset(source)?;
304                s
305            }
306            None => {
307                self.state = Some(FrameDecoderState::new(source)?);
308                self.state.as_mut().unwrap()
309            }
310        };
311        if let Some(dict_id) = state.frame_header.dictionary_id()
312            && dict_id != dict.id()
313        {
314            return Err(err::DictIdMismatch {
315                expected: dict_id,
316                provided: dict.id(),
317            });
318        }
319        state.decoder_scratch.init_from_dict(dict.as_dict());
320        state.using_dict = Some(dict.id());
321        Ok(())
322    }
323
324    /// Add a dictionary that can be selected dynamically by frame dictionary ID.
325    ///
326    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
327    /// registered (either as owned or shared).
328    pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
329        Self::validate_registered_dictionary(&dict)?;
330        let dict_id = dict.id;
331        if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
332            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
333        }
334        self.owned_dicts.insert(dict_id, dict);
335        Ok(())
336    }
337
338    /// Parse and add a serialized dictionary blob.
339    pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
340        let dict = Dictionary::decode_dict(raw_dictionary)?;
341        self.add_dict(dict)
342    }
343
344    /// Add a pre-parsed dictionary handle for reuse across decoders.
345    ///
346    /// This API is available on targets with pointer-width atomics
347    /// (`target_has_atomic = "ptr"`).
348    ///
349    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
350    /// registered (either as owned or shared).
351    #[cfg(target_has_atomic = "ptr")]
352    pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
353        Self::validate_registered_dictionary(dict.as_dict())?;
354        let dict_id = dict.id();
355        if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
356            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
357        }
358        self.shared_dicts.insert(dict_id, dict);
359        Ok(())
360    }
361
362    pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
363        use FrameDecoderError as err;
364        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
365        let owned_dicts = &self.owned_dicts;
366        #[cfg(target_has_atomic = "ptr")]
367        let shared_dicts = &self.shared_dicts;
368
369        let dict = owned_dicts
370            .get(&dict_id)
371            .or_else(|| {
372                #[cfg(target_has_atomic = "ptr")]
373                {
374                    shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
375                }
376                #[cfg(not(target_has_atomic = "ptr"))]
377                {
378                    None
379                }
380            })
381            .ok_or(err::DictNotProvided { dict_id })?;
382        state.decoder_scratch.init_from_dict(dict);
383        state.using_dict = Some(dict_id);
384
385        Ok(())
386    }
387
388    /// Returns how many bytes the frame contains after decompression
389    pub fn content_size(&self) -> u64 {
390        match &self.state {
391            None => 0,
392            Some(s) => s.frame_header.frame_content_size(),
393        }
394    }
395
396    /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
397    pub fn get_checksum_from_data(&self) -> Option<u32> {
398        let state = match &self.state {
399            None => return None,
400            Some(s) => s,
401        };
402
403        state.check_sum
404    }
405
406    /// Returns the checksum that was calculated while decoding.
407    /// Only a sensible value after all decoded bytes have been collected/read from the FrameDecoder
408    #[cfg(feature = "hash")]
409    pub fn get_calculated_checksum(&self) -> Option<u32> {
410        use core::hash::Hasher;
411
412        let state = match &self.state {
413            None => return None,
414            Some(s) => s,
415        };
416        let cksum_64bit = state.decoder_scratch.buffer.hash.finish();
417        //truncate to lower 32bit because reasons...
418        Some(cksum_64bit as u32)
419    }
420
421    /// Counter for how many bytes have been consumed while decoding the frame
422    pub fn bytes_read_from_source(&self) -> u64 {
423        let state = match &self.state {
424            None => return 0,
425            Some(s) => s,
426        };
427        state.bytes_read_counter
428    }
429
430    /// Whether the current frames last block has been decoded yet
431    /// If this returns true you can call the drain* functions to get all content
432    /// (the read() function will drain automatically if this returns true)
433    pub fn is_finished(&self) -> bool {
434        let state = match &self.state {
435            None => return true,
436            Some(s) => s,
437        };
438        if state.frame_header.descriptor.content_checksum_flag() {
439            state.frame_finished && state.check_sum.is_some()
440        } else {
441            state.frame_finished
442        }
443    }
444
445    /// Counter for how many blocks have already been decoded
446    pub fn blocks_decoded(&self) -> usize {
447        let state = match &self.state {
448            None => return 0,
449            Some(s) => s,
450        };
451        state.block_counter
452    }
453
454    /// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
455    /// The Strategy influences how many blocks will be decoded before the function returns
456    /// This is important if you want to manage memory consumption carefully. If you don't care
457    /// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
458    pub fn decode_blocks(
459        &mut self,
460        mut source: impl Read,
461        strat: BlockDecodingStrategy,
462    ) -> Result<bool, FrameDecoderError> {
463        use FrameDecoderError as err;
464        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
465
466        let mut block_dec = decoding::block_decoder::new();
467
468        let buffer_size_before = state.decoder_scratch.buffer.len();
469        let block_counter_before = state.block_counter;
470        loop {
471            vprintln!("################");
472            vprintln!("Next Block: {}", state.block_counter);
473            vprintln!("################");
474            let (block_header, block_header_size) = block_dec
475                .read_block_header(&mut source)
476                .map_err(err::FailedToReadBlockHeader)?;
477            state.bytes_read_counter += u64::from(block_header_size);
478
479            vprintln!();
480            vprintln!(
481                "Found {} block with size: {}, which will be of size: {}",
482                block_header.block_type,
483                block_header.content_size,
484                block_header.decompressed_size
485            );
486
487            let bytes_read_in_block_body = block_dec
488                .decode_block_content(&block_header, &mut state.decoder_scratch, &mut source)
489                .map_err(err::FailedToReadBlockBody)?;
490            state.bytes_read_counter += bytes_read_in_block_body;
491
492            state.block_counter += 1;
493
494            vprintln!("Output: {}", state.decoder_scratch.buffer.len());
495
496            if block_header.last_block {
497                state.frame_finished = true;
498                if state.frame_header.descriptor.content_checksum_flag() {
499                    let mut chksum = [0u8; 4];
500                    source
501                        .read_exact(&mut chksum)
502                        .map_err(err::FailedToReadChecksum)?;
503                    state.bytes_read_counter += 4;
504                    let chksum = u32::from_le_bytes(chksum);
505                    state.check_sum = Some(chksum);
506                }
507                break;
508            }
509
510            match strat {
511                BlockDecodingStrategy::All => { /* keep going */ }
512                BlockDecodingStrategy::UptoBlocks(n) => {
513                    if state.block_counter - block_counter_before >= n {
514                        break;
515                    }
516                }
517                BlockDecodingStrategy::UptoBytes(n) => {
518                    if state.decoder_scratch.buffer.len() - buffer_size_before >= n {
519                        break;
520                    }
521                }
522            }
523        }
524
525        Ok(state.frame_finished)
526    }
527
528    /// Collect bytes and retain window_size bytes while decoding is still going on.
529    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
530    pub fn collect(&mut self) -> Option<Vec<u8>> {
531        let finished = self.is_finished();
532        let state = self.state.as_mut()?;
533        if finished {
534            Some(state.decoder_scratch.buffer.drain())
535        } else {
536            state.decoder_scratch.buffer.drain_to_window_size()
537        }
538    }
539
540    /// Collect bytes and retain window_size bytes while decoding is still going on.
541    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
542    pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
543        let finished = self.is_finished();
544        let state = match &mut self.state {
545            None => return Ok(0),
546            Some(s) => s,
547        };
548        if finished {
549            state.decoder_scratch.buffer.drain_to_writer(w)
550        } else {
551            state.decoder_scratch.buffer.drain_to_window_size_writer(w)
552        }
553    }
554
555    /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
556    /// because window_size bytes need to be retained for decoding.
557    /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
558    pub fn can_collect(&self) -> usize {
559        let finished = self.is_finished();
560        let state = match &self.state {
561            None => return 0,
562            Some(s) => s,
563        };
564        if finished {
565            state.decoder_scratch.buffer.can_drain()
566        } else {
567            state
568                .decoder_scratch
569                .buffer
570                .can_drain_to_window_size()
571                .unwrap_or(0)
572        }
573    }
574
575    /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
576    /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
577    ///
578    /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
579    /// which try to serve an old-style c api
580    ///
581    /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
582    /// input will not make any progress!
583    ///
584    /// Note that no kind of block can be bigger than 128kb.
585    /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
586    ///
587    /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
588    pub fn decode_from_to(
589        &mut self,
590        source: &[u8],
591        target: &mut [u8],
592    ) -> Result<(usize, usize), FrameDecoderError> {
593        use FrameDecoderError as err;
594        let bytes_read_at_start = match &self.state {
595            Some(s) => s.bytes_read_counter,
596            None => 0,
597        };
598
599        if !self.is_finished() || self.state.is_none() {
600            let mut mt_source = source;
601
602            if self.state.is_none() {
603                self.init(&mut mt_source)?;
604            }
605
606            //pseudo block to scope "state" so we can borrow self again after the block
607            {
608                let state = match &mut self.state {
609                    Some(s) => s,
610                    None => panic!("Bug in library"),
611                };
612                let mut block_dec = decoding::block_decoder::new();
613
614                if state.frame_header.descriptor.content_checksum_flag()
615                    && state.frame_finished
616                    && state.check_sum.is_none()
617                {
618                    //this block is needed if the checksum were the only 4 bytes that were not included in the last decode_from_to call for a frame
619                    if mt_source.len() >= 4 {
620                        let chksum = mt_source[..4].try_into().expect("optimized away");
621                        state.bytes_read_counter += 4;
622                        let chksum = u32::from_le_bytes(chksum);
623                        state.check_sum = Some(chksum);
624                    }
625                    return Ok((4, 0));
626                }
627
628                loop {
629                    //check if there are enough bytes for the next header
630                    if mt_source.len() < 3 {
631                        break;
632                    }
633                    let (block_header, block_header_size) = block_dec
634                        .read_block_header(&mut mt_source)
635                        .map_err(err::FailedToReadBlockHeader)?;
636
637                    // check the needed size for the block before updating counters.
638                    // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
639                    if mt_source.len() < block_header.content_size as usize {
640                        break;
641                    }
642                    state.bytes_read_counter += u64::from(block_header_size);
643
644                    let bytes_read_in_block_body = block_dec
645                        .decode_block_content(
646                            &block_header,
647                            &mut state.decoder_scratch,
648                            &mut mt_source,
649                        )
650                        .map_err(err::FailedToReadBlockBody)?;
651                    state.bytes_read_counter += bytes_read_in_block_body;
652                    state.block_counter += 1;
653
654                    if block_header.last_block {
655                        state.frame_finished = true;
656                        if state.frame_header.descriptor.content_checksum_flag() {
657                            //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
658                            if mt_source.len() >= 4 {
659                                let chksum = mt_source[..4].try_into().expect("optimized away");
660                                state.bytes_read_counter += 4;
661                                let chksum = u32::from_le_bytes(chksum);
662                                state.check_sum = Some(chksum);
663                            }
664                        }
665                        break;
666                    }
667                }
668            }
669        }
670
671        let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
672        let bytes_read_at_end = match &mut self.state {
673            Some(s) => s.bytes_read_counter,
674            None => panic!("Bug in library"),
675        };
676        let read_len = bytes_read_at_end - bytes_read_at_start;
677        Ok((read_len as usize, result_len))
678    }
679
680    /// Decode multiple frames into the output slice.
681    ///
682    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
683    /// skipped during decode.
684    ///
685    /// `output` must be large enough to hold the decompressed data. If you don't know
686    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
687    ///
688    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
689    ///
690    /// Returns the number of bytes written to `output`.
691    pub fn decode_all(
692        &mut self,
693        input: &[u8],
694        output: &mut [u8],
695    ) -> Result<usize, FrameDecoderError> {
696        self.decode_all_impl(input, output, |this, src| this.init(src))
697    }
698
699    /// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
700    ///
701    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
702    /// skipped during decode.
703    ///
704    /// `output` must be large enough to hold the decompressed data. If you don't know
705    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
706    ///
707    /// This calls [`FrameDecoder::init_with_dict_handle`], and all bytes currently in the
708    /// decoder will be lost.
709    ///
710    /// # Warning
711    ///
712    /// Each decoded frame is initialized with `dict`, even when a frame header
713    /// omits the optional dictionary ID. Callers must only use this API when
714    /// they already know the input frames were encoded with the provided
715    /// dictionary; otherwise decoded output can be silently corrupted.
716    pub fn decode_all_with_dict_handle(
717        &mut self,
718        input: &[u8],
719        output: &mut [u8],
720        dict: &DictionaryHandle,
721    ) -> Result<usize, FrameDecoderError> {
722        self.decode_all_impl(input, output, |this, src| {
723            this.init_with_dict_handle(src, dict)
724        })
725    }
726
727    fn decode_all_impl(
728        &mut self,
729        mut input: &[u8],
730        mut output: &mut [u8],
731        mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
732    ) -> Result<usize, FrameDecoderError> {
733        let mut total_bytes_written = 0;
734        while !input.is_empty() {
735            match init_frame(self, &mut input) {
736                Ok(_) => {}
737                Err(FrameDecoderError::ReadFrameHeaderError(
738                    crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
739                )) => {
740                    input = input
741                        .get(length as usize..)
742                        .ok_or(FrameDecoderError::FailedToSkipFrame)?;
743                    continue;
744                }
745                Err(e) => return Err(e),
746            };
747            loop {
748                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
749                let bytes_written = self
750                    .read(output)
751                    .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
752                output = &mut output[bytes_written..];
753                total_bytes_written += bytes_written;
754                if self.can_collect() != 0 {
755                    return Err(FrameDecoderError::TargetTooSmall);
756                }
757                if self.is_finished() {
758                    break;
759                }
760            }
761        }
762
763        Ok(total_bytes_written)
764    }
765
766    /// Decode multiple frames into the output slice using a serialized dictionary.
767    ///
768    /// # Warning
769    ///
770    /// Each decoded frame is initialized with the parsed dictionary, even when a
771    /// frame header omits the optional dictionary ID. Callers must only use this
772    /// API when they already know the input frames were encoded with that
773    /// dictionary; otherwise decoded output can be silently corrupted.
774    pub fn decode_all_with_dict_bytes(
775        &mut self,
776        input: &[u8],
777        output: &mut [u8],
778        raw_dictionary: &[u8],
779    ) -> Result<usize, FrameDecoderError> {
780        let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
781        self.decode_all_with_dict_handle(input, output, &dict)
782    }
783
784    /// Decode multiple frames into the extra capacity of the output vector.
785    ///
786    /// `input` must contain an exact number of frames.
787    ///
788    /// `output` must have enough extra capacity to hold the decompressed data.
789    /// This function will not reallocate or grow the vector. If you don't know
790    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
791    ///
792    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
793    ///
794    /// The length of the output vector is updated to include the decompressed data.
795    /// The length is not changed if an error occurs.
796    pub fn decode_all_to_vec(
797        &mut self,
798        input: &[u8],
799        output: &mut Vec<u8>,
800    ) -> Result<(), FrameDecoderError> {
801        let len = output.len();
802        let cap = output.capacity();
803        output.resize(cap, 0);
804        match self.decode_all(input, &mut output[len..]) {
805            Ok(bytes_written) => {
806                let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
807                output.resize(new_len, 0);
808                Ok(())
809            }
810            Err(e) => {
811                output.resize(len, 0);
812                Err(e)
813            }
814        }
815    }
816}
817
818/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
819/// this will retain window_size bytes, else it will drain it completely
820impl Read for FrameDecoder {
821    fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
822        let state = match &mut self.state {
823            None => return Ok(0),
824            Some(s) => s,
825        };
826        if state.frame_finished {
827            state.decoder_scratch.buffer.read_all(target)
828        } else {
829            state.decoder_scratch.buffer.read(target)
830        }
831    }
832}
833
#[cfg(test)]
mod tests {
    extern crate std;

    use super::{DictionaryHandle, FrameDecoder};
    use crate::encoding::{CompressionLevel, FrameCompressor};
    use alloc::vec::Vec;

    /// Resetting with a dictionary handle must record that handle's ID as the dictionary
    /// in use, even though the frame header itself carries no dictionary ID.
    #[test]
    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
        // Compress a small payload with the default level into an in-memory frame.
        let plain = b"reset-without-dict-id";
        let mut frame_bytes = Vec::new();
        let mut encoder = FrameCompressor::new(CompressionLevel::Default);
        encoder.set_source(plain.as_slice());
        encoder.set_drain(&mut frame_bytes);
        encoder.compress();

        let dictionary =
            DictionaryHandle::decode_dict(include_bytes!("../../dict_tests/dictionary"))
                .expect("dictionary should parse");

        let mut frame_decoder = FrameDecoder::new();
        frame_decoder
            .reset_with_dict_handle(frame_bytes.as_slice(), &dictionary)
            .expect("reset should succeed");

        let decoder_state = frame_decoder
            .state
            .as_ref()
            .expect("state should be initialized");
        // The header has no dictionary ID, yet the supplied handle is the one in use.
        assert!(decoder_state.frame_header.dictionary_id().is_none());
        assert_eq!(decoder_state.using_dict, Some(dictionary.id()));
    }
}