structured_zstd/decoding/frame_decoder.rs
1//! Framedecoder is the main low-level struct users interact with to decode zstd frames
2//!
3//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
4//! decompressed independently of other frames. This module contains structures
5//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
10use crate::decoding::errors::FrameDecoderError;
11use crate::decoding::scratch::DecoderScratch;
12use crate::io::{Error, Read, Write};
13use alloc::collections::BTreeMap;
14use alloc::vec::Vec;
15use core::convert::TryInto;
16
17use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
18
19/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
20///
21/// This decoder is able to decode frames only partially and gives control
22/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
23/// It reads bytes as needed from a provided source and can be read from to collect partial results.
24///
25/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
26/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
27///
28/// Workflow is as follows:
29/// ```
30/// use structured_zstd::decoding::BlockDecodingStrategy;
31///
32/// # #[cfg(feature = "std")]
33/// use std::io::{Read, Write};
34///
35/// // no_std environments can use the crate's own Read traits
36/// # #[cfg(not(feature = "std"))]
37/// use structured_zstd::io::{Read, Write};
38///
39/// fn decode_this(mut file: impl Read) {
40/// //Create a new decoder
41/// let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
42/// let mut result = Vec::new();
43///
44/// // Use reset or init to make the decoder ready to decode the frame from the io::Read
45/// frame_dec.reset(&mut file).unwrap();
46///
47/// // Loop until the frame has been decoded completely
48/// while !frame_dec.is_finished() {
49/// // decode (roughly) batch_size many bytes
50/// frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
51///
52/// // read from the decoder to collect bytes from the internal buffer
53/// let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
54///
55/// // then do something with it
56/// do_something(&result[0..bytes_read]);
57/// }
58///
59/// // handle the last chunk of data
60/// while frame_dec.can_collect() > 0 {
61/// let x = frame_dec.read(result.as_mut_slice()).unwrap();
62///
63/// do_something(&result[0..x]);
64/// }
65/// }
66///
67/// fn do_something(data: &[u8]) {
68/// # #[cfg(feature = "std")]
69/// std::io::stdout().write_all(data).unwrap();
70/// }
71/// ```
72pub struct FrameDecoder {
73 state: Option<FrameDecoderState>,
74 owned_dicts: BTreeMap<u32, Dictionary>,
75 #[cfg(target_has_atomic = "ptr")]
76 shared_dicts: BTreeMap<u32, DictionaryHandle>,
77 #[cfg(not(target_has_atomic = "ptr"))]
78 shared_dicts: (),
79}
80
/// Everything the decoder tracks for the frame that is currently being decoded.
struct FrameDecoderState {
    /// Header of the current frame, as returned by `frame::read_frame_header`.
    pub frame_header: frame::FrameHeader,
    /// Scratch space used while decoding blocks; also holds the decode buffer.
    decoder_scratch: DecoderScratch,
    /// Set once a block flagged as the last block of the frame has been decoded.
    frame_finished: bool,
    /// Number of blocks decoded so far in this frame.
    block_counter: usize,
    /// Bytes consumed from the source for this frame: frame header, block
    /// headers, block bodies and (if read) the 4 checksum bytes.
    bytes_read_counter: u64,
    /// Checksum read from the end of the frame; stays `None` until it has been read.
    check_sum: Option<u32>,
    /// ID of the dictionary the scratch space was initialized from, if any.
    using_dict: Option<u32>,
}
90
/// Controls how much work a single call to [`FrameDecoder::decode_blocks`] performs.
///
/// Every call decodes at least one block; the limit is only checked after a
/// block has been decoded, and decoding always stops at the frame's last block.
pub enum BlockDecodingStrategy {
    /// Keep decoding until the last block of the frame has been decoded.
    All,
    /// Stop once at least this many blocks have been decoded by the current call.
    UptoBlocks(usize),
    /// Stop once the decode buffer has grown by at least this many bytes during the current call.
    UptoBytes(usize),
}
96
97impl FrameDecoderState {
98 /// Read the frame header from `source` and create a new decoder state.
99 ///
100 /// Pre-allocates the decode buffer to `window_size` so the first block
101 /// does not trigger incremental growth from zero capacity.
102 pub fn new(source: impl Read) -> Result<FrameDecoderState, FrameDecoderError> {
103 let (frame, header_size) = frame::read_frame_header(source)?;
104 let window_size = frame.window_size()?;
105
106 if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
107 return Err(FrameDecoderError::WindowSizeTooBig {
108 requested: window_size,
109 });
110 }
111
112 let mut decoder_scratch = DecoderScratch::new(window_size as usize);
113 decoder_scratch.buffer.reserve(window_size as usize);
114 Ok(FrameDecoderState {
115 frame_header: frame,
116 frame_finished: false,
117 block_counter: 0,
118 decoder_scratch,
119 bytes_read_counter: u64::from(header_size),
120 check_sum: None,
121 using_dict: None,
122 })
123 }
124
125 /// Reset this state for a new frame read from `source`, reusing existing allocations.
126 ///
127 /// `DecodeBuffer::reset` reserves `window_size` internally, so no
128 /// additional frame-level reservation is needed here. Further buffer
129 /// growth during decoding is performed on demand by the active block path.
130 pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
131 let (frame_header, header_size) = frame::read_frame_header(source)?;
132 let window_size = frame_header.window_size()?;
133
134 if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
135 return Err(FrameDecoderError::WindowSizeTooBig {
136 requested: window_size,
137 });
138 }
139
140 self.frame_header = frame_header;
141 self.frame_finished = false;
142 self.block_counter = 0;
143 self.decoder_scratch.reset(window_size as usize);
144 self.bytes_read_counter = u64::from(header_size);
145 self.check_sum = None;
146 self.using_dict = None;
147 Ok(())
148 }
149}
150
151impl Default for FrameDecoder {
152 fn default() -> Self {
153 Self::new()
154 }
155}
156
157impl FrameDecoder {
158 /// This will create a new decoder without allocating anything yet.
159 /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
160 /// else they just reset these buffers with not further allocations
161 pub fn new() -> FrameDecoder {
162 FrameDecoder {
163 state: None,
164 owned_dicts: BTreeMap::new(),
165 #[cfg(target_has_atomic = "ptr")]
166 shared_dicts: BTreeMap::new(),
167 #[cfg(not(target_has_atomic = "ptr"))]
168 shared_dicts: (),
169 }
170 }
171
/// Whether a shared dictionary handle is registered under `dict_id`.
#[cfg(target_has_atomic = "ptr")]
fn shared_dict_exists(&self, dict_id: u32) -> bool {
    self.shared_dicts.contains_key(&dict_id)
}

/// Targets without pointer-width atomics keep no shared handles (`shared_dicts`
/// is `()`), so no ID can ever be registered there.
#[cfg(not(target_has_atomic = "ptr"))]
fn shared_dict_exists(&self, _dict_id: u32) -> bool {
    false
}
181
182 fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
183 use crate::decoding::errors::DictionaryDecodeError as dict_err;
184
185 if dict.id == 0 {
186 return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
187 }
188 if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
189 return Err(FrameDecoderError::from(
190 dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
191 ));
192 }
193 Ok(())
194 }
195
/// init() will allocate all needed buffers if it is the first time this decoder is used,
/// else it just resets these buffers without further allocations.
///
/// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
///
/// Equivalent to reset(), to which this call is forwarded unchanged.
pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
    self.reset(source)
}
205
/// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
///
/// This simply forwards to [`FrameDecoder::reset_with_dict_handle`].
///
/// If the frame header has a dictionary ID, this validates it against
/// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
///
/// If the header omits the optional dictionary ID, this still applies the
/// provided dictionary handle.
///
/// # Warning
///
/// This method always applies `dict` unless the frame header contains a
/// non-matching dictionary ID. Callers must only use this API when they
/// already know the frame was encoded with the provided dictionary, even if
/// the frame header omits the dictionary ID or encodes an explicit
/// dictionary ID of `0`.
///
/// Passing a dictionary for a frame that was not encoded with it can
/// silently corrupt the decoded output.
pub fn init_with_dict_handle(
    &mut self,
    source: impl Read,
    dict: &DictionaryHandle,
) -> Result<(), FrameDecoderError> {
    self.reset_with_dict_handle(source, dict)
}
231
232 /// reset() will allocate all needed buffers if it is the first time this decoder is used
233 /// else they just reset these buffers with not further allocations
234 ///
235 /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
236 ///
237 /// equivalent to init()
238 pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
239 use FrameDecoderError as err;
240 let dict_id = match &mut self.state {
241 Some(s) => {
242 s.reset(source)?;
243 s.frame_header.dictionary_id()
244 }
245 None => {
246 self.state = Some(FrameDecoderState::new(source)?);
247 self.state
248 .as_ref()
249 .and_then(|state| state.frame_header.dictionary_id())
250 }
251 };
252 if let Some(dict_id) = dict_id {
253 let state = self.state.as_mut().expect("state initialized");
254 let owned_dicts = &self.owned_dicts;
255 #[cfg(target_has_atomic = "ptr")]
256 let shared_dicts = &self.shared_dicts;
257 let dict = owned_dicts
258 .get(&dict_id)
259 .or_else(|| {
260 #[cfg(target_has_atomic = "ptr")]
261 {
262 shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
263 }
264 #[cfg(not(target_has_atomic = "ptr"))]
265 {
266 None
267 }
268 })
269 .ok_or(err::DictNotProvided { dict_id })?;
270 state.decoder_scratch.init_from_dict(dict);
271 state.using_dict = Some(dict_id);
272 }
273 Ok(())
274 }
275
276 /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
277 ///
278 /// If the frame header has a dictionary ID, this validates it against
279 /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
280 ///
281 /// If the header omits the optional dictionary ID, this still applies the
282 /// provided dictionary handle.
283 ///
284 /// # Warning
285 ///
286 /// This method always applies `dict` unless the frame header contains a
287 /// non-matching dictionary ID. Callers must only use this API when they
288 /// already know the frame was encoded with the provided dictionary, even if
289 /// the frame header omits the dictionary ID or encodes an explicit
290 /// dictionary ID of `0`.
291 ///
292 /// Passing a dictionary for a frame that was not encoded with it can
293 /// silently corrupt the decoded output.
294 pub fn reset_with_dict_handle(
295 &mut self,
296 source: impl Read,
297 dict: &DictionaryHandle,
298 ) -> Result<(), FrameDecoderError> {
299 use FrameDecoderError as err;
300 Self::validate_registered_dictionary(dict.as_dict())?;
301 let state = match &mut self.state {
302 Some(s) => {
303 s.reset(source)?;
304 s
305 }
306 None => {
307 self.state = Some(FrameDecoderState::new(source)?);
308 self.state.as_mut().unwrap()
309 }
310 };
311 if let Some(dict_id) = state.frame_header.dictionary_id()
312 && dict_id != dict.id()
313 {
314 return Err(err::DictIdMismatch {
315 expected: dict_id,
316 provided: dict.id(),
317 });
318 }
319 state.decoder_scratch.init_from_dict(dict.as_dict());
320 state.using_dict = Some(dict.id());
321 Ok(())
322 }
323
324 /// Add a dictionary that can be selected dynamically by frame dictionary ID.
325 ///
326 /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
327 /// registered (either as owned or shared).
328 pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
329 Self::validate_registered_dictionary(&dict)?;
330 let dict_id = dict.id;
331 if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
332 return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
333 }
334 self.owned_dicts.insert(dict_id, dict);
335 Ok(())
336 }
337
338 /// Parse and add a serialized dictionary blob.
339 pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
340 let dict = Dictionary::decode_dict(raw_dictionary)?;
341 self.add_dict(dict)
342 }
343
344 /// Add a pre-parsed dictionary handle for reuse across decoders.
345 ///
346 /// This API is available on targets with pointer-width atomics
347 /// (`target_has_atomic = "ptr"`).
348 ///
349 /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
350 /// registered (either as owned or shared).
351 #[cfg(target_has_atomic = "ptr")]
352 pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
353 Self::validate_registered_dictionary(dict.as_dict())?;
354 let dict_id = dict.id();
355 if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
356 return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
357 }
358 self.shared_dicts.insert(dict_id, dict);
359 Ok(())
360 }
361
362 pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
363 use FrameDecoderError as err;
364 let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
365 let owned_dicts = &self.owned_dicts;
366 #[cfg(target_has_atomic = "ptr")]
367 let shared_dicts = &self.shared_dicts;
368
369 let dict = owned_dicts
370 .get(&dict_id)
371 .or_else(|| {
372 #[cfg(target_has_atomic = "ptr")]
373 {
374 shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
375 }
376 #[cfg(not(target_has_atomic = "ptr"))]
377 {
378 None
379 }
380 })
381 .ok_or(err::DictNotProvided { dict_id })?;
382 state.decoder_scratch.init_from_dict(dict);
383 state.using_dict = Some(dict_id);
384
385 Ok(())
386 }
387
388 /// Returns how many bytes the frame contains after decompression
389 pub fn content_size(&self) -> u64 {
390 match &self.state {
391 None => 0,
392 Some(s) => s.frame_header.frame_content_size(),
393 }
394 }
395
396 /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
397 pub fn get_checksum_from_data(&self) -> Option<u32> {
398 let state = match &self.state {
399 None => return None,
400 Some(s) => s,
401 };
402
403 state.check_sum
404 }
405
/// Returns the checksum that was calculated while decoding.
/// Only a sensible value after all decoded bytes have been collected/read from the FrameDecoder
///
/// Returns `None` if the decoder has not been initialized.
#[cfg(feature = "hash")]
pub fn get_calculated_checksum(&self) -> Option<u32> {
    use core::hash::Hasher;

    let state = self.state.as_ref()?;
    let digest = state.decoder_scratch.buffer.hash.finish();
    // The frame stores only 4 checksum bytes (see RFC 8878, Content_Checksum),
    // so keep just the low 32 bits of the 64-bit digest for comparison.
    Some(digest as u32)
}
420
421 /// Counter for how many bytes have been consumed while decoding the frame
422 pub fn bytes_read_from_source(&self) -> u64 {
423 let state = match &self.state {
424 None => return 0,
425 Some(s) => s,
426 };
427 state.bytes_read_counter
428 }
429
430 /// Whether the current frames last block has been decoded yet
431 /// If this returns true you can call the drain* functions to get all content
432 /// (the read() function will drain automatically if this returns true)
433 pub fn is_finished(&self) -> bool {
434 let state = match &self.state {
435 None => return true,
436 Some(s) => s,
437 };
438 if state.frame_header.descriptor.content_checksum_flag() {
439 state.frame_finished && state.check_sum.is_some()
440 } else {
441 state.frame_finished
442 }
443 }
444
445 /// Counter for how many blocks have already been decoded
446 pub fn blocks_decoded(&self) -> usize {
447 let state = match &self.state {
448 None => return 0,
449 Some(s) => s,
450 };
451 state.block_counter
452 }
453
/// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
/// The Strategy influences how many blocks will be decoded before the function returns
/// This is important if you want to manage memory consumption carefully. If you don't care
/// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
///
/// At least one block is decoded per call; the strategy limit is only checked
/// after a block has been decoded. When the last block of a frame with the
/// content checksum flag is decoded, the 4 checksum bytes are read from
/// `source` as well.
///
/// Returns `Ok(true)` if the last block of the frame has been decoded,
/// `Ok(false)` if the strategy limit stopped decoding earlier.
///
/// # Errors
///
/// * `NotYetInitialized` if `init()`/`reset()` has not been called.
/// * `FailedToReadBlockHeader` / `FailedToReadBlockBody` if a block cannot be read or decoded.
/// * `FailedToReadChecksum` if the trailing checksum cannot be read.
pub fn decode_blocks(
    &mut self,
    mut source: impl Read,
    strat: BlockDecodingStrategy,
) -> Result<bool, FrameDecoderError> {
    use FrameDecoderError as err;
    let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;

    let mut block_dec = decoding::block_decoder::new();

    // Snapshots so the strategy limits measure progress made by this call only.
    let buffer_size_before = state.decoder_scratch.buffer.len();
    let block_counter_before = state.block_counter;
    loop {
        vprintln!("################");
        vprintln!("Next Block: {}", state.block_counter);
        vprintln!("################");
        let (block_header, block_header_size) = block_dec
            .read_block_header(&mut source)
            .map_err(err::FailedToReadBlockHeader)?;
        state.bytes_read_counter += u64::from(block_header_size);

        vprintln!();
        vprintln!(
            "Found {} block with size: {}, which will be of size: {}",
            block_header.block_type,
            block_header.content_size,
            block_header.decompressed_size
        );

        let bytes_read_in_block_body = block_dec
            .decode_block_content(&block_header, &mut state.decoder_scratch, &mut source)
            .map_err(err::FailedToReadBlockBody)?;
        state.bytes_read_counter += bytes_read_in_block_body;

        state.block_counter += 1;

        vprintln!("Output: {}", state.decoder_scratch.buffer.len());

        if block_header.last_block {
            state.frame_finished = true;
            // The checksum, when present, directly follows the last block.
            if state.frame_header.descriptor.content_checksum_flag() {
                let mut chksum = [0u8; 4];
                source
                    .read_exact(&mut chksum)
                    .map_err(err::FailedToReadChecksum)?;
                state.bytes_read_counter += 4;
                let chksum = u32::from_le_bytes(chksum);
                state.check_sum = Some(chksum);
            }
            break;
        }

        // Not the last block: stop early if this call has reached its limit.
        match strat {
            BlockDecodingStrategy::All => { /* keep going */ }
            BlockDecodingStrategy::UptoBlocks(n) => {
                if state.block_counter - block_counter_before >= n {
                    break;
                }
            }
            BlockDecodingStrategy::UptoBytes(n) => {
                if state.decoder_scratch.buffer.len() - buffer_size_before >= n {
                    break;
                }
            }
        }
    }

    Ok(state.frame_finished)
}
527
528 /// Collect bytes and retain window_size bytes while decoding is still going on.
529 /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
530 pub fn collect(&mut self) -> Option<Vec<u8>> {
531 let finished = self.is_finished();
532 let state = self.state.as_mut()?;
533 if finished {
534 Some(state.decoder_scratch.buffer.drain())
535 } else {
536 state.decoder_scratch.buffer.drain_to_window_size()
537 }
538 }
539
540 /// Collect bytes and retain window_size bytes while decoding is still going on.
541 /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
542 pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
543 let finished = self.is_finished();
544 let state = match &mut self.state {
545 None => return Ok(0),
546 Some(s) => s,
547 };
548 if finished {
549 state.decoder_scratch.buffer.drain_to_writer(w)
550 } else {
551 state.decoder_scratch.buffer.drain_to_window_size_writer(w)
552 }
553 }
554
555 /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
556 /// because window_size bytes need to be retained for decoding.
557 /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
558 pub fn can_collect(&self) -> usize {
559 let finished = self.is_finished();
560 let state = match &self.state {
561 None => return 0,
562 Some(s) => s,
563 };
564 if finished {
565 state.decoder_scratch.buffer.can_drain()
566 } else {
567 state
568 .decoder_scratch
569 .buffer
570 .can_drain_to_window_size()
571 .unwrap_or(0)
572 }
573 }
574
575 /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
576 /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
577 ///
578 /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
579 /// which try to serve an old-style c api
580 ///
581 /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
582 /// input will not make any progress!
583 ///
584 /// Note that no kind of block can be bigger than 128kb.
585 /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
586 ///
587 /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
588 pub fn decode_from_to(
589 &mut self,
590 source: &[u8],
591 target: &mut [u8],
592 ) -> Result<(usize, usize), FrameDecoderError> {
593 use FrameDecoderError as err;
594 let bytes_read_at_start = match &self.state {
595 Some(s) => s.bytes_read_counter,
596 None => 0,
597 };
598
599 if !self.is_finished() || self.state.is_none() {
600 let mut mt_source = source;
601
602 if self.state.is_none() {
603 self.init(&mut mt_source)?;
604 }
605
606 //pseudo block to scope "state" so we can borrow self again after the block
607 {
608 let state = match &mut self.state {
609 Some(s) => s,
610 None => panic!("Bug in library"),
611 };
612 let mut block_dec = decoding::block_decoder::new();
613
614 if state.frame_header.descriptor.content_checksum_flag()
615 && state.frame_finished
616 && state.check_sum.is_none()
617 {
618 //this block is needed if the checksum were the only 4 bytes that were not included in the last decode_from_to call for a frame
619 if mt_source.len() >= 4 {
620 let chksum = mt_source[..4].try_into().expect("optimized away");
621 state.bytes_read_counter += 4;
622 let chksum = u32::from_le_bytes(chksum);
623 state.check_sum = Some(chksum);
624 }
625 return Ok((4, 0));
626 }
627
628 loop {
629 //check if there are enough bytes for the next header
630 if mt_source.len() < 3 {
631 break;
632 }
633 let (block_header, block_header_size) = block_dec
634 .read_block_header(&mut mt_source)
635 .map_err(err::FailedToReadBlockHeader)?;
636
637 // check the needed size for the block before updating counters.
638 // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
639 if mt_source.len() < block_header.content_size as usize {
640 break;
641 }
642 state.bytes_read_counter += u64::from(block_header_size);
643
644 let bytes_read_in_block_body = block_dec
645 .decode_block_content(
646 &block_header,
647 &mut state.decoder_scratch,
648 &mut mt_source,
649 )
650 .map_err(err::FailedToReadBlockBody)?;
651 state.bytes_read_counter += bytes_read_in_block_body;
652 state.block_counter += 1;
653
654 if block_header.last_block {
655 state.frame_finished = true;
656 if state.frame_header.descriptor.content_checksum_flag() {
657 //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
658 if mt_source.len() >= 4 {
659 let chksum = mt_source[..4].try_into().expect("optimized away");
660 state.bytes_read_counter += 4;
661 let chksum = u32::from_le_bytes(chksum);
662 state.check_sum = Some(chksum);
663 }
664 }
665 break;
666 }
667 }
668 }
669 }
670
671 let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
672 let bytes_read_at_end = match &mut self.state {
673 Some(s) => s.bytes_read_counter,
674 None => panic!("Bug in library"),
675 };
676 let read_len = bytes_read_at_end - bytes_read_at_start;
677 Ok((read_len as usize, result_len))
678 }
679
/// Decode multiple frames into the output slice.
///
/// `input` must contain an exact number of frames. Skippable frames are allowed and will be
/// skipped during decode.
///
/// `output` must be large enough to hold the decompressed data. If you don't know
/// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
///
/// This calls [`FrameDecoder::init`] for every frame, and all bytes currently in the decoder will be lost.
///
/// Returns the number of bytes written to `output`.
///
/// # Errors
///
/// Returns [`FrameDecoderError::TargetTooSmall`] if `output` cannot hold the
/// decoded data, or any error raised while initializing or decoding a frame.
pub fn decode_all(
    &mut self,
    input: &[u8],
    output: &mut [u8],
) -> Result<usize, FrameDecoderError> {
    self.decode_all_impl(input, output, |this, src| this.init(src))
}
698
/// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
///
/// `input` must contain an exact number of frames. Skippable frames are allowed and will be
/// skipped during decode.
///
/// `output` must be large enough to hold the decompressed data. If you don't know
/// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
///
/// This calls [`FrameDecoder::init_with_dict_handle`] for every frame, and all bytes currently in the
/// decoder will be lost.
///
/// Returns the number of bytes written to `output`.
///
/// # Warning
///
/// Each decoded frame is initialized with `dict`, even when a frame header
/// omits the optional dictionary ID. Callers must only use this API when
/// they already know the input frames were encoded with the provided
/// dictionary; otherwise decoded output can be silently corrupted.
pub fn decode_all_with_dict_handle(
    &mut self,
    input: &[u8],
    output: &mut [u8],
    dict: &DictionaryHandle,
) -> Result<usize, FrameDecoderError> {
    self.decode_all_impl(input, output, |this, src| {
        this.init_with_dict_handle(src, dict)
    })
}
726
/// Shared driver behind the `decode_all*` entry points.
///
/// Repeatedly calls `init_frame` to set up the next frame from `input`,
/// decodes it and drains the result into `output` until `input` is empty.
/// Returns the total number of bytes written to `output`.
///
/// # Errors
///
/// * `FailedToSkipFrame` if a skippable frame claims more bytes than `input` has left.
/// * `TargetTooSmall` if decoded bytes remain collectable after draining into `output`.
/// * Any other error from `init_frame`, block decoding or draining.
fn decode_all_impl(
    &mut self,
    mut input: &[u8],
    mut output: &mut [u8],
    mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
) -> Result<usize, FrameDecoderError> {
    let mut total_bytes_written = 0;
    while !input.is_empty() {
        match init_frame(self, &mut input) {
            Ok(_) => {}
            // A skippable frame is reported as an error by the header parser:
            // jump over its payload and try the next frame.
            Err(FrameDecoderError::ReadFrameHeaderError(
                crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
            )) => {
                input = input
                    .get(length as usize..)
                    .ok_or(FrameDecoderError::FailedToSkipFrame)?;
                continue;
            }
            Err(e) => return Err(e),
        };
        loop {
            // Decode in batches so the internal buffer stays bounded.
            self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
            let bytes_written = self
                .read(output)
                .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
            // Advance the output window past what was just written.
            output = &mut output[bytes_written..];
            total_bytes_written += bytes_written;
            // Anything still collectable after the read did not fit into `output`.
            if self.can_collect() != 0 {
                return Err(FrameDecoderError::TargetTooSmall);
            }
            if self.is_finished() {
                break;
            }
        }
    }

    Ok(total_bytes_written)
}
765
766 /// Decode multiple frames into the output slice using a serialized dictionary.
767 ///
768 /// # Warning
769 ///
770 /// Each decoded frame is initialized with the parsed dictionary, even when a
771 /// frame header omits the optional dictionary ID. Callers must only use this
772 /// API when they already know the input frames were encoded with that
773 /// dictionary; otherwise decoded output can be silently corrupted.
774 pub fn decode_all_with_dict_bytes(
775 &mut self,
776 input: &[u8],
777 output: &mut [u8],
778 raw_dictionary: &[u8],
779 ) -> Result<usize, FrameDecoderError> {
780 let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
781 self.decode_all_with_dict_handle(input, output, &dict)
782 }
783
784 /// Decode multiple frames into the extra capacity of the output vector.
785 ///
786 /// `input` must contain an exact number of frames.
787 ///
788 /// `output` must have enough extra capacity to hold the decompressed data.
789 /// This function will not reallocate or grow the vector. If you don't know
790 /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
791 ///
792 /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
793 ///
794 /// The length of the output vector is updated to include the decompressed data.
795 /// The length is not changed if an error occurs.
796 pub fn decode_all_to_vec(
797 &mut self,
798 input: &[u8],
799 output: &mut Vec<u8>,
800 ) -> Result<(), FrameDecoderError> {
801 let len = output.len();
802 let cap = output.capacity();
803 output.resize(cap, 0);
804 match self.decode_all(input, &mut output[len..]) {
805 Ok(bytes_written) => {
806 let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
807 output.resize(new_len, 0);
808 Ok(())
809 }
810 Err(e) => {
811 output.resize(len, 0);
812 Err(e)
813 }
814 }
815 }
816}
817
818/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
819/// this will retain window_size bytes, else it will drain it completely
820impl Read for FrameDecoder {
821 fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
822 let state = match &mut self.state {
823 None => return Ok(0),
824 Some(s) => s,
825 };
826 if state.frame_finished {
827 state.decoder_scratch.buffer.read_all(target)
828 } else {
829 state.decoder_scratch.buffer.read(target)
830 }
831 }
832}
833
#[cfg(test)]
mod tests {
    extern crate std;

    use super::{DictionaryHandle, FrameDecoder};
    use crate::encoding::{CompressionLevel, FrameCompressor};
    use alloc::vec::Vec;

    /// A frame whose header carries no dictionary ID must still get the
    /// explicitly provided dictionary handle applied on reset.
    #[test]
    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
        // Compress a small payload without any dictionary.
        let payload = b"reset-without-dict-id";
        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
        compressor.set_source(payload.as_slice());
        let mut compressed = Vec::new();
        compressor.set_drain(&mut compressed);
        compressor.compress();

        let dict_raw = include_bytes!("../../dict_tests/dictionary");
        let handle = DictionaryHandle::decode_dict(dict_raw).expect("dictionary should parse");

        let mut decoder = FrameDecoder::new();
        decoder
            .reset_with_dict_handle(compressed.as_slice(), &handle)
            .expect("reset should succeed");
        // The header has no dictionary ID, yet the handle's ID is recorded as in use.
        let state = decoder.state.as_ref().expect("state should be initialized");
        assert!(state.frame_header.dictionary_id().is_none());
        assert_eq!(state.using_dict, Some(handle.id()));
    }
}
862}