structured_zstd/decoding/frame_decoder.rs
//! `FrameDecoder` is the main low-level struct users interact with to decode zstd frames.
//!
//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
//! decompressed independently of other frames. This module contains structures
//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::block_decoder::BlockDecoder;
10use crate::decoding::decode_buffer::DecodeBuffer;
11use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
12use crate::decoding::errors::{DecodeBlockContentError, FrameDecoderError};
13use crate::decoding::flat_buf::FlatBuf;
14use crate::decoding::ringbuffer::RingBuffer;
15use crate::decoding::scratch::DecoderScratch;
16use crate::io::{Error, Read, Write};
17use alloc::collections::BTreeMap;
18use alloc::vec::Vec;
19use core::convert::TryInto;
20
21use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
22
23/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
24///
25/// This decoder is able to decode frames only partially and gives control
26/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
27/// It reads bytes as needed from a provided source and can be read from to collect partial results.
28///
29/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
30/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
31///
32/// Workflow is as follows:
33/// ```
34/// use structured_zstd::decoding::BlockDecodingStrategy;
35///
36/// # #[cfg(feature = "std")]
37/// use std::io::{Read, Write};
38///
39/// // no_std environments can use the crate's own Read traits
40/// # #[cfg(not(feature = "std"))]
41/// use structured_zstd::io::{Read, Write};
42///
43/// fn decode_this(mut file: impl Read) {
44/// //Create a new decoder
45/// let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
46/// let mut result = Vec::new();
47///
48/// // Use reset or init to make the decoder ready to decode the frame from the io::Read
49/// frame_dec.reset(&mut file).unwrap();
50///
51/// // Loop until the frame has been decoded completely
52/// while !frame_dec.is_finished() {
53/// // decode (roughly) batch_size many bytes
54/// frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
55///
56/// // read from the decoder to collect bytes from the internal buffer
57/// let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
58///
59/// // then do something with it
60/// do_something(&result[0..bytes_read]);
61/// }
62///
63/// // handle the last chunk of data
64/// while frame_dec.can_collect() > 0 {
65/// let x = frame_dec.read(result.as_mut_slice()).unwrap();
66///
67/// do_something(&result[0..x]);
68/// }
69/// }
70///
71/// fn do_something(data: &[u8]) {
72/// # #[cfg(feature = "std")]
73/// std::io::stdout().write_all(data).unwrap();
74/// }
75/// ```
76pub struct FrameDecoder {
77 state: Option<FrameDecoderState>,
78 owned_dicts: BTreeMap<u32, Dictionary>,
79 #[cfg(target_has_atomic = "ptr")]
80 shared_dicts: BTreeMap<u32, DictionaryHandle>,
81 #[cfg(not(target_has_atomic = "ptr"))]
82 shared_dicts: (),
83 /// `ZSTD_f_zstd1_magicless` — when true, [`init`] / [`reset`]
84 /// expect frames without the 4-byte magic number prefix.
85 /// Default false (standard zstd format).
86 magicless: bool,
87}
88
89/// Backend-tagged decode scratch — chosen at frame-reset time based
90/// on the parsed `FrameHeader.descriptor.single_segment_flag()` and
91/// kept stable through the lifetime of the frame. The match in each
92/// helper below dispatches **once per call** (e.g. once per block in
93/// `decode_block_content`, once per drain in `drain_to_writer`) —
94/// never inside the hot push/repeat loop, which is fully
95/// monomorphised through the `DecoderScratch<B>` generic.
96enum DecoderScratchKind {
97 Ring(DecoderScratch<RingBuffer>),
98 Flat(DecoderScratch<FlatBuf>),
99}
100
101impl DecoderScratchKind {
102 fn new_ring(window_size: usize) -> Self {
103 let mut s = DecoderScratch::<RingBuffer>::new(window_size);
104 s.buffer.reserve(window_size);
105 Self::Ring(s)
106 }
107
108 /// Construct a flat-backed scratch sized for a single-segment
109 /// frame. `frame_content_size` is the upcoming output size in
110 /// bytes (== `window_size` when the flag is set).
111 fn new_flat(frame_content_size: usize) -> Self {
112 let flat = FlatBuf::with_capacity(frame_content_size);
113 // DecoderScratch's default ctor would discard the pre-sized
114 // FlatBuf — go through from_backend so the buffer carries the
115 // capacity the constructor wants.
116 let mut s = DecoderScratch::<FlatBuf>::new(frame_content_size);
117 s.buffer = DecodeBuffer::from_backend(flat, frame_content_size);
118 Self::Flat(s)
119 }
120
121 /// Reset (or transition between) backends for a new frame.
122 /// Reuses the existing `DecoderScratch` allocations (FSE / HUF
123 /// tables, sequence vec, etc.) when the backend kind is unchanged
124 /// — only the underlying buffer is re-sized for the new frame.
125 /// Building a fresh `DecoderScratch` on every frame would
126 /// re-allocate everything and was measured at +255 % vs ring on
127 /// small frames; reusing it keeps the small-frame cost flat.
128 fn reset(&mut self, frame: &frame::FrameHeader, window_size: usize) {
129 if frame.descriptor.single_segment_flag() {
130 match self {
131 Self::Flat(s) => {
132 s.reset(window_size);
133 // DecodeBuffer::reset clears + reserves
134 // window_size; FlatBuf's reserve grows the
135 // backing Vec if the new FCS is larger than
136 // what's already allocated. No alloc when the
137 // previous flat frame had >= this capacity.
138 }
139 Self::Ring(_) => *self = Self::new_flat(window_size),
140 }
141 } else {
142 match self {
143 Self::Ring(s) => s.reset(window_size),
144 Self::Flat(_) => *self = Self::new_ring(window_size),
145 }
146 }
147 }
148
149 fn init_from_dict(&mut self, dict: &Dictionary) {
150 match self {
151 Self::Ring(s) => s.init_from_dict(dict),
152 Self::Flat(s) => s.init_from_dict(dict),
153 }
154 }
155
156 #[inline]
157 fn buffer_len(&self) -> usize {
158 match self {
159 Self::Ring(s) => s.buffer.len(),
160 Self::Flat(s) => s.buffer.len(),
161 }
162 }
163
164 fn buffer_drain(&mut self) -> Vec<u8> {
165 match self {
166 Self::Ring(s) => s.buffer.drain(),
167 Self::Flat(s) => s.buffer.drain(),
168 }
169 }
170
171 fn buffer_drain_to_window_size(&mut self) -> Option<Vec<u8>> {
172 match self {
173 Self::Ring(s) => s.buffer.drain_to_window_size(),
174 Self::Flat(s) => s.buffer.drain_to_window_size(),
175 }
176 }
177
178 fn buffer_drain_to_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
179 match self {
180 Self::Ring(s) => s.buffer.drain_to_writer(sink),
181 Self::Flat(s) => s.buffer.drain_to_writer(sink),
182 }
183 }
184
185 fn buffer_drain_to_window_size_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
186 match self {
187 Self::Ring(s) => s.buffer.drain_to_window_size_writer(sink),
188 Self::Flat(s) => s.buffer.drain_to_window_size_writer(sink),
189 }
190 }
191
192 fn buffer_can_drain(&self) -> usize {
193 match self {
194 Self::Ring(s) => s.buffer.can_drain(),
195 Self::Flat(s) => s.buffer.can_drain(),
196 }
197 }
198
199 fn buffer_can_drain_to_window_size(&self) -> Option<usize> {
200 match self {
201 Self::Ring(s) => s.buffer.can_drain_to_window_size(),
202 Self::Flat(s) => s.buffer.can_drain_to_window_size(),
203 }
204 }
205
206 fn buffer_read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
207 match self {
208 Self::Ring(s) => s.buffer.read(target),
209 Self::Flat(s) => s.buffer.read(target),
210 }
211 }
212
213 fn buffer_read_all(&mut self, target: &mut [u8]) -> Result<usize, Error> {
214 match self {
215 Self::Ring(s) => s.buffer.read_all(target),
216 Self::Flat(s) => s.buffer.read_all(target),
217 }
218 }
219
220 fn decode_block_content<R: Read>(
221 &mut self,
222 decoder: &mut BlockDecoder,
223 header: &crate::blocks::block::BlockHeader,
224 source: R,
225 ) -> Result<u64, DecodeBlockContentError> {
226 match self {
227 Self::Ring(s) => decoder.decode_block_content(header, s, source),
228 Self::Flat(s) => decoder.decode_block_content(header, s, source),
229 }
230 }
231
232 #[cfg(feature = "hash")]
233 fn hash_finish(&self) -> u64 {
234 use core::hash::Hasher;
235 match self {
236 Self::Ring(s) => s.buffer.hash.finish(),
237 Self::Flat(s) => s.buffer.hash.finish(),
238 }
239 }
240}
241
242struct FrameDecoderState {
243 pub frame_header: frame::FrameHeader,
244 decoder_scratch: DecoderScratchKind,
245 frame_finished: bool,
246 block_counter: usize,
247 bytes_read_counter: u64,
248 check_sum: Option<u32>,
249 using_dict: Option<u32>,
250}
251
/// Controls how much work a single [`FrameDecoder::decode_blocks`] call does
/// before returning.
///
/// The limit is checked after each decoded block, so at least one block is
/// always decoded per call, and decoding always stops after the frame's last
/// block regardless of the strategy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockDecodingStrategy {
    /// Keep decoding until the last block of the frame has been decoded.
    All,
    /// Stop once at least this many blocks have been decoded in this call.
    UptoBlocks(usize),
    /// Stop once the decode buffer has grown by at least this many bytes
    /// during this call.
    UptoBytes(usize),
}
257
258impl FrameDecoderState {
259 /// Construct a new frame decoder state, reading the frame header
260 /// from `source`. When `magicless` is `true`, the 4-byte magic
261 /// number prefix is NOT consumed (donor `ZSTD_f_zstd1_magicless`).
262 /// Crate-internal — reached only via `FrameDecoder::init` /
263 /// `FrameDecoder::init_with_dict_handle`. Pre-allocates the
264 /// decode buffer to `window_size` so the first block does not
265 /// trigger incremental growth from zero capacity.
266 pub(crate) fn new_with_format(
267 source: impl Read,
268 magicless: bool,
269 ) -> Result<FrameDecoderState, FrameDecoderError> {
270 let (frame, header_size) = frame::read_frame_header_with_format(source, magicless)?;
271 let window_size = frame.window_size()?;
272
273 if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
274 return Err(FrameDecoderError::WindowSizeTooBig {
275 requested: window_size,
276 });
277 }
278
279 let decoder_scratch = if frame.descriptor.single_segment_flag() {
280 DecoderScratchKind::new_flat(window_size as usize)
281 } else {
282 DecoderScratchKind::new_ring(window_size as usize)
283 };
284 Ok(FrameDecoderState {
285 frame_header: frame,
286 frame_finished: false,
287 block_counter: 0,
288 decoder_scratch,
289 bytes_read_counter: u64::from(header_size),
290 check_sum: None,
291 using_dict: None,
292 })
293 }
294
295 /// Reset this state for a new frame read from `source`, reusing
296 /// existing allocations. When `magicless` is `true`, the frame
297 /// header is read WITHOUT expecting a magic-number prefix
298 /// (donor `ZSTD_f_zstd1_magicless`). Crate-internal — reached
299 /// only via `FrameDecoder::reset`.
300 ///
301 /// `DecodeBuffer::reset` reserves `window_size` internally, so
302 /// no additional frame-level reservation is needed here.
303 /// Further buffer growth during decoding is performed on demand
304 /// by the active block path.
305 pub(crate) fn reset_with_format(
306 &mut self,
307 source: impl Read,
308 magicless: bool,
309 ) -> Result<(), FrameDecoderError> {
310 let (frame_header, header_size) = frame::read_frame_header_with_format(source, magicless)?;
311 let window_size = frame_header.window_size()?;
312
313 if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
314 return Err(FrameDecoderError::WindowSizeTooBig {
315 requested: window_size,
316 });
317 }
318
319 self.decoder_scratch
320 .reset(&frame_header, window_size as usize);
321 self.frame_header = frame_header;
322 self.frame_finished = false;
323 self.block_counter = 0;
324 self.bytes_read_counter = u64::from(header_size);
325 self.check_sum = None;
326 self.using_dict = None;
327 Ok(())
328 }
329}
330
331impl Default for FrameDecoder {
332 fn default() -> Self {
333 Self::new()
334 }
335}
336
337impl FrameDecoder {
338 /// This will create a new decoder without allocating anything yet.
339 /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
340 /// else they just reset these buffers with not further allocations
341 pub fn new() -> FrameDecoder {
342 FrameDecoder {
343 state: None,
344 owned_dicts: BTreeMap::new(),
345 #[cfg(target_has_atomic = "ptr")]
346 shared_dicts: BTreeMap::new(),
347 #[cfg(not(target_has_atomic = "ptr"))]
348 shared_dicts: (),
349 magicless: false,
350 }
351 }
352
353 /// Enable or disable magicless frame format
354 /// (`ZSTD_f_zstd1_magicless`). When set to `true`, subsequent
355 /// [`init`] / [`reset`] calls expect the frame header to begin
356 /// directly with the frame-header descriptor — no 4-byte magic
357 /// number prefix. Default false. Must match the encoder's
358 /// magicless setting; the format is unambiguous only when the
359 /// caller knows it out-of-band.
360 ///
361 /// Note: magicless mode also disables skippable-frame detection.
362 /// The `0x184D2A50..=0x184D2A5F` skippable-frame magic range is
363 /// only recognised when the 4-byte magic prefix is consumed, so
364 /// `decode_all` / `init` / `reset` will treat a skippable frame
365 /// at the head of a magicless stream as a malformed frame header
366 /// (bad descriptor / window-size error) instead of skipping it.
367 /// Mixed-format streams that interleave skippable frames must be
368 /// pre-split by the caller; `set_magicless(true)` is only safe
369 /// when the entire stream is known to be magicless zstd frames.
370 pub fn set_magicless(&mut self, magicless: bool) {
371 self.magicless = magicless;
372 }
373
374 #[cfg(target_has_atomic = "ptr")]
375 fn shared_dict_exists(&self, dict_id: u32) -> bool {
376 self.shared_dicts.contains_key(&dict_id)
377 }
378
379 #[cfg(not(target_has_atomic = "ptr"))]
380 fn shared_dict_exists(&self, _dict_id: u32) -> bool {
381 false
382 }
383
384 fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
385 use crate::decoding::errors::DictionaryDecodeError as dict_err;
386
387 if dict.id == 0 {
388 return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
389 }
390 if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
391 return Err(FrameDecoderError::from(
392 dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
393 ));
394 }
395 Ok(())
396 }
397
398 /// init() will allocate all needed buffers if it is the first time this decoder is used
399 /// else they just reset these buffers with not further allocations
400 ///
401 /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
402 ///
403 /// equivalent to reset()
404 pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
405 self.reset(source)
406 }
407
408 /// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
409 ///
410 /// If the frame header has a dictionary ID, this validates it against
411 /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
412 ///
413 /// If the header omits the optional dictionary ID, this still applies the
414 /// provided dictionary handle.
415 ///
416 /// # Warning
417 ///
418 /// This method always applies `dict` unless the frame header contains a
419 /// non-matching dictionary ID. Callers must only use this API when they
420 /// already know the frame was encoded with the provided dictionary, even if
421 /// the frame header omits the dictionary ID or encodes an explicit
422 /// dictionary ID of `0`.
423 ///
424 /// Passing a dictionary for a frame that was not encoded with it can
425 /// silently corrupt the decoded output.
426 pub fn init_with_dict_handle(
427 &mut self,
428 source: impl Read,
429 dict: &DictionaryHandle,
430 ) -> Result<(), FrameDecoderError> {
431 self.reset_with_dict_handle(source, dict)
432 }
433
434 /// reset() will allocate all needed buffers if it is the first time this decoder is used
435 /// else they just reset these buffers with not further allocations
436 ///
437 /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
438 ///
439 /// equivalent to init()
440 pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
441 use FrameDecoderError as err;
442 let magicless = self.magicless;
443 let dict_id = match &mut self.state {
444 Some(s) => {
445 s.reset_with_format(source, magicless)?;
446 s.frame_header.dictionary_id()
447 }
448 None => {
449 self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
450 self.state
451 .as_ref()
452 .and_then(|state| state.frame_header.dictionary_id())
453 }
454 };
455 if let Some(dict_id) = dict_id {
456 let state = self.state.as_mut().expect("state initialized");
457 let owned_dicts = &self.owned_dicts;
458 #[cfg(target_has_atomic = "ptr")]
459 let shared_dicts = &self.shared_dicts;
460 let dict = owned_dicts
461 .get(&dict_id)
462 .or_else(|| {
463 #[cfg(target_has_atomic = "ptr")]
464 {
465 shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
466 }
467 #[cfg(not(target_has_atomic = "ptr"))]
468 {
469 None
470 }
471 })
472 .ok_or(err::DictNotProvided { dict_id })?;
473 state.decoder_scratch.init_from_dict(dict);
474 state.using_dict = Some(dict_id);
475 }
476 Ok(())
477 }
478
479 /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
480 ///
481 /// If the frame header has a dictionary ID, this validates it against
482 /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
483 ///
484 /// If the header omits the optional dictionary ID, this still applies the
485 /// provided dictionary handle.
486 ///
487 /// # Warning
488 ///
489 /// This method always applies `dict` unless the frame header contains a
490 /// non-matching dictionary ID. Callers must only use this API when they
491 /// already know the frame was encoded with the provided dictionary, even if
492 /// the frame header omits the dictionary ID or encodes an explicit
493 /// dictionary ID of `0`.
494 ///
495 /// Passing a dictionary for a frame that was not encoded with it can
496 /// silently corrupt the decoded output.
497 pub fn reset_with_dict_handle(
498 &mut self,
499 source: impl Read,
500 dict: &DictionaryHandle,
501 ) -> Result<(), FrameDecoderError> {
502 use FrameDecoderError as err;
503 Self::validate_registered_dictionary(dict.as_dict())?;
504 let magicless = self.magicless;
505 let state = match &mut self.state {
506 Some(s) => {
507 s.reset_with_format(source, magicless)?;
508 s
509 }
510 None => {
511 self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
512 self.state.as_mut().unwrap()
513 }
514 };
515 if let Some(dict_id) = state.frame_header.dictionary_id()
516 && dict_id != dict.id()
517 {
518 return Err(err::DictIdMismatch {
519 expected: dict_id,
520 provided: dict.id(),
521 });
522 }
523 state.decoder_scratch.init_from_dict(dict.as_dict());
524 state.using_dict = Some(dict.id());
525 Ok(())
526 }
527
528 /// Add a dictionary that can be selected dynamically by frame dictionary ID.
529 ///
530 /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
531 /// registered (either as owned or shared).
532 pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
533 Self::validate_registered_dictionary(&dict)?;
534 let dict_id = dict.id;
535 if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
536 return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
537 }
538 self.owned_dicts.insert(dict_id, dict);
539 Ok(())
540 }
541
542 /// Parse and add a serialized dictionary blob.
543 pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
544 let dict = Dictionary::decode_dict(raw_dictionary)?;
545 self.add_dict(dict)
546 }
547
548 /// Add a pre-parsed dictionary handle for reuse across decoders.
549 ///
550 /// This API is available on targets with pointer-width atomics
551 /// (`target_has_atomic = "ptr"`).
552 ///
553 /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
554 /// registered (either as owned or shared).
555 #[cfg(target_has_atomic = "ptr")]
556 pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
557 Self::validate_registered_dictionary(dict.as_dict())?;
558 let dict_id = dict.id();
559 if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
560 return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
561 }
562 self.shared_dicts.insert(dict_id, dict);
563 Ok(())
564 }
565
566 pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
567 use FrameDecoderError as err;
568 let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
569 let owned_dicts = &self.owned_dicts;
570 #[cfg(target_has_atomic = "ptr")]
571 let shared_dicts = &self.shared_dicts;
572
573 let dict = owned_dicts
574 .get(&dict_id)
575 .or_else(|| {
576 #[cfg(target_has_atomic = "ptr")]
577 {
578 shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
579 }
580 #[cfg(not(target_has_atomic = "ptr"))]
581 {
582 None
583 }
584 })
585 .ok_or(err::DictNotProvided { dict_id })?;
586 state.decoder_scratch.init_from_dict(dict);
587 state.using_dict = Some(dict_id);
588
589 Ok(())
590 }
591
592 /// Returns how many bytes the frame contains after decompression
593 pub fn content_size(&self) -> u64 {
594 match &self.state {
595 None => 0,
596 Some(s) => s.frame_header.frame_content_size(),
597 }
598 }
599
600 /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
601 pub fn get_checksum_from_data(&self) -> Option<u32> {
602 let state = self.state.as_ref()?;
603
604 state.check_sum
605 }
606
/// Checksum computed over the decoded output so far.
///
/// Only meaningful once all decoded bytes have been collected/read from the
/// FrameDecoder. Returns `None` when no frame has been initialized.
#[cfg(feature = "hash")]
pub fn get_calculated_checksum(&self) -> Option<u32> {
    self.state.as_ref().map(|state| {
        // Only the low 32 bits of the 64-bit hash are reported.
        state.decoder_scratch.hash_finish() as u32
    })
}
616
617 /// Counter for how many bytes have been consumed while decoding the frame
618 pub fn bytes_read_from_source(&self) -> u64 {
619 let state = match &self.state {
620 None => return 0,
621 Some(s) => s,
622 };
623 state.bytes_read_counter
624 }
625
626 /// Whether the current frames last block has been decoded yet
627 /// If this returns true you can call the drain* functions to get all content
628 /// (the read() function will drain automatically if this returns true)
629 pub fn is_finished(&self) -> bool {
630 let state = match &self.state {
631 None => return true,
632 Some(s) => s,
633 };
634 if state.frame_header.descriptor.content_checksum_flag() {
635 state.frame_finished && state.check_sum.is_some()
636 } else {
637 state.frame_finished
638 }
639 }
640
641 /// Counter for how many blocks have already been decoded
642 pub fn blocks_decoded(&self) -> usize {
643 let state = match &self.state {
644 None => return 0,
645 Some(s) => s,
646 };
647 state.block_counter
648 }
649
650 /// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
651 /// The Strategy influences how many blocks will be decoded before the function returns
652 /// This is important if you want to manage memory consumption carefully. If you don't care
653 /// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
654 pub fn decode_blocks(
655 &mut self,
656 mut source: impl Read,
657 strat: BlockDecodingStrategy,
658 ) -> Result<bool, FrameDecoderError> {
659 use FrameDecoderError as err;
660 let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
661
662 let mut block_dec = decoding::block_decoder::new();
663
664 let buffer_size_before = state.decoder_scratch.buffer_len();
665 let block_counter_before = state.block_counter;
666 loop {
667 vprintln!("################");
668 vprintln!("Next Block: {}", state.block_counter);
669 vprintln!("################");
670 let (block_header, block_header_size) = block_dec
671 .read_block_header(&mut source)
672 .map_err(err::FailedToReadBlockHeader)?;
673 state.bytes_read_counter += u64::from(block_header_size);
674
675 vprintln!();
676 vprintln!(
677 "Found {} block with size: {}, which will be of size: {}",
678 block_header.block_type,
679 block_header.content_size,
680 block_header.decompressed_size
681 );
682
683 let bytes_read_in_block_body = state
684 .decoder_scratch
685 .decode_block_content(&mut block_dec, &block_header, &mut source)
686 .map_err(err::FailedToReadBlockBody)?;
687 state.bytes_read_counter += bytes_read_in_block_body;
688
689 state.block_counter += 1;
690
691 vprintln!("Output: {}", state.decoder_scratch.buffer_len());
692
693 if block_header.last_block {
694 state.frame_finished = true;
695 if state.frame_header.descriptor.content_checksum_flag() {
696 let mut chksum = [0u8; 4];
697 source
698 .read_exact(&mut chksum)
699 .map_err(err::FailedToReadChecksum)?;
700 state.bytes_read_counter += 4;
701 let chksum = u32::from_le_bytes(chksum);
702 state.check_sum = Some(chksum);
703 }
704 break;
705 }
706
707 match strat {
708 BlockDecodingStrategy::All => { /* keep going */ }
709 BlockDecodingStrategy::UptoBlocks(n) => {
710 if state.block_counter - block_counter_before >= n {
711 break;
712 }
713 }
714 BlockDecodingStrategy::UptoBytes(n) => {
715 if state.decoder_scratch.buffer_len() - buffer_size_before >= n {
716 break;
717 }
718 }
719 }
720 }
721
722 Ok(state.frame_finished)
723 }
724
725 /// Collect bytes and retain window_size bytes while decoding is still going on.
726 /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
727 pub fn collect(&mut self) -> Option<Vec<u8>> {
728 let finished = self.is_finished();
729 let state = self.state.as_mut()?;
730 if finished {
731 Some(state.decoder_scratch.buffer_drain())
732 } else {
733 state.decoder_scratch.buffer_drain_to_window_size()
734 }
735 }
736
737 /// Collect bytes and retain window_size bytes while decoding is still going on.
738 /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
739 pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
740 let finished = self.is_finished();
741 let state = match &mut self.state {
742 None => return Ok(0),
743 Some(s) => s,
744 };
745 if finished {
746 state.decoder_scratch.buffer_drain_to_writer(w)
747 } else {
748 state.decoder_scratch.buffer_drain_to_window_size_writer(w)
749 }
750 }
751
752 /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
753 /// because window_size bytes need to be retained for decoding.
754 /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
755 pub fn can_collect(&self) -> usize {
756 let finished = self.is_finished();
757 let state = match &self.state {
758 None => return 0,
759 Some(s) => s,
760 };
761 if finished {
762 state.decoder_scratch.buffer_can_drain()
763 } else {
764 state
765 .decoder_scratch
766 .buffer_can_drain_to_window_size()
767 .unwrap_or(0)
768 }
769 }
770
771 /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
772 /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
773 ///
774 /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
775 /// which try to serve an old-style c api
776 ///
777 /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
778 /// input will not make any progress!
779 ///
780 /// Note that no kind of block can be bigger than 128kb.
781 /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
782 ///
783 /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
784 pub fn decode_from_to(
785 &mut self,
786 source: &[u8],
787 target: &mut [u8],
788 ) -> Result<(usize, usize), FrameDecoderError> {
789 use FrameDecoderError as err;
790 let bytes_read_at_start = match &self.state {
791 Some(s) => s.bytes_read_counter,
792 None => 0,
793 };
794
795 if !self.is_finished() || self.state.is_none() {
796 let mut mt_source = source;
797
798 if self.state.is_none() {
799 self.init(&mut mt_source)?;
800 }
801
802 //pseudo block to scope "state" so we can borrow self again after the block
803 {
804 let state = match &mut self.state {
805 Some(s) => s,
806 None => panic!("Bug in library"),
807 };
808 let mut block_dec = decoding::block_decoder::new();
809
810 if state.frame_header.descriptor.content_checksum_flag()
811 && state.frame_finished
812 && state.check_sum.is_none()
813 {
814 //this block is needed if the checksum were the only 4 bytes that were not included in the last decode_from_to call for a frame
815 if mt_source.len() >= 4 {
816 let chksum = mt_source[..4].try_into().expect("optimized away");
817 state.bytes_read_counter += 4;
818 let chksum = u32::from_le_bytes(chksum);
819 state.check_sum = Some(chksum);
820 }
821 return Ok((4, 0));
822 }
823
824 loop {
825 //check if there are enough bytes for the next header
826 if mt_source.len() < 3 {
827 break;
828 }
829 let (block_header, block_header_size) = block_dec
830 .read_block_header(&mut mt_source)
831 .map_err(err::FailedToReadBlockHeader)?;
832
833 // check the needed size for the block before updating counters.
834 // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
835 if mt_source.len() < block_header.content_size as usize {
836 break;
837 }
838 state.bytes_read_counter += u64::from(block_header_size);
839
840 let bytes_read_in_block_body = state
841 .decoder_scratch
842 .decode_block_content(&mut block_dec, &block_header, &mut mt_source)
843 .map_err(err::FailedToReadBlockBody)?;
844 state.bytes_read_counter += bytes_read_in_block_body;
845 state.block_counter += 1;
846
847 if block_header.last_block {
848 state.frame_finished = true;
849 if state.frame_header.descriptor.content_checksum_flag() {
850 //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
851 if mt_source.len() >= 4 {
852 let chksum = mt_source[..4].try_into().expect("optimized away");
853 state.bytes_read_counter += 4;
854 let chksum = u32::from_le_bytes(chksum);
855 state.check_sum = Some(chksum);
856 }
857 }
858 break;
859 }
860 }
861 }
862 }
863
864 let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
865 let bytes_read_at_end = match &mut self.state {
866 Some(s) => s.bytes_read_counter,
867 None => panic!("Bug in library"),
868 };
869 let read_len = bytes_read_at_end - bytes_read_at_start;
870 Ok((read_len as usize, result_len))
871 }
872
873 /// Decode multiple frames into the output slice.
874 ///
875 /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
876 /// skipped during decode.
877 ///
878 /// `output` must be large enough to hold the decompressed data. If you don't know
879 /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
880 ///
881 /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
882 ///
883 /// Returns the number of bytes written to `output`.
884 pub fn decode_all(
885 &mut self,
886 input: &[u8],
887 output: &mut [u8],
888 ) -> Result<usize, FrameDecoderError> {
889 self.decode_all_impl(input, output, |this, src| this.init(src))
890 }
891
892 /// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
893 ///
894 /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
895 /// skipped during decode.
896 ///
897 /// `output` must be large enough to hold the decompressed data. If you don't know
898 /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
899 ///
900 /// This calls [`FrameDecoder::init_with_dict_handle`], and all bytes currently in the
901 /// decoder will be lost.
902 ///
903 /// # Warning
904 ///
905 /// Each decoded frame is initialized with `dict`, even when a frame header
906 /// omits the optional dictionary ID. Callers must only use this API when
907 /// they already know the input frames were encoded with the provided
908 /// dictionary; otherwise decoded output can be silently corrupted.
909 pub fn decode_all_with_dict_handle(
910 &mut self,
911 input: &[u8],
912 output: &mut [u8],
913 dict: &DictionaryHandle,
914 ) -> Result<usize, FrameDecoderError> {
915 self.decode_all_impl(input, output, |this, src| {
916 this.init_with_dict_handle(src, dict)
917 })
918 }
919
920 fn decode_all_impl(
921 &mut self,
922 mut input: &[u8],
923 mut output: &mut [u8],
924 mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
925 ) -> Result<usize, FrameDecoderError> {
926 let mut total_bytes_written = 0;
927 while !input.is_empty() {
928 match init_frame(self, &mut input) {
929 Ok(_) => {}
930 Err(FrameDecoderError::ReadFrameHeaderError(
931 crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
932 )) => {
933 input = input
934 .get(length as usize..)
935 .ok_or(FrameDecoderError::FailedToSkipFrame)?;
936 continue;
937 }
938 Err(e) => return Err(e),
939 };
940 loop {
941 self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
942 let bytes_written = self
943 .read(output)
944 .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
945 output = &mut output[bytes_written..];
946 total_bytes_written += bytes_written;
947 if self.can_collect() != 0 {
948 return Err(FrameDecoderError::TargetTooSmall);
949 }
950 if self.is_finished() {
951 break;
952 }
953 }
954 }
955
956 Ok(total_bytes_written)
957 }
958
959 /// Decode multiple frames into the output slice using a serialized dictionary.
960 ///
961 /// # Warning
962 ///
963 /// Each decoded frame is initialized with the parsed dictionary, even when a
964 /// frame header omits the optional dictionary ID. Callers must only use this
965 /// API when they already know the input frames were encoded with that
966 /// dictionary; otherwise decoded output can be silently corrupted.
967 pub fn decode_all_with_dict_bytes(
968 &mut self,
969 input: &[u8],
970 output: &mut [u8],
971 raw_dictionary: &[u8],
972 ) -> Result<usize, FrameDecoderError> {
973 let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
974 self.decode_all_with_dict_handle(input, output, &dict)
975 }
976
977 /// Decode multiple frames into the extra capacity of the output vector.
978 ///
979 /// `input` must contain an exact number of frames.
980 ///
981 /// `output` must have enough extra capacity to hold the decompressed data.
982 /// This function will not reallocate or grow the vector. If you don't know
983 /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
984 ///
985 /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
986 ///
987 /// The length of the output vector is updated to include the decompressed data.
988 /// The length is not changed if an error occurs.
989 pub fn decode_all_to_vec(
990 &mut self,
991 input: &[u8],
992 output: &mut Vec<u8>,
993 ) -> Result<(), FrameDecoderError> {
994 let len = output.len();
995 let cap = output.capacity();
996 output.resize(cap, 0);
997 match self.decode_all(input, &mut output[len..]) {
998 Ok(bytes_written) => {
999 let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
1000 output.resize(new_len, 0);
1001 Ok(())
1002 }
1003 Err(e) => {
1004 output.resize(len, 0);
1005 Err(e)
1006 }
1007 }
1008 }
1009}
1010
1011/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
1012/// this will retain window_size bytes, else it will drain it completely
1013impl Read for FrameDecoder {
1014 fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
1015 let state = match &mut self.state {
1016 None => return Ok(0),
1017 Some(s) => s,
1018 };
1019 if state.frame_finished {
1020 state.decoder_scratch.buffer_read_all(target)
1021 } else {
1022 state.decoder_scratch.buffer_read(target)
1023 }
1024 }
1025}
1026
#[cfg(test)]
mod tests {
    extern crate std;

    use super::{DictionaryHandle, FrameDecoder};
    use crate::encoding::{CompressionLevel, FrameCompressor};
    use alloc::vec::Vec;

    /// Resetting with a dictionary handle must register the dictionary on the
    /// decoder state even when the frame header carries no dictionary ID.
    #[test]
    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
        // Parse the dictionary shipped with the dictionary tests.
        let raw_dict = include_bytes!("../../dict_tests/dictionary");
        let dict_handle = DictionaryHandle::decode_dict(raw_dict).expect("dictionary should parse");

        // Compress a small payload without configuring any dictionary on the compressor.
        let plain_text = b"reset-without-dict-id";
        let mut encoder = FrameCompressor::new(CompressionLevel::Default);
        encoder.set_source(plain_text.as_slice());
        let mut frame_bytes = Vec::new();
        encoder.set_drain(&mut frame_bytes);
        encoder.compress();

        let mut frame_decoder = FrameDecoder::new();
        frame_decoder
            .reset_with_dict_handle(frame_bytes.as_slice(), &dict_handle)
            .expect("reset should succeed");

        let decoder_state = frame_decoder
            .state
            .as_ref()
            .expect("state should be initialized");
        // The header names no dictionary ...
        assert!(decoder_state.frame_header.dictionary_id().is_none());
        // ... yet the decoder records the handle's dictionary as the one in use.
        assert_eq!(decoder_state.using_dict, Some(dict_handle.id()));
    }
}