Skip to main content

structured_zstd/decoding/
frame_decoder.rs

//! `FrameDecoder` is the main low-level struct users interact with to decode zstd frames.
//!
//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
//! decompressed independently of other frames. This module contains structures
//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::block_decoder::BlockDecoder;
10use crate::decoding::decode_buffer::DecodeBuffer;
11use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
12use crate::decoding::errors::{DecodeBlockContentError, FrameDecoderError};
13use crate::decoding::flat_buf::FlatBuf;
14use crate::decoding::ringbuffer::RingBuffer;
15use crate::decoding::scratch::DecoderScratch;
16use crate::io::{Error, Read, Write};
17use alloc::collections::BTreeMap;
18use alloc::vec::Vec;
19use core::convert::TryInto;
20
21use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
22
23/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
24///
25/// This decoder is able to decode frames only partially and gives control
26/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
27/// It reads bytes as needed from a provided source and can be read from to collect partial results.
28///
29/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
30/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
31///
32/// Workflow is as follows:
33/// ```
34/// use structured_zstd::decoding::BlockDecodingStrategy;
35///
36/// # #[cfg(feature = "std")]
37/// use std::io::{Read, Write};
38///
39/// // no_std environments can use the crate's own Read traits
40/// # #[cfg(not(feature = "std"))]
41/// use structured_zstd::io::{Read, Write};
42///
43/// fn decode_this(mut file: impl Read) {
44///     //Create a new decoder
45///     let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
46///     let mut result = Vec::new();
47///
48///     // Use reset or init to make the decoder ready to decode the frame from the io::Read
49///     frame_dec.reset(&mut file).unwrap();
50///
51///     // Loop until the frame has been decoded completely
52///     while !frame_dec.is_finished() {
53///         // decode (roughly) batch_size many bytes
54///         frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
55///
56///         // read from the decoder to collect bytes from the internal buffer
57///         let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
58///
59///         // then do something with it
60///         do_something(&result[0..bytes_read]);
61///     }
62///
63///     // handle the last chunk of data
64///     while frame_dec.can_collect() > 0 {
65///         let x = frame_dec.read(result.as_mut_slice()).unwrap();
66///
67///         do_something(&result[0..x]);
68///     }
69/// }
70///
71/// fn do_something(data: &[u8]) {
72/// # #[cfg(feature = "std")]
73///     std::io::stdout().write_all(data).unwrap();
74/// }
75/// ```
76pub struct FrameDecoder {
77    state: Option<FrameDecoderState>,
78    owned_dicts: BTreeMap<u32, Dictionary>,
79    #[cfg(target_has_atomic = "ptr")]
80    shared_dicts: BTreeMap<u32, DictionaryHandle>,
81    #[cfg(not(target_has_atomic = "ptr"))]
82    shared_dicts: (),
83    /// `ZSTD_f_zstd1_magicless` — when true, [`init`] / [`reset`]
84    /// expect frames without the 4-byte magic number prefix.
85    /// Default false (standard zstd format).
86    magicless: bool,
87    /// Pinned `Dictionary_ID` expectation set via
88    /// [`Self::expect_dict_id`]. `None` (default) disables the
89    /// check; `Some(0)` matches frames whose header omits the
90    /// optional dict_id (treated as "no dictionary"). Validated in
91    /// [`Self::reset`] AFTER the frame header parses successfully
92    /// and BEFORE any block decode work.
93    #[cfg(feature = "lsm")]
94    expect_dict_id: Option<u32>,
95    /// Pinned `Window_Descriptor` byte expectation set via
96    /// [`Self::expect_window_descriptor`]. `None` (default)
97    /// disables the check. Validated in [`Self::reset`] AFTER the
98    /// frame header parses successfully and BEFORE any block
99    /// decode work. Single-segment frames (which omit the
100    /// `Window_Descriptor` byte from the wire) surface as
101    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedWindowDescriptor`]
102    /// with `found: None`.
103    #[cfg(feature = "lsm")]
104    expect_window_descriptor: Option<u8>,
105}
106
107/// Backend-tagged decode scratch — chosen at frame-reset time based
108/// on the parsed `FrameHeader.descriptor.single_segment_flag()` and
109/// kept stable through the lifetime of the frame. The match in each
110/// helper below dispatches **once per call** (e.g. once per block in
111/// `decode_block_content`, once per drain in `drain_to_writer`) —
112/// never inside the hot push/repeat loop, which is fully
113/// monomorphised through the `DecoderScratch<B>` generic.
114enum DecoderScratchKind {
115    Ring(DecoderScratch<RingBuffer>),
116    Flat(DecoderScratch<FlatBuf>),
117}
118
119impl DecoderScratchKind {
120    fn new_ring(window_size: usize) -> Self {
121        let mut s = DecoderScratch::<RingBuffer>::new(window_size);
122        s.buffer.reserve(window_size);
123        Self::Ring(s)
124    }
125
126    /// Construct a flat-backed scratch sized for a single-segment
127    /// frame. `frame_content_size` is the upcoming output size in
128    /// bytes (== `window_size` when the flag is set).
129    fn new_flat(frame_content_size: usize) -> Self {
130        let flat = FlatBuf::with_capacity(frame_content_size);
131        // DecoderScratch's default ctor would discard the pre-sized
132        // FlatBuf — go through from_backend so the buffer carries the
133        // capacity the constructor wants.
134        let mut s = DecoderScratch::<FlatBuf>::new(frame_content_size);
135        s.buffer = DecodeBuffer::from_backend(flat, frame_content_size);
136        Self::Flat(s)
137    }
138
139    /// Reset (or transition between) backends for a new frame.
140    /// Reuses the existing `DecoderScratch` allocations (FSE / HUF
141    /// tables, sequence vec, etc.) when the backend kind is unchanged
142    /// — only the underlying buffer is re-sized for the new frame.
143    /// Building a fresh `DecoderScratch` on every frame would
144    /// re-allocate everything and was measured at +255 % vs ring on
145    /// small frames; reusing it keeps the small-frame cost flat.
146    fn reset(&mut self, frame: &frame::FrameHeader, window_size: usize) {
147        if frame.descriptor.single_segment_flag() {
148            match self {
149                Self::Flat(s) => {
150                    s.reset(window_size);
151                    // DecodeBuffer::reset clears + reserves
152                    // window_size; FlatBuf's reserve grows the
153                    // backing Vec if the new FCS is larger than
154                    // what's already allocated. No alloc when the
155                    // previous flat frame had >= this capacity.
156                }
157                Self::Ring(_) => *self = Self::new_flat(window_size),
158            }
159        } else {
160            match self {
161                Self::Ring(s) => s.reset(window_size),
162                Self::Flat(_) => *self = Self::new_ring(window_size),
163            }
164        }
165    }
166
167    fn init_from_dict(&mut self, dict: &Dictionary) {
168        match self {
169            Self::Ring(s) => s.init_from_dict(dict),
170            Self::Flat(s) => s.init_from_dict(dict),
171        }
172    }
173
174    #[inline]
175    fn buffer_len(&self) -> usize {
176        match self {
177            Self::Ring(s) => s.buffer.len(),
178            Self::Flat(s) => s.buffer.len(),
179        }
180    }
181
182    fn buffer_drain(&mut self) -> Vec<u8> {
183        match self {
184            Self::Ring(s) => s.buffer.drain(),
185            Self::Flat(s) => s.buffer.drain(),
186        }
187    }
188
189    fn buffer_drain_to_window_size(&mut self) -> Option<Vec<u8>> {
190        match self {
191            Self::Ring(s) => s.buffer.drain_to_window_size(),
192            Self::Flat(s) => s.buffer.drain_to_window_size(),
193        }
194    }
195
196    fn buffer_drain_to_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
197        match self {
198            Self::Ring(s) => s.buffer.drain_to_writer(sink),
199            Self::Flat(s) => s.buffer.drain_to_writer(sink),
200        }
201    }
202
203    fn buffer_drain_to_window_size_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
204        match self {
205            Self::Ring(s) => s.buffer.drain_to_window_size_writer(sink),
206            Self::Flat(s) => s.buffer.drain_to_window_size_writer(sink),
207        }
208    }
209
210    fn buffer_can_drain(&self) -> usize {
211        match self {
212            Self::Ring(s) => s.buffer.can_drain(),
213            Self::Flat(s) => s.buffer.can_drain(),
214        }
215    }
216
217    fn buffer_can_drain_to_window_size(&self) -> Option<usize> {
218        match self {
219            Self::Ring(s) => s.buffer.can_drain_to_window_size(),
220            Self::Flat(s) => s.buffer.can_drain_to_window_size(),
221        }
222    }
223
224    fn buffer_read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
225        match self {
226            Self::Ring(s) => s.buffer.read(target),
227            Self::Flat(s) => s.buffer.read(target),
228        }
229    }
230
231    fn buffer_read_all(&mut self, target: &mut [u8]) -> Result<usize, Error> {
232        match self {
233            Self::Ring(s) => s.buffer.read_all(target),
234            Self::Flat(s) => s.buffer.read_all(target),
235        }
236    }
237
238    fn decode_block_content<R: Read>(
239        &mut self,
240        decoder: &mut BlockDecoder,
241        header: &crate::blocks::block::BlockHeader,
242        source: R,
243    ) -> Result<u64, DecodeBlockContentError> {
244        match self {
245            Self::Ring(s) => decoder.decode_block_content(header, s, source),
246            Self::Flat(s) => decoder.decode_block_content(header, s, source),
247        }
248    }
249
250    #[cfg(feature = "hash")]
251    fn hash_finish(&self) -> u64 {
252        use core::hash::Hasher;
253        match self {
254            Self::Ring(s) => s.buffer.hash.finish(),
255            Self::Flat(s) => s.buffer.hash.finish(),
256        }
257    }
258}
259
260struct FrameDecoderState {
261    pub frame_header: frame::FrameHeader,
262    decoder_scratch: DecoderScratchKind,
263    frame_finished: bool,
264    block_counter: usize,
265    bytes_read_counter: u64,
266    check_sum: Option<u32>,
267    using_dict: Option<u32>,
268}
269
/// Selects how much work a single `FrameDecoder::decode_blocks` call should do.
pub enum BlockDecodingStrategy {
    /// No explicit limit.
    // NOTE(review): presumably decodes until the frame is finished — confirm
    // against `decode_blocks`, which is not visible from this part of the file.
    All,
    /// Limit expressed as a number of blocks.
    // NOTE(review): presumably an upper bound per call — confirm against `decode_blocks`.
    UptoBlocks(usize),
    /// Limit expressed as a number of bytes; the `FrameDecoder` example
    /// describes this as decoding "(roughly)" that many bytes per call.
    UptoBytes(usize),
}
275
276impl FrameDecoderState {
277    /// Construct a new frame decoder state, reading the frame header
278    /// from `source`. When `magicless` is `true`, the 4-byte magic
279    /// number prefix is NOT consumed (donor `ZSTD_f_zstd1_magicless`).
280    /// Crate-internal — reached only via `FrameDecoder::init` /
281    /// `FrameDecoder::init_with_dict_handle`. Pre-allocates the
282    /// decode buffer to `window_size` so the first block does not
283    /// trigger incremental growth from zero capacity.
284    pub(crate) fn new_with_format(
285        source: impl Read,
286        magicless: bool,
287    ) -> Result<FrameDecoderState, FrameDecoderError> {
288        let (frame, header_size) = frame::read_frame_header_with_format(source, magicless)?;
289        let window_size = frame.window_size()?;
290
291        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
292            return Err(FrameDecoderError::WindowSizeTooBig {
293                requested: window_size,
294            });
295        }
296
297        let decoder_scratch = if frame.descriptor.single_segment_flag() {
298            DecoderScratchKind::new_flat(window_size as usize)
299        } else {
300            DecoderScratchKind::new_ring(window_size as usize)
301        };
302        Ok(FrameDecoderState {
303            frame_header: frame,
304            frame_finished: false,
305            block_counter: 0,
306            decoder_scratch,
307            bytes_read_counter: u64::from(header_size),
308            check_sum: None,
309            using_dict: None,
310        })
311    }
312
313    /// Reset this state for a new frame read from `source`, reusing
314    /// existing allocations. When `magicless` is `true`, the frame
315    /// header is read WITHOUT expecting a magic-number prefix
316    /// (donor `ZSTD_f_zstd1_magicless`). Crate-internal — reached
317    /// only via `FrameDecoder::reset`.
318    ///
319    /// `DecodeBuffer::reset` reserves `window_size` internally, so
320    /// no additional frame-level reservation is needed here.
321    /// Further buffer growth during decoding is performed on demand
322    /// by the active block path.
323    pub(crate) fn reset_with_format(
324        &mut self,
325        source: impl Read,
326        magicless: bool,
327    ) -> Result<(), FrameDecoderError> {
328        let (frame_header, header_size) = frame::read_frame_header_with_format(source, magicless)?;
329        let window_size = frame_header.window_size()?;
330
331        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
332            return Err(FrameDecoderError::WindowSizeTooBig {
333                requested: window_size,
334            });
335        }
336
337        self.decoder_scratch
338            .reset(&frame_header, window_size as usize);
339        self.frame_header = frame_header;
340        self.frame_finished = false;
341        self.block_counter = 0;
342        self.bytes_read_counter = u64::from(header_size);
343        self.check_sum = None;
344        self.using_dict = None;
345        Ok(())
346    }
347}
348
349impl Default for FrameDecoder {
350    fn default() -> Self {
351        Self::new()
352    }
353}
354
355impl FrameDecoder {
356    /// This will create a new decoder without allocating anything yet.
357    /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
358    /// else they just reset these buffers with not further allocations
359    pub fn new() -> FrameDecoder {
360        FrameDecoder {
361            state: None,
362            owned_dicts: BTreeMap::new(),
363            #[cfg(target_has_atomic = "ptr")]
364            shared_dicts: BTreeMap::new(),
365            #[cfg(not(target_has_atomic = "ptr"))]
366            shared_dicts: (),
367            magicless: false,
368            #[cfg(feature = "lsm")]
369            expect_dict_id: None,
370            #[cfg(feature = "lsm")]
371            expect_window_descriptor: None,
372        }
373    }
374
375    /// Pin the expected `Dictionary_ID` for the next frame.
376    ///
377    /// When `expected` is set, [`Self::init`] / [`Self::reset`]
378    /// validate it against the parsed frame header BEFORE any
379    /// block decode work runs. A mismatch returns
380    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedDictId`]
381    /// before any block decode and before any output is produced.
382    /// Scratch buffer allocation / reservation for the decode
383    /// pipeline happens during frame-header parsing, which is
384    /// already complete when this validation fires — the cost of
385    /// scratch sizing is paid even on a mismatched header. The
386    /// guarantee is "no block decode, no XXH64 init, no partial
387    /// output", not "zero allocation".
388    ///
389    /// `Some(0)` is treated as "no dictionary expected": a frame
390    /// whose header omits the optional `Dictionary_ID` field
391    /// (flag value 0) passes the check; a frame that carries an
392    /// explicit non-zero id fails.
393    ///
394    /// `None` (default) disables the check.
395    ///
396    /// Primary use case: post-AEAD-decrypt sanity check in
397    /// wire-format consumers (e.g. lsm-tree's encrypted block
398    /// format pins the `dict_id` baked into the AAD against the
399    /// inner zstd frame's `dict_id` to defeat dict-substitution
400    /// attacks).
401    ///
402    /// NOT a replacement for AEAD authentication. NOT the same
403    /// semantic as donor `ZSTD_d_windowLogMax` (which is a
404    /// ceiling-style limit, separate concern).
405    #[cfg(feature = "lsm")]
406    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
407    pub fn expect_dict_id(&mut self, expected: Option<u32>) {
408        self.expect_dict_id = expected;
409    }
410
411    /// Pin the expected raw `Window_Descriptor` byte (RFC 8878
412    /// §3.1.1.1.2 layout: `(exp << 3) | mantissa`) for the next
413    /// frame.
414    ///
415    /// When `expected` is set, [`Self::init`] / [`Self::reset`]
416    /// validate it against the parsed frame header BEFORE any
417    /// block decode work runs. A mismatch returns
418    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedWindowDescriptor`].
419    ///
420    /// Single-segment frames omit the `Window_Descriptor` byte
421    /// from the wire entirely. Setting an expectation while
422    /// receiving a single-segment frame fails the check with
423    /// `found: None` — there is no on-wire byte to match against,
424    /// which is reported explicitly rather than silently passing.
425    ///
426    /// `None` (default) disables the check.
427    ///
428    /// Byte-exact equality, NOT a ceiling. Donor
429    /// `ZSTD_d_windowLogMax` is a separate ceiling-style limit
430    /// available through the C FFI surface; this method is for
431    /// strict equality validation against a pinned expectation
432    /// (e.g. lsm-tree's wire format pins the window descriptor
433    /// from the AAD to defeat decompression-bomb-swap attacks).
434    #[cfg(feature = "lsm")]
435    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
436    pub fn expect_window_descriptor(&mut self, expected: Option<u8>) {
437        self.expect_window_descriptor = expected;
438    }
439
440    /// Validate the just-parsed frame header against any pinned
441    /// expectations set via [`Self::expect_dict_id`] /
442    /// [`Self::expect_window_descriptor`].
443    ///
444    /// Returns the typed error variant on mismatch and leaves
445    /// `self.state` in a re-resettable shape — a subsequent
446    /// `reset()` will overwrite `frame_header` from the new source
447    /// without needing intermediate cleanup.
448    #[cfg(feature = "lsm")]
449    fn validate_expectations(
450        &self,
451        frame_header: &frame::FrameHeader,
452    ) -> Result<(), FrameDecoderError> {
453        if let Some(expected) = self.expect_dict_id {
454            let found = frame_header.dictionary_id();
455            // `Some(0)` is the "no dictionary expected" sentinel —
456            // matches a frame whose header omits the optional
457            // dict_id field (which is reported as `None` by the
458            // parser). All other values must match exactly.
459            let matches = match (expected, found) {
460                (0, None) => true,
461                (e, Some(f)) => e == f,
462                _ => false,
463            };
464            if !matches {
465                return Err(FrameDecoderError::UnexpectedDictId {
466                    expected: Some(expected),
467                    found,
468                });
469            }
470        }
471        if let Some(expected) = self.expect_window_descriptor {
472            let found = frame_header.window_descriptor();
473            if found != Some(expected) {
474                return Err(FrameDecoderError::UnexpectedWindowDescriptor { expected, found });
475            }
476        }
477        Ok(())
478    }
479
480    /// Enable or disable magicless frame format
481    /// (`ZSTD_f_zstd1_magicless`). When set to `true`, subsequent
482    /// [`init`] / [`reset`] calls expect the frame header to begin
483    /// directly with the frame-header descriptor — no 4-byte magic
484    /// number prefix. Default false. Must match the encoder's
485    /// magicless setting; the format is unambiguous only when the
486    /// caller knows it out-of-band.
487    ///
488    /// Note: magicless mode also disables skippable-frame detection.
489    /// The `0x184D2A50..=0x184D2A5F` skippable-frame magic range is
490    /// only recognised when the 4-byte magic prefix is consumed, so
491    /// `decode_all` / `init` / `reset` will treat a skippable frame
492    /// at the head of a magicless stream as a malformed frame header
493    /// (bad descriptor / window-size error) instead of skipping it.
494    /// Mixed-format streams that interleave skippable frames must be
495    /// pre-split by the caller; `set_magicless(true)` is only safe
496    /// when the entire stream is known to be magicless zstd frames.
497    pub fn set_magicless(&mut self, magicless: bool) {
498        self.magicless = magicless;
499    }
500
501    #[cfg(target_has_atomic = "ptr")]
502    fn shared_dict_exists(&self, dict_id: u32) -> bool {
503        self.shared_dicts.contains_key(&dict_id)
504    }
505
506    #[cfg(not(target_has_atomic = "ptr"))]
507    fn shared_dict_exists(&self, _dict_id: u32) -> bool {
508        false
509    }
510
511    fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
512        use crate::decoding::errors::DictionaryDecodeError as dict_err;
513
514        if dict.id == 0 {
515            return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
516        }
517        if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
518            return Err(FrameDecoderError::from(
519                dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
520            ));
521        }
522        Ok(())
523    }
524
525    /// init() will allocate all needed buffers if it is the first time this decoder is used
526    /// else they just reset these buffers with not further allocations
527    ///
528    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
529    ///
530    /// equivalent to reset()
531    pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
532        self.reset(source)
533    }
534
535    /// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
536    ///
537    /// If the frame header has a dictionary ID, this validates it against
538    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
539    ///
540    /// If the header omits the optional dictionary ID, this still applies the
541    /// provided dictionary handle.
542    ///
543    /// # Warning
544    ///
545    /// This method always applies `dict` unless the frame header contains a
546    /// non-matching dictionary ID. Callers must only use this API when they
547    /// already know the frame was encoded with the provided dictionary, even if
548    /// the frame header omits the dictionary ID or encodes an explicit
549    /// dictionary ID of `0`.
550    ///
551    /// Passing a dictionary for a frame that was not encoded with it can
552    /// silently corrupt the decoded output.
553    pub fn init_with_dict_handle(
554        &mut self,
555        source: impl Read,
556        dict: &DictionaryHandle,
557    ) -> Result<(), FrameDecoderError> {
558        self.reset_with_dict_handle(source, dict)
559    }
560
561    /// reset() will allocate all needed buffers if it is the first time this decoder is used
562    /// else they just reset these buffers with not further allocations
563    ///
564    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
565    ///
566    /// equivalent to init()
567    pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
568        use FrameDecoderError as err;
569        let magicless = self.magicless;
570        let dict_id = match &mut self.state {
571            Some(s) => {
572                s.reset_with_format(source, magicless)?;
573                s.frame_header.dictionary_id()
574            }
575            None => {
576                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
577                self.state
578                    .as_ref()
579                    .and_then(|state| state.frame_header.dictionary_id())
580            }
581        };
582        // Validate any pinned expectations BEFORE block decode work
583        // runs. Catches dict_id substitution / window-descriptor
584        // tampering on inputs already authenticated by an outer
585        // layer (e.g. AEAD). Returning here leaves `self.state` in
586        // a re-resettable shape — next `reset()` re-parses the
587        // frame header without intermediate cleanup.
588        #[cfg(feature = "lsm")]
589        if let Some(state) = self.state.as_ref() {
590            self.validate_expectations(&state.frame_header)?;
591        }
592        if let Some(dict_id) = dict_id {
593            let state = self.state.as_mut().expect("state initialized");
594            let owned_dicts = &self.owned_dicts;
595            #[cfg(target_has_atomic = "ptr")]
596            let shared_dicts = &self.shared_dicts;
597            let dict = owned_dicts
598                .get(&dict_id)
599                .or_else(|| {
600                    #[cfg(target_has_atomic = "ptr")]
601                    {
602                        shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
603                    }
604                    #[cfg(not(target_has_atomic = "ptr"))]
605                    {
606                        None
607                    }
608                })
609                .ok_or(err::DictNotProvided { dict_id })?;
610            state.decoder_scratch.init_from_dict(dict);
611            state.using_dict = Some(dict_id);
612        }
613        Ok(())
614    }
615
616    /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
617    ///
618    /// If the frame header has a dictionary ID, this validates it against
619    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
620    ///
621    /// If the header omits the optional dictionary ID, this still applies the
622    /// provided dictionary handle.
623    ///
624    /// # Warning
625    ///
626    /// This method always applies `dict` unless the frame header contains a
627    /// non-matching dictionary ID. Callers must only use this API when they
628    /// already know the frame was encoded with the provided dictionary, even if
629    /// the frame header omits the dictionary ID or encodes an explicit
630    /// dictionary ID of `0`.
631    ///
632    /// Passing a dictionary for a frame that was not encoded with it can
633    /// silently corrupt the decoded output.
634    pub fn reset_with_dict_handle(
635        &mut self,
636        source: impl Read,
637        dict: &DictionaryHandle,
638    ) -> Result<(), FrameDecoderError> {
639        use FrameDecoderError as err;
640        Self::validate_registered_dictionary(dict.as_dict())?;
641        let magicless = self.magicless;
642        // Scope the &mut borrow of `self.state` to the header parse
643        // alone, so the subsequent `validate_expectations(&self, ...)`
644        // call below can take a fresh shared borrow of self without
645        // tripping the borrow checker.
646        match &mut self.state {
647            Some(s) => s.reset_with_format(source, magicless)?,
648            None => {
649                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
650            }
651        }
652        // Single source of truth: route through the same
653        // `validate_expectations` used by `reset()`. Routing through
654        // the helper keeps the two code paths from drifting (e.g.,
655        // if expect-semantics or error wiring changes later).
656        #[cfg(feature = "lsm")]
657        {
658            let header = &self
659                .state
660                .as_ref()
661                .expect("state populated by reset_with_format/new_with_format")
662                .frame_header;
663            self.validate_expectations(header)?;
664        }
665        let state = self
666            .state
667            .as_mut()
668            .expect("state populated by reset_with_format/new_with_format");
669        if let Some(dict_id) = state.frame_header.dictionary_id()
670            && dict_id != dict.id()
671        {
672            return Err(err::DictIdMismatch {
673                expected: dict_id,
674                provided: dict.id(),
675            });
676        }
677        state.decoder_scratch.init_from_dict(dict.as_dict());
678        state.using_dict = Some(dict.id());
679        Ok(())
680    }
681
682    /// Add a dictionary that can be selected dynamically by frame dictionary ID.
683    ///
684    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
685    /// registered (either as owned or shared).
686    pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
687        Self::validate_registered_dictionary(&dict)?;
688        let dict_id = dict.id;
689        if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
690            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
691        }
692        self.owned_dicts.insert(dict_id, dict);
693        Ok(())
694    }
695
696    /// Parse and add a serialized dictionary blob.
697    pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
698        let dict = Dictionary::decode_dict(raw_dictionary)?;
699        self.add_dict(dict)
700    }
701
702    /// Add a pre-parsed dictionary handle for reuse across decoders.
703    ///
704    /// This API is available on targets with pointer-width atomics
705    /// (`target_has_atomic = "ptr"`).
706    ///
707    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
708    /// registered (either as owned or shared).
709    #[cfg(target_has_atomic = "ptr")]
710    pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
711        Self::validate_registered_dictionary(dict.as_dict())?;
712        let dict_id = dict.id();
713        if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
714            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
715        }
716        self.shared_dicts.insert(dict_id, dict);
717        Ok(())
718    }
719
720    pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
721        use FrameDecoderError as err;
722        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
723        let owned_dicts = &self.owned_dicts;
724        #[cfg(target_has_atomic = "ptr")]
725        let shared_dicts = &self.shared_dicts;
726
727        let dict = owned_dicts
728            .get(&dict_id)
729            .or_else(|| {
730                #[cfg(target_has_atomic = "ptr")]
731                {
732                    shared_dicts.get(&dict_id).map(DictionaryHandle::as_dict)
733                }
734                #[cfg(not(target_has_atomic = "ptr"))]
735                {
736                    None
737                }
738            })
739            .ok_or(err::DictNotProvided { dict_id })?;
740        state.decoder_scratch.init_from_dict(dict);
741        state.using_dict = Some(dict_id);
742
743        Ok(())
744    }
745
746    /// Returns how many bytes the frame contains after decompression
747    pub fn content_size(&self) -> u64 {
748        match &self.state {
749            None => 0,
750            Some(s) => s.frame_header.frame_content_size(),
751        }
752    }
753
754    /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
755    pub fn get_checksum_from_data(&self) -> Option<u32> {
756        let state = self.state.as_ref()?;
757
758        state.check_sum
759    }
760
761    /// Returns the checksum that was calculated while decoding.
762    /// Only a sensible value after all decoded bytes have been collected/read from the FrameDecoder
763    #[cfg(feature = "hash")]
764    pub fn get_calculated_checksum(&self) -> Option<u32> {
765        let state = self.state.as_ref()?;
766        let cksum_64bit = state.decoder_scratch.hash_finish();
767        //truncate to lower 32bit because reasons...
768        Some(cksum_64bit as u32)
769    }
770
771    /// Counter for how many bytes have been consumed while decoding the frame
772    pub fn bytes_read_from_source(&self) -> u64 {
773        let state = match &self.state {
774            None => return 0,
775            Some(s) => s,
776        };
777        state.bytes_read_counter
778    }
779
780    /// Whether the current frames last block has been decoded yet
781    /// If this returns true you can call the drain* functions to get all content
782    /// (the read() function will drain automatically if this returns true)
783    pub fn is_finished(&self) -> bool {
784        let state = match &self.state {
785            None => return true,
786            Some(s) => s,
787        };
788        if state.frame_header.descriptor.content_checksum_flag() {
789            state.frame_finished && state.check_sum.is_some()
790        } else {
791            state.frame_finished
792        }
793    }
794
795    /// Counter for how many blocks have already been decoded
796    pub fn blocks_decoded(&self) -> usize {
797        let state = match &self.state {
798            None => return 0,
799            Some(s) => s,
800        };
801        state.block_counter
802    }
803
804    /// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
805    /// The Strategy influences how many blocks will be decoded before the function returns
806    /// This is important if you want to manage memory consumption carefully. If you don't care
807    /// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
808    pub fn decode_blocks(
809        &mut self,
810        mut source: impl Read,
811        strat: BlockDecodingStrategy,
812    ) -> Result<bool, FrameDecoderError> {
813        use FrameDecoderError as err;
814        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
815
816        let mut block_dec = decoding::block_decoder::new();
817
818        let buffer_size_before = state.decoder_scratch.buffer_len();
819        let block_counter_before = state.block_counter;
820        loop {
821            vprintln!("################");
822            vprintln!("Next Block: {}", state.block_counter);
823            vprintln!("################");
824            let (block_header, block_header_size) = block_dec
825                .read_block_header(&mut source)
826                .map_err(err::FailedToReadBlockHeader)?;
827            state.bytes_read_counter += u64::from(block_header_size);
828
829            vprintln!();
830            vprintln!(
831                "Found {} block with size: {}, which will be of size: {}",
832                block_header.block_type,
833                block_header.content_size,
834                block_header.decompressed_size
835            );
836
837            let bytes_read_in_block_body = state
838                .decoder_scratch
839                .decode_block_content(&mut block_dec, &block_header, &mut source)
840                .map_err(err::FailedToReadBlockBody)?;
841            state.bytes_read_counter += bytes_read_in_block_body;
842
843            state.block_counter += 1;
844
845            vprintln!("Output: {}", state.decoder_scratch.buffer_len());
846
847            if block_header.last_block {
848                state.frame_finished = true;
849                if state.frame_header.descriptor.content_checksum_flag() {
850                    let mut chksum = [0u8; 4];
851                    source
852                        .read_exact(&mut chksum)
853                        .map_err(err::FailedToReadChecksum)?;
854                    state.bytes_read_counter += 4;
855                    let chksum = u32::from_le_bytes(chksum);
856                    state.check_sum = Some(chksum);
857                }
858                break;
859            }
860
861            match strat {
862                BlockDecodingStrategy::All => { /* keep going */ }
863                BlockDecodingStrategy::UptoBlocks(n) => {
864                    if state.block_counter - block_counter_before >= n {
865                        break;
866                    }
867                }
868                BlockDecodingStrategy::UptoBytes(n) => {
869                    if state.decoder_scratch.buffer_len() - buffer_size_before >= n {
870                        break;
871                    }
872                }
873            }
874        }
875
876        Ok(state.frame_finished)
877    }
878
879    /// Collect bytes and retain window_size bytes while decoding is still going on.
880    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
881    pub fn collect(&mut self) -> Option<Vec<u8>> {
882        let finished = self.is_finished();
883        let state = self.state.as_mut()?;
884        if finished {
885            Some(state.decoder_scratch.buffer_drain())
886        } else {
887            state.decoder_scratch.buffer_drain_to_window_size()
888        }
889    }
890
891    /// Collect bytes and retain window_size bytes while decoding is still going on.
892    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
893    pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
894        let finished = self.is_finished();
895        let state = match &mut self.state {
896            None => return Ok(0),
897            Some(s) => s,
898        };
899        if finished {
900            state.decoder_scratch.buffer_drain_to_writer(w)
901        } else {
902            state.decoder_scratch.buffer_drain_to_window_size_writer(w)
903        }
904    }
905
906    /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
907    /// because window_size bytes need to be retained for decoding.
908    /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
909    pub fn can_collect(&self) -> usize {
910        let finished = self.is_finished();
911        let state = match &self.state {
912            None => return 0,
913            Some(s) => s,
914        };
915        if finished {
916            state.decoder_scratch.buffer_can_drain()
917        } else {
918            state
919                .decoder_scratch
920                .buffer_can_drain_to_window_size()
921                .unwrap_or(0)
922        }
923    }
924
925    /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
926    /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
927    ///
928    /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
929    /// which try to serve an old-style c api
930    ///
931    /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
932    /// input will not make any progress!
933    ///
934    /// Note that no kind of block can be bigger than 128kb.
935    /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
936    ///
937    /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
938    pub fn decode_from_to(
939        &mut self,
940        source: &[u8],
941        target: &mut [u8],
942    ) -> Result<(usize, usize), FrameDecoderError> {
943        use FrameDecoderError as err;
944        let bytes_read_at_start = match &self.state {
945            Some(s) => s.bytes_read_counter,
946            None => 0,
947        };
948
949        if !self.is_finished() || self.state.is_none() {
950            let mut mt_source = source;
951
952            if self.state.is_none() {
953                self.init(&mut mt_source)?;
954            }
955
956            //pseudo block to scope "state" so we can borrow self again after the block
957            {
958                let state = match &mut self.state {
959                    Some(s) => s,
960                    None => panic!("Bug in library"),
961                };
962                let mut block_dec = decoding::block_decoder::new();
963
964                if state.frame_header.descriptor.content_checksum_flag()
965                    && state.frame_finished
966                    && state.check_sum.is_none()
967                {
968                    //this block is needed if the checksum were the only 4 bytes that were not included in the last decode_from_to call for a frame
969                    if mt_source.len() >= 4 {
970                        let chksum = mt_source[..4].try_into().expect("optimized away");
971                        state.bytes_read_counter += 4;
972                        let chksum = u32::from_le_bytes(chksum);
973                        state.check_sum = Some(chksum);
974                    }
975                    return Ok((4, 0));
976                }
977
978                loop {
979                    //check if there are enough bytes for the next header
980                    if mt_source.len() < 3 {
981                        break;
982                    }
983                    let (block_header, block_header_size) = block_dec
984                        .read_block_header(&mut mt_source)
985                        .map_err(err::FailedToReadBlockHeader)?;
986
987                    // check the needed size for the block before updating counters.
988                    // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
989                    if mt_source.len() < block_header.content_size as usize {
990                        break;
991                    }
992                    state.bytes_read_counter += u64::from(block_header_size);
993
994                    let bytes_read_in_block_body = state
995                        .decoder_scratch
996                        .decode_block_content(&mut block_dec, &block_header, &mut mt_source)
997                        .map_err(err::FailedToReadBlockBody)?;
998                    state.bytes_read_counter += bytes_read_in_block_body;
999                    state.block_counter += 1;
1000
1001                    if block_header.last_block {
1002                        state.frame_finished = true;
1003                        if state.frame_header.descriptor.content_checksum_flag() {
1004                            //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
1005                            if mt_source.len() >= 4 {
1006                                let chksum = mt_source[..4].try_into().expect("optimized away");
1007                                state.bytes_read_counter += 4;
1008                                let chksum = u32::from_le_bytes(chksum);
1009                                state.check_sum = Some(chksum);
1010                            }
1011                        }
1012                        break;
1013                    }
1014                }
1015            }
1016        }
1017
1018        let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
1019        let bytes_read_at_end = match &mut self.state {
1020            Some(s) => s.bytes_read_counter,
1021            None => panic!("Bug in library"),
1022        };
1023        let read_len = bytes_read_at_end - bytes_read_at_start;
1024        Ok((read_len as usize, result_len))
1025    }
1026
1027    /// Decode multiple frames into the output slice.
1028    ///
1029    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
1030    /// skipped during decode.
1031    ///
1032    /// `output` must be large enough to hold the decompressed data. If you don't know
1033    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
1034    ///
1035    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
1036    ///
1037    /// Returns the number of bytes written to `output`.
1038    pub fn decode_all(
1039        &mut self,
1040        input: &[u8],
1041        output: &mut [u8],
1042    ) -> Result<usize, FrameDecoderError> {
1043        self.decode_all_impl(input, output, |this, src| this.init(src))
1044    }
1045
1046    /// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
1047    ///
1048    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
1049    /// skipped during decode.
1050    ///
1051    /// `output` must be large enough to hold the decompressed data. If you don't know
1052    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
1053    ///
1054    /// This calls [`FrameDecoder::init_with_dict_handle`], and all bytes currently in the
1055    /// decoder will be lost.
1056    ///
1057    /// # Warning
1058    ///
1059    /// Each decoded frame is initialized with `dict`, even when a frame header
1060    /// omits the optional dictionary ID. Callers must only use this API when
1061    /// they already know the input frames were encoded with the provided
1062    /// dictionary; otherwise decoded output can be silently corrupted.
1063    pub fn decode_all_with_dict_handle(
1064        &mut self,
1065        input: &[u8],
1066        output: &mut [u8],
1067        dict: &DictionaryHandle,
1068    ) -> Result<usize, FrameDecoderError> {
1069        self.decode_all_impl(input, output, |this, src| {
1070            this.init_with_dict_handle(src, dict)
1071        })
1072    }
1073
1074    fn decode_all_impl(
1075        &mut self,
1076        mut input: &[u8],
1077        mut output: &mut [u8],
1078        mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
1079    ) -> Result<usize, FrameDecoderError> {
1080        let mut total_bytes_written = 0;
1081        while !input.is_empty() {
1082            match init_frame(self, &mut input) {
1083                Ok(_) => {}
1084                Err(FrameDecoderError::ReadFrameHeaderError(
1085                    crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
1086                )) => {
1087                    input = input
1088                        .get(length as usize..)
1089                        .ok_or(FrameDecoderError::FailedToSkipFrame)?;
1090                    continue;
1091                }
1092                Err(e) => return Err(e),
1093            };
1094            loop {
1095                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
1096                let bytes_written = self
1097                    .read(output)
1098                    .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
1099                output = &mut output[bytes_written..];
1100                total_bytes_written += bytes_written;
1101                if self.can_collect() != 0 {
1102                    return Err(FrameDecoderError::TargetTooSmall);
1103                }
1104                if self.is_finished() {
1105                    break;
1106                }
1107            }
1108        }
1109
1110        Ok(total_bytes_written)
1111    }
1112
1113    /// Decode multiple frames into the output slice using a serialized dictionary.
1114    ///
1115    /// # Warning
1116    ///
1117    /// Each decoded frame is initialized with the parsed dictionary, even when a
1118    /// frame header omits the optional dictionary ID. Callers must only use this
1119    /// API when they already know the input frames were encoded with that
1120    /// dictionary; otherwise decoded output can be silently corrupted.
1121    pub fn decode_all_with_dict_bytes(
1122        &mut self,
1123        input: &[u8],
1124        output: &mut [u8],
1125        raw_dictionary: &[u8],
1126    ) -> Result<usize, FrameDecoderError> {
1127        let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
1128        self.decode_all_with_dict_handle(input, output, &dict)
1129    }
1130
1131    /// Decode multiple frames into the extra capacity of the output vector.
1132    ///
1133    /// `input` must contain an exact number of frames.
1134    ///
1135    /// `output` must have enough extra capacity to hold the decompressed data.
1136    /// This function will not reallocate or grow the vector. If you don't know
1137    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
1138    ///
1139    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
1140    ///
1141    /// The length of the output vector is updated to include the decompressed data.
1142    /// The length is not changed if an error occurs.
1143    pub fn decode_all_to_vec(
1144        &mut self,
1145        input: &[u8],
1146        output: &mut Vec<u8>,
1147    ) -> Result<(), FrameDecoderError> {
1148        let len = output.len();
1149        let cap = output.capacity();
1150        output.resize(cap, 0);
1151        match self.decode_all(input, &mut output[len..]) {
1152            Ok(bytes_written) => {
1153                let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
1154                output.resize(new_len, 0);
1155                Ok(())
1156            }
1157            Err(e) => {
1158                output.resize(len, 0);
1159                Err(e)
1160            }
1161        }
1162    }
1163
1164    /// Decode a single zstd frame from `input` directly into
1165    /// `output`, bypassing the internal `DecodeBuffer` -> `read()`
1166    /// drain copy when the frame is eligible. Donor parity with the
1167    /// `ZSTD_in_dst` litBuffer placement strategy.
1168    ///
1169    /// Eligibility requires all of:
1170    /// - `frame_content_size` is present in the header (> 0).
1171    /// - `output.len() >= frame_content_size + WILDCOPY_OVERLENGTH`
1172    ///   (room for the SIMD wildcopy overshoot slack).
1173    /// - No active dictionary on `self.state` (dict_content is not
1174    ///   carried into the stack-local DecodeBuffer this method
1175    ///   builds).
1176    ///
1177    /// `content_checksum_flag` is NOT a disqualifier: when set,
1178    /// the direct path hashes the decoded `output[..content_size]`
1179    /// once at the end of decode and propagates the digest into
1180    /// the persistent scratch's `hash` so
1181    /// [`Self::get_calculated_checksum`] returns the right value.
1182    ///
1183    /// Multi-segment frames are supported via a per-block
1184    /// `DecodeBuffer::drop_to_window_size` call that caps the
1185    /// visible buffer at `window_size` at block boundaries. The
1186    /// discarded bytes stay physically in the user slice (they're
1187    /// the frame's already-decoded output); only their
1188    /// `BufferBackend::head` visibility moves forward.
1189    ///
1190    /// Note: `drop_to_window_size` runs only BETWEEN blocks, so
1191    /// within a single block `buffer.len()` can temporarily exceed
1192    /// `window_size`. `DecodeBuffer::repeat` validates match
1193    /// offsets against `buffer.len()` (not against `window_size`),
1194    /// so corrupted streams with `offset > window_size` but
1195    /// `offset <= current buffer.len()` are NOT rejected by this
1196    /// gate. Strict spec compliance for offsets in multi-segment
1197    /// frames would require an in-block offset bound that we don't
1198    /// currently enforce on either the direct or the fallback path.
1199    ///
1200    /// Non-eligible frames fall back transparently to the existing
1201    /// `decode_blocks` + `read` drain path.
1202    ///
1203    /// `input` is expected to contain a single zstd frame. Bytes
1204    /// past the end of that frame are NOT validated and are silently
1205    /// ignored — this differs from [`Self::decode_all`], which loops
1206    /// until `input` is fully consumed and will attempt to parse a
1207    /// second frame (or error) on trailing bytes. Multi-frame
1208    /// streams must use [`Self::decode_all`].
1209    ///
1210    /// On the direct path the literal pushes and
1211    /// sequence-execution match copies write straight into
1212    /// `output`, eliminating the FlatBuf-as-intermediate `read()`
1213    /// drain that dominates poorly-compressed L-7-class corpora
1214    /// (~28% of decode time on
1215    /// `level_-7_fast/decodecorpus-z000033/rust_stream`). Both
1216    /// single-segment and multi-segment frames take the direct
1217    /// path; multi-segment frames cap the visible buffer at
1218    /// `window_size` between blocks via
1219    /// `DecodeBuffer::drop_to_window_size`.
1220    ///
1221    /// Frames that aren't eligible (zero `frame_content_size`,
1222    /// active dictionary, undersized `output`) transparently fall
1223    /// back to the internal block-decode + read drain loop. The
1224    /// fallback is NOT [`Self::decode_all`] semantics: it decodes
1225    /// exactly one frame and returns; trailing bytes past the
1226    /// frame are silently ignored. Use [`Self::decode_all`] for
1227    /// multi-frame input or streams that may contain skippable
1228    /// frames.
1229    ///
1230    /// `input` is expected to contain exactly ONE non-skippable
1231    /// zstd frame. **Skippable frames are rejected with
1232    /// `ReadFrameHeaderError::SkipFrame` from `init`** — this
1233    /// method does NOT skip them. Multi-frame input or input that
1234    /// might contain skippable frames must go through
1235    /// [`Self::decode_all`], which iterates `init` and handles
1236    /// `SkipFrame` by advancing past the skippable payload.
1237    ///
1238    /// # State observability after this call
1239    ///
1240    /// On the direct path, decoded bytes are written into `output`
1241    /// via a stack-local `DecodeBuffer<UserSliceBackend>` that is
1242    /// dropped before this function returns. The persistent
1243    /// `state.decoder_scratch.buffer` stays empty. Consequently,
1244    /// after `decode_to_slice_trusted` returns:
1245    ///
1246    /// - [`Self::is_finished`] returns `true`,
1247    /// - [`Self::can_collect`] returns `0`,
1248    /// - [`Self::read`] (the crate's `io::Read` impl, which under
1249    ///   `feature = "std"` is `std::io::Read`) reads 0 bytes,
1250    /// - [`Self::collect`] returns `Some(Vec::new())`,
1251    /// - [`Self::get_calculated_checksum`] returns the correct
1252    ///   value when the frame had `content_checksum_flag` set —
1253    ///   the direct path walks the output once at end of decode
1254    ///   and propagates the digest into the persistent scratch's
1255    ///   hasher so this accessor reads the right state.
1256    ///
1257    /// Callers must use the bytes from `output[..n]` (where `n`
1258    /// is the returned count); do not mix `decode_to_slice_trusted` with
1259    /// `read`/`collect` on the same `FrameDecoder`.
1260    ///
1261    /// When the frame is NOT eligible (no FCS in the header, or
1262    /// output buffer too small for the WILDCOPY slack, or active
1263    /// dictionary), this method falls back to a single-frame
1264    /// `decode_blocks` + `read` drain loop, draining into the
1265    /// caller's `output` slice. This is NOT `decode_all`: it
1266    /// processes only one frame (no trailing-frame iteration, no
1267    /// silent skippable-frame skip) and returns
1268    /// [`FrameDecoderError::TargetTooSmall`] if the decoded
1269    /// output does not fit in `output`.
1270    ///
1271    /// # Panic / DoS surface
1272    ///
1273    /// **For trusted input only.** On the direct path
1274    /// `UserSliceBackend` uses release-mode `assert!` for capacity
1275    /// checks across all three write entry points (`extend`,
1276    /// `extend_and_fill`, `extend_from_within_unchecked`). A
1277    /// malformed Compressed block whose payload expands past the
1278    /// declared `frame_content_size` (and beyond the
1279    /// `WILDCOPY_OVERLENGTH` slack the caller sized into `output`)
1280    /// will panic mid-block rather than returning a structured
1281    /// error. The per-block `produced > content_size` guard catches
1282    /// the overshoot AFTER the block, but cannot prevent the
1283    /// in-block writes from running first.
1284    ///
1285    /// The trade-off is deliberate for this PR. Making the writes
1286    /// fallible requires extending the `BufferBackend` trait
1287    /// surface, touching every backend implementation, and
1288    /// propagating `Result<_, _>` through the entire sequence
1289    /// executor — a refactor too large to fold into the direct
1290    /// decode wiring without losing review tractability. The
1291    /// follow-up issue tracking that work (referenced below in
1292    /// "Fallible BufferBackend writes") is a hard prerequisite
1293    /// before this entry point becomes safe to expose on
1294    /// untrusted streams.
1295    ///
1296    /// Callers handling untrusted input must use [`Self::decode_all`]
1297    /// which routes through `FlatBuf` / `RingBuffer`. Those
1298    /// backends grow via `Vec::reserve` (succeeds or aborts on
1299    /// alloc failure — not error-returning), but the growable Vec
1300    /// capacity absorbs a malformed block's overshoot inside the
1301    /// allocation; the frame-level checks then turn the size
1302    /// mismatch into `FrameContentSizeMismatch` instead of OOB
1303    /// writes into a fixed-size user slice. Fallible
1304    /// `BufferBackend` writes that would let `decode_to_slice_trusted`
1305    /// remain safe on adversarial input are tracked in issue #246.
1306    // The `_trusted` suffix is part of the API contract: this
1307    // entry point is for trusted input only. Adversarial /
1308    // malformed input can panic via release-mode `assert!` inside
1309    // `UserSliceBackend`. Callers handling untrusted data MUST use
1310    // `decode_all` instead. The fallible-`BufferBackend` refactor
1311    // that would let this entry point be safe on adversarial
1312    // input is tracked as a follow-up.
1313    #[doc(alias = "decode_to_slice")]
1314    #[must_use = "decode_to_slice_trusted returns the decoded byte count; ignoring it leaves the output's effective length ambiguous"]
1315    pub fn decode_to_slice_trusted(
1316        &mut self,
1317        mut input: &[u8],
1318        output: &mut [u8],
1319    ) -> Result<usize, FrameDecoderError> {
1320        use super::block_decoder;
1321        use super::buffer_backend::WILDCOPY_OVERLENGTH;
1322        use super::decode_buffer::DecodeBuffer;
1323        use super::scratch::DirectScratch;
1324        use super::user_slice_buf::UserSliceBackend;
1325        use crate::io::Read;
1326        use FrameDecoderError as err;
1327
1328        // Parse the frame header. This populates `self.state` with
1329        // the frame descriptor + resets the per-frame scratch
1330        // (DecoderScratchKind::Flat for single-segment, ::Ring
1331        // otherwise).
1332        //
1333        // Skippable frames are reported by `init` as
1334        // `ReadFrameHeaderError::SkipFrame`. The direct path
1335        // doesn't have a state model for "skip + decode next" since
1336        // it processes a single frame at most — propagate the error
1337        // unchanged so callers learn this input needs `decode_all`
1338        // (which iterates init and advances past skippable
1339        // payloads).
1340        self.init(&mut input)?;
1341
1342        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
1343        let content_size = state.frame_header.frame_content_size();
1344        let needed = content_size.saturating_add(WILDCOPY_OVERLENGTH as u64);
1345        // Eligibility independent of `single_segment_flag`:
1346        // multi-segment frames work via a coarse, block-boundary
1347        // cap on the visible buffer. The post-block
1348        // `drop_to_window_size` call in the loop below advances the
1349        // backend's `head` so `buffer.len()` doesn't grow past
1350        // `window_size` between blocks — bytes physically remain
1351        // in the user slice, just leave `len()`'s visible range so
1352        // a subsequent block's match-offset cannot reach back
1353        // arbitrarily far via stale history.
1354        //
1355        // This is NOT strict spec enforcement of
1356        // `offset <= window_size`: within a single block
1357        // `buffer.len()` can temporarily exceed `window_size` and
1358        // `DecodeBuffer::repeat` validates against `buffer.len()`
1359        // (not `window_size`), so an in-block match with
1360        // `offset > window_size` but `offset <= current
1361        // buffer.len()` is accepted on both direct and fallback
1362        // paths. See the `decode_to_slice_trusted` doc for the full
1363        // limitation note.
1364        //
1365        // Disabled when a dictionary is active: the persistent
1366        // `DecoderScratch::buffer.dict_content` seeded by
1367        // `init_from_dict` is NOT carried into the stack-local
1368        // `DecodeBuffer<UserSliceBackend>` we build below, so any
1369        // match that reaches into the external dictionary would
1370        // decode against an empty prefix. Fall back to the regular
1371        // path which keeps the dict in the persistent buffer.
1372        let dict_active = state.using_dict.is_some();
1373        // `content_checksum_flag` is no longer a disqualifier. The
1374        // direct path writes the decoded bytes into the user's
1375        // `output` slice, and we hash that slice once at the end of
1376        // decode (single sequential pass over cache-hot data). The
1377        // computed digest is then propagated into the persistent
1378        // `state.decoder_scratch.buffer.hash` so the public
1379        // `get_calculated_checksum()` accessor reads the correct
1380        // value just like on the `decode_all` path.
1381        let eligible = content_size > 0 && !dict_active && (output.len() as u64) >= needed;
1382
1383        if !eligible {
1384            // Frame doesn't qualify for direct decode (empty
1385            // frame_content_size — header lacks FCS — or output too
1386            // small for the WILDCOPY slack). Fall through to the
1387            // existing per-block decode + drain path — `init`
1388            // already populated `self.state`, so `decode_blocks` +
1389            // `read` pick up from there.
1390            let mut output_tail: &mut [u8] = output;
1391            let mut total_bytes_written = 0;
1392            loop {
1393                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
1394                let bytes_written = self
1395                    .read(output_tail)
1396                    .map_err(err::FailedToDrainDecodebuffer)?;
1397                output_tail = &mut output_tail[bytes_written..];
1398                total_bytes_written += bytes_written;
1399                if self.can_collect() != 0 {
1400                    return Err(err::TargetTooSmall);
1401                }
1402                if self.is_finished() {
1403                    break;
1404                }
1405            }
1406            // When the frame header declares a `frame_content_size`,
1407            // ensure the drained byte count actually matches it.
1408            // Without this check a corrupt frame that sets FCS but
1409            // ends early (e.g. last-block flag on a sub-FCS payload)
1410            // would silently return success here, while the
1411            // eligible-frame direct path catches the same condition
1412            // via `FrameContentSizeMismatch`. The fallback path now
1413            // matches that behaviour.
1414            //
1415            // Use `fcs_declared()` (NOT `content_size > 0`) as the
1416            // "is FCS on the wire" gate. The two diverge on the
1417            // legitimate edge case of an empty frame with an
1418            // EXPLICIT FCS=0 on the wire (FCS_flag>=1 with bytes
1419            // reading 0, or single_segment+FCS_flag=0 with the
1420            // 1-byte FCS=0): `content_size` is 0 in BOTH the
1421            // "absent" and "explicitly zero" cases, while
1422            // `fcs_declared()` returns false only in the truly
1423            // absent case.
1424            let state = self.state.as_ref().expect("state populated by init");
1425            if state.frame_header.fcs_declared() && (total_bytes_written as u64) != content_size {
1426                return Err(err::FrameContentSizeMismatch {
1427                    declared: content_size,
1428                    produced: total_bytes_written as u64,
1429                });
1430            }
1431            return Ok(total_bytes_written);
1432        }
1433
1434        // Direct decode path. Borrow the persistent fields
1435        // (HUF/FSE tables, offset_hist, scratch Vecs) out of the
1436        // existing single-segment Flat scratch; we keep them
1437        // populated across `decode_to_slice_trusted` calls (HUF table reuse
1438        // is the main scratch-reuse win on small frames). Then
1439        // construct a stack-local DecodeBuffer<UserSliceBackend<'o>>
1440        // over `output` and bundle into a `DirectScratch`.
1441        // Borrow persistent fields out of whichever scratch variant
1442        // `init` produced (Flat for single_segment, Ring for
1443        // multi-segment) — both expose the same set of HUF/FSE/Vec
1444        // fields; only `buffer` differs and we don't use that here.
1445        // Macro-style binding to avoid the closure / generic
1446        // gymnastics of returning multiple &mut from a match arm.
1447        let (huf, fse, offset_hist, literals_buffer, sequences, block_content_buffer, window_size) =
1448            match &mut state.decoder_scratch {
1449                DecoderScratchKind::Flat(s) => (
1450                    &mut s.huf,
1451                    &mut s.fse,
1452                    &mut s.offset_hist,
1453                    &mut s.literals_buffer,
1454                    &mut s.sequences,
1455                    &mut s.block_content_buffer,
1456                    s.buffer.window_size,
1457                ),
1458                DecoderScratchKind::Ring(s) => (
1459                    &mut s.huf,
1460                    &mut s.fse,
1461                    &mut s.offset_hist,
1462                    &mut s.literals_buffer,
1463                    &mut s.sequences,
1464                    &mut s.block_content_buffer,
1465                    s.buffer.window_size,
1466                ),
1467            };
1468        let backend = UserSliceBackend::from_slice(output);
1469        let buffer = DecodeBuffer::from_backend(backend, window_size);
1470        let mut direct = DirectScratch {
1471            huf,
1472            fse,
1473            offset_hist,
1474            literals_buffer,
1475            sequences,
1476            block_content_buffer,
1477            buffer,
1478        };
1479
1480        // Block loop. Mirrors `decode_blocks` (without the
1481        // strategy-bounded early exit — we always decode the whole
1482        // frame in one shot for the direct path). Keeps
1483        // `state.bytes_read_counter` / `state.block_counter` in
1484        // sync with `decode_blocks` so post-call accessors
1485        // (`bytes_read_from_source`, `blocks_decoded`) return
1486        // accurate values.
1487        let mut block_dec = block_decoder::new();
1488        // Track total output bytes against the declared
1489        // `frame_content_size` via the buffer's actual write
1490        // counter — `BlockHeader.decompressed_size` is 0 for
1491        // Compressed blocks (the header parser can't know the
1492        // expanded size before decoding the body), so per-header
1493        // tracking would always count 0 for those blocks and
1494        // miscount frames that aren't pure Raw/RLE.
1495        let mut produced: u64 = 0;
1496        loop {
1497            let (block_header, hsize) = block_dec
1498                .read_block_header(&mut input)
1499                .map_err(err::FailedToReadBlockHeader)?;
1500            state.bytes_read_counter += u64::from(hsize);
1501            // Pre-flight FCS check ONLY for Raw / RLE blocks where
1502            // `decompressed_size` is the actual block output size.
1503            // For Compressed blocks the header field is 0; the
1504            // post-decode check below catches overflow via the
1505            // backend's actual write counter delta.
1506            let block_upper = u64::from(block_header.decompressed_size);
1507            if block_upper > 0 && produced + block_upper > content_size {
1508                // Frame is corrupt — Raw/RLE block headers claim
1509                // more output than the FCS allows. Caller's buffer
1510                // was sized against FCS, so this is decoder-side
1511                // corruption, not user sizing.
1512                return Err(err::FrameContentSizeMismatch {
1513                    declared: content_size,
1514                    produced: produced + block_upper,
1515                });
1516            }
1517            // Slice-source fast path: consume the block body
1518            // straight from `input` without copying into the
1519            // persistent `block_content_buffer`.
1520            let before = direct.buffer.total_produced();
1521            let body_consumed = block_dec
1522                .decode_block_content_from_slice(&block_header, &mut direct, &mut input)
1523                .map_err(err::FailedToReadBlockBody)?;
1524            produced = direct.buffer.total_produced();
1525            // Post-decode FCS overflow check. Works uniformly for
1526            // Raw/RLE/Compressed since it reads the actual bytes
1527            // written by the backend rather than the header's
1528            // (possibly zero) `decompressed_size`.
1529            if produced > content_size {
1530                return Err(err::FrameContentSizeMismatch {
1531                    declared: content_size,
1532                    produced,
1533                });
1534            }
1535            // Silence unused-binding warning when this delta isn't
1536            // consulted — it's there so future debug builds can
1537            // assert (produced - before) <= MAX_BLOCK_SIZE if the
1538            // spec invariant ever needs an explicit gate.
1539            let _ = before;
1540            state.bytes_read_counter += body_consumed;
1541            state.block_counter += 1;
1542            // Cap the visible buffer at window_size between blocks
1543            // so the next block's match-offset validation matches
1544            // the spec's `offset <= window_size` rule. Bytes
1545            // physically stay in the user slice; we just narrow
1546            // the visible range via `head`. For single-segment
1547            // frames `content_size <= window_size` so this is a
1548            // no-op on every iteration; for multi-segment frames
1549            // it advances `head` once `tail - head` outgrows the
1550            // window.
1551            direct.buffer.drop_to_window_size();
1552            if block_header.last_block {
1553                if state.frame_header.descriptor.content_checksum_flag() {
1554                    let mut chksum = [0u8; 4];
1555                    input
1556                        .read_exact(&mut chksum)
1557                        .map_err(err::FailedToReadChecksum)?;
1558                    state.bytes_read_counter += 4;
1559                    state.check_sum = Some(u32::from_le_bytes(chksum));
1560                }
1561                break;
1562            }
1563        }
1564        // Final sanity: blocks summed to exactly `content_size`. A
1565        // malformed frame with `last_block` set early (or one whose
1566        // block headers under-count) would land here. Distinct from
1567        // TargetTooSmall — the caller did their part, the frame
1568        // itself is corrupt.
1569        if produced != content_size {
1570            return Err(err::FrameContentSizeMismatch {
1571                declared: content_size,
1572                produced,
1573            });
1574        }
1575
1576        // `direct.buffer.len()` would only show the visible (post
1577        // head-advance) range, not the total bytes physically
1578        // written into the user slice. The decode succeeded so
1579        // `content_size` is the authoritative byte count — the
1580        // backend wrote exactly that many bytes starting at
1581        // `output[0]`.
1582        let written = content_size as usize;
1583        state.frame_finished = true;
1584        // Drop the stack-local DirectScratch (and its DecodeBuffer
1585        // borrow on `output`) so we can re-borrow `output` for the
1586        // hash pass below. After this point `direct` is gone.
1587        drop(direct);
1588        #[cfg(feature = "hash")]
1589        {
1590            // Direct path bypasses the per-write hash accounting
1591            // (DecodeBuffer hashes during drain; the direct path
1592            // never drains because the user slice IS the buffer).
1593            // Walk the decoded output once and propagate the
1594            // resulting hasher state into the persistent scratch's
1595            // buffer so `get_calculated_checksum()` returns the
1596            // right value. Cost: ~330 us / MiB at xxhash's
1597            // ~3 GB/s throughput on x86_64, against cache-hot data.
1598            //
1599            // Done unconditionally for every successful direct
1600            // decode (not just frames with `content_checksum_flag`)
1601            // so `get_calculated_checksum()` returns the running
1602            // digest path-independently — matches what
1603            // `decode_all`'s drain-time hashing produces on
1604            // checksumless frames too.
1605            use core::hash::Hasher;
1606            let mut hasher = twox_hash::XxHash64::with_seed(0);
1607            hasher.write(&output[..written]);
1608            match &mut state.decoder_scratch {
1609                DecoderScratchKind::Flat(s) => s.buffer.hash = hasher,
1610                DecoderScratchKind::Ring(s) => s.buffer.hash = hasher,
1611            }
1612        }
1613        Ok(written)
1614    }
1615}
1616
1617/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
1618/// this will retain window_size bytes, else it will drain it completely
1619impl Read for FrameDecoder {
1620    fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
1621        let state = match &mut self.state {
1622            None => return Ok(0),
1623            Some(s) => s,
1624        };
1625        if state.frame_finished {
1626            state.decoder_scratch.buffer_read_all(target)
1627        } else {
1628            state.decoder_scratch.buffer_read(target)
1629        }
1630    }
1631}
1632
1633#[cfg(test)]
1634mod tests {
1635    extern crate std;
1636
1637    use super::{DictionaryHandle, FrameDecoder};
1638    use crate::encoding::{CompressionLevel, FrameCompressor};
1639    use alloc::vec::Vec;
1640
1641    #[test]
1642    fn decode_to_slice_trusted_matches_decode_all_on_single_segment_frame() {
1643        // Roundtrip a small payload through the encoder, then decode
1644        // it via both `decode_all` and `decode_to_slice_trusted`. Both paths
1645        // must produce identical output bytes — the only difference
1646        // is the internal buffer/drain shape, not the decoded
1647        // semantics. This is the regression gate for the
1648        // direct-decode wiring.
1649        let payload: Vec<u8> = (0..4096u32).map(|i| (i & 0xFF) as u8).collect();
1650        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
1651        compressor.set_source(payload.as_slice());
1652        let mut compressed = Vec::new();
1653        compressor.set_drain(&mut compressed);
1654        compressor.compress();
1655
1656        // Baseline: decode_all.
1657        let mut dec_a = FrameDecoder::new();
1658        let mut out_a = alloc::vec![0u8; payload.len()];
1659        let n_a = dec_a
1660            .decode_all(compressed.as_slice(), &mut out_a)
1661            .expect("decode_all should succeed");
1662        assert_eq!(n_a, payload.len());
1663        assert_eq!(&out_a[..n_a], payload.as_slice());
1664
1665        // Direct: decode_to_slice_trusted with WILDCOPY slack.
1666        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
1667        let mut dec_b = FrameDecoder::new();
1668        let mut out_b = alloc::vec![0u8; payload.len() + slack];
1669        let n_b = dec_b
1670            .decode_to_slice_trusted(compressed.as_slice(), &mut out_b)
1671            .expect("decode_to_slice_trusted should succeed");
1672        assert_eq!(
1673            n_b,
1674            payload.len(),
1675            "direct decode produced wrong byte count"
1676        );
1677        assert_eq!(&out_b[..n_b], payload.as_slice());
1678    }
1679
1680    #[test]
1681    fn decode_to_slice_trusted_multi_segment_frame_decodes_correctly() {
1682        // Multi-segment frame: payload large enough that the
1683        // encoder's default frame layout has `single_segment_flag =
1684        // false` and `window_size < frame_content_size`. The direct
1685        // path must cap the visible buffer at window_size after each
1686        // block (drop_to_window_size) so match-offset validation
1687        // matches the spec rule `offset <= window_size`, and still
1688        // produce the same bytes as decode_all on the
1689        // FlatBuf/Ring-backed path.
1690        //
1691        // Make the payload structured so multi-segment behavior
1692        // actually kicks in: 2 MiB of repeating + random-ish bytes
1693        // forces window_size lower than content_size at the encoder.
1694        let mut payload: Vec<u8> = Vec::with_capacity(2 * 1024 * 1024);
1695        for i in 0..payload.capacity() {
1696            payload.push((i.wrapping_mul(2_654_435_761) & 0xFF) as u8);
1697        }
1698        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
1699        compressor.set_source(payload.as_slice());
1700        let mut compressed = Vec::new();
1701        compressor.set_drain(&mut compressed);
1702        compressor.compress();
1703
1704        // Baseline: decode_all through the FlatBuf+drain path.
1705        let mut dec_a = FrameDecoder::new();
1706        let mut out_a = alloc::vec![0u8; payload.len()];
1707        let n_a = dec_a
1708            .decode_all(compressed.as_slice(), &mut out_a)
1709            .expect("decode_all should succeed");
1710        assert_eq!(n_a, payload.len());
1711        assert_eq!(&out_a[..n_a], payload.as_slice());
1712
1713        // Direct path: must give identical bytes via UserSliceBackend
1714        // + per-block drop_to_window_size.
1715        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
1716        let mut dec_b = FrameDecoder::new();
1717        let mut out_b = alloc::vec![0u8; payload.len() + slack];
1718        let n_b = dec_b
1719            .decode_to_slice_trusted(compressed.as_slice(), &mut out_b)
1720            .expect("decode_to_slice_trusted should succeed on multi-segment frame");
1721        assert_eq!(n_b, payload.len(), "wrong byte count on direct path");
1722        assert_eq!(&out_b[..n_b], payload.as_slice());
1723
1724        // Sanity-check: confirm the encoded frame really IS
1725        // multi-segment. If a future encoder default changes,
1726        // catching the assumption here is better than silently
1727        // testing single_segment on this name.
1728        let mut sanity = FrameDecoder::new();
1729        sanity.init(&mut compressed.as_slice()).unwrap();
1730        assert!(
1731            !sanity
1732                .state
1733                .as_ref()
1734                .unwrap()
1735                .frame_header
1736                .descriptor
1737                .single_segment_flag(),
1738            "test precondition violated: frame is single-segment, rename or resize"
1739        );
1740    }
1741
1742    #[cfg(feature = "hash")]
1743    #[test]
1744    fn decode_to_slice_trusted_propagates_checksum_into_persistent_scratch() {
1745        // Direct path on a checksum-flagged frame: the FrameCompressor
1746        // under `feature = "hash"` sets content_checksum_flag, so the
1747        // decoded frame has a recorded checksum. After
1748        // decode_to_slice_trusted we must be able to verify it matches via
1749        // the public get_calculated_checksum() accessor — the digest
1750        // is computed by walking output at end of decode and stored
1751        // into the persistent scratch's hasher.
1752        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
1753        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
1754        compressor.set_source(payload.as_slice());
1755        let mut compressed = Vec::new();
1756        compressor.set_drain(&mut compressed);
1757        compressor.compress();
1758
1759        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
1760        let mut dec = FrameDecoder::new();
1761        let mut out = alloc::vec![0u8; payload.len() + slack];
1762        let n = dec
1763            .decode_to_slice_trusted(compressed.as_slice(), &mut out)
1764            .expect("decode_to_slice_trusted with checksum must succeed");
1765        assert_eq!(n, payload.len());
1766        assert_eq!(&out[..n], payload.as_slice());
1767
1768        // Both sides must report the same checksum: the frame header
1769        // carries the stored u32, and get_calculated_checksum reads
1770        // the running digest the direct path just propagated.
1771        let stored = dec.get_checksum_from_data();
1772        let calculated = dec.get_calculated_checksum();
1773        assert!(stored.is_some(), "frame must carry stored checksum");
1774        assert!(
1775            calculated.is_some(),
1776            "direct path must propagate calculated checksum"
1777        );
1778        assert_eq!(
1779            stored, calculated,
1780            "stored vs calculated checksum mismatch on direct path"
1781        );
1782    }
1783
1784    #[test]
1785    fn decode_to_slice_trusted_fcs_overflow_via_corrupt_frame_returns_structured_error() {
1786        // Hand-build a corrupt frame that declares
1787        // frame_content_size = 4 but the (last) block carries a
1788        // larger Raw payload. The pre-flight FCS check inside the
1789        // direct path's block loop catches this and returns the
1790        // structured FrameContentSizeMismatch variant — not a
1791        // panic, not a generic TargetTooSmall.
1792        //
1793        // Frame layout (single_segment, FCS=4):
1794        //   magic            4 bytes  0xFD2FB528
1795        //   FHD              1 byte   single_segment=1, no checksum,
1796        //                              FCS field size = 0 (-> 1-byte FCS)
1797        //   FCS              1 byte   0x04
1798        //   block_header     3 bytes  last=1, type=Raw, block_size=10
1799        //   block_payload    10 bytes 0xAA repeated
1800        let mut frame = alloc::vec::Vec::new();
1801        // magic
1802        frame.extend_from_slice(&0xFD2FB528u32.to_le_bytes());
1803        // FHD: single_segment=1, fcs_flag=0 (1-byte FCS), no checksum,
1804        // no dict. Bit layout: FCS(7-6)=0, single_segment(5)=1,
1805        // reserved/uncs(4)=0, content_checksum(2)=0, dict(0-1)=00.
1806        frame.push(0b0010_0000);
1807        // FCS: 1 byte
1808        frame.push(4);
1809        // Block header: cBlockSize=10, type=Raw (0), last=1
1810        // 3-byte LE: bit0=last, bits1-2=type(2 bits), bits3-23=size
1811        let cblock_size: u32 = 10;
1812        let bh: u32 = 1 | (cblock_size << 3); // last=1, type=Raw=0
1813        frame.push((bh & 0xFF) as u8);
1814        frame.push((bh >> 8) as u8);
1815        frame.push((bh >> 16) as u8);
1816        // Payload — 10 bytes that, if decoded, would exceed FCS=4.
1817        frame.extend(core::iter::repeat_n(0xAAu8, 10));
1818
1819        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
1820        let mut dec = FrameDecoder::new();
1821        let mut out = alloc::vec![0u8; 4 + slack];
1822        let err = dec
1823            .decode_to_slice_trusted(&frame, &mut out)
1824            .expect_err("FCS-overflow frame must fail decode");
1825        assert!(
1826            matches!(
1827                err,
1828                super::FrameDecoderError::FrameContentSizeMismatch { .. }
1829            ),
1830            "expected FrameContentSizeMismatch, got {:?}",
1831            err
1832        );
1833    }
1834
1835    #[test]
1836    fn decode_to_slice_trusted_falls_back_when_output_too_small_for_wildcopy_slack() {
1837        // Output sized exactly to frame_content_size (no
1838        // WILDCOPY_OVERLENGTH slack) must NOT trigger the direct
1839        // path — the burst's `extend_from_within_unchecked` writes
1840        // past `tail` into the slack region. Direct dispatcher
1841        // recognises this and falls back to the FlatBuf + drain
1842        // path which still produces the right output.
1843        let payload: Vec<u8> = (0..2048u32)
1844            .map(|i| (i.wrapping_mul(31) & 0xFF) as u8)
1845            .collect();
1846        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
1847        compressor.set_source(payload.as_slice());
1848        let mut compressed = Vec::new();
1849        compressor.set_drain(&mut compressed);
1850        compressor.compress();
1851
1852        let mut dec = FrameDecoder::new();
1853        // Exactly payload.len(), no slack — direct path is gated out.
1854        let mut out = alloc::vec![0u8; payload.len()];
1855        let n = dec
1856            .decode_to_slice_trusted(compressed.as_slice(), &mut out)
1857            .expect("decode_to_slice_trusted should still succeed via fallback");
1858        assert_eq!(n, payload.len());
1859        assert_eq!(&out[..n], payload.as_slice());
1860    }
1861
1862    #[test]
1863    fn decode_to_slice_trusted_fallback_validates_fcs_against_total_output() {
1864        // Synthetic single-segment frame: FCS = 20 bytes, but the
1865        // last-block flag fires after only 4 bytes of raw payload.
1866        // On the direct path this would trip the post-block
1867        // `produced > content_size` check; the fallback path
1868        // (eligible=false because output is sized exactly to FCS,
1869        // no WILDCOPY slack) used to silently return Ok(4). With
1870        // the fix it now surfaces `FrameContentSizeMismatch`
1871        // matching the direct path.
1872        //
1873        // Frame layout: 4 B magic | 1 B FHD (single_segment=1,
1874        // FCS_flag=3 → 8-byte FCS) | 8 B FCS=20 | block header
1875        // (Raw, last, size=4) | 4 raw bytes.
1876        let mut wire = Vec::new();
1877        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes()); // magic
1878        // FHD: FCS_flag=3 (8-byte FCS) <<6 | single_segment=1 <<5.
1879        wire.push(0b1110_0000);
1880        wire.extend_from_slice(&20u64.to_le_bytes()); // declared FCS
1881        // Block header: (size << 3) | (block_type << 1) | last_block.
1882        // Raw block (block_type=0), last_block=1, size=4 → 0b00100001 = 0x21.
1883        wire.push(0x21);
1884        wire.push(0x00);
1885        wire.push(0x00);
1886        wire.extend_from_slice(&[1u8, 2, 3, 4]);
1887
1888        let mut dec = FrameDecoder::new();
1889        // Size output exactly at declared FCS (no WILDCOPY slack)
1890        // so the eligibility check gates the direct path out.
1891        let mut out = alloc::vec![0u8; 20];
1892        let err = dec
1893            .decode_to_slice_trusted(wire.as_slice(), &mut out)
1894            .expect_err("fallback must reject corrupt FCS underflow");
1895        match err {
1896            crate::decoding::errors::FrameDecoderError::FrameContentSizeMismatch {
1897                declared,
1898                produced,
1899            } => {
1900                assert_eq!(declared, 20);
1901                assert_eq!(produced, 4);
1902            }
1903            other => panic!("expected FrameContentSizeMismatch, got {other:?}"),
1904        }
1905    }
1906
1907    #[test]
1908    fn decode_to_slice_trusted_fallback_treats_explicit_fcs_zero_as_declared() {
1909        // Synthetic multi-segment frame with FCS_flag=2 (4-byte
1910        // FCS) explicitly set to 0. The header DECLARES zero
1911        // content, but the body carries a 5-byte raw last-block.
1912        // `fcs_declared()` must return true (the field is on the
1913        // wire) so the fallback's post-decode size check sees the
1914        // mismatch — even though `frame_content_size == 0`. This
1915        // is exactly the FCS=0 edge case where the previous
1916        // `content_size > 0` proxy would have silently accepted
1917        // the corrupt frame.
1918        //
1919        // Frame layout:
1920        //   4 B magic            — 28 B5 2F FD
1921        //   1 B FHD              — FCS_flag=2 (bits 7-6), no
1922        //                          single_segment, content_checksum=0,
1923        //                          dict_id_flag=0 → 0b1000_0000
1924        //   1 B window_descriptor — exp=10, mantissa=0 → window=1 MiB
1925        //   4 B FCS              — 0 LE
1926        //   3 B block header     — raw, last, size=5 → 0x29 0x00 0x00
1927        //   5 B raw payload      — anything non-empty
1928        let mut wire = Vec::new();
1929        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
1930        wire.push(0b1000_0000); // FHD: FCS_flag=2, others 0.
1931        wire.push(0x50); // window_descriptor: exp=10, mantissa=0.
1932        wire.extend_from_slice(&0u32.to_le_bytes()); // FCS = 0.
1933        // Block header (24-bit LE): (size << 3) | (block_type << 1) | last_block
1934        // = (5 << 3) | (0 << 1) | 1 = 0x29.
1935        wire.push(0x29);
1936        wire.push(0x00);
1937        wire.push(0x00);
1938        wire.extend_from_slice(&[1u8, 2, 3, 4, 5]);
1939
1940        let mut dec = FrameDecoder::new();
1941        // FCS=0 declared, so eligibility (`content_size > 0`)
1942        // false — falls through to the drain loop. Output buffer
1943        // size doesn't matter for the eligibility check here;
1944        // give it some room so `read()` can drain the block.
1945        let mut out = alloc::vec![0u8; 16];
1946        let err = dec
1947            .decode_to_slice_trusted(wire.as_slice(), &mut out)
1948            .expect_err("corrupt FCS=0 + 5-byte block must error");
1949        match err {
1950            crate::decoding::errors::FrameDecoderError::FrameContentSizeMismatch {
1951                declared,
1952                produced,
1953            } => {
1954                assert_eq!(declared, 0);
1955                assert_eq!(produced, 5);
1956            }
1957            other => panic!("expected FrameContentSizeMismatch, got {other:?}"),
1958        }
1959    }
1960
1961    #[test]
1962    fn decode_to_slice_trusted_fallback_accepts_honest_explicit_fcs_zero() {
1963        // Companion to the corrupt-FCS=0 test above: an HONEST
1964        // empty frame with FCS_flag=2 (4-byte FCS) explicitly set
1965        // to 0 AND a 0-byte raw last-block. `fcs_declared()`
1966        // returns true and `content_size == 0 == total_written`,
1967        // so the fallback validation accepts the frame instead of
1968        // misreporting a mismatch.
1969        //
1970        // (Single-segment FCS=0 would test a similar invariant
1971        // but trips header-stage validation: `window_size =
1972        // frame_content_size = 0 < MIN_WINDOW_SIZE` fails the
1973        // window-size sanity check before decode runs. Use the
1974        // multi-segment shape where `window_size` comes from
1975        // `window_descriptor` independently of FCS.)
1976        //
1977        // Frame layout:
1978        //   4 B magic
1979        //   1 B FHD              — FCS_flag=2, others 0 → 0x80
1980        //   1 B window_descriptor — exp=10 → 1 MiB window
1981        //   4 B FCS              — 0 LE
1982        //   3 B block header     — raw, last, size=0 → 0x01 0x00 0x00
1983        let mut wire = Vec::new();
1984        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
1985        wire.push(0b1000_0000);
1986        wire.push(0x50);
1987        wire.extend_from_slice(&0u32.to_le_bytes());
1988        // Block header: (0 << 3) | (0 << 1) | 1 = 0x01.
1989        wire.push(0x01);
1990        wire.push(0x00);
1991        wire.push(0x00);
1992
1993        let mut dec = FrameDecoder::new();
1994        let mut out = alloc::vec![0u8; 16];
1995        let n = dec
1996            .decode_to_slice_trusted(wire.as_slice(), &mut out)
1997            .expect("honest FCS=0 + empty block must succeed");
1998        assert_eq!(n, 0);
1999    }
2000
2001    #[test]
2002    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
2003        let payload = b"reset-without-dict-id";
2004        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
2005        compressor.set_source(payload.as_slice());
2006        let mut compressed = Vec::new();
2007        compressor.set_drain(&mut compressed);
2008        compressor.compress();
2009
2010        let dict_raw = include_bytes!("../../dict_tests/dictionary");
2011        let handle = DictionaryHandle::decode_dict(dict_raw).expect("dictionary should parse");
2012
2013        let mut decoder = FrameDecoder::new();
2014        decoder
2015            .reset_with_dict_handle(compressed.as_slice(), &handle)
2016            .expect("reset should succeed");
2017        let state = decoder.state.as_ref().expect("state should be initialized");
2018        assert!(state.frame_header.dictionary_id().is_none());
2019        assert_eq!(state.using_dict, Some(handle.id()));
2020    }
2021
2022    #[cfg(feature = "lsm")]
2023    mod expect_validation {
2024        use super::*;
2025        use crate::decoding::errors::FrameDecoderError;
2026
2027        fn compress(payload: &[u8]) -> Vec<u8> {
2028            let mut compressor = FrameCompressor::new(CompressionLevel::Default);
2029            compressor.set_source(payload);
2030            let mut compressed = Vec::new();
2031            compressor.set_drain(&mut compressed);
2032            compressor.compress();
2033            compressed
2034        }
2035
2036        fn compress_with_dict(payload: &[u8], dict_raw: &[u8]) -> Vec<u8> {
2037            let mut compressor = FrameCompressor::new(CompressionLevel::Default);
2038            compressor
2039                .set_dictionary_from_bytes(dict_raw)
2040                .expect("dict load");
2041            compressor.set_source(payload);
2042            let mut compressed = Vec::new();
2043            compressor.set_drain(&mut compressed);
2044            compressor.compress();
2045            compressed
2046        }
2047
2048        #[test]
2049        fn expect_dict_id_none_default_allows_anything() {
2050            let compressed = compress(b"hello-no-expect");
2051            let mut decoder = FrameDecoder::new();
2052            decoder
2053                .reset(compressed.as_slice())
2054                .expect("default None passes");
2055        }
2056
2057        #[test]
2058        fn expect_dict_id_zero_matches_frame_without_dict_id() {
2059            // Default-encoded frame has no dict_id; pinning Some(0)
2060            // ("no dictionary expected") must accept it.
2061            let compressed = compress(b"payload");
2062            let mut decoder = FrameDecoder::new();
2063            decoder.expect_dict_id(Some(0));
2064            decoder
2065                .reset(compressed.as_slice())
2066                .expect("Some(0) ~ None");
2067        }
2068
2069        #[test]
2070        fn expect_dict_id_matching_value_passes() {
2071            let dict_raw = include_bytes!("../../dict_tests/dictionary");
2072            let handle = DictionaryHandle::decode_dict(dict_raw).expect("dict parse");
2073            let actual_id = handle.id();
2074
2075            let compressed = compress_with_dict(b"payload-with-dict", dict_raw);
2076
2077            let mut decoder = FrameDecoder::new();
2078            decoder.expect_dict_id(Some(actual_id));
2079            // Decode requires the dict to be registered; using
2080            // reset_with_dict_handle for that.
2081            decoder
2082                .reset_with_dict_handle(compressed.as_slice(), &handle)
2083                .expect("matching dict_id passes");
2084        }
2085
2086        #[test]
2087        fn expect_dict_id_mismatching_value_fails_before_decode() {
2088            let dict_raw = include_bytes!("../../dict_tests/dictionary");
2089            let handle = DictionaryHandle::decode_dict(dict_raw).expect("dict parse");
2090            let actual_id = handle.id();
2091            let wrong_id = actual_id.wrapping_add(1);
2092
2093            let compressed = compress_with_dict(b"payload-with-dict", dict_raw);
2094
2095            let mut decoder = FrameDecoder::new();
2096            decoder.expect_dict_id(Some(wrong_id));
2097            let err = decoder
2098                .reset_with_dict_handle(compressed.as_slice(), &handle)
2099                .expect_err("mismatch must fail");
2100            match err {
2101                FrameDecoderError::UnexpectedDictId { expected, found } => {
2102                    assert_eq!(expected, Some(wrong_id));
2103                    assert_eq!(found, Some(actual_id));
2104                }
2105                other => panic!("expected UnexpectedDictId, got {other:?}"),
2106            }
2107        }
2108
2109        #[test]
2110        fn expect_dict_id_nonzero_fails_on_frame_without_dict_id() {
2111            // Frame has no dict_id; expecting Some(42) (non-zero)
2112            // must fail with found = None.
2113            let compressed = compress(b"no-dict-frame");
2114            let mut decoder = FrameDecoder::new();
2115            decoder.expect_dict_id(Some(42));
2116            let err = decoder
2117                .reset(compressed.as_slice())
2118                .expect_err("nonzero expectation on dictless frame must fail");
2119            match err {
2120                FrameDecoderError::UnexpectedDictId { expected, found } => {
2121                    assert_eq!(expected, Some(42));
2122                    assert_eq!(found, None);
2123                }
2124                other => panic!("expected UnexpectedDictId, got {other:?}"),
2125            }
2126        }
2127
2128        #[test]
2129        fn expect_window_descriptor_none_default_allows_anything() {
2130            let compressed = compress(b"hello-no-wd-expect");
2131            let mut decoder = FrameDecoder::new();
2132            decoder
2133                .reset(compressed.as_slice())
2134                .expect("default None passes");
2135        }
2136
2137        #[test]
2138        fn expect_window_descriptor_mismatch_fails_before_decode() {
2139            // Compress a payload large enough to force a
2140            // multi-segment frame (window_descriptor on wire).
2141            // Default compression at >256 KiB produces multi-
2142            // segment frames with a real window_descriptor byte.
2143            let payload = alloc::vec![0xABu8; 512 * 1024];
2144            let compressed = compress(&payload);
2145
2146            // Read the actual window_descriptor by decoding once
2147            // without expectations, then pin a wrong value.
2148            let mut probe_decoder = FrameDecoder::new();
2149            probe_decoder.reset(compressed.as_slice()).unwrap();
2150            let probe_state = probe_decoder.state.as_ref().unwrap();
2151            let actual_wd = probe_state
2152                .frame_header
2153                .window_descriptor()
2154                .expect("multi-segment frame should expose window_descriptor");
2155            let wrong_wd = actual_wd.wrapping_add(0x10); // bump exponent
2156
2157            let mut decoder = FrameDecoder::new();
2158            decoder.expect_window_descriptor(Some(wrong_wd));
2159            let err = decoder
2160                .reset(compressed.as_slice())
2161                .expect_err("wrong window_descriptor must fail");
2162            match err {
2163                FrameDecoderError::UnexpectedWindowDescriptor { expected, found } => {
2164                    assert_eq!(expected, wrong_wd);
2165                    assert_eq!(found, Some(actual_wd));
2166                }
2167                other => panic!("expected UnexpectedWindowDescriptor, got {other:?}"),
2168            }
2169        }
2170
2171        /// Build a minimal synthetic single-segment zstd frame
2172        /// carrying a 4-byte raw payload. RFC 8878 §3.1.1.1
2173        /// layout, hand-rolled because our default
2174        /// `FrameCompressor` settings don't emit
2175        /// `single_segment_flag` for tiny inputs.
2176        ///
2177        /// Wire bytes (13 total for 4-byte payload):
2178        /// ```text
2179        /// 28 B5 2F FD       magic
2180        /// 20                FHD: single_segment=1, FCS_flag=0
2181        /// 04                FCS (single byte, value = payload.len())
2182        /// 21 00 00          block header: raw, last, size=4
2183        /// .. .. .. ..       payload bytes
2184        /// ```
2185        fn synth_single_segment_frame(payload: &[u8]) -> Vec<u8> {
2186            assert!(payload.len() <= 255, "1-byte FCS field caps at 255");
2187            assert!(payload.len() < (1usize << 21), "block size 21-bit max");
2188            let mut out = Vec::new();
2189            // Magic 0xFD2FB528 LE.
2190            out.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
2191            // FHD: single_segment_flag (bit 5) set, everything
2192            // else zero. With single_segment + FCS_flag=0 the FCS
2193            // field is 1 byte. No window_descriptor on wire.
2194            out.push(0b0010_0000);
2195            // 1-byte FCS = payload length.
2196            out.push(payload.len() as u8);
2197            // Block header (3 bytes LE):
2198            // last_block=1, block_type=0 (Raw), block_size=payload.len().
2199            // Encoded: (size << 3) | (block_type << 1) | last_block.
2200            // Block header: last_block flag in bit 0, block_type
2201            // (0 = Raw) in bits 1-2, block size in bits 3+.
2202            let bh: u32 = ((payload.len() as u32) << 3) | 1;
2203            out.push((bh & 0xFF) as u8);
2204            out.push(((bh >> 8) & 0xFF) as u8);
2205            out.push(((bh >> 16) & 0xFF) as u8);
2206            // Raw payload.
2207            out.extend_from_slice(payload);
2208            out
2209        }
2210
2211        #[test]
2212        fn expect_window_descriptor_on_single_segment_frame_fails_with_found_none() {
2213            // Single-segment frames omit the window_descriptor
2214            // byte from the wire entirely. Setting an expectation
2215            // here must surface `found: None` so callers
2216            // distinguish "wrong descriptor" from "no descriptor
2217            // on the wire" — never silently pass.
2218            let compressed = synth_single_segment_frame(b"tiny");
2219
2220            // First sanity-check: the synthetic frame decodes
2221            // cleanly without any expectation.
2222            {
2223                let mut probe = FrameDecoder::new();
2224                probe
2225                    .reset(compressed.as_slice())
2226                    .expect("synth frame parses");
2227                let probe_state = probe.state.as_ref().unwrap();
2228                assert!(
2229                    probe_state.frame_header.window_descriptor().is_none(),
2230                    "synth frame must be single-segment"
2231                );
2232            }
2233
2234            let mut decoder = FrameDecoder::new();
2235            decoder.expect_window_descriptor(Some(0x40));
2236            let err = decoder
2237                .reset(compressed.as_slice())
2238                .expect_err("single-segment + expectation must fail");
2239            match err {
2240                FrameDecoderError::UnexpectedWindowDescriptor { expected, found } => {
2241                    assert_eq!(expected, 0x40);
2242                    assert_eq!(found, None);
2243                }
2244                other => panic!("expected UnexpectedWindowDescriptor, got {other:?}"),
2245            }
2246        }
2247
2248        #[test]
2249        fn validation_failure_leaves_decoder_re_resettable() {
2250            // After UnexpectedDictId on a wrong-expectation reset,
2251            // clearing the expectation and re-calling reset must
2252            // succeed on the same source — no lingering failed
2253            // state.
2254            let compressed = compress(b"re-resettable");
2255
2256            let mut decoder = FrameDecoder::new();
2257            decoder.expect_dict_id(Some(42));
2258            let err = decoder
2259                .reset(compressed.as_slice())
2260                .expect_err("first reset fails");
2261            assert!(matches!(err, FrameDecoderError::UnexpectedDictId { .. }));
2262
2263            // Clear expectation and retry on a fresh source.
2264            decoder.expect_dict_id(None);
2265            decoder
2266                .reset(compressed.as_slice())
2267                .expect("retry after clearing expectation should succeed");
2268        }
2269    }
2270}