Skip to main content

structured_zstd/decoding/
frame_decoder.rs

//! `FrameDecoder` is the main low-level struct users interact with to decode zstd frames
2//!
3//! Zstandard compressed data is made of one or more frames. Each frame is independent and can be
4//! decompressed independently of other frames. This module contains structures
5//! and utilities that can be used to decode a frame.
6
7use super::frame;
8use crate::decoding;
9use crate::decoding::block_decoder::BlockDecoder;
10use crate::decoding::buffer_backend::BufferBackend;
11use crate::decoding::dictionary::{Dictionary, DictionaryHandle};
12use crate::decoding::errors::{DecodeBlockContentError, FrameDecoderError};
13use crate::decoding::flat_buf::FlatBuf;
14use crate::decoding::ringbuffer::RingBuffer;
15use crate::decoding::scratch::DecoderScratch;
16use crate::io::{Error, Read, Write};
17use alloc::collections::BTreeMap;
18use alloc::vec::Vec;
19use core::convert::TryInto;
20
21use crate::common::MAXIMUM_ALLOWED_WINDOW_SIZE;
22
23/// Build the block-header decode error. With the `lsm` feature it captures
24/// the failing block's index and frame offset (block-precise recovery);
25/// without it, the legacy positionless variant — so the default build's
26/// error surface stays byte-identical to the donor.
27#[cfg(feature = "lsm")]
28fn block_header_decode_error(
29    source: crate::decoding::errors::BlockHeaderReadError,
30    block_index: u32,
31    frame_offset: u32,
32) -> FrameDecoderError {
33    FrameDecoderError::FailedToReadBlockHeaderAt {
34        source,
35        block_index,
36        frame_offset,
37    }
38}
39#[cfg(not(feature = "lsm"))]
40fn block_header_decode_error(
41    source: crate::decoding::errors::BlockHeaderReadError,
42    _block_index: u32,
43    _frame_offset: u32,
44) -> FrameDecoderError {
45    FrameDecoderError::FailedToReadBlockHeader(source)
46}
47
48/// Build the block-body decode error. With `lsm` it captures the block
49/// index, frame offset, and the failing block's structural metadata
50/// (reconstructed from its header); without it, the legacy variant.
51#[cfg(feature = "lsm")]
52fn block_body_decode_error(
53    source: DecodeBlockContentError,
54    block_index: u32,
55    frame_offset: u32,
56    header: &crate::blocks::block::BlockHeader,
57    header_size: u8,
58) -> FrameDecoderError {
59    use crate::blocks::block::BlockType;
60    // Physical wire body vs the raw `Block_Size` field: RLE writes a single
61    // body byte while `Block_Size` carries the repeat count; Raw/Compressed
62    // bodies match the field.
63    let (body_size, block_size_field) = match header.block_type {
64        BlockType::RLE => (1u32, header.decompressed_size),
65        _ => (header.content_size, header.content_size),
66    };
67    FrameDecoderError::FailedToReadBlockBodyAt {
68        source,
69        block_index,
70        frame_offset,
71        block: crate::encoding::frame_emit_info::FrameBlock {
72            offset_in_frame: frame_offset,
73            header_size,
74            body_size,
75            block_size_field,
76            block_type: header.block_type,
77            last_block: header.last_block,
78            // Raw/RLE carry their regenerated size in the header;
79            // a Compressed block's is unknown until decoded, so
80            // `read_block_header` leaves `decompressed_size` 0 here.
81            decompressed_size: header.decompressed_size,
82        },
83    }
84}
85#[cfg(not(feature = "lsm"))]
86fn block_body_decode_error(
87    source: DecodeBlockContentError,
88    _block_index: u32,
89    _frame_offset: u32,
90    _header: &crate::blocks::block::BlockHeader,
91    _header_size: u8,
92) -> FrameDecoderError {
93    FrameDecoderError::FailedToReadBlockBody(source)
94}
95
96/// Low level Zstandard decoder that can be used to decompress frames with fine control over when and how many bytes are decoded.
97///
98/// This decoder is able to decode frames only partially and gives control
99/// over how many bytes/blocks will be decoded at a time (so you don't have to decode a 10GB file into memory all at once).
100/// It reads bytes as needed from a provided source and can be read from to collect partial results.
101///
102/// If you want to just read the whole frame with an `io::Read` without having to deal with manually calling [FrameDecoder::decode_blocks]
103/// you can use the provided [crate::decoding::StreamingDecoder] wich wraps this FrameDecoder.
104///
105/// Workflow is as follows:
106/// ```
107/// use structured_zstd::decoding::BlockDecodingStrategy;
108///
109/// # #[cfg(feature = "std")]
110/// use std::io::{Read, Write};
111///
112/// // no_std environments can use the crate's own Read traits
113/// # #[cfg(not(feature = "std"))]
114/// use structured_zstd::io::{Read, Write};
115///
116/// fn decode_this(mut file: impl Read) {
117///     //Create a new decoder
118///     let mut frame_dec = structured_zstd::decoding::FrameDecoder::new();
119///     let mut result = Vec::new();
120///
121///     // Use reset or init to make the decoder ready to decode the frame from the io::Read
122///     frame_dec.reset(&mut file).unwrap();
123///
124///     // Loop until the frame has been decoded completely
125///     while !frame_dec.is_finished() {
126///         // decode (roughly) batch_size many bytes
127///         frame_dec.decode_blocks(&mut file, BlockDecodingStrategy::UptoBytes(1024)).unwrap();
128///
129///         // read from the decoder to collect bytes from the internal buffer
130///         let bytes_read = frame_dec.read(result.as_mut_slice()).unwrap();
131///
132///         // then do something with it
133///         do_something(&result[0..bytes_read]);
134///     }
135///
136///     // handle the last chunk of data
137///     while frame_dec.can_collect() > 0 {
138///         let x = frame_dec.read(result.as_mut_slice()).unwrap();
139///
140///         do_something(&result[0..x]);
141///     }
142/// }
143///
144/// fn do_something(data: &[u8]) {
145/// # #[cfg(feature = "std")]
146///     std::io::stdout().write_all(data).unwrap();
147/// }
148/// ```
149pub struct FrameDecoder {
150    state: Option<FrameDecoderState>,
151    /// Test-only observability: frames decoded via `run_direct_decode`.
152    /// The direct and buffered paths are byte-identical, so dispatch
153    /// regressions (e.g. re-excluding dictionary frames from the direct
154    /// gate) are invisible to output assertions; tests pin the path here.
155    #[cfg(test)]
156    direct_frames: u64,
157    // Registered dictionaries are stored by shared handle (Arc/Rc) so a
158    // single content copy is referenced by every frame the decoder decodes
159    // (donor `ZSTD_refDDict`), rather than re-copied into the decode buffer
160    // per frame. `add_dict` wraps an owned `Dictionary` into a handle.
161    owned_dicts: BTreeMap<u32, DictionaryHandle>,
162    #[cfg(target_has_atomic = "ptr")]
163    shared_dicts: BTreeMap<u32, DictionaryHandle>,
164    #[cfg(not(target_has_atomic = "ptr"))]
165    shared_dicts: (),
166    /// `ZSTD_f_zstd1_magicless` — when true, [`init`] / [`reset`]
167    /// expect frames without the 4-byte magic number prefix.
168    /// Default false (standard zstd format).
169    magicless: bool,
170    /// How the optional content checksum is handled. Default
171    /// [`ContentChecksum::EmitOnly`] (compute + expose, no error on
172    /// mismatch). Set via [`Self::set_content_checksum`].
173    content_checksum: ContentChecksum,
174    /// Pinned `Dictionary_ID` expectation set via
175    /// [`Self::expect_dict_id`]. `None` (default) disables the
176    /// check; `Some(0)` matches frames whose header omits the
177    /// optional dict_id (treated as "no dictionary"). Validated in
178    /// [`Self::reset`] AFTER the frame header parses successfully
179    /// and BEFORE any block decode work.
180    #[cfg(feature = "lsm")]
181    expect_dict_id: Option<u32>,
182    /// Pinned `Window_Descriptor` byte expectation set via
183    /// [`Self::expect_window_descriptor`]. `None` (default)
184    /// disables the check. Validated in [`Self::reset`] AFTER the
185    /// frame header parses successfully and BEFORE any block
186    /// decode work. Single-segment frames (which omit the
187    /// `Window_Descriptor` byte from the wire) surface as
188    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedWindowDescriptor`]
189    /// with `found: None`.
190    #[cfg(feature = "lsm")]
191    expect_window_descriptor: Option<u8>,
192    /// When `true`, the per-block decode loop XXH64-hashes each
193    /// block's decompressed bytes and stores the low-32-bit digest in
194    /// [`Self::computed_block_checksums`]. Default `false` (zero
195    /// cost). Set via [`Self::enable_per_block_checksums`]. Gated on
196    /// `all(lsm, hash)` because XXH64 lives behind the `hash`
197    /// feature.
198    #[cfg(all(feature = "lsm", feature = "hash"))]
199    per_block_checksums_enabled: bool,
200    /// Per-block XXH64 (low 32 bits) digests captured during the
201    /// current frame's decode when `per_block_checksums_enabled` is
202    /// set. Reset at the start of every new frame. Gated on
203    /// `all(lsm, hash)` (see `per_block_checksums_enabled`).
204    #[cfg(all(feature = "lsm", feature = "hash"))]
205    computed_block_checksums: alloc::vec::Vec<u32>,
206}
207
/// Policy for a frame's optional XXH64 content checksum (the RFC 8878
/// `Content_Checksum_flag`).
///
/// Hashing the decompressed output takes a measurable share of decode time,
/// so callers can choose to skip it.
///
/// ```
/// use structured_zstd::decoding::{ContentChecksum, FrameDecoder};
/// let mut decoder = FrameDecoder::new();
/// decoder.set_content_checksum(ContentChecksum::Verify);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ContentChecksum {
    /// No XXH64 pass at all: nothing is computed and nothing is verified, so
    /// `get_calculated_checksum()` returns `None`.
    None,
    /// Compute the digest and make it available through the accessors, but
    /// never fail the decode on a mismatch. This is the default and keeps the
    /// historical behaviour, where callers verify manually if they want to.
    #[default]
    EmitOnly,
    /// Compute the digest and compare it with the value stored in the frame;
    /// a mismatch fails the decode with
    /// [`FrameDecoderError::ChecksumMismatch`](crate::decoding::errors::FrameDecoderError::ChecksumMismatch).
    /// Without the `hash` feature no digest can be computed, so `Verify`
    /// cannot detect a mismatch and behaves like `None`.
    Verify,
}
234
/// The decode-relevant identity of a frame.
///
/// It is used to refuse a [`ResumeState`] that was captured from one frame
/// and is being applied to a frame of a different shape. The key covers every
/// header field that alters how blocks decode: buffer sizing, backend kind,
/// entropy/dictionary context, trailing-checksum handling, declared content
/// size, and magicless framing.
///
/// This guards SHAPE only; it is not a content-unique fingerprint. Two
/// different frames whose header fields all coincide yield the same key,
/// because no cheap header field uniquely identifies frame content. What it
/// does catch is the realistic accident of applying a snapshot to a frame
/// with a different window/dictionary/size, which then fails with a typed
/// error rather than producing byte-wrong output. Matching a `ResumeState`
/// with the right frame's compressed source and `window_prime` stays the
/// caller's responsibility.
#[cfg(feature = "lsm")]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct FrameKey {
    /// Window size from the frame header.
    window_size: u64,
    /// Content size declared by the frame header.
    frame_content_size: u64,
    /// `Dictionary_ID` as declared in the frame header; `None` if omitted.
    dictionary_id: Option<u32>,
    /// The dictionary actually applied to the decoder (`state.using_dict`).
    /// It is kept separately from `dictionary_id` because a frame whose
    /// header names no dictionary can still be decoded with an explicit one
    /// via `reset_with_dict_handle` / `force_dict`. Two such decodes using
    /// different dictionaries must NOT compare equal, and a key built from
    /// the header field alone would not tell them apart.
    active_dictionary_id: Option<u32>,
    /// Value of the header's single-segment flag.
    single_segment: bool,
    /// Value of the header's content-checksum flag.
    content_checksum: bool,
    /// Whether the decoder was configured for magicless framing.
    magicless: bool,
}
265
#[cfg(feature = "lsm")]
impl FrameKey {
    /// Builds the shape key for the frame currently held in `state`.
    ///
    /// Every field except `magicless` is read from the parsed frame header or
    /// from `state.using_dict`; `magicless` is a decoder setting and is
    /// passed in by the caller. A window size the header cannot report is
    /// recorded as `0`.
    fn from_state(state: &FrameDecoderState, magicless: bool) -> FrameKey {
        let hdr = &state.frame_header;
        let window_size = hdr.window_size().unwrap_or(0);
        let frame_content_size = hdr.frame_content_size();
        let dictionary_id = hdr.dictionary_id();
        let single_segment = hdr.descriptor.single_segment_flag();
        let content_checksum = hdr.descriptor.content_checksum_flag();

        Self {
            window_size,
            frame_content_size,
            dictionary_id,
            active_dictionary_id: state.using_dict,
            single_segment,
            content_checksum,
            magicless,
        }
    }
}
281
/// Seed-0 XXH64 digest of one contiguous byte slice.
///
/// This is the resume-side counterpart of
/// [`DecoderScratchKind::window_tail_hash`]. Streaming XXH64 does not depend
/// on chunk boundaries, so hashing a single slice here gives the same value
/// as the emit side hashing the same bytes as two slices.
#[cfg(all(feature = "lsm", feature = "hash"))]
fn xxh64_of(bytes: &[u8]) -> u64 {
    use core::hash::Hasher;

    let mut hasher = twox_hash::XxHash64::with_seed(0);
    Hasher::write(&mut hasher, bytes);
    Hasher::finish(&hasher)
}
293
/// Snapshot of the cross-block decode state required to resume a cold partial
/// decode at an inner block boundary.
///
/// [`FrameDecoder::decode_blocks_partial`] produces it when called with
/// `emit_resume == true` (it is returned in [`PartialDecode::resume_state`]),
/// and the same method consumes it again through its
/// [`resume`](FrameDecoder::decode_blocks_partial) argument
/// ([`ResumeInput`]).
///
/// A zstd block cannot always be decoded on its own. Apart from the shared
/// match window (the history of decompressed output), a Compressed block may
/// reuse the previous block's entropy tables via `Repeat_Mode` (the literals
/// Huffman table and the LL/OF/ML FSE distributions), and it always continues
/// the running repeat-offset history. The snapshot holds exactly this
/// carry-over state together with the resume coordinates, which makes a
/// resumed decode byte-identical to a contiguous one even if the original
/// decoder was dropped in between.
///
/// Two things are deliberately NOT stored:
/// - The window. The caller hands it back through
///   [`ResumeInput::window_prime`], taken from the decompressed output it
///   already persists.
/// - The dictionary. For a dictionary frame the caller re-attaches it to the
///   resuming decoder via [`FrameDecoder::reset`] /
///   [`FrameDecoder::reset_with_dict_handle`] (it already holds the dictionary
///   from encode time). Only the dictionary's identity is recorded, so that a
///   resume under a different dictionary is rejected.
///
/// Behind the `lsm` Cargo feature.
#[cfg(feature = "lsm")]
#[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
pub struct ResumeState {
    /// Identity of the frame the snapshot was taken from. Before any state is
    /// restored it is compared with the frame currently reset into the
    /// decoder; a snapshot from a differently shaped frame is rejected with
    /// [`FrameDecoderError::ResumeFrameMismatch`] rather than silently
    /// yielding byte-wrong output.
    frame_key: FrameKey,
    /// Index of the block to resume AT, i.e. the first block NOT yet decoded.
    block_index: u32,
    /// Total number of decompressed bytes produced before `block_index`.
    output_offset: u64,
    /// FSE tables (LL/OF/ML) as they stood after the last decoded block. A
    /// `Repeat_Mode` resume block takes its tables from here.
    fse: crate::decoding::scratch::FSEScratch,
    /// Huffman literals table as it stood after the last decoded block. A
    /// treeless (repeat) literals resume block takes its table from here.
    huf: crate::decoding::scratch::HuffmanScratch,
    /// Running repeat-offset history (`offset_hist`) after the last decoded
    /// block.
    offset_hist: [u32; 3],
    /// XXH64 of the exact window-prime bytes captured at emit time: the last
    /// `min(window_size, output_offset)` decompressed bytes. On resume it is
    /// checked against the caller-supplied [`ResumeInput::window_prime`]. A
    /// content mismatch (wrong frame, wrong or corrupted prime) is a
    /// near-unique (≈2⁻⁶⁴) signal and is rejected with
    /// [`FrameDecoderError::ResumeFrameMismatch`]. This is the content-exact
    /// guard, whereas [`FrameKey`] is the cheap shape pre-check that also
    /// works without the `hash` feature. Behind `all(lsm, hash)`.
    #[cfg(feature = "hash")]
    window_hash: u64,
}
350
#[cfg(feature = "lsm")]
impl ResumeState {
    /// Returns the inner block index at which this snapshot resumes: the
    /// first block that has not been decoded yet.
    ///
    /// Use it as `start_block` of the resuming
    /// [`decode_blocks_partial`](FrameDecoder::decode_blocks_partial) call;
    /// it is also the lower bound for that call's `end_block`.
    pub fn block_index(&self) -> u32 {
        self.block_index
    }

    /// Returns the number of decompressed bytes produced before
    /// [`block_index`](Self::block_index), which is the decompressed offset
    /// where the resumed output starts.
    ///
    /// The value equals
    /// `FrameEmitInfo::decompressed_byte_range(block_index).start` and is what
    /// a caller needs to slice the `window_prime` tail for the resumed call.
    pub fn output_offset(&self) -> u64 {
        self.output_offset
    }
}
370
// Hand-written Debug: the entropy tables are bulky internal scratch with
// nothing useful to print, so only the resume coordinates are shown. Having
// this impl is also what allows `PartialDecode` to keep deriving Debug.
#[cfg(feature = "lsm")]
impl core::fmt::Debug for ResumeState {
    /// Prints `block_index` and `output_offset`; the remaining fields are
    /// marked as omitted via `finish_non_exhaustive`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut out = f.debug_struct("ResumeState");
        out.field("block_index", &self.block_index);
        out.field("output_offset", &self.output_offset);
        out.finish_non_exhaustive()
    }
}
383
/// Everything [`FrameDecoder::decode_blocks_partial`] needs in its `resume`
/// argument to continue a cold partial decode without decompressing the
/// preceding blocks again.
///
/// Behind the `lsm` Cargo feature.
#[cfg(feature = "lsm")]
#[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
pub struct ResumeInput<'a> {
    /// Output the caller has already decompressed, ending immediately before
    /// [`ResumeState::block_index`].
    ///
    /// It has to include at least the last `min(window_size, output_offset)`
    /// bytes: one full match window, or the entire prefix if that is shorter
    /// than a window. Bytes older than the last `window_size` are ignored, so
    /// handing over the complete prefix is valid too; it is capped
    /// internally, which bounds resume memory to one window.
    pub window_prime: &'a [u8],
    /// The cross-block entropy/repcode state returned by the earlier
    /// [`decode_blocks_partial`](FrameDecoder::decode_blocks_partial) call.
    pub state: &'a ResumeState,
}
403
404/// Backend-tagged decode scratch — chosen at frame-reset time based
405/// on the parsed `FrameHeader.descriptor.single_segment_flag()` and
406/// kept stable through the lifetime of the frame. The match in each
407/// helper below dispatches **once per call** (e.g. once per block in
408/// `decode_block_content`, once per drain in `drain_to_writer`) —
409/// never inside the hot push/repeat loop, which is fully
410/// monomorphised through the `DecoderScratch<B>` generic.
411enum DecoderScratchKind {
412    Ring(DecoderScratch<RingBuffer>),
413    Flat(DecoderScratch<FlatBuf>),
414}
415
416impl DecoderScratchKind {
417    fn new_ring(window_size: usize) -> Self {
418        // Lazy ring-buffer allocation: do NOT `reserve(window_size)` here.
419        // The direct-decode path (`run_direct_decode`) writes through
420        // `UserSliceBackend` and never touches the ring; allocating it
421        // eagerly wastes one full window of peak memory on the common
422        // direct-eligible frame. On the non-direct path the window is
423        // pre-reserved once at frame entry (`decode_all_impl` and
424        // `decode_blocks` both call `DecoderScratchKind::reserve_buffer`
425        // before any block writes), so multi-block frames pay one
426        // amortised grow instead of repeated `reserve_amortized` steps
427        // per block. Issue #279 round 2.
428        let s = DecoderScratch::<RingBuffer>::new(window_size);
429        Self::Ring(s)
430    }
431
432    /// Construct a flat-backed scratch for a single-segment frame.
433    /// `frame_content_size` is the upcoming output size in bytes
434    /// (== `window_size` when the flag is set).
435    ///
436    /// Lazy buffer allocation (mirrors [`Self::new_ring`]): do NOT
437    /// pre-size the `FlatBuf`. The direct-decode path
438    /// (`run_direct_decode`) writes through `UserSliceBackend` and never
439    /// touches this buffer, so eagerly allocating a full FCS wastes one
440    /// whole content-size of peak memory on the common direct-eligible
441    /// single-segment frame. The non-direct fallback reserves it once via
442    /// `reserve_buffer(window_size)` at frame entry before any block
443    /// write (`FlatBuf::reserve` adds the `WILDCOPY_OVERLENGTH` slack),
444    /// and every inline-exec site (trait method and per-kernel macros)
445    /// now carries a tight-tail bounded copy, so a tight buffer can never
446    /// overshoot regardless of construction-time slack.
447    fn new_flat(frame_content_size: usize) -> Self {
448        let s = DecoderScratch::<FlatBuf>::new(frame_content_size);
449        Self::Flat(s)
450    }
451
452    /// Reset (or transition between) backends for a new frame.
453    /// Reuses the existing `DecoderScratch` allocations (FSE / HUF
454    /// tables, sequence vec, etc.) when the backend kind is unchanged
455    /// — only the underlying buffer is re-sized for the new frame.
456    /// Building a fresh `DecoderScratch` on every frame would
457    /// re-allocate everything and was measured at +255 % vs ring on
458    /// small frames; reusing it keeps the small-frame cost flat.
459    fn reset(&mut self, frame: &frame::FrameHeader, window_size: usize) {
460        if frame.descriptor.single_segment_flag() {
461            match self {
462                Self::Flat(s) => {
463                    s.reset(window_size);
464                    // `DecoderScratch::reset` clears the backing buffer and
465                    // updates `window_size` WITHOUT reserving it (it may still
466                    // resize the per-block scratch Vecs up to
467                    // `min(window_size, MAX_BLOCK_SIZE)`). Backing-buffer
468                    // capacity is decided one layer up: direct-eligible frames
469                    // never touch it, and the non-direct path pre-reserves once
470                    // via `reserve_buffer(window_size)` at frame entry.
471                }
472                Self::Ring(_) => *self = Self::new_flat(window_size),
473            }
474        } else {
475            match self {
476                Self::Ring(s) => s.reset(window_size),
477                Self::Flat(_) => *self = Self::new_ring(window_size),
478            }
479        }
480    }
481
482    fn init_from_dict(&mut self, dict: &DictionaryHandle) {
483        match self {
484            Self::Ring(s) => s.init_from_dict(dict),
485            Self::Flat(s) => s.init_from_dict(dict),
486        }
487    }
488
489    #[inline]
490    fn buffer_len(&self) -> usize {
491        match self {
492            Self::Ring(s) => s.buffer.len(),
493            Self::Flat(s) => s.buffer.len(),
494        }
495    }
496
497    fn workspace_bytes(&self) -> usize {
498        match self {
499            Self::Ring(s) => s.workspace_bytes(),
500            Self::Flat(s) => s.workspace_bytes(),
501        }
502    }
503
504    /// Pre-reserve the backing buffer to `window_size` in a single
505    /// allocation. Called once on the non-direct (`decode_blocks`) path
506    /// after direct-eligibility is ruled out, so multi-segment fallback
507    /// decodes don't pay repeated `reserve_amortized` grow steps
508    /// (128 KiB → 256 KiB → ... → window) as blocks accumulate.
509    ///
510    /// Direct-eligible frames never call this and pay zero backing-buffer
511    /// allocation for the window, on BOTH backends: `new_ring` and
512    /// `new_flat` are each lazy (no pre-reserve), so a direct-eligible
513    /// frame writes only through `UserSliceBackend` and leaves this
514    /// buffer empty.
515    ///
516    /// `window_size` is the TARGET visible-window capacity: callers pass
517    /// the full window, and the method itself computes the shortfall past
518    /// the bytes already buffered before calling the backend's
519    /// ADDITIONAL-semantics `reserve_exact`. That keeps re-entries (the
520    /// decode_all fallback loop runs `decode_blocks` once per strategy
521    /// chunk, and streaming callers invoke it per call) from growing a
522    /// window-full buffer toward 2x window, while per-block growth keeps
523    /// the amortized `reserve`.
524    #[inline]
525    fn reserve_buffer(&mut self, window_size: usize) {
526        // Exact growth: this is the one-shot pre-reservation, and a request
527        // landing one slack past the retained capacity (e.g. a dictionary
528        // prefix already loaded into the buffer) must not DOUBLE a
529        // window-sized allocation through the amortized policy. Per-block
530        // growth keeps the amortized `reserve`.
531        //
532        // `reserve_exact` takes ADDITIONAL capacity, so request only the
533        // shortfall past the bytes already buffered: the decode_all
534        // fallback loop re-enters `decode_blocks` once per strategy chunk,
535        // and re-requesting the full window each iteration would grow a
536        // window-sized buffer toward 2x window.
537        match self {
538            Self::Ring(s) => {
539                let additional = window_size.saturating_sub(s.buffer.len());
540                s.buffer.reserve_exact(additional);
541            }
542            Self::Flat(s) => {
543                let additional = window_size.saturating_sub(s.buffer.len());
544                s.buffer.reserve_exact(additional);
545            }
546        }
547    }
548
549    /// Last `n` bytes of the visible buffer as `(s1, s2)` (wrap-aware).
550    /// Routes through whichever backend the current scratch holds.
551    #[cfg(all(feature = "lsm", feature = "hash"))]
552    fn last_n_as_slices(&self, n: usize) -> (&[u8], &[u8]) {
553        match self {
554            Self::Ring(s) => s.buffer.last_n_as_slices(n),
555            Self::Flat(s) => s.buffer.last_n_as_slices(n),
556        }
557    }
558
559    fn buffer_drain(&mut self) -> Vec<u8> {
560        match self {
561            Self::Ring(s) => s.buffer.drain(),
562            Self::Flat(s) => s.buffer.drain(),
563        }
564    }
565
566    fn buffer_drain_to_window_size(&mut self) -> Option<Vec<u8>> {
567        match self {
568            Self::Ring(s) => s.buffer.drain_to_window_size(),
569            Self::Flat(s) => s.buffer.drain_to_window_size(),
570        }
571    }
572
573    fn buffer_drain_to_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
574        match self {
575            Self::Ring(s) => s.buffer.drain_to_writer(sink),
576            Self::Flat(s) => s.buffer.drain_to_writer(sink),
577        }
578    }
579
580    fn buffer_drain_to_window_size_writer(&mut self, sink: impl Write) -> Result<usize, Error> {
581        match self {
582            Self::Ring(s) => s.buffer.drain_to_window_size_writer(sink),
583            Self::Flat(s) => s.buffer.drain_to_window_size_writer(sink),
584        }
585    }
586
587    fn buffer_can_drain(&self) -> usize {
588        match self {
589            Self::Ring(s) => s.buffer.can_drain(),
590            Self::Flat(s) => s.buffer.can_drain(),
591        }
592    }
593
594    fn buffer_can_drain_to_window_size(&self) -> Option<usize> {
595        match self {
596            Self::Ring(s) => s.buffer.can_drain_to_window_size(),
597            Self::Flat(s) => s.buffer.can_drain_to_window_size(),
598        }
599    }
600
601    fn buffer_read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
602        match self {
603            Self::Ring(s) => s.buffer.read(target),
604            Self::Flat(s) => s.buffer.read(target),
605        }
606    }
607
608    fn buffer_read_all(&mut self, target: &mut [u8]) -> Result<usize, Error> {
609        match self {
610            Self::Ring(s) => s.buffer.read_all(target),
611            Self::Flat(s) => s.buffer.read_all(target),
612        }
613    }
614
615    /// Drop visible output beyond `window_size` without producing it,
616    /// keeping the most recent `window_size` bytes available to back
617    /// future match copies. Used by `decode_blocks_partial` to bound
618    /// memory while decoding the leading (skipped) blocks into the window.
619    #[cfg(feature = "lsm")]
620    fn buffer_drop_to_window_size(&mut self) -> usize {
621        match self {
622            Self::Ring(s) => s.buffer.drop_to_window_size(),
623            Self::Flat(s) => s.buffer.drop_to_window_size(),
624        }
625    }
626
627    /// Drop exactly `n` bytes from the front of the visible output without
628    /// producing them. Used by `decode_blocks_partial` to discard the
629    /// leading blocks' window-context bytes once the in-range blocks are
630    /// decoded (match resolution complete), leaving only the in-range output.
631    #[cfg(feature = "lsm")]
632    fn buffer_discard_front(&mut self, n: usize) {
633        match self {
634            Self::Ring(s) => s.buffer.discard_front(n),
635            Self::Flat(s) => s.buffer.discard_front(n),
636        }
637    }
638
639    /// Prime the match window with the caller's already-decompressed tail for
640    /// a resumed partial decode. Routes through whichever backend the current
641    /// scratch holds. See [`DecodeBuffer::prime_window`].
642    #[cfg(feature = "lsm")]
643    fn prime_window(&mut self, prefix: &[u8], total_output: u64) {
644        match self {
645            Self::Ring(s) => s.buffer.prime_window(prefix, total_output),
646            Self::Flat(s) => s.buffer.prime_window(prefix, total_output),
647        }
648    }
649
650    /// Total decompressed bytes produced so far (the buffer's running output
651    /// counter, unaffected by window drops / drains). Used to stamp a captured
652    /// [`ResumeState`]'s `output_offset`.
653    #[cfg(feature = "lsm")]
654    fn total_output(&self) -> u64 {
655        match self {
656            Self::Ring(s) => s.buffer.total_output(),
657            Self::Flat(s) => s.buffer.total_output(),
658        }
659    }
660
661    /// Clone the cross-block entropy/repcode state (FSE + Huffman tables +
662    /// `offset_hist`) out of the live scratch for a [`ResumeState`] snapshot.
663    #[cfg(feature = "lsm")]
664    fn export_entropy(
665        &self,
666    ) -> (
667        crate::decoding::scratch::FSEScratch,
668        crate::decoding::scratch::HuffmanScratch,
669        [u32; 3],
670    ) {
671        let (fse_src, huf_src, offset_hist) = match self {
672            Self::Ring(s) => (&s.fse, &s.huf, s.offset_hist),
673            Self::Flat(s) => (&s.fse, &s.huf, s.offset_hist),
674        };
675        let mut fse = crate::decoding::scratch::FSEScratch::new();
676        fse.reinit_from(fse_src);
677        let mut huf = crate::decoding::scratch::HuffmanScratch::new();
678        huf.reinit_resolved_from(huf_src);
679        (fse, huf, offset_hist)
680    }
681
682    /// Install entropy/repcode state from a [`ResumeState`] into the live
683    /// scratch so a `Repeat_Mode` / treeless resume block resolves against the
684    /// same tables a contiguous decode would have carried over.
685    #[cfg(feature = "lsm")]
686    fn restore_entropy(&mut self, state: &ResumeState) {
687        match self {
688            Self::Ring(s) => {
689                s.fse.reinit_from(&state.fse);
690                s.huf.reinit_resolved_from(&state.huf);
691                s.offset_hist = state.offset_hist;
692            }
693            Self::Flat(s) => {
694                s.fse.reinit_from(&state.fse);
695                s.huf.reinit_resolved_from(&state.huf);
696                s.offset_hist = state.offset_hist;
697            }
698        }
699    }
700
701    /// XXH64 of the window-prime bytes for a [`ResumeState`]: the last
702    /// `min(window_size, buffer_len)` bytes of the current buffer, which at emit
703    /// time are exactly the match-window context the resume block will see.
704    /// Wrap-aware via `last_n_as_slices` — streaming XXH64 over the two slices
705    /// equals a single hash over the contiguous `window_prime` at resume.
706    #[cfg(all(feature = "lsm", feature = "hash"))]
707    fn window_tail_hash(&self, window_size: usize) -> u64 {
708        use core::hash::Hasher;
709        let n = core::cmp::min(window_size, self.buffer_len());
710        let (s1, s2) = self.last_n_as_slices(n);
711        let mut h = twox_hash::XxHash64::with_seed(0);
712        h.write(s1);
713        h.write(s2);
714        h.finish()
715    }
716
717    fn decode_block_content<R: Read>(
718        &mut self,
719        decoder: &mut BlockDecoder,
720        header: &crate::blocks::block::BlockHeader,
721        source: R,
722    ) -> Result<u64, DecodeBlockContentError> {
723        match self {
724            Self::Ring(s) => decoder.decode_block_content(header, s, source),
725            Self::Flat(s) => decoder.decode_block_content(header, s, source),
726        }
727    }
728
729    #[cfg(feature = "hash")]
730    fn hash_finish(&self) -> u64 {
731        use core::hash::Hasher;
732        match self {
733            Self::Ring(s) => s.buffer.hash.finish(),
734            Self::Flat(s) => s.buffer.hash.finish(),
735        }
736    }
737
738    /// Forward the drain-time hash toggle to the inner `DecodeBuffer`
739    /// (streaming path). Called by the frame layer from the decoder's
740    /// `ContentChecksum` mode before each decode.
741    #[cfg(feature = "hash")]
742    fn set_compute_hash(&mut self, compute: bool) {
743        match self {
744            Self::Ring(s) => s.buffer.set_compute_hash(compute),
745            Self::Flat(s) => s.buffer.set_compute_hash(compute),
746        }
747    }
748}
749
/// Per-frame decoding state: the parsed frame header plus the scratch and
/// progress bookkeeping that `new_with_format` / `reset_with_format`
/// (re)initialise for every new frame.
struct FrameDecoderState {
    /// Header of the frame this state was last initialised for.
    pub frame_header: frame::FrameHeader,
    /// Decode scratch, backed by either a ring or a flat buffer. On first
    /// construction a flat backend is chosen for single-segment frames and a
    /// ring backend otherwise.
    decoder_scratch: DecoderScratchKind,
    /// Starts as `false` for every new frame.
    /// NOTE(review): presumably set once the frame's last block has been
    /// decoded — confirm in the block-decoding path.
    frame_finished: bool,
    /// Starts at 0 for every new frame.
    /// NOTE(review): presumably the number of blocks decoded so far — confirm.
    block_counter: usize,
    /// Starts at the size of the parsed frame header.
    /// NOTE(review): presumably the count of compressed bytes consumed from
    /// the source for this frame — confirm.
    bytes_read_counter: u64,
    /// Checksum read from the frame data, exposed through
    /// `FrameDecoder::get_checksum_from_data`. `None` after init/reset.
    check_sum: Option<u32>,
    /// ID of the dictionary installed into the scratch for this frame, or
    /// `None` when no dictionary has been applied.
    using_dict: Option<u32>,
}
759
/// Selects how far a block-decoding call should proceed.
///
/// NOTE(review): the variant descriptions below are inferred from the variant
/// names only — the code that consumes this enum is not part of this
/// excerpt. Confirm against the block-decoding entry point.
pub enum BlockDecodingStrategy {
    /// Presumably: decode all remaining blocks of the frame.
    All,
    /// Presumably: stop after at most this many blocks.
    UptoBlocks(usize),
    /// Presumably: stop once roughly this many bytes have been decoded.
    UptoBytes(usize),
}
765
/// Outcome of [`FrameDecoder::decode_blocks_partial`]: the decompressed
/// bytes of the requested inner-block range plus where (if anywhere)
/// decoding stopped early.
///
/// Behind the `lsm` Cargo feature.
#[cfg(feature = "lsm")]
#[derive(Debug)]
pub struct PartialDecode {
    /// Decompressed bytes of the in-range blocks actually decoded, in
    /// frame order, as one contiguous buffer. `data.len()` equals the sum
    /// of the decompressed sizes of blocks `start_block .. start_block +
    /// blocks_decoded`.
    pub data: alloc::vec::Vec<u8>,
    /// First block whose output is in [`data`](Self::data): the requested
    /// `start_block` on a fresh decode, or [`ResumeState::block_index`] when
    /// resuming (the caller-supplied `start_block` is ignored in resume mode).
    pub start_block: u32,
    /// Number of in-range blocks successfully decoded into
    /// [`data`](Self::data). Together with [`start_block`](Self::start_block)
    /// this identifies the decoded range
    /// `start_block .. start_block + blocks_decoded`.
    pub blocks_decoded: u32,
    /// `Some((block_index, error))` if decoding stopped on a failing block
    /// before reaching `end_block` (a corrupt block inside the range, or a
    /// leading block needed for window context). `None` if the requested
    /// range decoded cleanly or the frame's last block was reached first.
    ///
    /// When the failing block is a leading context block
    /// (`block_index < start_block`), the in-range window could not be
    /// built so [`data`](Self::data) is empty and `blocks_decoded` is 0.
    pub stopped_at: Option<(u32, FrameDecoderError)>,
    /// `true` if the frame's last block was reached during this decode.
    /// When set, [`resume_state`](Self::resume_state) is always `None`.
    pub frame_finished: bool,
    /// Cross-block carry-over state for resuming the next extent. Feed it back
    /// (with the matching `window_prime`) via the `resume` argument of a
    /// later [`FrameDecoder::decode_blocks_partial`] to continue from
    /// [`ResumeState::block_index`] without re-decompressing the prefix.
    ///
    /// `None` in two cases: emission was not requested (`emit_resume = false`),
    /// OR this decode reached the frame's last block ([`frame_finished`] is
    /// `true`) — there is no following block to resume from, so no snapshot is
    /// emitted even with `emit_resume = true`. Callers walking a frame
    /// incrementally should therefore stop when `frame_finished` is set rather
    /// than treat a `None` here as "emission disabled".
    ///
    /// [`frame_finished`]: Self::frame_finished
    pub resume_state: Option<ResumeState>,
}
812
813impl FrameDecoderState {
814    /// Window size to actually reserve for this frame's decode buffer.
815    /// A declared content size caps the useful window: matches can never
816    /// reference further back than the bytes that will ever exist, so an
817    /// encoder-declared window above the FCS (e.g. a level-preset window
818    /// on a smaller input) must not inflate the reservation. Every
819    /// `reserve_buffer` site routes through this so the cap is uniform
820    /// across `decode_all_impl`, `decode_blocks`, and the partial path.
821    fn useful_window_size(&self) -> usize {
822        let window_size = self.frame_header.window_size().unwrap_or(0);
823        if self.frame_header.fcs_declared() {
824            window_size.min(self.frame_header.frame_content_size()) as usize
825        } else {
826            window_size as usize
827        }
828    }
829
830    /// Construct a new frame decoder state, reading the frame header
831    /// from `source`. When `magicless` is `true`, the 4-byte magic
832    /// number prefix is NOT consumed (donor `ZSTD_f_zstd1_magicless`).
833    /// Crate-internal — reached only via `FrameDecoder::init` /
834    /// `FrameDecoder::init_with_dict_handle`. The decode buffer is
835    /// allocated lazily on BOTH backends (`new_ring` and `new_flat`):
836    /// direct-eligible frames pay zero buffer allocation, and the
837    /// non-direct fallback reserves `window_size` once in
838    /// `decode_all_impl` / `decode_blocks` via `reserve_buffer` before
839    /// any block write.
840    pub(crate) fn new_with_format(
841        source: impl Read,
842        magicless: bool,
843    ) -> Result<FrameDecoderState, FrameDecoderError> {
844        let (frame, header_size) = frame::read_frame_header_with_format(source, magicless)?;
845        let window_size = frame.window_size()?;
846
847        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
848            return Err(FrameDecoderError::WindowSizeTooBig {
849                requested: window_size,
850            });
851        }
852
853        let decoder_scratch = if frame.descriptor.single_segment_flag() {
854            DecoderScratchKind::new_flat(window_size as usize)
855        } else {
856            DecoderScratchKind::new_ring(window_size as usize)
857        };
858        Ok(FrameDecoderState {
859            frame_header: frame,
860            frame_finished: false,
861            block_counter: 0,
862            decoder_scratch,
863            bytes_read_counter: u64::from(header_size),
864            check_sum: None,
865            using_dict: None,
866        })
867    }
868
869    /// Reset this state for a new frame read from `source`, reusing
870    /// existing allocations. When `magicless` is `true`, the frame
871    /// header is read WITHOUT expecting a magic-number prefix
872    /// (donor `ZSTD_f_zstd1_magicless`). Crate-internal — reached
873    /// only via `FrameDecoder::reset`.
874    ///
875    /// `DecodeBuffer::reset` no longer reserves window_size for either
876    /// backend — capacity decisions live one layer up. Both backends are
877    /// lazy: direct-eligible frames pay zero backing-buffer allocation
878    /// here (they write through `UserSliceBackend`), and the non-direct
879    /// path is pre-reserved by `decode_all_impl` / `decode_blocks` via
880    /// `DecoderScratchKind::reserve_buffer(window_size)` before any block
881    /// write. A reused scratch whose new frame fits within prior capacity
882    /// reuses it; a larger one grows on that same `reserve_buffer` call.
883    pub(crate) fn reset_with_format(
884        &mut self,
885        source: impl Read,
886        magicless: bool,
887    ) -> Result<(), FrameDecoderError> {
888        let (frame_header, header_size) = frame::read_frame_header_with_format(source, magicless)?;
889        let window_size = frame_header.window_size()?;
890
891        if window_size > MAXIMUM_ALLOWED_WINDOW_SIZE {
892            return Err(FrameDecoderError::WindowSizeTooBig {
893                requested: window_size,
894            });
895        }
896
897        self.decoder_scratch
898            .reset(&frame_header, window_size as usize);
899        self.frame_header = frame_header;
900        self.frame_finished = false;
901        self.block_counter = 0;
902        self.bytes_read_counter = u64::from(header_size);
903        self.check_sum = None;
904        self.using_dict = None;
905        Ok(())
906    }
907}
908
909impl Default for FrameDecoder {
910    fn default() -> Self {
911        Self::new()
912    }
913}
914
915impl FrameDecoder {
916    /// This will create a new decoder without allocating anything yet.
917    /// init()/reset() will allocate all needed buffers if it is the first time this decoder is used
918    /// else they just reset these buffers with not further allocations
919    pub fn new() -> FrameDecoder {
920        FrameDecoder {
921            state: None,
922            #[cfg(test)]
923            direct_frames: 0,
924            owned_dicts: BTreeMap::new(),
925            #[cfg(target_has_atomic = "ptr")]
926            shared_dicts: BTreeMap::new(),
927            #[cfg(not(target_has_atomic = "ptr"))]
928            shared_dicts: (),
929            magicless: false,
930            content_checksum: ContentChecksum::EmitOnly,
931            #[cfg(feature = "lsm")]
932            expect_dict_id: None,
933            #[cfg(feature = "lsm")]
934            expect_window_descriptor: None,
935            #[cfg(all(feature = "lsm", feature = "hash"))]
936            per_block_checksums_enabled: false,
937            #[cfg(all(feature = "lsm", feature = "hash"))]
938            computed_block_checksums: alloc::vec::Vec::new(),
939        }
940    }
941
942    /// Heap bytes currently held by the decoder's lazily-grown workspace:
943    /// the decode-window buffer plus the per-block literal/content buffers
944    /// and the entropy tables. Returns 0 before the first frame is initialised
945    /// (no workspace allocated yet). The window allocation dominates and grows
946    /// with the frame's window size; this is the value to track for decode-time
947    /// memory pressure, mirroring the workspace term of upstream
948    /// `ZSTD_sizeof_DCtx`. Shared dictionaries (ref-counted handles) are not
949    /// counted, matching upstream excluding `refDDict` memory.
950    pub fn workspace_size(&self) -> usize {
951        self.state
952            .as_ref()
953            .map_or(0, |s| s.decoder_scratch.workspace_bytes())
954    }
955
    /// Select how the frame's optional content checksum is handled
    /// (compute, expose, verify, or skip). See [`ContentChecksum`].
    ///
    /// The default, set by [`Self::new`], is [`ContentChecksum::EmitOnly`].
    /// This call only stores the mode on the decoder: it takes effect on the
    /// next decode and is safe to call between frames on a reused decoder.
    pub fn set_content_checksum(&mut self, mode: ContentChecksum) {
        self.content_checksum = mode;
    }
963
    /// Opt in to per-block XXH64 verification during decode.
    /// Default off; zero cost when disabled. Each block's decompressed
    /// bytes are XXH64-hashed (low 32 bits) and appended to
    /// [`Self::computed_block_checksums`] as the decode progresses.
    /// Callers compare the captured digests against externally-stored
    /// expected values (e.g. from a per-block sidecar in the
    /// containing application protocol).
    ///
    /// This call only sets the opt-in flag; digests already captured are
    /// left as they are.
    ///
    /// Behind `all(feature = "lsm", feature = "hash")` — the XXH64
    /// primitive lives behind the `hash` feature, so this method
    /// only compiles when both are enabled.
    #[cfg(all(feature = "lsm", feature = "hash"))]
    pub fn enable_per_block_checksums(&mut self) {
        self.per_block_checksums_enabled = true;
    }
979
980    /// Per-block XXH64 (low 32 bits) digests captured during the
981    /// current frame's decode. Empty unless
982    /// [`Self::enable_per_block_checksums`] was called before
983    /// [`Self::decode_all`] / [`Self::reset`].
984    ///
985    /// Reset at the start of every new frame.
986    ///
987    /// Behind `all(feature = "lsm", feature = "hash")`.
988    #[cfg(all(feature = "lsm", feature = "hash"))]
989    pub fn computed_block_checksums(&self) -> &[u32] {
990        &self.computed_block_checksums
991    }
992
    /// Pin the expected `Dictionary_ID` for the next frame.
    ///
    /// This call only stores the expectation; the check itself runs in
    /// [`Self::init`] / [`Self::reset`].
    ///
    /// When `expected` is set, [`Self::init`] / [`Self::reset`]
    /// validate it against the parsed frame header BEFORE any
    /// block decode work runs. A mismatch returns
    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedDictId`]
    /// before any block decode and before any output is produced.
    /// Scratch buffer allocation / reservation for the decode
    /// pipeline happens during frame-header parsing, which is
    /// already complete when this validation fires — the cost of
    /// scratch sizing is paid even on a mismatched header. The
    /// guarantee is "no block decode, no XXH64 init, no partial
    /// output", not "zero allocation".
    ///
    /// `Some(0)` is treated as "no dictionary expected": a frame
    /// whose header omits the optional `Dictionary_ID` field
    /// (flag value 0) passes the check; a frame that carries an
    /// explicit non-zero id fails.
    ///
    /// `None` (default) disables the check.
    ///
    /// Primary use case: post-AEAD-decrypt sanity check in
    /// wire-format consumers (e.g. lsm-tree's encrypted block
    /// format pins the `dict_id` baked into the AAD against the
    /// inner zstd frame's `dict_id` to defeat dict-substitution
    /// attacks).
    ///
    /// NOT a replacement for AEAD authentication. NOT the same
    /// semantic as donor `ZSTD_d_windowLogMax` (which is a
    /// ceiling-style limit, separate concern).
    #[cfg(feature = "lsm")]
    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
    pub fn expect_dict_id(&mut self, expected: Option<u32>) {
        self.expect_dict_id = expected;
    }
1028
    /// Pin the expected raw `Window_Descriptor` byte (RFC 8878
    /// §3.1.1.1.2 layout: `(exp << 3) | mantissa`) for the next
    /// frame.
    ///
    /// This call only stores the expectation; the check itself runs in
    /// [`Self::init`] / [`Self::reset`].
    ///
    /// When `expected` is set, [`Self::init`] / [`Self::reset`]
    /// validate it against the parsed frame header BEFORE any
    /// block decode work runs. A mismatch returns
    /// [`crate::decoding::errors::FrameDecoderError::UnexpectedWindowDescriptor`].
    ///
    /// Single-segment frames omit the `Window_Descriptor` byte
    /// from the wire entirely. Setting an expectation while
    /// receiving a single-segment frame fails the check with
    /// `found: None` — there is no on-wire byte to match against,
    /// which is reported explicitly rather than silently passing.
    ///
    /// `None` (default) disables the check.
    ///
    /// Byte-exact equality, NOT a ceiling. Donor
    /// `ZSTD_d_windowLogMax` is a separate ceiling-style limit
    /// available through the C FFI surface; this method is for
    /// strict equality validation against a pinned expectation
    /// (e.g. lsm-tree's wire format pins the window descriptor
    /// from the AAD to defeat decompression-bomb-swap attacks).
    #[cfg(feature = "lsm")]
    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
    pub fn expect_window_descriptor(&mut self, expected: Option<u8>) {
        self.expect_window_descriptor = expected;
    }
1057
1058    /// Validate the just-parsed frame header against any pinned
1059    /// expectations set via [`Self::expect_dict_id`] /
1060    /// [`Self::expect_window_descriptor`].
1061    ///
1062    /// Returns the typed error variant on mismatch and leaves
1063    /// `self.state` in a re-resettable shape — a subsequent
1064    /// `reset()` will overwrite `frame_header` from the new source
1065    /// without needing intermediate cleanup.
1066    #[cfg(feature = "lsm")]
1067    fn validate_expectations(
1068        &self,
1069        frame_header: &frame::FrameHeader,
1070    ) -> Result<(), FrameDecoderError> {
1071        if let Some(expected) = self.expect_dict_id {
1072            let found = frame_header.dictionary_id();
1073            // `Some(0)` is the "no dictionary expected" sentinel —
1074            // matches a frame whose header omits the optional
1075            // dict_id field (which is reported as `None` by the
1076            // parser). All other values must match exactly.
1077            let matches = match (expected, found) {
1078                (0, None) => true,
1079                (e, Some(f)) => e == f,
1080                _ => false,
1081            };
1082            if !matches {
1083                return Err(FrameDecoderError::UnexpectedDictId {
1084                    expected: Some(expected),
1085                    found,
1086                });
1087            }
1088        }
1089        if let Some(expected) = self.expect_window_descriptor {
1090            let found = frame_header.window_descriptor();
1091            if found != Some(expected) {
1092                return Err(FrameDecoderError::UnexpectedWindowDescriptor { expected, found });
1093            }
1094        }
1095        Ok(())
1096    }
1097
    /// Enable or disable magicless frame format
    /// (`ZSTD_f_zstd1_magicless`). When set to `true`, subsequent
    /// [`Self::init`] / [`Self::reset`] calls expect the frame header to begin
    /// directly with the frame-header descriptor — no 4-byte magic
    /// number prefix. Default false. Must match the encoder's
    /// magicless setting; the format is unambiguous only when the
    /// caller knows it out-of-band.
    ///
    /// This call only stores the flag; it is read the next time a frame
    /// header is parsed.
    ///
    /// Note: magicless mode also disables skippable-frame detection.
    /// The `0x184D2A50..=0x184D2A5F` skippable-frame magic range is
    /// only recognised when the 4-byte magic prefix is consumed, so
    /// `decode_all` / `init` / `reset` will treat a skippable frame
    /// at the head of a magicless stream as a malformed frame header
    /// (bad descriptor / window-size error) instead of skipping it.
    /// Mixed-format streams that interleave skippable frames must be
    /// pre-split by the caller; `set_magicless(true)` is only safe
    /// when the entire stream is known to be magicless zstd frames.
    pub fn set_magicless(&mut self, magicless: bool) {
        self.magicless = magicless;
    }
1118
    /// Whether a shared dictionary handle is registered under `dict_id`.
    ///
    /// Variant for targets with pointer-width atomics, where `shared_dicts`
    /// is a real map.
    #[cfg(target_has_atomic = "ptr")]
    fn shared_dict_exists(&self, dict_id: u32) -> bool {
        self.shared_dicts.contains_key(&dict_id)
    }
1123
    /// Whether a shared dictionary handle is registered under `dict_id`.
    ///
    /// Variant for targets without pointer-width atomics: `shared_dicts` is
    /// a unit placeholder there, so no shared dictionary can ever exist and
    /// the answer is always `false`.
    #[cfg(not(target_has_atomic = "ptr"))]
    fn shared_dict_exists(&self, _dict_id: u32) -> bool {
        false
    }
1128
1129    fn validate_registered_dictionary(dict: &Dictionary) -> Result<(), FrameDecoderError> {
1130        use crate::decoding::errors::DictionaryDecodeError as dict_err;
1131
1132        if dict.id == 0 {
1133            return Err(FrameDecoderError::from(dict_err::ZeroDictionaryId));
1134        }
1135        if let Some(index) = dict.offset_hist.iter().position(|&rep| rep == 0) {
1136            return Err(FrameDecoderError::from(
1137                dict_err::ZeroRepeatOffsetInDictionary { index: index as u8 },
1138            ));
1139        }
1140        Ok(())
1141    }
1142
    /// Prepare the decoder for a new frame by parsing its header from
    /// `source`. init() will allocate all needed buffers if it is the first
    /// time this decoder is used; otherwise it just resets these buffers with
    /// no further allocations.
    ///
    /// Note that all bytes currently in the decode buffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
    ///
    /// Equivalent to [`Self::reset`], to which this call delegates.
    pub fn init(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
        self.reset(source)
    }
1152
    /// Initialize the decoder for a new frame using a pre-parsed dictionary handle.
    ///
    /// Equivalent to [`Self::reset_with_dict_handle`], to which this call
    /// delegates.
    ///
    /// If the frame header has a dictionary ID, this validates it against
    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
    ///
    /// If the header omits the optional dictionary ID, this still applies the
    /// provided dictionary handle.
    ///
    /// # Warning
    ///
    /// This method always applies `dict` unless the frame header contains a
    /// non-matching dictionary ID. Callers must only use this API when they
    /// already know the frame was encoded with the provided dictionary, even if
    /// the frame header omits the dictionary ID or encodes an explicit
    /// dictionary ID of `0`.
    ///
    /// Passing a dictionary for a frame that was not encoded with it can
    /// silently corrupt the decoded output.
    pub fn init_with_dict_handle(
        &mut self,
        source: impl Read,
        dict: &DictionaryHandle,
    ) -> Result<(), FrameDecoderError> {
        self.reset_with_dict_handle(source, dict)
    }
1178
1179    /// reset() will allocate all needed buffers if it is the first time this decoder is used
1180    /// else they just reset these buffers with not further allocations
1181    ///
1182    /// Note that all bytes currently in the decodebuffer from any previous frame will be lost. Collect them with collect()/collect_to_writer()
1183    ///
1184    /// equivalent to init()
1185    pub fn reset(&mut self, source: impl Read) -> Result<(), FrameDecoderError> {
1186        use FrameDecoderError as err;
1187        // Fresh frame → start with an empty per-block checksum vec so
1188        // the values for the next frame don't carry over from the
1189        // previous one.
1190        #[cfg(all(feature = "lsm", feature = "hash"))]
1191        self.computed_block_checksums.clear();
1192        let magicless = self.magicless;
1193        let dict_id = match &mut self.state {
1194            Some(s) => {
1195                s.reset_with_format(source, magicless)?;
1196                s.frame_header.dictionary_id()
1197            }
1198            None => {
1199                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
1200                self.state
1201                    .as_ref()
1202                    .and_then(|state| state.frame_header.dictionary_id())
1203            }
1204        };
1205        // Validate any pinned expectations BEFORE block decode work
1206        // runs. Catches dict_id substitution / window-descriptor
1207        // tampering on inputs already authenticated by an outer
1208        // layer (e.g. AEAD). Returning here leaves `self.state` in
1209        // a re-resettable shape — next `reset()` re-parses the
1210        // frame header without intermediate cleanup.
1211        #[cfg(feature = "lsm")]
1212        if let Some(state) = self.state.as_ref() {
1213            self.validate_expectations(&state.frame_header)?;
1214        }
1215        if let Some(dict_id) = dict_id {
1216            let state = self.state.as_mut().expect("state initialized");
1217            let owned_dicts = &self.owned_dicts;
1218            #[cfg(target_has_atomic = "ptr")]
1219            let shared_dicts = &self.shared_dicts;
1220            let dict = owned_dicts
1221                .get(&dict_id)
1222                .or_else(|| {
1223                    #[cfg(target_has_atomic = "ptr")]
1224                    {
1225                        shared_dicts.get(&dict_id)
1226                    }
1227                    #[cfg(not(target_has_atomic = "ptr"))]
1228                    {
1229                        None
1230                    }
1231                })
1232                .ok_or(err::DictNotProvided { dict_id })?;
1233            state.decoder_scratch.init_from_dict(dict);
1234            state.using_dict = Some(dict_id);
1235        }
1236        Ok(())
1237    }
1238
1239    /// Reset this decoder for a new frame using a pre-parsed dictionary handle.
1240    ///
1241    /// If the frame header has a dictionary ID, this validates it against
1242    /// `dict.id()` and returns [`FrameDecoderError::DictIdMismatch`] on mismatch.
1243    ///
1244    /// If the header omits the optional dictionary ID, this still applies the
1245    /// provided dictionary handle.
1246    ///
1247    /// # Warning
1248    ///
1249    /// This method always applies `dict` unless the frame header contains a
1250    /// non-matching dictionary ID. Callers must only use this API when they
1251    /// already know the frame was encoded with the provided dictionary, even if
1252    /// the frame header omits the dictionary ID or encodes an explicit
1253    /// dictionary ID of `0`.
1254    ///
1255    /// Passing a dictionary for a frame that was not encoded with it can
1256    /// silently corrupt the decoded output.
1257    pub fn reset_with_dict_handle(
1258        &mut self,
1259        source: impl Read,
1260        dict: &DictionaryHandle,
1261    ) -> Result<(), FrameDecoderError> {
1262        use FrameDecoderError as err;
1263        // Fresh frame → drop the previous frame's per-block checksum
1264        // digests so the next decode starts with an empty vec.
1265        // Mirrors the same clear in `reset()`; reset_with_dict_handle
1266        // is a parallel entry point so it needs its own call.
1267        #[cfg(all(feature = "lsm", feature = "hash"))]
1268        self.computed_block_checksums.clear();
1269        Self::validate_registered_dictionary(dict.as_dict())?;
1270        let magicless = self.magicless;
1271        // Scope the &mut borrow of `self.state` to the header parse
1272        // alone, so the subsequent `validate_expectations(&self, ...)`
1273        // call below can take a fresh shared borrow of self without
1274        // tripping the borrow checker.
1275        match &mut self.state {
1276            Some(s) => s.reset_with_format(source, magicless)?,
1277            None => {
1278                self.state = Some(FrameDecoderState::new_with_format(source, magicless)?);
1279            }
1280        }
1281        // Single source of truth: route through the same
1282        // `validate_expectations` used by `reset()`. Routing through
1283        // the helper keeps the two code paths from drifting (e.g.,
1284        // if expect-semantics or error wiring changes later).
1285        #[cfg(feature = "lsm")]
1286        {
1287            let header = &self
1288                .state
1289                .as_ref()
1290                .expect("state populated by reset_with_format/new_with_format")
1291                .frame_header;
1292            self.validate_expectations(header)?;
1293        }
1294        let state = self
1295            .state
1296            .as_mut()
1297            .expect("state populated by reset_with_format/new_with_format");
1298        if let Some(dict_id) = state.frame_header.dictionary_id()
1299            && dict_id != dict.id()
1300        {
1301            return Err(err::DictIdMismatch {
1302                expected: dict_id,
1303                provided: dict.id(),
1304            });
1305        }
1306        state.decoder_scratch.init_from_dict(dict);
1307        state.using_dict = Some(dict.id());
1308        Ok(())
1309    }
1310
1311    /// Add a dictionary that can be selected dynamically by frame dictionary ID.
1312    ///
1313    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
1314    /// registered (either as owned or shared).
1315    pub fn add_dict(&mut self, dict: Dictionary) -> Result<(), FrameDecoderError> {
1316        Self::validate_registered_dictionary(&dict)?;
1317        let dict_id = dict.id;
1318        if self.owned_dicts.contains_key(&dict_id) || self.shared_dict_exists(dict_id) {
1319            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
1320        }
1321        self.owned_dicts
1322            .insert(dict_id, DictionaryHandle::from_dictionary(dict));
1323        Ok(())
1324    }
1325
1326    /// Parse and add a serialized dictionary blob.
1327    pub fn add_dict_from_bytes(&mut self, raw_dictionary: &[u8]) -> Result<(), FrameDecoderError> {
1328        let dict = Dictionary::decode_dict(raw_dictionary)?;
1329        self.add_dict(dict)
1330    }
1331
1332    /// Add a pre-parsed dictionary handle for reuse across decoders.
1333    ///
1334    /// This API is available on targets with pointer-width atomics
1335    /// (`target_has_atomic = "ptr"`).
1336    ///
1337    /// Returns [`FrameDecoderError::DictAlreadyRegistered`] if the ID is already
1338    /// registered (either as owned or shared).
1339    #[cfg(target_has_atomic = "ptr")]
1340    pub fn add_dict_handle(&mut self, dict: DictionaryHandle) -> Result<(), FrameDecoderError> {
1341        Self::validate_registered_dictionary(dict.as_dict())?;
1342        let dict_id = dict.id();
1343        if self.owned_dicts.contains_key(&dict_id) || self.shared_dicts.contains_key(&dict_id) {
1344            return Err(FrameDecoderError::DictAlreadyRegistered { dict_id });
1345        }
1346        self.shared_dicts.insert(dict_id, dict);
1347        Ok(())
1348    }
1349
1350    pub fn force_dict(&mut self, dict_id: u32) -> Result<(), FrameDecoderError> {
1351        use FrameDecoderError as err;
1352        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
1353        let owned_dicts = &self.owned_dicts;
1354        #[cfg(target_has_atomic = "ptr")]
1355        let shared_dicts = &self.shared_dicts;
1356
1357        let dict = owned_dicts
1358            .get(&dict_id)
1359            .or_else(|| {
1360                #[cfg(target_has_atomic = "ptr")]
1361                {
1362                    shared_dicts.get(&dict_id)
1363                }
1364                #[cfg(not(target_has_atomic = "ptr"))]
1365                {
1366                    None
1367                }
1368            })
1369            .ok_or(err::DictNotProvided { dict_id })?;
1370        state.decoder_scratch.init_from_dict(dict);
1371        state.using_dict = Some(dict_id);
1372
1373        Ok(())
1374    }
1375
1376    /// Returns how many bytes the frame contains after decompression
1377    pub fn content_size(&self) -> u64 {
1378        match &self.state {
1379            None => 0,
1380            Some(s) => s.frame_header.frame_content_size(),
1381        }
1382    }
1383
1384    /// Returns the checksum that was read from the data. Only available after all bytes have been read. It is the last 4 bytes of a zstd-frame
1385    pub fn get_checksum_from_data(&self) -> Option<u32> {
1386        let state = self.state.as_ref()?;
1387
1388        state.check_sum
1389    }
1390
1391    /// Returns the checksum that was calculated while decoding.
1392    /// Only a sensible value after all decoded bytes have been collected/read from the FrameDecoder.
1393    /// Returns `None` when the frame header has `content_checksum_flag = 0`:
1394    /// no hash is computed for such frames (the post-decode XXH64 pass was a
1395    /// 63 % decode-wall hotspot on flag-off frames; skipping it when the
1396    /// frame format declares no trailing digest avoids that wasted work).
1397    #[cfg(feature = "hash")]
1398    pub fn get_calculated_checksum(&self) -> Option<u32> {
1399        let state = self.state.as_ref()?;
1400        // `ContentChecksum::None` skips the XXH64 pass entirely, so there is
1401        // no calculated digest to report.
1402        if self.content_checksum == ContentChecksum::None {
1403            return None;
1404        }
1405        if !state.frame_header.descriptor.content_checksum_flag() {
1406            return None;
1407        }
1408        let cksum_64bit = state.decoder_scratch.hash_finish();
1409        //truncate to lower 32bit because reasons...
1410        Some(cksum_64bit as u32)
1411    }
1412
1413    /// Compare the frame's stored content checksum against the digest the
1414    /// decoder computed, returning [`FrameDecoderError::ChecksumMismatch`] on
1415    /// disagreement. No-op unless the mode is [`ContentChecksum::Verify`] and
1416    /// the frame carries a trailing checksum.
1417    ///
1418    /// [`decode_all`](Self::decode_all) and the streaming reader call this
1419    /// automatically. Callers driving [`decode_blocks`](Self::decode_blocks)
1420    /// directly invoke it themselves once per frame, after the frame is fully
1421    /// decoded AND fully drained (e.g. via [`collect`](Self::collect)), so both
1422    /// the stored value and the running digest are final.
1423    #[cfg(feature = "hash")]
1424    pub fn verify_content_checksum(&self) -> Result<(), FrameDecoderError> {
1425        if self.content_checksum != ContentChecksum::Verify {
1426            return Ok(());
1427        }
1428        let Some(state) = self.state.as_ref() else {
1429            return Ok(());
1430        };
1431        if !state.frame_header.descriptor.content_checksum_flag() {
1432            return Ok(());
1433        }
1434        let Some(expected) = state.check_sum else {
1435            return Ok(());
1436        };
1437        let calculated = state.decoder_scratch.hash_finish() as u32;
1438        if expected != calculated {
1439            return Err(FrameDecoderError::ChecksumMismatch {
1440                expected,
1441                calculated,
1442            });
1443        }
1444        Ok(())
1445    }
1446
1447    /// Counter for how many bytes have been consumed while decoding the frame
1448    pub fn bytes_read_from_source(&self) -> u64 {
1449        let state = match &self.state {
1450            None => return 0,
1451            Some(s) => s,
1452        };
1453        state.bytes_read_counter
1454    }
1455
1456    /// Whether the current frames last block has been decoded yet
1457    /// If this returns true you can call the drain* functions to get all content
1458    /// (the read() function will drain automatically if this returns true)
1459    pub fn is_finished(&self) -> bool {
1460        let state = match &self.state {
1461            None => return true,
1462            Some(s) => s,
1463        };
1464        if state.frame_header.descriptor.content_checksum_flag() {
1465            state.frame_finished && state.check_sum.is_some()
1466        } else {
1467            state.frame_finished
1468        }
1469    }
1470
1471    /// Counter for how many blocks have already been decoded
1472    pub fn blocks_decoded(&self) -> usize {
1473        let state = match &self.state {
1474            None => return 0,
1475            Some(s) => s,
1476        };
1477        state.block_counter
1478    }
1479
1480    /// Decodes blocks from a reader. It requires that the framedecoder has been initialized first.
1481    /// The Strategy influences how many blocks will be decoded before the function returns
1482    /// This is important if you want to manage memory consumption carefully. If you don't care
1483    /// about that you can just choose the strategy "All" and have all blocks of the frame decoded into the buffer
1484    pub fn decode_blocks(
1485        &mut self,
1486        mut source: impl Read,
1487        strat: BlockDecodingStrategy,
1488    ) -> Result<bool, FrameDecoderError> {
1489        use FrameDecoderError as err;
1490        // Apply the content-checksum mode to the streaming drain hash before
1491        // any block decodes into the ring. Hash only when a digest is both
1492        // wanted (mode != None) AND present in the frame (content_checksum_flag
1493        // set) — a flag-off frame has nothing to verify or expose, so hashing
1494        // it is wasted work. Mirrors the direct path and get_calculated_checksum.
1495        #[cfg(feature = "hash")]
1496        let checksum_mode = self.content_checksum;
1497        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;
1498        #[cfg(feature = "hash")]
1499        {
1500            let compute_hash = checksum_mode != ContentChecksum::None
1501                && state.frame_header.descriptor.content_checksum_flag();
1502            state.decoder_scratch.set_compute_hash(compute_hash);
1503        }
1504
1505        // Streaming entry point: pre-reserve the backing buffer to
1506        // the FCS-capped window so multi-block frames don't pay repeated
1507        // `reserve_amortized` grow steps (128 KiB → 256 KiB → ... →
1508        // window) as blocks accumulate. `decode_all` does the same up
1509        // front in `decode_all_impl`; this mirrors it for callers
1510        // driving `decode_blocks` directly. Idempotent — the
1511        // backend's `reserve` early-returns when capacity is already
1512        // sufficient.
1513        let useful_window = state.useful_window_size();
1514        state.decoder_scratch.reserve_buffer(useful_window);
1515
1516        let mut block_dec = decoding::block_decoder::new();
1517
1518        let buffer_size_before = state.decoder_scratch.buffer_len();
1519        let block_counter_before = state.block_counter;
1520        loop {
1521            vprintln!("################");
1522            vprintln!("Next Block: {}", state.block_counter);
1523            vprintln!("################");
1524            // Capture the failing-block coordinates BEFORE the header read so
1525            // the error carries where it happened: `bytes_read_counter` is the
1526            // frame-absolute offset of this block's header (not yet advanced),
1527            // `block_counter` its 0-based index. Used by both the header- and
1528            // body-error builders below (block-precise recovery under `lsm`).
1529            let block_index = state.block_counter as u32;
1530            let block_frame_offset = state.bytes_read_counter as u32;
1531            let (block_header, block_header_size) =
1532                block_dec.read_block_header(&mut source).map_err(|source| {
1533                    block_header_decode_error(source, block_index, block_frame_offset)
1534                })?;
1535            state.bytes_read_counter += u64::from(block_header_size);
1536
1537            vprintln!();
1538            vprintln!(
1539                "Found {} block with size: {}, which will be of size: {}",
1540                block_header.block_type,
1541                block_header.content_size,
1542                block_header.decompressed_size
1543            );
1544
1545            #[cfg(all(feature = "lsm", feature = "hash"))]
1546            let len_before_block: Option<usize> = if self.per_block_checksums_enabled {
1547                Some(state.decoder_scratch.buffer_len())
1548            } else {
1549                None
1550            };
1551            let bytes_read_in_block_body = state
1552                .decoder_scratch
1553                .decode_block_content(&mut block_dec, &block_header, &mut source)
1554                .map_err(|source| {
1555                    block_body_decode_error(
1556                        source,
1557                        block_index,
1558                        block_frame_offset,
1559                        &block_header,
1560                        block_header_size,
1561                    )
1562                })?;
1563            state.bytes_read_counter += bytes_read_in_block_body;
1564
1565            // Per-block XXH64 (low 32 bits) of the just-decompressed
1566            // bytes. Hashed from `last_n_as_slices` so RingBuffer wrap
1567            // is handled in-place, no extra copy.
1568            #[cfg(all(feature = "lsm", feature = "hash"))]
1569            if let Some(len_before_block) = len_before_block {
1570                let added = state.decoder_scratch.buffer_len() - len_before_block;
1571                let (s1, s2) = state.decoder_scratch.last_n_as_slices(added);
1572                let mut h = twox_hash::XxHash64::with_seed(0);
1573                use core::hash::Hasher;
1574                h.write(s1);
1575                h.write(s2);
1576                self.computed_block_checksums.push(h.finish() as u32);
1577            }
1578
1579            state.block_counter += 1;
1580
1581            vprintln!("Output: {}", state.decoder_scratch.buffer_len());
1582
1583            if block_header.last_block {
1584                state.frame_finished = true;
1585                if state.frame_header.descriptor.content_checksum_flag() {
1586                    let mut chksum = [0u8; 4];
1587                    source
1588                        .read_exact(&mut chksum)
1589                        .map_err(err::FailedToReadChecksum)?;
1590                    state.bytes_read_counter += 4;
1591                    let chksum = u32::from_le_bytes(chksum);
1592                    state.check_sum = Some(chksum);
1593                }
1594                break;
1595            }
1596
1597            match strat {
1598                BlockDecodingStrategy::All => { /* keep going */ }
1599                BlockDecodingStrategy::UptoBlocks(n) => {
1600                    if state.block_counter - block_counter_before >= n {
1601                        break;
1602                    }
1603                }
1604                BlockDecodingStrategy::UptoBytes(n) => {
1605                    if state.decoder_scratch.buffer_len() - buffer_size_before >= n {
1606                        break;
1607                    }
1608                }
1609            }
1610        }
1611
1612        Ok(state.frame_finished)
1613    }
1614
    /// Decode the inner blocks `[start_block, end_block)` of the current
    /// frame and return their decompressed bytes as one contiguous buffer.
    ///
    /// Serves two consumer needs with one call:
    ///
    /// - **Range-query performance:** decode only the inner zstd blocks that
    ///   cover a key range instead of the whole frame. Blocks before
    ///   `start_block` are decoded into the window (zstd blocks share one
    ///   window, so a leading block's bytes may be the match source for an
    ///   in-range block and cannot simply be skipped) but their output is not
    ///   returned; blocks at or after `end_block` are not decoded at all,
    ///   which is the trailing-block work saving. Map a decompressed byte
    ///   offset to a block index with
    ///   [`FrameEmitInfo::decompressed_byte_range`].
    /// - **Best-effort recovery:** if a block decode fails, decoding stops,
    ///   the clean prefix of in-range output is preserved in
    ///   [`PartialDecode::data`], and the failure is reported via
    ///   [`PartialDecode::stopped_at`]. Passing `(0, u32::MAX)` decodes the
    ///   whole frame, stopping at the first corrupt block (pure recovery).
    ///
    /// `end_block` is exclusive; pass `u32::MAX` to decode to the end of the
    /// frame. Call on a freshly [`reset`](Self::reset) decoder (it decodes
    /// from the frame's first block).
    ///
    /// # Resume (cold incremental / top-up)
    ///
    /// A plain call drains its in-range output from the match window on return,
    /// so two consecutive calls cannot resume one another and growing a decoded
    /// extent would mean re-decoding the covering prefix from block 0
    /// (`O(extent)` per growth, `O(N²)` for a forward walk). The `resume` /
    /// `emit_resume` arguments make a symmetric one-call grow-loop possible:
    ///
    /// - `emit_resume = true` captures the cross-block carry-over state (entropy
    ///   tables + repcode history + the next block index / output offset) into
    ///   [`PartialDecode::resume_state`]. The entropy-table snapshot clone is
    ///   only paid when this is set. The snapshot is `None` when the decode
    ///   reaches the frame's last block ([`PartialDecode::frame_finished`]):
    ///   there is no following block to resume from, so an incremental walk
    ///   stops on `frame_finished` rather than on a `None` snapshot.
    /// - `resume = Some(`[`ResumeInput`]`)` continues from a previously emitted
    ///   [`ResumeState`] WITHOUT re-decompressing the preceding blocks: the
    ///   match window is primed from [`ResumeInput::window_prime`] and the
    ///   entropy/repcode tables are restored from the state, so a `Repeat_Mode`
    ///   resume block resolves byte-identically to a contiguous decode — even
    ///   across a dropped (cold) decoder.
    ///
    /// When `resume` is `Some`, decoding resumes at
    /// [`ResumeState::block_index`] and the `start_block` argument is ignored
    /// (pass `resume.state.block_index()`); position `source` at that block's
    /// compressed frame offset
    /// ([`FrameEmitInfo::blocks`]`[block_index].offset_in_frame`). After a
    /// resumed call, [`bytes_read_from_source`](Self::bytes_read_from_source)
    /// and any `stopped_at` offsets are relative to the repositioned `source`.
    ///
    /// **Dictionaries:** [`ResumeState`] does NOT carry the dictionary content.
    /// For a dictionary frame, attach the dictionary to the resuming decoder the
    /// same way as for a fresh decode — [`reset`](Self::reset) with the
    /// dictionary registered (or
    /// [`reset_with_dict_handle`](Self::reset_with_dict_handle)) BEFORE this
    /// call — so dict-sourced matches near the frame start resolve. The caller
    /// already holds the dictionary (it supplied it at encode time), so
    /// re-supplying it on resume is free; storing it in the snapshot would only
    /// duplicate it. The resume guard records the applied dictionary's identity
    /// and rejects ([`FrameDecoderError::ResumeFrameMismatch`]) a resume whose
    /// active dictionary differs from the one the snapshot was captured under.
    ///
    /// # Errors
    ///
    /// Returns [`FrameDecoderError::NotYetInitialized`] if the decoder has not
    /// been reset, [`FrameDecoderError::InvalidBlockRange`] if the effective
    /// start exceeds `end_block`, [`FrameDecoderError::ResumeWindowTooShort`]
    /// if `resume`'s `window_prime` is shorter than the match window the resume
    /// block can reach back into (`min(window_size, output_offset)`), and
    /// [`FrameDecoderError::ResumeFrameMismatch`] if the snapshot was captured
    /// from a frame with a different decode shape / dictionary, or (with the
    /// `hash` feature) a `window_prime` whose content does not match what was
    /// captured — all rejected up front rather than silently mis-resolving
    /// matches. A corrupt block is NOT an `Err` here: it is reported via
    /// [`PartialDecode::stopped_at`] so the clean prefix survives. A failure
    /// while draining the decode buffer into the returned data is reported as
    /// [`FrameDecoderError::FailedToDrainDecodebuffer`].
    ///
    /// [`FrameEmitInfo::decompressed_byte_range`]: crate::encoding::frame_emit_info::FrameEmitInfo::decompressed_byte_range
    /// [`FrameEmitInfo::blocks`]: crate::encoding::frame_emit_info::FrameEmitInfo::blocks
    #[cfg(feature = "lsm")]
    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
    pub fn decode_blocks_partial(
        &mut self,
        mut source: impl Read,
        start_block: u32,
        end_block: u32,
        resume: Option<ResumeInput<'_>>,
        emit_resume: bool,
    ) -> Result<PartialDecode, FrameDecoderError> {
        use FrameDecoderError as err;
        // Read the decoder-level settings up front; `magicless` feeds both
        // `FrameKey::from_state` calls below (resume guard and snapshot).
        #[cfg(feature = "hash")]
        let checksum_mode = self.content_checksum;
        let magicless = self.magicless;
        let state = self.state.as_mut().ok_or(err::NotYetInitialized)?;

        // Honor the checksum mode before any drain/read can hash: `None` must
        // compute no XXH64. `decode_blocks` sets this; the partial path must too,
        // or a reused scratch keeps hashing with the default-enabled state.
        #[cfg(feature = "hash")]
        {
            let compute_hash = checksum_mode != ContentChecksum::None
                && state.frame_header.descriptor.content_checksum_flag();
            state.decoder_scratch.set_compute_hash(compute_hash);
        }

        // Mirror `decode_blocks`: pre-reserve the backing buffer to the
        // FCS-capped window so multi-block frames don't pay repeated grow
        // steps. The RAW frame window stays separately bound — the resume
        // logic below bounds match reach by the frame's window semantics,
        // not by the (possibly smaller) reservation cap.
        // A header for which `window_size()` fails is treated as window 0.
        let window_size = state.frame_header.window_size().unwrap_or(0) as usize;
        let useful_window = state.useful_window_size();
        state.decoder_scratch.reserve_buffer(useful_window);

        // Cold resume: prime the match window + restore entropy/repcode state +
        // advance the block cursor BEFORE the loop, so the first in-range block
        // resolves its matches and `Repeat_Mode` tables against the caller's
        // persisted state instead of re-decoded prefix blocks. The effective
        // start is the resume state's block index (the passed `start_block` is
        // ignored in resume mode, per the doc).
        let effective_start = if let Some(r) = resume {
            // Reject a snapshot captured from a different frame shape BEFORE
            // touching any decoder state: restoring entropy/repcode tables that
            // belong to another frame would silently produce byte-wrong output.
            let current_key = FrameKey::from_state(state, magicless);
            if current_key != r.state.frame_key {
                return Err(err::ResumeFrameMismatch);
            }
            let output_offset = r.state.output_offset;
            // The window the resume block can reach back into is bounded by the
            // smaller of the frame's window_size and the bytes produced so far.
            let required = core::cmp::min(window_size as u64, output_offset) as usize;
            if r.window_prime.len() < required {
                return Err(err::ResumeWindowTooShort {
                    got: r.window_prime.len(),
                    need: required,
                });
            }
            // Only the most recent `window_size` bytes can ever back a match
            // (offset <= window_size by the frame invariant); load just those
            // even if the caller handed us a longer prefix, bounding resume
            // memory to one window regardless of the skipped prefix's size.
            let prime = if r.window_prime.len() > window_size {
                &r.window_prime[r.window_prime.len() - window_size..]
            } else {
                r.window_prime
            };
            // Content-exact identity: the primed window must hash to what was
            // captured at emit. Catches a same-shape-but-different-frame
            // snapshot and a wrong/corrupted window_prime (which FrameKey alone
            // cannot), before any state is restored. O(window) one-time per
            // resume — negligible next to the decode it guards.
            #[cfg(feature = "hash")]
            if xxh64_of(prime) != r.state.window_hash {
                return Err(err::ResumeFrameMismatch);
            }
            // Validate the effective range (resume mode begins at the resume
            // block, ignoring the caller's `start_block`) BEFORE mutating the
            // decoder: an inverted `end_block` must fail without priming the
            // window / entropy or advancing the cursor, leaving the decoder
            // re-resettable rather than in a half-resumed state.
            let effective_start = r.state.block_index;
            if effective_start > end_block {
                return Err(err::InvalidBlockRange {
                    start_block: effective_start,
                    end_block,
                });
            }
            // All guards passed: from here on the decoder state is mutated.
            state.decoder_scratch.restore_entropy(r.state);
            state.decoder_scratch.prime_window(prime, output_offset);
            state.block_counter = effective_start as usize;
            // The caller repositions `source` to the resume block; report
            // consumed bytes relative to that point (reset left this at the
            // frame-header size).
            state.bytes_read_counter = 0;
            effective_start
        } else {
            // Fresh decode: validate the caller's range (no state to mutate).
            if start_block > end_block {
                return Err(err::InvalidBlockRange {
                    start_block,
                    end_block,
                });
            }
            start_block
        };

        // One block decoder instance is reused for every block in the loop.
        let mut block_dec = decoding::block_decoder::new();

        // Bytes of prefix-window output that physically precede the first
        // in-range block in the buffer. Captured at the prefix → in-range
        // transition (after leading blocks were dropped to the window) so we
        // can discard exactly those bytes once decoding is done. `None` until
        // the first in-range block is reached.
        let mut prefix_window_len: Option<usize> = None;
        // Exact count of clean in-range decompressed bytes (sum of per-block
        // length deltas of the in-range blocks that succeeded). Any partial
        // bytes of a failing in-range block are excluded — the fused executor
        // rolls the buffer back to the pre-block checkpoint on a sequence
        // error, and anything left over is never counted here, so it is not
        // drained into `data`.
        let mut subset_bytes: u64 = 0;
        let mut blocks_decoded: u32 = 0;
        let mut stopped_at: Option<(u32, FrameDecoderError)> = None;

        loop {
            // NOTE(review): `as u32` narrows the `usize` block counter —
            // confirm frames cannot exceed `u32::MAX` blocks here.
            let block_index = state.block_counter as u32;
            // Stop before decoding `end_block`: the trailing blocks are never
            // touched (the perf win), and the frame's tail is left unread.
            if block_index >= end_block || state.frame_finished {
                break;
            }
            let in_range = block_index >= effective_start;
            // Snapshot the window length at the prefix → in-range boundary.
            if in_range && prefix_window_len.is_none() {
                prefix_window_len = Some(state.decoder_scratch.buffer_len());
            }

            // Byte-counter value at this block's header, taken before the
            // header is read so the error builders can report it.
            // NOTE(review): `as u32` narrows the `u64` counter — confirm
            // truncation is acceptable for the reported error offset.
            let block_frame_offset = state.bytes_read_counter as u32;
            let (block_header, block_header_size) = match block_dec.read_block_header(&mut source) {
                Ok(v) => v,
                Err(e) => {
                    // Recorded instead of returned so the clean prefix decoded
                    // so far is still drained into `data` below.
                    stopped_at = Some((
                        block_index,
                        block_header_decode_error(e, block_index, block_frame_offset),
                    ));
                    break;
                }
            };
            state.bytes_read_counter += u64::from(block_header_size);

            // Buffer length before the body decode; the delta afterwards is
            // what this block produced.
            let len_before = state.decoder_scratch.buffer_len();
            match state.decoder_scratch.decode_block_content(
                &mut block_dec,
                &block_header,
                &mut source,
            ) {
                Ok(body_read) => state.bytes_read_counter += body_read,
                Err(e) => {
                    stopped_at = Some((
                        block_index,
                        block_body_decode_error(
                            e,
                            block_index,
                            block_frame_offset,
                            &block_header,
                            block_header_size,
                        ),
                    ));
                    break;
                }
            }
            let produced = state.decoder_scratch.buffer_len() - len_before;
            // Per-block XXH64 capture, mirroring `decode_blocks`: hash this
            // block's just-decoded bytes BEFORE any window drop so the digest
            // count stays 1:1 with the blocks decoded on this path too. Covers
            // context (out-of-range) blocks as well, matching `decode_blocks`
            // which hashes every block it decodes.
            #[cfg(all(feature = "lsm", feature = "hash"))]
            if self.per_block_checksums_enabled {
                use core::hash::Hasher;
                let (s1, s2) = state.decoder_scratch.last_n_as_slices(produced);
                let mut h = twox_hash::XxHash64::with_seed(0);
                h.write(s1);
                h.write(s2);
                self.computed_block_checksums.push(h.finish() as u32);
            }
            state.block_counter += 1;
            // Only in-range blocks contribute to the returned data and count.
            if in_range {
                subset_bytes += produced as u64;
                blocks_decoded += 1;
            }

            if block_header.last_block {
                state.frame_finished = true;
                if state.frame_header.descriptor.content_checksum_flag() {
                    let mut chksum = [0u8; 4];
                    match source.read_exact(&mut chksum) {
                        Ok(()) => {
                            state.bytes_read_counter += 4;
                            state.check_sum = Some(u32::from_le_bytes(chksum));
                        }
                        // A trailing-checksum read failure does not invalidate
                        // the decoded bytes; surface it so the caller knows the
                        // frame tail was truncated, but keep `data`.
                        Err(e) => {
                            stopped_at = Some((block_index, err::FailedToReadChecksum(e)));
                        }
                    }
                }
                break;
            }

            // Leading (out-of-range) block: bound memory to the window. We
            // must NOT drop once in-range, or the in-range output we are about
            // to return would be discarded.
            if !in_range {
                state.decoder_scratch.buffer_drop_to_window_size();
            }
        }

        // Emit cross-block carry-over state for a later resume, if requested.
        // Captured AFTER the loop (entropy tables / repcode history are final)
        // but BEFORE the drain — the drain only touches the visible output, not
        // the entropy state or `total_output_counter`. `block_counter` /
        // `total_output()` give the resume coordinates: the next block to decode
        // and the cumulative decompressed offset before it (clean even after an
        // early stop, since a failed block rolls both back to its checkpoint).
        // Suppress the snapshot on the terminal block: `block_counter` is then
        // one past the last block (EOF), for which there is no next-block source
        // position to resume from. A resume needs a real following block.
        let resume_state = if emit_resume && !state.frame_finished {
            let (fse, huf, offset_hist) = state.decoder_scratch.export_entropy();
            Some(ResumeState {
                frame_key: FrameKey::from_state(state, magicless),
                block_index: state.block_counter as u32,
                output_offset: state.decoder_scratch.total_output(),
                fse,
                huf,
                offset_hist,
                #[cfg(feature = "hash")]
                window_hash: state.decoder_scratch.window_tail_hash(window_size),
            })
        } else {
            None
        };

        // The visible buffer is now `[prefix window][in-range clean][maybe
        // trailing garbage from a failed in-range block]`. Drop the prefix
        // window from the front (match resolution is complete, so it is no
        // longer needed), then drain exactly the clean in-range byte count.
        let w = prefix_window_len.unwrap_or(0);
        state.decoder_scratch.buffer_discard_front(w);
        let mut data = alloc::vec![0u8; subset_bytes as usize];
        state
            .decoder_scratch
            .buffer_read_all(&mut data)
            .map_err(err::FailedToDrainDecodebuffer)?;

        // Clear anything still buffered so a later `read()`/`collect()` on this
        // decoder cannot surface out-of-range bytes: the leading-block window
        // when no in-range block was reached (`prefix_window_len` stayed
        // `None`, so `w` was 0), or trailing garbage from a failed in-range
        // block. Only the returned `data` is the partial decode's output.
        let residual = state.decoder_scratch.buffer_len();
        state.decoder_scratch.buffer_discard_front(residual);

        Ok(PartialDecode {
            data,
            start_block: effective_start,
            blocks_decoded,
            stopped_at,
            frame_finished: state.frame_finished,
            resume_state,
        })
    }
1974
1975    /// Collect bytes and retain window_size bytes while decoding is still going on.
1976    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
1977    pub fn collect(&mut self) -> Option<Vec<u8>> {
1978        let finished = self.is_finished();
1979        let state = self.state.as_mut()?;
1980        if finished {
1981            Some(state.decoder_scratch.buffer_drain())
1982        } else {
1983            state.decoder_scratch.buffer_drain_to_window_size()
1984        }
1985    }
1986
1987    /// Collect bytes and retain window_size bytes while decoding is still going on.
1988    /// After decoding of the frame (is_finished() == true) has finished it will collect all remaining bytes
1989    pub fn collect_to_writer(&mut self, w: impl Write) -> Result<usize, Error> {
1990        let finished = self.is_finished();
1991        let state = match &mut self.state {
1992            None => return Ok(0),
1993            Some(s) => s,
1994        };
1995        if finished {
1996            state.decoder_scratch.buffer_drain_to_writer(w)
1997        } else {
1998            state.decoder_scratch.buffer_drain_to_window_size_writer(w)
1999        }
2000    }
2001
2002    /// How many bytes can currently be collected from the decodebuffer, while decoding is going on this will be lower than the actual decodbuffer size
2003    /// because window_size bytes need to be retained for decoding.
2004    /// After decoding of the frame (is_finished() == true) has finished it will report all remaining bytes
2005    pub fn can_collect(&self) -> usize {
2006        let finished = self.is_finished();
2007        let state = match &self.state {
2008            None => return 0,
2009            Some(s) => s,
2010        };
2011        if finished {
2012            state.decoder_scratch.buffer_can_drain()
2013        } else {
2014            state
2015                .decoder_scratch
2016                .buffer_can_drain_to_window_size()
2017                .unwrap_or(0)
2018        }
2019    }
2020
2021    /// Decodes as many blocks as possible from the source slice and reads from the decodebuffer into the target slice
2022    /// The source slice may contain only parts of a frame but must contain at least one full block to make progress
2023    ///
2024    /// By all means use decode_blocks if you have a io.Reader available. This is just for compatibility with other decompressors
2025    /// which try to serve an old-style c api
2026    ///
2027    /// Returns (read, written), if read == 0 then the source did not contain a full block and further calls with the same
2028    /// input will not make any progress!
2029    ///
2030    /// Note that no kind of block can be bigger than 128kb.
2031    /// So to be safe use at least 128*1024 (max block content size) + 3 (block_header size) + 18 (max frame_header size) bytes as your source buffer
2032    ///
2033    /// You may call this function with an empty source after all bytes have been decoded. This is equivalent to just call decoder.read(&mut target)
2034    pub fn decode_from_to(
2035        &mut self,
2036        source: &[u8],
2037        target: &mut [u8],
2038    ) -> Result<(usize, usize), FrameDecoderError> {
2039        use FrameDecoderError as err;
2040        let bytes_read_at_start = match &self.state {
2041            Some(s) => s.bytes_read_counter,
2042            None => 0,
2043        };
2044
2045        if !self.is_finished() || self.state.is_none() {
2046            let mut mt_source = source;
2047
2048            if self.state.is_none() {
2049                self.init(&mut mt_source)?;
2050            }
2051
2052            //pseudo block to scope "state" so we can borrow self again after the block
2053            {
2054                let state = match &mut self.state {
2055                    Some(s) => s,
2056                    None => panic!("Bug in library"),
2057                };
2058                let mut block_dec = decoding::block_decoder::new();
2059
2060                // Honour the content-checksum mode on this hand-rolled decode
2061                // loop (it does not go through `decode_blocks`): hash only when
2062                // a digest is wanted and the frame carries one. `None` skips the
2063                // XXH64 pass; verification happens after the final drain below.
2064                #[cfg(feature = "hash")]
2065                {
2066                    let compute_hash = self.content_checksum != ContentChecksum::None
2067                        && state.frame_header.descriptor.content_checksum_flag();
2068                    state.decoder_scratch.set_compute_hash(compute_hash);
2069                }
2070
2071                if state.frame_header.descriptor.content_checksum_flag()
2072                    && state.frame_finished
2073                    && state.check_sum.is_none()
2074                {
2075                    // The trailing checksum arrived on a separate call (the last
2076                    // block finished earlier). Consume it and fall through to the
2077                    // shared `self.read` + post-drain verify below — NOT an early
2078                    // return — so any output still buffered from a prior
2079                    // small-`target` call is flushed on this call too, and the
2080                    // checksum is verified through the one shared path.
2081                    if mt_source.len() >= 4 {
2082                        let chksum = mt_source[..4].try_into().expect("optimized away");
2083                        state.bytes_read_counter += 4;
2084                        let chksum = u32::from_le_bytes(chksum);
2085                        state.check_sum = Some(chksum);
2086                        mt_source = &mt_source[4..];
2087                    }
2088                }
2089
2090                loop {
2091                    // The frame is fully decoded (last block seen, trailer
2092                    // consumed above); no more blocks to read. Any leftover
2093                    // bytes are not a block header — stop before misreading them.
2094                    if state.frame_finished {
2095                        break;
2096                    }
2097                    //check if there are enough bytes for the next header
2098                    if mt_source.len() < 3 {
2099                        break;
2100                    }
2101                    let block_index = state.block_counter as u32;
2102                    let block_frame_offset = state.bytes_read_counter as u32;
2103                    let (block_header, block_header_size) = block_dec
2104                        .read_block_header(&mut mt_source)
2105                        .map_err(|source| {
2106                            block_header_decode_error(source, block_index, block_frame_offset)
2107                        })?;
2108
2109                    // check the needed size for the block before updating counters.
2110                    // If not enough bytes are in the source, the header will have to be read again, so act like we never read it in the first place
2111                    if mt_source.len() < block_header.content_size as usize {
2112                        break;
2113                    }
2114                    state.bytes_read_counter += u64::from(block_header_size);
2115
2116                    let bytes_read_in_block_body = state
2117                        .decoder_scratch
2118                        .decode_block_content(&mut block_dec, &block_header, &mut mt_source)
2119                        .map_err(|source| {
2120                            block_body_decode_error(
2121                                source,
2122                                block_index,
2123                                block_frame_offset,
2124                                &block_header,
2125                                block_header_size,
2126                            )
2127                        })?;
2128                    state.bytes_read_counter += bytes_read_in_block_body;
2129                    state.block_counter += 1;
2130
2131                    if block_header.last_block {
2132                        state.frame_finished = true;
2133                        if state.frame_header.descriptor.content_checksum_flag() {
2134                            //if there are enough bytes handle this here. Else the block at the start of this function will handle it at the next call
2135                            if mt_source.len() >= 4 {
2136                                let chksum = mt_source[..4].try_into().expect("optimized away");
2137                                state.bytes_read_counter += 4;
2138                                let chksum = u32::from_le_bytes(chksum);
2139                                state.check_sum = Some(chksum);
2140                            }
2141                        }
2142                        break;
2143                    }
2144                }
2145            }
2146        }
2147
2148        let result_len = self.read(target).map_err(err::FailedToDrainDecodebuffer)?;
2149        // Once the frame is fully decoded and drained, the running digest is
2150        // final: validate it in `Verify` mode (no-op otherwise). Same finish
2151        // point as the streaming reader.
2152        #[cfg(feature = "hash")]
2153        if self.is_finished() && self.can_collect() == 0 {
2154            self.verify_content_checksum()?;
2155        }
2156        let bytes_read_at_end = match &mut self.state {
2157            Some(s) => s.bytes_read_counter,
2158            None => panic!("Bug in library"),
2159        };
2160        let read_len = bytes_read_at_end - bytes_read_at_start;
2161        Ok((read_len as usize, result_len))
2162    }
2163
2164    /// Decode multiple frames into the output slice.
2165    ///
2166    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
2167    /// skipped during decode.
2168    ///
2169    /// `output` must be large enough to hold the decompressed data. If you don't know
2170    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
2171    ///
2172    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
2173    ///
2174    /// Returns the number of bytes written to `output`.
2175    pub fn decode_all(
2176        &mut self,
2177        input: &[u8],
2178        output: &mut [u8],
2179    ) -> Result<usize, FrameDecoderError> {
2180        #[cfg(not(feature = "lsm"))]
2181        {
2182            self.decode_all_impl(input, output, |this, src| this.init(src))
2183        }
2184        #[cfg(feature = "lsm")]
2185        {
2186            self.decode_all_impl(input, output, |this, src| this.init(src), None)
2187        }
2188    }
2189
2190    /// Decode multiple frames into the output slice, invoking `visitor`
2191    /// for every skippable frame encountered before advancing past it.
2192    ///
2193    /// `input` must contain an exact number of frames. Skippable frames
2194    /// (RFC 8878 §3.1.2 magic numbers `0x184D2A50..=0x184D2A5F`) are
2195    /// allowed and will be both visited AND skipped: the visitor gets
2196    /// `(magic_variant, payload)` where `magic_variant` is the low
2197    /// nibble of the magic (`magic - 0x184D2A50`, range `0..=15`) and
2198    /// `payload` is a borrowed slice of the on-wire payload bytes (the
2199    /// skippable frame's `Frame_Size` field worth of data) into
2200    /// `input` — no allocation.
2201    ///
2202    /// The visitor sees skippable frames in stream order; interleaved
2203    /// regular zstd frames continue to decompress into `output` exactly
2204    /// as `decode_all` does.
2205    ///
2206    /// `output` must be large enough to hold the decompressed data.
2207    /// Returns the number of bytes written to `output`.
2208    ///
2209    /// # Example
2210    ///
2211    /// ```ignore
2212    /// use structured_zstd::decoding::FrameDecoder;
2213    ///
2214    /// let mut decoder = FrameDecoder::new();
2215    /// let mut output = vec![0u8; 1024];
2216    /// let mut collected: Vec<(u8, Vec<u8>)> = Vec::new();
2217    /// let n = decoder.decode_all_with_skippable_visitor(
2218    ///     input,
2219    ///     &mut output,
2220    ///     |variant, payload| collected.push((variant, payload.to_vec())),
2221    /// )?;
2222    /// ```
2223    #[cfg(feature = "lsm")]
2224    #[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
2225    pub fn decode_all_with_skippable_visitor<F>(
2226        &mut self,
2227        input: &[u8],
2228        output: &mut [u8],
2229        mut visitor: F,
2230    ) -> Result<usize, FrameDecoderError>
2231    where
2232        F: FnMut(u8, &[u8]),
2233    {
2234        self.decode_all_impl(
2235            input,
2236            output,
2237            |this, src| this.init(src),
2238            Some(&mut visitor),
2239        )
2240    }
2241
2242    /// Decode multiple frames into the output slice using a pre-parsed dictionary handle.
2243    ///
2244    /// `input` must contain an exact number of frames. Skippable frames are allowed and will be
2245    /// skipped during decode.
2246    ///
2247    /// `output` must be large enough to hold the decompressed data. If you don't know
2248    /// how large the output will be, use [`FrameDecoder::decode_blocks`] instead.
2249    ///
2250    /// This calls [`FrameDecoder::init_with_dict_handle`], and all bytes currently in the
2251    /// decoder will be lost.
2252    ///
2253    /// # Warning
2254    ///
2255    /// Each decoded frame is initialized with `dict`, even when a frame header
2256    /// omits the optional dictionary ID. Callers must only use this API when
2257    /// they already know the input frames were encoded with the provided
2258    /// dictionary; otherwise decoded output can be silently corrupted.
2259    pub fn decode_all_with_dict_handle(
2260        &mut self,
2261        input: &[u8],
2262        output: &mut [u8],
2263        dict: &DictionaryHandle,
2264    ) -> Result<usize, FrameDecoderError> {
2265        #[cfg(not(feature = "lsm"))]
2266        {
2267            self.decode_all_impl(input, output, |this, src| {
2268                this.init_with_dict_handle(src, dict)
2269            })
2270        }
2271        #[cfg(feature = "lsm")]
2272        {
2273            self.decode_all_impl(
2274                input,
2275                output,
2276                |this, src| this.init_with_dict_handle(src, dict),
2277                None,
2278            )
2279        }
2280    }
2281
2282    /// Default-feature decode_all_impl: no visitor parameter so the
2283    /// no-lsm build's call surface and codegen are byte-identical to
2284    /// the pre-#172 implementation. Compiles only when `lsm` is OFF.
2285    #[cfg(not(feature = "lsm"))]
2286    fn decode_all_impl(
2287        &mut self,
2288        mut input: &[u8],
2289        mut output: &mut [u8],
2290        mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
2291    ) -> Result<usize, FrameDecoderError> {
2292        let mut total_bytes_written = 0;
2293        while !input.is_empty() {
2294            match init_frame(self, &mut input) {
2295                Ok(_) => {}
2296                Err(FrameDecoderError::ReadFrameHeaderError(
2297                    crate::decoding::errors::ReadFrameHeaderError::SkipFrame { length, .. },
2298                )) => {
2299                    input = input
2300                        .get(length as usize..)
2301                        .ok_or(FrameDecoderError::FailedToSkipFrame)?;
2302                    continue;
2303                }
2304                Err(e) => return Err(e),
2305            };
2306            // Per-frame direct-path dispatch. Now safe to route the
2307            // public `decode_all` here because
2308            // `UserSliceBackend::exec_sequence_inline` returns
2309            // `Result<(), ExecuteSequencesError>` instead of
2310            // panicking on capacity overflow; the error propagates
2311            // up as `FrameDecoderError`. Eligibility (FCS > 0,
2312            // remaining `output` slice holds the declared content)
2313            // puts the frame on the fast path that bypasses the
2314            // FlatBuf/Ring -> `read()` drain copy. Ineligible frames
2315            // (no FCS, output too small) fall through to the legacy
2316            // `decode_blocks` + `read` drain loop below. Dictionary
2317            // frames are eligible: `run_direct_decode` hands the
2318            // shared dict handle to its buffer, and beyond-prefix
2319            // offsets resolve through `repeat_from_dict`.
2320            let (content_size, fcs_declared) = {
2321                let state_ref = self.state.as_ref().expect("init populated state");
2322                (
2323                    state_ref.frame_header.frame_content_size(),
2324                    state_ref.frame_header.fcs_declared(),
2325                )
2326            };
2327            // Direct decode requires only that the caller slice holds the
2328            // declared content; the inline sequence-exec path no longer
2329            // needs `WILDCOPY_OVERLENGTH` trailing slack because the
2330            // trailing sequence(s) take the bounded (non-overshooting)
2331            // copy in `UserSliceBackend::exec_sequence_bounded`. This is
2332            // the universal "decode into an FCS-sized buffer" case (a
2333            // caller sizing `output` to exactly `frame_content_size`),
2334            // so dropping the slack requirement halves its peak alloc.
2335            //
2336            // Per-block checksums collected inside `run_direct_decode`
2337            // post-loop (over recorded (start, end) ranges of `output`)
2338            // so the direct path stays eligible AND keeps the
2339            // window-size cap (`drop_to_window_size`) between blocks
2340            // that the spec relies on for `offset <= window_size`
2341            // validation. Path choice no longer alters checksum
2342            // semantics.
2343            let direct_eligible = content_size > 0 && (output.len() as u64) >= content_size;
2344            if direct_eligible {
2345                let written = self.run_direct_decode(&mut input, output, content_size)?;
2346                output = &mut output[written..];
2347                total_bytes_written += written;
2348                // Per-frame content-checksum verification (no-op unless the
2349                // mode is `Verify` and the frame carries a checksum).
2350                #[cfg(feature = "hash")]
2351                self.verify_content_checksum()?;
2352                continue;
2353            }
2354            // Non-direct fallback: pre-reserve the backing buffer to
2355            // `window_size` in a single allocation before block decode
2356            // starts, so multi-segment frames don't pay repeated
2357            // `reserve_amortized` grow steps as blocks accumulate (each
2358            // block only reserves MAX_BLOCK_SIZE = 128 KiB, so a window
2359            // > 128 KiB otherwise grows through several intermediate
2360            // sizes with `alloc_zeroed + memcpy` each time).
2361            if let Some(state) = self.state.as_mut() {
2362                // FCS-capped via `useful_window_size` — the same cap
2363                // `decode_blocks` applies, so its per-iteration reserve in
2364                // the loop below cannot grow the buffer back to the raw
2365                // frame window.
2366                let useful_window = state.useful_window_size();
2367                state.decoder_scratch.reserve_buffer(useful_window);
2368            }
2369            let frame_start_total = total_bytes_written;
2370            loop {
2371                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
2372                let bytes_written = self
2373                    .read(output)
2374                    .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
2375                output = &mut output[bytes_written..];
2376                total_bytes_written += bytes_written;
2377                if self.can_collect() != 0 {
2378                    return Err(FrameDecoderError::TargetTooSmall);
2379                }
2380                if self.is_finished() {
2381                    break;
2382                }
2383            }
2384            // Per-frame FCS validation on the legacy fallback path.
2385            // Use `fcs_declared()` (NOT `content_size > 0`) so an
2386            // empty frame with explicit FCS=0 on the wire still gets
2387            // validated.
2388            if fcs_declared {
2389                let produced = (total_bytes_written - frame_start_total) as u64;
2390                if produced != content_size {
2391                    return Err(FrameDecoderError::FrameContentSizeMismatch {
2392                        declared: content_size,
2393                        produced,
2394                    });
2395                }
2396            }
2397            // Per-frame content-checksum verification on the drain path: the
2398            // frame is fully decoded and drained here (is_finished + nothing
2399            // left to collect), so the running digest and stored value are
2400            // final. No-op unless the mode is `Verify`.
2401            #[cfg(feature = "hash")]
2402            self.verify_content_checksum()?;
2403        }
2404
2405        Ok(total_bytes_written)
2406    }
2407
2408    /// `lsm`-feature decode_all_impl: adds the optional skippable
2409    /// visitor parameter consumed by
2410    /// [`Self::decode_all_with_skippable_visitor`]. Mirrors the no-lsm
2411    /// variant including the direct-path dispatch + FCS-validation
2412    /// rationale comments, so the two functions stay in sync; the only
2413    /// behavioral difference is the SkipFrame arm, which uses
2414    /// `split_at(length)` (single bounds check) instead of two
2415    /// separate `get(..length)` / `get(length..)` slices and invokes
2416    /// the visitor (when `Some`) on the borrowed payload before
2417    /// advancing past it.
2418    #[cfg(feature = "lsm")]
2419    #[allow(clippy::type_complexity)]
2420    fn decode_all_impl(
2421        &mut self,
2422        mut input: &[u8],
2423        mut output: &mut [u8],
2424        mut init_frame: impl FnMut(&mut Self, &mut &[u8]) -> Result<(), FrameDecoderError>,
2425        mut skippable_visitor: Option<&mut dyn FnMut(u8, &[u8])>,
2426    ) -> Result<usize, FrameDecoderError> {
2427        let mut total_bytes_written = 0;
2428        while !input.is_empty() {
2429            match init_frame(self, &mut input) {
2430                Ok(_) => {}
2431                Err(FrameDecoderError::ReadFrameHeaderError(
2432                    crate::decoding::errors::ReadFrameHeaderError::SkipFrame {
2433                        magic_number,
2434                        length,
2435                    },
2436                )) => {
2437                    let length = length as usize;
2438                    // Visitor sees the payload slice BEFORE we advance
2439                    // past it. Borrowed slice — no allocation. The
2440                    // variant is the low nibble of the magic number
2441                    // (RFC 8878 §3.1.2). `read_frame_header` only emits
2442                    // SkipFrame for magic in 0x184D2A50..=0x184D2A5F, so
2443                    // the subtraction fits in 0..=15.
2444                    if input.len() < length {
2445                        return Err(FrameDecoderError::FailedToSkipFrame);
2446                    }
2447                    let (payload, rest) = input.split_at(length);
2448                    if let Some(visitor) = skippable_visitor.as_mut() {
2449                        let variant = (magic_number - 0x184D2A50) as u8;
2450                        visitor(variant, payload);
2451                    }
2452                    input = rest;
2453                    continue;
2454                }
2455                Err(e) => return Err(e),
2456            };
2457            // Per-frame direct-path dispatch. Now safe to route the
2458            // public `decode_all` here because
2459            // `UserSliceBackend::exec_sequence_inline` returns
2460            // `Result<(), ExecuteSequencesError>` instead of
2461            // panicking on capacity overflow; the error propagates
2462            // up as `FrameDecoderError`. Eligibility (FCS > 0,
2463            // remaining `output` slice holds the declared content)
2464            // puts the frame on the fast path that bypasses the
2465            // FlatBuf/Ring -> `read()` drain copy. Ineligible frames
2466            // (no FCS, output too small) fall through to the legacy
2467            // `decode_blocks` + `read` drain loop below. Dictionary
2468            // frames are eligible (see the no-lsm path above).
2469            let (content_size, fcs_declared) = {
2470                let state_ref = self.state.as_ref().expect("init populated state");
2471                (
2472                    state_ref.frame_header.frame_content_size(),
2473                    state_ref.frame_header.fcs_declared(),
2474                )
2475            };
2476            // Only `cap >= frame_content_size` needed; the trailing
2477            // sequence(s) take the bounded copy in
2478            // `UserSliceBackend::exec_sequence_bounded`, so no
2479            // `WILDCOPY_OVERLENGTH` trailing slack is required (see the
2480            // no-lsm path above).
2481            let direct_eligible = content_size > 0 && (output.len() as u64) >= content_size;
2482            if direct_eligible {
2483                let written = self.run_direct_decode(&mut input, output, content_size)?;
2484                output = &mut output[written..];
2485                total_bytes_written += written;
2486                // Per-frame content-checksum verification (no-op unless the
2487                // mode is `Verify` and the frame carries a checksum).
2488                #[cfg(feature = "hash")]
2489                self.verify_content_checksum()?;
2490                continue;
2491            }
2492            // Non-direct fallback: pre-reserve the backing buffer to
2493            // `window_size` once so the per-block growth cycle is
2494            // skipped (see same comment on the no-lsm path above).
2495            if let Some(state) = self.state.as_mut() {
2496                // FCS-capped via `useful_window_size` — the same cap
2497                // `decode_blocks` applies, so its per-iteration reserve in
2498                // the loop below cannot grow the buffer back to the raw
2499                // frame window.
2500                let useful_window = state.useful_window_size();
2501                state.decoder_scratch.reserve_buffer(useful_window);
2502            }
2503            let frame_start_total = total_bytes_written;
2504            loop {
2505                self.decode_blocks(&mut input, BlockDecodingStrategy::UptoBytes(1024 * 1024))?;
2506                let bytes_written = self
2507                    .read(output)
2508                    .map_err(FrameDecoderError::FailedToDrainDecodebuffer)?;
2509                output = &mut output[bytes_written..];
2510                total_bytes_written += bytes_written;
2511                if self.can_collect() != 0 {
2512                    return Err(FrameDecoderError::TargetTooSmall);
2513                }
2514                if self.is_finished() {
2515                    break;
2516                }
2517            }
2518            // Per-frame FCS validation on the legacy fallback path.
2519            // Use `fcs_declared()` (NOT `content_size > 0`) so an
2520            // empty frame with explicit FCS=0 on the wire still gets
2521            // validated.
2522            if fcs_declared {
2523                let produced = (total_bytes_written - frame_start_total) as u64;
2524                if produced != content_size {
2525                    return Err(FrameDecoderError::FrameContentSizeMismatch {
2526                        declared: content_size,
2527                        produced,
2528                    });
2529                }
2530            }
2531            // Per-frame content-checksum verification on the drain path: the
2532            // frame is fully decoded and drained here (is_finished + nothing
2533            // left to collect), so the running digest and stored value are
2534            // final. No-op unless the mode is `Verify`.
2535            #[cfg(feature = "hash")]
2536            self.verify_content_checksum()?;
2537        }
2538
2539        Ok(total_bytes_written)
2540    }
2541
2542    /// Decode multiple frames into the output slice using a serialized dictionary.
2543    ///
2544    /// # Warning
2545    ///
2546    /// Each decoded frame is initialized with the parsed dictionary, even when a
2547    /// frame header omits the optional dictionary ID. Callers must only use this
2548    /// API when they already know the input frames were encoded with that
2549    /// dictionary; otherwise decoded output can be silently corrupted.
2550    pub fn decode_all_with_dict_bytes(
2551        &mut self,
2552        input: &[u8],
2553        output: &mut [u8],
2554        raw_dictionary: &[u8],
2555    ) -> Result<usize, FrameDecoderError> {
2556        let dict = DictionaryHandle::decode_dict(raw_dictionary)?;
2557        self.decode_all_with_dict_handle(input, output, &dict)
2558    }
2559
2560    /// Decode multiple frames into the extra capacity of the output vector.
2561    ///
2562    /// `input` must contain an exact number of frames.
2563    ///
2564    /// `output` must have enough spare capacity to hold the decompressed
2565    /// data. This adds no extra slack: exact-fit output is now eligible
2566    /// for the direct decode path, so a `Vec::with_capacity(fcs)` is
2567    /// decoded straight into without a growth/reallocation. It will NOT
2568    /// grow the vector to fit the decompressed payload itself; the
2569    /// caller's pre-allocated capacity must already cover the data. If
2570    /// you don't know how large the output will be, use
2571    /// [`FrameDecoder::decode_blocks`] instead.
2572    ///
2573    /// This calls [`FrameDecoder::init`], and all bytes currently in the decoder will be lost.
2574    ///
2575    /// The length of the output vector is updated to include the
2576    /// decompressed data. The length is not changed if an error occurs.
2577    pub fn decode_all_to_vec(
2578        &mut self,
2579        input: &[u8],
2580        output: &mut Vec<u8>,
2581    ) -> Result<(), FrameDecoderError> {
2582        let len = output.len();
2583        let cap = output.capacity();
2584        output.resize(cap, 0);
2585        match self.decode_all(input, &mut output[len..]) {
2586            Ok(bytes_written) => {
2587                let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
2588                output.resize(new_len, 0);
2589                Ok(())
2590            }
2591            Err(e) => {
2592                output.resize(len, 0);
2593                Err(e)
2594            }
2595        }
2596    }
2597
2598    /// Single-frame direct-decode path. Decodes one zstd frame into
2599    /// `output[..content_size]` via a stack-local
2600    /// `DecodeBuffer<UserSliceBackend>`, bypassing the per-block
2601    /// FlatBuf/Ring -> `read()` drain copy.
2602    ///
2603    /// # Preconditions (caller-enforced)
2604    ///
2605    /// - `self.init` (or `init_with_dict_handle`) was called for
2606    ///   this frame so `self.state` is populated.
2607    /// - `content_size` matches `self.state.frame_header
2608    ///   .frame_content_size()` and is `> 0` (caller already passed
2609    ///   the eligibility gate).
2610    /// - `output.len() >= content_size`. No `WILDCOPY_OVERLENGTH`
2611    ///   trailing slack is required: the trailing sequence(s) take the
2612    ///   bounded (non-overshooting) copy in
2613    ///   [`UserSliceBackend::exec_sequence_bounded`].
2614    ///
2615    /// Dictionary frames are supported: the scratch buffer's shared
2616    /// dict handle is forwarded to the stack-local `DecodeBuffer`, so
2617    /// offsets reaching past the frame's own output resolve through
2618    /// `repeat_from_dict` (the ext-dict slow path).
2619    ///
2620    /// On return, `input` points at the byte immediately after the
2621    /// frame's checksum (or after the last block, when the frame
2622    /// has `content_checksum_flag = 0`). `self.state.frame_finished`
2623    /// is set so [`Self::is_finished`] reports `true`.
2624    fn run_direct_decode(
2625        &mut self,
2626        input: &mut &[u8],
2627        output: &mut [u8],
2628        content_size: u64,
2629    ) -> Result<usize, FrameDecoderError> {
2630        #[cfg(test)]
2631        {
2632            self.direct_frames += 1;
2633        }
2634        use super::block_decoder;
2635        use super::decode_buffer::DecodeBuffer;
2636        use super::scratch::DirectScratch;
2637        use super::user_slice_buf::UserSliceBackend;
2638        use crate::io::Read;
2639        use FrameDecoderError as err;
2640
2641        let state = self
2642            .state
2643            .as_mut()
2644            .expect("caller ensures init populated state");
2645
2646        // Borrow persistent fields out of whichever scratch variant
2647        // `init` produced (Flat for single_segment, Ring for
2648        // multi-segment) — both expose the same HUF/FSE/Vec
2649        // fields; only `buffer` differs and we don't use that here.
2650        // Macro-style binding avoids the closure / generic
2651        // gymnastics of returning multiple `&mut` from a match arm.
2652        let (huf, fse, offset_hist, literals_buffer, block_content_buffer, window_size, dict) =
2653            match &mut state.decoder_scratch {
2654                DecoderScratchKind::Flat(s) => (
2655                    &mut s.huf,
2656                    &mut s.fse,
2657                    &mut s.offset_hist,
2658                    &mut s.literals_buffer,
2659                    &mut s.block_content_buffer,
2660                    s.buffer.window_size,
2661                    s.buffer.dict.clone(),
2662                ),
2663                DecoderScratchKind::Ring(s) => (
2664                    &mut s.huf,
2665                    &mut s.fse,
2666                    &mut s.offset_hist,
2667                    &mut s.literals_buffer,
2668                    &mut s.block_content_buffer,
2669                    s.buffer.window_size,
2670                    s.buffer.dict.clone(),
2671                ),
2672            };
2673        let backend = UserSliceBackend::from_slice(output);
2674        let mut buffer = DecodeBuffer::from_backend(backend, window_size);
2675        // Dictionary matches on the direct path: hand the shared handle
2676        // (refcount bump, no copy) to the stack-local buffer so offsets
2677        // reaching past the frame's own output resolve through
2678        // `repeat_from_dict` — the same ext-dict slow path the
2679        // FlatBuf/Ring backends use. The per-sequence hot path is
2680        // untouched: the inline-exec dispatch already routes
2681        // beyond-prefix offsets to the cold `repeat()` fallback.
2682        if let Some(handle) = dict {
2683            buffer.set_dict(handle);
2684        }
2685        let mut direct = DirectScratch {
2686            huf,
2687            fse,
2688            offset_hist,
2689            literals_buffer,
2690            block_content_buffer,
2691            buffer,
2692        };
2693
2694        // Block loop. Mirrors `decode_blocks` (without the
2695        // strategy-bounded early exit — we always decode the whole
2696        // frame in one shot for the direct path). Keeps
2697        // `state.bytes_read_counter` / `state.block_counter` in
2698        // sync with `decode_blocks` so post-call accessors
2699        // (`bytes_read_from_source`, `blocks_decoded`) return
2700        // accurate values.
2701        let mut block_dec = block_decoder::new();
2702        // Track total output bytes against the declared
2703        // `frame_content_size` via the buffer's actual write
2704        // counter — `BlockHeader.decompressed_size` is 0 for
2705        // Compressed blocks (the header parser can't know the
2706        // expanded size before decoding the body), so per-header
2707        // tracking would always count 0 for those blocks and
2708        // miscount frames that aren't pure Raw/RLE.
2709        let mut produced: u64 = 0;
2710        // Per-block output ranges captured during the direct-path
2711        // loop. After the loop we re-borrow `output` (post-drop of
2712        // `direct`) and XXH64 each range into
2713        // `self.computed_block_checksums`, so the digests vector
2714        // stays consistent with the legacy `decode_blocks` path
2715        // regardless of which dispatch the frame took.
2716        // `Vec::new()` does not allocate, so this stays free when
2717        // `per_block_checksums_enabled` is false: the `push` and the
2718        // post-loop hashing loop are both gated by the same flag.
2719        #[cfg(all(feature = "lsm", feature = "hash"))]
2720        let mut block_ranges: alloc::vec::Vec<(usize, usize)> = alloc::vec::Vec::new();
2721        // Frame-level XXH64, accumulated PER BLOCK right after each block
2722        // decodes — the bytes are still cache-resident then. The previous
2723        // shape hashed the whole output once after the loop, which re-read
2724        // the entire frame cold: a full extra memory pass that the
2725        // reference implementation does not make (it hashes incrementally
2726        // per block). Invisible on outputs that fit L3, ~1.14x wall on a
2727        // 100 MiB all-raw decode and the dominant CI gap on
2728        // bandwidth-limited hosts.
2729        #[cfg(feature = "hash")]
2730        let mut running_hash: Option<twox_hash::XxHash64> =
2731            if state.frame_header.descriptor.content_checksum_flag()
2732                && self.content_checksum != ContentChecksum::None
2733            {
2734                Some(twox_hash::XxHash64::with_seed(0))
2735            } else {
2736                None
2737            };
2738        loop {
2739            #[cfg(all(feature = "lsm", feature = "hash"))]
2740            let produced_before: Option<usize> = if self.per_block_checksums_enabled {
2741                Some(produced as usize)
2742            } else {
2743                None
2744            };
2745            // Failing-block coordinates captured before the header read (see
2746            // the `decode_blocks` loop for the rationale).
2747            let block_index = state.block_counter as u32;
2748            let block_frame_offset = state.bytes_read_counter as u32;
2749            let (block_header, hsize) =
2750                block_dec.read_block_header(&mut *input).map_err(|source| {
2751                    block_header_decode_error(source, block_index, block_frame_offset)
2752                })?;
2753            state.bytes_read_counter += u64::from(hsize);
2754            // Pre-flight FCS check ONLY for Raw / RLE blocks where
2755            // `decompressed_size` is the actual block output size.
2756            // For Compressed blocks the header field is 0; the
2757            // post-decode check below catches overflow via the
2758            // backend's actual write counter delta.
2759            let block_upper = u64::from(block_header.decompressed_size);
2760            if block_upper > 0 && produced + block_upper > content_size {
2761                // Frame is corrupt — Raw/RLE block headers claim
2762                // more output than the FCS allows.
2763                return Err(err::FrameContentSizeMismatch {
2764                    declared: content_size,
2765                    produced: produced + block_upper,
2766                });
2767            }
2768            // Slice-source fast path: consume the block body
2769            // straight from `input` without copying into the
2770            // persistent `block_content_buffer`.
2771            let body_consumed = match block_dec.decode_block_content_from_slice(
2772                &block_header,
2773                &mut direct,
2774                &mut *input,
2775            ) {
2776                Ok(n) => n,
2777                // Defense-in-depth: RLE / Raw block whose declared
2778                // `decompressed_size` slipped past the per-block
2779                // pre-flight above and tripped the backend's
2780                // fallible write surface.
2781                Err(crate::decoding::errors::DecodeBlockContentError::BackendOverflow {
2782                    ..
2783                }) => {
2784                    // Use saturating_add on the
2785                    // `produced + decompressed_size` sum. Each block
2786                    // is bounded by 128 KiB (MAX_BLOCK_SIZE), but
2787                    // accumulated `produced` can grow toward
2788                    // u64::MAX across adversarial frames. Saturating
2789                    // avoids a panic on the error path itself.
2790                    return Err(err::FrameContentSizeMismatch {
2791                        declared: content_size,
2792                        produced: produced
2793                            .saturating_add(u64::from(block_header.decompressed_size)),
2794                    });
2795                }
2796                // Compressed-block in-block overshoot: the sequence
2797                // executor (donor-inline path) or the match-repeat
2798                // fallback tripped the fixed-capacity backend's per-write
2799                // check. Unlike Raw/RLE, a Compressed block carries no
2800                // header-declared output size, so `produced` is computed
2801                // from the partial fill: `tail` bytes were written before
2802                // the failing op, and `requested` is what overflowed —
2803                // their sum is a strict lower bound on the frame's true
2804                // expanded size and is always > `content_size` (the
2805                // direct path is only entered when the slice is sized to
2806                // `content_size + WILDCOPY_OVERLENGTH`, so any overflow
2807                // means the frame exceeded the declared FCS, never a
2808                // caller-undersized buffer). Folds into the same
2809                // `FrameContentSizeMismatch` contract as Raw/RLE.
2810                Err(crate::decoding::errors::DecodeBlockContentError::DecompressBlockError(
2811                    crate::decoding::errors::DecompressBlockError::ExecuteSequencesError(ref e),
2812                )) if e.output_overflow_requested().is_some() => {
2813                    let requested = e
2814                        .output_overflow_requested()
2815                        .expect("guard guarantees Some") as u64;
2816                    let tail = direct.buffer.buffer_ref().tail() as u64;
2817                    return Err(err::FrameContentSizeMismatch {
2818                        declared: content_size,
2819                        produced: tail.saturating_add(requested),
2820                    });
2821                }
2822                Err(e) => {
2823                    return Err(block_body_decode_error(
2824                        e,
2825                        block_index,
2826                        block_frame_offset,
2827                        &block_header,
2828                        hsize,
2829                    ));
2830                }
2831            };
2832            // Hash this block's freshly-written bytes while they are hot
2833            // (see `running_hash` above). `tail()` is the physical write
2834            // cursor: `drop_to_window_size` below only advances the head,
2835            // so `[prev_tail, tail)` is exactly this block's output.
2836            #[cfg(feature = "hash")]
2837            if let Some(hasher) = running_hash.as_mut() {
2838                use core::hash::Hasher;
2839                hasher.write(direct.buffer.buffer_ref().written_since(produced as usize));
2840            }
2841            produced = direct.buffer.buffer_ref().tail() as u64;
2842            // Post-decode FCS overflow check.
2843            if produced > content_size {
2844                return Err(err::FrameContentSizeMismatch {
2845                    declared: content_size,
2846                    produced,
2847                });
2848            }
2849            state.bytes_read_counter += body_consumed;
2850            state.block_counter += 1;
2851            #[cfg(all(feature = "lsm", feature = "hash"))]
2852            if let Some(produced_before) = produced_before {
2853                block_ranges.push((produced_before, produced as usize));
2854            }
2855            // Cap the visible buffer at window_size between blocks
2856            // so the next block's match-offset validation matches
2857            // the spec's `offset <= window_size` rule.
2858            direct.buffer.drop_to_window_size();
2859            if block_header.last_block {
2860                if state.frame_header.descriptor.content_checksum_flag() {
2861                    let mut chksum = [0u8; 4];
2862                    input
2863                        .read_exact(&mut chksum)
2864                        .map_err(err::FailedToReadChecksum)?;
2865                    state.bytes_read_counter += 4;
2866                    state.check_sum = Some(u32::from_le_bytes(chksum));
2867                }
2868                break;
2869            }
2870        }
2871        // Final sanity: blocks summed to exactly `content_size`.
2872        if produced != content_size {
2873            return Err(err::FrameContentSizeMismatch {
2874                declared: content_size,
2875                produced,
2876            });
2877        }
2878
2879        let written = content_size as usize;
2880        state.frame_finished = true;
2881        // Drop the stack-local DirectScratch (and its DecodeBuffer
2882        // borrow on `output`) so we can re-borrow `output` for the
2883        // hash pass below.
2884        drop(direct);
2885        // Per-block XXH64 (low 32 bits) over the captured ranges.
2886        // Mirrors `decode_blocks`' per-block hashing so the digests
2887        // vector stays identical regardless of which dispatch path
2888        // the frame took. Ranges were recorded inside the loop while
2889        // `direct` held a mutable borrow on `output`; now that the
2890        // borrow is dropped we can read the slices directly.
2891        #[cfg(all(feature = "lsm", feature = "hash"))]
2892        if self.per_block_checksums_enabled {
2893            use core::hash::Hasher;
2894            for (start, end) in &block_ranges {
2895                let mut h = twox_hash::XxHash64::with_seed(0);
2896                h.write(&output[*start..*end]);
2897                self.computed_block_checksums.push(h.finish() as u32);
2898            }
2899        }
2900        #[cfg(feature = "hash")]
2901        if let Some(hasher) = running_hash {
2902            // Propagate the per-block-accumulated hasher state (see the
2903            // `running_hash` rationale above the loop) so the frame-tail
2904            // XXH64 check and `get_calculated_checksum()` read the digest.
2905            // `running_hash` is `None` for flag-off frames or
2906            // `ContentChecksum::None` — nothing to verify there, and
2907            // `get_calculated_checksum()` returns `None`, matching the skip.
2908            match &mut state.decoder_scratch {
2909                DecoderScratchKind::Flat(s) => s.buffer.hash = hasher,
2910                DecoderScratchKind::Ring(s) => s.buffer.hash = hasher,
2911            }
2912        }
2913        Ok(written)
2914    }
2915}
2916
2917/// Read bytes from the decode_buffer that are no longer needed. While the frame is not yet finished
2918/// this will retain window_size bytes, else it will drain it completely
2919impl Read for FrameDecoder {
2920    fn read(&mut self, target: &mut [u8]) -> Result<usize, Error> {
2921        let state = match &mut self.state {
2922            None => return Ok(0),
2923            Some(s) => s,
2924        };
2925        if state.frame_finished {
2926            state.decoder_scratch.buffer_read_all(target)
2927        } else {
2928            state.decoder_scratch.buffer_read(target)
2929        }
2930    }
2931}
2932
2933#[cfg(test)]
2934mod tests {
2935    extern crate std;
2936
2937    use super::{DictionaryHandle, FrameDecoder};
2938    use crate::encoding::{CompressionLevel, FrameCompressor};
2939    use alloc::vec::Vec;
2940
2941    #[test]
2942    fn decode_all_tight_and_slack_outputs_match_on_single_segment_frame() {
2943        // Roundtrip a small payload through the encoder, then decode
2944        // it via `decode_all` on two output shapes that select
2945        // different internal sequence-exec paths within the direct
2946        // decode:
2947        //   1. Tight output (exactly `frame_content_size`, no
2948        //      WILDCOPY_OVERLENGTH slack) → direct path whose trailing
2949        //      sequence(s) take the bounded (non-overshooting) copy in
2950        //      `UserSliceBackend::exec_sequence_bounded`.
2951        //   2. Output with WILDCOPY slack → direct path whose
2952        //      sequences all take the SIMD wildcopy fast path.
2953        // Both must produce identical output bytes — the bounded tail
2954        // copy must reconstruct the same data as the overshooting fast
2955        // path. This is the regression gate for the relaxed
2956        // direct-decode gate (`cap >= content_size`).
2957        let payload: Vec<u8> = (0..4096u32).map(|i| (i & 0xFF) as u8).collect();
2958        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
2959        compressor.set_source(payload.as_slice());
2960        let mut compressed = Vec::new();
2961        compressor.set_drain(&mut compressed);
2962        compressor.compress();
2963
2964        // Baseline: tight output → legacy drain path.
2965        let mut dec_a = FrameDecoder::new();
2966        let mut out_a = alloc::vec![0u8; payload.len()];
2967        let n_a = dec_a
2968            .decode_all(compressed.as_slice(), &mut out_a)
2969            .expect("decode_all (legacy drain) should succeed");
2970        assert_eq!(n_a, payload.len());
2971        assert_eq!(&out_a[..n_a], payload.as_slice());
2972
2973        // Direct: output with WILDCOPY slack → direct path.
2974        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
2975        let mut dec_b = FrameDecoder::new();
2976        let mut out_b = alloc::vec![0u8; payload.len() + slack];
2977        let n_b = dec_b
2978            .decode_all(compressed.as_slice(), &mut out_b)
2979            .expect("decode_all (direct path) should succeed");
2980        assert_eq!(
2981            n_b,
2982            payload.len(),
2983            "direct decode produced wrong byte count"
2984        );
2985        assert_eq!(&out_b[..n_b], payload.as_slice());
2986    }
2987
2988    #[test]
2989    fn decode_all_tight_output_overlapping_tail_match_roundtrips() {
2990        // The bounded tail copy must handle an OVERLAPPING match
2991        // (offset < match_length) as the trailing sequence when the
2992        // output slice is sized to exactly `frame_content_size`. A long
2993        // run of a single byte at the end of the payload encodes as an
2994        // offset-1 match whose length far exceeds the offset, so the
2995        // bounded copy's overlapping (forward byte-by-byte) branch is
2996        // exercised at the buffer tail where the SIMD overshoot would
2997        // otherwise run past `cap`. Decoding into a tight buffer and
2998        // matching the original payload byte-for-byte is the regression
2999        // gate for the overlap branch of `exec_sequence_bounded`.
3000        let mut payload: Vec<u8> = (0..256u32).map(|i| (i & 0xFF) as u8).collect();
3001        payload.extend(core::iter::repeat_n(0xABu8, 8192));
3002        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3003        compressor.set_source(payload.as_slice());
3004        let mut compressed = Vec::new();
3005        compressor.set_drain(&mut compressed);
3006        compressor.compress();
3007
3008        // Anti-vacuous precondition: the 8 KiB trailing run of a single
3009        // byte must compress to a Compressed block dominated by ONE long
3010        // offset-1 (overlapping, offset < match_length) match — not a Raw
3011        // block. If the encoder ever stopped emitting that overlapping
3012        // tail match the test would pass without exercising
3013        // `exec_sequence_bounded`'s overlapping forward-copy branch, so
3014        // gate on the output being a tiny fraction of the input (a raw
3015        // block would be ~`payload.len()`; an offset-1 run match is tens
3016        // of bytes).
3017        assert!(
3018            compressed.len() < payload.len() / 8,
3019            "expected an overlapping-tail match to dominate the frame \
3020             (compressed={} payload={}); the bounded overlap branch would \
3021             not be exercised otherwise",
3022            compressed.len(),
3023            payload.len(),
3024        );
3025
3026        // Tight output: exactly content_size, no WILDCOPY slack.
3027        let mut dec = FrameDecoder::new();
3028        let mut out = alloc::vec![0u8; payload.len()];
3029        let n = dec
3030            .decode_all(compressed.as_slice(), &mut out)
3031            .expect("tight-output decode with overlapping tail match should succeed");
3032        assert_eq!(n, payload.len());
3033        assert_eq!(out, payload, "bounded overlap tail copy corrupted output");
3034    }
3035
3036    #[test]
3037    fn decode_all_multi_segment_frame_decodes_correctly() {
3038        // Multi-segment frame: payload large enough that the
3039        // encoder's default frame layout has `single_segment_flag =
3040        // false` and `window_size < frame_content_size`. The direct
3041        // path must cap the visible buffer at window_size after each
3042        // block (drop_to_window_size) so match-offset validation
3043        // matches the spec rule `offset <= window_size`, and still
3044        // produce the same bytes as decode_all on the
3045        // FlatBuf/Ring-backed path.
3046        //
3047        // Make the payload structured so multi-segment behavior
3048        // actually kicks in: 2 MiB of repeating + random-ish bytes
3049        // forces window_size lower than content_size at the encoder.
3050        let mut payload: Vec<u8> = Vec::with_capacity(2 * 1024 * 1024);
3051        for i in 0..payload.capacity() {
3052            payload.push((i.wrapping_mul(2_654_435_761) & 0xFF) as u8);
3053        }
3054        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3055        compressor.set_source(payload.as_slice());
3056        let mut compressed = Vec::new();
3057        compressor.set_drain(&mut compressed);
3058        compressor.compress();
3059
3060        // Baseline: decode_all through the FlatBuf+drain path.
3061        let mut dec_a = FrameDecoder::new();
3062        let mut out_a = alloc::vec![0u8; payload.len()];
3063        let n_a = dec_a
3064            .decode_all(compressed.as_slice(), &mut out_a)
3065            .expect("decode_all should succeed");
3066        assert_eq!(n_a, payload.len());
3067        assert_eq!(&out_a[..n_a], payload.as_slice());
3068
3069        // Direct path: must give identical bytes via UserSliceBackend
3070        // + per-block drop_to_window_size.
3071        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3072        let mut dec_b = FrameDecoder::new();
3073        let mut out_b = alloc::vec![0u8; payload.len() + slack];
3074        let n_b = dec_b
3075            .decode_all(compressed.as_slice(), &mut out_b)
3076            .expect("decode_all should succeed on multi-segment frame");
3077        assert_eq!(n_b, payload.len(), "wrong byte count on direct path");
3078        assert_eq!(&out_b[..n_b], payload.as_slice());
3079
3080        // Sanity-check: confirm the encoded frame really IS
3081        // multi-segment. If a future encoder default changes,
3082        // catching the assumption here is better than silently
3083        // testing single_segment on this name.
3084        let mut sanity = FrameDecoder::new();
3085        sanity.init(&mut compressed.as_slice()).unwrap();
3086        assert!(
3087            !sanity
3088                .state
3089                .as_ref()
3090                .unwrap()
3091                .frame_header
3092                .descriptor
3093                .single_segment_flag(),
3094            "test precondition violated: frame is single-segment, rename or resize"
3095        );
3096    }
3097
3098    #[cfg(feature = "hash")]
3099    #[test]
3100    fn decode_all_propagates_checksum_into_persistent_scratch() {
3101        // Direct path on a checksum-flagged frame: the FrameCompressor
3102        // under `feature = "hash"` sets content_checksum_flag, so the
3103        // decoded frame has a recorded checksum. After
3104        // decode_all we must be able to verify it matches via
3105        // the public get_calculated_checksum() accessor — the digest
3106        // is computed by walking output at end of decode and stored
3107        // into the persistent scratch's hasher.
3108        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3109        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3110        compressor.set_content_checksum(true);
3111        compressor.set_source(payload.as_slice());
3112        let mut compressed = Vec::new();
3113        compressor.set_drain(&mut compressed);
3114        compressor.compress();
3115
3116        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3117        let mut dec = FrameDecoder::new();
3118        let mut out = alloc::vec![0u8; payload.len() + slack];
3119        let n = dec
3120            .decode_all(compressed.as_slice(), &mut out)
3121            .expect("decode_all with checksum must succeed");
3122        assert_eq!(n, payload.len());
3123        assert_eq!(&out[..n], payload.as_slice());
3124
3125        // Both sides must report the same checksum: the frame header
3126        // carries the stored u32, and get_calculated_checksum reads
3127        // the running digest the direct path just propagated.
3128        let stored = dec.get_checksum_from_data();
3129        let calculated = dec.get_calculated_checksum();
3130        assert!(stored.is_some(), "frame must carry stored checksum");
3131        assert!(
3132            calculated.is_some(),
3133            "direct path must propagate calculated checksum"
3134        );
3135        assert_eq!(
3136            stored, calculated,
3137            "stored vs calculated checksum mismatch on direct path"
3138        );
3139    }
3140
3141    #[cfg(feature = "hash")]
3142    #[test]
3143    fn verify_mode_accepts_a_valid_frame() {
3144        use crate::decoding::ContentChecksum;
3145        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3146        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3147        compressor.set_content_checksum(true);
3148        compressor.set_source(payload.as_slice());
3149        let mut compressed = Vec::new();
3150        compressor.set_drain(&mut compressed);
3151        compressor.compress();
3152
3153        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3154        let mut dec = FrameDecoder::new();
3155        dec.set_content_checksum(ContentChecksum::Verify);
3156        let mut out = alloc::vec![0u8; payload.len() + slack];
3157        let n = dec
3158            .decode_all(compressed.as_slice(), &mut out)
3159            .expect("Verify mode must accept a frame with a correct checksum");
3160        assert_eq!(&out[..n], payload.as_slice());
3161    }
3162
3163    #[cfg(feature = "hash")]
3164    #[test]
3165    fn verify_mode_rejects_a_corrupted_checksum() {
3166        use crate::decoding::ContentChecksum;
3167        use crate::decoding::errors::FrameDecoderError;
3168        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3169        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3170        compressor.set_content_checksum(true);
3171        compressor.set_source(payload.as_slice());
3172        let mut compressed = Vec::new();
3173        compressor.set_drain(&mut compressed);
3174        compressor.compress();
3175
3176        // Flip a bit in the trailing 4-byte content checksum: the frame body
3177        // still decodes to the correct bytes, but the stored digest no longer
3178        // matches the one the decoder computes.
3179        let last = compressed.len() - 1;
3180        compressed[last] ^= 0xFF;
3181
3182        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3183        let mut dec = FrameDecoder::new();
3184        dec.set_content_checksum(ContentChecksum::Verify);
3185        let mut out = alloc::vec![0u8; payload.len() + slack];
3186        let err = dec
3187            .decode_all(compressed.as_slice(), &mut out)
3188            .expect_err("Verify mode must reject a corrupted checksum");
3189        assert!(
3190            matches!(err, FrameDecoderError::ChecksumMismatch { .. }),
3191            "expected ChecksumMismatch, got {err:?}"
3192        );
3193    }
3194
3195    #[cfg(feature = "hash")]
3196    #[test]
3197    fn decode_from_to_verify_rejects_corrupted_checksum() {
3198        // decode_from_to has its own block loop (not decode_blocks); it must
3199        // still honour Verify and reject a corrupted trailer rather than
3200        // silently accept it.
3201        use crate::decoding::ContentChecksum;
3202        use crate::decoding::errors::FrameDecoderError;
3203        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3204        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3205        compressor.set_content_checksum(true);
3206        compressor.set_source(payload.as_slice());
3207        let mut compressed = Vec::new();
3208        compressor.set_drain(&mut compressed);
3209        compressor.compress();
3210        let last = compressed.len() - 1;
3211        compressed[last] ^= 0xFF;
3212
3213        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3214        let mut dec = FrameDecoder::new();
3215        dec.set_content_checksum(ContentChecksum::Verify);
3216        let mut out = alloc::vec![0u8; payload.len() + slack];
3217
3218        // Split the trailing 4-byte checksum into a SEPARATE call so the
3219        // verification must happen on the checksum-only early-return path (not
3220        // the post-drain path) — the incremental case CodeRabbit flagged.
3221        let split = compressed.len() - 4;
3222        let (_r1, w1) = dec
3223            .decode_from_to(&compressed[..split], &mut out)
3224            .expect("blocks decode without the trailer");
3225        let err = dec
3226            .decode_from_to(&compressed[split..], &mut out[w1..])
3227            .expect_err("decode_from_to in Verify mode must reject a corrupted checksum");
3228        assert!(
3229            matches!(err, FrameDecoderError::ChecksumMismatch { .. }),
3230            "expected ChecksumMismatch, got {err:?}"
3231        );
3232    }
3233
3234    #[cfg(feature = "hash")]
3235    #[test]
3236    fn decode_from_to_small_target_split_trailer_flushes_tail() {
3237        // Regression: when a prior call decoded the last block but a small
3238        // `target` left output buffered, the trailer-only call must still flush
3239        // the buffered tail (it used to early-return Ok((4,0)) and lose it).
3240        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3241        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3242        compressor.set_content_checksum(true);
3243        compressor.set_source(payload.as_slice());
3244        let mut compressed = Vec::new();
3245        compressor.set_drain(&mut compressed);
3246        compressor.compress();
3247
3248        let split = compressed.len() - 4;
3249        let mut dec = FrameDecoder::new();
3250        let mut out = alloc::vec![0u8; payload.len()];
3251        // Call 1: all blocks, but a SMALL (64-byte) target leaves the rest
3252        // buffered on the decoder side.
3253        let (_r1, w1) = dec
3254            .decode_from_to(&compressed[..split], &mut out[..64])
3255            .expect("blocks decode with a small target");
3256        assert!(w1 <= 64);
3257        // Call 2: the 4-byte trailer alone must flush the buffered tail through
3258        // the shared read path, not return early and drop it.
3259        let (_r2, w2) = dec
3260            .decode_from_to(&compressed[split..], &mut out[w1..])
3261            .expect("trailer call must flush the buffered tail");
3262        assert_eq!(w1 + w2, payload.len(), "buffered tail was dropped");
3263        assert_eq!(&out[..w1 + w2], payload.as_slice());
3264    }
3265
3266    #[cfg(feature = "hash")]
3267    #[test]
3268    fn none_mode_skips_the_checksum_pass() {
3269        use crate::decoding::ContentChecksum;
3270        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3271        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3272        compressor.set_content_checksum(true);
3273        compressor.set_source(payload.as_slice());
3274        let mut compressed = Vec::new();
3275        compressor.set_drain(&mut compressed);
3276        compressor.compress();
3277
3278        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3279        let mut dec = FrameDecoder::new();
3280        dec.set_content_checksum(ContentChecksum::None);
3281        let mut out = alloc::vec![0u8; payload.len() + slack];
3282        let n = dec
3283            .decode_all(compressed.as_slice(), &mut out)
3284            .expect("None mode must still decode correctly");
3285        assert_eq!(&out[..n], payload.as_slice());
3286        // No digest is computed in None mode, even though the frame carries one.
3287        assert!(dec.get_checksum_from_data().is_some());
3288        assert!(dec.get_calculated_checksum().is_none());
3289    }
3290
3291    #[cfg(feature = "hash")]
3292    #[test]
3293    fn encoder_without_checksum_emits_no_trailing_digest() {
3294        let payload: Vec<u8> = (0..8192u32).map(|i| (i & 0xFF) as u8).collect();
3295
3296        let mut with = Vec::new();
3297        let mut c_with = FrameCompressor::new(CompressionLevel::Default);
3298        c_with.set_content_checksum(true);
3299        c_with.set_source(payload.as_slice());
3300        c_with.set_drain(&mut with);
3301        c_with.compress();
3302
3303        let mut without = Vec::new();
3304        let mut c_without = FrameCompressor::new(CompressionLevel::Default);
3305        c_without.set_content_checksum(false);
3306        c_without.set_source(payload.as_slice());
3307        c_without.set_drain(&mut without);
3308        c_without.compress();
3309
3310        // The checksum-off frame is exactly the 4-byte trailing digest shorter.
3311        assert_eq!(with.len(), without.len() + 4);
3312
3313        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3314        let mut dec = FrameDecoder::new();
3315        let mut out = alloc::vec![0u8; payload.len() + slack];
3316        let n = dec
3317            .decode_all(without.as_slice(), &mut out)
3318            .expect("a frame without a content checksum must decode");
3319        assert_eq!(&out[..n], payload.as_slice());
3320        assert!(
3321            dec.get_checksum_from_data().is_none(),
3322            "no trailing checksum should be reported"
3323        );
3324    }
3325
3326    #[test]
3327    fn decode_all_fcs_overflow_via_corrupt_frame_returns_structured_error() {
3328        // Hand-build a corrupt frame that declares
3329        // frame_content_size = 4 but the (last) block carries a
3330        // larger Raw payload. The pre-flight FCS check inside the
3331        // direct path's block loop catches this and returns the
3332        // structured FrameContentSizeMismatch variant — not a
3333        // panic, not a generic TargetTooSmall.
3334        //
3335        // Frame layout (single_segment, FCS=4):
3336        //   magic            4 bytes  0xFD2FB528
3337        //   FHD              1 byte   single_segment=1, no checksum,
3338        //                              FCS field size = 0 (-> 1-byte FCS)
3339        //   FCS              1 byte   0x04
3340        //   block_header     3 bytes  last=1, type=Raw, block_size=10
3341        //   block_payload    10 bytes 0xAA repeated
3342        let mut frame = alloc::vec::Vec::new();
3343        // magic
3344        frame.extend_from_slice(&0xFD2FB528u32.to_le_bytes());
3345        // FHD: single_segment=1, fcs_flag=0 (1-byte FCS), no checksum,
3346        // no dict. Bit layout: FCS(7-6)=0, single_segment(5)=1,
3347        // reserved/uncs(4)=0, content_checksum(2)=0, dict(0-1)=00.
3348        frame.push(0b0010_0000);
3349        // FCS: 1 byte
3350        frame.push(4);
3351        // Block header: cBlockSize=10, type=Raw (0), last=1
3352        // 3-byte LE: bit0=last, bits1-2=type(2 bits), bits3-23=size
3353        let cblock_size: u32 = 10;
3354        let bh: u32 = 1 | (cblock_size << 3); // last=1, type=Raw=0
3355        frame.push((bh & 0xFF) as u8);
3356        frame.push((bh >> 8) as u8);
3357        frame.push((bh >> 16) as u8);
3358        // Payload — 10 bytes that, if decoded, would exceed FCS=4.
3359        frame.extend(core::iter::repeat_n(0xAAu8, 10));
3360
3361        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3362        let mut dec = FrameDecoder::new();
3363        let mut out = alloc::vec![0u8; 4 + slack];
3364        let err = dec
3365            .decode_all(&frame, &mut out)
3366            .expect_err("FCS-overflow frame must fail decode");
3367        assert!(
3368            matches!(
3369                err,
3370                super::FrameDecoderError::FrameContentSizeMismatch { .. }
3371            ),
3372            "expected FrameContentSizeMismatch, got {:?}",
3373            err
3374        );
3375    }
3376
3377    #[test]
3378    fn decode_all_compressed_block_fcs_overflow_returns_structured_error() {
3379        // Acceptance test for #246: a malformed frame whose *Compressed*
3380        // block expands past the declared `frame_content_size` must
3381        // surface `FrameContentSizeMismatch` from the direct-decode path
3382        // (UserSliceBackend sequence executor), NOT panic and NOT a
3383        // generic FailedToReadBlockBody. The Raw-block sibling above
3384        // covers the `BackendOverflow` arm; this covers the Compressed
3385        // sequence-executor overflow arm (`ExecuteSequencesError::
3386        // OutputBufferOverflow` folded into FrameContentSizeMismatch in
3387        // `run_direct_decode`).
3388        //
3389        // Construction: compress a compressible payload to get a genuine
3390        // Compressed block + a header-declared FCS, then surgically patch
3391        // the FCS field down to a tiny value. The block body still
3392        // decodes (literals/sequences are independent of FCS) and the
3393        // sequence executor overflows the small output slice.
3394        // Highly compressible payload (repeated phrase) → Compressed
3395        // block whose sequence executor produces ~4 KiB of output.
3396        let unit = b"The quick brown fox jumps over the lazy dog. ";
3397        let mut payload = Vec::with_capacity(4 * 1024);
3398        while payload.len() < 4 * 1024 {
3399            payload.extend_from_slice(unit);
3400        }
3401        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3402        compressor.set_source(payload.as_slice());
3403        let mut frame = Vec::new();
3404        compressor.set_drain(&mut frame);
3405        compressor.compress();
3406        // Sanity: the encoder actually compressed (=> a Compressed block,
3407        // not a raw-stored fallback) so we exercise the sequence path.
3408        assert!(frame.len() < payload.len());
3409
3410        // Locate the FCS field: it is the last `fcs_len` bytes of the
3411        // frame header, whose total size `header_size` includes the magic.
3412        // A ~4 KiB single-segment frame declares FCS = 4096, which lands in
3413        // the 2-byte field range [256, 65791] (RFC 8878 §3.1.1.1.4) — assert
3414        // that so the patch logic below stays a single deterministic branch.
3415        let (header, header_size) =
3416            super::super::frame::read_frame_header(frame.as_slice()).expect("valid header");
3417        let fcs_len = header
3418            .descriptor
3419            .frame_content_size_bytes()
3420            .expect("fcs present") as usize;
3421        assert_eq!(
3422            fcs_len, 2,
3423            "4 KiB single-segment frame must use a 2-byte FCS"
3424        );
3425        let fcs_off = header_size as usize - fcs_len;
3426
3427        // Patch the 2-byte FCS to its floor: stored bytes 0 decode to 256
3428        // (the field's `+256` bias), far below the 4 KiB the block actually
3429        // produces, so the sequence executor overflows the output slice.
3430        let patched_declared: u64 = 256;
3431        frame[fcs_off] = 0;
3432        frame[fcs_off + 1] = 0;
3433
3434        // Size the output to declared + WILDCOPY slack so the direct path
3435        // is eligible (output.len() >= content_size + slack) — the
3436        // overflow then comes from the frame, not an undersized buffer.
3437        let slack = super::super::buffer_backend::WILDCOPY_OVERLENGTH;
3438        let mut out = alloc::vec![0u8; patched_declared as usize + slack];
3439        let mut dec = FrameDecoder::new();
3440        let err = dec
3441            .decode_all(frame.as_slice(), &mut out)
3442            .expect_err("Compressed block exceeding FCS must fail decode");
3443        match err {
3444            super::FrameDecoderError::FrameContentSizeMismatch { declared, produced } => {
3445                assert_eq!(declared, patched_declared, "declared echoes patched FCS");
3446                assert!(produced > declared, "produced must exceed declared");
3447            }
3448            other => panic!("expected FrameContentSizeMismatch, got {other:?}"),
3449        }
3450    }
3451
3452    /// Block-precise error positions (#174): a failing block header / body
3453    /// reports its 0-based index and frame-absolute offset, consistent with
3454    /// the encoder's `FrameEmitInfo.blocks[index].offset_in_frame`.
3455    #[cfg(feature = "lsm")]
3456    #[test]
3457    fn block_precise_errors_carry_index_and_offset() {
3458        use crate::encoding::{CompressionLevel, FrameCompressor};
3459        // ~1.3 MiB of incompressible (xorshift) bytes → many 128 KiB raw
3460        // blocks, so blocks 3 and 7 both exist and are not the last block.
3461        let mut data = alloc::vec::Vec::with_capacity(1_300_000);
3462        let mut s: u64 = 0x2545_F491_4F6C_DD1D;
3463        while data.len() < 1_300_000 {
3464            s ^= s << 13;
3465            s ^= s >> 7;
3466            s ^= s << 17;
3467            data.push((s >> 33) as u8);
3468        }
3469
3470        let mut frame = alloc::vec::Vec::new();
3471        let blocks = {
3472            let mut fc = FrameCompressor::new(CompressionLevel::Level(1));
3473            fc.set_source(data.as_slice());
3474            fc.set_drain(&mut frame);
3475            fc.compress();
3476            fc.last_frame_emit_info()
3477                .expect("emit info present under lsm")
3478                .blocks
3479                .clone()
3480        };
3481        assert!(blocks.len() > 7, "need >7 blocks, got {}", blocks.len());
3482
3483        let mut out = alloc::vec![0u8; data.len() + 4096];
3484
3485        // (1) Corrupt block 7's header: force its Block_Type to Reserved (3)
3486        // by setting both type bits — fails the header read at block 7.
3487        let off7 = blocks[7].offset_in_frame as usize;
3488        let mut corrupt = frame.clone();
3489        corrupt[off7] |= 0b0000_0110;
3490        let mut dec = FrameDecoder::new();
3491        let err = dec
3492            .decode_all(&corrupt, &mut out)
3493            .expect_err("reserved block-7 header must fail");
3494        match err {
3495            super::FrameDecoderError::FailedToReadBlockHeaderAt {
3496                block_index,
3497                frame_offset,
3498                ..
3499            } => {
3500                assert_eq!(block_index, 7);
3501                assert_eq!(frame_offset, blocks[7].offset_in_frame);
3502            }
3503            other => panic!("expected FailedToReadBlockHeaderAt, got {other:?}"),
3504        }
3505
3506        // (2) Truncate at block 3's body start: header intact, body missing
3507        // → the body decode fails at block 3 with its FrameBlock metadata.
3508        let body3 = blocks[3].offset_in_frame as usize + blocks[3].header_size as usize;
3509        let mut dec = FrameDecoder::new();
3510        let err = dec
3511            .decode_all(&frame[..body3], &mut out)
3512            .expect_err("truncated block-3 body must fail");
3513        match err {
3514            super::FrameDecoderError::FailedToReadBlockBodyAt {
3515                block_index,
3516                frame_offset,
3517                block,
3518                ..
3519            } => {
3520                assert_eq!(block_index, 3);
3521                assert_eq!(frame_offset, blocks[3].offset_in_frame);
3522                assert_eq!(block.offset_in_frame, blocks[3].offset_in_frame);
3523            }
3524            other => panic!("expected FailedToReadBlockBodyAt, got {other:?}"),
3525        }
3526    }
3527
3528    #[test]
3529    fn decode_all_exact_fit_output_decodes_correctly() {
3530        // Output sized exactly to frame_content_size (no
3531        // WILDCOPY_OVERLENGTH slack) is now eligible for the direct
3532        // path: every output-write site is exact-fit-safe (sequence
3533        // exec falls back to the bounded, non-overshooting copy on the
3534        // trailing sequence(s), Raw/RLE blocks copy exactly). This must
3535        // produce the same bytes as a slack-padded buffer. Exercised on
3536        // x86 through the per-kernel AVX2/SSE2 inline-exec macros, which
3537        // carry the same tight-tail branch.
3538        let payload: Vec<u8> = (0..2048u32)
3539            .map(|i| (i.wrapping_mul(31) & 0xFF) as u8)
3540            .collect();
3541        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3542        compressor.set_source(payload.as_slice());
3543        let mut compressed = Vec::new();
3544        compressor.set_drain(&mut compressed);
3545        compressor.compress();
3546
3547        let mut dec = FrameDecoder::new();
3548        // Exactly payload.len(), no slack.
3549        let mut out = alloc::vec![0u8; payload.len()];
3550        let n = dec
3551            .decode_all(compressed.as_slice(), &mut out)
3552            .expect("exact-fit decode_all should succeed");
3553        assert_eq!(n, payload.len());
3554        assert_eq!(&out[..n], payload.as_slice());
3555    }
3556
3557    #[test]
3558    fn decode_all_fallback_validates_fcs_against_total_output() {
3559        // Synthetic single-segment frame: FCS = 20 bytes, but the
3560        // last-block flag fires after only 4 bytes of raw payload.
3561        // On the direct path this would trip the post-block
3562        // `produced > content_size` check; the fallback path
3563        // (eligible=false because output is sized exactly to FCS,
3564        // no WILDCOPY slack) used to silently return Ok(4). With
3565        // the fix it now surfaces `FrameContentSizeMismatch`
3566        // matching the direct path.
3567        //
3568        // Frame layout: 4 B magic | 1 B FHD (single_segment=1,
3569        // FCS_flag=3 → 8-byte FCS) | 8 B FCS=20 | block header
3570        // (Raw, last, size=4) | 4 raw bytes.
3571        let mut wire = Vec::new();
3572        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes()); // magic
3573        // FHD: FCS_flag=3 (8-byte FCS) <<6 | single_segment=1 <<5.
3574        wire.push(0b1110_0000);
3575        wire.extend_from_slice(&20u64.to_le_bytes()); // declared FCS
3576        // Block header: (size << 3) | (block_type << 1) | last_block.
3577        // Raw block (block_type=0), last_block=1, size=4 → 0b00100001 = 0x21.
3578        wire.push(0x21);
3579        wire.push(0x00);
3580        wire.push(0x00);
3581        wire.extend_from_slice(&[1u8, 2, 3, 4]);
3582
3583        let mut dec = FrameDecoder::new();
3584        // Size output SMALLER than the declared FCS so direct-decode is
3585        // gated out (`output.len() >= content_size` is false) and the
3586        // frame takes the legacy fallback drain loop — the path this test
3587        // guards. The corrupt frame only produces 4 bytes, so 19 is ample
3588        // room; the point is `19 != declared FCS (20)`.
3589        const DECLARED_FCS: usize = 20;
3590        let mut out = alloc::vec![0u8; DECLARED_FCS - 1];
3591        assert_ne!(
3592            out.len(),
3593            DECLARED_FCS,
3594            "output must be smaller than FCS to exercise the fallback path",
3595        );
3596        let err = dec
3597            .decode_all(wire.as_slice(), &mut out)
3598            .expect_err("fallback must reject corrupt FCS underflow");
3599        match err {
3600            crate::decoding::errors::FrameDecoderError::FrameContentSizeMismatch {
3601                declared,
3602                produced,
3603            } => {
3604                assert_eq!(declared, 20);
3605                assert_eq!(produced, 4);
3606            }
3607            other => panic!("expected FrameContentSizeMismatch, got {other:?}"),
3608        }
3609    }
3610
3611    #[test]
3612    fn decode_all_fallback_treats_explicit_fcs_zero_as_declared() {
3613        // Synthetic multi-segment frame with FCS_flag=2 (4-byte
3614        // FCS) explicitly set to 0. The header DECLARES zero
3615        // content, but the body carries a 5-byte raw last-block.
3616        // `fcs_declared()` must return true (the field is on the
3617        // wire) so the fallback's post-decode size check sees the
3618        // mismatch — even though `frame_content_size == 0`. This
3619        // is exactly the FCS=0 edge case where the previous
3620        // `content_size > 0` proxy would have silently accepted
3621        // the corrupt frame.
3622        //
3623        // Frame layout:
3624        //   4 B magic            — 28 B5 2F FD
3625        //   1 B FHD              — FCS_flag=2 (bits 7-6), no
3626        //                          single_segment, content_checksum=0,
3627        //                          dict_id_flag=0 → 0b1000_0000
3628        //   1 B window_descriptor — exp=10, mantissa=0 → window=1 MiB
3629        //   4 B FCS              — 0 LE
3630        //   3 B block header     — raw, last, size=5 → 0x29 0x00 0x00
3631        //   5 B raw payload      — anything non-empty
3632        let mut wire = Vec::new();
3633        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
3634        wire.push(0b1000_0000); // FHD: FCS_flag=2, others 0.
3635        wire.push(0x50); // window_descriptor: exp=10, mantissa=0.
3636        wire.extend_from_slice(&0u32.to_le_bytes()); // FCS = 0.
3637        // Block header (24-bit LE): (size << 3) | (block_type << 1) | last_block
3638        // = (5 << 3) | (0 << 1) | 1 = 0x29.
3639        wire.push(0x29);
3640        wire.push(0x00);
3641        wire.push(0x00);
3642        wire.extend_from_slice(&[1u8, 2, 3, 4, 5]);
3643
3644        let mut dec = FrameDecoder::new();
3645        // FCS=0 declared, so eligibility (`content_size > 0`)
3646        // false — falls through to the drain loop. Output buffer
3647        // size doesn't matter for the eligibility check here;
3648        // give it some room so `read()` can drain the block.
3649        let mut out = alloc::vec![0u8; 16];
3650        let err = dec
3651            .decode_all(wire.as_slice(), &mut out)
3652            .expect_err("corrupt FCS=0 + 5-byte block must error");
3653        match err {
3654            crate::decoding::errors::FrameDecoderError::FrameContentSizeMismatch {
3655                declared,
3656                produced,
3657            } => {
3658                assert_eq!(declared, 0);
3659                assert_eq!(produced, 5);
3660            }
3661            other => panic!("expected FrameContentSizeMismatch, got {other:?}"),
3662        }
3663    }
3664
3665    #[test]
3666    fn decode_all_fallback_accepts_honest_explicit_fcs_zero() {
3667        // Companion to the corrupt-FCS=0 test above: an HONEST
3668        // empty frame with FCS_flag=2 (4-byte FCS) explicitly set
3669        // to 0 AND a 0-byte raw last-block. `fcs_declared()`
3670        // returns true and `content_size == 0 == total_written`,
3671        // so the fallback validation accepts the frame instead of
3672        // misreporting a mismatch.
3673        //
3674        // (Single-segment FCS=0 would test a similar invariant
3675        // but trips header-stage validation: `window_size =
3676        // frame_content_size = 0 < MIN_WINDOW_SIZE` fails the
3677        // window-size sanity check before decode runs. Use the
3678        // multi-segment shape where `window_size` comes from
3679        // `window_descriptor` independently of FCS.)
3680        //
3681        // Frame layout:
3682        //   4 B magic
3683        //   1 B FHD              — FCS_flag=2, others 0 → 0x80
3684        //   1 B window_descriptor — exp=10 → 1 MiB window
3685        //   4 B FCS              — 0 LE
3686        //   3 B block header     — raw, last, size=0 → 0x01 0x00 0x00
3687        let mut wire = Vec::new();
3688        wire.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
3689        wire.push(0b1000_0000);
3690        wire.push(0x50);
3691        wire.extend_from_slice(&0u32.to_le_bytes());
3692        // Block header: (0 << 3) | (0 << 1) | 1 = 0x01.
3693        wire.push(0x01);
3694        wire.push(0x00);
3695        wire.push(0x00);
3696
3697        let mut dec = FrameDecoder::new();
3698        let mut out = alloc::vec![0u8; 16];
3699        let n = dec
3700            .decode_all(wire.as_slice(), &mut out)
3701            .expect("honest FCS=0 + empty block must succeed");
3702        assert_eq!(n, 0);
3703    }
3704
3705    #[test]
3706    fn reset_with_dict_handle_applies_dict_when_no_dict_id() {
3707        let payload = b"reset-without-dict-id";
3708        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3709        compressor.set_source(payload.as_slice());
3710        let mut compressed = Vec::new();
3711        compressor.set_drain(&mut compressed);
3712        compressor.compress();
3713
3714        let dict_raw = include_bytes!("../../dict_tests/dictionary");
3715        let handle = DictionaryHandle::decode_dict(dict_raw).expect("dictionary should parse");
3716
3717        let mut decoder = FrameDecoder::new();
3718        decoder
3719            .reset_with_dict_handle(compressed.as_slice(), &handle)
3720            .expect("reset should succeed");
3721        let state = decoder.state.as_ref().expect("state should be initialized");
3722        assert!(state.frame_header.dictionary_id().is_none());
3723        assert_eq!(state.using_dict, Some(handle.id()));
3724    }
3725
3726    #[test]
3727    fn reserve_buffer_reserves_the_shortfall_not_the_full_window_again() {
3728        // `Vec::reserve_exact` takes ADDITIONAL capacity. The decode_all
3729        // fallback loop re-enters decode_blocks once per strategy chunk,
3730        // and each entry pre-reserves the window: re-requesting the FULL
3731        // window on a buffer already holding ~window bytes of history
3732        // would grow it toward 2x window, defeating the peak-memory cap
3733        // the exact-growth policy exists for.
3734        use super::DecoderScratchKind;
3735        let window = 1usize << 20;
3736        let mut scratch = DecoderScratchKind::new_flat(window);
3737        scratch.reserve_buffer(window);
3738        let data = alloc::vec![0u8; window];
3739        match &mut scratch {
3740            super::DecoderScratchKind::Flat(s) => s.buffer.push(&data),
3741            super::DecoderScratchKind::Ring(_) => unreachable!("new_flat builds Flat"),
3742        }
3743        scratch.reserve_buffer(window);
3744        let workspace = scratch.workspace_bytes();
3745        assert!(
3746            workspace < window * 3 / 2,
3747            "second reserve_buffer grew a full window past the buffered \
3748             history: workspace {workspace} bytes vs window {window}"
3749        );
3750    }
3751
3752    #[test]
3753    fn dict_frame_decodes_through_direct_path() {
3754        // A dictionary frame decoded via `decode_all_with_dict_handle`
3755        // into a buffer sized exactly to FCS takes the direct path
3756        // (UserSliceBackend); matches reaching into the dictionary
3757        // content must resolve through `repeat_from_dict`. The payload
3758        // embeds dictionary content verbatim so the encoder emits
3759        // dict-region matches from the first bytes of the frame.
3760        let dict_raw = include_bytes!("../../dict_tests/dictionary");
3761        let handle = DictionaryHandle::decode_dict(dict_raw).expect("dictionary should parse");
3762        let dict_tail: alloc::vec::Vec<u8> = handle
3763            .as_dict()
3764            .dict_content
3765            .iter()
3766            .rev()
3767            .take(2048)
3768            .rev()
3769            .copied()
3770            .collect();
3771        // No in-frame duplicate of the dictionary bytes: with a second
3772        // copy in the payload the encoder may emit the later copy as an
3773        // in-frame match, and the test would stay green even if the
3774        // direct path stopped forwarding the dictionary handle. A
3775        // single copy forces every dict-region match through
3776        // `repeat_from_dict`.
3777        let mut payload = dict_tail;
3778        payload.extend_from_slice(b"unique suffix after dictionary material 0123456789");
3779
3780        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3781        compressor
3782            .set_dictionary_from_bytes(dict_raw)
3783            .expect("dict load");
3784        compressor.set_source(payload.as_slice());
3785        let mut compressed = Vec::new();
3786        compressor.set_drain(&mut compressed);
3787        compressor.compress();
3788
3789        // Fixture sanity: the frame must actually depend on the
3790        // dictionary, otherwise the decode below never exercises
3791        // dict-region match resolution.
3792        let mut plain = Vec::new();
3793        let mut no_dict = FrameCompressor::new(CompressionLevel::Default);
3794        no_dict.set_source(payload.as_slice());
3795        no_dict.set_drain(&mut plain);
3796        no_dict.compress();
3797        assert!(
3798            compressed.len() < plain.len(),
3799            "fixture must depend on the dictionary: dict {} bytes vs plain {} bytes",
3800            compressed.len(),
3801            plain.len()
3802        );
3803
3804        let mut decoder = FrameDecoder::new();
3805        let mut out = alloc::vec![0u8; payload.len()];
3806        let n = decoder
3807            .decode_all_with_dict_handle(compressed.as_slice(), &mut out, &handle)
3808            .expect("dict frame must decode on the direct path");
3809        assert_eq!(n, payload.len());
3810        assert_eq!(out, payload, "direct-path dict decode must be byte-exact");
3811        // Both paths are byte-identical, so pin the dispatch itself: a
3812        // re-introduced dict exclusion in the direct gate would silently
3813        // fall back to the buffered path and leave the asserts above green.
3814        assert_eq!(
3815            decoder.direct_frames, 1,
3816            "dict frame must take the direct path, not the buffered fallback"
3817        );
3818    }
3819
3820    #[cfg(feature = "lsm")]
3821    mod expect_validation {
3822        use super::*;
3823        use crate::decoding::errors::FrameDecoderError;
3824
3825        fn compress(payload: &[u8]) -> Vec<u8> {
3826            let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3827            compressor.set_source(payload);
3828            let mut compressed = Vec::new();
3829            compressor.set_drain(&mut compressed);
3830            compressor.compress();
3831            compressed
3832        }
3833
3834        fn compress_with_dict(payload: &[u8], dict_raw: &[u8]) -> Vec<u8> {
3835            let mut compressor = FrameCompressor::new(CompressionLevel::Default);
3836            compressor
3837                .set_dictionary_from_bytes(dict_raw)
3838                .expect("dict load");
3839            compressor.set_source(payload);
3840            let mut compressed = Vec::new();
3841            compressor.set_drain(&mut compressed);
3842            compressor.compress();
3843            compressed
3844        }
3845
3846        #[test]
3847        fn expect_dict_id_none_default_allows_anything() {
3848            let compressed = compress(b"hello-no-expect");
3849            let mut decoder = FrameDecoder::new();
3850            decoder
3851                .reset(compressed.as_slice())
3852                .expect("default None passes");
3853        }
3854
3855        #[test]
3856        fn expect_dict_id_zero_matches_frame_without_dict_id() {
3857            // Default-encoded frame has no dict_id; pinning Some(0)
3858            // ("no dictionary expected") must accept it.
3859            let compressed = compress(b"payload");
3860            let mut decoder = FrameDecoder::new();
3861            decoder.expect_dict_id(Some(0));
3862            decoder
3863                .reset(compressed.as_slice())
3864                .expect("Some(0) ~ None");
3865        }
3866
3867        #[test]
3868        fn expect_dict_id_matching_value_passes() {
3869            let dict_raw = include_bytes!("../../dict_tests/dictionary");
3870            let handle = DictionaryHandle::decode_dict(dict_raw).expect("dict parse");
3871            let actual_id = handle.id();
3872
3873            let compressed = compress_with_dict(b"payload-with-dict", dict_raw);
3874
3875            let mut decoder = FrameDecoder::new();
3876            decoder.expect_dict_id(Some(actual_id));
3877            // Decode requires the dict to be registered; using
3878            // reset_with_dict_handle for that.
3879            decoder
3880                .reset_with_dict_handle(compressed.as_slice(), &handle)
3881                .expect("matching dict_id passes");
3882        }
3883
3884        #[test]
3885        fn expect_dict_id_mismatching_value_fails_before_decode() {
3886            let dict_raw = include_bytes!("../../dict_tests/dictionary");
3887            let handle = DictionaryHandle::decode_dict(dict_raw).expect("dict parse");
3888            let actual_id = handle.id();
3889            let wrong_id = actual_id.wrapping_add(1);
3890
3891            let compressed = compress_with_dict(b"payload-with-dict", dict_raw);
3892
3893            let mut decoder = FrameDecoder::new();
3894            decoder.expect_dict_id(Some(wrong_id));
3895            let err = decoder
3896                .reset_with_dict_handle(compressed.as_slice(), &handle)
3897                .expect_err("mismatch must fail");
3898            match err {
3899                FrameDecoderError::UnexpectedDictId { expected, found } => {
3900                    assert_eq!(expected, Some(wrong_id));
3901                    assert_eq!(found, Some(actual_id));
3902                }
3903                other => panic!("expected UnexpectedDictId, got {other:?}"),
3904            }
3905        }
3906
3907        #[test]
3908        fn expect_dict_id_nonzero_fails_on_frame_without_dict_id() {
3909            // Frame has no dict_id; expecting Some(42) (non-zero)
3910            // must fail with found = None.
3911            let compressed = compress(b"no-dict-frame");
3912            let mut decoder = FrameDecoder::new();
3913            decoder.expect_dict_id(Some(42));
3914            let err = decoder
3915                .reset(compressed.as_slice())
3916                .expect_err("nonzero expectation on dictless frame must fail");
3917            match err {
3918                FrameDecoderError::UnexpectedDictId { expected, found } => {
3919                    assert_eq!(expected, Some(42));
3920                    assert_eq!(found, None);
3921                }
3922                other => panic!("expected UnexpectedDictId, got {other:?}"),
3923            }
3924        }
3925
3926        #[test]
3927        fn expect_window_descriptor_none_default_allows_anything() {
3928            let compressed = compress(b"hello-no-wd-expect");
3929            let mut decoder = FrameDecoder::new();
3930            decoder
3931                .reset(compressed.as_slice())
3932                .expect("default None passes");
3933        }
3934
3935        #[test]
3936        fn expect_window_descriptor_mismatch_fails_before_decode() {
3937            // Compress a payload large enough to force a
3938            // multi-segment frame (window_descriptor on wire).
3939            // Default compression at >256 KiB produces multi-
3940            // segment frames with a real window_descriptor byte.
3941            let payload = alloc::vec![0xABu8; 512 * 1024];
3942            let compressed = compress(&payload);
3943
3944            // Read the actual window_descriptor by decoding once
3945            // without expectations, then pin a wrong value.
3946            let mut probe_decoder = FrameDecoder::new();
3947            probe_decoder.reset(compressed.as_slice()).unwrap();
3948            let probe_state = probe_decoder.state.as_ref().unwrap();
3949            let actual_wd = probe_state
3950                .frame_header
3951                .window_descriptor()
3952                .expect("multi-segment frame should expose window_descriptor");
3953            let wrong_wd = actual_wd.wrapping_add(0x10); // bump exponent
3954
3955            let mut decoder = FrameDecoder::new();
3956            decoder.expect_window_descriptor(Some(wrong_wd));
3957            let err = decoder
3958                .reset(compressed.as_slice())
3959                .expect_err("wrong window_descriptor must fail");
3960            match err {
3961                FrameDecoderError::UnexpectedWindowDescriptor { expected, found } => {
3962                    assert_eq!(expected, wrong_wd);
3963                    assert_eq!(found, Some(actual_wd));
3964                }
3965                other => panic!("expected UnexpectedWindowDescriptor, got {other:?}"),
3966            }
3967        }
3968
3969        /// Build a minimal synthetic single-segment zstd frame
3970        /// carrying a 4-byte raw payload. RFC 8878 §3.1.1.1
3971        /// layout, hand-rolled because our default
3972        /// `FrameCompressor` settings don't emit
3973        /// `single_segment_flag` for tiny inputs.
3974        ///
3975        /// Wire bytes (13 total for 4-byte payload):
3976        /// ```text
3977        /// 28 B5 2F FD       magic
3978        /// 20                FHD: single_segment=1, FCS_flag=0
3979        /// 04                FCS (single byte, value = payload.len())
3980        /// 21 00 00          block header: raw, last, size=4
3981        /// .. .. .. ..       payload bytes
3982        /// ```
3983        fn synth_single_segment_frame(payload: &[u8]) -> Vec<u8> {
3984            assert!(payload.len() <= 255, "1-byte FCS field caps at 255");
3985            assert!(payload.len() < (1usize << 21), "block size 21-bit max");
3986            let mut out = Vec::new();
3987            // Magic 0xFD2FB528 LE.
3988            out.extend_from_slice(&0xFD2F_B528u32.to_le_bytes());
3989            // FHD: single_segment_flag (bit 5) set, everything
3990            // else zero. With single_segment + FCS_flag=0 the FCS
3991            // field is 1 byte. No window_descriptor on wire.
3992            out.push(0b0010_0000);
3993            // 1-byte FCS = payload length.
3994            out.push(payload.len() as u8);
3995            // Block header (3 bytes LE):
3996            // last_block=1, block_type=0 (Raw), block_size=payload.len().
3997            // Encoded: (size << 3) | (block_type << 1) | last_block.
3998            // Block header: last_block flag in bit 0, block_type
3999            // (0 = Raw) in bits 1-2, block size in bits 3+.
4000            let bh: u32 = ((payload.len() as u32) << 3) | 1;
4001            out.push((bh & 0xFF) as u8);
4002            out.push(((bh >> 8) & 0xFF) as u8);
4003            out.push(((bh >> 16) & 0xFF) as u8);
4004            // Raw payload.
4005            out.extend_from_slice(payload);
4006            out
4007        }
4008
4009        #[test]
4010        fn expect_window_descriptor_on_single_segment_frame_fails_with_found_none() {
4011            // Single-segment frames omit the window_descriptor
4012            // byte from the wire entirely. Setting an expectation
4013            // here must surface `found: None` so callers
4014            // distinguish "wrong descriptor" from "no descriptor
4015            // on the wire" — never silently pass.
4016            let compressed = synth_single_segment_frame(b"tiny");
4017
4018            // First sanity-check: the synthetic frame decodes
4019            // cleanly without any expectation.
4020            {
4021                let mut probe = FrameDecoder::new();
4022                probe
4023                    .reset(compressed.as_slice())
4024                    .expect("synth frame parses");
4025                let probe_state = probe.state.as_ref().unwrap();
4026                assert!(
4027                    probe_state.frame_header.window_descriptor().is_none(),
4028                    "synth frame must be single-segment"
4029                );
4030            }
4031
4032            let mut decoder = FrameDecoder::new();
4033            decoder.expect_window_descriptor(Some(0x40));
4034            let err = decoder
4035                .reset(compressed.as_slice())
4036                .expect_err("single-segment + expectation must fail");
4037            match err {
4038                FrameDecoderError::UnexpectedWindowDescriptor { expected, found } => {
4039                    assert_eq!(expected, 0x40);
4040                    assert_eq!(found, None);
4041                }
4042                other => panic!("expected UnexpectedWindowDescriptor, got {other:?}"),
4043            }
4044        }
4045
4046        #[test]
4047        fn validation_failure_leaves_decoder_re_resettable() {
4048            // After UnexpectedDictId on a wrong-expectation reset,
4049            // clearing the expectation and re-calling reset must
4050            // succeed on the same source — no lingering failed
4051            // state.
4052            let compressed = compress(b"re-resettable");
4053
4054            let mut decoder = FrameDecoder::new();
4055            decoder.expect_dict_id(Some(42));
4056            let err = decoder
4057                .reset(compressed.as_slice())
4058                .expect_err("first reset fails");
4059            assert!(matches!(err, FrameDecoderError::UnexpectedDictId { .. }));
4060
4061            // Clear expectation and retry on a fresh source.
4062            decoder.expect_dict_id(None);
4063            decoder
4064                .reset(compressed.as_slice())
4065                .expect("retry after clearing expectation should succeed");
4066        }
4067    }
4068
4069    /// Build a skippable frame on the wire: 4-byte LE magic + 4-byte LE
4070    /// length + payload bytes. RFC 8878 §3.1.2 restricts the magic
4071    /// variant to `0..=15`; assert here so accidental misuse of the
4072    /// helper can't smuggle a non-skippable magic past the tests.
4073    #[cfg(feature = "lsm")]
4074    fn build_skippable_frame(variant: u8, payload: &[u8]) -> Vec<u8> {
4075        assert!(
4076            variant <= 15,
4077            "skippable-frame variant {variant} outside RFC 8878 0..=15 range",
4078        );
4079        let mut out = Vec::with_capacity(8 + payload.len());
4080        let magic: u32 = 0x184D2A50 + u32::from(variant);
4081        out.extend_from_slice(&magic.to_le_bytes());
4082        out.extend_from_slice(&u32::try_from(payload.len()).unwrap().to_le_bytes());
4083        out.extend_from_slice(payload);
4084        out
4085    }
4086
4087    #[cfg(feature = "lsm")]
4088    #[test]
4089    fn decode_all_with_skippable_visitor_sees_payloads_in_order() {
4090        // Build a stream: skippable(v0, "alpha") + zstd_frame +
4091        // skippable(v3, "beta") + zstd_frame + skippable(v15, "")
4092        // and verify the visitor is invoked exactly three times with
4093        // the correct (variant, payload) pairs in stream order while
4094        // the zstd frames decode normally.
4095        let payload_a: Vec<u8> = (0..256u16).map(|i| i as u8).collect();
4096        let payload_b: Vec<u8> = (0..256u16).map(|i| (i ^ 0xAA) as u8).collect();
4097
4098        let mut comp_a = Vec::new();
4099        let mut c = FrameCompressor::new(CompressionLevel::Default);
4100        c.set_source(payload_a.as_slice());
4101        c.set_drain(&mut comp_a);
4102        c.compress();
4103
4104        let mut comp_b = Vec::new();
4105        let mut c = FrameCompressor::new(CompressionLevel::Default);
4106        c.set_source(payload_b.as_slice());
4107        c.set_drain(&mut comp_b);
4108        c.compress();
4109
4110        let skip0 = build_skippable_frame(0, b"alpha");
4111        let skip3 = build_skippable_frame(3, b"beta");
4112        let skip15 = build_skippable_frame(15, &[]);
4113
4114        let mut stream = Vec::new();
4115        stream.extend_from_slice(&skip0);
4116        stream.extend_from_slice(&comp_a);
4117        stream.extend_from_slice(&skip3);
4118        stream.extend_from_slice(&comp_b);
4119        stream.extend_from_slice(&skip15);
4120
4121        let mut decoder = FrameDecoder::new();
4122        let mut out = alloc::vec![0u8; payload_a.len() + payload_b.len()];
4123        let mut collected: Vec<(u8, Vec<u8>)> = Vec::new();
4124        let n = decoder
4125            .decode_all_with_skippable_visitor(stream.as_slice(), &mut out, |variant, payload| {
4126                collected.push((variant, payload.to_vec()));
4127            })
4128            .expect("decode_all_with_skippable_visitor should succeed");
4129
4130        // All three skippables visited in stream order.
4131        assert_eq!(collected.len(), 3);
4132        assert_eq!(collected[0], (0u8, b"alpha".to_vec()));
4133        assert_eq!(collected[1], (3u8, b"beta".to_vec()));
4134        assert_eq!(collected[2], (15u8, Vec::<u8>::new()));
4135
4136        // Both zstd frames decoded into `out` back-to-back.
4137        assert_eq!(n, payload_a.len() + payload_b.len());
4138        assert_eq!(&out[..payload_a.len()], payload_a.as_slice());
4139        assert_eq!(&out[payload_a.len()..n], payload_b.as_slice());
4140    }
4141
4142    #[cfg(feature = "lsm")]
4143    #[test]
4144    fn decode_all_silently_skips_when_no_visitor() {
4145        // Regression gate: plain decode_all must still silently skip
4146        // skippable frames (RFC 8878 mandated behavior) with no
4147        // behavioral change after the visitor refactor.
4148        let payload: Vec<u8> = (0..512u16).map(|i| i as u8).collect();
4149        let mut comp = Vec::new();
4150        let mut c = FrameCompressor::new(CompressionLevel::Default);
4151        c.set_source(payload.as_slice());
4152        c.set_drain(&mut comp);
4153        c.compress();
4154
4155        let skip = build_skippable_frame(7, b"ignored sidecar");
4156        let mut stream = Vec::new();
4157        stream.extend_from_slice(&skip);
4158        stream.extend_from_slice(&comp);
4159
4160        let mut decoder = FrameDecoder::new();
4161        let mut out = alloc::vec![0u8; payload.len()];
4162        let n = decoder
4163            .decode_all(stream.as_slice(), &mut out)
4164            .expect("decode_all should succeed on skippable + zstd stream");
4165        assert_eq!(n, payload.len());
4166        assert_eq!(&out[..n], payload.as_slice());
4167    }
4168
4169    #[cfg(feature = "lsm")]
4170    #[test]
4171    fn frame_emit_info_describes_emitted_block_layout() {
4172        // Encode a payload large enough to force >1 block, fetch
4173        // FrameEmitInfo, walk blocks[] and verify each block's
4174        // (offset_in_frame, header_size, body_size) matches the bytes
4175        // actually emitted into the drain buffer.
4176        let payload: Vec<u8> = (0..200_000u32).map(|i| (i & 0xFF) as u8).collect();
4177        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
4178        // Content checksum is opt-in (library default mirrors libzstd's
4179        // checksum-off); request it so the checksum_range assertion below
4180        // exercises the hash-gated trailer accounting.
4181        compressor.set_content_checksum(true);
4182        compressor.set_source(payload.as_slice());
4183        let mut compressed = Vec::new();
4184        compressor.set_drain(&mut compressed);
4185        compressor.compress();
4186
4187        let info = compressor
4188            .last_frame_emit_info()
4189            .expect("last_frame_emit_info populated after compress")
4190            .clone();
4191        drop(compressor);
4192
4193        // Frame header range starts at 0 and is non-empty.
4194        assert_eq!(info.frame_header_range.start, 0);
4195        assert!(info.frame_header_range.end > 0);
4196        // Total size matches what was written to the drain.
4197        assert_eq!(info.total_size as usize, compressed.len());
4198        // At least one block, and the last entry has last_block=true.
4199        assert!(!info.blocks.is_empty());
4200        assert!(info.blocks.last().unwrap().last_block);
4201        // All non-final blocks have last_block=false.
4202        for b in &info.blocks[..info.blocks.len() - 1] {
4203            assert!(!b.last_block);
4204        }
4205        // Walk and verify each block's header bytes match the
4206        // recorded type / size by re-decoding the 3-byte header.
4207        // Walking arithmetic: offset_in_frame + header_size + body_size
4208        // must land exactly on the next block's offset_in_frame (or,
4209        // for the last block, on the checksum / end of frame).
4210        for (i, b) in info.blocks.iter().enumerate() {
4211            let off = b.offset_in_frame as usize;
4212            assert_eq!(b.header_size, 3);
4213            let mut hdr = [0u8; 4];
4214            hdr[..3].copy_from_slice(&compressed[off..off + 3]);
4215            let raw = u32::from_le_bytes(hdr);
4216            let last = (raw & 1) != 0;
4217            let ty = (raw >> 1) & 0b11;
4218            let sz = raw >> 3;
4219            assert_eq!(last, b.last_block);
4220            assert_eq!(sz, b.block_size_field);
4221            // body_size is the PHYSICAL length on the wire: spec's
4222            // Block_Size for Raw/Compressed, always 1 for RLE.
4223            let expected_physical = match b.block_type {
4224                crate::encoding::frame_emit_info::BlockType::RLE => 1,
4225                _ => sz,
4226            };
4227            assert_eq!(b.body_size, expected_physical);
4228            let expected_ty = match b.block_type {
4229                crate::encoding::frame_emit_info::BlockType::Raw => 0,
4230                crate::encoding::frame_emit_info::BlockType::RLE => 1,
4231                crate::encoding::frame_emit_info::BlockType::Compressed => 2,
4232                crate::encoding::frame_emit_info::BlockType::Reserved => 3,
4233            };
4234            assert_eq!(ty, expected_ty);
4235            // Walking-arithmetic invariant.
4236            let next_off = b.offset_in_frame + b.header_size as u32 + b.body_size;
4237            if let Some(next) = info.blocks.get(i + 1) {
4238                assert_eq!(
4239                    next_off, next.offset_in_frame,
4240                    "block {i} body_size doesn't reach next block's offset_in_frame",
4241                );
4242            } else if let Some(cs) = info.checksum_range.as_ref() {
4243                assert_eq!(
4244                    next_off, cs.start,
4245                    "last block body_size doesn't reach checksum_range.start",
4246                );
4247            } else {
4248                assert_eq!(
4249                    next_off, info.total_size,
4250                    "last block body_size doesn't reach total_size",
4251                );
4252            }
4253        }
4254        // Checksum range present iff `feature = "hash"` is enabled.
4255        assert_eq!(info.checksum_range.is_some(), cfg!(feature = "hash"));
4256    }
4257
4258    #[cfg(all(feature = "lsm", feature = "hash"))]
4259    #[test]
4260    fn per_block_checksum_round_trip() {
4261        // Encode with per-block checksums enabled. Decode with
4262        // per-block verification. Both sides emit exactly 1
4263        // checksum per physical block written to / read from the
4264        // wire (encoder hashes per emission site, including each
4265        // post-split partition; decoder hashes each decoded block).
4266        // Cardinality and element-wise contents must match
4267        // round-trip.
4268        let payload: Vec<u8> = (0..200_000u32).map(|i| (i & 0xFF) as u8).collect();
4269        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
4270        compressor.set_source(payload.as_slice());
4271        compressor.enable_per_block_checksums();
4272        let mut compressed = Vec::new();
4273        compressor.set_drain(&mut compressed);
4274        compressor.compress();
4275
4276        let encoder_checksums = compressor
4277            .last_frame_block_checksums()
4278            .expect("checksums populated after enable + compress")
4279            .to_vec();
4280        drop(compressor);
4281        assert!(!encoder_checksums.is_empty());
4282
4283        // Decode side: enable verification, decode, compare.
4284        let mut decoder = FrameDecoder::new();
4285        decoder.enable_per_block_checksums();
4286        let mut output = alloc::vec![0u8; payload.len()];
4287        let n = decoder
4288            .decode_all(compressed.as_slice(), &mut output)
4289            .expect("decode_all should succeed");
4290        assert_eq!(n, payload.len());
4291        assert_eq!(&output[..n], payload.as_slice());
4292
4293        let decoder_checksums = decoder.computed_block_checksums();
4294        assert_eq!(decoder_checksums, encoder_checksums.as_slice());
4295    }
4296
4297    // ── decode_blocks_partial (block-subset partial decode, lsm) ──
4298
4299    /// Build a multi-block compressible frame and return
4300    /// `(compressed, full_decode, emit_info)`. The emit info's
4301    /// `decompressed_byte_range` maps decompressed offsets to block indices.
4302    #[cfg(feature = "lsm")]
4303    fn multi_block_fixture() -> (
4304        Vec<u8>,
4305        Vec<u8>,
4306        crate::encoding::frame_emit_info::FrameEmitInfo,
4307    ) {
4308        let mut data: Vec<u8> = Vec::with_capacity(400 * 1024);
4309        let mut x = 0x9E37_79B9u32;
4310        while data.len() < 400 * 1024 {
4311            x ^= x << 13;
4312            x ^= x >> 17;
4313            x ^= x << 5;
4314            let run = 16 + (x as usize % 48);
4315            let byte = (x >> 24) as u8;
4316            for _ in 0..run {
4317                data.push(byte);
4318            }
4319            data.extend_from_slice(b"the quick brown fox jumps over the lazy dog\n");
4320        }
4321
4322        let mut compressed = Vec::new();
4323        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
4324        compressor.set_source(data.as_slice());
4325        compressor.set_drain(&mut compressed);
4326        compressor.compress();
4327        let info = compressor
4328            .last_frame_emit_info()
4329            .expect("emit info populated")
4330            .clone();
4331        drop(compressor);
4332
4333        let mut dec = FrameDecoder::new();
4334        let mut full = alloc::vec![0u8; data.len()];
4335        let n = dec
4336            .decode_all(compressed.as_slice(), &mut full)
4337            .expect("full decode");
4338        full.truncate(n);
4339        assert_eq!(full, data, "fixture must round-trip");
4340        (compressed, full, info)
4341    }
4342
4343    #[cfg(feature = "lsm")]
4344    #[test]
4345    fn decode_blocks_partial_subset_matches_full_decode() {
4346        let (compressed, full, info) = multi_block_fixture();
4347        let nblocks = info.blocks.len() as u32;
4348        assert!(
4349            nblocks >= 4,
4350            "fixture must have several blocks, got {nblocks}"
4351        );
4352        let half = nblocks / 2;
4353        // Boundaries: 1 block, 2 blocks, half, all, and a non-zero start.
4354        // `(0, u32::MAX)` exercises the "decode to end of frame" sentinel,
4355        // a distinct public contract from an explicit upper bound.
4356        for &(s, e) in &[
4357            (0u32, u32::MAX),
4358            (0, 1),
4359            (0, 2),
4360            (0, half),
4361            (0, nblocks),
4362            (1, 2),
4363            (half, nblocks),
4364        ] {
4365            // The sentinel decodes through the last block; map it to nblocks
4366            // for the expected-slice / block-count arithmetic below.
4367            let effective_end = if e == u32::MAX { nblocks } else { e };
4368            let mut source = compressed.as_slice();
4369            let mut dec = FrameDecoder::new();
4370            dec.reset(&mut source).unwrap();
4371            let pd = dec
4372                .decode_blocks_partial(&mut source, s, e, None, false)
4373                .unwrap_or_else(|err| panic!("range [{s},{e}) errored: {err:?}"));
4374
4375            let start = info.decompressed_byte_range(s as usize).unwrap().start as usize;
4376            let end = info
4377                .decompressed_byte_range((effective_end - 1) as usize)
4378                .unwrap()
4379                .end as usize;
4380            assert_eq!(
4381                pd.data.as_slice(),
4382                &full[start..end],
4383                "subset bytes must equal the full-decode slice for [{s},{e})"
4384            );
4385            assert_eq!(pd.start_block, s);
4386            assert_eq!(pd.blocks_decoded, effective_end - s);
4387            assert!(pd.stopped_at.is_none(), "clean range [{s},{e})");
4388        }
4389    }
4390
4391    #[cfg(feature = "lsm")]
4392    #[test]
4393    fn decode_blocks_partial_recovers_clean_prefix_on_truncated_block() {
4394        let (compressed, full, info) = multi_block_fixture();
4395        let nblocks = info.blocks.len();
4396        let k = nblocks / 2;
4397        assert!(k >= 1, "need a clean prefix before the failing block");
4398
4399        // Truncate the source right after block k's 3-byte header, so its body
4400        // read fails regardless of block type (0 body bytes available).
4401        let cut = info.blocks[k].offset_in_frame as usize + info.blocks[k].header_size as usize;
4402        let truncated = &compressed[..cut];
4403
4404        let mut source = truncated;
4405        let mut dec = FrameDecoder::new();
4406        dec.reset(&mut source).unwrap();
4407        let pd = dec
4408            .decode_blocks_partial(&mut source, 0, u32::MAX, None, false)
4409            .unwrap();
4410
4411        let (idx, _err) = pd.stopped_at.expect("must stop on the truncated block");
4412        assert_eq!(idx, k as u32, "stopped at the truncated block index");
4413        assert_eq!(pd.blocks_decoded, k as u32, "blocks 0..k decoded cleanly");
4414        assert!(!pd.frame_finished);
4415        let clean_end = info.decompressed_byte_range(k).unwrap().start as usize;
4416        assert_eq!(
4417            pd.data.as_slice(),
4418            &full[..clean_end],
4419            "clean prefix preserved through the failure"
4420        );
4421    }
4422
4423    #[cfg(feature = "lsm")]
4424    #[test]
4425    fn decode_blocks_partial_invalid_range_errors() {
4426        let (compressed, _full, _info) = multi_block_fixture();
4427        let mut source = compressed.as_slice();
4428        let mut dec = FrameDecoder::new();
4429        dec.reset(&mut source).unwrap();
4430        let err = dec
4431            .decode_blocks_partial(&mut source, 5, 2, None, false)
4432            .expect_err("start > end must error");
4433        assert!(matches!(
4434            err,
4435            crate::decoding::errors::FrameDecoderError::InvalidBlockRange {
4436                start_block: 5,
4437                end_block: 2,
4438            }
4439        ));
4440    }
4441
4442    #[cfg(feature = "lsm")]
4443    #[test]
4444    fn decode_blocks_partial_skips_trailing_blocks() {
4445        let (compressed, full, info) = multi_block_fixture();
4446        assert!(info.blocks.len() >= 3);
4447        let mut source = compressed.as_slice();
4448        let mut dec = FrameDecoder::new();
4449        dec.reset(&mut source).unwrap();
4450        let pd = dec
4451            .decode_blocks_partial(&mut source, 0, 1, None, false)
4452            .unwrap();
4453
4454        assert_eq!(pd.blocks_decoded, 1);
4455        assert!(pd.stopped_at.is_none());
4456        assert!(!pd.frame_finished, "block 0 is not the last block");
4457        let end = info.decompressed_byte_range(0).unwrap().end as usize;
4458        assert_eq!(pd.data.as_slice(), &full[..end]);
4459        // The trailing blocks + checksum were never consumed from the source.
4460        assert!(
4461            dec.bytes_read_from_source() < u64::from(info.total_size),
4462            "only block 0's region should be consumed, read {} of {}",
4463            dec.bytes_read_from_source(),
4464            info.total_size
4465        );
4466    }
4467
4468    #[cfg(feature = "lsm")]
4469    #[test]
4470    fn lsm_style_range_query_partial_recovery() {
4471        // Simulates lsm-tree's range-query path: a key range resolves to a
4472        // decompressed byte window, which maps to inner zstd block indices via
4473        // `decompressed_byte_range`; decode only the covering blocks and check
4474        // the wanted window is recovered exactly (no key outside, all inside).
4475        let (compressed, full, info) = multi_block_fixture();
4476        let total = full.len() as u64;
4477        let want_start = total / 3;
4478        let want_end = (total * 2) / 3;
4479
4480        // Map [want_start, want_end) to covering block indices.
4481        let nblocks = info.blocks.len();
4482        let mut start_block = 0u32;
4483        let mut end_block = nblocks as u32;
4484        for i in 0..nblocks {
4485            let r = info.decompressed_byte_range(i).unwrap();
4486            if r.start <= want_start && want_start < r.end {
4487                start_block = i as u32;
4488            }
4489            if r.start < want_end && want_end <= r.end {
4490                end_block = i as u32 + 1;
4491                break;
4492            }
4493        }
4494
4495        let mut source = compressed.as_slice();
4496        let mut dec = FrameDecoder::new();
4497        dec.reset(&mut source).unwrap();
4498        let pd = dec
4499            .decode_blocks_partial(&mut source, start_block, end_block, None, false)
4500            .unwrap();
4501        assert!(pd.stopped_at.is_none());
4502
4503        let covered_start = info
4504            .decompressed_byte_range(start_block as usize)
4505            .unwrap()
4506            .start;
4507        let covered_end = info
4508            .decompressed_byte_range((end_block - 1) as usize)
4509            .unwrap()
4510            .end;
4511        assert!(
4512            covered_start <= want_start && want_end <= covered_end,
4513            "covering blocks must contain the wanted window"
4514        );
4515        assert_eq!(
4516            pd.data.as_slice(),
4517            &full[covered_start as usize..covered_end as usize],
4518            "covered subset must equal the full-decode slice"
4519        );
4520        // Slice the exact key range out of the covered subset.
4521        let off = (want_start - covered_start) as usize;
4522        let len = (want_end - want_start) as usize;
4523        assert_eq!(
4524            &pd.data[off..off + len],
4525            &full[want_start as usize..want_end as usize],
4526            "exact key range recovered from the partial decode"
4527        );
4528    }
4529
4530    #[cfg(feature = "lsm")]
4531    #[test]
4532    fn decode_blocks_partial_leaves_no_residual_when_no_in_range_block() {
4533        // Regression: when the requested range reaches no in-range block (here
4534        // start_block is past EOF, so every block is decoded only as window
4535        // context), `PartialDecode::data` is empty — but the context bytes must
4536        // NOT linger in the decoder buffer, or a later collect()/read() on the
4537        // same decoder returns out-of-range data.
4538        let (compressed, _full, info) = multi_block_fixture();
4539        let nblocks = info.blocks.len() as u32;
4540        let mut source = compressed.as_slice();
4541        let mut dec = FrameDecoder::new();
4542        dec.reset(&mut source).unwrap();
4543        let pd = dec
4544            .decode_blocks_partial(&mut source, nblocks + 5, u32::MAX, None, false)
4545            .unwrap();
4546        assert!(pd.data.is_empty(), "no in-range block → empty data");
4547        assert_eq!(pd.blocks_decoded, 0);
4548        assert!(
4549            pd.frame_finished,
4550            "frame's last block was reached as context"
4551        );
4552        assert_eq!(
4553            dec.can_collect(),
4554            0,
4555            "context bytes must not leak via collect()/read() when data is empty"
4556        );
4557    }
4558
4559    #[cfg(feature = "lsm")]
4560    #[test]
4561    fn decode_blocks_partial_empty_range_leaves_no_residual() {
4562        // Companion to the start-past-EOF case: an in-frame empty range `[k, k)`
4563        // (k < EOF) takes the same `prefix_window_len == None` path but with
4564        // `frame_finished == false` and up to `window_size` context bytes still
4565        // physically present. Assert the buffer is fully cleared directly (a
4566        // `can_collect()` check alone would pass even with <= window_size bytes
4567        // retained, because it holds the window back).
4568        let (compressed, _full, info) = multi_block_fixture();
4569        let k = ((info.blocks.len() as u32) / 2).max(1);
4570        let mut source = compressed.as_slice();
4571        let mut dec = FrameDecoder::new();
4572        dec.reset(&mut source).unwrap();
4573        let pd = dec
4574            .decode_blocks_partial(&mut source, k, k, None, false)
4575            .unwrap();
4576
4577        assert!(pd.data.is_empty(), "empty range must yield empty data");
4578        assert_eq!(pd.blocks_decoded, 0);
4579        assert!(
4580            !pd.frame_finished,
4581            "frame should still have trailing blocks"
4582        );
4583        assert_eq!(
4584            dec.state.as_ref().unwrap().decoder_scratch.buffer_len(),
4585            0,
4586            "empty-range partial decode must not retain context bytes"
4587        );
4588    }
4589
4590    #[cfg(all(feature = "lsm", feature = "hash"))]
4591    #[test]
4592    fn decode_blocks_partial_captures_per_block_checksums() {
4593        // Regression: with per-block checksums enabled, decode_blocks_partial
4594        // must populate computed_block_checksums just like decode_blocks /
4595        // decode_all — otherwise callers verifying per-block digests silently
4596        // lose them on the partial path.
4597        let (compressed, full, _info) = multi_block_fixture();
4598
4599        // Reference digests via decode_blocks (the path that captures them).
4600        let mut ref_dec = FrameDecoder::new();
4601        ref_dec.enable_per_block_checksums();
4602        let mut rsrc = compressed.as_slice();
4603        ref_dec.reset(&mut rsrc).unwrap();
4604        while !ref_dec.is_finished() {
4605            ref_dec
4606                .decode_blocks(&mut rsrc, crate::decoding::BlockDecodingStrategy::All)
4607                .unwrap();
4608        }
4609        let expected = ref_dec.computed_block_checksums().to_vec();
4610        assert!(!expected.is_empty(), "fixture must have multiple blocks");
4611        let _ = full;
4612
4613        // Partial decode of the whole frame must capture the same digests.
4614        let mut source = compressed.as_slice();
4615        let mut dec = FrameDecoder::new();
4616        dec.enable_per_block_checksums();
4617        dec.reset(&mut source).unwrap();
4618        let _ = dec
4619            .decode_blocks_partial(&mut source, 0, u32::MAX, None, false)
4620            .unwrap();
4621        assert_eq!(
4622            dec.computed_block_checksums(),
4623            expected.as_slice(),
4624            "partial decode must capture the same per-block checksums as full decode"
4625        );
4626    }
4627
4628    // ── resume (window-priming + entropy cold resume, lsm) ───────────
4629
4630    /// Window size of `compressed`'s frame, read from a freshly-reset decoder.
4631    #[cfg(feature = "lsm")]
4632    fn frame_window_size(compressed: &[u8]) -> usize {
4633        let mut src = compressed;
4634        let mut dec = FrameDecoder::new();
4635        dec.reset(&mut src).unwrap();
4636        dec.state
4637            .as_ref()
4638            .unwrap()
4639            .frame_header
4640            .window_size()
4641            .unwrap_or(0) as usize
4642    }
4643
4644    /// Build a large compressible MULTI-SEGMENT frame (window_size < content,
4645    /// so mid-frame blocks reach back only into a bounded window) and return
4646    /// `(compressed, full_decode, emit_info)`.
4647    #[cfg(feature = "lsm")]
4648    fn multi_segment_block_fixture() -> (
4649        Vec<u8>,
4650        Vec<u8>,
4651        crate::encoding::frame_emit_info::FrameEmitInfo,
4652    ) {
4653        // ~3 MiB of compressible (runs + repeated phrase) data — large enough
4654        // that the encoder picks window_size < content_size (multi-segment).
4655        let mut data: Vec<u8> = Vec::with_capacity(3 * 1024 * 1024);
4656        let mut x = 0x9E37_79B9u32;
4657        while data.len() < 3 * 1024 * 1024 {
4658            x ^= x << 13;
4659            x ^= x >> 17;
4660            x ^= x << 5;
4661            let run = 16 + (x as usize % 48);
4662            let byte = (x >> 24) as u8;
4663            for _ in 0..run {
4664                data.push(byte);
4665            }
4666            data.extend_from_slice(b"the quick brown fox jumps over the lazy dog\n");
4667        }
4668
4669        let mut compressed = Vec::new();
4670        let mut compressor = FrameCompressor::new(CompressionLevel::Default);
4671        compressor.set_source(data.as_slice());
4672        compressor.set_drain(&mut compressed);
4673        compressor.compress();
4674        let info = compressor
4675            .last_frame_emit_info()
4676            .expect("emit info populated")
4677            .clone();
4678        drop(compressor);
4679
4680        // Confirm the precondition: the frame must be multi-segment.
4681        let mut sanity = FrameDecoder::new();
4682        sanity.init(&mut compressed.as_slice()).unwrap();
4683        assert!(
4684            !sanity
4685                .state
4686                .as_ref()
4687                .unwrap()
4688                .frame_header
4689                .descriptor
4690                .single_segment_flag(),
4691            "fixture precondition: frame must be multi-segment (resize if encoder default changed)"
4692        );
4693
4694        let mut dec = FrameDecoder::new();
4695        let mut full = alloc::vec![0u8; data.len()];
4696        let n = dec
4697            .decode_all(compressed.as_slice(), &mut full)
4698            .expect("full decode");
4699        full.truncate(n);
4700        assert_eq!(full, data, "fixture must round-trip");
4701        (compressed, full, info)
4702    }
4703
4704    /// Emit a [`ResumeState`] for resuming at block `n` by decoding `[0, n)` on
4705    /// a throwaway decoder with `emit_resume = true`.
4706    #[cfg(feature = "lsm")]
4707    fn emit_resume_state_at(compressed: &[u8], n: u32) -> super::ResumeState {
4708        let mut src = compressed;
4709        let mut dec = FrameDecoder::new();
4710        dec.reset(&mut src).unwrap();
4711        let pd = dec
4712            .decode_blocks_partial(&mut src, 0, n, None, true)
4713            .expect("prefix decode for resume-state emission");
4714        pd.resume_state
4715            .expect("emit_resume should populate resume_state")
4716    }
4717
4718    #[cfg(feature = "lsm")]
4719    #[test]
4720    fn resume_matches_full_decode_at_first_mid_last() {
4721        // Acceptance criterion: after resuming at block N (cold decoder, primed
4722        // window + restored entropy), decode_blocks_partial yields bytes
4723        // byte-identical to a full decode's [ends[N-1]..ends[end-1]) slice, for
4724        // N in {1, mid, last}. Repeat_Mode entropy blocks are covered because
4725        // the emitted ResumeState carries the carry-over tables.
4726        let (compressed, full, info) = multi_block_fixture();
4727        let nblocks = info.blocks.len() as u32;
4728        assert!(nblocks >= 4, "need several blocks, got {nblocks}");
4729
4730        for &n in &[1u32, nblocks / 2, nblocks - 1] {
4731            // Producer: emit resume state for block n (separate decoder).
4732            let st = emit_resume_state_at(&compressed, n);
4733            assert_eq!(st.block_index(), n);
4734            let output_offset = info.decompressed_byte_range(n as usize).unwrap().start;
4735            assert_eq!(st.output_offset(), output_offset);
4736
4737            // Consumer: a FRESH (cold) decoder resumes at n. Pass the WHOLE
4738            // decompressed prefix as window_prime; it is capped to one window
4739            // internally, exercising the cap path.
4740            let window_prime = &full[..output_offset as usize];
4741            let mut header_src = compressed.as_slice();
4742            let mut dec = FrameDecoder::new();
4743            dec.reset(&mut header_src).unwrap();
4744            // Caller positions the source at block n's compressed frame offset.
4745            let off = info.blocks[n as usize].offset_in_frame as usize;
4746            let mut block_src = &compressed[off..];
4747            let pd = dec
4748                .decode_blocks_partial(
4749                    &mut block_src,
4750                    n,
4751                    u32::MAX,
4752                    Some(super::ResumeInput {
4753                        window_prime,
4754                        state: &st,
4755                    }),
4756                    false,
4757                )
4758                .unwrap_or_else(|e| panic!("resume decode at N={n} errored: {e:?}"));
4759
4760            let start = output_offset as usize;
4761            let end = info
4762                .decompressed_byte_range((nblocks - 1) as usize)
4763                .unwrap()
4764                .end as usize;
4765            assert_eq!(
4766                pd.data.as_slice(),
4767                &full[start..end],
4768                "resumed bytes must equal the full-decode slice for N={n}"
4769            );
4770            assert_eq!(pd.start_block, n);
4771            assert_eq!(pd.blocks_decoded, nblocks - n);
4772            assert!(pd.stopped_at.is_none(), "clean resume at N={n}");
4773            assert!(pd.frame_finished, "decoded through the last block");
4774        }
4775    }
4776
4777    #[cfg(feature = "lsm")]
4778    #[test]
4779    fn resume_with_exact_window_tail_matches_full_decode() {
4780        // Realistic cold-resume shape on a MULTI-SEGMENT frame: caller supplies
4781        // only the last `window_size` decompressed bytes (not the whole prefix),
4782        // which is all that can ever back a match.
4783        let (compressed, full, info) = multi_segment_block_fixture();
4784        let nblocks = info.blocks.len() as u32;
4785        let window_size = frame_window_size(&compressed);
4786        // First block whose preceding output exceeds one window, so the tail
4787        // genuinely truncates the prefix.
4788        let n = (1..nblocks)
4789            .find(|&i| {
4790                info.decompressed_byte_range(i as usize).unwrap().start as usize > window_size
4791            })
4792            .expect("multi-segment frame must have a block past one window");
4793        let st = emit_resume_state_at(&compressed, n);
4794        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start;
4795        assert!(output_offset as usize > window_size);
4796        let tail_start = output_offset as usize - window_size;
4797        let window_prime = &full[tail_start..output_offset as usize];
4798
4799        let mut header_src = compressed.as_slice();
4800        let mut dec = FrameDecoder::new();
4801        dec.reset(&mut header_src).unwrap();
4802        let off = info.blocks[n as usize].offset_in_frame as usize;
4803        let mut block_src = &compressed[off..];
4804        let pd = dec
4805            .decode_blocks_partial(
4806                &mut block_src,
4807                n,
4808                u32::MAX,
4809                Some(super::ResumeInput {
4810                    window_prime,
4811                    state: &st,
4812                }),
4813                false,
4814            )
4815            .unwrap();
4816
4817        let end = info
4818            .decompressed_byte_range((nblocks - 1) as usize)
4819            .unwrap()
4820            .end as usize;
4821        assert_eq!(pd.data.as_slice(), &full[output_offset as usize..end]);
4822        assert_eq!(pd.blocks_decoded, nblocks - n);
4823    }
4824
4825    #[cfg(feature = "lsm")]
4826    #[test]
4827    fn resume_rejects_short_window_prime() {
4828        // Acceptance criterion: a window_prime shorter than the required window
4829        // is rejected with a typed error, not a silent mis-decode.
4830        let (compressed, full, info) = multi_block_fixture();
4831        let nblocks = info.blocks.len() as u32;
4832        let window_size = frame_window_size(&compressed);
4833        let n = nblocks / 2;
4834        let st = emit_resume_state_at(&compressed, n);
4835        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start;
4836        let required = core::cmp::min(window_size as u64, output_offset) as usize;
4837        assert!(required > 0, "mid block must require a non-empty window");
4838
4839        // One byte short of the required window.
4840        let prime = &full[output_offset as usize - (required - 1)..output_offset as usize];
4841
4842        let mut header_src = compressed.as_slice();
4843        let mut dec = FrameDecoder::new();
4844        dec.reset(&mut header_src).unwrap();
4845        let off = info.blocks[n as usize].offset_in_frame as usize;
4846        let mut block_src = &compressed[off..];
4847        let err = dec
4848            .decode_blocks_partial(
4849                &mut block_src,
4850                n,
4851                u32::MAX,
4852                Some(super::ResumeInput {
4853                    window_prime: prime,
4854                    state: &st,
4855                }),
4856                false,
4857            )
4858            .expect_err("short window_prime must be rejected");
4859        match err {
4860            crate::decoding::errors::FrameDecoderError::ResumeWindowTooShort { got, need } => {
4861                assert_eq!(got, required - 1);
4862                assert_eq!(need, required);
4863            }
4864            other => panic!("expected ResumeWindowTooShort, got {other:?}"),
4865        }
4866    }
4867
4868    #[cfg(feature = "lsm")]
4869    #[test]
4870    fn resume_range_validates_against_effective_start_not_start_block() {
4871        // In resume mode `start_block` is ignored and decoding begins at
4872        // `state.block_index()`. The range guard must therefore validate the
4873        // EFFECTIVE start against `end_block`: `end_block` below the resume
4874        // block is an inverted range and must error, not silently return an
4875        // empty decode. Caller passes the conventional ignored `start_block = 0`.
4876        let (compressed, _full, info) = multi_block_fixture();
4877        let nblocks = info.blocks.len() as u32;
4878        let n = (nblocks / 2).max(2);
4879        let st = emit_resume_state_at(&compressed, n);
4880        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start;
4881
4882        let mut header_src = compressed.as_slice();
4883        let mut dec = FrameDecoder::new();
4884        dec.reset(&mut header_src).unwrap();
4885        let off = info.blocks[n as usize].offset_in_frame as usize;
4886        let mut block_src = &compressed[off..];
4887        // end_block = n - 1 is below the resume block n → inverted range.
4888        let err = dec
4889            .decode_blocks_partial(
4890                &mut block_src,
4891                0,
4892                n - 1,
4893                Some(super::ResumeInput {
4894                    window_prime: &_full[..output_offset as usize],
4895                    state: &st,
4896                }),
4897                false,
4898            )
4899            .expect_err("end_block below the resume block must be an inverted range");
4900        match err {
4901            crate::decoding::errors::FrameDecoderError::InvalidBlockRange {
4902                start_block,
4903                end_block,
4904            } => {
4905                assert_eq!(start_block, n, "error must report the effective start");
4906                assert_eq!(end_block, n - 1);
4907            }
4908            other => panic!("expected InvalidBlockRange, got {other:?}"),
4909        }
4910    }
4911
4912    #[cfg(feature = "lsm")]
4913    #[test]
4914    fn resume_rejects_state_from_a_different_frame() {
4915        // A ResumeState captured from one frame must not be applied to a frame
4916        // with a different decode shape (window size / single-segment / dict):
4917        // restoring foreign entropy tables would yield byte-wrong output. The
4918        // frame-identity guard must reject it up front with a typed error.
4919        let (frame_a, _full_a, info_a) = multi_block_fixture();
4920        let (frame_b, full_b, _info_b) = multi_segment_block_fixture();
4921        // Sanity: the two fixtures must differ in decode shape for the guard to
4922        // be exercised (single-segment vs multi-segment here).
4923        let st = emit_resume_state_at(&frame_a, (info_a.blocks.len() as u32 / 2).max(1));
4924
4925        let mut header_src = frame_b.as_slice();
4926        let mut dec = FrameDecoder::new();
4927        dec.reset(&mut header_src).unwrap();
4928        // The frame-key check runs before the window-length check, so even a
4929        // valid-length window_prime for frame B is rejected on identity.
4930        let err = dec
4931            .decode_blocks_partial(
4932                &mut frame_b.as_slice(),
4933                st.block_index(),
4934                u32::MAX,
4935                Some(super::ResumeInput {
4936                    window_prime: &full_b,
4937                    state: &st,
4938                }),
4939                false,
4940            )
4941            .expect_err("resume state from a different frame must be rejected");
4942        assert!(
4943            matches!(
4944                err,
4945                crate::decoding::errors::FrameDecoderError::ResumeFrameMismatch
4946            ),
4947            "expected ResumeFrameMismatch, got {err:?}"
4948        );
4949    }
4950
4951    #[cfg(all(feature = "lsm", feature = "hash"))]
4952    #[test]
4953    fn resume_rejects_wrong_window_prime_content() {
4954        // Same frame (FrameKey matches) but the caller supplies a window_prime
4955        // with one byte flipped. The shape key cannot catch this; the
4956        // content-exact XXH64 of the window must, rejecting before any restore
4957        // rather than mis-resolving matches against corrupted history.
4958        let (compressed, full, info) = multi_block_fixture();
4959        let nblocks = info.blocks.len() as u32;
4960        let n = (nblocks / 2).max(1);
4961        let st = emit_resume_state_at(&compressed, n);
4962        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start as usize;
4963        assert!(output_offset > 0);
4964
4965        // Correct prefix with the last byte corrupted (this byte is inside the
4966        // window the resume block reaches back into).
4967        let mut corrupted = full[..output_offset].to_vec();
4968        let last = corrupted.len() - 1;
4969        corrupted[last] ^= 0xFF;
4970
4971        let mut header_src = compressed.as_slice();
4972        let mut dec = FrameDecoder::new();
4973        dec.reset(&mut header_src).unwrap();
4974        let off = info.blocks[n as usize].offset_in_frame as usize;
4975        let mut block_src = &compressed[off..];
4976        let err = dec
4977            .decode_blocks_partial(
4978                &mut block_src,
4979                n,
4980                u32::MAX,
4981                Some(super::ResumeInput {
4982                    window_prime: &corrupted,
4983                    state: &st,
4984                }),
4985                false,
4986            )
4987            .expect_err("corrupted window_prime must be rejected by content hash");
4988        assert!(
4989            matches!(
4990                err,
4991                crate::decoding::errors::FrameDecoderError::ResumeFrameMismatch
4992            ),
4993            "expected ResumeFrameMismatch, got {err:?}"
4994        );
4995    }
4996
4997    #[cfg(feature = "lsm")]
4998    #[test]
4999    fn resume_rejects_state_with_different_active_dictionary() {
5000        // A dictless-header frame can be decoded with an explicit dictionary
5001        // applied at runtime (force_dict / reset_with_dict_handle). Two such
5002        // decodes differ in entropy/repcode/dict context even though the header
5003        // dictionary_id is identically absent, so the resume guard must key on
5004        // the ACTIVE dictionary, not just the header field. Here the snapshot is
5005        // captured with no active dictionary; resuming with one applied must be
5006        // rejected before any state is restored.
5007        let (compressed, full, info) = multi_block_fixture();
5008        let nblocks = info.blocks.len() as u32;
5009        let n = (nblocks / 2).max(1);
5010        let st = emit_resume_state_at(&compressed, n); // active_dictionary_id = None
5011        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start as usize;
5012
5013        let raw = std::fs::read("./dict_tests/dictionary").expect("dictionary fixture");
5014        let dict = crate::decoding::dictionary::Dictionary::decode_dict(&raw).expect("parse dict");
5015        let dict_id = dict.id;
5016
5017        let mut header_src = compressed.as_slice();
5018        let mut dec = FrameDecoder::new();
5019        dec.add_dict(dict).unwrap();
5020        dec.reset(&mut header_src).unwrap();
5021        dec.force_dict(dict_id).unwrap(); // active_dictionary_id = Some(dict_id)
5022        let off = info.blocks[n as usize].offset_in_frame as usize;
5023        let mut block_src = &compressed[off..];
5024        let err = dec
5025            .decode_blocks_partial(
5026                &mut block_src,
5027                n,
5028                u32::MAX,
5029                Some(super::ResumeInput {
5030                    window_prime: &full[..output_offset],
5031                    state: &st,
5032                }),
5033                false,
5034            )
5035            .expect_err("resume with a different active dictionary must be rejected");
5036        assert!(
5037            matches!(
5038                err,
5039                crate::decoding::errors::FrameDecoderError::ResumeFrameMismatch
5040            ),
5041            "expected ResumeFrameMismatch, got {err:?}"
5042        );
5043    }
5044
5045    #[cfg(feature = "lsm")]
5046    #[test]
5047    fn resume_invalid_range_does_not_mutate_decoder_state() {
5048        // An inverted effective range must be rejected WITHOUT priming the
5049        // decoder: no entropy restore, no window prime, no cursor advance. As
5050        // written before the fix, those mutations ran before the range check,
5051        // leaving the decoder in a synthetic resumed state on the error path.
5052        let (compressed, full, info) = multi_block_fixture();
5053        let nblocks = info.blocks.len() as u32;
5054        let n = (nblocks / 2).max(2);
5055        let st = emit_resume_state_at(&compressed, n);
5056        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start as usize;
5057
5058        let mut header_src = compressed.as_slice();
5059        let mut dec = FrameDecoder::new();
5060        dec.reset(&mut header_src).unwrap();
5061        // Freshly reset: cursor at block 0.
5062        assert_eq!(dec.state.as_ref().unwrap().block_counter, 0);
5063
5064        let off = info.blocks[n as usize].offset_in_frame as usize;
5065        let mut block_src = &compressed[off..];
5066        let err = dec
5067            .decode_blocks_partial(
5068                &mut block_src,
5069                0,
5070                n - 1, // below the resume block → inverted range
5071                Some(super::ResumeInput {
5072                    window_prime: &full[..output_offset],
5073                    state: &st,
5074                }),
5075                false,
5076            )
5077            .expect_err("inverted range must error");
5078        assert!(matches!(
5079            err,
5080            crate::decoding::errors::FrameDecoderError::InvalidBlockRange { .. }
5081        ));
5082        assert_eq!(
5083            dec.state.as_ref().unwrap().block_counter,
5084            0,
5085            "error path must not advance the cursor (validate before priming)"
5086        );
5087    }
5088
5089    #[cfg(feature = "lsm")]
5090    #[test]
5091    fn emit_resume_state_absent_on_terminal_block() {
5092        // When a decode reaches the frame's last block there is no "next block"
5093        // to resume at: the snapshot's block_index would be one past EOF and the
5094        // caller has no offset_in_frame for it. emit_resume must therefore yield
5095        // None on the terminal block, not a dangling snapshot.
5096        let (compressed, _full, info) = multi_block_fixture();
5097        let nblocks = info.blocks.len() as u32;
5098        let mut src = compressed.as_slice();
5099        let mut dec = FrameDecoder::new();
5100        dec.reset(&mut src).unwrap();
5101        let pd = dec
5102            .decode_blocks_partial(&mut src, 0, nblocks, None, true)
5103            .unwrap();
5104        assert!(pd.frame_finished, "decode must reach the last block");
5105        assert!(
5106            pd.resume_state.is_none(),
5107            "no resume state past the frame's last block"
5108        );
5109    }
5110
5111    #[cfg(feature = "lsm")]
5112    #[test]
5113    fn emit_resume_state_absent_when_not_requested() {
5114        // Default partial decode (emit_resume = false) must NOT pay the entropy
5115        // clone: resume_state stays None.
5116        let (compressed, _full, info) = multi_block_fixture();
5117        let nblocks = info.blocks.len() as u32;
5118        let mut src = compressed.as_slice();
5119        let mut dec = FrameDecoder::new();
5120        dec.reset(&mut src).unwrap();
5121        let pd = dec
5122            .decode_blocks_partial(&mut src, 0, nblocks, None, false)
5123            .unwrap();
5124        assert!(
5125            pd.resume_state.is_none(),
5126            "resume_state must be None unless emit_resume is set"
5127        );
5128    }
5129
5130    #[cfg(feature = "lsm")]
5131    #[test]
5132    fn resume_grow_loop_reconstructs_full() {
5133        // The motivating scenario: a symmetric one-call grow-loop. Each call
5134        // takes the previous ResumeState and emits the next, decoding only the
5135        // new extent — concatenated, the extents reconstruct the full output
5136        // with no prefix ever re-decompressed.
5137        let (compressed, full, info) = multi_block_fixture();
5138        let nblocks = info.blocks.len() as u32;
5139        assert!(nblocks >= 4);
5140
5141        // Walk the frame in extents of `step` blocks each.
5142        let step = (nblocks / 3).max(1);
5143        let mut combined: Vec<u8> = Vec::new();
5144        let mut next: u32 = 0;
5145        let mut carry: Option<super::ResumeState> = None;
5146
5147        while next < nblocks {
5148            let end = (next + step).min(nblocks);
5149            let mut dec = FrameDecoder::new();
5150            let mut header_src = compressed.as_slice();
5151            dec.reset(&mut header_src).unwrap();
5152
5153            let off = info.blocks[next as usize].offset_in_frame as usize;
5154            let mut block_src = &compressed[off..];
5155
5156            let output_offset = info.decompressed_byte_range(next as usize).unwrap().start;
5157            let pd = if let Some(st) = carry.as_ref() {
5158                // Resume from the prior extent's state (cold: fresh decoder).
5159                let window_prime = &full[..output_offset as usize];
5160                dec.decode_blocks_partial(
5161                    &mut block_src,
5162                    next,
5163                    end,
5164                    Some(super::ResumeInput {
5165                        window_prime,
5166                        state: st,
5167                    }),
5168                    true,
5169                )
5170                .unwrap()
5171            } else {
5172                // First extent: no resume input, just emit for the next.
5173                dec.decode_blocks_partial(&mut block_src, next, end, None, true)
5174                    .unwrap()
5175            };
5176
5177            combined.extend_from_slice(&pd.data);
5178            carry = pd.resume_state;
5179            next = end;
5180        }
5181
5182        assert_eq!(
5183            combined, full,
5184            "grow-loop extents must reconstruct the full output"
5185        );
5186    }
5187
5188    #[cfg(all(feature = "lsm", feature = "hash"))]
5189    #[test]
5190    fn resume_does_not_redecode_prefix_blocks() {
5191        // Instrumented confirmation that blocks < N are not re-decoded on
5192        // resume. With per-block checksums enabled on the resuming decoder, the
5193        // resumed decode must record exactly one digest per in-range block
5194        // (end - N), never one per frame block.
5195        let (compressed, full, info) = multi_block_fixture();
5196        let nblocks = info.blocks.len() as u32;
5197        let n = nblocks / 2;
5198        let st = emit_resume_state_at(&compressed, n);
5199        let output_offset = info.decompressed_byte_range(n as usize).unwrap().start;
5200
5201        let mut header_src = compressed.as_slice();
5202        let mut dec = FrameDecoder::new();
5203        dec.enable_per_block_checksums();
5204        dec.reset(&mut header_src).unwrap();
5205        let off = info.blocks[n as usize].offset_in_frame as usize;
5206        let mut block_src = &compressed[off..];
5207        let _ = dec
5208            .decode_blocks_partial(
5209                &mut block_src,
5210                n,
5211                u32::MAX,
5212                Some(super::ResumeInput {
5213                    window_prime: &full[..output_offset as usize],
5214                    state: &st,
5215                }),
5216                false,
5217            )
5218            .unwrap();
5219
5220        assert_eq!(
5221            dec.computed_block_checksums().len() as u32,
5222            nblocks - n,
5223            "resume must decode only in-range blocks, not re-decode the prefix"
5224        );
5225    }
5226}