Skip to main content

structured_zstd/encoding/
frame_compressor.rs

1//! Utilities and interfaces for encoding an entire frame. Allows reusing resources
2
3use alloc::{boxed::Box, vec::Vec};
4use core::convert::TryInto;
5#[cfg(feature = "hash")]
6use twox_hash::XxHash64;
7
8#[cfg(feature = "hash")]
9use core::hash::Hasher;
10
11use super::{
12    CompressionLevel, Matcher, block_header::BlockHeader, frame_header::FrameHeader, levels::*,
13    match_generator::MatchGeneratorDriver,
14};
15use crate::common::MAX_BLOCK_SIZE;
16use crate::fse::fse_encoder::{FSETable, default_ll_table, default_ml_table, default_of_table};
17
18use crate::io::{Read, Write};
19
20/// An interface for compressing arbitrary data with the ZStandard compression algorithm.
21///
22/// `FrameCompressor` will generally be used by:
23/// 1. Creating a compressor for a compression level using `FrameCompressor::new()`
24/// 2. Attaching a source and a drain (`set_source` / `set_drain`) and then calling `FrameCompressor::compress`
25///
26/// # Examples
27/// ```
28/// use structured_zstd::encoding::{FrameCompressor, CompressionLevel};
29/// let mock_data: &[_] = &[0x1, 0x2, 0x3, 0x4];
30/// let mut output = std::vec::Vec::new();
31/// // Initialize a compressor.
32/// let mut compressor = FrameCompressor::new(CompressionLevel::Uncompressed);
33/// compressor.set_source(mock_data);
34/// compressor.set_drain(&mut output);
35///
36/// // `compress` writes the compressed output into the provided buffer.
37/// compressor.compress();
38/// ```
39pub struct FrameCompressor<R: Read, W: Write, M: Matcher> {
    /// Input reader installed by `set_source`; `compress` unwraps it, so it
    /// must be set before compressing.
40    uncompressed_data: Option<R>,
    /// Output writer installed by `set_drain`; `compress` unwraps it, so it
    /// must be set before compressing.
41    compressed_data: Option<W>,
    /// Level chosen at construction; `compress` passes it to the matcher
    /// reset and uses it to select the block encoding path.
42    compression_level: CompressionLevel,
    /// Optional dictionary. `compress` primes the matcher with it only when
    /// the level is not `Uncompressed` and the matcher reports support for
    /// dictionary priming.
43    dictionary: Option<crate::decoding::Dictionary>,
    /// Entropy tables copied into `state` at the start of a frame whenever
    /// the dictionary state is in use.
44    dictionary_entropy_cache: Option<CachedDictionaryEntropy>,
    /// Size hint set by `set_source_size_hint`; taken (reset to `None`) by
    /// the next `compress` call.
45    source_size_hint: Option<u64>,
    /// Matcher, entropy tables and offset history; re-initialised at the
    /// start of every `compress` call so the compressor can be reused.
46    state: CompressState<M>,
47    /// When true, emitted frames omit the 4-byte magic number prefix
48    /// (`ZSTD_f_zstd1_magicless`). Default false. The caller is
49    /// responsible for ensuring the decoder is configured for the
50    /// matching format — wire-format only round-trips with a
51    /// magicless-aware decoder.
52    magicless: bool,
    /// XXH64 hasher, re-created with seed 0 at the start of `compress` and
    /// fed every block of uncompressed input.
53    #[cfg(feature = "hash")]
54    hasher: XxHash64,
55}
56
/// Entropy tables that `FrameCompressor::compress` copies into its
/// `CompressState` at the start of a frame when dictionary state is in use.
57#[derive(Clone, Default)]
58struct CachedDictionaryEntropy {
    /// Copied into `CompressState::last_huff_table`.
59    huff: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Copied into `FseTables::ll_previous`.
60    ll_previous: Option<PreviousFseTable>,
    /// Copied into `FseTables::ml_previous`.
61    ml_previous: Option<PreviousFseTable>,
    /// Copied into `FseTables::of_previous`.
62    of_previous: Option<PreviousFseTable>,
63}
64
/// Record of the FSE table kept as the "previous" table for one of the
/// LL / ML / OF slots in `FseTables`.
65#[derive(Clone)]
66pub(crate) enum PreviousFseTable {
67    // Default tables are immutable and already stored alongside the state, so
68    // repeating them only needs a lightweight marker instead of cloning FSETable.
69    Default,
    // An owned table; `as_table` hands out a borrow of it.
70    Custom(Box<FSETable>),
    // No table is associated with this variant (`as_table` returns `None`).
    // NOTE(review): the `u8` is presumably the single repeated symbol of an
    // RLE-mode block — confirm against the block encoder.
71    Rle(u8),
72}
73
74impl PreviousFseTable {
75    pub(crate) fn as_table<'a>(&'a self, default: &'a FSETable) -> Option<&'a FSETable> {
76        match self {
77            Self::Default => Some(default),
78            Self::Custom(table) => Some(table),
79            Self::Rle(_) => None,
80        }
81    }
82}
83
/// The predefined ("default") FSE tables for the LL, ML and OF slots, plus
/// the table most recently recorded as "previous" for each slot.
84pub(crate) struct FseTables {
85    /// The three predefined LL/ML/OF tables are functions of
86    /// compile-time-constant distributions. The
87    /// [`fse_encoder::FseDefaultTable`] type alias resolves to
88    /// `&'static FSETable` when a process-wide cache is available
89    /// (atomic-pointer targets, or no-atomic targets with the
90    /// `critical-section` feature) and to `Box<FSETable>` on the
91    /// cache-less no-atomic path (one per-frame allocation, dropped
92    /// with the compressor — no `Box::leak`, no unbounded growth).
93    /// Both arms `Deref` to `FSETable`, so consumers in
94    /// `encoding/blocks/compressed.rs` borrow through `&` uniformly
95    /// without seeing the per-target divergence.
96    pub(crate) ll_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous LL table. `None` after `new()`; `FrameCompressor::compress`
    /// clears it, or seeds it from the dictionary entropy cache, at the
    /// start of every frame.
97    pub(crate) ll_previous: Option<PreviousFseTable>,
    /// Predefined ML table; same representation as `ll_default`.
98    pub(crate) ml_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous ML table; same lifecycle as `ll_previous`.
99    pub(crate) ml_previous: Option<PreviousFseTable>,
    /// Predefined OF table; same representation as `ll_default`.
100    pub(crate) of_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous OF table; same lifecycle as `ll_previous`.
101    pub(crate) of_previous: Option<PreviousFseTable>,
102}
103
104impl FseTables {
105    pub fn new() -> Self {
106        Self {
107            ll_default: default_ll_table(),
108            ll_previous: None,
109            ml_default: default_ml_table(),
110            ml_previous: None,
111            of_default: default_of_table(),
112            of_previous: None,
113        }
114    }
115
116    /// Borrow the LL default table as `&FSETable`. Abstracts the cfg
117    /// split in [`crate::fse::fse_encoder::FseDefaultTable`] —
118    /// `&'static FSETable` (atomic / `critical-section`) auto-derefs
119    /// directly; `Box<FSETable>` (cache-less no-atomic) derefs
120    /// through `Box`. Both arms yield `&FSETable` uniformly so
121    /// downstream consumers can stay cfg-agnostic.
122    #[inline]
123    #[allow(clippy::borrow_deref_ref)]
124    pub(crate) fn ll_default_ref(&self) -> &FSETable {
125        &*self.ll_default
126    }
127
128    /// Borrow the ML default table as `&FSETable`. See [`Self::ll_default_ref`].
129    #[inline]
130    #[allow(clippy::borrow_deref_ref)]
131    pub(crate) fn ml_default_ref(&self) -> &FSETable {
132        &*self.ml_default
133    }
134
135    /// Borrow the OF default table as `&FSETable`. See [`Self::ll_default_ref`].
136    #[inline]
137    #[allow(clippy::borrow_deref_ref)]
138    pub(crate) fn of_default_ref(&self) -> &FSETable {
139        &*self.of_default
140    }
141}
142
/// Smallest block length the pre-splitter returns: `donor_optimal_block_size`
/// raises any shorter split point to this value.
143const PRESPLIT_BLOCK_MIN: usize = 3500;
/// Denominator of the threshold fraction in `presplit_fingerprints_differ`.
144const PRESPLIT_THRESHOLD_PENALTY_RATE: u64 = 16;
/// Numerator of that fraction before the per-call penalty is added.
145const PRESPLIT_THRESHOLD_BASE: u64 = PRESPLIT_THRESHOLD_PENALTY_RATE - 2;
/// Starting penalty in `donor_split_block_by_chunks`; it is decremented
/// (never below 0) after every chunk that gets merged.
146const PRESPLIT_THRESHOLD_PENALTY: i32 = 3;
/// Size in bytes (8 KiB) of the chunks fingerprinted by
/// `donor_split_block_by_chunks`.
147const PRESPLIT_CHUNK_SIZE: usize = 8 << 10;
/// Largest hash width, in bits, accepted by `presplit_hash2`.
148const PRESPLIT_HASH_LOG_MAX: usize = 10;
/// Number of event counters in a `PreSplitFingerprint`.
149const PRESPLIT_HASH_TABLE_SIZE: usize = 1 << PRESPLIT_HASH_LOG_MAX;
/// Multiplier applied to the 2-byte sample in `presplit_hash2` before the
/// top `hash_log` bits are kept.
150const PRESPLIT_KNUTH: u32 = 0x9E37_79B9;
151/// Donor `SEGMENT_SIZE` in `ZSTD_splitBlock_fromBorders` (`zstd_preSplit.c:201`).
152/// Two `SEGMENT_SIZE`-byte fingerprints — one from the start, one from the end —
153/// drive the cheap border heuristic; a third one from the middle disambiguates
154/// where in the block the transition sits.
155const PRESPLIT_BORDERS_SEGMENT: usize = 512;
156
157#[derive(Clone)]
158struct PreSplitFingerprint {
159    events: [u32; PRESPLIT_HASH_TABLE_SIZE],
160    nb_events: usize,
161}
162
163impl Default for PreSplitFingerprint {
164    fn default() -> Self {
165        Self {
166            events: [0; PRESPLIT_HASH_TABLE_SIZE],
167            nb_events: 0,
168        }
169    }
170}
171
172fn presplit_hash2(bytes: &[u8], hash_log: usize) -> usize {
173    debug_assert!(hash_log >= 8);
174    if hash_log == 8 {
175        return bytes[0] as usize;
176    }
177    debug_assert!(hash_log <= PRESPLIT_HASH_LOG_MAX);
178    let value = u16::from_le_bytes([bytes[0], bytes[1]]) as u32;
179    (value.wrapping_mul(PRESPLIT_KNUTH) >> (32 - hash_log)) as usize
180}
181
182fn presplit_record_fingerprint(
183    fp: &mut PreSplitFingerprint,
184    src: &[u8],
185    sampling_rate: usize,
186    hash_log: usize,
187) {
188    fp.events.fill(0);
189    fp.nb_events = 0;
190    if src.len() < 2 {
191        return;
192    }
193    let limit = src.len() - 1;
194    let mut n = 0usize;
195    while n < limit {
196        fp.events[presplit_hash2(&src[n..], hash_log)] += 1;
197        n += sampling_rate;
198    }
199    // Donor parity: zstd_preSplit.c records the integer division, not the
200    // rounded-up number of sampled events from the loop above.
201    fp.nb_events += limit / sampling_rate;
202}
203
204/// Single-byte histogram pass — matches donor `HIST_add` over a small
205/// segment with `hashLog == 8` (the `hash2` shortcut at
206/// `zstd_preSplit.c:36` returns the raw byte). The byChunks path uses
207/// 2-byte hashing for `hashLog >= 9`; this helper exists so the borders
208/// heuristic doesn't pay for that wider hash on its 512-byte windows.
209fn presplit_record_byte_histogram(fp: &mut PreSplitFingerprint, src: &[u8]) {
210    fp.events.fill(0);
211    for &b in src {
212        fp.events[b as usize] += 1;
213    }
214    // Donor `HIST_add` returns the maximum symbol; the caller then sets
215    // `nbEvents = SEGMENT_SIZE` explicitly (see `zstd_preSplit.c:213`).
216    fp.nb_events = src.len();
217}
218
219fn presplit_distance(lhs: &PreSplitFingerprint, rhs: &PreSplitFingerprint, hash_log: usize) -> u64 {
220    let slots = 1usize << hash_log;
221    let mut distance = 0u64;
222    for idx in 0..slots {
223        let left = lhs.events[idx] as i128 * rhs.nb_events as i128;
224        let right = rhs.events[idx] as i128 * lhs.nb_events as i128;
225        distance = distance.saturating_add(left.abs_diff(right) as u64);
226    }
227    distance
228}
229
230fn presplit_fingerprints_differ(
231    reference: &PreSplitFingerprint,
232    new_fp: &PreSplitFingerprint,
233    penalty: i32,
234    hash_log: usize,
235) -> bool {
236    debug_assert!(reference.nb_events > 0);
237    debug_assert!(new_fp.nb_events > 0);
238    let p50 = reference.nb_events as u64 * new_fp.nb_events as u64;
239    let deviation = presplit_distance(reference, new_fp, hash_log);
240    let threshold = p50.saturating_mul(PRESPLIT_THRESHOLD_BASE + penalty as u64)
241        / PRESPLIT_THRESHOLD_PENALTY_RATE;
242    deviation >= threshold
243}
244
245fn presplit_merge_events(acc: &mut PreSplitFingerprint, new_fp: &PreSplitFingerprint) {
246    for idx in 0..PRESPLIT_HASH_TABLE_SIZE {
247        acc.events[idx] = acc.events[idx].saturating_add(new_fp.events[idx]);
248    }
249    acc.nb_events = acc.nb_events.saturating_add(new_fp.nb_events);
250}
251
252fn donor_split_block_by_chunks(block: &[u8], level: usize) -> usize {
253    debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
254    debug_assert!((1..=4).contains(&level));
255    let (sampling_rate, hash_log) = match level - 1 {
256        0 => (43, 8),
257        1 => (11, 9),
258        2 => (5, 10),
259        _ => (1, 10),
260    };
261
262    let mut past = PreSplitFingerprint::default();
263    let mut new_events = PreSplitFingerprint::default();
264    let mut penalty = PRESPLIT_THRESHOLD_PENALTY;
265    presplit_record_fingerprint(
266        &mut past,
267        &block[..PRESPLIT_CHUNK_SIZE],
268        sampling_rate,
269        hash_log,
270    );
271    let mut pos = PRESPLIT_CHUNK_SIZE;
272    while pos <= block.len() - PRESPLIT_CHUNK_SIZE {
273        presplit_record_fingerprint(
274            &mut new_events,
275            &block[pos..pos + PRESPLIT_CHUNK_SIZE],
276            sampling_rate,
277            hash_log,
278        );
279        if presplit_fingerprints_differ(&past, &new_events, penalty, hash_log) {
280            return pos;
281        }
282        presplit_merge_events(&mut past, &new_events);
283        if penalty > 0 {
284            penalty -= 1;
285        }
286        pos += PRESPLIT_CHUNK_SIZE;
287    }
288    block.len()
289}
290
291/// Donor port of `ZSTD_splitBlock_fromBorders` (`zstd_preSplit.c:198`).
292/// Records two 512-byte byte-histograms — one from each end of a 128 KB
293/// block — and a third from the middle as a tie-breaker; returns either
294/// a quantised split point (32 KB / 64 KB / 96 KB) or the full block
295/// size when the two ends look indistinguishable. Cheaper than the
296/// chunk-based path because it touches at most 1.5 KB of input
297/// regardless of block size.
298fn donor_split_block_from_borders(block: &[u8]) -> usize {
299    debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
300    let block_size = block.len();
301    let mut past = PreSplitFingerprint::default();
302    let mut new_fp = PreSplitFingerprint::default();
303    presplit_record_byte_histogram(&mut past, &block[..PRESPLIT_BORDERS_SEGMENT]);
304    presplit_record_byte_histogram(&mut new_fp, &block[block_size - PRESPLIT_BORDERS_SEGMENT..]);
305    // Donor uses `penalty = 0, hash_log = 8` — i.e. raw byte histogram
306    // distance with no threshold padding (`zstd_preSplit.c:214`).
307    if !presplit_fingerprints_differ(&past, &new_fp, 0, 8) {
308        return block_size;
309    }
310
311    let mut middle = PreSplitFingerprint::default();
312    let mid_start = block_size / 2 - PRESPLIT_BORDERS_SEGMENT / 2;
313    presplit_record_byte_histogram(
314        &mut middle,
315        &block[mid_start..mid_start + PRESPLIT_BORDERS_SEGMENT],
316    );
317
318    let dist_from_begin = presplit_distance(&past, &middle, 8);
319    let dist_from_end = presplit_distance(&new_fp, &middle, 8);
320    // Donor `SEGMENT_SIZE * SEGMENT_SIZE / 3` (`zstd_preSplit.c:221`):
321    // if the middle is roughly equidistant from both ends, the change
322    // sits near the centre — split at the midpoint.
323    let min_distance = (PRESPLIT_BORDERS_SEGMENT as u64) * (PRESPLIT_BORDERS_SEGMENT as u64) / 3;
324    if dist_from_begin.abs_diff(dist_from_end) < min_distance {
325        return 64 * 1024;
326    }
327    // Larger `dist_from_begin` (i.e. `middle` farther from the head
328    // fingerprint, equivalently closer to the tail) means the new
329    // statistics already dominate the centre — the transition
330    // happened EARLY → emit a small 32 KB head and let the 96 KB
331    // tail absorb the rest. Inverse case: `dist_from_end` larger
332    // (middle still resembles the head) means the transition is
333    // LATE → emit a 96 KB head so the trailing 32 KB carries the
334    // new statistics alone.
335    if dist_from_begin > dist_from_end {
336        32 * 1024
337    } else {
338        96 * 1024
339    }
340}
341
342fn donor_pre_split_level(level: CompressionLevel) -> Option<usize> {
343    match level {
344        // Donor `ZSTD_blockSplitter_level` table (`clevels.h`): cheap
345        // borders heuristic for lazy2 / btlazy2 strategies (levels
346        // 11..=15) — the splitter still pays for itself on
347        // heterogeneous payloads but the per-block cost stays bounded
348        // by two 512-byte histograms.
349        CompressionLevel::Level(11..=15) => Some(0),
350        // C zstd's default splitter level for btopt/btultra/btultra2 is 4
351        // (`ZSTD_splitBlock_byChunks` with internal level 3 — sampling
352        // rate 1, `hashLog` 10).
353        CompressionLevel::Level(16..=22) => Some(4),
354        _ => None,
355    }
356}
357
358pub(crate) fn donor_optimal_block_size(
359    level: CompressionLevel,
360    block: &[u8],
361    remaining_src_size: usize,
362    block_size_max: usize,
363    savings: i64,
364) -> usize {
365    let Some(split_level) = donor_pre_split_level(level) else {
366        return remaining_src_size.min(block_size_max);
367    };
368    if remaining_src_size < MAX_BLOCK_SIZE as usize || block_size_max < MAX_BLOCK_SIZE as usize {
369        return remaining_src_size.min(block_size_max);
370    }
371    if savings < 3 {
372        return MAX_BLOCK_SIZE as usize;
373    }
374    if block.len() < MAX_BLOCK_SIZE as usize {
375        return remaining_src_size.min(block_size_max);
376    }
377    // Donor `ZSTD_splitBlock` dispatch (`zstd_preSplit.c:234`):
378    // `split_level == 0` → cheap borders heuristic;
379    // `split_level == 1..=4` → byChunks with internal sampling level
380    // `split_level - 1`.
381    let raw_split = if split_level == 0 {
382        donor_split_block_from_borders(&block[..MAX_BLOCK_SIZE as usize])
383    } else {
384        donor_split_block_by_chunks(&block[..MAX_BLOCK_SIZE as usize], split_level)
385    };
386    raw_split
387        .max(PRESPLIT_BLOCK_MIN)
388        .min(MAX_BLOCK_SIZE as usize)
389}
390
/// Mutable encoder state owned by a compressor and passed to the block
/// encoder (`compress_block_encoded`) for every block.
391pub(crate) struct CompressState<M: Matcher> {
    /// Match finder; `FrameCompressor::compress` resets it with the active
    /// level at the start of every frame.
392    pub(crate) matcher: M,
    /// Huffman table carried between blocks. At frame start it is cleared,
    /// or seeded from the cached dictionary entropy when dictionary state is
    /// in use.
393    pub(crate) last_huff_table: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Default and previous FSE tables; see `FseTables`.
394    pub(crate) fse_tables: FseTables,
    /// Scratch state for compressed-block encoding.
    /// NOTE(review): presumably reused between blocks to avoid
    /// reallocation — confirm in `encoding::blocks`.
395    pub(crate) block_scratch: crate::encoding::blocks::CompressedBlockScratch,
396    /// Offset history for repeat offset encoding: [rep0, rep1, rep2].
397    /// Initialized to [1, 4, 8] per RFC 8878 §3.1.2.5.
398    pub(crate) offset_hist: [u32; 3],
399    /// Strategy tag resolved from the current `CompressionLevel` at every
400    /// `matcher.reset()` call. Used by the literal-compression gates
401    /// (`min_literals_to_compress`, `min_gain`) in
402    /// `encoding::blocks::compressed` to mirror donor's strategy-aware
403    /// thresholds (`zstd_compress_literals.c:114-127, 187-188`).
404    ///
405    /// **Invariant (required of every construction site):** must be
406    /// initialized from the active `CompressionLevel` via
407    /// `StrategyTag::for_compression_level`, and re-synced from the
408    /// active level alongside every `matcher.reset()` call so the
409    /// level-aware gates stay correct after a level change. The two
410    /// reset sites that own this sync are `FrameCompressor::compress`
411    /// and `StreamingEncoder::ensure_frame_started`. There is no
412    /// `Default` impl — production constructors
413    /// (`FrameCompressor::new`, `new_with_matcher`, the streaming
414    /// encoder constructor) plumb this explicitly. Tests that build
415    /// `CompressState` by hand must also supply a value.
416    pub(crate) strategy_tag: crate::encoding::strategy::StrategyTag,
417}
418
419impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
420    /// Create a new `FrameCompressor`
421    pub fn new(compression_level: CompressionLevel) -> Self {
422        Self {
423            uncompressed_data: None,
424            compressed_data: None,
425            compression_level,
426            dictionary: None,
427            dictionary_entropy_cache: None,
428            source_size_hint: None,
429            state: CompressState {
430                matcher: MatchGeneratorDriver::new(1024 * 128, 1),
431                last_huff_table: None,
432                fse_tables: FseTables::new(),
433                block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
434                offset_hist: [1, 4, 8],
435                strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
436                    compression_level,
437                ),
438            },
439            magicless: false,
440            #[cfg(feature = "hash")]
441            hasher: XxHash64::with_seed(0),
442        }
443    }
444}
445
446impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
447    /// Create a new `FrameCompressor` with a custom matching algorithm implementation
448    pub fn new_with_matcher(matcher: M, compression_level: CompressionLevel) -> Self {
449        Self {
450            uncompressed_data: None,
451            compressed_data: None,
452            dictionary: None,
453            dictionary_entropy_cache: None,
454            source_size_hint: None,
455            state: CompressState {
456                matcher,
457                last_huff_table: None,
458                fse_tables: FseTables::new(),
459                block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
460                offset_hist: [1, 4, 8],
461                strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
462                    compression_level,
463                ),
464            },
465            compression_level,
466            magicless: false,
467            #[cfg(feature = "hash")]
468            hasher: XxHash64::with_seed(0),
469        }
470    }
471
472    /// Enable or disable magicless frame format (`ZSTD_f_zstd1_magicless`).
473    ///
474    /// When set to `true`, emitted frames omit the 4-byte magic number
475    /// prefix. The matching decoder must be configured to expect a
476    /// magicless stream — wire-format only round-trips with a
477    /// magicless-aware decoder.
    ///
    /// Both constructors start with this flag set to `false`.
478    pub fn set_magicless(&mut self, magicless: bool) {
479        self.magicless = magicless;
480    }
481
482    /// Before calling [FrameCompressor::compress] you need to set the source.
483    ///
484    /// This is the data that is compressed and written into the drain.
485    pub fn set_source(&mut self, uncompressed_data: R) -> Option<R> {
486        self.uncompressed_data.replace(uncompressed_data)
487    }
488
489    /// Before calling [FrameCompressor::compress] you need to set the drain.
490    ///
491    /// As the compressor compresses data, the drain serves as a place for the output to be writte.
492    pub fn set_drain(&mut self, compressed_data: W) -> Option<W> {
493        self.compressed_data.replace(compressed_data)
494    }
495
496    /// Provide a hint about the total uncompressed size for the next frame.
497    ///
498    /// When set, the encoder selects smaller hash tables and windows for
499    /// small inputs, matching the C zstd source-size-class behavior.
500    ///
501    /// This hint applies only to frame payload bytes (`size`). Dictionary
502    /// history is primed separately and does not inflate the hinted size or
503    /// advertised frame window.
504    /// Must be called before [`compress`](Self::compress).
    ///
    /// The hint is consumed by that call (`compress` takes it out of the
    /// compressor), so it has to be provided again for every frame that
    /// should use one.
505    pub fn set_source_size_hint(&mut self, size: u64) {
506        self.source_size_hint = Some(size);
507    }
508
509    /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain
510    ///
511    /// This will repeatedly call [Read::read] on the source to fill up blocks until the source returns 0 on the read call.
512    /// All compressed blocks are buffered in memory so that the frame header can include the
513    /// `Frame_Content_Size` field (which requires knowing the total uncompressed size). The
514    /// entire frame — header, blocks, and optional checksum — is then written to the drain
515    /// at the end. This means peak memory usage is O(compressed_size).
516    ///
517    /// To avoid endlessly encoding from a potentially endless source (like a network socket) you can use the
518    /// [Read::take] function
519    pub fn compress(&mut self) {
520        let initial_size_hint = self.source_size_hint;
521        let source_size_hint_known = initial_size_hint.is_some();
522        let use_dictionary_state =
523            !matches!(self.compression_level, CompressionLevel::Uncompressed)
524                && self.state.matcher.supports_dictionary_priming()
525                && self.dictionary.is_some();
526        if let Some(size_hint) = self.source_size_hint.take() {
527            // Keep source-size hint scoped to payload bytes; dictionary priming
528            // is applied separately and should not force larger matcher sizing.
529            self.state.matcher.set_source_size_hint(size_hint);
530        }
531        // Clearing buffers to allow re-using of the compressor
532        self.state.matcher.reset(self.compression_level);
533        self.state.offset_hist = [1, 4, 8];
534        // Sync `state.strategy_tag` to the level resolved at this reset so
535        // the literal-compression gates (`min_literals_to_compress` /
536        // `min_gain` in `encoding::blocks::compressed`) see the correct
537        // strategy for the next frame. Frame-by-frame level changes go
538        // through this same `compress()` entry point, so re-syncing here
539        // covers level switches without touching the matcher dispatch.
540        self.state.strategy_tag =
541            crate::encoding::strategy::StrategyTag::for_compression_level(self.compression_level);
542        let cached_entropy = if use_dictionary_state {
543            self.dictionary_entropy_cache.as_ref()
544        } else {
545            None
546        };
547        if use_dictionary_state && let Some(dict) = self.dictionary.as_ref() {
548            // This state drives sequence encoding, while matcher priming below updates
549            // the match generator's internal repeat-offset history for match finding.
550            self.state.offset_hist = dict.offset_hist;
551            self.state
552                .matcher
553                .prime_with_dictionary(dict.dict_content.as_slice(), dict.offset_hist);
554        }
555        if let Some(cache) = cached_entropy {
556            self.state.last_huff_table.clone_from(&cache.huff);
557        } else {
558            self.state.last_huff_table = None;
559        }
560        // `clone_from` keeps frame-to-frame seeding cheap for reused compressors by
561        // reusing existing allocations where possible instead of reallocating every frame.
562        if let Some(cache) = cached_entropy {
563            self.state
564                .fse_tables
565                .ll_previous
566                .clone_from(&cache.ll_previous);
567            self.state
568                .fse_tables
569                .ml_previous
570                .clone_from(&cache.ml_previous);
571            self.state
572                .fse_tables
573                .of_previous
574                .clone_from(&cache.of_previous);
575        } else {
576            self.state.fse_tables.ll_previous = None;
577            self.state.fse_tables.ml_previous = None;
578            self.state.fse_tables.of_previous = None;
579        }
580        let ll_entropy = cached_entropy.and_then(|cache| match cache.ll_previous.as_ref() {
581            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
582            _ => None,
583        });
584        let ml_entropy = cached_entropy.and_then(|cache| match cache.ml_previous.as_ref() {
585            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
586            _ => None,
587        });
588        let of_entropy = cached_entropy.and_then(|cache| match cache.of_previous.as_ref() {
589            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
590            _ => None,
591        });
592        self.state.matcher.seed_dictionary_entropy(
593            self.state.last_huff_table.as_ref(),
594            ll_entropy,
595            ml_entropy,
596            of_entropy,
597        );
598        #[cfg(feature = "hash")]
599        {
600            self.hasher = XxHash64::with_seed(0);
601        }
602        let source = self.uncompressed_data.as_mut().unwrap();
603        let drain = self.compressed_data.as_mut().unwrap();
604        let window_size = self.state.matcher.window_size();
605        assert!(
606            window_size != 0,
607            "matcher reported window_size == 0, which is invalid"
608        );
609        // Accumulate all compressed blocks; the frame header is written
610        // after all input has been read so that Frame_Content_Size is
611        // known. The default seed is one donor block; smaller seeds for
612        // small payloads avoid pinning a full block worth of bytes when
613        // the compressed output fits in a few hundred bytes. For larger
614        // inputs the default seed amortises the first few `Vec::extend`
615        // doublings cheaply and the `peak - default_seed` residue is
616        // dominated by internal `compress_block_encoded` buffers anyway,
617        // so changing it produces no measurable savings.
618        //
619        // Seed-size tiers (mirrors donor `ZSTD_CStreamOutSize` naming):
620        //
621        // * `ALL_BLOCKS_TINY_CAP` — payload ≤ this size, seed equals
622        //   payload bound; ≥ everything compressed output could need
623        //   for a tiny input.
624        // * `ALL_BLOCKS_SMALL_CAP` — small-input seed picked to absorb
625        //   one or two doublings without over-allocating.
626        // * `ALL_BLOCKS_DEFAULT_CAP` — one donor block; the value the
627        //   rest of the encoder is sized around.
628        const ALL_BLOCKS_TINY_THRESHOLD: u64 = 4 * 1024;
629        const ALL_BLOCKS_SMALL_THRESHOLD: u64 = 64 * 1024;
630        const ALL_BLOCKS_TINY_CAP: usize = 4 * 1024;
631        const ALL_BLOCKS_SMALL_CAP: usize = 16 * 1024;
632        const ALL_BLOCKS_DEFAULT_CAP: usize = 130 * 1024;
633        let initial_all_blocks_cap = match initial_size_hint {
634            Some(h) if h <= ALL_BLOCKS_TINY_THRESHOLD => ALL_BLOCKS_TINY_CAP,
635            Some(h) if h <= ALL_BLOCKS_SMALL_THRESHOLD => ALL_BLOCKS_SMALL_CAP,
636            _ => ALL_BLOCKS_DEFAULT_CAP,
637        };
638        let mut all_blocks: Vec<u8> = Vec::with_capacity(initial_all_blocks_cap);
639        let mut total_uncompressed: u64 = 0;
640        let mut pending_input: Vec<u8> = Vec::new();
641        let mut reached_eof = false;
642        let mut savings = 0i64;
643        // Compress block by block
644        loop {
645            // Read up to one donor block. When the pre-block splitter keeps a
646            // suffix, top it back up before compressing the next block, matching
647            // ZSTD_compress_frameChunk() over a contiguous input buffer.
648            let block_capacity = MAX_BLOCK_SIZE as usize;
649            let had_pending = !pending_input.is_empty();
650            let mut uncompressed_data = if had_pending {
651                core::mem::take(&mut pending_input)
652            } else {
653                self.state.matcher.get_next_space()
654            };
655            let mut filled = if had_pending {
656                uncompressed_data.len()
657            } else {
658                0
659            };
660            if uncompressed_data.len() < block_capacity {
661                uncompressed_data.resize(block_capacity, 0);
662            }
663            'read_loop: loop {
664                if reached_eof || filled == block_capacity {
665                    break 'read_loop;
666                }
667                let new_bytes = source
668                    .read(&mut uncompressed_data[filled..block_capacity])
669                    .unwrap();
670                if new_bytes == 0 {
671                    reached_eof = true;
672                    break 'read_loop;
673                }
674                filled += new_bytes;
675                total_uncompressed += new_bytes as u64;
676            }
677            uncompressed_data.truncate(filled);
678            let mut last_block = reached_eof;
679            let remaining_for_split = if reached_eof {
680                uncompressed_data.len()
681            } else {
682                block_capacity
683            };
684            if !matches!(self.compression_level, CompressionLevel::Uncompressed)
685                && uncompressed_data.len() == block_capacity
686            {
687                let block_len = donor_optimal_block_size(
688                    self.compression_level,
689                    &uncompressed_data,
690                    remaining_for_split,
691                    block_capacity,
692                    savings,
693                );
694                if block_len < uncompressed_data.len() {
695                    pending_input = uncompressed_data.split_off(block_len);
696                    // `split_off` returns a Vec whose capacity is typically
697                    // close to its length. Next iteration's `had_pending`
698                    // branch moves `pending_input` into `uncompressed_data`
699                    // and resizes to `block_capacity`, which would reallocate
700                    // from scratch on every pre-split. Pre-reserve here so
701                    // the resize stays in-place.
702                    if pending_input.capacity() < block_capacity {
703                        pending_input.reserve_exact(block_capacity - pending_input.len());
704                    }
705                    last_block = false;
706                }
707            }
708            // As we read, hash that data too
709            #[cfg(feature = "hash")]
710            self.hasher.write(&uncompressed_data);
711            // Special handling is needed for compression of a totally empty file
712            if uncompressed_data.is_empty() {
713                let header = BlockHeader {
714                    last_block: true,
715                    block_type: crate::blocks::block::BlockType::Raw,
716                    block_size: 0,
717                };
718                header.serialize(&mut all_blocks);
719                break;
720            }
721
722            match self.compression_level {
723                CompressionLevel::Uncompressed => {
724                    let header = BlockHeader {
725                        last_block,
726                        block_type: crate::blocks::block::BlockType::Raw,
727                        block_size: uncompressed_data.len().try_into().unwrap(),
728                    };
729                    header.serialize(&mut all_blocks);
730                    all_blocks.extend_from_slice(&uncompressed_data);
731                    savings +=
732                        uncompressed_data.len() as i64 - (3 + uncompressed_data.len()) as i64;
733                }
734                CompressionLevel::Fastest
735                | CompressionLevel::Default
736                | CompressionLevel::Better
737                | CompressionLevel::Best
738                | CompressionLevel::Level(_) => {
739                    let before_len = all_blocks.len();
740                    let block_len = uncompressed_data.len();
741                    compress_block_encoded(
742                        &mut self.state,
743                        self.compression_level,
744                        last_block,
745                        uncompressed_data,
746                        &mut all_blocks,
747                    );
748                    savings += block_len as i64 - (all_blocks.len() - before_len) as i64;
749                }
750            }
751            if last_block && pending_input.is_empty() {
752                break;
753            }
754        }
755
756        // Now that total_uncompressed is known, write the frame header with FCS.
757        // Match the donor framing policy for pledged one-shot inputs: use a
758        // single-segment frame whenever the source fits the active window.
759        let single_segment = !use_dictionary_state
760            && source_size_hint_known
761            && total_uncompressed >= 512
762            && total_uncompressed <= window_size;
763        let header = FrameHeader {
764            frame_content_size: Some(total_uncompressed),
765            single_segment,
766            content_checksum: cfg!(feature = "hash"),
767            dictionary_id: if use_dictionary_state {
768                self.dictionary.as_ref().map(|dict| dict.id as u64)
769            } else {
770                None
771            },
772            window_size: if single_segment {
773                None
774            } else {
775                Some(window_size)
776            },
777            magicless: self.magicless,
778        };
779        // Write the frame header and compressed blocks separately to avoid
780        // shifting the entire `all_blocks` buffer to prepend the header.
781        let mut header_buf: Vec<u8> = Vec::with_capacity(14);
782        header.serialize(&mut header_buf);
783        drain.write_all(&header_buf).unwrap();
784        drain.write_all(&all_blocks).unwrap();
785
786        // If the `hash` feature is enabled, then `content_checksum` is set to true in the header
787        // and a 32 bit hash is written at the end of the data.
788        #[cfg(feature = "hash")]
789        {
790            // Because we only have the data as a reader, we need to read all of it to calculate the checksum
791            // Possible TODO: create a wrapper around self.uncompressed data that hashes the data as it's read?
792            let content_checksum = self.hasher.finish();
793            drain
794                .write_all(&(content_checksum as u32).to_le_bytes())
795                .unwrap();
796        }
797    }
798
799    /// Get a mutable reference to the source
800    pub fn source_mut(&mut self) -> Option<&mut R> {
801        self.uncompressed_data.as_mut()
802    }
803
804    /// Get a mutable reference to the drain
805    pub fn drain_mut(&mut self) -> Option<&mut W> {
806        self.compressed_data.as_mut()
807    }
808
809    /// Get a reference to the source
810    pub fn source(&self) -> Option<&R> {
811        self.uncompressed_data.as_ref()
812    }
813
814    /// Get a reference to the drain
815    pub fn drain(&self) -> Option<&W> {
816        self.compressed_data.as_ref()
817    }
818
819    /// Retrieve the source
820    pub fn take_source(&mut self) -> Option<R> {
821        self.uncompressed_data.take()
822    }
823
824    /// Retrieve the drain
825    pub fn take_drain(&mut self) -> Option<W> {
826        self.compressed_data.take()
827    }
828
829    /// Before calling [FrameCompressor::compress] you can replace the matcher
830    pub fn replace_matcher(&mut self, mut match_generator: M) -> M {
831        core::mem::swap(&mut match_generator, &mut self.state.matcher);
832        match_generator
833    }
834
835    /// Before calling [FrameCompressor::compress] you can replace the compression level
836    pub fn set_compression_level(
837        &mut self,
838        compression_level: CompressionLevel,
839    ) -> CompressionLevel {
840        let old = self.compression_level;
841        self.compression_level = compression_level;
842        old
843    }
844
845    /// Get the current compression level
846    pub fn compression_level(&self) -> CompressionLevel {
847        self.compression_level
848    }
849
850    /// Attach a pre-parsed dictionary to be used for subsequent compressions.
851    ///
852    /// In compressed modes, the dictionary id is written only when the active
853    /// matcher supports dictionary priming.
854    /// Uncompressed mode and non-priming matchers ignore the attached dictionary
855    /// at encode time.
856    pub fn set_dictionary(
857        &mut self,
858        dictionary: crate::decoding::Dictionary,
859    ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
860    {
861        if dictionary.id == 0 {
862            return Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId);
863        }
864        if let Some(index) = dictionary.offset_hist.iter().position(|&rep| rep == 0) {
865            return Err(
866                crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
867                    index: index as u8,
868                },
869            );
870        }
871        self.dictionary_entropy_cache = Some(CachedDictionaryEntropy {
872            huff: dictionary.huf.table.to_encoder_table(),
873            ll_previous: dictionary
874                .fse
875                .literal_lengths
876                .to_encoder_table()
877                .map(|table| PreviousFseTable::Custom(Box::new(table))),
878            ml_previous: dictionary
879                .fse
880                .match_lengths
881                .to_encoder_table()
882                .map(|table| PreviousFseTable::Custom(Box::new(table))),
883            of_previous: dictionary
884                .fse
885                .offsets
886                .to_encoder_table()
887                .map(|table| PreviousFseTable::Custom(Box::new(table))),
888        });
889        Ok(self.dictionary.replace(dictionary))
890    }
891
892    /// Parse and attach a serialized dictionary blob.
893    pub fn set_dictionary_from_bytes(
894        &mut self,
895        raw_dictionary: &[u8],
896    ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
897    {
898        let dictionary = crate::decoding::Dictionary::decode_dict(raw_dictionary)?;
899        self.set_dictionary(dictionary)
900    }
901
902    /// Remove the attached dictionary.
903    pub fn clear_dictionary(&mut self) -> Option<crate::decoding::Dictionary> {
904        self.dictionary_entropy_cache = None;
905        self.dictionary.take()
906    }
907}
908
909#[cfg(test)]
910mod tests {
911    #[cfg(all(feature = "dict_builder", feature = "std"))]
912    use alloc::format;
913    use alloc::vec;
914
915    use super::FrameCompressor;
916    use crate::blocks::block::BlockType;
917    use crate::common::{MAGIC_NUM, MAX_BLOCK_SIZE};
918    use crate::decoding::{FrameDecoder, block_decoder, frame::read_frame_header};
919    use crate::encoding::{Matcher, Sequence};
920    use alloc::vec::Vec;
921
922    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
923        let mut state = seed;
924        let mut data = Vec::with_capacity(len);
925        for _ in 0..len {
926            state = state
927                .wrapping_mul(6364136223846793005)
928                .wrapping_add(1442695040888963407);
929            data.push((state >> 33) as u8);
930        }
931        data
932    }
933
934    fn first_block_type(frame: &[u8]) -> BlockType {
935        let (_, header_size) = read_frame_header(frame).expect("frame header should parse");
936        let mut decoder = block_decoder::new();
937        let (header, _) = decoder
938            .read_block_header(&frame[header_size as usize..])
939            .expect("block header should parse");
940        header.block_type
941    }
942
943    /// Frame content size is written correctly and C zstd can decompress the output.
944    #[cfg(feature = "std")]
945    #[test]
946    fn fcs_header_written_and_c_zstd_compatible() {
947        let levels = [
948            crate::encoding::CompressionLevel::Uncompressed,
949            crate::encoding::CompressionLevel::Fastest,
950            crate::encoding::CompressionLevel::Default,
951            crate::encoding::CompressionLevel::Better,
952            crate::encoding::CompressionLevel::Best,
953        ];
954        let fcs_2byte = vec![0xCDu8; 300]; // 300 bytes → 2-byte FCS (256..=65791 range)
955        let large = vec![0xABu8; 100_000];
956        let inputs: [&[u8]; 5] = [
957            &[],
958            &[0x00],
959            b"abcdefghijklmnopqrstuvwxy\n",
960            &fcs_2byte,
961            &large,
962        ];
963        for level in levels {
964            for data in &inputs {
965                let compressed = crate::encoding::compress_to_vec(*data, level);
966                // Verify FCS is present and correct
967                let header = crate::decoding::frame::read_frame_header(compressed.as_slice())
968                    .unwrap()
969                    .0;
970                assert_eq!(
971                    header.frame_content_size(),
972                    data.len() as u64,
973                    "FCS mismatch for len={} level={:?}",
974                    data.len(),
975                    level,
976                );
977                // Confirm the FCS field is actually present in the header
978                // (not just the decoder returning 0 for absent FCS).
979                assert_ne!(
980                    header.descriptor.frame_content_size_bytes().unwrap(),
981                    0,
982                    "FCS field must be present for len={} level={:?}",
983                    data.len(),
984                    level,
985                );
986                // Verify C zstd can decompress
987                let mut decoded = Vec::new();
988                zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
989                    |e| {
990                        panic!(
991                            "C zstd decode failed for len={} level={level:?}: {e}",
992                            data.len()
993                        )
994                    },
995                );
996                assert_eq!(
997                    decoded.as_slice(),
998                    *data,
999                    "C zstd roundtrip failed for len={}",
1000                    data.len()
1001                );
1002            }
1003        }
1004    }
1005
1006    #[cfg(feature = "std")]
1007    #[test]
1008    fn source_size_hint_fastest_remains_ffi_compatible_small_input() {
1009        let data = vec![0xAB; 2047];
1010        let compressed = {
1011            let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1012            compressor.set_source_size_hint(data.len() as u64);
1013            compressor.set_source(data.as_slice());
1014            let mut out = Vec::new();
1015            compressor.set_drain(&mut out);
1016            compressor.compress();
1017            out
1018        };
1019
1020        let mut decoded = Vec::new();
1021        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1022        assert_eq!(decoded, data);
1023    }
1024
1025    #[cfg(feature = "std")]
1026    #[test]
1027    fn small_hinted_default_frame_uses_single_segment_header() {
1028        let data = generate_data(0xD15E_A5ED, 1024);
1029        let compressed = {
1030            let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
1031            compressor.set_source_size_hint(data.len() as u64);
1032            compressor.set_source(data.as_slice());
1033            let mut out = Vec::new();
1034            compressor.set_drain(&mut out);
1035            compressor.compress();
1036            out
1037        };
1038
1039        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1040        assert!(
1041            frame_header.descriptor.single_segment_flag(),
1042            "small hinted default frames should use single-segment header for Rust/FFI parity"
1043        );
1044        assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1045        let mut decoded = Vec::new();
1046        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1047            .expect("ffi decoder must accept single-segment small hinted default frame");
1048        assert_eq!(decoded, data);
1049    }
1050
1051    #[cfg(feature = "std")]
1052    #[test]
1053    fn small_hinted_numeric_default_levels_use_single_segment_header() {
1054        let data = generate_data(0xA11C_E003, 1024);
1055        for level in [
1056            super::CompressionLevel::Level(0),
1057            super::CompressionLevel::Level(3),
1058        ] {
1059            let compressed = {
1060                let mut compressor = FrameCompressor::new(level);
1061                compressor.set_source_size_hint(data.len() as u64);
1062                compressor.set_source(data.as_slice());
1063                let mut out = Vec::new();
1064                compressor.set_drain(&mut out);
1065                compressor.compress();
1066                out
1067            };
1068
1069            let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1070            assert!(
1071                frame_header.descriptor.single_segment_flag(),
1072                "small hinted numeric default level frames should use single-segment header (level={level:?})"
1073            );
1074            assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1075            let mut decoded = Vec::new();
1076            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1077                panic!(
1078                    "ffi decoder must accept single-segment small hinted numeric default level frame (level={level:?}): {e}"
1079                )
1080            });
1081            assert_eq!(decoded, data);
1082        }
1083    }
1084
1085    #[cfg(feature = "std")]
1086    #[test]
1087    fn source_size_hint_levels_remain_ffi_compatible_small_inputs_matrix() {
1088        let levels = [
1089            super::CompressionLevel::Fastest,
1090            super::CompressionLevel::Default,
1091            super::CompressionLevel::Better,
1092            super::CompressionLevel::Best,
1093            super::CompressionLevel::Level(-1),
1094            super::CompressionLevel::Level(2),
1095            super::CompressionLevel::Level(3),
1096            super::CompressionLevel::Level(4),
1097            super::CompressionLevel::Level(11),
1098        ];
1099        let sizes = [
1100            511usize, 512, 513, 1023, 1024, 1536, 2047, 2048, 4095, 4096, 8191, 16_384, 16_385,
1101        ];
1102
1103        for (seed_idx, seed) in [11u64, 23, 41].into_iter().enumerate() {
1104            for &size in &sizes {
1105                let data = generate_data(seed + seed_idx as u64, size);
1106                for &level in &levels {
1107                    let compressed = {
1108                        let mut compressor = FrameCompressor::new(level);
1109                        compressor.set_source_size_hint(data.len() as u64);
1110                        compressor.set_source(data.as_slice());
1111                        let mut out = Vec::new();
1112                        compressor.set_drain(&mut out);
1113                        compressor.compress();
1114                        out
1115                    };
1116                    if matches!(size, 511 | 512) {
1117                        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1118                        assert_eq!(
1119                            frame_header.descriptor.single_segment_flag(),
1120                            size == 512,
1121                            "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1122                        );
1123                    }
1124
1125                    let mut decoded = Vec::new();
1126                    zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1127                        |e| {
1128                            panic!(
1129                                "ffi decode failed with source-size hint: level={level:?} size={size} seed={} err={e}",
1130                                seed + seed_idx as u64
1131                            )
1132                        },
1133                    );
1134                    assert_eq!(
1135                        decoded,
1136                        data,
1137                        "hinted ffi roundtrip mismatch: level={level:?} size={size} seed={}",
1138                        seed + seed_idx as u64
1139                    );
1140                }
1141            }
1142        }
1143    }
1144
1145    #[cfg(feature = "std")]
1146    #[test]
1147    fn hinted_levels_use_single_segment_header_symmetrically() {
1148        let levels = [
1149            super::CompressionLevel::Fastest,
1150            super::CompressionLevel::Default,
1151            super::CompressionLevel::Better,
1152            super::CompressionLevel::Best,
1153            super::CompressionLevel::Level(0),
1154            super::CompressionLevel::Level(2),
1155            super::CompressionLevel::Level(3),
1156            super::CompressionLevel::Level(4),
1157            super::CompressionLevel::Level(11),
1158        ];
1159        for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1160            let size = 1024 + seed_idx * 97;
1161            let data = generate_data(seed, size);
1162            for &level in &levels {
1163                let compressed = {
1164                    let mut compressor = FrameCompressor::new(level);
1165                    compressor.set_source_size_hint(data.len() as u64);
1166                    compressor.set_source(data.as_slice());
1167                    let mut out = Vec::new();
1168                    compressor.set_drain(&mut out);
1169                    compressor.compress();
1170                    out
1171                };
1172                let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1173                assert!(
1174                    frame_header.descriptor.single_segment_flag(),
1175                    "hinted frame should be single-segment for level={level:?} size={}",
1176                    data.len()
1177                );
1178                assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1179                let mut decoded = Vec::new();
1180                zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1181                    panic!(
1182                        "ffi decode failed for hinted single-segment parity: level={level:?} size={} err={e}",
1183                        data.len()
1184                    )
1185                });
1186                assert_eq!(decoded, data);
1187            }
1188        }
1189    }
1190
1191    #[cfg(feature = "std")]
1192    #[test]
1193    fn hinted_levels_pin_511_512_single_segment_boundary() {
1194        let levels = [
1195            super::CompressionLevel::Fastest,
1196            super::CompressionLevel::Default,
1197            super::CompressionLevel::Better,
1198            super::CompressionLevel::Best,
1199            super::CompressionLevel::Level(0),
1200            super::CompressionLevel::Level(2),
1201            super::CompressionLevel::Level(3),
1202            super::CompressionLevel::Level(4),
1203            super::CompressionLevel::Level(11),
1204        ];
1205        for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1206            for &size in &[511usize, 512] {
1207                let data = generate_data(seed + seed_idx as u64, size);
1208                for &level in &levels {
1209                    let compressed = {
1210                        let mut compressor = FrameCompressor::new(level);
1211                        compressor.set_source_size_hint(data.len() as u64);
1212                        compressor.set_source(data.as_slice());
1213                        let mut out = Vec::new();
1214                        compressor.set_drain(&mut out);
1215                        compressor.compress();
1216                        out
1217                    };
1218                    let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1219                    assert_eq!(
1220                        frame_header.descriptor.single_segment_flag(),
1221                        size == 512,
1222                        "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1223                    );
1224                    let mut decoded = Vec::new();
1225                    zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1226                        |e| {
1227                            panic!(
1228                                "ffi decode failed at single-segment boundary: level={level:?} size={size} seed={} err={e}",
1229                                seed + seed_idx as u64
1230                            )
1231                        },
1232                    );
1233                    assert_eq!(decoded, data);
1234                }
1235            }
1236        }
1237    }
1238
1239    #[cfg(feature = "std")]
1240    #[test]
1241    fn fastest_random_block_uses_raw_fast_path() {
1242        let data = generate_data(0xC0FF_EE11, 10 * 1024);
1243        let compressed =
1244            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Fastest);
1245
1246        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1247
1248        let mut decoded = Vec::new();
1249        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1250        assert_eq!(decoded, data);
1251    }
1252
1253    #[cfg(feature = "std")]
1254    #[test]
1255    fn default_random_block_uses_raw_fast_path() {
1256        let data = generate_data(0xD15E_A5ED, 10 * 1024);
1257        let compressed =
1258            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Default);
1259
1260        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1261
1262        let mut decoded = Vec::new();
1263        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1264        assert_eq!(decoded, data);
1265    }
1266
1267    #[cfg(feature = "std")]
1268    #[test]
1269    fn best_random_block_uses_raw_fast_path() {
1270        let data = generate_data(0xB35C_AFE1, 10 * 1024);
1271        let compressed =
1272            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Best);
1273
1274        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1275
1276        let mut decoded = Vec::new();
1277        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1278        assert_eq!(decoded, data);
1279    }
1280
1281    #[cfg(feature = "std")]
1282    #[test]
1283    fn level2_random_block_uses_raw_fast_path() {
1284        let data = generate_data(0xA11C_E222, 10 * 1024);
1285        let compressed =
1286            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Level(2));
1287
1288        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1289
1290        let mut decoded = Vec::new();
1291        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1292        assert_eq!(decoded, data);
1293    }
1294
1295    #[cfg(feature = "std")]
1296    #[test]
1297    fn better_random_block_uses_raw_fast_path() {
1298        let data = generate_data(0xBE77_E111, 10 * 1024);
1299        let compressed =
1300            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Better);
1301
1302        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1303
1304        let mut decoded = Vec::new();
1305        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1306        assert_eq!(decoded, data);
1307    }
1308
1309    #[cfg(feature = "std")]
1310    #[test]
1311    fn compressible_logs_do_not_fall_back_to_raw_fast_path() {
1312        let mut data = Vec::with_capacity(16 * 1024);
1313        const LINE: &[u8] =
1314            b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
1315        while data.len() < 16 * 1024 {
1316            let remaining = 16 * 1024 - data.len();
1317            data.extend_from_slice(&LINE[..LINE.len().min(remaining)]);
1318        }
1319
1320        fn assert_not_raw_for_level(data: &[u8], level: super::CompressionLevel) {
1321            let compressed = crate::encoding::compress_to_vec(data, level);
1322            assert_ne!(first_block_type(&compressed), BlockType::Raw);
1323            assert!(
1324                compressed.len() < data.len(),
1325                "compressible input should remain compressible for level={level:?}"
1326            );
1327            let mut decoded = Vec::new();
1328            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1329            assert_eq!(decoded, data);
1330        }
1331
1332        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Fastest);
1333        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Default);
1334        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Level(3));
1335        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Better);
1336        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Best);
1337    }
1338
1339    #[cfg(feature = "std")]
1340    #[test]
1341    fn hinted_small_compressible_frames_use_single_segment_across_levels() {
1342        let mut data = Vec::with_capacity(4 * 1024);
1343        const LINE: &[u8] =
1344            b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
1345        while data.len() < 4 * 1024 {
1346            let remaining = 4 * 1024 - data.len();
1347            data.extend_from_slice(&LINE[..LINE.len().min(remaining)]);
1348        }
1349
1350        for level in [
1351            super::CompressionLevel::Fastest,
1352            super::CompressionLevel::Default,
1353            super::CompressionLevel::Better,
1354            super::CompressionLevel::Best,
1355            super::CompressionLevel::Level(0),
1356            super::CompressionLevel::Level(3),
1357            super::CompressionLevel::Level(4),
1358            super::CompressionLevel::Level(11),
1359        ] {
1360            let compressed = {
1361                let mut compressor = FrameCompressor::new(level);
1362                compressor.set_source_size_hint(data.len() as u64);
1363                compressor.set_source(data.as_slice());
1364                let mut out = Vec::new();
1365                compressor.set_drain(&mut out);
1366                compressor.compress();
1367                out
1368            };
1369            let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1370            assert!(
1371                frame_header.descriptor.single_segment_flag(),
1372                "hinted small compressible frame should use single-segment (level={level:?})"
1373            );
1374            assert_ne!(
1375                first_block_type(&compressed),
1376                BlockType::Raw,
1377                "compressible hinted frame should stay off raw fast path (level={level:?})"
1378            );
1379            assert!(
1380                compressed.len() < data.len(),
1381                "compressible hinted frame should still shrink (level={level:?})"
1382            );
1383            let mut decoded = Vec::new();
1384            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1385                .unwrap_or_else(|e| panic!("ffi decode failed (level={level:?}): {e}"));
1386            assert_eq!(decoded, data);
1387        }
1388    }
1389
1390    struct NoDictionaryMatcher {
1391        last_space: Vec<u8>,
1392        window_size: u64,
1393    }
1394
1395    impl NoDictionaryMatcher {
1396        fn new(window_size: u64) -> Self {
1397            Self {
1398                last_space: Vec::new(),
1399                window_size,
1400            }
1401        }
1402    }
1403
1404    impl Matcher for NoDictionaryMatcher {
1405        fn get_next_space(&mut self) -> Vec<u8> {
1406            vec![0; self.window_size as usize]
1407        }
1408
1409        fn get_last_space(&mut self) -> &[u8] {
1410            self.last_space.as_slice()
1411        }
1412
1413        fn commit_space(&mut self, space: Vec<u8>) {
1414            self.last_space = space;
1415        }
1416
1417        fn skip_matching(&mut self) {}
1418
1419        fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1420            handle_sequence(Sequence::Literals {
1421                literals: self.last_space.as_slice(),
1422            });
1423        }
1424
1425        fn reset(&mut self, _level: super::CompressionLevel) {
1426            self.last_space.clear();
1427        }
1428
1429        fn window_size(&self) -> u64 {
1430            self.window_size
1431        }
1432    }
1433
1434    #[test]
1435    fn frame_starts_with_magic_num() {
1436        let mock_data = [1_u8, 2, 3].as_slice();
1437        let mut output: Vec<u8> = Vec::new();
1438        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1439        compressor.set_source(mock_data);
1440        compressor.set_drain(&mut output);
1441
1442        compressor.compress();
1443        assert!(output.starts_with(&MAGIC_NUM.to_le_bytes()));
1444    }
1445
1446    #[test]
1447    fn very_simple_raw_compress() {
1448        let mock_data = [1_u8, 2, 3].as_slice();
1449        let mut output: Vec<u8> = Vec::new();
1450        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1451        compressor.set_source(mock_data);
1452        compressor.set_drain(&mut output);
1453
1454        compressor.compress();
1455    }
1456
1457    #[test]
1458    fn very_simple_compress() {
1459        let mut mock_data = vec![0; 1 << 17];
1460        mock_data.extend(vec![1; (1 << 17) - 1]);
1461        mock_data.extend(vec![2; (1 << 18) - 1]);
1462        mock_data.extend(vec![2; 1 << 17]);
1463        mock_data.extend(vec![3; (1 << 17) - 1]);
1464        let mut output: Vec<u8> = Vec::new();
1465        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1466        compressor.set_source(mock_data.as_slice());
1467        compressor.set_drain(&mut output);
1468
1469        compressor.compress();
1470
1471        let mut decoder = FrameDecoder::new();
1472        let mut decoded = Vec::with_capacity(mock_data.len());
1473        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1474        assert_eq!(mock_data, decoded);
1475
1476        let mut decoded = Vec::new();
1477        zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
1478        assert_eq!(mock_data, decoded);
1479    }
1480
1481    #[test]
1482    fn rle_compress() {
1483        let mock_data = vec![0; 1 << 19];
1484        let mut output: Vec<u8> = Vec::new();
1485        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1486        compressor.set_source(mock_data.as_slice());
1487        compressor.set_drain(&mut output);
1488
1489        compressor.compress();
1490
1491        let mut decoder = FrameDecoder::new();
1492        let mut decoded = Vec::with_capacity(mock_data.len());
1493        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1494        assert_eq!(mock_data, decoded);
1495    }
1496
1497    #[test]
1498    fn aaa_compress() {
1499        let mock_data = vec![0, 1, 3, 4, 5];
1500        let mut output: Vec<u8> = Vec::new();
1501        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1502        compressor.set_source(mock_data.as_slice());
1503        compressor.set_drain(&mut output);
1504
1505        compressor.compress();
1506
1507        let mut decoder = FrameDecoder::new();
1508        let mut decoded = Vec::with_capacity(mock_data.len());
1509        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1510        assert_eq!(mock_data, decoded);
1511
1512        let mut decoded = Vec::new();
1513        zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
1514        assert_eq!(mock_data, decoded);
1515    }
1516
1517    #[test]
1518    fn dictionary_compression_sets_required_dict_id_and_roundtrips() {
1519        let dict_raw = include_bytes!("../../dict_tests/dictionary");
1520        let dict_for_encoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();
1521        let dict_for_decoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();
1522
1523        let mut data = Vec::new();
1524        for _ in 0..8 {
1525            data.extend_from_slice(&dict_for_decoder.dict_content[..2048]);
1526        }
1527
1528        let mut with_dict = Vec::new();
1529        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1530        let previous = compressor
1531            .set_dictionary_from_bytes(dict_raw)
1532            .expect("dictionary bytes should parse");
1533        assert!(
1534            previous.is_none(),
1535            "first dictionary insert should return None"
1536        );
1537        assert_eq!(
1538            compressor
1539                .set_dictionary(dict_for_encoder)
1540                .expect("valid dictionary should attach")
1541                .expect("set_dictionary_from_bytes inserted previous dictionary")
1542                .id,
1543            dict_for_decoder.id
1544        );
1545        compressor.set_source(data.as_slice());
1546        compressor.set_drain(&mut with_dict);
1547        compressor.compress();
1548
1549        let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
1550            .expect("encoded stream should have a frame header");
1551        assert_eq!(frame_header.dictionary_id(), Some(dict_for_decoder.id));
1552
1553        let mut decoder = FrameDecoder::new();
1554        let mut missing_dict_target = Vec::with_capacity(data.len());
1555        let err = decoder
1556            .decode_all_to_vec(&with_dict, &mut missing_dict_target)
1557            .unwrap_err();
1558        assert!(
1559            matches!(
1560                &err,
1561                crate::decoding::errors::FrameDecoderError::DictNotProvided { .. }
1562            ),
1563            "dict-compressed stream should require dictionary id, got: {err:?}"
1564        );
1565
1566        let mut decoder = FrameDecoder::new();
1567        decoder.add_dict(dict_for_decoder).unwrap();
1568        let mut decoded = Vec::with_capacity(data.len());
1569        decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
1570        assert_eq!(decoded, data);
1571
1572        let mut ffi_decoder = zstd::bulk::Decompressor::with_dictionary(dict_raw).unwrap();
1573        let mut ffi_decoded = Vec::with_capacity(data.len());
1574        let ffi_written = ffi_decoder
1575            .decompress_to_buffer(with_dict.as_slice(), &mut ffi_decoded)
1576            .unwrap();
1577        assert_eq!(ffi_written, data.len());
1578        assert_eq!(ffi_decoded, data);
1579    }
1580
1581    #[cfg(all(feature = "dict_builder", feature = "std"))]
1582    #[test]
1583    fn dictionary_compression_roundtrips_with_dict_builder_dictionary() {
1584        use std::io::Cursor;
1585
1586        let mut training = Vec::new();
1587        for idx in 0..256u32 {
1588            training.extend_from_slice(
1589                format!("tenant=demo table=orders key={idx} region=eu\n").as_bytes(),
1590            );
1591        }
1592        let mut raw_dict = Vec::new();
1593        crate::dictionary::create_raw_dict_from_source(
1594            Cursor::new(training.as_slice()),
1595            training.len(),
1596            &mut raw_dict,
1597            4096,
1598        )
1599        .expect("dict_builder training should succeed");
1600        assert!(
1601            !raw_dict.is_empty(),
1602            "dict_builder produced an empty dictionary"
1603        );
1604
1605        let dict_id = 0xD1C7_0008;
1606        let encoder_dict =
1607            crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
1608        let decoder_dict =
1609            crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
1610
1611        let mut payload = Vec::new();
1612        for idx in 0..96u32 {
1613            payload.extend_from_slice(
1614                format!(
1615                    "tenant=demo table=orders op=put key={idx} value=aaaaabbbbbcccccdddddeeeee\n"
1616                )
1617                .as_bytes(),
1618            );
1619        }
1620
1621        let mut without_dict = Vec::new();
1622        let mut baseline = FrameCompressor::new(super::CompressionLevel::Fastest);
1623        baseline.set_source(payload.as_slice());
1624        baseline.set_drain(&mut without_dict);
1625        baseline.compress();
1626
1627        let mut with_dict = Vec::new();
1628        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1629        compressor
1630            .set_dictionary(encoder_dict)
1631            .expect("valid dict_builder dictionary should attach");
1632        compressor.set_source(payload.as_slice());
1633        compressor.set_drain(&mut with_dict);
1634        compressor.compress();
1635
1636        let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
1637            .expect("encoded stream should have a frame header");
1638        assert_eq!(frame_header.dictionary_id(), Some(dict_id));
1639        let mut decoder = FrameDecoder::new();
1640        decoder.add_dict(decoder_dict).unwrap();
1641        let mut decoded = Vec::with_capacity(payload.len());
1642        decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
1643        assert_eq!(decoded, payload);
1644        assert!(
1645            with_dict.len() < without_dict.len(),
1646            "trained dictionary should improve compression for this small payload"
1647        );
1648    }
1649
1650    #[test]
1651    fn set_dictionary_from_bytes_seeds_entropy_tables_for_first_block() {
1652        let dict_raw = include_bytes!("../../dict_tests/dictionary");
1653        let mut output = Vec::new();
1654        let input = b"";
1655
1656        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1657        let previous = compressor
1658            .set_dictionary_from_bytes(dict_raw)
1659            .expect("dictionary bytes should parse");
1660        assert!(previous.is_none());
1661
1662        compressor.set_source(input.as_slice());
1663        compressor.set_drain(&mut output);
1664        compressor.compress();
1665
1666        assert!(
1667            compressor.state.last_huff_table.is_some(),
1668            "dictionary entropy should seed previous huffman table before first block"
1669        );
1670        assert!(
1671            compressor.state.fse_tables.ll_previous.is_some(),
1672            "dictionary entropy should seed previous ll table before first block"
1673        );
1674        assert!(
1675            compressor.state.fse_tables.ml_previous.is_some(),
1676            "dictionary entropy should seed previous ml table before first block"
1677        );
1678        assert!(
1679            compressor.state.fse_tables.of_previous.is_some(),
1680            "dictionary entropy should seed previous of table before first block"
1681        );
1682    }
1683
1684    #[test]
1685    fn set_dictionary_rejects_zero_dictionary_id() {
1686        let invalid = crate::decoding::Dictionary {
1687            id: 0,
1688            fse: crate::decoding::scratch::FSEScratch::new(),
1689            huf: crate::decoding::scratch::HuffmanScratch::new(),
1690            dict_content: vec![1, 2, 3],
1691            offset_hist: [1, 4, 8],
1692        };
1693
1694        let mut compressor: FrameCompressor<
1695            &[u8],
1696            Vec<u8>,
1697            crate::encoding::match_generator::MatchGeneratorDriver,
1698        > = FrameCompressor::new(super::CompressionLevel::Fastest);
1699        let result = compressor.set_dictionary(invalid);
1700        assert!(matches!(
1701            result,
1702            Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId)
1703        ));
1704    }
1705
1706    #[test]
1707    fn set_dictionary_rejects_zero_repeat_offsets() {
1708        let invalid = crate::decoding::Dictionary {
1709            id: 1,
1710            fse: crate::decoding::scratch::FSEScratch::new(),
1711            huf: crate::decoding::scratch::HuffmanScratch::new(),
1712            dict_content: vec![1, 2, 3],
1713            offset_hist: [0, 4, 8],
1714        };
1715
1716        let mut compressor: FrameCompressor<
1717            &[u8],
1718            Vec<u8>,
1719            crate::encoding::match_generator::MatchGeneratorDriver,
1720        > = FrameCompressor::new(super::CompressionLevel::Fastest);
1721        let result = compressor.set_dictionary(invalid);
1722        assert!(matches!(
1723            result,
1724            Err(
1725                crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
1726                    index: 0
1727                }
1728            )
1729        ));
1730    }
1731
1732    #[test]
1733    fn uncompressed_mode_does_not_require_dictionary() {
1734        let dict_id = 0xABCD_0001;
1735        let dict =
1736            crate::decoding::Dictionary::from_raw_content(dict_id, b"shared-history".to_vec())
1737                .expect("raw dictionary should be valid");
1738
1739        let payload = b"plain-bytes-that-should-stay-raw";
1740        let mut output = Vec::new();
1741        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1742        compressor
1743            .set_dictionary(dict)
1744            .expect("dictionary should attach in uncompressed mode");
1745        compressor.set_source(payload.as_slice());
1746        compressor.set_drain(&mut output);
1747        compressor.compress();
1748
1749        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1750            .expect("encoded frame should have a header");
1751        assert_eq!(
1752            frame_header.dictionary_id(),
1753            None,
1754            "raw/uncompressed frames must not advertise dictionary dependency"
1755        );
1756
1757        let mut decoder = FrameDecoder::new();
1758        let mut decoded = Vec::with_capacity(payload.len());
1759        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1760        assert_eq!(decoded, payload);
1761    }
1762
1763    #[test]
1764    fn dictionary_roundtrip_stays_valid_after_output_exceeds_window() {
1765        use crate::encoding::match_generator::MatchGeneratorDriver;
1766
1767        let dict_id = 0xABCD_0002;
1768        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1769            .expect("raw dictionary should be valid");
1770        let dict_for_decoder =
1771            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1772                .expect("raw dictionary should be valid");
1773
1774        // Payload must exceed the encoder's advertised window (512 KiB
1775        // for Fastest after `window_log = 19` alignment with donor's
1776        // L1 fast row in `clevels.h`) so the test actually exercises
1777        // cross-window-boundary behavior.
1778        let payload = b"abcdefgh".repeat(512 * 1024 / 8 + 64);
1779        let matcher = MatchGeneratorDriver::new(1024, 1);
1780
1781        let mut no_dict_output = Vec::new();
1782        let mut no_dict_compressor =
1783            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1784        no_dict_compressor.set_source(payload.as_slice());
1785        no_dict_compressor.set_drain(&mut no_dict_output);
1786        no_dict_compressor.compress();
1787        let (no_dict_frame_header, _) =
1788            crate::decoding::frame::read_frame_header(no_dict_output.as_slice())
1789                .expect("baseline frame should have a header");
1790        let no_dict_window = no_dict_frame_header
1791            .window_size()
1792            .expect("window size should be present");
1793
1794        let mut output = Vec::new();
1795        let matcher = MatchGeneratorDriver::new(1024, 1);
1796        let mut compressor =
1797            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1798        compressor
1799            .set_dictionary(dict)
1800            .expect("dictionary should attach");
1801        compressor.set_source(payload.as_slice());
1802        compressor.set_drain(&mut output);
1803        compressor.compress();
1804
1805        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1806            .expect("encoded frame should have a header");
1807        let advertised_window = frame_header
1808            .window_size()
1809            .expect("window size should be present");
1810        assert_eq!(
1811            advertised_window, no_dict_window,
1812            "dictionary priming must not inflate advertised window size"
1813        );
1814        assert!(
1815            payload.len() > advertised_window as usize,
1816            "test must cross the advertised window boundary"
1817        );
1818
1819        let mut decoder = FrameDecoder::new();
1820        decoder.add_dict(dict_for_decoder).unwrap();
1821        let mut decoded = Vec::with_capacity(payload.len());
1822        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1823        assert_eq!(decoded, payload);
1824    }
1825
1826    #[test]
1827    fn source_size_hint_with_dictionary_keeps_roundtrip_and_nonincreasing_window() {
1828        let dict_id = 0xABCD_0004;
1829        let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history
1830        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap();
1831        let dict_for_decoder =
1832            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
1833        let payload = b"abcdabcdabcdabcd".repeat(128);
1834
1835        let mut hinted_output = Vec::new();
1836        let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
1837        hinted.set_dictionary(dict).unwrap();
1838        hinted.set_source_size_hint(1);
1839        hinted.set_source(payload.as_slice());
1840        hinted.set_drain(&mut hinted_output);
1841        hinted.compress();
1842
1843        let mut no_hint_output = Vec::new();
1844        let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
1845        no_hint
1846            .set_dictionary(
1847                crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024))
1848                    .unwrap(),
1849            )
1850            .unwrap();
1851        no_hint.set_source(payload.as_slice());
1852        no_hint.set_drain(&mut no_hint_output);
1853        no_hint.compress();
1854
1855        let hinted_window = crate::decoding::frame::read_frame_header(hinted_output.as_slice())
1856            .expect("encoded frame should have a header")
1857            .0
1858            .window_size()
1859            .expect("window size should be present");
1860        let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice())
1861            .expect("encoded frame should have a header")
1862            .0
1863            .window_size()
1864            .expect("window size should be present");
1865        assert!(
1866            hinted_window <= no_hint_window,
1867            "source-size hint should not increase advertised window with dictionary priming",
1868        );
1869
1870        let mut decoder = FrameDecoder::new();
1871        decoder.add_dict(dict_for_decoder).unwrap();
1872        let mut decoded = Vec::with_capacity(payload.len());
1873        decoder
1874            .decode_all_to_vec(&hinted_output, &mut decoded)
1875            .unwrap();
1876        assert_eq!(decoded, payload);
1877    }
1878
1879    #[test]
1880    fn source_size_hint_with_dictionary_keeps_roundtrip_for_larger_payload() {
1881        let dict_id = 0xABCD_0005;
1882        let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history
1883        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap();
1884        let dict_for_decoder =
1885            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
1886        let payload = b"abcd".repeat(1024); // 4 KiB payload
1887        let payload_len = payload.len() as u64;
1888
1889        let mut hinted_output = Vec::new();
1890        let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
1891        hinted.set_dictionary(dict).unwrap();
1892        hinted.set_source_size_hint(payload_len);
1893        hinted.set_source(payload.as_slice());
1894        hinted.set_drain(&mut hinted_output);
1895        hinted.compress();
1896
1897        let mut no_hint_output = Vec::new();
1898        let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
1899        no_hint
1900            .set_dictionary(
1901                crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024))
1902                    .unwrap(),
1903            )
1904            .unwrap();
1905        no_hint.set_source(payload.as_slice());
1906        no_hint.set_drain(&mut no_hint_output);
1907        no_hint.compress();
1908
1909        let hinted_window = crate::decoding::frame::read_frame_header(hinted_output.as_slice())
1910            .expect("encoded frame should have a header")
1911            .0
1912            .window_size()
1913            .expect("window size should be present");
1914        let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice())
1915            .expect("encoded frame should have a header")
1916            .0
1917            .window_size()
1918            .expect("window size should be present");
1919        assert!(
1920            hinted_window <= no_hint_window,
1921            "source-size hint should not increase advertised window with dictionary priming",
1922        );
1923
1924        let mut decoder = FrameDecoder::new();
1925        decoder.add_dict(dict_for_decoder).unwrap();
1926        let mut decoded = Vec::with_capacity(payload.len());
1927        decoder
1928            .decode_all_to_vec(&hinted_output, &mut decoded)
1929            .unwrap();
1930        assert_eq!(decoded, payload);
1931    }
1932
1933    #[test]
1934    fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() {
1935        let dict_id = 0xABCD_0003;
1936        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1937            .expect("raw dictionary should be valid");
1938        let payload = b"abcdefghabcdefgh";
1939
1940        let mut output = Vec::new();
1941        let matcher = NoDictionaryMatcher::new(64);
1942        let mut compressor =
1943            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1944        compressor
1945            .set_dictionary(dict)
1946            .expect("dictionary should attach");
1947        compressor.set_source(payload.as_slice());
1948        compressor.set_drain(&mut output);
1949        compressor.compress();
1950
1951        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1952            .expect("encoded frame should have a header");
1953        assert_eq!(
1954            frame_header.dictionary_id(),
1955            None,
1956            "matchers that do not support dictionary priming must not advertise dictionary dependency"
1957        );
1958
1959        let mut decoder = FrameDecoder::new();
1960        let mut decoded = Vec::with_capacity(payload.len());
1961        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1962        assert_eq!(decoded, payload);
1963    }
1964
1965    #[cfg(feature = "hash")]
1966    #[test]
1967    fn checksum_two_frames_reused_compressor() {
1968        // Compress the same data twice using the same compressor and verify that:
1969        // 1. The checksum written in each frame matches what the decoder calculates.
1970        // 2. The hasher is correctly reset between frames (no cross-contamination).
1971        //    If the hasher were NOT reset, the second frame's calculated checksum
1972        //    would differ from the one stored in the frame data, causing assert_eq to fail.
1973        let data: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
1974
1975        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1976
1977        // --- Frame 1 ---
1978        let mut compressed1 = Vec::new();
1979        compressor.set_source(data.as_slice());
1980        compressor.set_drain(&mut compressed1);
1981        compressor.compress();
1982
1983        // --- Frame 2 (reuse the same compressor) ---
1984        let mut compressed2 = Vec::new();
1985        compressor.set_source(data.as_slice());
1986        compressor.set_drain(&mut compressed2);
1987        compressor.compress();
1988
1989        fn decode_and_collect(compressed: &[u8]) -> (Vec<u8>, Option<u32>, Option<u32>) {
1990            let mut decoder = FrameDecoder::new();
1991            let mut source = compressed;
1992            decoder.reset(&mut source).unwrap();
1993            while !decoder.is_finished() {
1994                decoder
1995                    .decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
1996                    .unwrap();
1997            }
1998            let mut decoded = Vec::new();
1999            decoder.collect_to_writer(&mut decoded).unwrap();
2000            (
2001                decoded,
2002                decoder.get_checksum_from_data(),
2003                decoder.get_calculated_checksum(),
2004            )
2005        }
2006
2007        let (decoded1, chksum_from_data1, chksum_calculated1) = decode_and_collect(&compressed1);
2008        assert_eq!(decoded1, data, "frame 1: decoded data mismatch");
2009        assert_eq!(
2010            chksum_from_data1, chksum_calculated1,
2011            "frame 1: checksum mismatch"
2012        );
2013
2014        let (decoded2, chksum_from_data2, chksum_calculated2) = decode_and_collect(&compressed2);
2015        assert_eq!(decoded2, data, "frame 2: decoded data mismatch");
2016        assert_eq!(
2017            chksum_from_data2, chksum_calculated2,
2018            "frame 2: checksum mismatch"
2019        );
2020
2021        // Same data compressed twice must produce the same checksum.
2022        // If state leaked across frames, the second calculated checksum would differ.
2023        assert_eq!(
2024            chksum_from_data1, chksum_from_data2,
2025            "frame 1 and frame 2 should have the same checksum (same data, hash must reset per frame)"
2026        );
2027    }
2028
2029    #[cfg(feature = "std")]
2030    #[test]
2031    fn fuzz_targets() {
2032        use std::io::Read;
2033        fn decode_szstd(data: &mut dyn std::io::Read) -> Vec<u8> {
2034            let mut decoder = crate::decoding::StreamingDecoder::new(data).unwrap();
2035            let mut result: Vec<u8> = Vec::new();
2036            decoder.read_to_end(&mut result).expect("Decoding failed");
2037            result
2038        }
2039
2040        fn decode_szstd_writer(mut data: impl Read) -> Vec<u8> {
2041            let mut decoder = crate::decoding::FrameDecoder::new();
2042            decoder.reset(&mut data).unwrap();
2043            let mut result = vec![];
2044            while !decoder.is_finished() || decoder.can_collect() > 0 {
2045                decoder
2046                    .decode_blocks(
2047                        &mut data,
2048                        crate::decoding::BlockDecodingStrategy::UptoBytes(1024 * 1024),
2049                    )
2050                    .unwrap();
2051                decoder.collect_to_writer(&mut result).unwrap();
2052            }
2053            result
2054        }
2055
2056        fn encode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
2057            zstd::stream::encode_all(std::io::Cursor::new(data), 3)
2058        }
2059
2060        fn encode_szstd_uncompressed(data: &mut dyn std::io::Read) -> Vec<u8> {
2061            let mut input = Vec::new();
2062            data.read_to_end(&mut input).unwrap();
2063
2064            crate::encoding::compress_to_vec(
2065                input.as_slice(),
2066                crate::encoding::CompressionLevel::Uncompressed,
2067            )
2068        }
2069
2070        fn encode_szstd_compressed(data: &mut dyn std::io::Read) -> Vec<u8> {
2071            let mut input = Vec::new();
2072            data.read_to_end(&mut input).unwrap();
2073
2074            crate::encoding::compress_to_vec(
2075                input.as_slice(),
2076                crate::encoding::CompressionLevel::Fastest,
2077            )
2078        }
2079
2080        fn decode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
2081            let mut output = Vec::new();
2082            zstd::stream::copy_decode(data, &mut output)?;
2083            Ok(output)
2084        }
2085        if std::fs::exists("fuzz/artifacts/interop").unwrap_or(false) {
2086            for file in std::fs::read_dir("fuzz/artifacts/interop").unwrap() {
2087                if file.as_ref().unwrap().file_type().unwrap().is_file() {
2088                    let data = std::fs::read(file.unwrap().path()).unwrap();
2089                    let data = data.as_slice();
2090                    // Decoding
2091                    let compressed = encode_zstd(data).unwrap();
2092                    let decoded = decode_szstd(&mut compressed.as_slice());
2093                    let decoded2 = decode_szstd_writer(&mut compressed.as_slice());
2094                    assert!(
2095                        decoded == data,
2096                        "Decoded data did not match the original input during decompression"
2097                    );
2098                    assert_eq!(
2099                        decoded2, data,
2100                        "Decoded data did not match the original input during decompression"
2101                    );
2102
2103                    // Encoding
2104                    // Uncompressed encoding
2105                    let mut input = data;
2106                    let compressed = encode_szstd_uncompressed(&mut input);
2107                    let decoded = decode_zstd(&compressed).unwrap();
2108                    assert_eq!(
2109                        decoded, data,
2110                        "Decoded data did not match the original input during compression"
2111                    );
2112                    // Compressed encoding
2113                    let mut input = data;
2114                    let compressed = encode_szstd_compressed(&mut input);
2115                    let decoded = decode_zstd(&compressed).unwrap();
2116                    assert_eq!(
2117                        decoded, data,
2118                        "Decoded data did not match the original input during compression"
2119                    );
2120                }
2121            }
2122        }
2123    }
2124
2125    /// Homogeneous input — every byte the same — must NOT be split:
2126    /// both border histograms are identical (all 512 hits on a single
2127    /// slot), so `presplit_fingerprints_differ` returns `false` and the
2128    /// function takes the early-return path at
2129    /// `zstd_preSplit.c:214` returning `blockSize`.
2130    #[test]
2131    fn donor_split_block_from_borders_keeps_homogeneous_block() {
2132        let block = vec![0xAAu8; MAX_BLOCK_SIZE as usize];
2133        let split = super::donor_split_block_from_borders(&block);
2134        assert_eq!(split, MAX_BLOCK_SIZE as usize);
2135    }
2136
2137    /// Heterogeneous input — first half all zeros, second half a
2138    /// counter sequence — has clearly distinguishable border
2139    /// histograms, so the borders heuristic decides to split.
2140    ///
2141    /// The transition sits at exactly the block midpoint, so the
2142    /// middle 512-byte sample (`block[mid-256..mid+256]`) is half
2143    /// zeros + half counter values. That makes it roughly
2144    /// equidistant from both border fingerprints — the
2145    /// `abs_diff(dist_from_begin, dist_from_end) < min_distance`
2146    /// branch fires and the heuristic returns the midpoint (64 KiB)
2147    /// per `zstd_preSplit.c:222`. The test asserts the exact value
2148    /// rather than just "one of {32K, 64K, 96K}" so a regression
2149    /// to a different quantised arm cannot silently slip through.
2150    #[test]
2151    fn donor_split_block_from_borders_returns_midpoint_for_centred_transition() {
2152        let mut block = vec![0u8; MAX_BLOCK_SIZE as usize];
2153        for (i, byte) in block
2154            .iter_mut()
2155            .enumerate()
2156            .skip(MAX_BLOCK_SIZE as usize / 2)
2157        {
2158            *byte = (i % 251 + 1) as u8;
2159        }
2160        let split = super::donor_split_block_from_borders(&block);
2161        assert_eq!(
2162            split,
2163            64 * 1024,
2164            "centred-transition fixture must take the symmetric \
2165             midpoint arm (`abs_diff < min_distance`), got {split}"
2166        );
2167    }
2168
/// `donor_pre_split_level` routes mid-range levels to the cheap
/// borders heuristic and high levels to the byChunks path. Levels
/// below 11 are left unsplit, keeping the splitter off the fast /
/// default presets where its per-block cost would dominate.
#[test]
fn donor_pre_split_level_dispatches_by_compression_level() {
    use crate::encoding::CompressionLevel as Lvl;
    let pre_split = super::donor_pre_split_level;

    // Named presets and a low numeric level: no pre-split at all.
    assert_eq!(pre_split(Lvl::Fastest), None);
    assert_eq!(pre_split(Lvl::Default), None);
    assert_eq!(pre_split(Lvl::Better), None);
    assert_eq!(pre_split(Lvl::Level(7)), None);

    // Levels 11 and 15 both report splitter level 0.
    assert_eq!(pre_split(Lvl::Level(11)), Some(0));
    assert_eq!(pre_split(Lvl::Level(15)), Some(0));

    // Levels 16 and 22 both report splitter level 4.
    assert_eq!(pre_split(Lvl::Level(16)), Some(4));
    assert_eq!(pre_split(Lvl::Level(22)), Some(4));
}
2206
/// End-to-end: a 256 KiB heterogeneous payload compressed at
/// Level(13) (borders heuristic active) round-trips through the
/// crate's own decoder.
///
/// The low-entropy → counter transition sits at 64 KiB, i.e. at the
/// midpoint of the first 128 KiB block, so that block's leading and
/// trailing border samples differ and the borders heuristic has a
/// reason to split it. Placing the transition at 128 KiB instead
/// would line it up with the block boundary: every block would then
/// be homogeneous on its own and the split path would not be
/// exercised. The remainder of the payload is pure counter data and
/// goes through the splitter on its own. The test proves the split
/// decisions do not corrupt the frame bitstream.
#[test]
fn level_13_borders_split_roundtrips_through_own_decoder() {
    use crate::encoding::CompressionLevel;

    // Offset at which the payload switches from the repeating run to
    // the counter sequence: the middle of the first 128 KiB block.
    const TRANSITION: usize = 64 * 1024;

    let mut data = vec![0u8; 256 * 1024];
    for (i, byte) in data.iter_mut().enumerate() {
        *byte = if i < TRANSITION {
            // Low-entropy repeating run (eight distinct symbols).
            (i & 0x07) as u8
        } else {
            // Counter sequence over 1..=251.
            (i % 251 + 1) as u8
        };
    }

    // Fixture invariant: the first block really is one the borders
    // heuristic wants to split, otherwise this test would silently
    // degrade into a plain round-trip check.
    let first_block = &data[..MAX_BLOCK_SIZE as usize];
    assert!(
        super::donor_split_block_from_borders(first_block) < MAX_BLOCK_SIZE as usize,
        "test fixture invariant: first block must be split by the borders heuristic",
    );

    let mut compressed = Vec::new();
    let mut compressor = FrameCompressor::new(CompressionLevel::Level(13));
    compressor.set_source(data.as_slice());
    compressor.set_drain(&mut compressed);
    compressor.compress();

    let mut decoder = FrameDecoder::new();
    let mut source = compressed.as_slice();
    decoder
        .reset(&mut source)
        .expect("frame header should parse");
    // Keep decoding until the decoder reports the frame is finished.
    while !decoder.is_finished() {
        decoder
            .decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
            .expect("decode should succeed");
    }
    let mut decoded = Vec::with_capacity(data.len());
    decoder
        .collect_to_writer(&mut decoded)
        .expect("collect_to_writer");
    assert_eq!(decoded, data, "roundtrip must reproduce the input verbatim");
}
2247
/// Regression: calling `set_compression_level` and then `compress()`
/// must refresh `state.strategy_tag` via the reset-time sync, so that
/// the literal-compression gates (`min_literals_to_compress`,
/// `min_gain`) work from the NEW level's strategy.
///
/// The level pair is chosen to genuinely cross strategy bands:
/// `Fastest` resolves to `Fast` and `Level(20)` resolves to
/// `BtUltra2`, so a missed sync would leave the construction-time tag
/// in place and trip the assertion. `CompressionLevel::Best` would
/// type-check as well, but it resolves to `Lazy` today, which keeps
/// `min_literals_to_compress` in the same `shift=3 → 64-byte` band as
/// `Fast` and weakens the signal that the gate floor actually moved.
#[cfg(feature = "std")]
#[test]
fn set_compression_level_then_compress_refreshes_strategy_tag() {
    use super::CompressionLevel;
    use crate::encoding::strategy::StrategyTag;

    let initial_level = CompressionLevel::Fastest;
    let new_level = CompressionLevel::Level(20);

    let payload = vec![0xABu8; 256];
    let mut sink = Vec::new();
    let mut compressor = FrameCompressor::new(initial_level);

    // Snapshot the tag before anything else touches the compressor.
    let initial_tag = compressor.state.strategy_tag;
    assert_eq!(
        initial_tag,
        StrategyTag::for_compression_level(initial_level),
        "construction-time strategy_tag must reflect initial level"
    );

    // Move to a level whose resolved strategy sits in another band and
    // run a complete compress cycle: the matcher.reset() performed
    // inside `compress` is the only place the tag can be refreshed.
    compressor.set_compression_level(new_level);
    compressor.set_source(payload.as_slice());
    compressor.set_drain(&mut sink);
    compressor.compress();

    let expected = StrategyTag::for_compression_level(new_level);
    let new_tag = compressor.state.strategy_tag;
    assert_eq!(
        new_tag, expected,
        "strategy_tag must follow set_compression_level → compress, \
         got {new_tag:?} expected {expected:?}"
    );
    assert_eq!(
        expected,
        StrategyTag::BtUltra2,
        "test fixture invariant: Level(20) must resolve to BtUltra2 \
         so the post-switch tag visibly crosses the band boundary"
    );
    assert_ne!(
        new_tag, initial_tag,
        "test fixture invariant: chosen levels must resolve to \
         different StrategyTag variants"
    );
}
2303
/// Magicless mode (`ZSTD_f_zstd1_magicless`): the encoded frame MUST
/// NOT begin with the 4-byte magic prefix, and it MUST round-trip
/// through a decoder that was told to expect magicless input.
#[test]
fn magicless_frame_omits_magic_and_roundtrips() {
    use crate::common::MAGIC_NUM;
    use crate::decoding::errors::{FrameDecoderError, ReadFrameHeaderError};
    use crate::decoding::{BlockDecodingStrategy, FrameDecoder};

    let input: Vec<u8> = (0..512u32).map(|i| (i ^ 0xA5) as u8).collect();

    // Produce the frame with the magic prefix suppressed.
    let mut output: Vec<u8> = Vec::new();
    let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
    compressor.set_magicless(true);
    compressor.set_source(input.as_slice());
    compressor.set_drain(&mut output);
    compressor.compress();

    // 1. The encoded bytes must not open with the zstd magic number.
    let magic_prefix = MAGIC_NUM.to_le_bytes();
    assert!(
        !output.starts_with(&magic_prefix),
        "magicless frame must omit the 4-byte magic prefix"
    );

    // 2. A decoder switched to magicless mode must recover the payload.
    let mut decoder = FrameDecoder::new();
    decoder.set_magicless(true);
    let mut cursor: &[u8] = output.as_slice();
    decoder.init(&mut cursor).expect("magicless init");
    decoder
        .decode_blocks(&mut cursor, BlockDecodingStrategy::All)
        .expect("decode_blocks");
    let mut decoded: Vec<u8> = Vec::new();
    decoder
        .collect_to_writer(&mut decoded)
        .expect("collect_to_writer");
    assert_eq!(decoded, input, "magicless roundtrip must preserve bytes");

    // 3. A standard (magicful) decoder MUST refuse the magicless frame
    //    while reading the header: its first 4 bytes are the
    //    frame-header descriptor plus window / dictionary / FCS
    //    metadata rather than the magic. Two outcomes are accepted:
    //    `BadMagicNumber` (the usual case — the bytes neither equal
    //    `MAGIC_NUM` nor fall in the skippable-frame magic range) and
    //    `SkipFrame` (rare — the bytes happen to land in
    //    `0x184D2A50..=0x184D2A5F`). Either one shows the standard
    //    decoder did not accept the bytes as a real magicful frame.
    let mut std_decoder = FrameDecoder::new();
    let std_init = std_decoder.init(output.as_slice());
    match std_init {
        Err(FrameDecoderError::ReadFrameHeaderError(
            ReadFrameHeaderError::BadMagicNumber(_) | ReadFrameHeaderError::SkipFrame { .. },
        )) => {}
        other => panic!(
            "standard decoder must reject a magicless frame with \
             ReadFrameHeaderError::BadMagicNumber or SkipFrame, got {other:?}"
        ),
    }
}
2363}