structured_zstd/encoding/
frame_compressor.rs

1//! Utilities and interfaces for encoding an entire frame. Allows reusing resources
2
3use alloc::{boxed::Box, vec::Vec};
4use core::convert::TryInto;
5#[cfg(feature = "hash")]
6use twox_hash::XxHash64;
7
8#[cfg(feature = "hash")]
9use core::hash::Hasher;
10
11use super::{
12    CompressionLevel, Matcher, block_header::BlockHeader, frame_header::FrameHeader, levels::*,
13    match_generator::MatchGeneratorDriver,
14};
15use crate::common::MAX_BLOCK_SIZE;
16use crate::fse::fse_encoder::{FSETable, default_ll_table, default_ml_table, default_of_table};
17
18use crate::io::{Read, Write};
19
/// An interface for compressing arbitrary data with the ZStandard compression algorithm.
///
/// `FrameCompressor` will generally be used by:
/// 1. Creating a compressor for a compression level with `FrameCompressor::new()`
///    (or `FrameCompressor::new_with_matcher()` to plug in a custom matcher)
/// 2. Attaching the input and the output with `FrameCompressor::set_source` and
///    `FrameCompressor::set_drain`
/// 3. Compressing the source into the drain as one frame with `FrameCompressor::compress`
///
/// # Examples
/// ```
/// use structured_zstd::encoding::{FrameCompressor, CompressionLevel};
/// let mock_data: &[_] = &[0x1, 0x2, 0x3, 0x4];
/// let mut output = std::vec::Vec::new();
/// // Initialize a compressor.
/// let mut compressor = FrameCompressor::new(CompressionLevel::Uncompressed);
/// compressor.set_source(mock_data);
/// compressor.set_drain(&mut output);
///
/// // `compress` writes the compressed output into the provided buffer.
/// compressor.compress();
/// ```
pub struct FrameCompressor<R: Read, W: Write, M: Matcher> {
    /// Input to compress, installed by `set_source`. `compress` unwraps
    /// this and therefore panics when no source has been set.
    uncompressed_data: Option<R>,
    /// Output sink, installed by `set_drain`. `compress` unwraps this and
    /// therefore panics when no drain has been set.
    compressed_data: Option<W>,
    /// Level applied at the start of every `compress` call: it is passed to
    /// `matcher.reset` and used to re-derive `state.strategy_tag`.
    compression_level: CompressionLevel,
    /// Optional dictionary. `compress` primes the matcher with its content
    /// and adopts its offset history only when the level is not
    /// `Uncompressed` and the matcher reports dictionary-priming support.
    dictionary: Option<crate::decoding::Dictionary>,
    /// Entropy tables cached for the dictionary. When dictionary state is in
    /// use, `compress` copies them into `state` at frame start; otherwise the
    /// corresponding `state` tables are reset to `None`.
    dictionary_entropy_cache: Option<CachedDictionaryEntropy>,
    /// One-shot size hint from `set_source_size_hint`. `compress` takes the
    /// value (leaving `None`) and forwards it to the matcher.
    source_size_hint: Option<u64>,
    /// Mutable encoder state (matcher, entropy tables, offset history) that
    /// `compress` re-initialises at the start of each frame.
    state: CompressState<M>,
    /// When true, emitted frames omit the 4-byte magic number prefix
    /// (`ZSTD_f_zstd1_magicless`). Default false. The caller is
    /// responsible for ensuring the decoder is configured for the
    /// matching format — wire-format only round-trips with a
    /// magicless-aware decoder.
    magicless: bool,
    /// Hasher fed with the uncompressed input while compressing; re-created
    /// with seed 0 at the start of every `compress` call.
    #[cfg(feature = "hash")]
    hasher: XxHash64,
}
56
/// Entropy tables remembered alongside a dictionary.
///
/// `FrameCompressor::compress` copies these into `CompressState` at frame
/// start when dictionary state is in use. How the cache gets populated is not
/// visible in this part of the file.
#[derive(Clone, Default)]
struct CachedDictionaryEntropy {
    /// Huffman table copied into `CompressState::last_huff_table`.
    huff: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Seed for `FseTables::ll_previous` (literal lengths).
    ll_previous: Option<PreviousFseTable>,
    /// Seed for `FseTables::ml_previous` (match lengths).
    ml_previous: Option<PreviousFseTable>,
    /// Seed for `FseTables::of_previous` (offsets).
    of_previous: Option<PreviousFseTable>,
}
64
/// Marker describing which FSE table a "previous table" slot refers to.
///
/// [`PreviousFseTable::as_table`] turns the marker back into a table
/// reference where one exists.
#[derive(Clone)]
pub(crate) enum PreviousFseTable {
    // Default tables are immutable and already stored alongside the state, so
    // repeating them only needs a lightweight marker instead of cloning FSETable.
    Default,
    // An owned, non-default table; boxed so the enum itself stays small.
    Custom(Box<FSETable>),
    // No table is associated with this variant (`as_table` yields `None`).
    // NOTE(review): the `u8` is presumably the single repeated symbol of an
    // RLE-mode block — confirm against the block encoder.
    Rle(u8),
}
73
74impl PreviousFseTable {
75    pub(crate) fn as_table<'a>(&'a self, default: &'a FSETable) -> Option<&'a FSETable> {
76        match self {
77            Self::Default => Some(default),
78            Self::Custom(table) => Some(table),
79            Self::Rle(_) => None,
80        }
81    }
82}
83
/// FSE table state for the three sequence code types: literal lengths (LL),
/// match lengths (ML) and offsets (OF).
///
/// Each type has a predefined default table plus an optional "previous"
/// marker. `FrameCompressor::compress` either seeds the `*_previous` slots
/// from the dictionary entropy cache or clears them to `None` at frame start.
pub(crate) struct FseTables {
    /// The three predefined LL/ML/OF tables are functions of
    /// compile-time-constant distributions. The
    /// [`fse_encoder::FseDefaultTable`] type alias resolves to
    /// `&'static FSETable` when a process-wide cache is available
    /// (atomic-pointer targets, or no-atomic targets with the
    /// `critical-section` feature) and to `Box<FSETable>` on the
    /// cache-less no-atomic path (one per-frame allocation, dropped
    /// with the compressor — no `Box::leak`, no unbounded growth).
    /// Both arms `Deref` to `FSETable`, so consumers in
    /// `encoding/blocks/compressed.rs` borrow through `&` uniformly
    /// without seeing the per-target divergence.
    pub(crate) ll_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous-table marker for literal lengths; `None` when nothing has
    /// been seeded or recorded.
    pub(crate) ll_previous: Option<PreviousFseTable>,
    /// Predefined match-length table (see `ll_default` for the type alias).
    pub(crate) ml_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous-table marker for match lengths.
    pub(crate) ml_previous: Option<PreviousFseTable>,
    /// Predefined offset table (see `ll_default` for the type alias).
    pub(crate) of_default: crate::fse::fse_encoder::FseDefaultTable,
    /// Previous-table marker for offsets.
    pub(crate) of_previous: Option<PreviousFseTable>,
}
103
104impl FseTables {
105    pub fn new() -> Self {
106        Self {
107            ll_default: default_ll_table(),
108            ll_previous: None,
109            ml_default: default_ml_table(),
110            ml_previous: None,
111            of_default: default_of_table(),
112            of_previous: None,
113        }
114    }
115
116    /// Borrow the LL default table as `&FSETable`. Abstracts the cfg
117    /// split in [`crate::fse::fse_encoder::FseDefaultTable`] —
118    /// `&'static FSETable` (atomic / `critical-section`) auto-derefs
119    /// directly; `Box<FSETable>` (cache-less no-atomic) derefs
120    /// through `Box`. Both arms yield `&FSETable` uniformly so
121    /// downstream consumers can stay cfg-agnostic.
122    #[inline]
123    #[allow(clippy::borrow_deref_ref)]
124    pub(crate) fn ll_default_ref(&self) -> &FSETable {
125        &*self.ll_default
126    }
127
128    /// Borrow the ML default table as `&FSETable`. See [`Self::ll_default_ref`].
129    #[inline]
130    #[allow(clippy::borrow_deref_ref)]
131    pub(crate) fn ml_default_ref(&self) -> &FSETable {
132        &*self.ml_default
133    }
134
135    /// Borrow the OF default table as `&FSETable`. See [`Self::ll_default_ref`].
136    #[inline]
137    #[allow(clippy::borrow_deref_ref)]
138    pub(crate) fn of_default_ref(&self) -> &FSETable {
139        &*self.of_default
140    }
141}
142
/// Lower bound applied to a pre-split decision in `donor_optimal_block_size`.
const PRESPLIT_BLOCK_MIN: usize = 3500;
/// Denominator of the split-threshold fraction in `presplit_fingerprints_differ`.
const PRESPLIT_THRESHOLD_PENALTY_RATE: u64 = 16;
/// Base numerator of the split threshold: `14/16` of the event-count product
/// before any penalty is added.
const PRESPLIT_THRESHOLD_BASE: u64 = PRESPLIT_THRESHOLD_PENALTY_RATE - 2;
/// Initial penalty used by `donor_split_block_by_chunks`; it drops by one
/// after each merged chunk until it reaches zero.
const PRESPLIT_THRESHOLD_PENALTY: i32 = 3;
/// Chunk granularity (8 KiB) of the chunk-based splitter.
const PRESPLIT_CHUNK_SIZE: usize = 8 << 10;
/// Largest hash log accepted by `presplit_hash2`; also sizes the fingerprint table.
const PRESPLIT_HASH_LOG_MAX: usize = 10;
/// Number of slots in `PreSplitFingerprint::events` (`1 << PRESPLIT_HASH_LOG_MAX`).
const PRESPLIT_HASH_TABLE_SIZE: usize = 1 << PRESPLIT_HASH_LOG_MAX;
/// Multiplier of the 2-byte multiplicative hash in `presplit_hash2`.
const PRESPLIT_KNUTH: u32 = 0x9E37_79B9;
/// Donor `SEGMENT_SIZE` in `ZSTD_splitBlock_fromBorders` (`zstd_preSplit.c:201`).
/// Two `SEGMENT_SIZE`-byte fingerprints — one from the start, one from the end —
/// drive the cheap border heuristic; a third one from the middle disambiguates
/// where in the block the transition sits.
const PRESPLIT_BORDERS_SEGMENT: usize = 512;
156
/// Histogram-style fingerprint of a byte range, used by the pre-split
/// heuristics to decide whether two regions have different statistics.
#[derive(Clone)]
struct PreSplitFingerprint {
    // Per-slot event counters. Slots are indexed by `presplit_hash2` (or by
    // the raw byte value in the byte-histogram path).
    events: [u32; PRESPLIT_HASH_TABLE_SIZE],
    // Event total used to normalise comparisons between fingerprints. It is
    // set by the recording helpers and is not necessarily the sum of `events`
    // (see `presplit_record_fingerprint`).
    nb_events: usize,
}
162
163impl Default for PreSplitFingerprint {
164    fn default() -> Self {
165        Self {
166            events: [0; PRESPLIT_HASH_TABLE_SIZE],
167            nb_events: 0,
168        }
169    }
170}
171
172fn presplit_hash2(bytes: &[u8], hash_log: usize) -> usize {
173    debug_assert!(hash_log >= 8);
174    if hash_log == 8 {
175        return bytes[0] as usize;
176    }
177    debug_assert!(hash_log <= PRESPLIT_HASH_LOG_MAX);
178    let value = u16::from_le_bytes([bytes[0], bytes[1]]) as u32;
179    (value.wrapping_mul(PRESPLIT_KNUTH) >> (32 - hash_log)) as usize
180}
181
182fn presplit_record_fingerprint(
183    fp: &mut PreSplitFingerprint,
184    src: &[u8],
185    sampling_rate: usize,
186    hash_log: usize,
187) {
188    fp.events.fill(0);
189    fp.nb_events = 0;
190    if src.len() < 2 {
191        return;
192    }
193    let limit = src.len() - 1;
194    let mut n = 0usize;
195    while n < limit {
196        fp.events[presplit_hash2(&src[n..], hash_log)] += 1;
197        n += sampling_rate;
198    }
199    // Donor parity: zstd_preSplit.c records the integer division, not the
200    // rounded-up number of sampled events from the loop above.
201    fp.nb_events += limit / sampling_rate;
202}
203
204/// Single-byte histogram pass — matches donor `HIST_add` over a small
205/// segment with `hashLog == 8` (the `hash2` shortcut at
206/// `zstd_preSplit.c:36` returns the raw byte). The byChunks path uses
207/// 2-byte hashing for `hashLog >= 9`; this helper exists so the borders
208/// heuristic doesn't pay for that wider hash on its 512-byte windows.
209fn presplit_record_byte_histogram(fp: &mut PreSplitFingerprint, src: &[u8]) {
210    fp.events.fill(0);
211    for &b in src {
212        fp.events[b as usize] += 1;
213    }
214    // Donor `HIST_add` returns the maximum symbol; the caller then sets
215    // `nbEvents = SEGMENT_SIZE` explicitly (see `zstd_preSplit.c:213`).
216    fp.nb_events = src.len();
217}
218
219fn presplit_distance(lhs: &PreSplitFingerprint, rhs: &PreSplitFingerprint, hash_log: usize) -> u64 {
220    let slots = 1usize << hash_log;
221    let mut distance = 0u64;
222    for idx in 0..slots {
223        let left = lhs.events[idx] as i128 * rhs.nb_events as i128;
224        let right = rhs.events[idx] as i128 * lhs.nb_events as i128;
225        distance = distance.saturating_add(left.abs_diff(right) as u64);
226    }
227    distance
228}
229
230fn presplit_fingerprints_differ(
231    reference: &PreSplitFingerprint,
232    new_fp: &PreSplitFingerprint,
233    penalty: i32,
234    hash_log: usize,
235) -> bool {
236    debug_assert!(reference.nb_events > 0);
237    debug_assert!(new_fp.nb_events > 0);
238    let p50 = reference.nb_events as u64 * new_fp.nb_events as u64;
239    let deviation = presplit_distance(reference, new_fp, hash_log);
240    let threshold = p50.saturating_mul(PRESPLIT_THRESHOLD_BASE + penalty as u64)
241        / PRESPLIT_THRESHOLD_PENALTY_RATE;
242    deviation >= threshold
243}
244
245fn presplit_merge_events(acc: &mut PreSplitFingerprint, new_fp: &PreSplitFingerprint) {
246    for idx in 0..PRESPLIT_HASH_TABLE_SIZE {
247        acc.events[idx] = acc.events[idx].saturating_add(new_fp.events[idx]);
248    }
249    acc.nb_events = acc.nb_events.saturating_add(new_fp.nb_events);
250}
251
252fn donor_split_block_by_chunks(block: &[u8], level: usize) -> usize {
253    debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
254    debug_assert!((1..=4).contains(&level));
255    let (sampling_rate, hash_log) = match level - 1 {
256        0 => (43, 8),
257        1 => (11, 9),
258        2 => (5, 10),
259        _ => (1, 10),
260    };
261
262    let mut past = PreSplitFingerprint::default();
263    let mut new_events = PreSplitFingerprint::default();
264    let mut penalty = PRESPLIT_THRESHOLD_PENALTY;
265    presplit_record_fingerprint(
266        &mut past,
267        &block[..PRESPLIT_CHUNK_SIZE],
268        sampling_rate,
269        hash_log,
270    );
271    let mut pos = PRESPLIT_CHUNK_SIZE;
272    while pos <= block.len() - PRESPLIT_CHUNK_SIZE {
273        presplit_record_fingerprint(
274            &mut new_events,
275            &block[pos..pos + PRESPLIT_CHUNK_SIZE],
276            sampling_rate,
277            hash_log,
278        );
279        if presplit_fingerprints_differ(&past, &new_events, penalty, hash_log) {
280            return pos;
281        }
282        presplit_merge_events(&mut past, &new_events);
283        if penalty > 0 {
284            penalty -= 1;
285        }
286        pos += PRESPLIT_CHUNK_SIZE;
287    }
288    block.len()
289}
290
291/// Donor port of `ZSTD_splitBlock_fromBorders` (`zstd_preSplit.c:198`).
292/// Records two 512-byte byte-histograms — one from each end of a 128 KB
293/// block — and a third from the middle as a tie-breaker; returns either
294/// a quantised split point (32 KB / 64 KB / 96 KB) or the full block
295/// size when the two ends look indistinguishable. Cheaper than the
296/// chunk-based path because it touches at most 1.5 KB of input
297/// regardless of block size.
298fn donor_split_block_from_borders(block: &[u8]) -> usize {
299    debug_assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);
300    let block_size = block.len();
301    let mut past = PreSplitFingerprint::default();
302    let mut new_fp = PreSplitFingerprint::default();
303    presplit_record_byte_histogram(&mut past, &block[..PRESPLIT_BORDERS_SEGMENT]);
304    presplit_record_byte_histogram(&mut new_fp, &block[block_size - PRESPLIT_BORDERS_SEGMENT..]);
305    // Donor uses `penalty = 0, hash_log = 8` — i.e. raw byte histogram
306    // distance with no threshold padding (`zstd_preSplit.c:214`).
307    if !presplit_fingerprints_differ(&past, &new_fp, 0, 8) {
308        return block_size;
309    }
310
311    let mut middle = PreSplitFingerprint::default();
312    let mid_start = block_size / 2 - PRESPLIT_BORDERS_SEGMENT / 2;
313    presplit_record_byte_histogram(
314        &mut middle,
315        &block[mid_start..mid_start + PRESPLIT_BORDERS_SEGMENT],
316    );
317
318    let dist_from_begin = presplit_distance(&past, &middle, 8);
319    let dist_from_end = presplit_distance(&new_fp, &middle, 8);
320    // Donor `SEGMENT_SIZE * SEGMENT_SIZE / 3` (`zstd_preSplit.c:221`):
321    // if the middle is roughly equidistant from both ends, the change
322    // sits near the centre — split at the midpoint.
323    let min_distance = (PRESPLIT_BORDERS_SEGMENT as u64) * (PRESPLIT_BORDERS_SEGMENT as u64) / 3;
324    if dist_from_begin.abs_diff(dist_from_end) < min_distance {
325        return 64 * 1024;
326    }
327    // Larger `dist_from_begin` (i.e. `middle` farther from the head
328    // fingerprint, equivalently closer to the tail) means the new
329    // statistics already dominate the centre — the transition
330    // happened EARLY → emit a small 32 KB head and let the 96 KB
331    // tail absorb the rest. Inverse case: `dist_from_end` larger
332    // (middle still resembles the head) means the transition is
333    // LATE → emit a 96 KB head so the trailing 32 KB carries the
334    // new statistics alone.
335    if dist_from_begin > dist_from_end {
336        32 * 1024
337    } else {
338        96 * 1024
339    }
340}
341
342fn donor_pre_split_level(level: CompressionLevel) -> Option<usize> {
343    match level {
344        // Donor `ZSTD_blockSplitter_level` table (`clevels.h`): cheap
345        // borders heuristic for lazy2 / btlazy2 strategies (levels
346        // 11..=15) — the splitter still pays for itself on
347        // heterogeneous payloads but the per-block cost stays bounded
348        // by two 512-byte histograms.
349        CompressionLevel::Level(11..=15) => Some(0),
350        // C zstd's default splitter level for btopt/btultra/btultra2 is 4
351        // (`ZSTD_splitBlock_byChunks` with internal level 3 — sampling
352        // rate 1, `hashLog` 10).
353        CompressionLevel::Level(16..=22) => Some(4),
354        _ => None,
355    }
356}
357
/// Bench-only hook for the donor-parity comparator test in
/// `tests/block_splitter_donor_parity.rs`.
///
/// Routes to the same ports that `donor_optimal_block_size` uses:
/// `split_level == 0` goes to the borders heuristic, `1..=4` to the chunk
/// heuristic. The caller must pass exactly `MAX_BLOCK_SIZE` bytes (donor
/// `ZSTD_splitBlock` contract — "@blockSize must be == 128 KB" in
/// `zstd_preSplit.h`).
///
/// # Panics
/// Panics when `block` is not exactly `MAX_BLOCK_SIZE` bytes long or when
/// `split_level` is greater than 4.
#[cfg(feature = "bench_internals")]
pub(crate) fn block_splitter_decision_for_bench(block: &[u8], split_level: usize) -> usize {
    assert_eq!(
        block.len(),
        MAX_BLOCK_SIZE as usize,
        "block_splitter_decision_for_bench expects exactly MAX_BLOCK_SIZE bytes"
    );
    assert!(
        split_level <= 4,
        "block_splitter_decision_for_bench: split_level must be in 0..=4, got {split_level}"
    );
    match split_level {
        0 => donor_split_block_from_borders(block),
        chunk_level => donor_split_block_by_chunks(block, chunk_level),
    }
}
382
383pub(crate) fn donor_optimal_block_size(
384    level: CompressionLevel,
385    block: &[u8],
386    remaining_src_size: usize,
387    block_size_max: usize,
388    savings: i64,
389) -> usize {
390    let Some(split_level) = donor_pre_split_level(level) else {
391        return remaining_src_size.min(block_size_max);
392    };
393    if remaining_src_size < MAX_BLOCK_SIZE as usize || block_size_max < MAX_BLOCK_SIZE as usize {
394        return remaining_src_size.min(block_size_max);
395    }
396    if savings < 3 {
397        return MAX_BLOCK_SIZE as usize;
398    }
399    if block.len() < MAX_BLOCK_SIZE as usize {
400        return remaining_src_size.min(block_size_max);
401    }
402    // Donor `ZSTD_splitBlock` dispatch (`zstd_preSplit.c:234`):
403    // `split_level == 0` → cheap borders heuristic;
404    // `split_level == 1..=4` → byChunks with internal sampling level
405    // `split_level - 1`.
406    let raw_split = if split_level == 0 {
407        donor_split_block_from_borders(&block[..MAX_BLOCK_SIZE as usize])
408    } else {
409        donor_split_block_by_chunks(&block[..MAX_BLOCK_SIZE as usize], split_level)
410    };
411    raw_split
412        .max(PRESPLIT_BLOCK_MIN)
413        .min(MAX_BLOCK_SIZE as usize)
414}
415
/// Mutable encoder state shared by the block encoders of one compressor.
///
/// `FrameCompressor::compress` re-initialises it at the start of every
/// frame: the matcher is reset, the offset history and entropy tables are
/// either seeded from the dictionary or cleared, and the strategy tag is
/// re-derived from the active level.
pub(crate) struct CompressState<M: Matcher> {
    /// Match finder; also hands out the input buffers (`get_next_space`) and
    /// reports the window size used for the frame.
    pub(crate) matcher: M,
    /// Huffman table carried over for reuse; seeded from the dictionary
    /// entropy cache or reset to `None` at frame start.
    pub(crate) last_huff_table: Option<crate::huff0::huff0_encoder::HuffmanTable>,
    /// Default and previous FSE tables for LL/ML/OF codes.
    pub(crate) fse_tables: FseTables,
    /// NOTE(review): presumably reusable scratch buffers for compressed-block
    /// encoding — confirm in `encoding::blocks`.
    pub(crate) block_scratch: crate::encoding::blocks::CompressedBlockScratch,
    /// Offset history for repeat offset encoding: [rep0, rep1, rep2].
    /// Initialized to [1, 4, 8] per RFC 8878 §3.1.2.5.
    pub(crate) offset_hist: [u32; 3],
    /// Strategy tag resolved from the current `CompressionLevel` at every
    /// `matcher.reset()` call. Used by the literal-compression gates
    /// (`min_literals_to_compress`, `min_gain`) in
    /// `encoding::blocks::compressed` to mirror donor's strategy-aware
    /// thresholds (`zstd_compress_literals.c:114-127, 187-188`).
    ///
    /// **Invariant (required of every construction site):** must be
    /// initialized from the active `CompressionLevel` via
    /// `StrategyTag::for_compression_level`, and re-synced from the
    /// active level alongside every `matcher.reset()` call so the
    /// level-aware gates stay correct after a level change. The two
    /// reset sites that own this sync are `FrameCompressor::compress`
    /// and `StreamingEncoder::ensure_frame_started`. There is no
    /// `Default` impl — production constructors
    /// (`FrameCompressor::new`, `new_with_matcher`, the streaming
    /// encoder constructor) plumb this explicitly. Tests that build
    /// `CompressState` by hand must also supply a value.
    pub(crate) strategy_tag: crate::encoding::strategy::StrategyTag,
}
443
444impl<R: Read, W: Write> FrameCompressor<R, W, MatchGeneratorDriver> {
445    /// Create a new `FrameCompressor`
446    pub fn new(compression_level: CompressionLevel) -> Self {
447        Self {
448            uncompressed_data: None,
449            compressed_data: None,
450            compression_level,
451            dictionary: None,
452            dictionary_entropy_cache: None,
453            source_size_hint: None,
454            state: CompressState {
455                matcher: MatchGeneratorDriver::new(1024 * 128, 1),
456                last_huff_table: None,
457                fse_tables: FseTables::new(),
458                block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
459                offset_hist: [1, 4, 8],
460                strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
461                    compression_level,
462                ),
463            },
464            magicless: false,
465            #[cfg(feature = "hash")]
466            hasher: XxHash64::with_seed(0),
467        }
468    }
469}
470
471impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
472    /// Create a new `FrameCompressor` with a custom matching algorithm implementation
473    pub fn new_with_matcher(matcher: M, compression_level: CompressionLevel) -> Self {
474        Self {
475            uncompressed_data: None,
476            compressed_data: None,
477            dictionary: None,
478            dictionary_entropy_cache: None,
479            source_size_hint: None,
480            state: CompressState {
481                matcher,
482                last_huff_table: None,
483                fse_tables: FseTables::new(),
484                block_scratch: crate::encoding::blocks::CompressedBlockScratch::new(),
485                offset_hist: [1, 4, 8],
486                strategy_tag: crate::encoding::strategy::StrategyTag::for_compression_level(
487                    compression_level,
488                ),
489            },
490            compression_level,
491            magicless: false,
492            #[cfg(feature = "hash")]
493            hasher: XxHash64::with_seed(0),
494        }
495    }
496
    /// Enable or disable magicless frame format (`ZSTD_f_zstd1_magicless`).
    ///
    /// When set to `true`, emitted frames omit the 4-byte magic number
    /// prefix. The matching decoder must be configured to expect a
    /// magicless stream — wire-format only round-trips with a
    /// magicless-aware decoder.
    ///
    /// Newly constructed compressors have this disabled (`false`).
    pub fn set_magicless(&mut self, magicless: bool) {
        self.magicless = magicless;
    }
506
507    /// Before calling [FrameCompressor::compress] you need to set the source.
508    ///
509    /// This is the data that is compressed and written into the drain.
510    pub fn set_source(&mut self, uncompressed_data: R) -> Option<R> {
511        self.uncompressed_data.replace(uncompressed_data)
512    }
513
514    /// Before calling [FrameCompressor::compress] you need to set the drain.
515    ///
516    /// As the compressor compresses data, the drain serves as a place for the output to be writte.
517    pub fn set_drain(&mut self, compressed_data: W) -> Option<W> {
518        self.compressed_data.replace(compressed_data)
519    }
520
    /// Provide a hint about the total uncompressed size for the next frame.
    ///
    /// When set, the encoder selects smaller hash tables and windows for
    /// small inputs, matching the C zstd source-size-class behavior.
    ///
    /// This hint applies only to frame payload bytes (`size`). Dictionary
    /// history is primed separately and does not inflate the hinted size or
    /// advertised frame window.
    /// Must be called before [`compress`](Self::compress).
    ///
    /// The hint is one-shot: `compress` takes it and leaves the field empty,
    /// so it has to be provided again for each subsequent frame.
    pub fn set_source_size_hint(&mut self, size: u64) {
        self.source_size_hint = Some(size);
    }
533
534    /// Compress the uncompressed data from the provided source as one Zstd frame and write it to the provided drain
535    ///
536    /// This will repeatedly call [Read::read] on the source to fill up blocks until the source returns 0 on the read call.
537    /// All compressed blocks are buffered in memory so that the frame header can include the
538    /// `Frame_Content_Size` field (which requires knowing the total uncompressed size). The
539    /// entire frame — header, blocks, and optional checksum — is then written to the drain
540    /// at the end. This means peak memory usage is O(compressed_size).
541    ///
542    /// To avoid endlessly encoding from a potentially endless source (like a network socket) you can use the
543    /// [Read::take] function
544    pub fn compress(&mut self) {
545        let initial_size_hint = self.source_size_hint;
546        let source_size_hint_known = initial_size_hint.is_some();
547        let use_dictionary_state =
548            !matches!(self.compression_level, CompressionLevel::Uncompressed)
549                && self.state.matcher.supports_dictionary_priming()
550                && self.dictionary.is_some();
551        if let Some(size_hint) = self.source_size_hint.take() {
552            // Keep source-size hint scoped to payload bytes; dictionary priming
553            // is applied separately and should not force larger matcher sizing.
554            self.state.matcher.set_source_size_hint(size_hint);
555        }
556        // Clearing buffers to allow re-using of the compressor
557        self.state.matcher.reset(self.compression_level);
558        self.state.offset_hist = [1, 4, 8];
559        // Sync `state.strategy_tag` to the level resolved at this reset so
560        // the literal-compression gates (`min_literals_to_compress` /
561        // `min_gain` in `encoding::blocks::compressed`) see the correct
562        // strategy for the next frame. Frame-by-frame level changes go
563        // through this same `compress()` entry point, so re-syncing here
564        // covers level switches without touching the matcher dispatch.
565        self.state.strategy_tag =
566            crate::encoding::strategy::StrategyTag::for_compression_level(self.compression_level);
567        let cached_entropy = if use_dictionary_state {
568            self.dictionary_entropy_cache.as_ref()
569        } else {
570            None
571        };
572        if use_dictionary_state && let Some(dict) = self.dictionary.as_ref() {
573            // This state drives sequence encoding, while matcher priming below updates
574            // the match generator's internal repeat-offset history for match finding.
575            self.state.offset_hist = dict.offset_hist;
576            self.state
577                .matcher
578                .prime_with_dictionary(dict.dict_content.as_slice(), dict.offset_hist);
579        }
580        if let Some(cache) = cached_entropy {
581            self.state.last_huff_table.clone_from(&cache.huff);
582        } else {
583            self.state.last_huff_table = None;
584        }
585        // `clone_from` keeps frame-to-frame seeding cheap for reused compressors by
586        // reusing existing allocations where possible instead of reallocating every frame.
587        if let Some(cache) = cached_entropy {
588            self.state
589                .fse_tables
590                .ll_previous
591                .clone_from(&cache.ll_previous);
592            self.state
593                .fse_tables
594                .ml_previous
595                .clone_from(&cache.ml_previous);
596            self.state
597                .fse_tables
598                .of_previous
599                .clone_from(&cache.of_previous);
600        } else {
601            self.state.fse_tables.ll_previous = None;
602            self.state.fse_tables.ml_previous = None;
603            self.state.fse_tables.of_previous = None;
604        }
605        let ll_entropy = cached_entropy.and_then(|cache| match cache.ll_previous.as_ref() {
606            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
607            _ => None,
608        });
609        let ml_entropy = cached_entropy.and_then(|cache| match cache.ml_previous.as_ref() {
610            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
611            _ => None,
612        });
613        let of_entropy = cached_entropy.and_then(|cache| match cache.of_previous.as_ref() {
614            Some(PreviousFseTable::Custom(table)) => Some(table.as_ref()),
615            _ => None,
616        });
617        self.state.matcher.seed_dictionary_entropy(
618            self.state.last_huff_table.as_ref(),
619            ll_entropy,
620            ml_entropy,
621            of_entropy,
622        );
623        #[cfg(feature = "hash")]
624        {
625            self.hasher = XxHash64::with_seed(0);
626        }
627        let source = self.uncompressed_data.as_mut().unwrap();
628        let drain = self.compressed_data.as_mut().unwrap();
629        let window_size = self.state.matcher.window_size();
630        assert!(
631            window_size != 0,
632            "matcher reported window_size == 0, which is invalid"
633        );
634        // Accumulate all compressed blocks; the frame header is written
635        // after all input has been read so that Frame_Content_Size is
636        // known. The default seed is one donor block; smaller seeds for
637        // small payloads avoid pinning a full block worth of bytes when
638        // the compressed output fits in a few hundred bytes. For larger
639        // inputs the default seed amortises the first few `Vec::extend`
640        // doublings cheaply and the `peak - default_seed` residue is
641        // dominated by internal `compress_block_encoded` buffers anyway,
642        // so changing it produces no measurable savings.
643        //
644        // Seed-size tiers (mirrors donor `ZSTD_CStreamOutSize` naming):
645        //
646        // * `ALL_BLOCKS_TINY_CAP` — payload ≤ this size, seed equals
647        //   payload bound; ≥ everything compressed output could need
648        //   for a tiny input.
649        // * `ALL_BLOCKS_SMALL_CAP` — small-input seed picked to absorb
650        //   one or two doublings without over-allocating.
651        // * `ALL_BLOCKS_DEFAULT_CAP` — one donor block; the value the
652        //   rest of the encoder is sized around.
653        const ALL_BLOCKS_TINY_THRESHOLD: u64 = 4 * 1024;
654        const ALL_BLOCKS_SMALL_THRESHOLD: u64 = 64 * 1024;
655        const ALL_BLOCKS_TINY_CAP: usize = 4 * 1024;
656        const ALL_BLOCKS_SMALL_CAP: usize = 16 * 1024;
657        const ALL_BLOCKS_DEFAULT_CAP: usize = 130 * 1024;
658        let initial_all_blocks_cap = match initial_size_hint {
659            Some(h) if h <= ALL_BLOCKS_TINY_THRESHOLD => ALL_BLOCKS_TINY_CAP,
660            Some(h) if h <= ALL_BLOCKS_SMALL_THRESHOLD => ALL_BLOCKS_SMALL_CAP,
661            _ => ALL_BLOCKS_DEFAULT_CAP,
662        };
663        let mut all_blocks: Vec<u8> = Vec::with_capacity(initial_all_blocks_cap);
664        let mut total_uncompressed: u64 = 0;
665        let mut pending_input: Vec<u8> = Vec::new();
666        let mut reached_eof = false;
667        let mut savings = 0i64;
668        // Compress block by block
669        loop {
670            // Read up to one donor block. When the pre-block splitter keeps a
671            // suffix, top it back up before compressing the next block, matching
672            // ZSTD_compress_frameChunk() over a contiguous input buffer.
673            let block_capacity = MAX_BLOCK_SIZE as usize;
674            let had_pending = !pending_input.is_empty();
675            let mut uncompressed_data = if had_pending {
676                core::mem::take(&mut pending_input)
677            } else {
678                self.state.matcher.get_next_space()
679            };
680            let mut filled = if had_pending {
681                uncompressed_data.len()
682            } else {
683                0
684            };
685            if uncompressed_data.len() < block_capacity {
686                uncompressed_data.resize(block_capacity, 0);
687            }
688            'read_loop: loop {
689                if reached_eof || filled == block_capacity {
690                    break 'read_loop;
691                }
692                let new_bytes = source
693                    .read(&mut uncompressed_data[filled..block_capacity])
694                    .unwrap();
695                if new_bytes == 0 {
696                    reached_eof = true;
697                    break 'read_loop;
698                }
699                filled += new_bytes;
700                total_uncompressed += new_bytes as u64;
701            }
702            uncompressed_data.truncate(filled);
703            let mut last_block = reached_eof;
704            let remaining_for_split = if reached_eof {
705                uncompressed_data.len()
706            } else {
707                block_capacity
708            };
709            if !matches!(self.compression_level, CompressionLevel::Uncompressed)
710                && uncompressed_data.len() == block_capacity
711            {
712                let block_len = donor_optimal_block_size(
713                    self.compression_level,
714                    &uncompressed_data,
715                    remaining_for_split,
716                    block_capacity,
717                    savings,
718                );
719                if block_len < uncompressed_data.len() {
720                    pending_input = uncompressed_data.split_off(block_len);
721                    // `split_off` returns a Vec whose capacity is typically
722                    // close to its length. Next iteration's `had_pending`
723                    // branch moves `pending_input` into `uncompressed_data`
724                    // and resizes to `block_capacity`, which would reallocate
725                    // from scratch on every pre-split. Pre-reserve here so
726                    // the resize stays in-place.
727                    if pending_input.capacity() < block_capacity {
728                        pending_input.reserve_exact(block_capacity - pending_input.len());
729                    }
730                    last_block = false;
731                }
732            }
733            // As we read, hash that data too
734            #[cfg(feature = "hash")]
735            self.hasher.write(&uncompressed_data);
736            // Special handling is needed for compression of a totally empty file
737            if uncompressed_data.is_empty() {
738                let header = BlockHeader {
739                    last_block: true,
740                    block_type: crate::blocks::block::BlockType::Raw,
741                    block_size: 0,
742                };
743                header.serialize(&mut all_blocks);
744                break;
745            }
746
747            match self.compression_level {
748                CompressionLevel::Uncompressed => {
749                    let header = BlockHeader {
750                        last_block,
751                        block_type: crate::blocks::block::BlockType::Raw,
752                        block_size: uncompressed_data.len().try_into().unwrap(),
753                    };
754                    header.serialize(&mut all_blocks);
755                    all_blocks.extend_from_slice(&uncompressed_data);
756                    savings +=
757                        uncompressed_data.len() as i64 - (3 + uncompressed_data.len()) as i64;
758                }
759                CompressionLevel::Fastest
760                | CompressionLevel::Default
761                | CompressionLevel::Better
762                | CompressionLevel::Best
763                | CompressionLevel::Level(_) => {
764                    let before_len = all_blocks.len();
765                    let block_len = uncompressed_data.len();
766                    compress_block_encoded(
767                        &mut self.state,
768                        self.compression_level,
769                        last_block,
770                        uncompressed_data,
771                        &mut all_blocks,
772                    );
773                    savings += block_len as i64 - (all_blocks.len() - before_len) as i64;
774                }
775            }
776            if last_block && pending_input.is_empty() {
777                break;
778            }
779        }
780
781        // Now that total_uncompressed is known, write the frame header with FCS.
782        // Match the donor framing policy for pledged one-shot inputs: use a
783        // single-segment frame whenever the source fits the active window.
784        let single_segment = !use_dictionary_state
785            && source_size_hint_known
786            && total_uncompressed >= 512
787            && total_uncompressed <= window_size;
788        let header = FrameHeader {
789            frame_content_size: Some(total_uncompressed),
790            single_segment,
791            content_checksum: cfg!(feature = "hash"),
792            dictionary_id: if use_dictionary_state {
793                self.dictionary.as_ref().map(|dict| dict.id as u64)
794            } else {
795                None
796            },
797            window_size: if single_segment {
798                None
799            } else {
800                Some(window_size)
801            },
802            magicless: self.magicless,
803        };
804        // Write the frame header and compressed blocks separately to avoid
805        // shifting the entire `all_blocks` buffer to prepend the header.
806        let mut header_buf: Vec<u8> = Vec::with_capacity(14);
807        header.serialize(&mut header_buf);
808        drain.write_all(&header_buf).unwrap();
809        drain.write_all(&all_blocks).unwrap();
810
811        // If the `hash` feature is enabled, then `content_checksum` is set to true in the header
812        // and a 32 bit hash is written at the end of the data.
813        #[cfg(feature = "hash")]
814        {
815            // Because we only have the data as a reader, we need to read all of it to calculate the checksum
816            // Possible TODO: create a wrapper around self.uncompressed data that hashes the data as it's read?
817            let content_checksum = self.hasher.finish();
818            drain
819                .write_all(&(content_checksum as u32).to_le_bytes())
820                .unwrap();
821        }
822    }
823
824    /// Get a mutable reference to the source
825    pub fn source_mut(&mut self) -> Option<&mut R> {
826        self.uncompressed_data.as_mut()
827    }
828
829    /// Get a mutable reference to the drain
830    pub fn drain_mut(&mut self) -> Option<&mut W> {
831        self.compressed_data.as_mut()
832    }
833
834    /// Get a reference to the source
835    pub fn source(&self) -> Option<&R> {
836        self.uncompressed_data.as_ref()
837    }
838
839    /// Get a reference to the drain
840    pub fn drain(&self) -> Option<&W> {
841        self.compressed_data.as_ref()
842    }
843
844    /// Retrieve the source
845    pub fn take_source(&mut self) -> Option<R> {
846        self.uncompressed_data.take()
847    }
848
849    /// Retrieve the drain
850    pub fn take_drain(&mut self) -> Option<W> {
851        self.compressed_data.take()
852    }
853
854    /// Before calling [FrameCompressor::compress] you can replace the matcher
855    pub fn replace_matcher(&mut self, mut match_generator: M) -> M {
856        core::mem::swap(&mut match_generator, &mut self.state.matcher);
857        match_generator
858    }
859
860    /// Before calling [FrameCompressor::compress] you can replace the compression level
861    pub fn set_compression_level(
862        &mut self,
863        compression_level: CompressionLevel,
864    ) -> CompressionLevel {
865        let old = self.compression_level;
866        self.compression_level = compression_level;
867        old
868    }
869
870    /// Get the current compression level
871    pub fn compression_level(&self) -> CompressionLevel {
872        self.compression_level
873    }
874
875    /// Attach a pre-parsed dictionary to be used for subsequent compressions.
876    ///
877    /// In compressed modes, the dictionary id is written only when the active
878    /// matcher supports dictionary priming.
879    /// Uncompressed mode and non-priming matchers ignore the attached dictionary
880    /// at encode time.
881    pub fn set_dictionary(
882        &mut self,
883        dictionary: crate::decoding::Dictionary,
884    ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
885    {
886        if dictionary.id == 0 {
887            return Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId);
888        }
889        if let Some(index) = dictionary.offset_hist.iter().position(|&rep| rep == 0) {
890            return Err(
891                crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
892                    index: index as u8,
893                },
894            );
895        }
896        self.dictionary_entropy_cache = Some(CachedDictionaryEntropy {
897            huff: dictionary.huf.table.to_encoder_table(),
898            ll_previous: dictionary
899                .fse
900                .literal_lengths
901                .to_encoder_table()
902                .map(|table| PreviousFseTable::Custom(Box::new(table))),
903            ml_previous: dictionary
904                .fse
905                .match_lengths
906                .to_encoder_table()
907                .map(|table| PreviousFseTable::Custom(Box::new(table))),
908            of_previous: dictionary
909                .fse
910                .offsets
911                .to_encoder_table()
912                .map(|table| PreviousFseTable::Custom(Box::new(table))),
913        });
914        Ok(self.dictionary.replace(dictionary))
915    }
916
917    /// Parse and attach a serialized dictionary blob.
918    pub fn set_dictionary_from_bytes(
919        &mut self,
920        raw_dictionary: &[u8],
921    ) -> Result<Option<crate::decoding::Dictionary>, crate::decoding::errors::DictionaryDecodeError>
922    {
923        let dictionary = crate::decoding::Dictionary::decode_dict(raw_dictionary)?;
924        self.set_dictionary(dictionary)
925    }
926
927    /// Remove the attached dictionary.
928    pub fn clear_dictionary(&mut self) -> Option<crate::decoding::Dictionary> {
929        self.dictionary_entropy_cache = None;
930        self.dictionary.take()
931    }
932}
933
934#[cfg(test)]
935mod tests {
936    #[cfg(all(feature = "dict_builder", feature = "std"))]
937    use alloc::format;
938    use alloc::vec;
939
940    use super::FrameCompressor;
941    use crate::blocks::block::BlockType;
942    use crate::common::{MAGIC_NUM, MAX_BLOCK_SIZE};
943    use crate::decoding::{FrameDecoder, block_decoder, frame::read_frame_header};
944    use crate::encoding::{Matcher, Sequence};
945    use alloc::vec::Vec;
946
947    fn generate_data(seed: u64, len: usize) -> Vec<u8> {
948        let mut state = seed;
949        let mut data = Vec::with_capacity(len);
950        for _ in 0..len {
951            state = state
952                .wrapping_mul(6364136223846793005)
953                .wrapping_add(1442695040888963407);
954            data.push((state >> 33) as u8);
955        }
956        data
957    }
958
959    fn first_block_type(frame: &[u8]) -> BlockType {
960        let (_, header_size) = read_frame_header(frame).expect("frame header should parse");
961        let mut decoder = block_decoder::new();
962        let (header, _) = decoder
963            .read_block_header(&frame[header_size as usize..])
964            .expect("block header should parse");
965        header.block_type
966    }
967
968    /// Frame content size is written correctly and C zstd can decompress the output.
969    #[cfg(feature = "std")]
970    #[test]
971    fn fcs_header_written_and_c_zstd_compatible() {
972        let levels = [
973            crate::encoding::CompressionLevel::Uncompressed,
974            crate::encoding::CompressionLevel::Fastest,
975            crate::encoding::CompressionLevel::Default,
976            crate::encoding::CompressionLevel::Better,
977            crate::encoding::CompressionLevel::Best,
978        ];
979        let fcs_2byte = vec![0xCDu8; 300]; // 300 bytes → 2-byte FCS (256..=65791 range)
980        let large = vec![0xABu8; 100_000];
981        let inputs: [&[u8]; 5] = [
982            &[],
983            &[0x00],
984            b"abcdefghijklmnopqrstuvwxy\n",
985            &fcs_2byte,
986            &large,
987        ];
988        for level in levels {
989            for data in &inputs {
990                let compressed = crate::encoding::compress_to_vec(*data, level);
991                // Verify FCS is present and correct
992                let header = crate::decoding::frame::read_frame_header(compressed.as_slice())
993                    .unwrap()
994                    .0;
995                assert_eq!(
996                    header.frame_content_size(),
997                    data.len() as u64,
998                    "FCS mismatch for len={} level={:?}",
999                    data.len(),
1000                    level,
1001                );
1002                // Confirm the FCS field is actually present in the header
1003                // (not just the decoder returning 0 for absent FCS).
1004                assert_ne!(
1005                    header.descriptor.frame_content_size_bytes().unwrap(),
1006                    0,
1007                    "FCS field must be present for len={} level={:?}",
1008                    data.len(),
1009                    level,
1010                );
1011                // Verify C zstd can decompress
1012                let mut decoded = Vec::new();
1013                zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1014                    |e| {
1015                        panic!(
1016                            "C zstd decode failed for len={} level={level:?}: {e}",
1017                            data.len()
1018                        )
1019                    },
1020                );
1021                assert_eq!(
1022                    decoded.as_slice(),
1023                    *data,
1024                    "C zstd roundtrip failed for len={}",
1025                    data.len()
1026                );
1027            }
1028        }
1029    }
1030
1031    #[cfg(feature = "std")]
1032    #[test]
1033    fn source_size_hint_fastest_remains_ffi_compatible_small_input() {
1034        let data = vec![0xAB; 2047];
1035        let compressed = {
1036            let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1037            compressor.set_source_size_hint(data.len() as u64);
1038            compressor.set_source(data.as_slice());
1039            let mut out = Vec::new();
1040            compressor.set_drain(&mut out);
1041            compressor.compress();
1042            out
1043        };
1044
1045        let mut decoded = Vec::new();
1046        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1047        assert_eq!(decoded, data);
1048    }
1049
1050    #[cfg(feature = "std")]
1051    #[test]
1052    fn small_hinted_default_frame_uses_single_segment_header() {
1053        let data = generate_data(0xD15E_A5ED, 1024);
1054        let compressed = {
1055            let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
1056            compressor.set_source_size_hint(data.len() as u64);
1057            compressor.set_source(data.as_slice());
1058            let mut out = Vec::new();
1059            compressor.set_drain(&mut out);
1060            compressor.compress();
1061            out
1062        };
1063
1064        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1065        assert!(
1066            frame_header.descriptor.single_segment_flag(),
1067            "small hinted default frames should use single-segment header for Rust/FFI parity"
1068        );
1069        assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1070        let mut decoded = Vec::new();
1071        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1072            .expect("ffi decoder must accept single-segment small hinted default frame");
1073        assert_eq!(decoded, data);
1074    }
1075
1076    #[cfg(feature = "std")]
1077    #[test]
1078    fn small_hinted_numeric_default_levels_use_single_segment_header() {
1079        let data = generate_data(0xA11C_E003, 1024);
1080        for level in [
1081            super::CompressionLevel::Level(0),
1082            super::CompressionLevel::Level(3),
1083        ] {
1084            let compressed = {
1085                let mut compressor = FrameCompressor::new(level);
1086                compressor.set_source_size_hint(data.len() as u64);
1087                compressor.set_source(data.as_slice());
1088                let mut out = Vec::new();
1089                compressor.set_drain(&mut out);
1090                compressor.compress();
1091                out
1092            };
1093
1094            let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1095            assert!(
1096                frame_header.descriptor.single_segment_flag(),
1097                "small hinted numeric default level frames should use single-segment header (level={level:?})"
1098            );
1099            assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1100            let mut decoded = Vec::new();
1101            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1102                panic!(
1103                    "ffi decoder must accept single-segment small hinted numeric default level frame (level={level:?}): {e}"
1104                )
1105            });
1106            assert_eq!(decoded, data);
1107        }
1108    }
1109
1110    #[cfg(feature = "std")]
1111    #[test]
1112    fn source_size_hint_levels_remain_ffi_compatible_small_inputs_matrix() {
1113        let levels = [
1114            super::CompressionLevel::Fastest,
1115            super::CompressionLevel::Default,
1116            super::CompressionLevel::Better,
1117            super::CompressionLevel::Best,
1118            super::CompressionLevel::Level(-1),
1119            super::CompressionLevel::Level(2),
1120            super::CompressionLevel::Level(3),
1121            super::CompressionLevel::Level(4),
1122            super::CompressionLevel::Level(11),
1123        ];
1124        let sizes = [
1125            511usize, 512, 513, 1023, 1024, 1536, 2047, 2048, 4095, 4096, 8191, 16_384, 16_385,
1126        ];
1127
1128        for (seed_idx, seed) in [11u64, 23, 41].into_iter().enumerate() {
1129            for &size in &sizes {
1130                let data = generate_data(seed + seed_idx as u64, size);
1131                for &level in &levels {
1132                    let compressed = {
1133                        let mut compressor = FrameCompressor::new(level);
1134                        compressor.set_source_size_hint(data.len() as u64);
1135                        compressor.set_source(data.as_slice());
1136                        let mut out = Vec::new();
1137                        compressor.set_drain(&mut out);
1138                        compressor.compress();
1139                        out
1140                    };
1141                    if matches!(size, 511 | 512) {
1142                        let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1143                        assert_eq!(
1144                            frame_header.descriptor.single_segment_flag(),
1145                            size == 512,
1146                            "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1147                        );
1148                    }
1149
1150                    let mut decoded = Vec::new();
1151                    zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1152                        |e| {
1153                            panic!(
1154                                "ffi decode failed with source-size hint: level={level:?} size={size} seed={} err={e}",
1155                                seed + seed_idx as u64
1156                            )
1157                        },
1158                    );
1159                    assert_eq!(
1160                        decoded,
1161                        data,
1162                        "hinted ffi roundtrip mismatch: level={level:?} size={size} seed={}",
1163                        seed + seed_idx as u64
1164                    );
1165                }
1166            }
1167        }
1168    }
1169
1170    #[cfg(feature = "std")]
1171    #[test]
1172    fn hinted_levels_use_single_segment_header_symmetrically() {
1173        let levels = [
1174            super::CompressionLevel::Fastest,
1175            super::CompressionLevel::Default,
1176            super::CompressionLevel::Better,
1177            super::CompressionLevel::Best,
1178            super::CompressionLevel::Level(0),
1179            super::CompressionLevel::Level(2),
1180            super::CompressionLevel::Level(3),
1181            super::CompressionLevel::Level(4),
1182            super::CompressionLevel::Level(11),
1183        ];
1184        for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1185            let size = 1024 + seed_idx * 97;
1186            let data = generate_data(seed, size);
1187            for &level in &levels {
1188                let compressed = {
1189                    let mut compressor = FrameCompressor::new(level);
1190                    compressor.set_source_size_hint(data.len() as u64);
1191                    compressor.set_source(data.as_slice());
1192                    let mut out = Vec::new();
1193                    compressor.set_drain(&mut out);
1194                    compressor.compress();
1195                    out
1196                };
1197                let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1198                assert!(
1199                    frame_header.descriptor.single_segment_flag(),
1200                    "hinted frame should be single-segment for level={level:?} size={}",
1201                    data.len()
1202                );
1203                assert_eq!(frame_header.frame_content_size(), data.len() as u64);
1204                let mut decoded = Vec::new();
1205                zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(|e| {
1206                    panic!(
1207                        "ffi decode failed for hinted single-segment parity: level={level:?} size={} err={e}",
1208                        data.len()
1209                    )
1210                });
1211                assert_eq!(decoded, data);
1212            }
1213        }
1214    }
1215
1216    #[cfg(feature = "std")]
1217    #[test]
1218    fn hinted_levels_pin_511_512_single_segment_boundary() {
1219        let levels = [
1220            super::CompressionLevel::Fastest,
1221            super::CompressionLevel::Default,
1222            super::CompressionLevel::Better,
1223            super::CompressionLevel::Best,
1224            super::CompressionLevel::Level(0),
1225            super::CompressionLevel::Level(2),
1226            super::CompressionLevel::Level(3),
1227            super::CompressionLevel::Level(4),
1228            super::CompressionLevel::Level(11),
1229        ];
1230        for (seed_idx, seed) in [7u64, 23, 41].into_iter().enumerate() {
1231            for &size in &[511usize, 512] {
1232                let data = generate_data(seed + seed_idx as u64, size);
1233                for &level in &levels {
1234                    let compressed = {
1235                        let mut compressor = FrameCompressor::new(level);
1236                        compressor.set_source_size_hint(data.len() as u64);
1237                        compressor.set_source(data.as_slice());
1238                        let mut out = Vec::new();
1239                        compressor.set_drain(&mut out);
1240                        compressor.compress();
1241                        out
1242                    };
1243                    let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1244                    assert_eq!(
1245                        frame_header.descriptor.single_segment_flag(),
1246                        size == 512,
1247                        "single_segment 511/512 boundary mismatch: level={level:?} size={size}"
1248                    );
1249                    let mut decoded = Vec::new();
1250                    zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap_or_else(
1251                        |e| {
1252                            panic!(
1253                                "ffi decode failed at single-segment boundary: level={level:?} size={size} seed={} err={e}",
1254                                seed + seed_idx as u64
1255                            )
1256                        },
1257                    );
1258                    assert_eq!(decoded, data);
1259                }
1260            }
1261        }
1262    }
1263
1264    #[cfg(feature = "std")]
1265    #[test]
1266    fn fastest_random_block_uses_raw_fast_path() {
1267        let data = generate_data(0xC0FF_EE11, 10 * 1024);
1268        let compressed =
1269            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Fastest);
1270
1271        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1272
1273        let mut decoded = Vec::new();
1274        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1275        assert_eq!(decoded, data);
1276    }
1277
1278    #[cfg(feature = "std")]
1279    #[test]
1280    fn default_random_block_uses_raw_fast_path() {
1281        let data = generate_data(0xD15E_A5ED, 10 * 1024);
1282        let compressed =
1283            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Default);
1284
1285        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1286
1287        let mut decoded = Vec::new();
1288        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1289        assert_eq!(decoded, data);
1290    }
1291
1292    #[cfg(feature = "std")]
1293    #[test]
1294    fn best_random_block_uses_raw_fast_path() {
1295        let data = generate_data(0xB35C_AFE1, 10 * 1024);
1296        let compressed =
1297            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Best);
1298
1299        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1300
1301        let mut decoded = Vec::new();
1302        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1303        assert_eq!(decoded, data);
1304    }
1305
1306    #[cfg(feature = "std")]
1307    #[test]
1308    fn level2_random_block_uses_raw_fast_path() {
1309        let data = generate_data(0xA11C_E222, 10 * 1024);
1310        let compressed =
1311            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Level(2));
1312
1313        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1314
1315        let mut decoded = Vec::new();
1316        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1317        assert_eq!(decoded, data);
1318    }
1319
1320    #[cfg(feature = "std")]
1321    #[test]
1322    fn better_random_block_uses_raw_fast_path() {
1323        let data = generate_data(0xBE77_E111, 10 * 1024);
1324        let compressed =
1325            crate::encoding::compress_to_vec(data.as_slice(), super::CompressionLevel::Better);
1326
1327        assert_eq!(first_block_type(&compressed), BlockType::Raw);
1328
1329        let mut decoded = Vec::new();
1330        zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1331        assert_eq!(decoded, data);
1332    }
1333
1334    #[cfg(feature = "std")]
1335    #[test]
1336    fn compressible_logs_do_not_fall_back_to_raw_fast_path() {
1337        let mut data = Vec::with_capacity(16 * 1024);
1338        const LINE: &[u8] =
1339            b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
1340        while data.len() < 16 * 1024 {
1341            let remaining = 16 * 1024 - data.len();
1342            data.extend_from_slice(&LINE[..LINE.len().min(remaining)]);
1343        }
1344
1345        fn assert_not_raw_for_level(data: &[u8], level: super::CompressionLevel) {
1346            let compressed = crate::encoding::compress_to_vec(data, level);
1347            assert_ne!(first_block_type(&compressed), BlockType::Raw);
1348            assert!(
1349                compressed.len() < data.len(),
1350                "compressible input should remain compressible for level={level:?}"
1351            );
1352            let mut decoded = Vec::new();
1353            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded).unwrap();
1354            assert_eq!(decoded, data);
1355        }
1356
1357        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Fastest);
1358        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Default);
1359        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Level(3));
1360        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Better);
1361        assert_not_raw_for_level(data.as_slice(), super::CompressionLevel::Best);
1362    }
1363
1364    #[cfg(feature = "std")]
1365    #[test]
1366    fn hinted_small_compressible_frames_use_single_segment_across_levels() {
1367        let mut data = Vec::with_capacity(4 * 1024);
1368        const LINE: &[u8] =
1369            b"ts=2026-04-10T00:00:00Z level=INFO tenant=demo op=flush table=orders\n";
1370        while data.len() < 4 * 1024 {
1371            let remaining = 4 * 1024 - data.len();
1372            data.extend_from_slice(&LINE[..LINE.len().min(remaining)]);
1373        }
1374
1375        for level in [
1376            super::CompressionLevel::Fastest,
1377            super::CompressionLevel::Default,
1378            super::CompressionLevel::Better,
1379            super::CompressionLevel::Best,
1380            super::CompressionLevel::Level(0),
1381            super::CompressionLevel::Level(3),
1382            super::CompressionLevel::Level(4),
1383            super::CompressionLevel::Level(11),
1384        ] {
1385            let compressed = {
1386                let mut compressor = FrameCompressor::new(level);
1387                compressor.set_source_size_hint(data.len() as u64);
1388                compressor.set_source(data.as_slice());
1389                let mut out = Vec::new();
1390                compressor.set_drain(&mut out);
1391                compressor.compress();
1392                out
1393            };
1394            let (frame_header, _) = read_frame_header(compressed.as_slice()).unwrap();
1395            assert!(
1396                frame_header.descriptor.single_segment_flag(),
1397                "hinted small compressible frame should use single-segment (level={level:?})"
1398            );
1399            assert_ne!(
1400                first_block_type(&compressed),
1401                BlockType::Raw,
1402                "compressible hinted frame should stay off raw fast path (level={level:?})"
1403            );
1404            assert!(
1405                compressed.len() < data.len(),
1406                "compressible hinted frame should still shrink (level={level:?})"
1407            );
1408            let mut decoded = Vec::new();
1409            zstd::stream::copy_decode(compressed.as_slice(), &mut decoded)
1410                .unwrap_or_else(|e| panic!("ffi decode failed (level={level:?}): {e}"));
1411            assert_eq!(decoded, data);
1412        }
1413    }
1414
1415    struct NoDictionaryMatcher {
1416        last_space: Vec<u8>,
1417        window_size: u64,
1418    }
1419
1420    impl NoDictionaryMatcher {
1421        fn new(window_size: u64) -> Self {
1422            Self {
1423                last_space: Vec::new(),
1424                window_size,
1425            }
1426        }
1427    }
1428
1429    impl Matcher for NoDictionaryMatcher {
1430        fn get_next_space(&mut self) -> Vec<u8> {
1431            vec![0; self.window_size as usize]
1432        }
1433
1434        fn get_last_space(&mut self) -> &[u8] {
1435            self.last_space.as_slice()
1436        }
1437
1438        fn commit_space(&mut self, space: Vec<u8>) {
1439            self.last_space = space;
1440        }
1441
1442        fn skip_matching(&mut self) {}
1443
1444        fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
1445            handle_sequence(Sequence::Literals {
1446                literals: self.last_space.as_slice(),
1447            });
1448        }
1449
1450        fn reset(&mut self, _level: super::CompressionLevel) {
1451            self.last_space.clear();
1452        }
1453
1454        fn window_size(&self) -> u64 {
1455            self.window_size
1456        }
1457    }
1458
1459    #[test]
1460    fn frame_starts_with_magic_num() {
1461        let mock_data = [1_u8, 2, 3].as_slice();
1462        let mut output: Vec<u8> = Vec::new();
1463        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1464        compressor.set_source(mock_data);
1465        compressor.set_drain(&mut output);
1466
1467        compressor.compress();
1468        assert!(output.starts_with(&MAGIC_NUM.to_le_bytes()));
1469    }
1470
1471    #[test]
1472    fn very_simple_raw_compress() {
1473        let mock_data = [1_u8, 2, 3].as_slice();
1474        let mut output: Vec<u8> = Vec::new();
1475        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1476        compressor.set_source(mock_data);
1477        compressor.set_drain(&mut output);
1478
1479        compressor.compress();
1480    }
1481
1482    #[test]
1483    fn very_simple_compress() {
1484        let mut mock_data = vec![0; 1 << 17];
1485        mock_data.extend(vec![1; (1 << 17) - 1]);
1486        mock_data.extend(vec![2; (1 << 18) - 1]);
1487        mock_data.extend(vec![2; 1 << 17]);
1488        mock_data.extend(vec![3; (1 << 17) - 1]);
1489        let mut output: Vec<u8> = Vec::new();
1490        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1491        compressor.set_source(mock_data.as_slice());
1492        compressor.set_drain(&mut output);
1493
1494        compressor.compress();
1495
1496        let mut decoder = FrameDecoder::new();
1497        let mut decoded = Vec::with_capacity(mock_data.len());
1498        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1499        assert_eq!(mock_data, decoded);
1500
1501        let mut decoded = Vec::new();
1502        zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
1503        assert_eq!(mock_data, decoded);
1504    }
1505
1506    #[test]
1507    fn rle_compress() {
1508        let mock_data = vec![0; 1 << 19];
1509        let mut output: Vec<u8> = Vec::new();
1510        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1511        compressor.set_source(mock_data.as_slice());
1512        compressor.set_drain(&mut output);
1513
1514        compressor.compress();
1515
1516        let mut decoder = FrameDecoder::new();
1517        let mut decoded = Vec::with_capacity(mock_data.len());
1518        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1519        assert_eq!(mock_data, decoded);
1520    }
1521
1522    #[test]
1523    fn aaa_compress() {
1524        let mock_data = vec![0, 1, 3, 4, 5];
1525        let mut output: Vec<u8> = Vec::new();
1526        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1527        compressor.set_source(mock_data.as_slice());
1528        compressor.set_drain(&mut output);
1529
1530        compressor.compress();
1531
1532        let mut decoder = FrameDecoder::new();
1533        let mut decoded = Vec::with_capacity(mock_data.len());
1534        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1535        assert_eq!(mock_data, decoded);
1536
1537        let mut decoded = Vec::new();
1538        zstd::stream::copy_decode(output.as_slice(), &mut decoded).unwrap();
1539        assert_eq!(mock_data, decoded);
1540    }
1541
1542    #[test]
1543    fn dictionary_compression_sets_required_dict_id_and_roundtrips() {
1544        let dict_raw = include_bytes!("../../dict_tests/dictionary");
1545        let dict_for_encoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();
1546        let dict_for_decoder = crate::decoding::Dictionary::decode_dict(dict_raw).unwrap();
1547
1548        let mut data = Vec::new();
1549        for _ in 0..8 {
1550            data.extend_from_slice(&dict_for_decoder.dict_content[..2048]);
1551        }
1552
1553        let mut with_dict = Vec::new();
1554        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1555        let previous = compressor
1556            .set_dictionary_from_bytes(dict_raw)
1557            .expect("dictionary bytes should parse");
1558        assert!(
1559            previous.is_none(),
1560            "first dictionary insert should return None"
1561        );
1562        assert_eq!(
1563            compressor
1564                .set_dictionary(dict_for_encoder)
1565                .expect("valid dictionary should attach")
1566                .expect("set_dictionary_from_bytes inserted previous dictionary")
1567                .id,
1568            dict_for_decoder.id
1569        );
1570        compressor.set_source(data.as_slice());
1571        compressor.set_drain(&mut with_dict);
1572        compressor.compress();
1573
1574        let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
1575            .expect("encoded stream should have a frame header");
1576        assert_eq!(frame_header.dictionary_id(), Some(dict_for_decoder.id));
1577
1578        let mut decoder = FrameDecoder::new();
1579        let mut missing_dict_target = Vec::with_capacity(data.len());
1580        let err = decoder
1581            .decode_all_to_vec(&with_dict, &mut missing_dict_target)
1582            .unwrap_err();
1583        assert!(
1584            matches!(
1585                &err,
1586                crate::decoding::errors::FrameDecoderError::DictNotProvided { .. }
1587            ),
1588            "dict-compressed stream should require dictionary id, got: {err:?}"
1589        );
1590
1591        let mut decoder = FrameDecoder::new();
1592        decoder.add_dict(dict_for_decoder).unwrap();
1593        let mut decoded = Vec::with_capacity(data.len());
1594        decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
1595        assert_eq!(decoded, data);
1596
1597        let mut ffi_decoder = zstd::bulk::Decompressor::with_dictionary(dict_raw).unwrap();
1598        let mut ffi_decoded = Vec::with_capacity(data.len());
1599        let ffi_written = ffi_decoder
1600            .decompress_to_buffer(with_dict.as_slice(), &mut ffi_decoded)
1601            .unwrap();
1602        assert_eq!(ffi_written, data.len());
1603        assert_eq!(ffi_decoded, data);
1604    }
1605
1606    #[cfg(all(feature = "dict_builder", feature = "std"))]
1607    #[test]
1608    fn dictionary_compression_roundtrips_with_dict_builder_dictionary() {
1609        use std::io::Cursor;
1610
1611        let mut training = Vec::new();
1612        for idx in 0..256u32 {
1613            training.extend_from_slice(
1614                format!("tenant=demo table=orders key={idx} region=eu\n").as_bytes(),
1615            );
1616        }
1617        let mut raw_dict = Vec::new();
1618        crate::dictionary::create_raw_dict_from_source(
1619            Cursor::new(training.as_slice()),
1620            training.len(),
1621            &mut raw_dict,
1622            4096,
1623        )
1624        .expect("dict_builder training should succeed");
1625        assert!(
1626            !raw_dict.is_empty(),
1627            "dict_builder produced an empty dictionary"
1628        );
1629
1630        let dict_id = 0xD1C7_0008;
1631        let encoder_dict =
1632            crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
1633        let decoder_dict =
1634            crate::decoding::Dictionary::from_raw_content(dict_id, raw_dict.clone()).unwrap();
1635
1636        let mut payload = Vec::new();
1637        for idx in 0..96u32 {
1638            payload.extend_from_slice(
1639                format!(
1640                    "tenant=demo table=orders op=put key={idx} value=aaaaabbbbbcccccdddddeeeee\n"
1641                )
1642                .as_bytes(),
1643            );
1644        }
1645
1646        let mut without_dict = Vec::new();
1647        let mut baseline = FrameCompressor::new(super::CompressionLevel::Fastest);
1648        baseline.set_source(payload.as_slice());
1649        baseline.set_drain(&mut without_dict);
1650        baseline.compress();
1651
1652        let mut with_dict = Vec::new();
1653        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1654        compressor
1655            .set_dictionary(encoder_dict)
1656            .expect("valid dict_builder dictionary should attach");
1657        compressor.set_source(payload.as_slice());
1658        compressor.set_drain(&mut with_dict);
1659        compressor.compress();
1660
1661        let (frame_header, _) = crate::decoding::frame::read_frame_header(with_dict.as_slice())
1662            .expect("encoded stream should have a frame header");
1663        assert_eq!(frame_header.dictionary_id(), Some(dict_id));
1664        let mut decoder = FrameDecoder::new();
1665        decoder.add_dict(decoder_dict).unwrap();
1666        let mut decoded = Vec::with_capacity(payload.len());
1667        decoder.decode_all_to_vec(&with_dict, &mut decoded).unwrap();
1668        assert_eq!(decoded, payload);
1669        assert!(
1670            with_dict.len() < without_dict.len(),
1671            "trained dictionary should improve compression for this small payload"
1672        );
1673    }
1674
1675    #[test]
1676    fn set_dictionary_from_bytes_seeds_entropy_tables_for_first_block() {
1677        let dict_raw = include_bytes!("../../dict_tests/dictionary");
1678        let mut output = Vec::new();
1679        let input = b"";
1680
1681        let mut compressor = FrameCompressor::new(super::CompressionLevel::Fastest);
1682        let previous = compressor
1683            .set_dictionary_from_bytes(dict_raw)
1684            .expect("dictionary bytes should parse");
1685        assert!(previous.is_none());
1686
1687        compressor.set_source(input.as_slice());
1688        compressor.set_drain(&mut output);
1689        compressor.compress();
1690
1691        assert!(
1692            compressor.state.last_huff_table.is_some(),
1693            "dictionary entropy should seed previous huffman table before first block"
1694        );
1695        assert!(
1696            compressor.state.fse_tables.ll_previous.is_some(),
1697            "dictionary entropy should seed previous ll table before first block"
1698        );
1699        assert!(
1700            compressor.state.fse_tables.ml_previous.is_some(),
1701            "dictionary entropy should seed previous ml table before first block"
1702        );
1703        assert!(
1704            compressor.state.fse_tables.of_previous.is_some(),
1705            "dictionary entropy should seed previous of table before first block"
1706        );
1707    }
1708
1709    #[test]
1710    fn set_dictionary_rejects_zero_dictionary_id() {
1711        let invalid = crate::decoding::Dictionary {
1712            id: 0,
1713            fse: crate::decoding::scratch::FSEScratch::new(),
1714            huf: crate::decoding::scratch::HuffmanScratch::new(),
1715            dict_content: vec![1, 2, 3],
1716            offset_hist: [1, 4, 8],
1717        };
1718
1719        let mut compressor: FrameCompressor<
1720            &[u8],
1721            Vec<u8>,
1722            crate::encoding::match_generator::MatchGeneratorDriver,
1723        > = FrameCompressor::new(super::CompressionLevel::Fastest);
1724        let result = compressor.set_dictionary(invalid);
1725        assert!(matches!(
1726            result,
1727            Err(crate::decoding::errors::DictionaryDecodeError::ZeroDictionaryId)
1728        ));
1729    }
1730
1731    #[test]
1732    fn set_dictionary_rejects_zero_repeat_offsets() {
1733        let invalid = crate::decoding::Dictionary {
1734            id: 1,
1735            fse: crate::decoding::scratch::FSEScratch::new(),
1736            huf: crate::decoding::scratch::HuffmanScratch::new(),
1737            dict_content: vec![1, 2, 3],
1738            offset_hist: [0, 4, 8],
1739        };
1740
1741        let mut compressor: FrameCompressor<
1742            &[u8],
1743            Vec<u8>,
1744            crate::encoding::match_generator::MatchGeneratorDriver,
1745        > = FrameCompressor::new(super::CompressionLevel::Fastest);
1746        let result = compressor.set_dictionary(invalid);
1747        assert!(matches!(
1748            result,
1749            Err(
1750                crate::decoding::errors::DictionaryDecodeError::ZeroRepeatOffsetInDictionary {
1751                    index: 0
1752                }
1753            )
1754        ));
1755    }
1756
1757    #[test]
1758    fn uncompressed_mode_does_not_require_dictionary() {
1759        let dict_id = 0xABCD_0001;
1760        let dict =
1761            crate::decoding::Dictionary::from_raw_content(dict_id, b"shared-history".to_vec())
1762                .expect("raw dictionary should be valid");
1763
1764        let payload = b"plain-bytes-that-should-stay-raw";
1765        let mut output = Vec::new();
1766        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
1767        compressor
1768            .set_dictionary(dict)
1769            .expect("dictionary should attach in uncompressed mode");
1770        compressor.set_source(payload.as_slice());
1771        compressor.set_drain(&mut output);
1772        compressor.compress();
1773
1774        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1775            .expect("encoded frame should have a header");
1776        assert_eq!(
1777            frame_header.dictionary_id(),
1778            None,
1779            "raw/uncompressed frames must not advertise dictionary dependency"
1780        );
1781
1782        let mut decoder = FrameDecoder::new();
1783        let mut decoded = Vec::with_capacity(payload.len());
1784        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1785        assert_eq!(decoded, payload);
1786    }
1787
1788    #[test]
1789    fn dictionary_roundtrip_stays_valid_after_output_exceeds_window() {
1790        use crate::encoding::match_generator::MatchGeneratorDriver;
1791
1792        let dict_id = 0xABCD_0002;
1793        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1794            .expect("raw dictionary should be valid");
1795        let dict_for_decoder =
1796            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1797                .expect("raw dictionary should be valid");
1798
1799        // Payload must exceed the encoder's advertised window (512 KiB
1800        // for Fastest after `window_log = 19` alignment with donor's
1801        // L1 fast row in `clevels.h`) so the test actually exercises
1802        // cross-window-boundary behavior.
1803        let payload = b"abcdefgh".repeat(512 * 1024 / 8 + 64);
1804        let matcher = MatchGeneratorDriver::new(1024, 1);
1805
1806        let mut no_dict_output = Vec::new();
1807        let mut no_dict_compressor =
1808            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1809        no_dict_compressor.set_source(payload.as_slice());
1810        no_dict_compressor.set_drain(&mut no_dict_output);
1811        no_dict_compressor.compress();
1812        let (no_dict_frame_header, _) =
1813            crate::decoding::frame::read_frame_header(no_dict_output.as_slice())
1814                .expect("baseline frame should have a header");
1815        let no_dict_window = no_dict_frame_header
1816            .window_size()
1817            .expect("window size should be present");
1818
1819        let mut output = Vec::new();
1820        let matcher = MatchGeneratorDriver::new(1024, 1);
1821        let mut compressor =
1822            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1823        compressor
1824            .set_dictionary(dict)
1825            .expect("dictionary should attach");
1826        compressor.set_source(payload.as_slice());
1827        compressor.set_drain(&mut output);
1828        compressor.compress();
1829
1830        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1831            .expect("encoded frame should have a header");
1832        let advertised_window = frame_header
1833            .window_size()
1834            .expect("window size should be present");
1835        assert_eq!(
1836            advertised_window, no_dict_window,
1837            "dictionary priming must not inflate advertised window size"
1838        );
1839        assert!(
1840            payload.len() > advertised_window as usize,
1841            "test must cross the advertised window boundary"
1842        );
1843
1844        let mut decoder = FrameDecoder::new();
1845        decoder.add_dict(dict_for_decoder).unwrap();
1846        let mut decoded = Vec::with_capacity(payload.len());
1847        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1848        assert_eq!(decoded, payload);
1849    }
1850
1851    #[test]
1852    fn source_size_hint_with_dictionary_keeps_roundtrip_and_nonincreasing_window() {
1853        let dict_id = 0xABCD_0004;
1854        let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history
1855        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap();
1856        let dict_for_decoder =
1857            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
1858        let payload = b"abcdabcdabcdabcd".repeat(128);
1859
1860        let mut hinted_output = Vec::new();
1861        let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
1862        hinted.set_dictionary(dict).unwrap();
1863        hinted.set_source_size_hint(1);
1864        hinted.set_source(payload.as_slice());
1865        hinted.set_drain(&mut hinted_output);
1866        hinted.compress();
1867
1868        let mut no_hint_output = Vec::new();
1869        let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
1870        no_hint
1871            .set_dictionary(
1872                crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024))
1873                    .unwrap(),
1874            )
1875            .unwrap();
1876        no_hint.set_source(payload.as_slice());
1877        no_hint.set_drain(&mut no_hint_output);
1878        no_hint.compress();
1879
1880        let hinted_window = crate::decoding::frame::read_frame_header(hinted_output.as_slice())
1881            .expect("encoded frame should have a header")
1882            .0
1883            .window_size()
1884            .expect("window size should be present");
1885        let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice())
1886            .expect("encoded frame should have a header")
1887            .0
1888            .window_size()
1889            .expect("window size should be present");
1890        assert!(
1891            hinted_window <= no_hint_window,
1892            "source-size hint should not increase advertised window with dictionary priming",
1893        );
1894
1895        let mut decoder = FrameDecoder::new();
1896        decoder.add_dict(dict_for_decoder).unwrap();
1897        let mut decoded = Vec::with_capacity(payload.len());
1898        decoder
1899            .decode_all_to_vec(&hinted_output, &mut decoded)
1900            .unwrap();
1901        assert_eq!(decoded, payload);
1902    }
1903
1904    #[test]
1905    fn source_size_hint_with_dictionary_keeps_roundtrip_for_larger_payload() {
1906        let dict_id = 0xABCD_0005;
1907        let dict_content = b"abcd".repeat(1024); // 4 KiB dictionary history
1908        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, dict_content).unwrap();
1909        let dict_for_decoder =
1910            crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024)).unwrap();
1911        let payload = b"abcd".repeat(1024); // 4 KiB payload
1912        let payload_len = payload.len() as u64;
1913
1914        let mut hinted_output = Vec::new();
1915        let mut hinted = FrameCompressor::new(super::CompressionLevel::Fastest);
1916        hinted.set_dictionary(dict).unwrap();
1917        hinted.set_source_size_hint(payload_len);
1918        hinted.set_source(payload.as_slice());
1919        hinted.set_drain(&mut hinted_output);
1920        hinted.compress();
1921
1922        let mut no_hint_output = Vec::new();
1923        let mut no_hint = FrameCompressor::new(super::CompressionLevel::Fastest);
1924        no_hint
1925            .set_dictionary(
1926                crate::decoding::Dictionary::from_raw_content(dict_id, b"abcd".repeat(1024))
1927                    .unwrap(),
1928            )
1929            .unwrap();
1930        no_hint.set_source(payload.as_slice());
1931        no_hint.set_drain(&mut no_hint_output);
1932        no_hint.compress();
1933
1934        let hinted_window = crate::decoding::frame::read_frame_header(hinted_output.as_slice())
1935            .expect("encoded frame should have a header")
1936            .0
1937            .window_size()
1938            .expect("window size should be present");
1939        let no_hint_window = crate::decoding::frame::read_frame_header(no_hint_output.as_slice())
1940            .expect("encoded frame should have a header")
1941            .0
1942            .window_size()
1943            .expect("window size should be present");
1944        assert!(
1945            hinted_window <= no_hint_window,
1946            "source-size hint should not increase advertised window with dictionary priming",
1947        );
1948
1949        let mut decoder = FrameDecoder::new();
1950        decoder.add_dict(dict_for_decoder).unwrap();
1951        let mut decoded = Vec::with_capacity(payload.len());
1952        decoder
1953            .decode_all_to_vec(&hinted_output, &mut decoded)
1954            .unwrap();
1955        assert_eq!(decoded, payload);
1956    }
1957
1958    #[test]
1959    fn custom_matcher_without_dictionary_priming_does_not_advertise_dict_id() {
1960        let dict_id = 0xABCD_0003;
1961        let dict = crate::decoding::Dictionary::from_raw_content(dict_id, b"abcdefgh".to_vec())
1962            .expect("raw dictionary should be valid");
1963        let payload = b"abcdefghabcdefgh";
1964
1965        let mut output = Vec::new();
1966        let matcher = NoDictionaryMatcher::new(64);
1967        let mut compressor =
1968            FrameCompressor::new_with_matcher(matcher, super::CompressionLevel::Fastest);
1969        compressor
1970            .set_dictionary(dict)
1971            .expect("dictionary should attach");
1972        compressor.set_source(payload.as_slice());
1973        compressor.set_drain(&mut output);
1974        compressor.compress();
1975
1976        let (frame_header, _) = crate::decoding::frame::read_frame_header(output.as_slice())
1977            .expect("encoded frame should have a header");
1978        assert_eq!(
1979            frame_header.dictionary_id(),
1980            None,
1981            "matchers that do not support dictionary priming must not advertise dictionary dependency"
1982        );
1983
1984        let mut decoder = FrameDecoder::new();
1985        let mut decoded = Vec::with_capacity(payload.len());
1986        decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
1987        assert_eq!(decoded, payload);
1988    }
1989
1990    #[cfg(feature = "hash")]
1991    #[test]
1992    fn checksum_two_frames_reused_compressor() {
1993        // Compress the same data twice using the same compressor and verify that:
1994        // 1. The checksum written in each frame matches what the decoder calculates.
1995        // 2. The hasher is correctly reset between frames (no cross-contamination).
1996        //    If the hasher were NOT reset, the second frame's calculated checksum
1997        //    would differ from the one stored in the frame data, causing assert_eq to fail.
1998        let data: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
1999
2000        let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed);
2001
2002        // --- Frame 1 ---
2003        let mut compressed1 = Vec::new();
2004        compressor.set_source(data.as_slice());
2005        compressor.set_drain(&mut compressed1);
2006        compressor.compress();
2007
2008        // --- Frame 2 (reuse the same compressor) ---
2009        let mut compressed2 = Vec::new();
2010        compressor.set_source(data.as_slice());
2011        compressor.set_drain(&mut compressed2);
2012        compressor.compress();
2013
2014        fn decode_and_collect(compressed: &[u8]) -> (Vec<u8>, Option<u32>, Option<u32>) {
2015            let mut decoder = FrameDecoder::new();
2016            let mut source = compressed;
2017            decoder.reset(&mut source).unwrap();
2018            while !decoder.is_finished() {
2019                decoder
2020                    .decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
2021                    .unwrap();
2022            }
2023            let mut decoded = Vec::new();
2024            decoder.collect_to_writer(&mut decoded).unwrap();
2025            (
2026                decoded,
2027                decoder.get_checksum_from_data(),
2028                decoder.get_calculated_checksum(),
2029            )
2030        }
2031
2032        let (decoded1, chksum_from_data1, chksum_calculated1) = decode_and_collect(&compressed1);
2033        assert_eq!(decoded1, data, "frame 1: decoded data mismatch");
2034        assert_eq!(
2035            chksum_from_data1, chksum_calculated1,
2036            "frame 1: checksum mismatch"
2037        );
2038
2039        let (decoded2, chksum_from_data2, chksum_calculated2) = decode_and_collect(&compressed2);
2040        assert_eq!(decoded2, data, "frame 2: decoded data mismatch");
2041        assert_eq!(
2042            chksum_from_data2, chksum_calculated2,
2043            "frame 2: checksum mismatch"
2044        );
2045
2046        // Same data compressed twice must produce the same checksum.
2047        // If state leaked across frames, the second calculated checksum would differ.
2048        assert_eq!(
2049            chksum_from_data1, chksum_from_data2,
2050            "frame 1 and frame 2 should have the same checksum (same data, hash must reset per frame)"
2051        );
2052    }
2053
2054    #[cfg(feature = "std")]
2055    #[test]
2056    fn fuzz_targets() {
2057        use std::io::Read;
2058        fn decode_szstd(data: &mut dyn std::io::Read) -> Vec<u8> {
2059            let mut decoder = crate::decoding::StreamingDecoder::new(data).unwrap();
2060            let mut result: Vec<u8> = Vec::new();
2061            decoder.read_to_end(&mut result).expect("Decoding failed");
2062            result
2063        }
2064
2065        fn decode_szstd_writer(mut data: impl Read) -> Vec<u8> {
2066            let mut decoder = crate::decoding::FrameDecoder::new();
2067            decoder.reset(&mut data).unwrap();
2068            let mut result = vec![];
2069            while !decoder.is_finished() || decoder.can_collect() > 0 {
2070                decoder
2071                    .decode_blocks(
2072                        &mut data,
2073                        crate::decoding::BlockDecodingStrategy::UptoBytes(1024 * 1024),
2074                    )
2075                    .unwrap();
2076                decoder.collect_to_writer(&mut result).unwrap();
2077            }
2078            result
2079        }
2080
2081        fn encode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
2082            zstd::stream::encode_all(std::io::Cursor::new(data), 3)
2083        }
2084
2085        fn encode_szstd_uncompressed(data: &mut dyn std::io::Read) -> Vec<u8> {
2086            let mut input = Vec::new();
2087            data.read_to_end(&mut input).unwrap();
2088
2089            crate::encoding::compress_to_vec(
2090                input.as_slice(),
2091                crate::encoding::CompressionLevel::Uncompressed,
2092            )
2093        }
2094
2095        fn encode_szstd_compressed(data: &mut dyn std::io::Read) -> Vec<u8> {
2096            let mut input = Vec::new();
2097            data.read_to_end(&mut input).unwrap();
2098
2099            crate::encoding::compress_to_vec(
2100                input.as_slice(),
2101                crate::encoding::CompressionLevel::Fastest,
2102            )
2103        }
2104
2105        fn decode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
2106            let mut output = Vec::new();
2107            zstd::stream::copy_decode(data, &mut output)?;
2108            Ok(output)
2109        }
2110        if std::fs::exists("fuzz/artifacts/interop").unwrap_or(false) {
2111            for file in std::fs::read_dir("fuzz/artifacts/interop").unwrap() {
2112                if file.as_ref().unwrap().file_type().unwrap().is_file() {
2113                    let data = std::fs::read(file.unwrap().path()).unwrap();
2114                    let data = data.as_slice();
2115                    // Decoding
2116                    let compressed = encode_zstd(data).unwrap();
2117                    let decoded = decode_szstd(&mut compressed.as_slice());
2118                    let decoded2 = decode_szstd_writer(&mut compressed.as_slice());
2119                    assert!(
2120                        decoded == data,
2121                        "Decoded data did not match the original input during decompression"
2122                    );
2123                    assert_eq!(
2124                        decoded2, data,
2125                        "Decoded data did not match the original input during decompression"
2126                    );
2127
2128                    // Encoding
2129                    // Uncompressed encoding
2130                    let mut input = data;
2131                    let compressed = encode_szstd_uncompressed(&mut input);
2132                    let decoded = decode_zstd(&compressed).unwrap();
2133                    assert_eq!(
2134                        decoded, data,
2135                        "Decoded data did not match the original input during compression"
2136                    );
2137                    // Compressed encoding
2138                    let mut input = data;
2139                    let compressed = encode_szstd_compressed(&mut input);
2140                    let decoded = decode_zstd(&compressed).unwrap();
2141                    assert_eq!(
2142                        decoded, data,
2143                        "Decoded data did not match the original input during compression"
2144                    );
2145                }
2146            }
2147        }
2148    }
2149
2150    /// Homogeneous input — every byte the same — must NOT be split:
2151    /// both border histograms are identical (all 512 hits on a single
2152    /// slot), so `presplit_fingerprints_differ` returns `false` and the
2153    /// function takes the early-return path at
2154    /// `zstd_preSplit.c:214` returning `blockSize`.
2155    #[test]
2156    fn donor_split_block_from_borders_keeps_homogeneous_block() {
2157        let block = vec![0xAAu8; MAX_BLOCK_SIZE as usize];
2158        let split = super::donor_split_block_from_borders(&block);
2159        assert_eq!(split, MAX_BLOCK_SIZE as usize);
2160    }
2161
2162    /// Heterogeneous input — first half all zeros, second half a
2163    /// counter sequence — has clearly distinguishable border
2164    /// histograms, so the borders heuristic decides to split.
2165    ///
2166    /// The transition sits at exactly the block midpoint, so the
2167    /// middle 512-byte sample (`block[mid-256..mid+256]`) is half
2168    /// zeros + half counter values. That makes it roughly
2169    /// equidistant from both border fingerprints — the
2170    /// `abs_diff(dist_from_begin, dist_from_end) < min_distance`
2171    /// branch fires and the heuristic returns the midpoint (64 KiB)
2172    /// per `zstd_preSplit.c:222`. The test asserts the exact value
2173    /// rather than just "one of {32K, 64K, 96K}" so a regression
2174    /// to a different quantised arm cannot silently slip through.
2175    #[test]
2176    fn donor_split_block_from_borders_returns_midpoint_for_centred_transition() {
2177        let mut block = vec![0u8; MAX_BLOCK_SIZE as usize];
2178        for (i, byte) in block
2179            .iter_mut()
2180            .enumerate()
2181            .skip(MAX_BLOCK_SIZE as usize / 2)
2182        {
2183            *byte = (i % 251 + 1) as u8;
2184        }
2185        let split = super::donor_split_block_from_borders(&block);
2186        assert_eq!(
2187            split,
2188            64 * 1024,
2189            "centred-transition fixture must take the symmetric \
2190             midpoint arm (`abs_diff < min_distance`), got {split}"
2191        );
2192    }
2193
2194    /// `donor_pre_split_level` maps mid-range levels to the cheap
2195    /// borders heuristic and high levels to the byChunks path. Levels
2196    /// below 11 stay unsplit so the splitter never runs on fast /
2197    /// default presets where its per-block cost would dominate.
2198    #[test]
2199    fn donor_pre_split_level_dispatches_by_compression_level() {
2200        use crate::encoding::CompressionLevel;
2201        assert_eq!(
2202            super::donor_pre_split_level(CompressionLevel::Fastest),
2203            None
2204        );
2205        assert_eq!(
2206            super::donor_pre_split_level(CompressionLevel::Default),
2207            None
2208        );
2209        assert_eq!(super::donor_pre_split_level(CompressionLevel::Better), None);
2210        assert_eq!(
2211            super::donor_pre_split_level(CompressionLevel::Level(7)),
2212            None
2213        );
2214        assert_eq!(
2215            super::donor_pre_split_level(CompressionLevel::Level(11)),
2216            Some(0)
2217        );
2218        assert_eq!(
2219            super::donor_pre_split_level(CompressionLevel::Level(15)),
2220            Some(0)
2221        );
2222        assert_eq!(
2223            super::donor_pre_split_level(CompressionLevel::Level(16)),
2224            Some(4)
2225        );
2226        assert_eq!(
2227            super::donor_pre_split_level(CompressionLevel::Level(22)),
2228            Some(4)
2229        );
2230    }
2231
2232    /// End-to-end: a 256 KB heterogeneous payload compressed at
2233    /// Level(13) (borders heuristic active) round-trips through the
2234    /// crate's own decoder. The pre-split path runs over the first
2235    /// 128 KB block and emits two consecutive sub-blocks; the second
2236    /// 128 KB block goes through the splitter on its own. The test
2237    /// proves the split decisions do not corrupt the frame bitstream.
2238    #[test]
2239    fn level_13_borders_split_roundtrips_through_own_decoder() {
2240        use crate::encoding::CompressionLevel;
2241        let mut data = vec![0u8; 256 * 1024];
2242        // First 128 KB: low-entropy repeating run; second 128 KB:
2243        // counter sequence — clearly distinct border histograms.
2244        for (i, byte) in data.iter_mut().enumerate() {
2245            *byte = if i < 128 * 1024 {
2246                (i & 0x07) as u8
2247            } else {
2248                (i % 251 + 1) as u8
2249            };
2250        }
2251
2252        let mut compressed = Vec::new();
2253        let mut compressor = FrameCompressor::new(CompressionLevel::Level(13));
2254        compressor.set_source(data.as_slice());
2255        compressor.set_drain(&mut compressed);
2256        compressor.compress();
2257
2258        let mut decoder = FrameDecoder::new();
2259        let mut source = compressed.as_slice();
2260        decoder
2261            .reset(&mut source)
2262            .expect("frame header should parse");
2263        while !decoder.is_finished() {
2264            decoder
2265                .decode_blocks(&mut source, crate::decoding::BlockDecodingStrategy::All)
2266                .expect("decode should succeed");
2267        }
2268        let mut decoded = Vec::with_capacity(data.len());
2269        decoder.collect_to_writer(&mut decoded).unwrap();
2270        assert_eq!(decoded, data, "roundtrip must reproduce the input verbatim");
2271    }
2272
2273    /// Regression: `set_compression_level` followed by `compress()` must
2274    /// refresh `state.strategy_tag` through the reset-time sync so the
2275    /// literal-compression gates (`min_literals_to_compress`,
2276    /// `min_gain`) use the NEW level's strategy. Picks a level pair
2277    /// that genuinely crosses strategy bands — `Fastest` resolves to
2278    /// `Fast`, `Level(20)` resolves to `BtUltra2` — so a missed sync
2279    /// would leave the construction-time tag visible and trip the
2280    /// assertion. `CompressionLevel::Best` would also pass type-wise
2281    /// but resolves to `Lazy` today, which keeps `min_literals_to_compress`
2282    /// in the same `shift=3 → 64-byte` band as `Fast` and weakens the
2283    /// signal that the gate floor actually moved.
2284    #[cfg(feature = "std")]
2285    #[test]
2286    fn set_compression_level_then_compress_refreshes_strategy_tag() {
2287        use super::CompressionLevel;
2288        use crate::encoding::strategy::StrategyTag;
2289
2290        let data = vec![0xABu8; 256];
2291        let mut out = Vec::new();
2292        let mut compressor = FrameCompressor::new(CompressionLevel::Fastest);
2293        let initial_tag = compressor.state.strategy_tag;
2294        assert_eq!(
2295            initial_tag,
2296            StrategyTag::for_compression_level(CompressionLevel::Fastest),
2297            "construction-time strategy_tag must reflect initial level",
2298        );
2299
2300        // Switch to a level whose resolved strategy lives in a different
2301        // band, then run a full compress cycle — the matcher.reset()
2302        // inside `compress` is the only site that can refresh the tag.
2303        let new_level = CompressionLevel::Level(20);
2304        compressor.set_compression_level(new_level);
2305        compressor.set_source(data.as_slice());
2306        compressor.set_drain(&mut out);
2307        compressor.compress();
2308
2309        let new_tag = compressor.state.strategy_tag;
2310        let expected = StrategyTag::for_compression_level(new_level);
2311        assert_eq!(
2312            new_tag, expected,
2313            "strategy_tag must follow set_compression_level → compress, \
2314             got {new_tag:?} expected {expected:?}",
2315        );
2316        assert_eq!(
2317            expected,
2318            StrategyTag::BtUltra2,
2319            "test fixture invariant: Level(20) must resolve to BtUltra2 \
2320             so the post-switch tag visibly crosses the band boundary",
2321        );
2322        assert_ne!(
2323            new_tag, initial_tag,
2324            "test fixture invariant: chosen levels must resolve to \
2325             different StrategyTag variants",
2326        );
2327    }
2328
2329    /// Magicless mode (`ZSTD_f_zstd1_magicless`): encoded frame
2330    /// MUST NOT start with the 4-byte magic prefix, AND must
2331    /// round-trip through a magicless-aware decoder.
2332    #[test]
2333    fn magicless_frame_omits_magic_and_roundtrips() {
2334        use crate::common::MAGIC_NUM;
2335        let input: alloc::vec::Vec<u8> = (0..512u32).map(|i| (i ^ 0xA5) as u8).collect();
2336
2337        // Encode with magicless = true.
2338        let mut output: Vec<u8> = Vec::new();
2339        let mut compressor = FrameCompressor::new(super::CompressionLevel::Default);
2340        compressor.set_magicless(true);
2341        compressor.set_source(input.as_slice());
2342        compressor.set_drain(&mut output);
2343        compressor.compress();
2344
2345        // 1. Encoded output must NOT begin with the zstd magic number.
2346        assert!(
2347            !output.starts_with(&MAGIC_NUM.to_le_bytes()),
2348            "magicless frame must omit the 4-byte magic prefix",
2349        );
2350
2351        // 2. A magicless-aware decoder must round-trip the payload.
2352        let mut decoder = crate::decoding::FrameDecoder::new();
2353        decoder.set_magicless(true);
2354        let mut cursor: &[u8] = output.as_slice();
2355        decoder.init(&mut cursor).expect("magicless init");
2356        decoder
2357            .decode_blocks(&mut cursor, crate::decoding::BlockDecodingStrategy::All)
2358            .expect("decode_blocks");
2359        let mut decoded: Vec<u8> = Vec::new();
2360        decoder
2361            .collect_to_writer(&mut decoded)
2362            .expect("collect_to_writer");
2363        assert_eq!(decoded, input, "magicless roundtrip must preserve bytes");
2364
2365        // 3. A standard (magicful) decoder MUST reject a magicless
2366        //    frame at the header-read step — the first 4 bytes are
2367        //    the frame-header descriptor + window / dictionary / FCS
2368        //    metadata, not the magic. We accept either
2369        //    `BadMagicNumber` (typical case: first 4 bytes don't
2370        //    match `MAGIC_NUM` and don't fall in the skippable-frame
2371        //    magic range) or `SkipFrame` (rare: the first 4 bytes
2372        //    coincidentally land in `0x184D2A50..=0x184D2A5F`). Both
2373        //    prove the standard decoder did not treat the bytes as a
2374        //    real magicful frame.
2375        use crate::decoding::errors::{FrameDecoderError, ReadFrameHeaderError};
2376        let mut std_decoder = crate::decoding::FrameDecoder::new();
2377        let std_init = std_decoder.init(output.as_slice());
2378        match std_init {
2379            Err(FrameDecoderError::ReadFrameHeaderError(
2380                ReadFrameHeaderError::BadMagicNumber(_) | ReadFrameHeaderError::SkipFrame { .. },
2381            )) => {}
2382            other => panic!(
2383                "standard decoder must reject a magicless frame with \
2384                 ReadFrameHeaderError::BadMagicNumber or SkipFrame, got {other:?}",
2385            ),
2386        }
2387    }
2388}
structured_zstd/encoding/frame_compressor.rs

structured_zstd/encoding/
frame_compressor.rs