structured_zstd/encoding/
mod.rs

1//! Zstandard encoder — frame compression, streaming, dictionary support.
2//!
3//! Four entry points cover the common use cases:
4//!
5//! * [`compress`] — one-shot helper that builds a self-contained
6//!   Zstandard frame from a `Read` source to a `Write` sink. The
7//!   input is consumed incrementally from `Read`, so input buffering
8//!   stays bounded; however, the compressed output is buffered in
9//!   memory until the frame is complete so the Frame Content Size
10//!   field can be filled in the header — peak memory is
11//!   `O(compressed_size)` (worst-case `O(input_size)` for
12//!   incompressible payloads, plus a small frame overhead). The
13//!   savings vs [`compress_to_vec`] come from not materialising the
14//!   input alongside the output.
15//! * [`compress_to_vec`] — same one-shot path as [`compress`] but
16//!   the input is eagerly drained into an internal `Vec` first
17//!   (`read_to_end`) so the encoder can be handed a `&[u8]` and a
18//!   precise source-size hint. Peak memory is therefore ≈
19//!   `input_size + output_size`; prefer [`compress`] or
20//!   [`StreamingEncoder`] when the input is large or unbounded.
21//! * [`StreamingEncoder`] — implements [`crate::io::Write`], which
22//!   re-exports [`std::io::Write`] under the `std` feature and falls
23//!   back to a `no_std`-friendly trait otherwise. Accepts bytes
24//!   incrementally and flushes compressed output as blocks fill.
25//!   Requires `set_pledged_content_size` before the first write if
26//!   the Frame Content Size field is to be populated.
27//! * [`FrameCompressor`] — lower-level builder that owns the matcher and
28//!   the per-frame configuration; the streaming and one-shot helpers are
29//!   thin wrappers over it. Reach for it when you need to swap in a custom
30//!   [`Matcher`] implementation or share the matcher across frames.
31//!
32//! Compression intensity is selected via [`CompressionLevel`], which
33//! provides both named presets (`Fastest`, `Default`, `Better`, `Best`) and
34//! numeric levels (`from_level(n)`) that mirror C zstd's level numbering
35//! (negative for ultra-fast, `0` = default, `1..=22` for the standard
36//! range).
37//!
38//! All produced frames are valid RFC 8878 Zstandard streams and decode
39//! through both this crate's [`crate::decoding`] module and upstream C zstd.
40//!
41//! For memory budgeting, [`estimated_compression_workspace_bytes`] reports
42//! the approximate steady-state heap footprint of a one-shot compression at
43//! a given level (window + match-finder tables + block staging).
44
45pub(crate) mod block_header;
46pub(crate) mod blocks;
47pub(crate) mod cparams;
48pub(crate) mod dict_attach;
49pub(crate) mod fastpath;
50pub(crate) mod frame_header;
51pub(crate) mod incompressible;
52pub(crate) mod match_generator;
53pub(crate) mod util;
54
55// `#111` encoder architecture rewrite. `cost_model`, `opt`,
56// `strategy`, `dfast`, `row`, and `simple` host the relocated
57// cost-model types, the optimal-parser plain-data types, the
58// const-generic [`strategy::Strategy`] trait + per-level [`strategy::
59// StrategyTag`] dispatcher, and the Dfast / Row / Simple matchers
60// respectively. `match_table::helpers` hosts the shared match-finder
61// primitives. The rewrite plan is tracked in
62// <https://github.com/structured-world/structured-zstd/issues/111>;
63// per-phase boundaries are `perf/post-pr-110-baseline` (start),
64// `perf/post-pr-121-baseline` (post-Phase-2).
65pub(crate) mod bt;
66pub(crate) mod cost_model;
67pub(crate) mod dfast;
68pub(crate) mod hc;
69// LDM uses `twox_hash::XxHash64` (per-window XXH64 over the
70// `min_match_length` byte slice, upstream zstd `zstd_ldm.c:315`). The
71// `twox-hash` dependency is gated behind the `hash` feature so
72// `default-features = false` builds (no_std, embedded) don't pull
73// it in. `BtMatcher::ldm_producer` and the `cfg(feature = "hash")`
74// blocks inside `BtMatcher::prepare_ldm_candidates` /
75// `BtMatcher::reset` carry the same gate; the call site in
76// `match_generator.rs::start_matching_optimal` invokes
77// `prepare_ldm_candidates` unconditionally because the
78// gating is internal to the method body (under
79// `not(feature = "hash")` the method shrinks to the legacy
80// `ldm_sequences.clear()` stub).
81#[cfg(feature = "hash")]
82pub(crate) mod ldm;
83pub(crate) mod match_table;
84pub(crate) mod opt;
85pub(crate) mod row;
86pub(crate) mod simple;
87pub(crate) mod strategy;
88
89pub(crate) mod frame_compressor;
90#[cfg(feature = "lsm")]
91pub mod frame_emit_info;
92mod levels;
93pub(crate) mod parameters;
94#[cfg(feature = "bench_internals")]
95pub mod sequence_capture;
96mod streaming_encoder;
97pub use frame_compressor::{EncoderDictionary, FrameCompressor};
98#[cfg(feature = "lsm")]
99pub use frame_emit_info::{BlockType, FrameBlock, FrameEmitInfo};
100pub use match_generator::{
101    MatchGeneratorDriver, estimated_bt_strategy_extra_bytes, estimated_compression_workspace_bytes,
102};
103pub use parameters::{
104    Bounds, CParameter, CompressionParameters, CompressionParametersBuilder, ParameterError,
105    Strategy,
106};
107pub use streaming_encoder::StreamingEncoder;
108
109use crate::io::{Read, Write};
110use alloc::vec::Vec;
111
112/// Convenience function to compress some source into a target without reusing any resources of the compressor
113/// ```rust
114/// use structured_zstd::encoding::{compress, CompressionLevel};
115/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
116/// let mut target = Vec::new();
117/// compress(data, &mut target, CompressionLevel::Fastest);
118/// ```
119pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) {
120    let mut frame_enc = FrameCompressor::new(level);
121    frame_enc.set_source(source);
122    frame_enc.set_drain(target);
123    frame_enc.compress();
124}
125
126/// Convenience function to compress some source into a Vec without reusing any resources of the compressor.
127///
128/// This helper eagerly buffers the full input (`Read`) before compression so it
129/// can provide a source-size hint to the one-shot encoder path. Peak memory can
130/// therefore be roughly `input_size + output_size`. For very large payloads or
131/// tighter memory budgets, prefer streaming APIs such as [`StreamingEncoder`].
132///
133/// **This is NOT a streaming API.** The source is fully buffered
134/// into a `Vec<u8>` before any compression work begins, so peak input
135/// memory is bounded by `source.len()` (not "constant regardless of
136/// payload size" as a stream-shaped encoder would offer). If the
137/// source is large enough that holding it in memory is not acceptable,
138/// use [`StreamingEncoder`] which consumes chunks incrementally
139/// without the up-front Vec build.
140///
141/// This helper drives `read_to_end` to materialize the full source
142/// into a `Vec<u8>` before forwarding the slice to
143/// [`compress_slice_to_vec`]. For a `Read` whose size is unknown ahead
144/// of time, `read_to_end` grows that input `Vec` via power-of-two
145/// doubling: peak input allocation can be up to 2× the final source
146/// length transiently. The live working set on this entry point is
147/// roughly `input.capacity()` plus the block-accumulation buffer and
148/// per-block scratch carried by [`compress_slice_to_vec`], plus the
149/// exactly-sized output `Vec`. [`StreamingEncoder`] avoids the input
150/// materialization step entirely and is the right entry point when
151/// the source is large or unbounded.
152///
153/// ```rust
154/// use structured_zstd::encoding::{compress_to_vec, CompressionLevel};
155/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
156/// let compressed = compress_to_vec(data, CompressionLevel::Fastest);
157/// ```
158pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
159    let mut source = source;
160    let mut input = Vec::new();
161    source.read_to_end(&mut input).unwrap();
162    compress_slice_to_vec(input.as_slice(), level)
163}
164
165/// Compress a contiguous byte slice into a fresh `Vec<u8>` without the
166/// input-buffering step that [`compress_to_vec`] performs to adapt a
167/// `Read` source.
168///
169/// One-shot wrapper over
170/// [`FrameCompressor::compress_independent_frame`]: the input is read by
171/// reference (the eligible Fast path scans it in place, no per-block
172/// history copy), and the returned `Vec` is allocated exactly once at the
173/// final frame size after compression. Peak transient memory is the
174/// block-accumulation buffer (grown via amortized doubling, ≈ 2× current
175/// compressed size at the last realloc) plus the exactly-sized output. The
176/// worst-case compressed-size bound is never pinned upfront, so a highly
177/// compressible 100 MiB input does not charge ~100 MiB of worst-case
178/// expansion against peak.
179///
180/// To compress many slices, construct one [`FrameCompressor`] and call
181/// [`compress_independent_frame_into`](FrameCompressor::compress_independent_frame_into)
182/// in a loop instead, which reuses the matcher tables, scratch, and output
183/// buffer across frames (this function allocates and primes from scratch
184/// each call).
185///
186/// # Panics
187///
188/// Panics on encoder error (matches the failure surface of
189/// [`compress_to_vec`], which this function backs). Out-of-memory during
190/// the output / per-block scratch allocations is handled by the global
191/// allocator's abort policy. The slice/Vec entry points mirror the upstream zstd
192/// `ZSTD_compress` shape (no error return on the bulk path).
193///
194/// ```rust
195/// use structured_zstd::encoding::{compress_slice_to_vec, CompressionLevel};
196/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
197/// let compressed = compress_slice_to_vec(data, CompressionLevel::Fastest);
198/// ```
199pub fn compress_slice_to_vec(source: &[u8], level: CompressionLevel) -> Vec<u8> {
200    // Bare `FrameCompressor` resolves all three type params to their
201    // defaults (`&'static [u8]` reader, `Vec<u8>` drain, MatchGeneratorDriver);
202    // neither the reader nor the drain is used by the in-place
203    // `compress_independent_frame` path.
204    let mut enc: FrameCompressor = FrameCompressor::new(level);
205    enc.compress_independent_frame(source)
206}
207
208/// Worst-case compressed-frame size for an input of `src_size` bytes.
209///
210/// A destination buffer of this size is always large enough to hold the
211/// output of [`compress_slice_to_vec`] (or any single-frame compression) for
212/// an input of `src_size` bytes, so a caller sizing a fixed buffer once (the
213/// shape the C `ZSTD_compress` entry point needs) never has to grow it.
214///
215/// Mirrors the upstream `ZSTD_COMPRESSBOUND` formula exactly:
216/// `src_size + (src_size >> 8) + margin`, where `margin` is
217/// `(128 KiB - src_size) >> 11` for inputs below 128 KiB and `0` otherwise.
218/// The margin guarantees `bound(a) + bound(b) <= bound(a + b)` for blocks of
219/// at least 128 KiB, which keeps multi-frame concatenation sizing sound.
220///
221/// Saturates at [`usize::MAX`] if the formula would overflow on a
222/// pathologically large `src_size` — no allocation that large can exist, so
223/// the saturated value is the correct "cannot fit" sentinel rather than a
224/// masked wrap.
225///
226/// ```rust
227/// use structured_zstd::encoding::{compress_bound, compress_slice_to_vec, CompressionLevel};
228/// let data = [7u8; 4096];
229/// assert!(compress_slice_to_vec(&data, CompressionLevel::Default).len() <= compress_bound(data.len()));
230/// ```
231pub const fn compress_bound(src_size: usize) -> usize {
232    const LOWER: usize = 128 * 1024;
233    let margin = if src_size < LOWER {
234        (LOWER - src_size) >> 11
235    } else {
236        0
237    };
238    // Saturating is the correct UPPER-BOUND semantic here, not a masked bug:
239    // this is a public API over an arbitrary `usize`, and the largest meaningful
240    // bound is `usize::MAX`. A real slice is at most `isize::MAX` bytes, so the
241    // `* 1.004 + margin` cannot overflow for genuine inputs; the saturation only
242    // caps a pathological caller-supplied size at the representable ceiling.
243    src_size
244        .saturating_add(src_size >> 8)
245        .saturating_add(margin)
246}
247
248/// Compress a byte slice into a fresh `Vec<u8>` using fine-grained
249/// [`CompressionParameters`] (#27) instead of a bare
250/// [`CompressionLevel`].
251///
252/// One-shot wrapper over [`FrameCompressor::set_parameters`] +
253/// [`FrameCompressor::compress_independent_frame`]. The produced frame is
254/// a valid RFC 8878 stream regardless of the knobs chosen.
255///
256/// ```rust
257/// use structured_zstd::encoding::{
258///     compress_with_parameters, CompressionLevel, CompressionParameters, Strategy,
259/// };
260/// let data: &[u8] = b"the quick brown fox jumps over the lazy dog";
261/// let params = CompressionParameters::builder(CompressionLevel::Level(5))
262///     .strategy(Strategy::Greedy)
263///     .build()
264///     .unwrap();
265/// let compressed = compress_with_parameters(data, &params);
266/// assert!(!compressed.is_empty());
267/// ```
268pub fn compress_with_parameters(source: &[u8], params: &CompressionParameters) -> Vec<u8> {
269    let mut enc: FrameCompressor = FrameCompressor::new(params.level());
270    enc.set_parameters(params);
271    enc.compress_independent_frame(source)
272}
273
274/// The compression mode used impacts the speed of compression,
275/// and resulting compression ratios. Faster compression will result
276/// in worse compression ratios, and vice versa.
277#[derive(Copy, Clone, Debug, PartialEq, Eq)]
278pub enum CompressionLevel {
279    /// This level does not compress the data at all, and simply wraps
280    /// it in a Zstandard frame.
281    Uncompressed,
282    /// This level is roughly equivalent to Zstd compression level 1
283    Fastest,
284    /// This level uses the crate's dedicated `dfast`-style matcher to
285    /// target a better speed/ratio tradeoff than [`CompressionLevel::Fastest`].
286    ///
287    /// It represents this crate's "default" compression setting and may
288    /// evolve in future versions as the implementation moves closer to
289    /// reference zstd level 3 behavior.
290    Default,
291    /// This level is roughly equivalent to Zstd level 7.
292    ///
293    /// Uses the hash-chain matcher with a lazy2 matching strategy: the encoder
294    /// evaluates up to two positions ahead before committing to a match,
295    /// trading speed for a better compression ratio than [`CompressionLevel::Default`].
296    Better,
297    /// This level is roughly equivalent to Zstd level 11.
298    ///
299    /// Uses the hash-chain matcher with a deep lazy2 matching strategy and
300    /// a 16 MiB window. Compared to [`CompressionLevel::Better`], this level
301    /// uses larger hash and chain tables (2 M / 1 M entries vs 1 M / 512 K),
302    /// a deeper search (32 candidates vs 16), and a higher target match
303    /// length (128 vs 48), trading speed for the best compression ratio
304    /// available in this crate.
305    Best,
306    /// Numeric compression level.
307    ///
308    /// Levels 1–22 correspond to the C zstd level numbering.  Higher values
309    /// produce smaller output at the cost of more CPU time.  Negative values
310    /// select ultra-fast modes that trade ratio for speed.  Level 0 is
311    /// treated as [`DEFAULT_LEVEL`](Self::DEFAULT_LEVEL), matching C zstd
312    /// semantics.
313    ///
314    /// Named variants map to specific numeric levels:
315    /// [`Fastest`](Self::Fastest) = 1, [`Default`](Self::Default) = 3,
316    /// [`Better`](Self::Better) = 7, [`Best`](Self::Best) = 11.
317    /// [`Best`](Self::Best) remains the highest-ratio named preset, but
318    /// [`Level`](Self::Level) values above 11 can target stronger (slower)
319    /// tuning than the named hierarchy.
320    ///
321    /// Levels above 11 use progressively larger windows and deeper search.
322    /// Levels 16–17 use a `btopt`-style price parser, 18–19 use `btultra`,
323    /// and 20–22 use a `btultra2`-style two-pass selection profile.
324    ///
325    /// Semver note: this variant was added after the initial enum shape and
326    /// is a breaking API change for downstream crates that exhaustively
327    /// `match` on [`CompressionLevel`] without a wildcard arm.
328    Level(i32),
329}
330
331impl CompressionLevel {
332    /// The minimum supported numeric compression level (ultra-fast mode).
333    pub const MIN_LEVEL: i32 = -131072;
334    /// The maximum supported numeric compression level.
335    pub const MAX_LEVEL: i32 = 22;
336    /// The default numeric compression level (equivalent to [`Default`](Self::Default)).
337    pub const DEFAULT_LEVEL: i32 = 3;
338
339    /// Create a compression level from a numeric value.
340    ///
341    /// Returns named variants for canonical levels (`0`/`3`, `1`, `7`, `11`)
342    /// and [`Level`](Self::Level) for all other values.
343    ///
344    /// With the default matcher backend (`MatchGeneratorDriver`), values
345    /// outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
346    /// are silently clamped during built-in level parameter resolution.
347    pub const fn from_level(level: i32) -> Self {
348        match level {
349            0 | Self::DEFAULT_LEVEL => Self::Default,
350            1 => Self::Fastest,
351            7 => Self::Better,
352            11 => Self::Best,
353            _ => Self::Level(level),
354        }
355    }
356}
357
358/// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
359/// making their own tradeoffs between runtime, memory usage and compression ratio
360///
361/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
362/// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window
363/// the decoder will need to decode the data again.
364///
365/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
366/// window of data that is being used for matching.
367///
368/// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`.
369///
370/// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called.
371///
372/// This is repeated until no more data is left to be compressed.
373pub trait Matcher {
374    /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB.
375    fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;
376    /// Get a reference to the last committed space
377    fn get_last_space(&mut self) -> &[u8];
378    /// Commit a space to the matcher so it can be matched against
379    fn commit_space(&mut self, space: alloc::vec::Vec<u8>);
380    /// Just process the data in the last committed space for future matching.
381    fn skip_matching(&mut self);
382    /// Hint-aware skip path used internally to thread a precomputed block
383    /// incompressibility verdict to matcher backends.
384    ///
385    /// Default implementation preserves backwards compatibility for external
386    /// custom matchers by delegating to [`skip_matching`](Self::skip_matching).
387    fn skip_matching_with_hint(&mut self, _incompressible_hint: Option<bool>) {
388        self.skip_matching();
389    }
390    /// Process the data in the last committed space for future matching AND generate matches for the data
391    fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
392    /// Reset this matcher so it can be used for the next new frame
393    fn reset(&mut self, level: CompressionLevel);
394    /// Provide a hint about the total uncompressed size for the next frame.
395    ///
396    /// Implementations may use this to select smaller hash tables and windows
397    /// for small inputs, matching the C zstd source-size-class behavior.
398    /// Called before [`reset`](Self::reset) when the caller knows the input
399    /// size (e.g. from pledged content size or file metadata).
400    ///
401    /// The default implementation is a no-op for custom matchers and
402    /// test stubs. The built-in runtime matcher (`MatchGeneratorDriver`)
403    /// overrides this hook and applies the hint during level resolution.
404    fn set_source_size_hint(&mut self, _size: u64) {}
405    /// Hint the byte size of the dictionary that will be primed into the next
406    /// frame. The built-in runtime matcher uses it to size the binary-tree /
407    /// hash-chain match-finder tables from the dictionary's cParams tier rather
408    /// than the source window (upstream zstd CDict economics), while keeping the
409    /// eviction window source-sized. Default no-op for custom matchers and test
410    /// stubs; consumed at the next [`reset`](Self::reset).
411    fn set_dictionary_size_hint(&mut self, _size: usize) {}
412    /// Drop any per-frame fine-grained parameter overrides installed via
413    /// the public parameter API, reverting to plain level-based geometry
414    /// at the next [`reset`](Self::reset). Called by
415    /// [`FrameCompressor::set_compression_level`](crate::encoding::FrameCompressor::set_compression_level)
416    /// so switching back to a bare level after a customized frame does not
417    /// keep the old overrides sticky. Default no-op for custom matchers.
418    fn clear_param_overrides(&mut self) {}
419    /// Prime matcher state with dictionary history before compressing the next frame.
420    /// Default implementation is a no-op for custom matchers that do not support this.
421    fn prime_with_dictionary(&mut self, _dict_content: &[u8], _offset_hist: [u32; 3]) {}
422    /// Whether the most recent [`reset`](Self::reset) re-borrowed a resident
423    /// attach-mode dictionary (kept the dict bytes + cached index in place).
424    /// When `true` the caller MUST skip [`Self::prime_with_dictionary`] and only
425    /// reapply the offset history via [`Self::reapply_resident_dictionary`].
426    fn dictionary_is_resident(&self) -> bool {
427        false
428    }
429    /// Reapply the dictionary's offset history to a re-borrowed frame — the cheap
430    /// tail of priming, without the dict commit / re-index. Default no-op.
431    fn reapply_resident_dictionary(&mut self, _offset_hist: [u32; 3]) {}
432    /// CDict-equivalent fast path for repeated frames sharing one dictionary.
433    /// Restore the matcher state captured by [`Self::capture_primed_dictionary`]
434    /// at the SAME level (a table copy) instead of re-running
435    /// [`Self::prime_with_dictionary`] (which re-hashes every dictionary
436    /// position). Returns `true` when a matching snapshot was restored;
437    /// `false` (the default) means the caller must prime then capture.
438    fn restore_primed_dictionary(&mut self, _level: CompressionLevel) -> bool {
439        false
440    }
441    /// Snapshot the post-prime matcher state for the given level so later
442    /// frames can [`Self::restore_primed_dictionary`] it. Default no-op.
443    fn capture_primed_dictionary(&mut self, _level: CompressionLevel) {}
444    /// Drop any captured prime snapshot (dictionary or level changed).
445    /// Default no-op.
446    fn invalidate_primed_dictionary(&mut self) {}
447    /// Seed matcher cost model with dictionary entropy tables before the next frame.
448    /// Default implementation is a no-op for custom matchers.
449    fn seed_dictionary_entropy(
450        &mut self,
451        _huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
452        _ll: Option<&crate::fse::fse_encoder::FSETable>,
453        _ml: Option<&crate::fse::fse_encoder::FSETable>,
454        _of: Option<&crate::fse::fse_encoder::FSETable>,
455    ) {
456    }
457    /// Returns whether this matcher can consume dictionary priming state and produce
458    /// dictionary-dependent sequences. Defaults to `false` for custom matchers.
459    fn supports_dictionary_priming(&self) -> bool {
460        false
461    }
462    /// Whether a sample of `block` hashes to a match in an attached dictionary.
463    /// The raw-fast-path uses this to avoid skipping the scan on a block that
464    /// looks incompressible but compresses against the dictionary (an external
465    /// match the block's own content cannot reveal). Defaults to `false` for
466    /// custom matchers (and the no-dict case), leaving the content-only verdict.
467    fn block_samples_match_dict(&self, _block: &[u8]) -> bool {
468        false
469    }
470    /// Heap bytes this matcher's allocations hold (tables, history, scratch),
471    /// excluding the inline struct itself. Lets a context report its true
472    /// footprint via `ZSTD_sizeof_CCtx`. Defaults to `0` for custom matchers.
473    fn heap_size(&self) -> usize {
474        0
475    }
476    /// The size of the window the decoder will need to execute all sequences produced by this matcher.
477    ///
478    /// Must return a positive (non-zero) value; returning 0 causes
479    /// [`StreamingEncoder`] to reject the first write with an invalid-input error
480    /// (`InvalidInput` with `std`, `Other` with `no_std`).
481    ///
482    /// Must remain stable for the lifetime of a frame.
483    /// It may change only after `reset()` is called for the next frame
484    /// (for example because the compression level changed).
485    fn window_size(&self) -> u64;
486}
487
488#[derive(PartialEq, Eq, Debug)]
489/// Sequences that a [`Matcher`] can produce
490pub enum Sequence<'data> {
491    /// Is encoded as a sequence for the decoder sequence execution.
492    ///
493    /// First the literals will be copied to the decoded data,
494    /// then `match_len` bytes are copied from `offset` bytes back in the decoded data
495    Triple {
496        literals: &'data [u8],
497        offset: usize,
498        match_len: usize,
499    },
500    /// This is returned as the last sequence in a block
501    ///
502    /// These literals will just be copied at the end of the sequence execution by the decoder
503    Literals { literals: &'data [u8] },
504}
505
506#[cfg(test)]
507mod compress_bound_tests {
508    use super::{CompressionLevel, compress_bound, compress_slice_to_vec};
509
510    #[test]
511    fn matches_upstream_formula_below_threshold() {
512        // src_size + (src_size >> 8) + ((128 KiB - src_size) >> 11).
513        assert_eq!(compress_bound(0), 64);
514        assert_eq!(compress_bound(4096), 4096 + 16 + 62);
515    }
516
517    #[test]
518    fn drops_margin_at_and_above_threshold() {
519        let lower = 128 * 1024;
520        assert_eq!(compress_bound(lower), lower + (lower >> 8));
521        assert_eq!(compress_bound(lower + 1), (lower + 1) + ((lower + 1) >> 8));
522    }
523
524    #[test]
525    fn saturates_instead_of_wrapping() {
526        // No allocation this large can exist; the ceiling is the right sentinel.
527        assert_eq!(compress_bound(usize::MAX), usize::MAX);
528    }
529
530    #[test]
531    fn always_fits_real_compressed_output() {
532        for len in [0usize, 1, 100, 4096, 200_000] {
533            let data = alloc::vec![7u8; len];
534            let out = compress_slice_to_vec(&data, CompressionLevel::Default);
535            assert!(out.len() <= compress_bound(len), "len={len}");
536        }
537    }
538}
structured_zstd/encoding/mod.rs

structured_zstd/encoding/
mod.rs