structured_zstd/encoding/
mod.rs

1//! Zstandard encoder — frame compression, streaming, dictionary support.
2//!
3//! Four entry points cover the common use cases:
4//!
5//! * [`compress`] — one-shot helper that builds a self-contained
6//!   Zstandard frame from a `Read` source to a `Write` sink. The
7//!   input is consumed incrementally from `Read`, so input buffering
8//!   stays bounded; however, the compressed output is buffered in
9//!   memory until the frame is complete so the Frame Content Size
10//!   field can be filled in the header — peak memory is
11//!   `O(compressed_size)` (worst-case `O(input_size)` for
12//!   incompressible payloads, plus a small frame overhead). The
13//!   savings vs [`compress_to_vec`] come from not materialising the
14//!   input alongside the output.
15//! * [`compress_to_vec`] — same one-shot path as [`compress`] but
16//!   the input is eagerly drained into an internal `Vec` first
17//!   (`read_to_end`) so the encoder can be handed a `&[u8]` and a
18//!   precise source-size hint. Peak memory is therefore ≈
19//!   `input_size + output_size`; prefer [`compress`] or
20//!   [`StreamingEncoder`] when the input is large or unbounded.
21//! * [`StreamingEncoder`] — implements [`crate::io::Write`], which
22//!   re-exports [`std::io::Write`] under the `std` feature and falls
23//!   back to a `no_std`-friendly trait otherwise. Accepts bytes
24//!   incrementally and flushes compressed output as blocks fill.
25//!   Requires `set_pledged_content_size` before the first write if
26//!   the Frame Content Size field is to be populated.
27//! * [`FrameCompressor`] — lower-level builder that owns the matcher and
28//!   the per-frame configuration; the streaming and one-shot helpers are
29//!   thin wrappers over it. Reach for it when you need to swap in a custom
30//!   [`Matcher`] implementation or share the matcher across frames.
31//!
32//! Compression intensity is selected via [`CompressionLevel`], which
33//! provides both named presets (`Fastest`, `Default`, `Better`, `Best`) and
34//! numeric levels (`from_level(n)`) that mirror C zstd's level numbering
35//! (negative for ultra-fast, `0` = default, `1..=22` for the standard
36//! range).
37//!
38//! All produced frames are valid RFC 8878 Zstandard streams and decode
39//! through both this crate's [`crate::decoding`] module and upstream C zstd.
40
41pub(crate) mod block_header;
42pub(crate) mod blocks;
43pub(crate) mod fastpath;
44pub(crate) mod frame_header;
45pub(crate) mod incompressible;
46pub(crate) mod match_generator;
47pub(crate) mod util;
48
49// `#111` encoder architecture rewrite. `cost_model`, `opt`,
50// `strategy`, `dfast`, `row`, and `simple` host the relocated
51// cost-model types, the optimal-parser plain-data types, the
52// const-generic [`strategy::Strategy`] trait + per-level [`strategy::
53// StrategyTag`] dispatcher, and the Dfast / Row / Simple matchers
54// respectively. `match_table::helpers` hosts the shared match-finder
55// primitives. The rewrite plan is tracked in
56// <https://github.com/structured-world/structured-zstd/issues/111>;
57// per-phase boundaries are `perf/post-pr-110-baseline` (start),
58// `perf/post-pr-121-baseline` (post-Phase-2).
59pub(crate) mod bt;
60pub(crate) mod cost_model;
61pub(crate) mod dfast;
62pub(crate) mod hc;
63// LDM uses `twox_hash::XxHash64` (per-window XXH64 over the
64// `min_match_length` byte slice, donor `zstd_ldm.c:315`). The
65// `twox-hash` dependency is gated behind the `hash` feature so
66// `default-features = false` builds (no_std, embedded) don't pull
67// it in. `BtMatcher::ldm_producer` and the `cfg(feature = "hash")`
68// blocks inside `BtMatcher::prepare_ldm_candidates` /
69// `BtMatcher::reset` carry the same gate; the call site in
70// `match_generator.rs::start_matching_optimal` invokes
71// `prepare_ldm_candidates` unconditionally because the
72// gating is internal to the method body (under
73// `not(feature = "hash")` the method shrinks to the legacy
74// `ldm_sequences.clear()` stub).
75#[cfg(feature = "hash")]
76pub(crate) mod ldm;
77pub(crate) mod match_table;
78pub(crate) mod opt;
79pub(crate) mod row;
80pub(crate) mod simple;
81pub(crate) mod strategy;
82
83pub(crate) mod frame_compressor;
84#[cfg(feature = "lsm")]
85pub mod frame_emit_info;
86mod levels;
87#[cfg(feature = "bench_internals")]
88pub mod sequence_capture;
89mod streaming_encoder;
90pub use frame_compressor::FrameCompressor;
91#[cfg(feature = "lsm")]
92pub use frame_emit_info::{BlockType, FrameBlock, FrameEmitInfo};
93pub use match_generator::MatchGeneratorDriver;
94pub use streaming_encoder::StreamingEncoder;
95
96use crate::io::{Read, Write};
97use alloc::vec::Vec;
98
99pub(crate) const BETTER_WINDOW_LOG: u8 = 23;
100
101/// Worst-case compressed-size bound for `src_size` uncompressed
102/// bytes. Mirrors the upstream `ZSTD_COMPRESSBOUND(srcSize)` macro
103/// from `lib/zstd.h` (zstd 1.5.7) — the C *macro*, NOT the public
104/// `ZSTD_compressBound()` function. The function adds frame/block-
105/// header headroom in some code paths; the macro is a tighter
106/// expression suitable for sizing an internal output Vec, which is
107/// the only call site here. Callers that need a hard upper bound on
108/// the wire-format compressed size should NOT reuse this — use the
109/// `zstd-sys` function call directly. If the macro changes in a
110/// future upstream revision, treat the upstream header as
111/// authoritative and re-derive this function — do not trust the
112/// inline copy without cross-checking.
113///
114/// Inline approximation (this revision):
115///
116/// ```text
117/// srcSize
118///   + (srcSize >> 8)
119///   + (srcSize < 128 KiB ? ((128 KiB - srcSize) >> 11) : 0)
120/// ```
121///
122/// Consulted by [`compress_slice_to_vec`] for the small-input branch
123/// of its output-`Vec` seed: the seed is
124/// `min(compress_bound(src), OUTPUT_BLOCK_CAP = 128 KiB)`, so for
125/// inputs that fit within one donor block (`≤ ~128 KiB`) the
126/// destination never reallocates inside the measured window. Without
127/// the bound, `Vec::push` growth doubles in powers of two and pins
128/// `~2 × final_compressed_size` resident at the last realloc, which
129/// shows up as ~1 MiB of peak-RSS noise on 1 MiB inputs and prevents
130/// FFI-parity on the memory bench. For inputs larger than the cap the
131/// `Vec` still grows by amortized doubling — see
132/// [`compress_slice_to_vec`] for the trade-off rationale.
133#[inline]
134pub(crate) const fn compress_bound(src_size: usize) -> usize {
135    const SMALL_INPUT_THRESHOLD: usize = 128 * 1024;
136    let tail = if src_size < SMALL_INPUT_THRESHOLD {
137        (SMALL_INPUT_THRESHOLD - src_size) >> 11
138    } else {
139        0
140    };
141    // `saturating_add` guards against the corner case where `src_size`
142    // is close to `usize::MAX`. The current sole caller
143    // (`compress_slice_to_vec`) clamps the result with `.min(OUTPUT_BLOCK_CAP)`
144    // so an overflow there would still produce a sensible cap, but
145    // future `pub(crate)` callers may use the raw bound — keep the
146    // arithmetic itself overflow-safe.
147    src_size.saturating_add(src_size >> 8).saturating_add(tail)
148}
149
150/// Convenience function to compress some source into a target without reusing any resources of the compressor
151/// ```rust
152/// use structured_zstd::encoding::{compress, CompressionLevel};
153/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
154/// let mut target = Vec::new();
155/// compress(data, &mut target, CompressionLevel::Fastest);
156/// ```
157pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) {
158    let mut frame_enc = FrameCompressor::new(level);
159    frame_enc.set_source(source);
160    frame_enc.set_drain(target);
161    frame_enc.compress();
162}
163
164/// Convenience function to compress some source into a Vec without reusing any resources of the compressor.
165///
166/// This helper eagerly buffers the full input (`Read`) before compression so it
167/// can provide a source-size hint to the one-shot encoder path. Peak memory can
168/// therefore be roughly `input_size + output_size`. For very large payloads or
169/// tighter memory budgets, prefer streaming APIs such as [`StreamingEncoder`].
170///
171/// **Peak-memory shape change in this revision.** The implementation
172/// delegates to [`compress_slice_to_vec`], which seeds the output
173/// `Vec` with `min(compress_bound(input.len()), OUTPUT_BLOCK_CAP =
174/// 128 KiB)` instead of the previous `Vec::new()` (zero-capacity +
175/// power-of-two growth). For inputs in the few-KiB to ~128 KiB range
176/// this is a strict improvement (no doubling spikes inside the
177/// measured window). For inputs significantly larger than 128 KiB the
178/// allocation curve still grows by amortized doubling but starts from
179/// a 128 KiB floor rather than 0. Downstream consumers that measure
180/// peak RSS on this entry point will see a different curve than
181/// pre-revision; bench shape, not steady-state, is what changed.
182///
183/// **This is NOT a streaming API.** The source is fully buffered
184/// into a `Vec<u8>` before any compression work begins, so peak input
185/// memory is bounded by `source.len()` (not "constant regardless of
186/// payload size" as a stream-shaped encoder would offer). The RSS
187/// notes below apply to the materialization-then-compress shape; if
188/// the source is large enough that holding it in memory is not
189/// acceptable, use [`StreamingEncoder`] which consumes chunks
190/// incrementally without the up-front Vec build.
191///
192/// The other side of the peak shape is the input buffering: this
193/// helper drives `read_to_end` to materialize the full source into a
194/// `Vec<u8>` before forwarding the slice to [`compress_slice_to_vec`].
195/// For a `Read` whose size is unknown ahead of time, `read_to_end`
196/// grows that input `Vec` via power-of-two doubling — peak input
197/// allocation can be up to 2× the final source length transiently.
198/// At the moment that input buffer crosses ~128 KiB the output Vec
199/// seed kicks in concurrently. The total live working set on this
200/// entry point is approximately
201/// `input.capacity() + output_vec_seed + internal_accumulators`,
202/// where `output_vec_seed` is `min(compress_bound(input.len()), 128
203/// KiB)` and `internal_accumulators` covers
204/// `FrameCompressor::all_blocks` (pre-reserved at frame start, up to
205/// ~130 KiB at default block cap) plus per-block scratch (hash tables,
206/// literal/sequence staging). Round the helper's RSS peak to
207/// `input.capacity() + output_vec_seed + ~130 KiB internal +
208/// per-block scratch` rather than the bare `input.capacity() + 128
209/// KiB` figure quoted in earlier revisions, which only accounted for
210/// the output seed. [`StreamingEncoder`] avoids the input
211/// materialization step entirely and is the right entry point when
212/// the source is large or unbounded.
213///
214/// ```rust
215/// use structured_zstd::encoding::{compress_to_vec, CompressionLevel};
216/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
217/// let compressed = compress_to_vec(data, CompressionLevel::Fastest);
218/// ```
219pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
220    let mut source = source;
221    let mut input = Vec::new();
222    source.read_to_end(&mut input).unwrap();
223    compress_slice_to_vec(input.as_slice(), level)
224}
225
226/// Compress a contiguous byte slice into a fresh `Vec<u8>` without
227/// the input-buffering step that [`compress_to_vec`] performs to
228/// adapt a `Read` source. Donor-parity peak-memory shape: the input
229/// is read by reference, and the output `Vec` is seeded with
230/// `min(compress_bound(src), OUTPUT_BLOCK_CAP = 128 KiB)`.
231///
232/// The seed is intentionally capped at one donor block
233/// (`OUTPUT_BLOCK_CAP = 128 KiB`). Three regimes:
234///
235/// * Inputs up to ~127 KiB where `compress_bound(src) <
236///   OUTPUT_BLOCK_CAP` — the `min` picks the tighter bound and the
237///   `Vec` never reallocates inside the measured window (cheap, no
238///   doubling spikes, no over-allocation).
239/// * Inputs around 128 KiB where `compress_bound(src) >=
240///   OUTPUT_BLOCK_CAP` — the `min` clamps to the cap, so the "no
241///   reallocation" property holds only as long as the actual
242///   compressed output also fits within `OUTPUT_BLOCK_CAP`. For
243///   high-entropy inputs near the cap boundary the `Vec` may grow
244///   by one doubling step before the next block lands.
245/// * Larger inputs — the `Vec` grows via amortized doubling, with
246///   peak transient memory ≈ 2× current compressed size at the last
247///   realloc. Deliberate trade-off against pinning
248///   `compress_bound(src)` upfront, which on the 100 MiB
249///   `large-log-stream` scenario pinned 100.4 MiB even though the
250///   actual compressed output was ≪ 1 MiB.
251///
252/// # Panics
253///
254/// Panics on encoder error (matches the failure surface of
255/// [`compress_to_vec`], which this function backs). The internal
256/// [`FrameCompressor::compress`] call propagates `io::Error` from the
257/// output [`Vec`] writer; under the in-tree default writer that
258/// channel is infallible and any error becomes a `panic`. Out-of-
259/// memory during `Vec::with_capacity` or the encoder's per-block
260/// scratch allocations is handled by the global allocator's abort
261/// policy. Migrating to a fallible variant requires plumbing
262/// `Result` through `FrameCompressor::compress` — out of scope for
263/// the slice/Vec entry points which mirror the donor `ZSTD_compress`
264/// shape (no error return on the bulk path).
265///
266/// ```rust
267/// use structured_zstd::encoding::{compress_slice_to_vec, CompressionLevel};
268/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
269/// let compressed = compress_slice_to_vec(data, CompressionLevel::Fastest);
270/// ```
271pub fn compress_slice_to_vec(source: &[u8], level: CompressionLevel) -> Vec<u8> {
272    // Initial capacity sized to a single output block, matching donor
273    // `ZSTD_CStreamOutSize()` (≈ `ZSTD_BLOCKSIZE_MAX` + headroom). For
274    // small inputs `compress_bound(src)` is smaller than this block —
275    // use the tighter bound to avoid over-allocation. For larger inputs
276    // the `Vec` grows via amortized doubling, with peak ≈ 2×current
277    // compressed size at the last realloc (donor streaming shape: the
278    // caller buffer is one block, not `ZSTD_compressBound(srcSize)`).
279    // Pre-allocating `compress_bound(src)` here would charge worst-case
280    // expansion against peak even on highly compressible inputs — on
281    // the 100 MiB `large-log-stream` scenario that single allocation
282    // dominated the encoder's measured footprint (`compress_bound` ≈
283    // 100.4 MiB pinned, actual compressed output ≪ 1 MiB).
284    const OUTPUT_BLOCK_CAP: usize = 128 * 1024;
285    let cap = compress_bound(source.len()).min(OUTPUT_BLOCK_CAP);
286    let mut vec = Vec::with_capacity(cap);
287    let mut frame_enc = FrameCompressor::new(level);
288    frame_enc.set_source_size_hint(source.len() as u64);
289    frame_enc.set_source(source);
290    frame_enc.set_drain(&mut vec);
291    frame_enc.compress();
292    vec
293}
294
295/// The compression mode used impacts the speed of compression,
296/// and resulting compression ratios. Faster compression will result
297/// in worse compression ratios, and vice versa.
298#[derive(Copy, Clone, Debug)]
299pub enum CompressionLevel {
300    /// This level does not compress the data at all, and simply wraps
301    /// it in a Zstandard frame.
302    Uncompressed,
303    /// This level is roughly equivalent to Zstd compression level 1
304    Fastest,
305    /// This level uses the crate's dedicated `dfast`-style matcher to
306    /// target a better speed/ratio tradeoff than [`CompressionLevel::Fastest`].
307    ///
308    /// It represents this crate's "default" compression setting and may
309    /// evolve in future versions as the implementation moves closer to
310    /// reference zstd level 3 behavior.
311    Default,
312    /// This level is roughly equivalent to Zstd level 7.
313    ///
314    /// Uses the hash-chain matcher with a lazy2 matching strategy: the encoder
315    /// evaluates up to two positions ahead before committing to a match,
316    /// trading speed for a better compression ratio than [`CompressionLevel::Default`].
317    Better,
318    /// This level is roughly equivalent to Zstd level 11.
319    ///
320    /// Uses the hash-chain matcher with a deep lazy2 matching strategy and
321    /// a 16 MiB window. Compared to [`CompressionLevel::Better`], this level
322    /// uses larger hash and chain tables (2 M / 1 M entries vs 1 M / 512 K),
323    /// a deeper search (32 candidates vs 16), and a higher target match
324    /// length (128 vs 48), trading speed for the best compression ratio
325    /// available in this crate.
326    Best,
327    /// Numeric compression level.
328    ///
329    /// Levels 1–22 correspond to the C zstd level numbering.  Higher values
330    /// produce smaller output at the cost of more CPU time.  Negative values
331    /// select ultra-fast modes that trade ratio for speed.  Level 0 is
332    /// treated as [`DEFAULT_LEVEL`](Self::DEFAULT_LEVEL), matching C zstd
333    /// semantics.
334    ///
335    /// Named variants map to specific numeric levels:
336    /// [`Fastest`](Self::Fastest) = 1, [`Default`](Self::Default) = 3,
337    /// [`Better`](Self::Better) = 7, [`Best`](Self::Best) = 11.
338    /// [`Best`](Self::Best) remains the highest-ratio named preset, but
339    /// [`Level`](Self::Level) values above 11 can target stronger (slower)
340    /// tuning than the named hierarchy.
341    ///
342    /// Levels above 11 use progressively larger windows and deeper search.
343    /// Levels 16–17 use a `btopt`-style price parser, 18–19 use `btultra`,
344    /// and 20–22 use a `btultra2`-style two-pass selection profile.
345    ///
346    /// Semver note: this variant was added after the initial enum shape and
347    /// is a breaking API change for downstream crates that exhaustively
348    /// `match` on [`CompressionLevel`] without a wildcard arm.
349    Level(i32),
350}
351
352impl CompressionLevel {
353    /// The minimum supported numeric compression level (ultra-fast mode).
354    pub const MIN_LEVEL: i32 = -131072;
355    /// The maximum supported numeric compression level.
356    pub const MAX_LEVEL: i32 = 22;
357    /// The default numeric compression level (equivalent to [`Default`](Self::Default)).
358    pub const DEFAULT_LEVEL: i32 = 3;
359
360    /// Create a compression level from a numeric value.
361    ///
362    /// Returns named variants for canonical levels (`0`/`3`, `1`, `7`, `11`)
363    /// and [`Level`](Self::Level) for all other values.
364    ///
365    /// With the default matcher backend (`MatchGeneratorDriver`), values
366    /// outside [`MIN_LEVEL`](Self::MIN_LEVEL)..=[`MAX_LEVEL`](Self::MAX_LEVEL)
367    /// are silently clamped during built-in level parameter resolution.
368    pub const fn from_level(level: i32) -> Self {
369        match level {
370            0 | Self::DEFAULT_LEVEL => Self::Default,
371            1 => Self::Fastest,
372            7 => Self::Better,
373            11 => Self::Best,
374            _ => Self::Level(level),
375        }
376    }
377}
378
379/// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
380/// making their own tradeoffs between runtime, memory usage and compression ratio
381///
382/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
383/// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window
384/// the decoder will need to decode the data again.
385///
386/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
387/// window of data that is being used for matching.
388///
389/// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`.
390///
391/// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called.
392///
393/// This is repeated until no more data is left to be compressed.
394pub trait Matcher {
395    /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB.
396    fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;
397    /// Get a reference to the last committed space
398    fn get_last_space(&mut self) -> &[u8];
399    /// Commit a space to the matcher so it can be matched against
400    fn commit_space(&mut self, space: alloc::vec::Vec<u8>);
401    /// Just process the data in the last committed space for future matching.
402    fn skip_matching(&mut self);
403    /// Hint-aware skip path used internally to thread a precomputed block
404    /// incompressibility verdict to matcher backends.
405    ///
406    /// Default implementation preserves backwards compatibility for external
407    /// custom matchers by delegating to [`skip_matching`](Self::skip_matching).
408    fn skip_matching_with_hint(&mut self, _incompressible_hint: Option<bool>) {
409        self.skip_matching();
410    }
411    /// Process the data in the last committed space for future matching AND generate matches for the data
412    fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
413    /// Reset this matcher so it can be used for the next new frame
414    fn reset(&mut self, level: CompressionLevel);
415    /// Provide a hint about the total uncompressed size for the next frame.
416    ///
417    /// Implementations may use this to select smaller hash tables and windows
418    /// for small inputs, matching the C zstd source-size-class behavior.
419    /// Called before [`reset`](Self::reset) when the caller knows the input
420    /// size (e.g. from pledged content size or file metadata).
421    ///
422    /// The default implementation is a no-op for custom matchers and
423    /// test stubs. The built-in runtime matcher (`MatchGeneratorDriver`)
424    /// overrides this hook and applies the hint during level resolution.
425    fn set_source_size_hint(&mut self, _size: u64) {}
426    /// Prime matcher state with dictionary history before compressing the next frame.
427    /// Default implementation is a no-op for custom matchers that do not support this.
428    fn prime_with_dictionary(&mut self, _dict_content: &[u8], _offset_hist: [u32; 3]) {}
429    /// Seed matcher cost model with dictionary entropy tables before the next frame.
430    /// Default implementation is a no-op for custom matchers.
431    fn seed_dictionary_entropy(
432        &mut self,
433        _huff: Option<&crate::huff0::huff0_encoder::HuffmanTable>,
434        _ll: Option<&crate::fse::fse_encoder::FSETable>,
435        _ml: Option<&crate::fse::fse_encoder::FSETable>,
436        _of: Option<&crate::fse::fse_encoder::FSETable>,
437    ) {
438    }
439    /// Returns whether this matcher can consume dictionary priming state and produce
440    /// dictionary-dependent sequences. Defaults to `false` for custom matchers.
441    fn supports_dictionary_priming(&self) -> bool {
442        false
443    }
444    /// The size of the window the decoder will need to execute all sequences produced by this matcher.
445    ///
446    /// Must return a positive (non-zero) value; returning 0 causes
447    /// [`StreamingEncoder`] to reject the first write with an invalid-input error
448    /// (`InvalidInput` with `std`, `Other` with `no_std`).
449    ///
450    /// Must remain stable for the lifetime of a frame.
451    /// It may change only after `reset()` is called for the next frame
452    /// (for example because the compression level changed).
453    fn window_size(&self) -> u64;
454}
455
456#[derive(PartialEq, Eq, Debug)]
457/// Sequences that a [`Matcher`] can produce
458pub enum Sequence<'data> {
459    /// Is encoded as a sequence for the decoder sequence execution.
460    ///
461    /// First the literals will be copied to the decoded data,
462    /// then `match_len` bytes are copied from `offset` bytes back in the decoded data
463    Triple {
464        literals: &'data [u8],
465        offset: usize,
466        match_len: usize,
467    },
468    /// This is returned as the last sequence in a block
469    ///
470    /// These literals will just be copied at the end of the sequence execution by the decoder
471    Literals { literals: &'data [u8] },
472}
structured_zstd/encoding/mod.rs

structured_zstd/encoding/
mod.rs