Skip to main content

structured_zstd/
lib.rs

//! Pure-Rust Zstandard codec with a production-grade decoder, dictionary
//! handle reuse, and an actively-improved encoder.
//!
//! The crate ships:
//!
//! * [`decoding`] — [RFC 8878] decoder ([`decoding::StreamingDecoder`],
//!   [`decoding::FrameDecoder`], dictionary-backed paths via
//!   [`decoding::DictionaryHandle`]).
//! * [`encoding`] — frame compressor, streaming encoder, named and numeric
//!   compression levels ([`encoding::CompressionLevel`]).
//! * [`dictionary`] (feature `dict_builder`) — COVER / FastCOVER training
//!   plus raw-to-finalized dictionary helpers.
//!
//! No FFI, no cmake, no system zstd. `no_std` builds are supported by
//! disabling the default `std` feature.
//!
//! # CPU kernel features
//!
//! The decode hot paths ship per-CPU-tier SIMD kernels. With `std` the tier
//! is chosen at runtime (CPU-feature detection, cached on first use); on
//! `no_std` it is chosen at compile time from `cfg(target_feature)`.
//! Each tier is gated by a cargo feature, all enabled by default (a universal
//! binary that picks the best available tier per the above): `kernel_scalar`,
//! `kernel_sse2`, `kernel_bmi2`, `kernel_avx2`, `kernel_vbmi2` (x86) and
//! `kernel_neon`, `kernel_sve` (aarch64). The chain mirrors the ISA
//! dependency (`kernel_avx2` implies `kernel_bmi2` implies `kernel_sse2`;
//! `kernel_sve` implies `kernel_neon`). The scalar kernel is always compiled,
//! so any subset is valid; a flag is inert on architectures it doesn't apply
//! to. Constrained targets can shrink the binary by trimming tiers, e.g.
//! `--no-default-features --features kernel_scalar` compiles out the per-tier
//! SIMD kernel dispatch, its BMI2/AVX2/VBMI2/NEON trampolines, and the
//! explicit SSE2/NEON intrinsics in the small fixed-size copy primitives —
//! all of which are gated on the matching `kernel_*` feature. The `kernel_*`
//! features control the crate's own explicit SIMD; they do not constrain the
//! compiler's autovectorizer, which may still emit vector instructions from
//! ordinary scalar code regardless of the enabled tiers.
//!
//! The packaged README is included below for the docs.rs landing page; the
//! API anchors above link straight into the per-module documentation.
//!
//! [RFC 8878]: https://www.rfc-editor.org/rfc/rfc8878
// Keep crate docs aligned with the packaged README via the crate-local symlink in `zstd/README.md`.
#![doc = include_str!("../README.md")]
// The crate is unconditionally `no_std`; `std` is linked back in explicitly
// below, and only when the `std` feature is enabled.
#![no_std]
#![deny(trivial_casts, trivial_numeric_casts, rust_2018_idioms)]
// Switch on the `doc_cfg` feature only for builds that pass `--cfg docsrs`, so
// the `doc(cfg(...))` annotations on feature-gated modules take effect there.
#![cfg_attr(docsrs, feature(doc_cfg))]

// Opt back into the standard library when the `std` feature is on.
#[cfg(feature = "std")]
extern crate std;

// NOTE(review): `alloc` is not linked here under `rustc-dep-of-std`;
// presumably that build provides it another way — confirm against the manifest.
#[cfg(not(feature = "rustc-dep-of-std"))]
extern crate alloc;
53
/// Compile-time switch for the `vprintln!` tracing macro.
///
/// Only defined when the `std` feature is enabled (the macro prints through
/// `std::println!`). It is `false`, so tracing is off unless this constant is
/// flipped in the source.
#[cfg(feature = "std")]
pub(crate) const VERBOSE: bool = false;

/// `println!`-style tracing that is silent unless [`VERBOSE`] is `true`.
///
/// Accepts exactly what `println!` accepts — a format string followed by
/// positional or named arguments, with an optional trailing comma — because
/// the tokens are forwarded verbatim.
///
/// Without the `std` feature the whole expansion is removed by `cfg`, so the
/// arguments are neither evaluated nor type-checked in that configuration.
macro_rules! vprintln {
    // Forward raw tokens rather than parsed `expr` fragments: an `expr` list
    // rejects a trailing comma and cannot carry `name = value` arguments.
    ($($arg:tt)*) => {
        #[cfg(feature = "std")]
        if crate::VERBOSE {
            std::println!($($arg)*);
        }
    }
}
65
mod bit_io;
mod common;
/// Smallest accepted block-size target (the `ZSTD_TARGETCBLOCKSIZE_MIN`
/// bound): the single source of truth shared by the Rust setters
/// (`set_target_block_size`) and the C ABI parameter surface.
pub use common::MIN_TARGET_BLOCK_SIZE;
mod cpu_kernel;
pub mod decoding;
// The dictionary module is compiled only with the `dict_builder` feature; the
// `doc(cfg)` annotation applies only to `docsrs` builds.
#[cfg(feature = "dict_builder")]
#[cfg_attr(docsrs, doc(cfg(feature = "dict_builder")))]
pub mod dictionary;
pub mod encoding;
mod histogram;

// Same gating pattern as `dictionary`, keyed on the `lsm` feature.
#[cfg(feature = "lsm")]
#[cfg_attr(docsrs, doc(cfg(feature = "lsm")))]
pub mod skippable;

pub(crate) mod blocks;

// `fse` and `huff0` are declared twice with complementary `cfg`s: fully public
// here when `fuzz_exports` is on, `pub(crate)` further down otherwise.
#[cfg(feature = "fuzz_exports")]
pub mod fse;
#[cfg(feature = "fuzz_exports")]
pub mod huff0;

// `pub fn init_state<K: CpuKernel>` and friends inside the
// fuzz_exports-public `huff0` module name `crate::cpu_kernel::CpuKernel`
// in their signatures. Without a publicly-reachable path to `CpuKernel`
// the bound triggers `private_bounds` / `private_interfaces`. Re-export
// under the same feature gate so the fuzz harness build is clean.
#[cfg(feature = "fuzz_exports")]
pub use crate::cpu_kernel::{CpuKernel, ScalarKernel};

/// Name of the active CPU kernel tier (entropy / sequence hot paths) for this
/// process — for diagnostics and benchmark/dashboard reporting. See
/// [`cpu_kernel::active_cpu_kernel_name`].
pub use crate::cpu_kernel::active_cpu_kernel_name;

// Crate-private counterparts of the `fuzz_exports` declarations above; exactly
// one of the two visibilities is compiled for each module.
#[cfg(not(feature = "fuzz_exports"))]
pub(crate) mod fse;
#[cfg(not(feature = "fuzz_exports"))]
pub(crate) mod huff0;

// The `io` name resolves to `io_std` when the `std` feature is enabled and to
// `io_nostd` otherwise, so dependents can write `crate::io` in both builds.
#[cfg(feature = "std")]
pub mod io_std;

#[cfg(feature = "std")]
pub use io_std as io;

#[cfg(not(feature = "std"))]
pub mod io_nostd;

#[cfg(not(feature = "std"))]
pub use io_nostd as io;

// Unit tests are compiled only for test builds.
#[cfg(test)]
mod tests;
/// Benchmark- and parity-test-only window onto crate internals.
///
/// Compiled only with the `bench_internals` feature and hidden from rustdoc,
/// so ordinary builds do not widen the public API surface. Nothing in here is
/// part of the stable API; items may change or disappear without notice.
#[cfg(feature = "bench_internals")]
#[doc(hidden)]
pub mod testing {
    pub use crate::bit_io::BitReaderReversed;
    pub use crate::blocks::block::BlockType;
    // `BitReaderReversed` is generic over `K: CpuKernel = ScalarKernel`, so
    // the trait bound and its default both need a `pub` path matching the
    // visibility of the re-exported type; otherwise the bench build trips
    // `private_bounds` / `private_interfaces`.
    pub use crate::cpu_kernel::{CpuKernel, ScalarKernel};

    /// Maximum block size per RFC 8878 §3.1.1.2.3 (128 KiB).
    ///
    /// Exposed so parity tests can feed exactly-one-block chunks into the
    /// block-splitter comparator.
    pub const MAX_BLOCK_SIZE: u32 = crate::common::MAX_BLOCK_SIZE;

    /// Compression parameters chosen for `(level, srcSize, dictSize)`.
    ///
    /// The result is ordered `(windowLog, chainLog, hashLog, searchLog,
    /// minMatch, targetLength, strategy)`. Facade for the `ffi-bench` parity
    /// test that diffs the selection against the reference `ZSTD_getCParams`.
    pub fn compression_params(
        level: i32,
        src: u64,
        dict: usize,
    ) -> (u32, u32, u32, u32, u32, u32, u32) {
        let selected = crate::encoding::cparams::get_cparams_public(level, src, dict);
        let window_log = selected.window_log;
        let chain_log = selected.chain_log;
        let hash_log = selected.hash_log;
        let search_log = selected.search_log;
        let min_match = selected.min_match;
        let target_length = selected.target_length;
        let strategy = selected.strategy;
        (
            window_log,
            chain_log,
            hash_log,
            search_log,
            min_match,
            target_length,
            strategy,
        )
    }

    /// Force every HUF table build onto the cheap single-build path, skipping
    /// the #167 table-log search, so a bench harness can A/B the search
    /// across levels. Measurement-only.
    pub fn set_force_cheap_huf(on: bool) {
        crate::huff0::huff0_encoder::set_force_cheap_huf(on);
    }

    /// Bench-only shim around the decoder's wildcopy implementation.
    ///
    /// # Safety
    /// Caller must satisfy the same safety contract as
    /// `decoding::copy_bytes_overshooting_for_bench`.
    #[inline(always)]
    pub unsafe fn copy_bytes_overshooting_for_bench(
        src: (*const u8, usize),
        dst: (*mut u8, usize),
        copy_at_least: usize,
    ) {
        // Decoder internals stay crate-private; only this shim is exposed.
        // SAFETY: the arguments are forwarded unchanged, and the caller has
        // promised (see `# Safety`) to uphold the wrapped function's contract.
        unsafe {
            crate::decoding::copy_bytes_overshooting_for_bench(src, dst, copy_at_least);
        }
    }

    /// Run our block splitter on a 128 KB chunk and report where it splits.
    ///
    /// `split_level` mirrors upstream zstd `ZSTD_splitBlock(level)`: `0`
    /// selects the borders heuristic (`ZSTD_splitBlock_fromBorders`), while
    /// `1..=4` select `ZSTD_splitBlock_byChunks` at the corresponding
    /// sampling level. Returns the split position, or `block.len()` if no
    /// split is made.
    ///
    /// Facade for the block-splitter parity comparator test, so the
    /// underlying functions need not widen the stable API surface.
    pub fn block_splitter_decision(block: &[u8], split_level: usize) -> usize {
        use crate::encoding::frame_compressor::block_splitter_decision_for_bench as decide;
        decide(block, split_level)
    }

    /// White-box capture of our Huffman weight description for `data`.
    ///
    /// Returns `(description, weights)`: `description` is the length-prefixed
    /// FSE payload and `weights` the raw per-symbol weights. Facade for the
    /// `ffi-bench` conformance test that feeds it through the C
    /// `HUF_readStats`.
    pub fn huf_weight_description(data: &[u8]) -> (alloc::vec::Vec<u8>, alloc::vec::Vec<u8>) {
        crate::huff0::huff0_encoder::huf_weight_description_for_test(data)
    }

    /// White-box capture of our 4-stream Huffman payload for `data`.
    ///
    /// Facade for the `ffi-bench` conformance test that decodes the payload
    /// through the C HUF reader.
    pub fn huf_encode4x(data: &[u8]) -> alloc::vec::Vec<u8> {
        crate::huff0::huff0_encoder::huf_encode4x_for_test(data)
    }

    /// White-box capture of the level-22 sequence stream our match generator
    /// emits for `data`, as (literal-length, offset, match-length) triples.
    ///
    /// Facade for the sequence-conformance test in `ffi-bench`, which compares
    /// the stream against the C reference's `ZSTD_generateSequences` output.
    /// Pure Rust; the C side stays out of this crate.
    pub fn collect_level22_sequences(data: &[u8]) -> alloc::vec::Vec<(usize, usize, usize)> {
        crate::encoding::match_generator::collect_level22_sequences(data)
    }

    /// FastCOVER dictionary roundtrip fixture.
    ///
    /// Returns `(finalized_dictionary, compressed_frame, original_payload)`.
    /// Facade for the `ffi-bench` conformance test that decodes
    /// `compressed_frame` against the dictionary through the C decoder and
    /// compares the result to `original_payload`.
    #[cfg(feature = "dict_builder")]
    pub fn dict_roundtrip_fixture() -> (
        alloc::vec::Vec<u8>,
        alloc::vec::Vec<u8>,
        alloc::vec::Vec<u8>,
    ) {
        crate::dictionary::dict_roundtrip_fixture()
    }

    /// Type (raw / rle / compressed) of the first block in `frame`.
    ///
    /// Facade over the internal block decoder for the FFI parity tests in
    /// `ffi-bench`.
    ///
    /// # Panics
    /// Panics if the frame header or the first block header fails to parse.
    pub fn first_block_type(frame: &[u8]) -> BlockType {
        let (_, header_len) = crate::decoding::frame::read_frame_header_with_format(frame, false)
            .expect("frame header should parse");
        let mut block_decoder = crate::decoding::block_decoder::new();
        // The first block header starts right after the frame header.
        let after_frame_header = &frame[header_len as usize..];
        let (block_header, _) = block_decoder
            .read_block_header(after_frame_header)
            .expect("block header should parse");
        block_header.block_type
    }

    /// `(single_segment_flag, frame_content_size, fcs_field_size_bytes)` as
    /// parsed from the header of `frame`.
    ///
    /// Facade for the FFI parity tests in `ffi-bench` so they need not reach
    /// into the internal `FrameHeader` type. The field-size component falls
    /// back to `0` when the descriptor does not yield one.
    ///
    /// # Panics
    /// Panics if the frame header fails to parse.
    pub fn frame_header_info(frame: &[u8]) -> (bool, u64, u8) {
        let (header, _) = crate::decoding::frame::read_frame_header_with_format(frame, false)
            .expect("frame header should parse");
        let single_segment = header.descriptor.single_segment_flag();
        let content_size = header.frame_content_size();
        let fcs_field_bytes = header.descriptor.frame_content_size_bytes().unwrap_or(0);
        (single_segment, content_size, fcs_field_bytes)
    }
}
264
/// SIMD wildcopy overshoot slack carried by every decoder backend
/// (currently **32 bytes**).
///
/// # Why this size
///
/// Sized so the AVX2 chunked kernel in `simd_copy::copy_bytes_overshooting`
/// (32-byte stride on x86-64) can fire on tail copies near the end of a
/// fixed-capacity output buffer. Upstream zstd's `WILDCOPY_OVERLENGTH` is
/// also 32 bytes today; this matches that contract.
///
/// # How to use it
///
/// Public so callers sizing an output slice for
/// [`crate::decoding::FrameDecoder::decode_all`] can write
/// `frame_content_size + WILDCOPY_OVERLENGTH` symbolically instead of
/// duplicating the value. Refer to this constant rather than a hardcoded
/// literal: `simd_copy::copy_bytes_overshooting` already ships an AVX-512
/// 64-byte chunked kernel, and the slack may grow further to reliably
/// enable that wider kernel at buffer tails (mirroring how the bump from
/// 16 → 32 enabled the AVX2 32-byte kernel at the tail).
pub const WILDCOPY_OVERLENGTH: usize = crate::decoding::buffer_backend::WILDCOPY_OVERLENGTH;