Skip to main content

oxiarc_zstd/
lib.rs

1//! # OxiArc Zstandard
2//!
3//! Pure Rust implementation of the Zstandard (zstd) compression format (RFC 8878).
4//!
5//! Zstandard is a modern, fast compression algorithm providing excellent compression
6//! ratios. This implementation provides full compression and decompression support.
7//!
8//! ## Features
9//!
10//! - Full LZ77 + Huffman + FSE compression (levels 1-22)
11//! - Complete Zstandard frame parsing and decompression
12//! - FSE (Finite State Entropy) encoding and decoding
13//! - Huffman encoding and decoding for literals
14//! - Dictionary-based compression for small data
15//! - Streaming Write/Read API
16//! - XXH64 checksum verification
17//! - Optional parallel compression
18//!
19//! ## Example
20//!
21//! ```rust,no_run
22//! use oxiarc_zstd::{compress_with_level, decompress, encode_all, decode_all};
23//!
24//! // Buffer-based compression with level
25//! let data = b"Hello, Zstandard!";
26//! let compressed = compress_with_level(data, 3).unwrap();
27//! let decompressed = decompress(&compressed).unwrap();
28//! assert_eq!(decompressed, data);
29//!
30//! // Convenience functions (zstd crate compatible pattern)
31//! let compressed = encode_all(data, 3).unwrap();
32//! let decompressed = decode_all(&compressed).unwrap();
33//! assert_eq!(decompressed, data);
34//! ```
35
36#![warn(missing_docs)]
37#![warn(clippy::all)]
38
39mod bitwriter;
40mod compressed_block;
41/// Dictionary support for improved compression of small data.
42pub mod dict;
43mod encode;
44mod frame;
45mod fse;
46#[allow(dead_code)]
47mod fse_encoder;
48mod huffman;
49#[allow(dead_code)]
50mod huffman_encoder;
51mod literals;
52mod lz77;
53mod sequences;
54/// Streaming compression and decompression.
55pub mod streaming;
56mod xxhash;
57
58// Primary compression API
59pub use encode::{
60    CompressionStrategy, ZstdEncoder, compress, compress_no_checksum, compress_with_level,
61    decode_all, encode_all,
62};
63
64// Decompression API
65pub use frame::{
66    ZstdDecoder, decompress, decompress_frame, decompress_multi_frame, decompress_with_dict,
67    write_skippable_frame,
68};
69
70// Streaming API
71pub use streaming::{ZstdStreamDecoder, ZstdStreamEncoder};
72
73/// Alias for [`ZstdStreamEncoder`] — a streaming writer that emits incremental
74/// Zstandard frames.
75pub type ZstdWriter<W> = ZstdStreamEncoder<W>;
76
77// Dictionary API
78pub use dict::{ZstdDict, train_dictionary};
79
80// Advanced: LZ77 types (for users who want fine-grained control)
81pub use lz77::{LevelConfig, Lz77Sequence, MatchFinder};
82
83// Advanced: Bitstream writers (for custom encoding)
84pub use bitwriter::{BackwardBitWriter, ForwardBitWriter};
85
86#[cfg(feature = "parallel")]
87pub use encode::compress_parallel;
88
89use oxiarc_core::error::{OxiArcError, Result};
90
/// Magic number that opens a Zstandard frame: `0xFD2FB528`, laid out as
/// little-endian bytes.
pub const ZSTD_MAGIC: [u8; 4] = 0xFD2F_B528_u32.to_le_bytes();

/// First value of the skippable-frame magic number range (`0x184D2A50`).
pub const SKIPPABLE_MAGIC_LOW: u32 = 0x184D_2A50;

/// Last value of the skippable-frame magic number range (`0x184D2A5F`).
///
/// Only the low nibble differs from [`SKIPPABLE_MAGIC_LOW`].
pub const SKIPPABLE_MAGIC_HIGH: u32 = SKIPPABLE_MAGIC_LOW | 0x0F;

/// Maximum window size (8 MB by default; the format itself permits larger
/// windows).
pub const MAX_WINDOW_SIZE: usize = 8 << 20;

/// Maximum block size (128 KB).
pub const MAX_BLOCK_SIZE: usize = 128 << 10;
105
106/// Block types in Zstandard.
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108pub enum BlockType {
109    /// Raw uncompressed block.
110    Raw,
111    /// RLE block (single byte repeated).
112    Rle,
113    /// Compressed block with literals and sequences.
114    Compressed,
115    /// Reserved (invalid).
116    Reserved,
117}
118
119impl BlockType {
120    /// Create block type from 2-bit value.
121    pub fn from_bits(bits: u8) -> Result<Self> {
122        match bits & 0x03 {
123            0 => Ok(BlockType::Raw),
124            1 => Ok(BlockType::Rle),
125            2 => Ok(BlockType::Compressed),
126            3 => Err(OxiArcError::CorruptedData {
127                offset: 0,
128                message: "reserved block type".to_string(),
129            }),
130            _ => unreachable!(),
131        }
132    }
133}
134
/// The kind of a literals section, as encoded in a 2-bit field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LiteralsBlockType {
    /// Literals stored uncompressed.
    Raw,
    /// Literals consisting of a single repeated byte.
    Rle,
    /// Huffman-compressed literals that carry their own tree.
    Compressed,
    /// Huffman-compressed literals that reuse the previous tree.
    Treeless,
}

impl LiteralsBlockType {
    /// Decode a literals block type from a 2-bit value.
    ///
    /// Only the two least-significant bits of `bits` are inspected, so every
    /// input maps to a variant and this function never fails.
    pub fn from_bits(bits: u8) -> Self {
        // Variants listed in the order of their 2-bit codes (0..=3).
        const BY_CODE: [LiteralsBlockType; 4] = [
            LiteralsBlockType::Raw,
            LiteralsBlockType::Rle,
            LiteralsBlockType::Compressed,
            LiteralsBlockType::Treeless,
        ];
        // The mask keeps the index within 0..=3, so indexing cannot panic.
        BY_CODE[usize::from(bits & 0x03)]
    }
}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Two concatenated frames decode to the concatenation of their payloads.
    #[test]
    fn test_multi_frame_decompress() {
        let mut stream = compress_with_level(b"Hello ", 3).unwrap();
        let second = compress_with_level(b"World!", 3).unwrap();
        stream.extend_from_slice(&second);

        assert_eq!(decompress_multi_frame(&stream).unwrap(), b"Hello World!");
    }

    /// A leading skippable frame contributes nothing to the decoded output.
    #[test]
    fn test_skippable_frame_ignored() {
        let mut stream = write_skippable_frame(b"metadata", 0);
        let frame = compress_with_level(b"data", 3).unwrap();
        stream.extend_from_slice(&frame);

        assert_eq!(decompress_multi_frame(&stream).unwrap(), b"data");
    }

    /// Data written through `ZstdWriter` in small pieces round-trips.
    #[test]
    fn test_incremental_streaming_writer() {
        use std::io::Write;

        let payload = b"Hello World! ";
        let mut sink = Vec::new();
        let mut encoder = ZstdWriter::new(&mut sink, 3);
        // Feed the encoder three bytes at a time.
        payload
            .chunks(3)
            .for_each(|piece| encoder.write_all(piece).unwrap());
        encoder.finish().unwrap();

        assert_eq!(decompress_multi_frame(&sink).unwrap(), payload);
    }

    /// `decompress_frame` decodes only the first frame and reports its length.
    #[test]
    fn test_decompress_frame_returns_consumed() {
        let mut stream = compress_with_level(b"abc", 1).unwrap();
        // Remember where the first frame ends before appending the second.
        let first_len = stream.len();
        let second = compress_with_level(b"xyz", 1).unwrap();
        stream.extend_from_slice(&second);

        let (data, consumed) = decompress_frame(&stream).unwrap();
        assert_eq!(data, b"abc");
        assert_eq!(consumed, first_len);
    }

    /// Every nibble 0..=15 yields a magic number inside the skippable range.
    #[test]
    fn test_skippable_frame_magic_nibble() {
        let skippable = SKIPPABLE_MAGIC_LOW..=SKIPPABLE_MAGIC_HIGH;
        (0u8..=15)
            .map(|nibble| write_skippable_frame(b"test", nibble))
            .for_each(|frame| {
                let mut head = [0u8; 4];
                head.copy_from_slice(&frame[..4]);
                assert!(skippable.contains(&u32::from_le_bytes(head)));
            });
    }

    /// Empty input decodes to empty output.
    #[test]
    fn test_multi_frame_empty_input() {
        assert!(decompress_multi_frame(&[]).unwrap().is_empty());
    }

    /// Input holding only a skippable frame decodes to empty output.
    #[test]
    fn test_multi_frame_skippable_only() {
        let only_skippable = write_skippable_frame(b"some metadata", 3);
        assert!(decompress_multi_frame(&only_skippable).unwrap().is_empty());
    }

    /// Codes 0..=2 map to block types; code 3 is rejected.
    #[test]
    fn test_block_type_from_bits() {
        let valid = [
            (0, BlockType::Raw),
            (1, BlockType::Rle),
            (2, BlockType::Compressed),
        ];
        for &(bits, expected) in &valid {
            assert_eq!(BlockType::from_bits(bits).unwrap(), expected);
        }
        assert!(BlockType::from_bits(3).is_err());
    }

    /// All four 2-bit codes map to a literals block type.
    #[test]
    fn test_literals_block_type() {
        let expected = [
            (0, LiteralsBlockType::Raw),
            (1, LiteralsBlockType::Rle),
            (2, LiteralsBlockType::Compressed),
            (3, LiteralsBlockType::Treeless),
        ];
        for &(bits, variant) in &expected {
            assert_eq!(LiteralsBlockType::from_bits(bits), variant);
        }
    }

    /// The magic bytes are the little-endian encoding of `0xFD2FB528`.
    #[test]
    fn test_zstd_magic() {
        assert_eq!(ZSTD_MAGIC, 0xFD2F_B528_u32.to_le_bytes());
    }
}