zeekstd/
lib.rs

1//! This crate provides a Rust implementation of the Zstandard Seekable Format, as outlined in
2//! the [specification].
3//!
4//! The seekable format splits compressed data into a series of independent "frames", each
5//! compressed individually, so that decompression of a section in the middle of a compressed file
6//! only requires zstd to decompress at most a frame's worth of extra data, instead of the entire
7//! file.
8//!
9//! The frames are appended, so that the decompression of the entire payload still regenerates the
10//! original content, using any compliant zstd decoder.
11//!
12//! Zeekstd uses bindings from the [zstd_safe] crate.
13//!
14//! # Getting Started
15//!
16//! - The [`RawEncoder`] and [`Encoder`] compress data.
17//! - The [`Decoder`] performs seekable decompression.
//! - The [`SeekTable`] holds information about the frames of a seekable compressed file. It is
//!   created and updated automatically during compression.
20//!
21//! [specification]: https://github.com/rorosen/zeekstd/blob/main/seekable_format.md
22//! [zstd_safe]: https://docs.rs/zstd-safe/latest/zstd_safe/
23
24#![no_std]
25#![cfg_attr(docsrs, feature(doc_cfg))]
26
27extern crate alloc;
28
29#[cfg(feature = "std")]
30extern crate std;
31
32mod decode;
33mod encode;
34mod error;
35pub mod seek_table;
36mod seekable;
37
38pub use decode::{DecodeOptions, Decoder};
39#[cfg(feature = "std")]
40#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
41pub use encode::Encoder;
42pub use encode::{
43    CompressionProgress, EncodeOptions, EpilogueProgress, FrameSizePolicy, RawEncoder,
44};
45pub use error::{Error, Result};
46pub use seek_table::SeekTable;
47pub use seekable::{BytesWrapper, OffsetFrom, Seekable};
48// Re-export as it's part of the API.
49pub use zstd_safe::CompressionLevel;
50
/// Magic number stored in the seek table integrity field.
pub const SEEKABLE_MAGIC_NUMBER: u32 = 0x8F92_EAB1;
/// Upper limit on the number of frames a seekable compressed file may contain
/// (`0x0800_0000`, i.e. 2^27).
pub const SEEKABLE_MAX_FRAMES: u32 = 1 << 27;
/// Size of the seek table integrity field.
pub const SEEK_TABLE_INTEGRITY_SIZE: usize = 9;
/// Upper limit on the size of the uncompressed data of a single frame
/// (`0x4000_0000`, i.e. 2^30).
pub const SEEKABLE_MAX_FRAME_SIZE: usize = 1 << 30;
/// Size of the skippable frame header.
///
/// Made up of the skippable magic number (4 bytes) followed by the frame size field (4 bytes).
pub(crate) const SKIPPABLE_HEADER_SIZE: usize = 4 + 4;
63
// Exists only while running doctests with the `std` feature enabled: the README is attached as
// this item's documentation so that its code examples are compiled and run as doctests.
#[cfg(all(doctest, feature = "std"))]
#[doc = include_str!("../../README.md")]
pub struct ReadmeDoctests;
68
69#[cfg(test)]
70mod tests {
71    use alloc::vec;
72    use alloc::vec::Vec;
73
74    use proptest::prelude::*;
75
76    use crate::seek_table::Format;
77
78    use super::*;
79
    /// Payload used by every round-trip test: the text of `./lib.rs`, embedded at compile time.
    pub const INPUT: &str = include_str!("./lib.rs");
81
82    fn test_cycle(frame_size_policy: Option<FrameSizePolicy>) {
83        let mut seekable = vec![];
84        let mut opts = EncodeOptions::new();
85
86        if let Some(policy) = frame_size_policy {
87            opts = opts.frame_size_policy(policy);
88        }
89        let mut encoder = opts.into_raw_encoder().unwrap();
90
91        // Make buf small enough to compress/end frame/write seek table/decompress in multiple
92        // steps
93        let mut buf = vec![0; INPUT.len() / 500];
94
95        let mut in_progress = 0;
96        while in_progress < INPUT.len() {
97            let progress = encoder
98                .compress(&INPUT.as_bytes()[in_progress..], &mut buf)
99                .unwrap();
100            seekable.extend(&buf[..progress.out_progress()]);
101            in_progress += progress.in_progress();
102        }
103
104        loop {
105            let prog = encoder.end_frame(&mut buf).unwrap();
106            seekable.extend(&buf[..prog.out_progress()]);
107            if prog.data_left() == 0 {
108                break;
109            }
110        }
111
112        let mut ser = encoder.into_seek_table().into_serializer();
113        loop {
114            let n = ser.write_into(&mut buf);
115            if n == 0 {
116                break;
117            }
118            seekable.extend(&buf[..n]);
119        }
120
121        let wrapper = BytesWrapper::new(&seekable);
122        let mut decoder = Decoder::new(wrapper).unwrap();
123        let mut output = Vec::with_capacity(INPUT.len());
124
125        loop {
126            let n = decoder.decompress(&mut buf).unwrap();
127            if n == 0 {
128                break;
129            }
130            output.extend(&buf[..n]);
131        }
132
133        assert_eq!(&INPUT.as_bytes(), &output);
134    }
135
136    fn test_cycle_stand_alone_seek_table(
137        frame_size_policy: Option<FrameSizePolicy>,
138        format: Format,
139    ) {
140        let mut seekable = vec![];
141        let mut opts = EncodeOptions::new();
142
143        if let Some(policy) = frame_size_policy {
144            opts = opts.frame_size_policy(policy);
145        }
146        let mut encoder = opts.into_raw_encoder().unwrap();
147
148        // Make buf small enough to compress/end frame/write seek table/decompress in multiple
149        // steps
150        let mut buf = vec![0; INPUT.len() / 500];
151
152        let mut in_progress = 0;
153        while in_progress < INPUT.len() {
154            let progress = encoder
155                .compress(&INPUT.as_bytes()[in_progress..], &mut buf)
156                .unwrap();
157            seekable.extend(&buf[..progress.out_progress()]);
158            in_progress += progress.in_progress();
159        }
160
161        loop {
162            let prog = encoder.end_frame(&mut buf).unwrap();
163            seekable.extend(&buf[..prog.out_progress()]);
164            if prog.data_left() == 0 {
165                break;
166            }
167        }
168
169        let mut ser = encoder.into_seek_table().into_format_serializer(format);
170        let mut seek_table = Vec::with_capacity(ser.encoded_len());
171        loop {
172            let n = ser.write_into(&mut buf);
173            if n == 0 {
174                break;
175            }
176            seek_table.extend(&buf[..n]);
177        }
178
179        assert_eq!(seek_table.len(), ser.encoded_len());
180
181        let mut wrapper = BytesWrapper::new(&seek_table);
182        let seek_table = SeekTable::from_seekable_format(&mut wrapper, format).unwrap();
183
184        let wrapper = BytesWrapper::new(&seekable);
185        let mut decoder = DecodeOptions::new(wrapper)
186            .seek_table(seek_table)
187            .into_decoder()
188            .unwrap();
189        let mut output = Vec::with_capacity(INPUT.len());
190
191        loop {
192            let n = decoder.decompress(&mut buf).unwrap();
193            if n == 0 {
194                break;
195            }
196            output.extend(&buf[..n]);
197        }
198
199        assert_eq!(&INPUT.as_bytes(), &output);
200    }
201
202    fn test_patch_cycle(frame_size_policy: Option<FrameSizePolicy>) {
203        let old = INPUT;
204        let new = alloc::format!("{INPUT}\nThe End");
205        let mut patch = vec![];
206        let mut opts = EncodeOptions::new();
207
208        if let Some(policy) = frame_size_policy {
209            opts = opts.frame_size_policy(policy);
210        }
211        let mut encoder = opts.into_raw_encoder().unwrap();
212
213        // Make buf small enough to compress/end frame/write seek table/decompress in multiple
214        // steps
215        let mut buf = vec![0; INPUT.len() / 500];
216
217        // Create a binary patch
218        let mut in_progress = 0;
219        while in_progress < new.len() {
220            let progress = encoder
221                .compress_with_prefix(
222                    &new.as_bytes()[in_progress..],
223                    &mut buf,
224                    Some(old.as_bytes()),
225                )
226                .unwrap();
227            patch.extend(&buf[..progress.out_progress()]);
228            in_progress += progress.in_progress();
229        }
230
231        loop {
232            let prog = encoder.end_frame(&mut buf).unwrap();
233            patch.extend(&buf[..prog.out_progress()]);
234            if prog.data_left() == 0 {
235                break;
236            }
237        }
238
239        let mut ser = encoder.into_seek_table().into_serializer();
240        loop {
241            let n = ser.write_into(&mut buf);
242            if n == 0 {
243                break;
244            }
245            patch.extend(&buf[..n]);
246        }
247
248        let wrapper = BytesWrapper::new(&patch);
249        let mut decoder = Decoder::new(wrapper).unwrap();
250        let mut output: Vec<u8> = Vec::with_capacity(new.len());
251
252        loop {
253            let n = decoder
254                .decompress_with_prefix(&mut buf, Some(old.as_bytes()))
255                .unwrap();
256            if n == 0 {
257                break;
258            }
259            output.extend(&buf[..n]);
260        }
261
262        assert_eq!(new.as_bytes(), &output);
263    }
264
265    #[cfg(feature = "std")]
266    fn test_cycle_std(frame_size_policy: Option<FrameSizePolicy>) {
267        use std::io::{Cursor, copy};
268
269        let mut input = Cursor::new(INPUT);
270        let mut seekable = Cursor::new(vec![]);
271        let mut opts = EncodeOptions::new();
272        if let Some(policy) = frame_size_policy {
273            opts = opts.frame_size_policy(policy);
274        }
275
276        let mut encoder = opts.into_encoder(&mut seekable).unwrap();
277        copy(&mut input, &mut encoder).unwrap();
278
279        let n = encoder.finish().unwrap();
280        assert_eq!(n, seekable.position());
281
282        let mut decoder = Decoder::new(seekable).unwrap();
283        let mut output = Cursor::new(vec![]);
284        copy(&mut decoder, &mut output).unwrap();
285
286        assert_eq!(INPUT.as_bytes(), output.get_ref());
287    }
288
289    #[test]
290    fn cycle() {
291        test_cycle(None);
292    }
293
294    #[test]
295    fn patch_cycle() {
296        test_patch_cycle(None);
297    }
298
299    #[test]
300    fn cycle_stand_alone_seek_table_head() {
301        test_cycle_stand_alone_seek_table(None, Format::Head);
302    }
303
304    #[test]
305    fn cycle_stand_alone_seek_table_foot() {
306        test_cycle_stand_alone_seek_table(None, Format::Foot);
307    }
308
309    #[test]
310    #[cfg(feature = "std")]
311    fn cycle_std() {
312        test_cycle_std(None);
313    }
314
315    proptest! {
316        #[test]
317        fn cycle_custom_compressed_frame_size(frame_size in 1..1024u32) {
318            test_cycle(Some(FrameSizePolicy::Compressed(frame_size)));
319        }
320
321        #[test]
322        fn cycle_custom_decompressed_frame_size(frame_size in 1..1024u32) {
323            test_cycle(Some(FrameSizePolicy::Uncompressed(frame_size)));
324        }
325
326        #[test]
327        #[cfg(feature = "std")]
328        fn cycle_custom_compressed_frame_size_std(frame_size in 1..1024u32) {
329            test_cycle(Some(FrameSizePolicy::Compressed(frame_size)));
330        }
331
332        #[test]
333        #[cfg(feature = "std")]
334        fn cycle_custom_decompressed_frame_size_std(frame_size in 1..1024u32) {
335            test_cycle_std(Some(FrameSizePolicy::Uncompressed(frame_size)));
336        }
337
338        #[test]
339        fn cycle_stand_alone_seek_table_foot_custom_compressed_frame_size(frame_size in 1..1024u32) {
340            test_cycle_stand_alone_seek_table(Some(FrameSizePolicy::Compressed(frame_size)), Format::Head);
341        }
342
343        #[test]
344        fn cycle_stand_alone_seek_table_foot_custom_decompressed_frame_size(frame_size in 1..1024u32) {
345            test_cycle_stand_alone_seek_table(Some(FrameSizePolicy::Uncompressed(frame_size)), Format::Foot);
346        }
347
348        #[test]
349        fn patch_cycle_custom_compressed_frame_size(frame_size in 1..1024u32) {
350            test_patch_cycle(Some(FrameSizePolicy::Compressed(frame_size)));
351        }
352
353        #[test]
354        fn patch_cycle_custom_decompressed_frame_size(frame_size in 1..1024u32) {
355            test_patch_cycle(Some(FrameSizePolicy::Uncompressed(frame_size)));
356        }
357    }
358}