Skip to main content

structured_zstd/encoding/
mod.rs

1//! Structures and utilities used for compressing/encoding data into the Zstd format.
2
3pub(crate) mod block_header;
4pub(crate) mod blocks;
5pub(crate) mod frame_header;
6pub(crate) mod match_generator;
7pub(crate) mod util;
8
9mod frame_compressor;
10mod levels;
11pub use frame_compressor::FrameCompressor;
12pub use match_generator::MatchGeneratorDriver;
13
14use crate::io::{Read, Write};
15use alloc::vec::Vec;
16
17/// Convenience function to compress some source into a target without reusing any resources of the compressor
18/// ```rust
19/// use structured_zstd::encoding::{compress, CompressionLevel};
20/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
21/// let mut target = Vec::new();
22/// compress(data, &mut target, CompressionLevel::Fastest);
23/// ```
24pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) {
25    let mut frame_enc = FrameCompressor::new(level);
26    frame_enc.set_source(source);
27    frame_enc.set_drain(target);
28    frame_enc.compress();
29}
30
31/// Convenience function to compress some source into a Vec without reusing any resources of the compressor
32/// ```rust
33/// use structured_zstd::encoding::{compress_to_vec, CompressionLevel};
34/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
35/// let compressed = compress_to_vec(data, CompressionLevel::Fastest);
36/// ```
37pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
38    let mut vec = Vec::new();
39    compress(source, &mut vec, level);
40    vec
41}
42
/// The compression mode used impacts the speed of compression,
/// and resulting compression ratios. Faster compression will result
/// in worse compression ratios, and vice versa.
///
/// The type is a plain, field-less enum, so it is cheap to copy, can be
/// compared for equality and can be printed with `{:?}` for diagnostics.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CompressionLevel {
    /// This level does not compress the data at all, and simply wraps
    /// it in a Zstandard frame.
    Uncompressed,
    /// This level is roughly equivalent to Zstd compression level 1
    Fastest,
    /// This level uses the crate's dedicated `dfast`-style matcher to
    /// target a better speed/ratio tradeoff than [`CompressionLevel::Fastest`].
    ///
    /// It represents this crate's "default" compression setting and may
    /// evolve in future versions as the implementation moves closer to
    /// reference zstd level 3 behavior.
    Default,
    /// This level is roughly equivalent to Zstd level 7.
    ///
    /// UNIMPLEMENTED
    Better,
    /// This level is roughly equivalent to Zstd level 11.
    ///
    /// UNIMPLEMENTED
    Best,
}
69
70/// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
71/// making their own tradeoffs between runtime, memory usage and compression ratio
72///
73/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
74/// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window
75/// the decoder will need to decode the data again.
76///
77/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
78/// window of data that is being used for matching.
79///
80/// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`.
81///
82/// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called.
83///
84/// This is repeated until no more data is left to be compressed.
85pub trait Matcher {
86    /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB.
87    fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;
88    /// Get a reference to the last commited space
89    fn get_last_space(&mut self) -> &[u8];
90    /// Commit a space to the matcher so it can be matched against
91    fn commit_space(&mut self, space: alloc::vec::Vec<u8>);
92    /// Just process the data in the last commited space for future matching
93    fn skip_matching(&mut self);
94    /// Process the data in the last commited space for future matching AND generate matches for the data
95    fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
96    /// Reset this matcher so it can be used for the next new frame
97    fn reset(&mut self, level: CompressionLevel);
98    /// The size of the window the decoder will need to execute all sequences produced by this matcher
99    ///
100    /// May change after a call to reset with a different compression level
101    fn window_size(&self) -> u64;
102}
103
/// Sequences that a [`Matcher`] can produce
#[derive(Debug, PartialEq, Eq)]
pub enum Sequence<'data> {
    /// Is encoded as a sequence for the decoder sequence execution.
    ///
    /// The decoder first copies the literals to the decoded data and
    /// then copies `match_len` bytes starting `offset` bytes back in the decoded data.
    Triple {
        /// Bytes that are copied to the decoded data before the match.
        literals: &'data [u8],
        /// How many bytes back in the decoded data the match starts.
        offset: usize,
        /// Number of bytes copied from the match position.
        match_len: usize,
    },
    /// This is returned as the last sequence in a block
    ///
    /// These literals will just be copied at the end of the sequence execution by the decoder
    Literals {
        /// Trailing bytes of the block.
        literals: &'data [u8],
    },
}