Skip to main content

array_format/
codec.rs

1//! Compression codec trait and built-in implementations.
2//!
3//! The [`CompressionCodec`] trait allows plugging in different compression
4//! algorithms. The footer records which codec was used per block via
5//! [`CodecId`], so the reader must be configured
6//! with a codec that can handle all codec ids present in the file.
7
8use crate::block::CodecId;
9use crate::error::{Error, Result};
10
/// A compression codec that can compress and decompress block data.
///
/// Implementations must be `Send + Sync` so they can be shared across
/// threads and async tasks.
///
/// # Examples
///
/// Any built-in codec round-trips block bytes:
///
/// ```
/// use array_format::{CompressionCodec, Lz4Codec};
///
/// let codec = Lz4Codec;
/// let data = b"some block bytes";
/// let compressed = codec.compress(data)?;
/// let restored = codec.decompress(&compressed, data.len())?;
/// assert_eq!(restored, data);
/// # Ok::<(), array_format::Error>(())
/// ```
///
/// # Extensibility
///
/// Implement this trait to add support for compression algorithms that are
/// not built in (e.g. snappy); [`ZstdCodec`] and [`Lz4Codec`] in this module
/// show the pattern. Register the codec by its [`CodecId::Named`] identifier.
///
/// Note that [`decompress_by_id`] only dispatches to the codecs defined in
/// this module; any other name is reported as an `unknown codec` error.
pub trait CompressionCodec: Send + Sync {
    /// Returns the [`CodecId`] that identifies this codec in the footer.
    ///
    /// The built-in codecs return [`CodecId::None`] (no compression) or a
    /// [`CodecId::Named`] value carrying the algorithm name (`"zstd"`, `"lz4"`).
    fn id(&self) -> CodecId;

    /// Compresses `data` and returns the compressed bytes.
    ///
    /// # Errors
    ///
    /// Returns an error when the underlying compressor fails.
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>>;

    /// Decompresses `data` and returns the original bytes.
    ///
    /// `uncompressed_size` is the expected length of the decompressed output.
    /// How it is used is up to the implementation: among the built-in codecs,
    /// [`ZstdCodec`] forwards it to the zstd decompressor, while
    /// [`NoCompression`] and [`Lz4Codec`] ignore it.
    ///
    /// # Errors
    ///
    /// Returns an error when `data` cannot be decompressed by this codec.
    fn decompress(&self, data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>>;
}
46
/// A no-op codec that stores blocks uncompressed.
///
/// This is the default codec used when no compression is configured.
/// Being a unit struct it carries no state, so it is `Copy` and can be
/// obtained through [`Default`] like the other stateless codecs.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoCompression;
52
53impl CompressionCodec for NoCompression {
54    fn id(&self) -> CodecId {
55        CodecId::None
56    }
57
58    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
59        Ok(data.to_vec())
60    }
61
62    fn decompress(&self, data: &[u8], _uncompressed_size: usize) -> Result<Vec<u8>> {
63        Ok(data.to_vec())
64    }
65}
66
/// Codec backed by Zstandard.
///
/// The compression level is configurable; [`Default`] selects level 3.
#[derive(Debug, Clone)]
pub struct ZstdCodec {
    /// Level handed to zstd when compressing (typically 1–22).
    pub level: i32,
}

impl ZstdCodec {
    /// Level chosen by [`Default::default`].
    const DEFAULT_LEVEL: i32 = 3;

    /// Builds a `ZstdCodec` that compresses at `level`.
    pub fn new(level: i32) -> Self {
        ZstdCodec { level }
    }
}

impl Default for ZstdCodec {
    /// Returns a codec configured with level 3.
    fn default() -> Self {
        Self::new(Self::DEFAULT_LEVEL)
    }
}
88
89impl CompressionCodec for ZstdCodec {
90    fn id(&self) -> CodecId {
91        CodecId::Named("zstd".into())
92    }
93
94    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
95        zstd::bulk::compress(data, self.level).map_err(|e| Error::Codec(e.to_string()))
96    }
97
98    fn decompress(&self, data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
99        zstd::bulk::decompress(data, uncompressed_size).map_err(|e| Error::Codec(e.to_string()))
100    }
101}
102
103/// LZ4 compression codec using `lz4_flex`.
104///
105/// Provides fast compression and decompression at the cost of a slightly
106/// lower compression ratio compared to zstd.
107#[derive(Debug, Clone, Copy, Default)]
108pub struct Lz4Codec;
109
110impl CompressionCodec for Lz4Codec {
111    fn id(&self) -> CodecId {
112        CodecId::Named("lz4".into())
113    }
114
115    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
116        Ok(lz4_flex::compress_prepend_size(data))
117    }
118
119    fn decompress(&self, data: &[u8], _uncompressed_size: usize) -> Result<Vec<u8>> {
120        lz4_flex::decompress_size_prepended(data).map_err(|e| Error::Codec(e.to_string()))
121    }
122}
123
124/// Decompresses `data` by dispatching on the [`CodecId`] stored in the block footer.
125///
126/// This allows the reader to decompress blocks without requiring a statically
127/// known codec — the codec is inferred from the block metadata at read time.
128pub fn decompress_by_id(
129    codec_id: &CodecId,
130    data: &[u8],
131    uncompressed_size: usize,
132) -> Result<Vec<u8>> {
133    match codec_id {
134        CodecId::None => NoCompression.decompress(data, uncompressed_size),
135        CodecId::Named(name) => match name.as_str() {
136            "zstd" => ZstdCodec::default().decompress(data, uncompressed_size),
137            "lz4" => Lz4Codec.decompress(data, uncompressed_size),
138            other => Err(Error::Codec(format!("unknown codec: {other}"))),
139        },
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Payload with repeated runs, shared by the zstd and lz4 round-trip tests.
    const REPETITIVE: &[u8] = b"aaabbbccc repeated data for compression aaabbbccc";

    /// Compresses `payload` with `codec`, decompresses the result and checks
    /// that the original bytes come back.
    fn assert_roundtrip(codec: &dyn CompressionCodec, payload: &[u8]) {
        let packed = codec.compress(payload).unwrap();
        let unpacked = codec.decompress(&packed, payload.len()).unwrap();
        assert_eq!(unpacked, payload);
    }

    #[test]
    fn no_compression_roundtrip() {
        assert_roundtrip(&NoCompression, b"hello world, this is a test payload");
    }

    #[test]
    fn no_compression_id() {
        let id = NoCompression.id();
        assert_eq!(id, CodecId::None);
    }

    #[test]
    fn codec_is_object_safe() {
        // Boxing as `dyn CompressionCodec` only compiles if the trait is object safe.
        let boxed: Box<dyn CompressionCodec> = Box::new(NoCompression);
        assert_eq!(boxed.id(), CodecId::None);
    }

    #[test]
    fn zstd_roundtrip() {
        assert_roundtrip(&ZstdCodec::default(), REPETITIVE);
    }

    #[test]
    fn zstd_id() {
        let id = ZstdCodec::default().id();
        assert_eq!(id, CodecId::Named("zstd".into()));
    }

    #[test]
    fn lz4_roundtrip() {
        assert_roundtrip(&Lz4Codec, REPETITIVE);
    }

    #[test]
    fn lz4_id() {
        let id = Lz4Codec.id();
        assert_eq!(id, CodecId::Named("lz4".into()));
    }
}