Skip to main content

array_format/
codec.rs

1//! Compression codec trait and built-in implementations.
2//!
3//! The [`CompressionCodec`] trait allows plugging in different compression
4//! algorithms. The footer records which codec was used per block via
5//! [`CodecId`], so the reader must be configured
6//! with a codec that can handle all codec ids present in the file.
7
8use crate::block::CodecId;
9use crate::error::{Error, Result};
10
11/// A compression codec that can compress and decompress block data.
12///
13/// Implementations must be `Send + Sync` so they can be shared across
14/// threads and async tasks.
15///
16/// # Extensibility
17///
18/// Implement this trait to add support for custom compression algorithms
19/// (e.g. zstd, lz4, snappy). Register the codec by its
20/// [`CodecId::Named`] identifier.
21pub trait CompressionCodec: Send + Sync {
22    /// Returns the [`CodecId`] that identifies this codec in the footer.
23    fn id(&self) -> CodecId;
24
25    /// Compresses `data` and returns the compressed bytes.
26    fn compress(&self, data: &[u8]) -> Result<Vec<u8>>;
27
28    /// Decompresses `data` and returns the original bytes.
29    fn decompress(&self, data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>>;
30}
31
32/// A no-op codec that stores blocks uncompressed.
33///
34/// This is the default codec used when no compression is configured.
35#[derive(Debug, Clone, Copy)]
36pub struct NoCompression;
37
38impl CompressionCodec for NoCompression {
39    fn id(&self) -> CodecId {
40        CodecId::None
41    }
42
43    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
44        Ok(data.to_vec())
45    }
46
47    fn decompress(&self, data: &[u8], _uncompressed_size: usize) -> Result<Vec<u8>> {
48        Ok(data.to_vec())
49    }
50}
51
/// Zstandard compression codec.
///
/// The compression level is configurable; [`Default`] selects level 3.
#[derive(Debug, Clone)]
pub struct ZstdCodec {
    /// Zstd compression level (typically 1–22).
    pub level: i32,
}

impl ZstdCodec {
    /// Builds a `ZstdCodec` that compresses at `level`.
    ///
    /// The level is stored as given; no range validation happens here.
    pub fn new(level: i32) -> Self {
        ZstdCodec { level }
    }
}

impl Default for ZstdCodec {
    /// Returns a codec configured with compression level 3.
    fn default() -> Self {
        Self::new(3)
    }
}
73
74impl CompressionCodec for ZstdCodec {
75    fn id(&self) -> CodecId {
76        CodecId::Named("zstd".into())
77    }
78
79    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
80        zstd::bulk::compress(data, self.level).map_err(|e| Error::Codec(e.to_string()))
81    }
82
83    fn decompress(&self, data: &[u8], uncompressed_size: usize) -> Result<Vec<u8>> {
84        zstd::bulk::decompress(data, uncompressed_size).map_err(|e| Error::Codec(e.to_string()))
85    }
86}
87
88/// LZ4 compression codec using `lz4_flex`.
89///
90/// Provides fast compression and decompression at the cost of a slightly
91/// lower compression ratio compared to zstd.
92#[derive(Debug, Clone, Copy, Default)]
93pub struct Lz4Codec;
94
95impl CompressionCodec for Lz4Codec {
96    fn id(&self) -> CodecId {
97        CodecId::Named("lz4".into())
98    }
99
100    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
101        Ok(lz4_flex::compress_prepend_size(data))
102    }
103
104    fn decompress(&self, data: &[u8], _uncompressed_size: usize) -> Result<Vec<u8>> {
105        lz4_flex::decompress_size_prepended(data).map_err(|e| Error::Codec(e.to_string()))
106    }
107}
108
109/// Decompresses `data` by dispatching on the [`CodecId`] stored in the block footer.
110///
111/// This allows the reader to decompress blocks without requiring a statically
112/// known codec — the codec is inferred from the block metadata at read time.
113pub fn decompress_by_id(
114    codec_id: &CodecId,
115    data: &[u8],
116    uncompressed_size: usize,
117) -> Result<Vec<u8>> {
118    match codec_id {
119        CodecId::None => NoCompression.decompress(data, uncompressed_size),
120        CodecId::Named(name) => match name.as_str() {
121            "zstd" => ZstdCodec::default().decompress(data, uncompressed_size),
122            "lz4" => Lz4Codec.decompress(data, uncompressed_size),
123            other => Err(Error::Codec(format!("unknown codec: {other}"))),
124        },
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Compressible sample shared by the zstd and lz4 tests.
    const REPETITIVE: &[u8] = b"aaabbbccc repeated data for compression aaabbbccc";

    #[test]
    fn no_compression_roundtrip() {
        let codec = NoCompression;
        let data = b"hello world, this is a test payload";
        let compressed = codec.compress(data).unwrap();
        let decompressed = codec.decompress(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }

    #[test]
    fn no_compression_id() {
        assert_eq!(NoCompression.id(), CodecId::None);
    }

    #[test]
    fn codec_is_object_safe() {
        // Verify the trait can be used as a trait object.
        let codec: Box<dyn CompressionCodec> = Box::new(NoCompression);
        assert_eq!(codec.id(), CodecId::None);
    }

    #[test]
    fn zstd_roundtrip() {
        let codec = ZstdCodec::default();
        let compressed = codec.compress(REPETITIVE).unwrap();
        let decompressed = codec.decompress(&compressed, REPETITIVE.len()).unwrap();
        assert_eq!(decompressed, REPETITIVE);
    }

    #[test]
    fn zstd_roundtrip_with_custom_level() {
        // A non-default level must still produce data the codec can decode.
        let codec = ZstdCodec::new(1);
        assert_eq!(codec.level, 1);
        let compressed = codec.compress(REPETITIVE).unwrap();
        let decompressed = codec.decompress(&compressed, REPETITIVE.len()).unwrap();
        assert_eq!(decompressed, REPETITIVE);
    }

    #[test]
    fn zstd_id() {
        assert_eq!(ZstdCodec::default().id(), CodecId::Named("zstd".into()));
    }

    #[test]
    fn lz4_roundtrip() {
        let codec = Lz4Codec;
        let compressed = codec.compress(REPETITIVE).unwrap();
        let decompressed = codec.decompress(&compressed, REPETITIVE.len()).unwrap();
        assert_eq!(decompressed, REPETITIVE);
    }

    #[test]
    fn lz4_id() {
        assert_eq!(Lz4Codec.id(), CodecId::Named("lz4".into()));
    }

    #[test]
    fn decompress_by_id_none_returns_input() {
        let data = b"stored verbatim";
        let out = decompress_by_id(&CodecId::None, data, data.len()).unwrap();
        assert_eq!(out, data);
    }

    #[test]
    fn decompress_by_id_dispatches_to_zstd() {
        let codec = ZstdCodec::default();
        let compressed = codec.compress(REPETITIVE).unwrap();
        let out = decompress_by_id(&codec.id(), &compressed, REPETITIVE.len()).unwrap();
        assert_eq!(out, REPETITIVE);
    }

    #[test]
    fn decompress_by_id_dispatches_to_lz4() {
        let codec = Lz4Codec;
        let compressed = codec.compress(REPETITIVE).unwrap();
        let out = decompress_by_id(&codec.id(), &compressed, REPETITIVE.len()).unwrap();
        assert_eq!(out, REPETITIVE);
    }

    #[test]
    fn decompress_by_id_rejects_unknown_codec() {
        let id = CodecId::Named("brotli".into());
        match decompress_by_id(&id, b"payload", 7) {
            Err(Error::Codec(msg)) => assert_eq!(msg, "unknown codec: brotli"),
            _ => panic!("expected a codec error for an unknown codec id"),
        }
    }
}