Skip to main content

dbx_core/storage/compression/
config.rs

1//! Compression configuration for the DBX storage engine.
2//!
3//! Provides configurable compression algorithms for Parquet (ROS, Tier 5)
4//! and future tier-specific compression settings.
5//!
6//! # Supported Algorithms
7//!
8//! | Algorithm | Speed | Ratio | Use Case |
9//! |-----------|-------|-------|----------|
10//! | Snappy | ★★★★★ | ★★★ | Default — balanced speed/ratio |
11//! | LZ4 | ★★★★★ | ★★★ | Ultra-fast — latency-sensitive workloads |
12//! | ZSTD | ★★★ | ★★★★★ | Best ratio — archival/cold storage |
13//! | Brotli | ★★ | ★★★★★ | Maximum ratio — web deployment |
14//! | None | ★★★★★ | ★ | No compression — debugging/diagnostics |
15//!
16//! # Example
17//!
18//! ```rust
19//! use dbx_core::storage::compression::{CompressionConfig, CompressionAlgorithm};
20//!
21//! // Default: Snappy
22//! let config = CompressionConfig::default();
23//! assert_eq!(config.algorithm(), CompressionAlgorithm::Snappy);
24//!
25//! // Maximum compression with ZSTD level 9
26//! let config = CompressionConfig::new(CompressionAlgorithm::Zstd)
27//!     .with_level(9);
28//!
29//! // Ultra-fast for real-time workloads
30//! let config = CompressionConfig::new(CompressionAlgorithm::Lz4);
31//! ```
32
33use parquet::basic::Compression;
34
/// Selects which codec is applied when data is written.
///
/// Every variant corresponds to one of Parquet's native compression codecs.
/// The codecs come with the parquet crate's default feature set, so no
/// additional dependencies are required to use any of them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionAlgorithm {
    /// Store data uncompressed: quickest writes, biggest files.
    None,
    /// Snappy: quick in both directions with a moderate ratio.
    /// Used as the default. Indicative speed: 250-500 MB/s compress,
    /// 500-1500 MB/s decompress.
    Snappy,
    /// LZ4: extremely fast, with a ratio in the same range as Snappy.
    /// A good fit for latency-sensitive and streaming workloads.
    Lz4,
    /// Zstandard: strong compression ratio, tunable via levels 1-22.
    /// A good fit for archival, cold storage, and batch analytics.
    Zstd,
    /// Brotli: highest compression ratio, tunable via levels 0-11.
    /// A good fit for web deployment and network transfer.
    Brotli,
}

impl std::fmt::Display for CompressionAlgorithm {
    /// Writes the human-readable codec name (for example `"LZ4"` or `"ZSTD"`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the label first, then emit it with a single write.
        let label = match self {
            Self::None => "None",
            Self::Snappy => "Snappy",
            Self::Lz4 => "LZ4",
            Self::Zstd => "ZSTD",
            Self::Brotli => "Brotli",
        };
        f.write_str(label)
    }
}

impl CompressionAlgorithm {
    /// Every variant in declaration order, for iteration and benchmarking.
    pub const ALL: &'static [Self] = &[
        Self::None,
        Self::Snappy,
        Self::Lz4,
        Self::Zstd,
        Self::Brotli,
    ];
}
79
/// Compression configuration for Parquet file writing.
///
/// Pairs a [`CompressionAlgorithm`] with an optional compression level.
/// The level only matters for algorithms that support one (ZSTD and Brotli);
/// it is stored as given and clamped to the codec's range when the config is
/// converted with `to_parquet_compression`.
///
/// # Examples
///
/// ```rust
/// use dbx_core::storage::compression::{CompressionConfig, CompressionAlgorithm};
///
/// // Default (Snappy)
/// let config = CompressionConfig::default();
///
/// // ZSTD with level 3 (balanced)
/// let config = CompressionConfig::new(CompressionAlgorithm::Zstd).with_level(3);
///
/// // Convert to Parquet compression setting
/// let parquet_compression = config.to_parquet_compression();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompressionConfig {
    /// Codec applied when writing.
    algorithm: CompressionAlgorithm,
    /// Compression level (only used by ZSTD and Brotli).
    /// - ZSTD: clamped to 1-22 on conversion
    /// - Brotli: clamped to 0-11 on conversion
    /// - Snappy/LZ4/None: ignored
    ///
    /// `None` means "use the parquet crate's `Default` level for the codec"
    /// (`ZstdLevel::default()` / `BrotliLevel::default()`).
    /// NOTE(review): earlier docs stated defaults of 3 (ZSTD) and 6 (Brotli);
    /// the real values are whatever the parquet crate defines — confirm.
    level: Option<u32>,
}
108
109impl Default for CompressionConfig {
110    /// Default compression: Snappy (best general-purpose balance).
111    fn default() -> Self {
112        Self {
113            algorithm: CompressionAlgorithm::Snappy,
114            level: None,
115        }
116    }
117}
118
119impl CompressionConfig {
120    /// Create a new compression config with the specified algorithm.
121    pub fn new(algorithm: CompressionAlgorithm) -> Self {
122        Self {
123            algorithm,
124            level: None,
125        }
126    }
127
128    /// Set the compression level (for ZSTD and Brotli only).
129    ///
130    /// Levels are clamped to valid ranges:
131    /// - ZSTD: 1-22
132    /// - Brotli: 0-11
133    /// - Others: ignored
134    pub fn with_level(mut self, level: u32) -> Self {
135        self.level = Some(level);
136        self
137    }
138
139    /// Get the configured algorithm.
140    pub fn algorithm(&self) -> CompressionAlgorithm {
141        self.algorithm
142    }
143
144    /// Get the configured level (if any).
145    pub fn level(&self) -> Option<u32> {
146        self.level
147    }
148
149    /// Convert to Parquet's `Compression` enum for use in `WriterProperties`.
150    pub fn to_parquet_compression(&self) -> Compression {
151        match self.algorithm {
152            CompressionAlgorithm::None => Compression::UNCOMPRESSED,
153            CompressionAlgorithm::Snappy => Compression::SNAPPY,
154            CompressionAlgorithm::Lz4 => Compression::LZ4,
155            CompressionAlgorithm::Zstd => {
156                let level = self.level.map(|l| l.clamp(1, 22) as i32);
157                match level {
158                    Some(l) => Compression::ZSTD(parquet::basic::ZstdLevel::try_new(l).unwrap()),
159                    None => Compression::ZSTD(parquet::basic::ZstdLevel::default()),
160                }
161            }
162            CompressionAlgorithm::Brotli => {
163                let level = self.level.map(|l| l.clamp(0, 11));
164                match level {
165                    Some(l) => {
166                        Compression::BROTLI(parquet::basic::BrotliLevel::try_new(l).unwrap())
167                    }
168                    None => Compression::BROTLI(parquet::basic::BrotliLevel::default()),
169                }
170            }
171        }
172    }
173
174    // ===== Convenience constructors for common presets =====
175
176    /// Preset: No compression (fastest I/O, largest files).
177    pub fn none() -> Self {
178        Self::new(CompressionAlgorithm::None)
179    }
180
181    /// Preset: Snappy (default — balanced speed and compression).
182    pub fn snappy() -> Self {
183        Self::new(CompressionAlgorithm::Snappy)
184    }
185
186    /// Preset: LZ4 (ultra-fast, similar ratio to Snappy).
187    pub fn lz4() -> Self {
188        Self::new(CompressionAlgorithm::Lz4)
189    }
190
191    /// Preset: ZSTD with default level (excellent ratio).
192    pub fn zstd() -> Self {
193        Self::new(CompressionAlgorithm::Zstd)
194    }
195
196    /// Preset: ZSTD with specified compression level (1-22).
197    pub fn zstd_level(level: u32) -> Self {
198        Self::new(CompressionAlgorithm::Zstd).with_level(level)
199    }
200
201    /// Preset: Brotli with default level (maximum ratio).
202    pub fn brotli() -> Self {
203        Self::new(CompressionAlgorithm::Brotli)
204    }
205
206    /// Preset: Brotli with specified compression level (0-11).
207    pub fn brotli_level(level: u32) -> Self {
208        Self::new(CompressionAlgorithm::Brotli).with_level(level)
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use parquet::basic::{BrotliLevel, ZstdLevel};

    /// The default config is Snappy with no explicit level.
    #[test]
    fn default_is_snappy() {
        let config = CompressionConfig::default();
        assert_eq!(config.algorithm(), CompressionAlgorithm::Snappy);
        assert_eq!(config.level(), None);
    }

    /// Every preset maps to the expected Parquet codec and level.
    ///
    /// The expected values are compared with `assert_eq!`; a bare `matches!`
    /// statement only yields a `bool` and would assert nothing.
    #[test]
    fn parquet_compression_mapping() {
        // None
        assert_eq!(
            CompressionConfig::none().to_parquet_compression(),
            Compression::UNCOMPRESSED
        );

        // Snappy
        assert_eq!(
            CompressionConfig::snappy().to_parquet_compression(),
            Compression::SNAPPY
        );

        // LZ4
        assert_eq!(
            CompressionConfig::lz4().to_parquet_compression(),
            Compression::LZ4
        );

        // ZSTD default
        assert_eq!(
            CompressionConfig::zstd().to_parquet_compression(),
            Compression::ZSTD(ZstdLevel::default())
        );

        // ZSTD with level
        assert_eq!(
            CompressionConfig::zstd_level(9).to_parquet_compression(),
            Compression::ZSTD(ZstdLevel::try_new(9).unwrap())
        );

        // Brotli default
        assert_eq!(
            CompressionConfig::brotli().to_parquet_compression(),
            Compression::BROTLI(BrotliLevel::default())
        );

        // Brotli with level
        assert_eq!(
            CompressionConfig::brotli_level(11).to_parquet_compression(),
            Compression::BROTLI(BrotliLevel::try_new(11).unwrap())
        );
    }

    /// Out-of-range levels are clamped to the codec's bounds on conversion.
    #[test]
    fn level_clamping() {
        // ZSTD: level 100 → clamped to 22
        assert_eq!(
            CompressionConfig::zstd_level(100).to_parquet_compression(),
            Compression::ZSTD(ZstdLevel::try_new(22).unwrap())
        );

        // ZSTD: level 0 → clamped up to 1
        assert_eq!(
            CompressionConfig::zstd_level(0).to_parquet_compression(),
            Compression::ZSTD(ZstdLevel::try_new(1).unwrap())
        );

        // Brotli: level 99 → clamped to 11
        assert_eq!(
            CompressionConfig::brotli_level(99).to_parquet_compression(),
            Compression::BROTLI(BrotliLevel::try_new(11).unwrap())
        );
    }

    /// `Display` yields the documented human-readable names.
    #[test]
    fn display_names() {
        assert_eq!(CompressionAlgorithm::None.to_string(), "None");
        assert_eq!(CompressionAlgorithm::Snappy.to_string(), "Snappy");
        assert_eq!(CompressionAlgorithm::Lz4.to_string(), "LZ4");
        assert_eq!(CompressionAlgorithm::Zstd.to_string(), "ZSTD");
        assert_eq!(CompressionAlgorithm::Brotli.to_string(), "Brotli");
    }

    /// `ALL` lists every variant.
    #[test]
    fn all_algorithms_count() {
        assert_eq!(CompressionAlgorithm::ALL.len(), 5);
    }
}