// reifydb_column/compress.rs
use std::sync::Arc;
5
6use reifydb_core::value::column::{
7 data::{Column, canonical::Canonical},
8 encoding::EncodingId,
9};
10use reifydb_type::Result;
11
12use crate::encoding::{self, Encoding};
13
/// Tuning parameters handed to every encoding when it attempts to compress a column.
///
/// This module only stores these values and forwards them unchanged to each
/// candidate's `try_compress`; how a given knob is interpreted is decided by
/// the individual encodings.
#[derive(Debug, Clone)]
pub struct CompressConfig {
    /// Size of a sample. Presumably the number of values inspected per
    /// sample — TODO confirm against the encoding implementations.
    pub sample_size: usize,
    /// Number of samples. Presumably how many samples are drawn from a
    /// column — TODO confirm against the encoding implementations.
    pub sample_count: usize,
    /// Depth limit. Presumably bounds how deeply encodings may be nested —
    /// TODO confirm against the encoding implementations.
    pub max_depth: u8,
    /// Compression-ratio threshold. The exact comparison (and which side of
    /// the ratio counts as "good enough") is defined by the encodings —
    /// TODO confirm.
    pub min_compression_ratio: f32,
}
21
22impl Default for CompressConfig {
23 fn default() -> Self {
24 Self {
25 sample_size: 1024,
26 sample_count: 4,
27 max_depth: 3,
28 min_compression_ratio: 0.85,
29 }
30 }
31}
32
33pub struct Compressor {
34 candidates: Vec<Arc<dyn Encoding>>,
35 cfg: CompressConfig,
36}
37
38impl Compressor {
39 pub fn new(cfg: CompressConfig) -> Self {
40 let registry = encoding::global();
41 let order = [
42 EncodingId::CANONICAL_BOOL,
43 EncodingId::CONSTANT,
44 EncodingId::ALL_NONE,
45 EncodingId::DICT,
46 EncodingId::RLE,
47 EncodingId::DELTA,
48 EncodingId::DELTA_RLE,
49 EncodingId::FOR,
50 EncodingId::BITPACK,
51 EncodingId::SPARSE,
52 ];
53
54 let candidates = order
55 .into_iter()
56 .filter(|id| {
57 !matches!(
58 *id,
59 EncodingId::CANONICAL_BOOL
60 | EncodingId::CANONICAL_FIXED | EncodingId::CANONICAL_VARLEN
61 | EncodingId::CANONICAL_BIGNUM
62 )
63 })
64 .filter_map(|id| registry.get(id).cloned())
65 .collect();
66 Self {
67 candidates,
68 cfg,
69 }
70 }
71
72 pub fn compress(&self, input: &Canonical) -> Result<Column> {
73 for candidate in &self.candidates {
74 if let Some(compressed) = candidate.try_compress(input, &self.cfg)? {
75 return Ok(compressed);
76 }
77 }
78 Ok(Column::from_canonical(input.clone()))
79 }
80}
81
82pub fn compress(input: &Canonical) -> Result<Column> {
83 Compressor::new(CompressConfig::default()).compress(input)
84}
85
#[cfg(test)]
mod tests {
    use reifydb_core::value::column::buffer::ColumnBuffer;

    use super::*;

    /// A four-element int4 column is expected to come back with the
    /// canonical fixed-width encoding and its length intact.
    #[test]
    fn compress_falls_back_to_canonical_when_no_stub_applies() {
        let buffer = ColumnBuffer::int4([1i32, 2, 3, 4]);
        let canonical = Canonical::from_column_buffer(&buffer).unwrap();
        let column = compress(&canonical).unwrap();
        assert_eq!(column.encoding(), EncodingId::CANONICAL_FIXED);
        assert_eq!(column.len(), 4);
    }

    /// A two-element utf8 column is expected to come back with the
    /// canonical variable-length encoding and its length intact.
    #[test]
    fn compress_utf8_falls_back_to_canonical_varlen() {
        let buffer = ColumnBuffer::utf8(["alpha", "bravo"]);
        let canonical = Canonical::from_column_buffer(&buffer).unwrap();
        let column = compress(&canonical).unwrap();
        assert_eq!(column.encoding(), EncodingId::CANONICAL_VARLEN);
        assert_eq!(column.len(), 2);
    }
}