reifydb_column/
compress.rs1use std::sync::Arc;
5
6use reifydb_core::value::column::{
7 array::{Column, canonical::Canonical},
8 encoding::EncodingId,
9};
10use reifydb_type::Result;
11
12use crate::encoding::{self, Encoding};
13
/// Tuning knobs handed to every candidate encoding during compression.
///
/// This module only stores these values and passes them by reference to
/// `Encoding::try_compress`; how each knob is interpreted is up to the
/// individual encodings.
#[derive(Debug, Clone)]
pub struct CompressConfig {
    /// Size of a single sample.
    /// NOTE(review): presumably counted in values, not bytes - confirm against the encodings.
    pub sample_size: usize,
    /// Number of samples to take.
    /// NOTE(review): presumably per input column - confirm against the encodings.
    pub sample_count: usize,
    /// Upper bound on depth.
    /// NOTE(review): looks like a limit on nested/cascaded encodings - confirm.
    pub max_depth: u8,
    /// Compression-ratio threshold.
    /// NOTE(review): presumably compressed/original size that must be beaten - confirm direction.
    pub min_compression_ratio: f32,
}
21
22impl Default for CompressConfig {
23 fn default() -> Self {
24 Self {
25 sample_size: 1024,
26 sample_count: 4,
27 max_depth: 3,
28 min_compression_ratio: 0.85,
29 }
30 }
31}
32
33pub struct Compressor {
34 candidates: Vec<Arc<dyn Encoding>>,
35 cfg: CompressConfig,
36}
37
38impl Compressor {
39 pub fn new(cfg: CompressConfig) -> Self {
40 let registry = encoding::global();
41 let order = [
42 EncodingId::CANONICAL_BOOL, EncodingId::CONSTANT,
44 EncodingId::ALL_NONE,
45 EncodingId::DICT,
46 EncodingId::RLE,
47 EncodingId::DELTA,
48 EncodingId::DELTA_RLE,
49 EncodingId::FOR,
50 EncodingId::BITPACK,
51 EncodingId::SPARSE,
52 ];
53 let candidates = order
56 .into_iter()
57 .filter(|id| {
58 !matches!(
59 *id,
60 EncodingId::CANONICAL_BOOL
61 | EncodingId::CANONICAL_FIXED | EncodingId::CANONICAL_VARLEN
62 | EncodingId::CANONICAL_BIGNUM
63 )
64 })
65 .filter_map(|id| registry.get(id).cloned())
66 .collect();
67 Self {
68 candidates,
69 cfg,
70 }
71 }
72
73 pub fn compress(&self, input: &Canonical) -> Result<Column> {
74 for candidate in &self.candidates {
75 if let Some(compressed) = candidate.try_compress(input, &self.cfg)? {
76 return Ok(compressed);
77 }
78 }
79 Ok(Column::from_canonical(input.clone()))
80 }
81}
82
83pub fn compress(input: &Canonical) -> Result<Column> {
84 Compressor::new(CompressConfig::default()).compress(input)
85}
86
#[cfg(test)]
mod tests {
    use reifydb_core::value::column::buffer::ColumnBuffer;

    use super::*;

    /// A small int4 column is expected to come back in canonical fixed-width form.
    #[test]
    fn compress_falls_back_to_canonical_when_no_stub_applies() {
        let buffer = ColumnBuffer::int4([1i32, 2, 3, 4]);
        let canonical = Canonical::from_column_buffer(&buffer).unwrap();

        let column = compress(&canonical).unwrap();

        assert_eq!(column.encoding(), EncodingId::CANONICAL_FIXED);
        assert_eq!(column.len(), 4);
    }

    /// A small utf8 column is expected to come back in canonical variable-length form.
    #[test]
    fn compress_utf8_falls_back_to_canonical_varlen() {
        let buffer = ColumnBuffer::utf8(["alpha", "bravo"]);
        let canonical = Canonical::from_column_buffer(&buffer).unwrap();

        let column = compress(&canonical).unwrap();

        assert_eq!(column.encoding(), EncodingId::CANONICAL_VARLEN);
        assert_eq!(column.len(), 2);
    }
}