vortex_sampling_compressor/
lib.rs1use std::sync::{Arc, LazyLock};
2
3use compressors::bitpacked::BITPACK_WITH_PATCHES;
4use compressors::chunked::DEFAULT_CHUNKED_COMPRESSOR;
5use compressors::constant::ConstantCompressor;
6use compressors::delta::DeltaCompressor;
7use compressors::fsst::FSSTCompressor;
8use compressors::struct_::StructCompressor;
9use compressors::varbin::VarBinCompressor;
10use compressors::{CompressedArray, CompressorRef};
11use vortex_alp::{ALPEncoding, ALPRDEncoding};
12use vortex_array::array::{
13 BoolEncoding, ChunkedEncoding, ConstantEncoding, ListEncoding, NullEncoding, PrimitiveEncoding,
14 StructEncoding, VarBinEncoding, VarBinViewEncoding,
15};
16use vortex_array::{Context, ContextRef};
17use vortex_bytebool::ByteBoolEncoding;
18use vortex_datetime_parts::DateTimePartsEncoding;
19use vortex_dict::DictEncoding;
20use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding};
21use vortex_fsst::FSSTEncoding;
22use vortex_runend::RunEndEncoding;
23use vortex_zigzag::ZigZagEncoding;
24
25use crate::compressors::alp::ALPCompressor;
26use crate::compressors::date_time_parts::DateTimePartsCompressor;
27use crate::compressors::dict::DictCompressor;
28use crate::compressors::list::ListCompressor;
29use crate::compressors::r#for::FoRCompressor;
30use crate::compressors::runend::DEFAULT_RUN_END_COMPRESSOR;
31use crate::compressors::sparse::SparseCompressor;
32use crate::compressors::zigzag::ZigZagCompressor;
33
34#[cfg(feature = "arbitrary")]
35pub mod arbitrary;
36pub mod compressors;
37mod constants;
38mod downscale;
39mod sampling;
40mod sampling_compressor;
41
42pub use sampling_compressor::*;
43use vortex_sparse::SparseEncoding;
44
45use crate::compressors::alp_rd::ALPRDCompressor;
46
47pub const DEFAULT_COMPRESSORS: [CompressorRef; 15] = [
48 &ALPCompressor as CompressorRef,
49 &ALPRDCompressor,
50 &BITPACK_WITH_PATCHES,
51 &DEFAULT_CHUNKED_COMPRESSOR,
52 &ConstantCompressor,
53 &DateTimePartsCompressor,
54 &DictCompressor,
56 &FoRCompressor,
57 &FSSTCompressor,
58 &DEFAULT_RUN_END_COMPRESSOR,
59 &SparseCompressor,
60 &StructCompressor,
61 &ListCompressor,
62 &VarBinCompressor,
63 &ZigZagCompressor,
64];
65
66pub const ALL_COMPRESSORS: [CompressorRef; 16] = [
67 &ALPCompressor as CompressorRef,
68 &ALPRDCompressor,
69 &BITPACK_WITH_PATCHES,
70 &DEFAULT_CHUNKED_COMPRESSOR,
71 &ConstantCompressor,
72 &DateTimePartsCompressor,
73 &DeltaCompressor,
74 &DictCompressor,
75 &FoRCompressor,
76 &FSSTCompressor,
77 &ListCompressor,
78 &DEFAULT_RUN_END_COMPRESSOR,
79 &SparseCompressor,
80 &StructCompressor,
81 &VarBinCompressor,
82 &ZigZagCompressor,
83];
84
85pub static ALL_ENCODINGS_CONTEXT: LazyLock<ContextRef> = LazyLock::new(|| {
86 Arc::new(Context::default().with_encodings([
87 ALPEncoding::vtable(),
88 ALPRDEncoding::vtable(),
89 BitPackedEncoding::vtable(),
90 BoolEncoding::vtable(),
91 ByteBoolEncoding::vtable(),
92 ChunkedEncoding::vtable(),
93 ConstantEncoding::vtable(),
94 DateTimePartsEncoding::vtable(),
95 DeltaEncoding::vtable(),
96 DictEncoding::vtable(),
97 FoREncoding::vtable(),
98 FSSTEncoding::vtable(),
99 ListEncoding::vtable(),
100 NullEncoding::vtable(),
101 PrimitiveEncoding::vtable(),
102 RunEndEncoding::vtable(),
103 SparseEncoding::vtable(),
104 StructEncoding::vtable(),
105 VarBinEncoding::vtable(),
106 VarBinViewEncoding::vtable(),
107 ZigZagEncoding::vtable(),
108 ]))
109});
110
/// The quantity the compressor tries to optimise when comparing candidate encodings.
#[derive(Debug, Clone)]
pub enum Objective {
    /// Prefer the candidate with the smallest size.
    MinSize,
}
115
116impl Objective {
117 pub fn starting_value(&self) -> f64 {
118 1.0
119 }
120
121 pub fn evaluate(
122 array: &CompressedArray,
123 base_size_bytes: usize,
124 config: &CompressConfig,
125 ) -> f64 {
126 match &config.objective {
127 Objective::MinSize => (array.nbytes() as f64) / (base_size_bytes as f64),
128 }
129 }
130}
131
132#[derive(Debug, Clone)]
133pub struct CompressConfig {
134 sample_size: u16,
136 sample_count: u16,
138 rng_seed: u64,
140
141 max_cost: u8,
143 objective: Objective,
145
146 target_block_bytesize: usize,
148 target_block_size: usize,
150}
151
152impl CompressConfig {
153 pub fn with_sample_size(mut self, sample_size: u16) -> Self {
154 self.sample_size = sample_size;
155 self
156 }
157
158 pub fn with_sample_count(mut self, sample_count: u16) -> Self {
159 self.sample_count = sample_count;
160 self
161 }
162}
163
164impl Default for CompressConfig {
165 fn default() -> Self {
166 let kib = 1 << 10;
167 let mib = 1 << 20;
168 Self {
169 sample_size: 64,
171 sample_count: 16,
172 max_cost: constants::DEFAULT_MAX_COST,
173 objective: Objective::MinSize,
174 target_block_bytesize: 16 * mib,
175 target_block_size: 64 * kib,
176 rng_seed: 0,
177 }
178 }
179}