vortex_btrblocks/builder.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::binary;
14use crate::schemes::bool;
15use crate::schemes::decimal;
16use crate::schemes::float;
17use crate::schemes::integer;
18use crate::schemes::string;
19use crate::schemes::temporal;
20
21/// All available compression schemes.
22///
23/// This list is order-sensitive: the builder preserves this order when constructing
24/// the final scheme list, so that tie-breaking is deterministic.
25pub const ALL_SCHEMES: &[&dyn Scheme] = &[
26 ////////////////////////////////////////////////////////////////////////////////////////////////
27 // Bool schemes.
28 ////////////////////////////////////////////////////////////////////////////////////////////////
29 &bool::BoolConstantScheme,
30 ////////////////////////////////////////////////////////////////////////////////////////////////
31 // Integer schemes.
32 ////////////////////////////////////////////////////////////////////////////////////////////////
33 &integer::IntConstantScheme,
34 // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
35 &integer::FoRScheme,
36 // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
37 &integer::ZigZagScheme,
38 &integer::BitPackingScheme,
39 &integer::SparseScheme,
40 &integer::IntDictScheme,
41 &integer::RunEndScheme,
42 &integer::SequenceScheme,
43 &integer::IntRLEScheme,
44 ////////////////////////////////////////////////////////////////////////////////////////////////
45 // Float schemes.
46 ////////////////////////////////////////////////////////////////////////////////////////////////
47 &float::FloatConstantScheme,
48 &float::ALPScheme,
49 &float::ALPRDScheme,
50 &float::FloatDictScheme,
51 &float::NullDominatedSparseScheme,
52 &float::FloatRLEScheme,
53 ////////////////////////////////////////////////////////////////////////////////////////////////
54 // String schemes.
55 ////////////////////////////////////////////////////////////////////////////////////////////////
56 &string::StringDictScheme,
57 // Both string-fragmentation schemes are registered; the sample-based
58 // selector keeps whichever is smaller per column.
59 &string::FSSTScheme,
60 #[cfg(feature = "unstable_encodings")]
61 &string::OnPairScheme,
62 &string::StringConstantScheme,
63 &string::NullDominatedSparseScheme,
64 ////////////////////////////////////////////////////////////////////////////////////////////////
65 // Binary schemes.
66 ////////////////////////////////////////////////////////////////////////////////////////////////
67 &binary::BinaryDictScheme,
68 &binary::BinaryConstantScheme,
69 // Decimal schemes.
70 &decimal::DecimalScheme,
71 // Temporal schemes.
72 &temporal::TemporalScheme,
73];
74
75/// Builder for creating configured [`BtrBlocksCompressor`] instances.
76///
77/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
78/// are not in `ALL_SCHEMES` and must be added explicitly via
79/// [`with_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
80/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
81///
82/// # Examples
83///
84/// ```rust
85/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
86/// use vortex_btrblocks::schemes::integer::IntDictScheme;
87///
88/// // Default compressor with all schemes in ALL_SCHEMES.
89/// let compressor = BtrBlocksCompressorBuilder::default().build();
90///
91/// // Remove specific schemes.
92/// let compressor = BtrBlocksCompressorBuilder::default()
93/// .exclude_schemes([IntDictScheme.id()])
94/// .build();
95/// ```
96#[derive(Debug, Clone)]
97pub struct BtrBlocksCompressorBuilder {
98 schemes: Vec<&'static dyn Scheme>,
99}
100
101impl Default for BtrBlocksCompressorBuilder {
102 fn default() -> Self {
103 Self {
104 schemes: ALL_SCHEMES.to_vec(),
105 }
106 }
107}
108
109impl BtrBlocksCompressorBuilder {
110 /// Creates a builder with no schemes registered.
111 ///
112 /// Useful when the caller wants explicit, scheme-by-scheme control over the compressor.
113 pub fn empty() -> Self {
114 Self {
115 schemes: Vec::new(),
116 }
117 }
118
119 /// Adds an external compression scheme not in [`ALL_SCHEMES`].
120 ///
121 /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
122 /// with the compressor.
123 ///
124 /// # Panics
125 ///
126 /// Panics if a scheme with the same [`SchemeId`] is already present.
127 pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
128 assert!(
129 !self.schemes.iter().any(|s| s.id() == scheme.id()),
130 "scheme {:?} is already present in the builder",
131 scheme.id(),
132 );
133
134 self.schemes.push(scheme);
135 self
136 }
137
138 /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
139 ///
140 /// This provides better compression ratios than the default, especially for floating-point
141 /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
142 /// Pco schemes for integers and floats are included.
143 ///
144 /// # Panics
145 ///
146 /// Panics if any of the compact schemes are already present.
147 #[cfg(feature = "zstd")]
148 pub fn with_compact(self) -> Self {
149 let builder = self.with_new_scheme(&string::ZstdScheme);
150
151 #[cfg(feature = "pco")]
152 let builder = builder
153 .with_new_scheme(&integer::PcoScheme)
154 .with_new_scheme(&float::PcoScheme);
155
156 builder
157 }
158
159 /// Adds the TurboQuant lossy vector quantization scheme.
160 ///
161 /// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm
162 /// with MSE-optimal scalar quantization.
163 ///
164 /// # Panics
165 ///
166 /// Panics if the TurboQuant scheme is already present.
167 ///
168 /// [`Vector`]: vortex_tensor::vector::Vector
169 #[cfg(feature = "unstable_encodings")]
170 pub fn with_turboquant(self) -> Self {
171 use vortex_tensor::encodings::turboquant::TurboQuantScheme;
172 self.with_new_scheme(&TurboQuantScheme)
173 }
174
175 /// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
176 ///
177 /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
178 /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
179 /// interleaved Zstd compression is used.
180 pub fn only_cuda_compatible(self) -> Self {
181 // String fragmentation schemes (OnPair, FSST) require host-side
182 // dictionary expansion at decode time, which is incompatible with
183 // pure-GPU decompression paths. Strip whichever string-fragment
184 // scheme is enabled by feature.
185 #[cfg_attr(not(feature = "unstable_encodings"), allow(unused_mut))]
186 let mut excluded: Vec<SchemeId> = vec![
187 integer::SparseScheme.id(),
188 integer::IntRLEScheme.id(),
189 float::FloatRLEScheme.id(),
190 float::NullDominatedSparseScheme.id(),
191 string::StringDictScheme.id(),
192 string::FSSTScheme.id(),
193 binary::BinaryDictScheme.id(),
194 ];
195 #[cfg(feature = "unstable_encodings")]
196 excluded.push(string::OnPairScheme.id());
197 let builder = self.exclude_schemes(excluded);
198
199 #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
200 let builder = builder.with_new_scheme(&string::ZstdBuffersScheme);
201 #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
202 let builder = builder.with_new_scheme(&string::ZstdScheme);
203
204 builder
205 }
206
207 /// Removes the specified compression schemes by their [`SchemeId`].
208 pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
209 let ids: HashSet<_> = ids.into_iter().collect();
210 self.schemes.retain(|s| !ids.contains(&s.id()));
211 self
212 }
213
214 /// Builds the configured [`BtrBlocksCompressor`].
215 pub fn build(self) -> BtrBlocksCompressor {
216 BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
217 }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// `empty()` must produce a builder that has no schemes registered.
    #[test]
    fn empty_starts_with_no_schemes() {
        let registered = BtrBlocksCompressorBuilder::empty().schemes;
        assert!(registered.is_empty());
    }

    /// The default builder must register exactly as many schemes as `ALL_SCHEMES` lists.
    #[test]
    fn default_includes_all_schemes() {
        let registered = BtrBlocksCompressorBuilder::default().schemes;
        let expected = ALL_SCHEMES.len();
        assert_eq!(registered.len(), expected);
    }
}
235}