vortex_btrblocks/builder.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::bool;
14use crate::schemes::decimal;
15use crate::schemes::float;
16use crate::schemes::integer;
17use crate::schemes::string;
18use crate::schemes::temporal;
19
20/// All available compression schemes.
21///
22/// This list is order-sensitive: the builder preserves this order when constructing
23/// the final scheme list, so that tie-breaking is deterministic.
24pub const ALL_SCHEMES: &[&dyn Scheme] = &[
25 ////////////////////////////////////////////////////////////////////////////////////////////////
26 // Bool schemes.
27 ////////////////////////////////////////////////////////////////////////////////////////////////
28 &bool::BoolConstantScheme,
29 ////////////////////////////////////////////////////////////////////////////////////////////////
30 // Integer schemes.
31 ////////////////////////////////////////////////////////////////////////////////////////////////
32 &integer::IntConstantScheme,
33 // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
34 &integer::FoRScheme,
35 // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
36 &integer::ZigZagScheme,
37 &integer::BitPackingScheme,
38 &integer::SparseScheme,
39 &integer::IntDictScheme,
40 &integer::RunEndScheme,
41 &integer::SequenceScheme,
42 &integer::IntRLEScheme,
43 ////////////////////////////////////////////////////////////////////////////////////////////////
44 // Float schemes.
45 ////////////////////////////////////////////////////////////////////////////////////////////////
46 &float::FloatConstantScheme,
47 &float::ALPScheme,
48 &float::ALPRDScheme,
49 &float::FloatDictScheme,
50 &float::NullDominatedSparseScheme,
51 &float::FloatRLEScheme,
52 ////////////////////////////////////////////////////////////////////////////////////////////////
53 // String schemes.
54 ////////////////////////////////////////////////////////////////////////////////////////////////
55 &string::StringDictScheme,
56 &string::FSSTScheme,
57 &string::StringConstantScheme,
58 &string::NullDominatedSparseScheme,
59 // Decimal schemes.
60 &decimal::DecimalScheme,
61 // Temporal schemes.
62 &temporal::TemporalScheme,
63];
64
65/// Builder for creating configured [`BtrBlocksCompressor`] instances.
66///
67/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
68/// are not in `ALL_SCHEMES` and must be added explicitly via
69/// [`with_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
70/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
71///
72/// # Examples
73///
74/// ```rust
75/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
76/// use vortex_btrblocks::schemes::integer::IntDictScheme;
77///
78/// // Default compressor with all schemes in ALL_SCHEMES.
79/// let compressor = BtrBlocksCompressorBuilder::default().build();
80///
81/// // Remove specific schemes.
82/// let compressor = BtrBlocksCompressorBuilder::default()
83/// .exclude_schemes([IntDictScheme.id()])
84/// .build();
85/// ```
86#[derive(Debug, Clone)]
87pub struct BtrBlocksCompressorBuilder {
88 schemes: Vec<&'static dyn Scheme>,
89}
90
91impl Default for BtrBlocksCompressorBuilder {
92 fn default() -> Self {
93 Self {
94 schemes: ALL_SCHEMES.to_vec(),
95 }
96 }
97}
98
99impl BtrBlocksCompressorBuilder {
100 /// Creates a builder with no schemes registered.
101 ///
102 /// Useful when the caller wants explicit, scheme-by-scheme control over the compressor.
103 pub fn empty() -> Self {
104 Self {
105 schemes: Vec::new(),
106 }
107 }
108
109 /// Adds an external compression scheme not in [`ALL_SCHEMES`].
110 ///
111 /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
112 /// with the compressor.
113 ///
114 /// # Panics
115 ///
116 /// Panics if a scheme with the same [`SchemeId`] is already present.
117 pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
118 assert!(
119 !self.schemes.iter().any(|s| s.id() == scheme.id()),
120 "scheme {:?} is already present in the builder",
121 scheme.id(),
122 );
123
124 self.schemes.push(scheme);
125 self
126 }
127
128 /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
129 ///
130 /// This provides better compression ratios than the default, especially for floating-point
131 /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
132 /// Pco schemes for integers and floats are included.
133 ///
134 /// # Panics
135 ///
136 /// Panics if any of the compact schemes are already present.
137 #[cfg(feature = "zstd")]
138 pub fn with_compact(self) -> Self {
139 let builder = self.with_new_scheme(&string::ZstdScheme);
140
141 #[cfg(feature = "pco")]
142 let builder = builder
143 .with_new_scheme(&integer::PcoScheme)
144 .with_new_scheme(&float::PcoScheme);
145
146 builder
147 }
148
149 /// Adds the TurboQuant lossy vector quantization scheme.
150 ///
151 /// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm
152 /// with MSE-optimal scalar quantization.
153 ///
154 /// # Panics
155 ///
156 /// Panics if the TurboQuant scheme is already present.
157 ///
158 /// [`Vector`]: vortex_tensor::vector::Vector
159 #[cfg(feature = "unstable_encodings")]
160 pub fn with_turboquant(self) -> Self {
161 use vortex_tensor::encodings::turboquant::TurboQuantScheme;
162 self.with_new_scheme(&TurboQuantScheme)
163 }
164
165 /// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
166 ///
167 /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
168 /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
169 /// interleaved Zstd compression is used.
170 pub fn only_cuda_compatible(self) -> Self {
171 let builder = self.exclude_schemes([
172 integer::SparseScheme.id(),
173 integer::IntRLEScheme.id(),
174 float::FloatRLEScheme.id(),
175 float::NullDominatedSparseScheme.id(),
176 string::StringDictScheme.id(),
177 string::FSSTScheme.id(),
178 ]);
179
180 #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
181 let builder = builder.with_new_scheme(&string::ZstdBuffersScheme);
182 #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
183 let builder = builder.with_new_scheme(&string::ZstdScheme);
184
185 builder
186 }
187
188 /// Removes the specified compression schemes by their [`SchemeId`].
189 pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
190 let ids: HashSet<_> = ids.into_iter().collect();
191 self.schemes.retain(|s| !ids.contains(&s.id()));
192 self
193 }
194
195 /// Builds the configured [`BtrBlocksCompressor`].
196 pub fn build(self) -> BtrBlocksCompressor {
197 BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// `empty()` must not register anything.
    #[test]
    fn empty_starts_with_no_schemes() {
        let builder = BtrBlocksCompressorBuilder::empty();
        assert!(builder.schemes.is_empty());
    }

    /// `default()` must contain exactly the schemes of `ALL_SCHEMES`, in the same order.
    #[test]
    fn default_includes_all_schemes() {
        let builder = BtrBlocksCompressorBuilder::default();
        assert_eq!(builder.schemes.len(), ALL_SCHEMES.len());
        assert!(
            builder
                .schemes
                .iter()
                .map(|s| s.id())
                .eq(ALL_SCHEMES.iter().map(|s| s.id())),
            "default builder must preserve the order of ALL_SCHEMES",
        );
    }

    /// A scheme added to an empty builder becomes its only entry.
    #[test]
    fn with_new_scheme_appends_to_empty_builder() {
        let builder = BtrBlocksCompressorBuilder::empty().with_new_scheme(&integer::IntDictScheme);
        assert_eq!(builder.schemes.len(), 1);
        assert_eq!(
            builder.schemes.first().map(|s| s.id()),
            Some(integer::IntDictScheme.id()),
        );
    }

    /// Registering a scheme that is already part of the defaults must panic.
    #[test]
    #[should_panic(expected = "is already present in the builder")]
    fn with_new_scheme_panics_on_duplicate() {
        let _ = BtrBlocksCompressorBuilder::default().with_new_scheme(&integer::IntDictScheme);
    }

    /// Excluded ids must no longer be registered afterwards.
    #[test]
    fn exclude_schemes_removes_matching_ids() {
        let excluded = integer::IntDictScheme.id();
        let builder =
            BtrBlocksCompressorBuilder::default().exclude_schemes([integer::IntDictScheme.id()]);

        assert!(builder.schemes.iter().all(|s| s.id() != excluded));
        assert!(builder.schemes.len() < ALL_SCHEMES.len());
    }
}