vortex_btrblocks/builder.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::bool;
14use crate::schemes::decimal;
15use crate::schemes::float;
16use crate::schemes::integer;
17use crate::schemes::string;
18use crate::schemes::temporal;
19
/// All available compression schemes.
///
/// This list is order-sensitive: the builder preserves this order when constructing
/// the final scheme list, so that tie-breaking is deterministic.
///
/// `BtrBlocksCompressorBuilder::default()` starts from a copy of this list; schemes added
/// afterwards through the builder are appended after these entries.
pub const ALL_SCHEMES: &[&dyn Scheme] = &[
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Bool schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &bool::BoolConstantScheme,
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Integer schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &integer::IntConstantScheme,
    // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
    &integer::FoRScheme,
    // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
    &integer::ZigZagScheme,
    &integer::BitPackingScheme,
    &integer::SparseScheme,
    &integer::IntDictScheme,
    &integer::RunEndScheme,
    &integer::SequenceScheme,
    &integer::IntRLEScheme,
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Float schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &float::FloatConstantScheme,
    &float::ALPScheme,
    &float::ALPRDScheme,
    &float::FloatDictScheme,
    &float::NullDominatedSparseScheme,
    &float::FloatRLEScheme,
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // String schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &string::StringDictScheme,
    &string::FSSTScheme,
    &string::StringConstantScheme,
    &string::NullDominatedSparseScheme,
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Decimal schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &decimal::DecimalScheme,
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Temporal schemes.
    ////////////////////////////////////////////////////////////////////////////////////////////////
    &temporal::TemporalScheme,
];
64
/// Builder for creating configured [`BtrBlocksCompressor`] instances.
///
/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
/// are not in `ALL_SCHEMES` and must be added explicitly via
/// [`with_new_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact) (the latter is only available
/// with the `zstd` feature).
///
/// # Examples
///
/// ```rust
/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
/// use vortex_btrblocks::schemes::integer::IntDictScheme;
///
/// // Default compressor with all schemes in ALL_SCHEMES.
/// let compressor = BtrBlocksCompressorBuilder::default().build();
///
/// // Remove specific schemes.
/// let compressor = BtrBlocksCompressorBuilder::default()
///     .exclude_schemes([IntDictScheme.id()])
///     .build();
/// ```
#[derive(Debug, Clone)]
pub struct BtrBlocksCompressorBuilder {
    /// Currently enabled schemes, in the order they are passed to the compressor by `build`.
    schemes: Vec<&'static dyn Scheme>,
}
90
91impl Default for BtrBlocksCompressorBuilder {
92 fn default() -> Self {
93 Self {
94 schemes: ALL_SCHEMES.to_vec(),
95 }
96 }
97}
98
99impl BtrBlocksCompressorBuilder {
100 /// Adds an external compression scheme not in [`ALL_SCHEMES`].
101 ///
102 /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
103 /// with the compressor.
104 ///
105 /// # Panics
106 ///
107 /// Panics if a scheme with the same [`SchemeId`] is already present.
108 pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
109 assert!(
110 !self.schemes.iter().any(|s| s.id() == scheme.id()),
111 "scheme {:?} is already present in the builder",
112 scheme.id(),
113 );
114
115 self.schemes.push(scheme);
116 self
117 }
118
119 /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
120 ///
121 /// This provides better compression ratios than the default, especially for floating-point
122 /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
123 /// Pco schemes for integers and floats are included.
124 ///
125 /// # Panics
126 ///
127 /// Panics if any of the compact schemes are already present.
128 #[cfg(feature = "zstd")]
129 pub fn with_compact(self) -> Self {
130 let builder = self.with_new_scheme(&string::ZstdScheme);
131
132 #[cfg(feature = "pco")]
133 let builder = builder
134 .with_new_scheme(&integer::PcoScheme)
135 .with_new_scheme(&float::PcoScheme);
136
137 builder
138 }
139
140 /// Adds the TurboQuant lossy vector quantization scheme.
141 ///
142 /// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm
143 /// with MSE-optimal scalar quantization.
144 ///
145 /// # Panics
146 ///
147 /// Panics if the TurboQuant scheme is already present.
148 ///
149 /// [`Vector`]: vortex_tensor::vector::Vector
150 #[cfg(feature = "unstable_encodings")]
151 pub fn with_turboquant(self) -> Self {
152 use vortex_tensor::encodings::turboquant::TurboQuantScheme;
153 self.with_new_scheme(&TurboQuantScheme)
154 }
155
156 /// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
157 ///
158 /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
159 /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
160 /// interleaved Zstd compression is used.
161 pub fn only_cuda_compatible(self) -> Self {
162 let builder = self.exclude_schemes([
163 integer::SparseScheme.id(),
164 integer::IntRLEScheme.id(),
165 float::FloatRLEScheme.id(),
166 float::NullDominatedSparseScheme.id(),
167 string::StringDictScheme.id(),
168 string::FSSTScheme.id(),
169 ]);
170
171 #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
172 let builder = builder.with_new_scheme(&string::ZstdBuffersScheme);
173 #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
174 let builder = builder.with_new_scheme(&string::ZstdScheme);
175
176 builder
177 }
178
179 /// Removes the specified compression schemes by their [`SchemeId`].
180 pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
181 let ids: HashSet<_> = ids.into_iter().collect();
182 self.schemes.retain(|s| !ids.contains(&s.id()));
183 self
184 }
185
186 /// Builds the configured [`BtrBlocksCompressor`].
187 pub fn build(self) -> BtrBlocksCompressor {
188 BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
189 }
190}