Skip to main content

vortex_btrblocks/
builder.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::bool;
14use crate::schemes::decimal;
15use crate::schemes::float;
16use crate::schemes::integer;
17use crate::schemes::string;
18use crate::schemes::temporal;
19
20/// All available compression schemes.
21///
22/// This list is order-sensitive: the builder preserves this order when constructing
23/// the final scheme list, so that tie-breaking is deterministic.
24pub const ALL_SCHEMES: &[&dyn Scheme] = &[
25    ////////////////////////////////////////////////////////////////////////////////////////////////
26    // Bool schemes.
27    ////////////////////////////////////////////////////////////////////////////////////////////////
28    &bool::BoolConstantScheme,
29    ////////////////////////////////////////////////////////////////////////////////////////////////
30    // Integer schemes.
31    ////////////////////////////////////////////////////////////////////////////////////////////////
32    &integer::IntConstantScheme,
33    // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
34    &integer::FoRScheme,
35    // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
36    &integer::ZigZagScheme,
37    &integer::BitPackingScheme,
38    &integer::SparseScheme,
39    &integer::IntDictScheme,
40    &integer::RunEndScheme,
41    &integer::SequenceScheme,
42    &integer::IntRLEScheme,
43    ////////////////////////////////////////////////////////////////////////////////////////////////
44    // Float schemes.
45    ////////////////////////////////////////////////////////////////////////////////////////////////
46    &float::FloatConstantScheme,
47    &float::ALPScheme,
48    &float::ALPRDScheme,
49    &float::FloatDictScheme,
50    &float::NullDominatedSparseScheme,
51    &float::FloatRLEScheme,
52    ////////////////////////////////////////////////////////////////////////////////////////////////
53    // String schemes.
54    ////////////////////////////////////////////////////////////////////////////////////////////////
55    &string::StringDictScheme,
56    &string::FSSTScheme,
57    &string::StringConstantScheme,
58    &string::NullDominatedSparseScheme,
59    // Decimal schemes.
60    &decimal::DecimalScheme,
61    // Temporal schemes.
62    &temporal::TemporalScheme,
63];
64
65/// Builder for creating configured [`BtrBlocksCompressor`] instances.
66///
67/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
68/// are not in `ALL_SCHEMES` and must be added explicitly via
69/// [`with_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
70/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
71///
72/// # Examples
73///
74/// ```rust
75/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
76/// use vortex_btrblocks::schemes::integer::IntDictScheme;
77///
78/// // Default compressor with all schemes in ALL_SCHEMES.
79/// let compressor = BtrBlocksCompressorBuilder::default().build();
80///
81/// // Remove specific schemes.
82/// let compressor = BtrBlocksCompressorBuilder::default()
83///     .exclude_schemes([IntDictScheme.id()])
84///     .build();
85/// ```
86#[derive(Debug, Clone)]
87pub struct BtrBlocksCompressorBuilder {
88    schemes: Vec<&'static dyn Scheme>,
89}
90
91impl Default for BtrBlocksCompressorBuilder {
92    fn default() -> Self {
93        Self {
94            schemes: ALL_SCHEMES.to_vec(),
95        }
96    }
97}
98
99impl BtrBlocksCompressorBuilder {
100    /// Creates a builder with no schemes registered.
101    ///
102    /// Useful when the caller wants explicit, scheme-by-scheme control over the compressor.
103    pub fn empty() -> Self {
104        Self {
105            schemes: Vec::new(),
106        }
107    }
108
109    /// Adds an external compression scheme not in [`ALL_SCHEMES`].
110    ///
111    /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
112    /// with the compressor.
113    ///
114    /// # Panics
115    ///
116    /// Panics if a scheme with the same [`SchemeId`] is already present.
117    pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
118        assert!(
119            !self.schemes.iter().any(|s| s.id() == scheme.id()),
120            "scheme {:?} is already present in the builder",
121            scheme.id(),
122        );
123
124        self.schemes.push(scheme);
125        self
126    }
127
128    /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
129    ///
130    /// This provides better compression ratios than the default, especially for floating-point
131    /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
132    /// Pco schemes for integers and floats are included.
133    ///
134    /// # Panics
135    ///
136    /// Panics if any of the compact schemes are already present.
137    #[cfg(feature = "zstd")]
138    pub fn with_compact(self) -> Self {
139        let builder = self.with_new_scheme(&string::ZstdScheme);
140
141        #[cfg(feature = "pco")]
142        let builder = builder
143            .with_new_scheme(&integer::PcoScheme)
144            .with_new_scheme(&float::PcoScheme);
145
146        builder
147    }
148
149    /// Adds the TurboQuant lossy vector quantization scheme.
150    ///
151    /// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm
152    /// with MSE-optimal scalar quantization.
153    ///
154    /// # Panics
155    ///
156    /// Panics if the TurboQuant scheme is already present.
157    ///
158    /// [`Vector`]: vortex_tensor::vector::Vector
159    #[cfg(feature = "unstable_encodings")]
160    pub fn with_turboquant(self) -> Self {
161        use vortex_tensor::encodings::turboquant::TurboQuantScheme;
162        self.with_new_scheme(&TurboQuantScheme)
163    }
164
165    /// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
166    ///
167    /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
168    /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
169    /// interleaved Zstd compression is used.
170    pub fn only_cuda_compatible(self) -> Self {
171        let builder = self.exclude_schemes([
172            integer::SparseScheme.id(),
173            integer::IntRLEScheme.id(),
174            float::FloatRLEScheme.id(),
175            float::NullDominatedSparseScheme.id(),
176            string::StringDictScheme.id(),
177            string::FSSTScheme.id(),
178        ]);
179
180        #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
181        let builder = builder.with_new_scheme(&string::ZstdBuffersScheme);
182        #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
183        let builder = builder.with_new_scheme(&string::ZstdScheme);
184
185        builder
186    }
187
188    /// Removes the specified compression schemes by their [`SchemeId`].
189    pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
190        let ids: HashSet<_> = ids.into_iter().collect();
191        self.schemes.retain(|s| !ids.contains(&s.id()));
192        self
193    }
194
195    /// Builds the configured [`BtrBlocksCompressor`].
196    pub fn build(self) -> BtrBlocksCompressor {
197        BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_starts_with_no_schemes() {
        let builder = BtrBlocksCompressorBuilder::empty();
        assert!(builder.schemes.is_empty());
    }

    #[test]
    fn default_includes_all_schemes() {
        let builder = BtrBlocksCompressorBuilder::default();
        assert_eq!(builder.schemes.len(), ALL_SCHEMES.len());

        // The builder must preserve the order of `ALL_SCHEMES`, not just its size.
        let registered = builder.schemes.iter().map(|s| s.id());
        let expected = ALL_SCHEMES.iter().map(|s| s.id());
        assert!(registered.eq(expected));
    }

    #[test]
    fn with_new_scheme_registers_on_empty_builder() {
        let builder = BtrBlocksCompressorBuilder::empty().with_new_scheme(&integer::IntDictScheme);
        assert_eq!(builder.schemes.len(), 1);
        assert!(builder.schemes[0].id() == integer::IntDictScheme.id());
    }

    #[test]
    #[should_panic(expected = "already present")]
    fn with_new_scheme_rejects_duplicates() {
        // `IntDictScheme` is part of `ALL_SCHEMES`, so registering it again must panic.
        let _ = BtrBlocksCompressorBuilder::default().with_new_scheme(&integer::IntDictScheme);
    }

    #[test]
    fn exclude_schemes_removes_matching_scheme() {
        let removed = integer::IntDictScheme.id();
        let builder =
            BtrBlocksCompressorBuilder::default().exclude_schemes([integer::IntDictScheme.id()]);

        assert_eq!(builder.schemes.len(), ALL_SCHEMES.len() - 1);
        assert!(!builder.schemes.iter().any(|s| s.id() == removed));
    }

    #[test]
    fn exclude_schemes_with_no_ids_is_a_no_op() {
        let builder = BtrBlocksCompressorBuilder::default().exclude_schemes([]);
        assert_eq!(builder.schemes.len(), ALL_SCHEMES.len());
    }

    #[test]
    fn only_cuda_compatible_drops_unsupported_schemes() {
        let builder = BtrBlocksCompressorBuilder::default().only_cuda_compatible();
        let unsupported = [
            integer::SparseScheme.id(),
            integer::IntRLEScheme.id(),
            float::FloatRLEScheme.id(),
            float::NullDominatedSparseScheme.id(),
            string::StringDictScheme.id(),
            string::FSSTScheme.id(),
        ];

        for id in unsupported {
            assert!(!builder.schemes.iter().any(|s| s.id() == id));
        }
    }
}
216}