Skip to main content

vortex_btrblocks/
builder.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::bool;
14use crate::schemes::decimal;
15use crate::schemes::float;
16use crate::schemes::integer;
17use crate::schemes::string;
18use crate::schemes::temporal;
19
20/// All available compression schemes.
21///
22/// This list is order-sensitive: the builder preserves this order when constructing
23/// the final scheme list, so that tie-breaking is deterministic.
24pub const ALL_SCHEMES: &[&dyn Scheme] = &[
25    ////////////////////////////////////////////////////////////////////////////////////////////////
26    // Bool schemes.
27    ////////////////////////////////////////////////////////////////////////////////////////////////
28    &bool::BoolConstantScheme,
29    ////////////////////////////////////////////////////////////////////////////////////////////////
30    // Integer schemes.
31    ////////////////////////////////////////////////////////////////////////////////////////////////
32    &integer::IntConstantScheme,
33    // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
34    &integer::FoRScheme,
35    // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
36    &integer::ZigZagScheme,
37    &integer::BitPackingScheme,
38    &integer::SparseScheme,
39    &integer::IntDictScheme,
40    &integer::RunEndScheme,
41    &integer::SequenceScheme,
42    &integer::IntRLEScheme,
43    ////////////////////////////////////////////////////////////////////////////////////////////////
44    // Float schemes.
45    ////////////////////////////////////////////////////////////////////////////////////////////////
46    &float::FloatConstantScheme,
47    &float::ALPScheme,
48    &float::ALPRDScheme,
49    &float::FloatDictScheme,
50    &float::NullDominatedSparseScheme,
51    &float::FloatRLEScheme,
52    ////////////////////////////////////////////////////////////////////////////////////////////////
53    // String schemes.
54    ////////////////////////////////////////////////////////////////////////////////////////////////
55    &string::StringDictScheme,
56    &string::FSSTScheme,
57    &string::StringConstantScheme,
58    &string::NullDominatedSparseScheme,
59    // Decimal schemes.
60    &decimal::DecimalScheme,
61    // Temporal schemes.
62    &temporal::TemporalScheme,
63];
64
65/// Builder for creating configured [`BtrBlocksCompressor`] instances.
66///
67/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
68/// are not in `ALL_SCHEMES` and must be added explicitly via
69/// [`with_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
70/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
71///
72/// # Examples
73///
74/// ```rust
75/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
76/// use vortex_btrblocks::schemes::integer::IntDictScheme;
77///
78/// // Default compressor with all schemes in ALL_SCHEMES.
79/// let compressor = BtrBlocksCompressorBuilder::default().build();
80///
81/// // Remove specific schemes.
82/// let compressor = BtrBlocksCompressorBuilder::default()
83///     .exclude_schemes([IntDictScheme.id()])
84///     .build();
85/// ```
86#[derive(Debug, Clone)]
87pub struct BtrBlocksCompressorBuilder {
88    schemes: Vec<&'static dyn Scheme>,
89}
90
91impl Default for BtrBlocksCompressorBuilder {
92    fn default() -> Self {
93        Self {
94            schemes: ALL_SCHEMES.to_vec(),
95        }
96    }
97}
98
99impl BtrBlocksCompressorBuilder {
100    /// Adds an external compression scheme not in [`ALL_SCHEMES`].
101    ///
102    /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
103    /// with the compressor.
104    ///
105    /// # Panics
106    ///
107    /// Panics if a scheme with the same [`SchemeId`] is already present.
108    pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
109        assert!(
110            !self.schemes.iter().any(|s| s.id() == scheme.id()),
111            "scheme {:?} is already present in the builder",
112            scheme.id(),
113        );
114
115        self.schemes.push(scheme);
116        self
117    }
118
119    /// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
120    ///
121    /// This provides better compression ratios than the default, especially for floating-point
122    /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
123    /// Pco schemes for integers and floats are included.
124    ///
125    /// # Panics
126    ///
127    /// Panics if any of the compact schemes are already present.
128    #[cfg(feature = "zstd")]
129    pub fn with_compact(self) -> Self {
130        let builder = self.with_new_scheme(&string::ZstdScheme);
131
132        #[cfg(feature = "pco")]
133        let builder = builder
134            .with_new_scheme(&integer::PcoScheme)
135            .with_new_scheme(&float::PcoScheme);
136
137        builder
138    }
139
140    /// Adds the TurboQuant lossy vector quantization scheme.
141    ///
142    /// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm
143    /// with MSE-optimal scalar quantization.
144    ///
145    /// # Panics
146    ///
147    /// Panics if the TurboQuant scheme is already present.
148    ///
149    /// [`Vector`]: vortex_tensor::vector::Vector
150    #[cfg(feature = "unstable_encodings")]
151    pub fn with_turboquant(self) -> Self {
152        use vortex_tensor::encodings::turboquant::TurboQuantScheme;
153        self.with_new_scheme(&TurboQuantScheme)
154    }
155
156    /// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
157    ///
158    /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
159    /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
160    /// interleaved Zstd compression is used.
161    pub fn only_cuda_compatible(self) -> Self {
162        let builder = self.exclude_schemes([
163            integer::SparseScheme.id(),
164            integer::IntRLEScheme.id(),
165            float::FloatRLEScheme.id(),
166            float::NullDominatedSparseScheme.id(),
167            string::StringDictScheme.id(),
168            string::FSSTScheme.id(),
169        ]);
170
171        #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
172        let builder = builder.with_new_scheme(&string::ZstdBuffersScheme);
173        #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
174        let builder = builder.with_new_scheme(&string::ZstdScheme);
175
176        builder
177    }
178
179    /// Removes the specified compression schemes by their [`SchemeId`].
180    pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
181        let ids: HashSet<_> = ids.into_iter().collect();
182        self.schemes.retain(|s| !ids.contains(&s.id()));
183        self
184    }
185
186    /// Builds the configured [`BtrBlocksCompressor`].
187    pub fn build(self) -> BtrBlocksCompressor {
188        BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
189    }
190}