Skip to main content

vortex_btrblocks/
builder.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Builder for configuring `BtrBlocksCompressor` instances.
5
6use vortex_utils::aliases::hash_set::HashSet;
7
8use crate::BtrBlocksCompressor;
9use crate::CascadingCompressor;
10use crate::Scheme;
11use crate::SchemeExt;
12use crate::SchemeId;
13use crate::schemes::binary;
14use crate::schemes::bool;
15use crate::schemes::decimal;
16use crate::schemes::float;
17use crate::schemes::integer;
18use crate::schemes::string;
19use crate::schemes::temporal;
20
21/// All available compression schemes.
22///
23/// This list is order-sensitive: the builder preserves this order when constructing
24/// the final scheme list, so that tie-breaking is deterministic.
25pub const ALL_SCHEMES: &[&dyn Scheme] = &[
26    ////////////////////////////////////////////////////////////////////////////////////////////////
27    // Bool schemes.
28    ////////////////////////////////////////////////////////////////////////////////////////////////
29    &bool::BoolConstantScheme,
30    ////////////////////////////////////////////////////////////////////////////////////////////////
31    // Integer schemes.
32    ////////////////////////////////////////////////////////////////////////////////////////////////
33    &integer::IntConstantScheme,
34    // NOTE: FoR must precede BitPacking to avoid unnecessary patches.
35    &integer::FoRScheme,
36    // NOTE: ZigZag should precede BitPacking because we don't want negative numbers.
37    &integer::ZigZagScheme,
38    &integer::BitPackingScheme,
39    &integer::SparseScheme,
40    &integer::IntDictScheme,
41    &integer::RunEndScheme,
42    &integer::SequenceScheme,
43    &integer::IntRLEScheme,
44    ////////////////////////////////////////////////////////////////////////////////////////////////
45    // Float schemes.
46    ////////////////////////////////////////////////////////////////////////////////////////////////
47    &float::FloatConstantScheme,
48    &float::ALPScheme,
49    &float::ALPRDScheme,
50    &float::FloatDictScheme,
51    &float::NullDominatedSparseScheme,
52    &float::FloatRLEScheme,
53    ////////////////////////////////////////////////////////////////////////////////////////////////
54    // String schemes.
55    ////////////////////////////////////////////////////////////////////////////////////////////////
56    &string::StringDictScheme,
57    // Both string-fragmentation schemes are registered; the sample-based
58    // selector keeps whichever is smaller per column.
59    &string::FSSTScheme,
60    #[cfg(feature = "unstable_encodings")]
61    &string::OnPairScheme,
62    &string::StringConstantScheme,
63    &string::NullDominatedSparseScheme,
64    ////////////////////////////////////////////////////////////////////////////////////////////////
65    // Binary schemes.
66    ////////////////////////////////////////////////////////////////////////////////////////////////
67    &binary::BinaryDictScheme,
68    &binary::BinaryConstantScheme,
69    // Decimal schemes.
70    &decimal::DecimalScheme,
71    // Temporal schemes.
72    &temporal::TemporalScheme,
73];
74
75/// Builder for creating configured [`BtrBlocksCompressor`] instances.
76///
77/// By default, all schemes in [`ALL_SCHEMES`] are enabled. Feature-gated schemes (Pco, Zstd)
78/// are not in `ALL_SCHEMES` and must be added explicitly via
79/// [`with_scheme`](BtrBlocksCompressorBuilder::with_new_scheme) or
80/// [`with_compact`](BtrBlocksCompressorBuilder::with_compact).
81///
82/// # Examples
83///
84/// ```rust
85/// use vortex_btrblocks::{BtrBlocksCompressorBuilder, Scheme, SchemeExt};
86/// use vortex_btrblocks::schemes::integer::IntDictScheme;
87///
88/// // Default compressor with all schemes in ALL_SCHEMES.
89/// let compressor = BtrBlocksCompressorBuilder::default().build();
90///
91/// // Remove specific schemes.
92/// let compressor = BtrBlocksCompressorBuilder::default()
93///     .exclude_schemes([IntDictScheme.id()])
94///     .build();
95/// ```
96#[derive(Debug, Clone)]
97pub struct BtrBlocksCompressorBuilder {
98    schemes: Vec<&'static dyn Scheme>,
99}
100
101impl Default for BtrBlocksCompressorBuilder {
102    fn default() -> Self {
103        Self {
104            schemes: ALL_SCHEMES.to_vec(),
105        }
106    }
107}
108
109impl BtrBlocksCompressorBuilder {
110    /// Creates a builder with no schemes registered.
111    ///
112    /// Useful when the caller wants explicit, scheme-by-scheme control over the compressor.
113    pub fn empty() -> Self {
114        Self {
115            schemes: Vec::new(),
116        }
117    }
118
119    /// Adds an external compression scheme not in [`ALL_SCHEMES`].
120    ///
121    /// This allows encoding crates outside of `vortex-btrblocks` to register their own schemes
122    /// with the compressor.
123    ///
124    /// # Panics
125    ///
126    /// Panics if a scheme with the same [`SchemeId`] is already present.
127    pub fn with_new_scheme(mut self, scheme: &'static dyn Scheme) -> Self {
128        assert!(
129            !self.schemes.iter().any(|s| s.id() == scheme.id()),
130            "scheme {:?} is already present in the builder",
131            scheme.id(),
132        );
133
134        self.schemes.push(scheme);
135        self
136    }
137
138    /// Adds compact encoding schemes (Zstd for strings and binary, Pco for numerics).
139    ///
140    /// This provides better compression ratios than the default, especially for floating-point
141    /// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
142    /// Pco schemes for integers and floats are included.
143    ///
144    /// # Panics
145    ///
146    /// Panics if any of the compact schemes are already present.
147    #[cfg(feature = "zstd")]
148    pub fn with_compact(self) -> Self {
149        let builder = self
150            .with_new_scheme(&string::ZstdScheme)
151            .with_new_scheme(&binary::ZstdScheme);
152
153        #[cfg(feature = "pco")]
154        let builder = builder
155            .with_new_scheme(&integer::PcoScheme)
156            .with_new_scheme(&float::PcoScheme);
157
158        builder
159    }
160
161    /// Excludes schemes without CUDA kernel support and adds Zstd for string and binary compression.
162    ///
163    /// With the `unstable_encodings` feature, buffer-level Zstd compression is used which
164    /// preserves the array buffer layout for zero-conversion GPU decompression. Without it,
165    /// interleaved Zstd compression is used.
166    pub fn only_cuda_compatible(self) -> Self {
167        // String fragmentation schemes (OnPair, FSST) require host-side
168        // dictionary expansion at decode time, which is incompatible with
169        // pure-GPU decompression paths. Strip whichever string-fragment
170        // scheme is enabled by feature.
171        #[cfg_attr(not(feature = "unstable_encodings"), allow(unused_mut))]
172        let mut excluded: Vec<SchemeId> = vec![
173            integer::SparseScheme.id(),
174            integer::IntRLEScheme.id(),
175            float::FloatRLEScheme.id(),
176            float::NullDominatedSparseScheme.id(),
177            string::StringDictScheme.id(),
178            string::FSSTScheme.id(),
179            binary::BinaryDictScheme.id(),
180        ];
181        #[cfg(feature = "unstable_encodings")]
182        excluded.push(string::OnPairScheme.id());
183        let builder = self.exclude_schemes(excluded);
184
185        #[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
186        let builder = builder
187            .with_new_scheme(&string::ZstdBuffersScheme)
188            .with_new_scheme(&binary::ZstdBuffersScheme);
189        #[cfg(all(feature = "zstd", not(feature = "unstable_encodings")))]
190        let builder = builder
191            .with_new_scheme(&string::ZstdScheme)
192            .with_new_scheme(&binary::ZstdScheme);
193
194        builder
195    }
196
197    /// Removes the specified compression schemes by their [`SchemeId`].
198    pub fn exclude_schemes(mut self, ids: impl IntoIterator<Item = SchemeId>) -> Self {
199        let ids: HashSet<_> = ids.into_iter().collect();
200        self.schemes.retain(|s| !ids.contains(&s.id()));
201        self
202    }
203
204    /// Builds the configured [`BtrBlocksCompressor`].
205    pub fn build(self) -> BtrBlocksCompressor {
206        BtrBlocksCompressor(CascadingCompressor::new(self.schemes))
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// `empty()` must not pre-register any scheme.
    #[test]
    fn empty_starts_with_no_schemes() {
        let BtrBlocksCompressorBuilder { schemes } = BtrBlocksCompressorBuilder::empty();
        assert!(schemes.is_empty());
    }

    /// The default builder registers exactly as many schemes as `ALL_SCHEMES` lists.
    #[test]
    fn default_includes_all_schemes() {
        let registered = BtrBlocksCompressorBuilder::default().schemes.len();
        assert_eq!(registered, ALL_SCHEMES.len());
    }
}
225}