Skip to main content

vortex_btrblocks/
canonical_compressor.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! BtrBlocks-specific compressor wrapping the generic [`CascadingCompressor`].
5
6use std::ops::Deref;
7
8use vortex_array::ArrayRef;
9use vortex_array::ExecutionCtx;
10use vortex_error::VortexResult;
11
12use crate::BtrBlocksCompressorBuilder;
13use crate::CascadingCompressor;
14
15/// The BtrBlocks-style compressor with all built-in schemes pre-registered.
16///
17/// This is a thin wrapper around [`CascadingCompressor`] that provides a default set of
18/// compression schemes via [`BtrBlocksCompressorBuilder`].
19///
20/// # Examples
21///
22/// ```rust
23/// use vortex_btrblocks::{BtrBlocksCompressor, BtrBlocksCompressorBuilder, Scheme, SchemeExt};
24/// use vortex_btrblocks::schemes::integer::IntDictScheme;
25///
26/// // Default compressor - all schemes allowed.
27/// let compressor = BtrBlocksCompressor::default();
28///
29/// // Remove specific schemes using the builder.
30/// let compressor = BtrBlocksCompressorBuilder::default()
31///     .exclude_schemes([IntDictScheme.id()])
32///     .build();
33/// ```
34#[derive(Clone)]
35pub struct BtrBlocksCompressor(
36    /// The underlying cascading compressor.
37    pub CascadingCompressor,
38);
39
40impl BtrBlocksCompressor {
41    /// Compresses an array using BtrBlocks-inspired compression.
42    pub fn compress(&self, array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
43        self.0.compress(array, ctx)
44    }
45}
46
47impl Deref for BtrBlocksCompressor {
48    type Target = CascadingCompressor;
49
50    fn deref(&self) -> &CascadingCompressor {
51        &self.0
52    }
53}
54
55impl Default for BtrBlocksCompressor {
56    fn default() -> Self {
57        BtrBlocksCompressorBuilder::default().build()
58    }
59}
60
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::ArrayRef;
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::BoolArray;
    use vortex_array::arrays::Constant;
    use vortex_array::arrays::Dict;
    use vortex_array::arrays::List;
    use vortex_array::arrays::ListView;
    use vortex_array::arrays::ListViewArray;
    use vortex_array::arrays::VarBinViewArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::session::ArraySession;
    use vortex_array::validity::Validity;
    use vortex_buffer::BitBuffer;
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use crate::BtrBlocksCompressor;

    /// Session shared by all tests; every compression call derives its own execution context.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// Runs `array` through a default-constructed `BtrBlocksCompressor`, using a fresh
    /// execution context created from `SESSION`.
    fn compress_with_defaults(array: ArrayRef) -> VortexResult<ArrayRef> {
        let mut ctx = SESSION.create_execution_ctx();
        BtrBlocksCompressor::default().compress(&array, &mut ctx)
    }

    /// Compressing a list view must round-trip its contents, and the test expects a `List`
    /// result for the zero-copy-to-list input and a `ListView` result for the overlapping one.
    #[rstest]
    #[case::zctl(
        // SAFETY: the offset/size pairs select elements [0, 3) and [3, 5) of the five-element
        // child, so every view is in bounds and the views are ascending and non-overlapping.
        // NOTE(review): presumably that is what `new_unchecked` and the zero-copy-to-list
        // flag require; confirm against their safety docs.
        unsafe {
            ListViewArray::new_unchecked(
                buffer![1i32, 2, 3, 4, 5].into_array(),
                buffer![0i32, 3].into_array(),
                buffer![3i32, 2].into_array(),
                Validity::NonNullable,
            ).with_zero_copy_to_list(true)
        },
        true,
    )]
    #[case::overlapping(
        ListViewArray::new(
            buffer![1i32, 2, 3].into_array(),
            buffer![0i32, 0, 0].into_array(),
            buffer![3i32, 3, 3].into_array(),
            Validity::NonNullable,
        ),
        false,
    )]
    fn listview_compress_roundtrip(
        #[case] input: ListViewArray,
        #[case] expect_list: bool,
    ) -> VortexResult<()> {
        let compressed = compress_with_defaults(input.clone().into_array())?;
        match expect_list {
            true => assert!(compressed.as_opt::<List>().is_some()),
            false => assert!(compressed.as_opt::<ListView>().is_some()),
        }
        assert_arrays_eq!(compressed, input);
        Ok(())
    }

    /// A non-nullable bool array of 100 `true` values compresses to a constant.
    #[test]
    fn test_constant_all_true() -> VortexResult<()> {
        let bits = BitBuffer::from(vec![true; 100]);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// A non-nullable bool array of 100 `false` values compresses to a constant.
    #[test]
    fn test_constant_all_false() -> VortexResult<()> {
        let bits = BitBuffer::from(vec![false; 100]);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// A validity mask in which every entry is valid does not prevent constant compression.
    #[test]
    fn test_nullable_all_valid_compressed() -> VortexResult<()> {
        let bits = BitBuffer::from(vec![true; 100]);
        let all_valid = Validity::from(BitBuffer::from(vec![true; 100]));
        let array = BoolArray::new(bits, all_valid);
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// When every third entry is null, the result must not be a constant.
    #[test]
    fn test_nullable_with_nulls_not_compressed() -> VortexResult<()> {
        let bits = BitBuffer::from(vec![true; 100]);
        let some_nulls = Validity::from(BitBuffer::from_iter((0..100).map(|i| i % 3 != 0)));
        let array = BoolArray::new(bits, some_nulls);
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(!compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// Alternating bool values must not be turned into a constant.
    #[test]
    fn test_mixed_not_constant() -> VortexResult<()> {
        let bits = BitBuffer::from(vec![true, false, true, false, true]);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(!compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// One hundred copies of the same binary value compress to a constant.
    #[test]
    fn test_binary_constant_compressed() -> VortexResult<()> {
        let repeated = vec![Some(b"constant-bytes".as_slice()); 100];
        let array = VarBinViewArray::from_iter(repeated, DType::Binary(Nullability::NonNullable));
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(compressed.is::<Constant>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }

    /// One thousand binary values cycling through three distinct payloads compress to a
    /// dictionary.
    #[test]
    fn test_binary_dict_compressed() -> VortexResult<()> {
        let distinct_values: [&[u8]; 3] = [b"alpha", b"beta", b"gamma"];
        let cycled = distinct_values
            .iter()
            .copied()
            .cycle()
            .take(1000)
            .map(Some)
            .collect::<Vec<_>>();
        let array = VarBinViewArray::from_iter(cycled, DType::Binary(Nullability::NonNullable));
        let compressed = compress_with_defaults(array.clone().into_array())?;
        assert!(compressed.is::<Dict>());
        assert_arrays_eq!(compressed, array);
        Ok(())
    }
}