Skip to main content

vortex_fastlanes/bitpacking/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use fastlanes::BitPacking;
5use vortex_array::Array;
6use vortex_array::arrays::PrimitiveVTable;
7use vortex_array::buffer::BufferHandle;
8use vortex_array::dtype::DType;
9use vortex_array::dtype::NativePType;
10use vortex_array::dtype::PType;
11use vortex_array::patches::Patches;
12use vortex_array::stats::ArrayStats;
13use vortex_array::validity::Validity;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17
18pub mod bitpack_compress;
19pub mod bitpack_decompress;
20pub mod unpack_iter;
21
22use crate::bitpack_compress::bitpack_encode;
23use crate::unpack_iter::BitPacked;
24use crate::unpack_iter::BitUnpackedChunks;
25
26pub struct BitPackedArrayParts {
27    pub offset: u16,
28    pub bit_width: u8,
29    pub len: usize,
30    pub packed: BufferHandle,
31    pub patches: Option<Patches>,
32    pub validity: Validity,
33}
34
35#[derive(Clone, Debug)]
36pub struct BitPackedArray {
37    /// The offset within the first block (created with a slice).
38    /// 0 <= offset < 1024
39    pub(super) offset: u16,
40    pub(super) len: usize,
41    pub(super) dtype: DType,
42    pub(super) bit_width: u8,
43    pub(super) packed: BufferHandle,
44    pub(super) patches: Option<Patches>,
45    pub(super) validity: Validity,
46    pub(super) stats_set: ArrayStats,
47}
48
49impl BitPackedArray {
50    /// Create a new bitpacked array using a buffer of packed data.
51    ///
52    /// The packed data should be interpreted as a sequence of values with size `bit_width`.
53    ///
54    /// # Errors
55    ///
56    /// This method returns errors if any of the metadata is inconsistent, for example the packed
57    /// buffer provided does not have the right size according to the supplied length and target
58    /// PType.
59    ///
60    /// # Safety
61    ///
62    /// For signed arrays, it is the caller's responsibility to ensure that there are no values
63    /// that can be interpreted once unpacked to the provided PType.
64    ///
65    /// This invariant is upheld by the compressor, but callers must ensure this if they wish to
66    /// construct a new `BitPackedArray` from parts.
67    ///
68    /// See also the [`encode`][Self::encode] method on this type for a safe path to create a new
69    /// bit-packed array.
70    pub(crate) unsafe fn new_unchecked(
71        packed: BufferHandle,
72        dtype: DType,
73        validity: Validity,
74        patches: Option<Patches>,
75        bit_width: u8,
76        len: usize,
77        offset: u16,
78    ) -> Self {
79        Self {
80            offset,
81            len,
82            dtype,
83            bit_width,
84            packed,
85            patches,
86            validity,
87            stats_set: Default::default(),
88        }
89    }
90
91    /// A safe constructor for a `BitPackedArray` from its components:
92    ///
93    /// * `packed` is ByteBuffer holding the compressed data that was packed with FastLanes
94    ///   bit-packing to a `bit_width` bits per value. `length` is the length of the original
95    ///   vector. Note that the packed is padded with zeros to the next multiple of 1024 elements
96    ///   if `length` is not divisible by 1024.
97    /// * `ptype` of the original data
98    /// * `validity` to track any nulls
99    /// * `patches` optionally provided for values that did not pack
100    ///
101    /// Any failure in validation will result in an error.
102    ///
103    /// # Validation
104    ///
105    /// * The `ptype` must be an integer
106    /// * `validity` must have `length` len
107    /// * Any patches must have any `array_len` equal to `length`
108    /// * The `packed` buffer must be exactly sized to hold `length` values of `bit_width` rounded
109    ///   up to the next multiple of 1024.
110    ///
111    /// Any violation of these preconditions will result in an error.
112    pub fn try_new(
113        packed: BufferHandle,
114        ptype: PType,
115        validity: Validity,
116        patches: Option<Patches>,
117        bit_width: u8,
118        length: usize,
119        offset: u16,
120    ) -> VortexResult<Self> {
121        Self::validate(
122            &packed,
123            ptype,
124            &validity,
125            patches.as_ref(),
126            bit_width,
127            length,
128            offset,
129        )?;
130
131        let dtype = DType::Primitive(ptype, validity.nullability());
132
133        // SAFETY: all components validated above
134        unsafe {
135            Ok(Self::new_unchecked(
136                packed, dtype, validity, patches, bit_width, length, offset,
137            ))
138        }
139    }
140
141    fn validate(
142        packed: &BufferHandle,
143        ptype: PType,
144        validity: &Validity,
145        patches: Option<&Patches>,
146        bit_width: u8,
147        length: usize,
148        offset: u16,
149    ) -> VortexResult<()> {
150        vortex_ensure!(ptype.is_int(), MismatchedTypes: "integer", ptype);
151        vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
152
153        if let Some(validity_len) = validity.maybe_len() {
154            vortex_ensure!(
155                validity_len == length,
156                "BitPackedArray validity length {validity_len} != array length {length}",
157            );
158        }
159
160        // Validate offset for sliced arrays
161        vortex_ensure!(
162            offset < 1024,
163            "Offset must be less than the full block i.e., 1024, got {offset}"
164        );
165
166        // Validate patches
167        if let Some(patches) = patches {
168            Self::validate_patches(patches, ptype, length)?;
169        }
170
171        // Validate packed buffer
172        let expected_packed_len =
173            (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
174        vortex_ensure!(
175            packed.len() == expected_packed_len,
176            "Expected {} packed bytes, got {}",
177            expected_packed_len,
178            packed.len()
179        );
180
181        Ok(())
182    }
183
184    fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> {
185        // Ensure that array and patches have same ptype
186        vortex_ensure!(
187            patches.dtype().eq_ignore_nullability(ptype.into()),
188            "Patches DType {} does not match BitPackedArray dtype {}",
189            patches.dtype().as_nonnullable(),
190            ptype
191        );
192
193        vortex_ensure!(
194            patches.array_len() == len,
195            "BitPackedArray patches length {} != expected {len}",
196            patches.array_len(),
197        );
198
199        Ok(())
200    }
201
202    pub fn ptype(&self) -> PType {
203        self.dtype.as_ptype()
204    }
205
206    /// Underlying bit packed values as byte array
207    #[inline]
208    pub fn packed(&self) -> &BufferHandle {
209        &self.packed
210    }
211
212    /// Access the slice of packed values as an array of `T`
213    #[inline]
214    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
215        let packed_bytes = self.packed().as_host();
216        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
217        // Return number of elements of type `T` packed in the buffer
218        let packed_len = packed_bytes.len() / size_of::<T>();
219
220        // SAFETY: as_slice points to buffer memory that outlives the lifetime of `self`.
221        //  Unfortunately Rust cannot understand this, so we reconstruct the slice from raw parts
222        //  to get it to reinterpret the lifetime.
223        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
224    }
225
226    /// Accessor for bit unpacked chunks
227    pub fn unpacked_chunks<T: BitPacked>(&self) -> BitUnpackedChunks<T> {
228        assert_eq!(
229            T::PTYPE,
230            self.ptype(),
231            "Requested type doesn't match the array ptype"
232        );
233        BitUnpackedChunks::new(self)
234    }
235
236    /// Bit-width of the packed values
237    #[inline]
238    pub fn bit_width(&self) -> u8 {
239        self.bit_width
240    }
241
242    /// Access the patches array.
243    ///
244    /// If present, patches MUST be a `SparseArray` with equal-length to this array, and whose
245    /// indices indicate the locations of patches. The indices must have non-zero length.
246    #[inline]
247    pub fn patches(&self) -> Option<&Patches> {
248        self.patches.as_ref()
249    }
250
251    pub fn replace_patches(&mut self, patches: Option<Patches>) {
252        self.patches = patches;
253    }
254
255    #[inline]
256    pub fn offset(&self) -> u16 {
257        self.offset
258    }
259
260    /// Bit-pack an array of primitive integers down to the target bit-width using the FastLanes
261    /// SIMD-accelerated packing kernels.
262    ///
263    /// # Errors
264    ///
265    /// If the provided array is not an integer type, an error will be returned.
266    ///
267    /// If the provided array contains negative values, an error will be returned.
268    ///
269    /// If the requested bit-width for packing is larger than the array's native width, an
270    /// error will be returned.
271    // FIXME(ngates): take a PrimitiveArray
272    pub fn encode(array: &dyn Array, bit_width: u8) -> VortexResult<Self> {
273        if let Some(parray) = array.as_opt::<PrimitiveVTable>() {
274            bitpack_encode(parray, bit_width, None)
275        } else {
276            vortex_bail!(InvalidArgument: "Bitpacking can only encode primitive arrays");
277        }
278    }
279
280    /// Calculate the maximum value that **can** be contained by this array, given its bit-width.
281    ///
282    /// Note that this value need not actually be present in the array.
283    #[inline]
284    pub fn max_packed_value(&self) -> usize {
285        (1 << self.bit_width()) - 1
286    }
287
288    pub fn into_parts(self) -> BitPackedArrayParts {
289        BitPackedArrayParts {
290            offset: self.offset,
291            bit_width: self.bit_width,
292            len: self.len,
293            packed: self.packed,
294            patches: self.patches,
295            validity: self.validity,
296        }
297    }
298}
299
300#[cfg(test)]
301mod test {
302    use vortex_array::IntoArray;
303    use vortex_array::ToCanonical;
304    use vortex_array::arrays::PrimitiveArray;
305    use vortex_array::assert_arrays_eq;
306    use vortex_buffer::Buffer;
307
308    use crate::BitPackedArray;
309
310    #[test]
311    fn test_encode() {
312        let values = [
313            Some(1u64),
314            None,
315            Some(1),
316            None,
317            Some(1),
318            None,
319            Some(u64::MAX),
320        ];
321        let uncompressed = PrimitiveArray::from_option_iter(values);
322        let packed = BitPackedArray::encode(uncompressed.as_ref(), 1).unwrap();
323        let expected = PrimitiveArray::from_option_iter(values);
324        assert_arrays_eq!(packed.to_primitive(), expected);
325    }
326
327    #[test]
328    fn test_encode_too_wide() {
329        let values = [Some(1u8), None, Some(1), None, Some(1), None];
330        let uncompressed = PrimitiveArray::from_option_iter(values);
331        let _packed = BitPackedArray::encode(uncompressed.as_ref(), 8)
332            .expect_err("Cannot pack value into the same width");
333        let _packed = BitPackedArray::encode(uncompressed.as_ref(), 9)
334            .expect_err("Cannot pack value into larger width");
335    }
336
337    #[test]
338    fn signed_with_patches() {
339        let values: Buffer<i32> = (0i32..=512).collect();
340        let parray = values.clone().into_array();
341
342        let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap();
343        assert!(packed_with_patches.patches().is_some());
344        assert_arrays_eq!(
345            packed_with_patches.to_primitive(),
346            PrimitiveArray::new(values, vortex_array::validity::Validity::NonNullable)
347        );
348    }
349}