vortex_fastlanes/bitpacking/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use fastlanes::BitPacking;
5use vortex_array::Array;
6use vortex_array::arrays::PrimitiveVTable;
7use vortex_array::patches::Patches;
8use vortex_array::stats::ArrayStats;
9use vortex_array::validity::Validity;
10use vortex_buffer::ByteBuffer;
11use vortex_dtype::DType;
12use vortex_dtype::NativePType;
13use vortex_dtype::PType;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_error::vortex_ensure;
17
18pub mod bitpack_compress;
19pub mod bitpack_decompress;
20pub mod unpack_iter;
21
22use crate::bitpack_compress::bitpack_encode;
23use crate::unpack_iter::BitPacked;
24use crate::unpack_iter::BitUnpackedChunks;
25
26#[derive(Clone, Debug)]
27pub struct BitPackedArray {
28    /// The offset within the first block (created with a slice).
29    /// 0 <= offset < 1024
30    pub(super) offset: u16,
31    pub(super) len: usize,
32    pub(super) dtype: DType,
33    pub(super) bit_width: u8,
34    pub(super) packed: ByteBuffer,
35    pub(super) patches: Option<Patches>,
36    pub(super) validity: Validity,
37    pub(super) stats_set: ArrayStats,
38}
39
40impl BitPackedArray {
41    /// Create a new bitpacked array using a buffer of packed data.
42    ///
43    /// The packed data should be interpreted as a sequence of values with size `bit_width`.
44    ///
45    /// # Errors
46    ///
47    /// This method returns errors if any of the metadata is inconsistent, for example the packed
48    /// buffer provided does not have the right size according to the supplied length and target
49    /// PType.
50    ///
51    /// # Safety
52    ///
53    /// For signed arrays, it is the caller's responsibility to ensure that there are no values
54    /// that can be interpreted once unpacked to the provided PType.
55    ///
56    /// This invariant is upheld by the compressor, but callers must ensure this if they wish to
57    /// construct a new `BitPackedArray` from parts.
58    ///
59    /// See also the [`encode`][Self::encode] method on this type for a safe path to create a new
60    /// bit-packed array.
61    pub(crate) unsafe fn new_unchecked(
62        packed: ByteBuffer,
63        dtype: DType,
64        validity: Validity,
65        patches: Option<Patches>,
66        bit_width: u8,
67        len: usize,
68        offset: u16,
69    ) -> Self {
70        Self {
71            offset,
72            len,
73            dtype,
74            bit_width,
75            packed,
76            patches,
77            validity,
78            stats_set: Default::default(),
79        }
80    }
81
82    /// A safe constructor for a `BitPackedArray` from its components:
83    ///
84    /// * `packed` is ByteBuffer holding the compressed data that was packed with FastLanes
85    ///   bit-packing to a `bit_width` bits per value. `length` is the length of the original
86    ///   vector. Note that the packed is padded with zeros to the next multiple of 1024 elements
87    ///   if `length` is not divisible by 1024.
88    /// * `ptype` of the original data
89    /// * `validity` to track any nulls
90    /// * `patches` optionally provided for values that did not pack
91    ///
92    /// Any failure in validation will result in an error.
93    ///
94    /// # Validation
95    ///
96    /// * The `ptype` must be an integer
97    /// * `validity` must have `length` len
98    /// * Any patches must have any `array_len` equal to `length`
99    /// * The `packed` buffer must be exactly sized to hold `length` values of `bit_width` rounded
100    ///   up to the next multiple of 1024.
101    ///
102    /// Any violation of these preconditions will result in an error.
103    pub fn try_new(
104        packed: ByteBuffer,
105        ptype: PType,
106        validity: Validity,
107        patches: Option<Patches>,
108        bit_width: u8,
109        length: usize,
110        offset: u16,
111    ) -> VortexResult<Self> {
112        Self::validate(
113            &packed,
114            ptype,
115            &validity,
116            patches.as_ref(),
117            bit_width,
118            length,
119            offset,
120        )?;
121
122        let dtype = DType::Primitive(ptype, validity.nullability());
123
124        // SAFETY: all components validated above
125        unsafe {
126            Ok(Self::new_unchecked(
127                packed, dtype, validity, patches, bit_width, length, offset,
128            ))
129        }
130    }
131
132    fn validate(
133        packed: &ByteBuffer,
134        ptype: PType,
135        validity: &Validity,
136        patches: Option<&Patches>,
137        bit_width: u8,
138        length: usize,
139        offset: u16,
140    ) -> VortexResult<()> {
141        vortex_ensure!(ptype.is_int(), MismatchedTypes: "integer", ptype);
142        vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
143
144        if let Some(validity_len) = validity.maybe_len() {
145            vortex_ensure!(
146                validity_len == length,
147                "BitPackedArray validity length {validity_len} != array length {length}",
148            );
149        }
150
151        // Validate offset for sliced arrays
152        vortex_ensure!(
153            offset < 1024,
154            "Offset must be less than the full block i.e., 1024, got {offset}"
155        );
156
157        // Validate patches
158        if let Some(patches) = patches {
159            Self::validate_patches(patches, ptype, length)?;
160        }
161
162        // Validate packed buffer
163        let expected_packed_len =
164            (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
165        vortex_ensure!(
166            packed.len() == expected_packed_len,
167            "Expected {} packed bytes, got {}",
168            expected_packed_len,
169            packed.len()
170        );
171
172        Ok(())
173    }
174
175    fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> {
176        // Ensure that array and patches have same ptype
177        vortex_ensure!(
178            patches.dtype().eq_ignore_nullability(ptype.into()),
179            "Patches DType {} does not match BitPackedArray dtype {}",
180            patches.dtype().as_nonnullable(),
181            ptype
182        );
183
184        vortex_ensure!(
185            patches.array_len() == len,
186            "BitPackedArray patches length {} != expected {len}",
187            patches.array_len(),
188        );
189
190        Ok(())
191    }
192
193    pub fn ptype(&self) -> PType {
194        self.dtype.as_ptype()
195    }
196
197    /// Underlying bit packed values as byte array
198    #[inline]
199    pub fn packed(&self) -> &ByteBuffer {
200        &self.packed
201    }
202
203    /// Access the slice of packed values as an array of `T`
204    #[inline]
205    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
206        let packed_bytes = self.packed();
207        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
208        // Return number of elements of type `T` packed in the buffer
209        let packed_len = packed_bytes.len() / size_of::<T>();
210
211        // SAFETY: as_slice points to buffer memory that outlives the lifetime of `self`.
212        //  Unfortunately Rust cannot understand this, so we reconstruct the slice from raw parts
213        //  to get it to reinterpret the lifetime.
214        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
215    }
216
217    /// Accessor for bit unpacked chunks
218    pub fn unpacked_chunks<T: BitPacked>(&self) -> BitUnpackedChunks<T> {
219        assert_eq!(
220            T::PTYPE,
221            self.ptype(),
222            "Requested type doesn't match the array ptype"
223        );
224        BitUnpackedChunks::new(self)
225    }
226
227    /// Bit-width of the packed values
228    #[inline]
229    pub fn bit_width(&self) -> u8 {
230        self.bit_width
231    }
232
233    /// Access the patches array.
234    ///
235    /// If present, patches MUST be a `SparseArray` with equal-length to this array, and whose
236    /// indices indicate the locations of patches. The indices must have non-zero length.
237    #[inline]
238    pub fn patches(&self) -> Option<&Patches> {
239        self.patches.as_ref()
240    }
241
242    pub fn replace_patches(&mut self, patches: Option<Patches>) {
243        self.patches = patches;
244    }
245
246    #[inline]
247    pub fn offset(&self) -> u16 {
248        self.offset
249    }
250
251    /// Bit-pack an array of primitive integers down to the target bit-width using the FastLanes
252    /// SIMD-accelerated packing kernels.
253    ///
254    /// # Errors
255    ///
256    /// If the provided array is not an integer type, an error will be returned.
257    ///
258    /// If the provided array contains negative values, an error will be returned.
259    ///
260    /// If the requested bit-width for packing is larger than the array's native width, an
261    /// error will be returned.
262    // FIXME(ngates): take a PrimitiveArray
263    pub fn encode(array: &dyn Array, bit_width: u8) -> VortexResult<Self> {
264        if let Some(parray) = array.as_opt::<PrimitiveVTable>() {
265            bitpack_encode(parray, bit_width, None)
266        } else {
267            vortex_bail!("Bitpacking can only encode primitive arrays");
268        }
269    }
270
271    /// Calculate the maximum value that **can** be contained by this array, given its bit-width.
272    ///
273    /// Note that this value need not actually be present in the array.
274    #[inline]
275    pub fn max_packed_value(&self) -> usize {
276        (1 << self.bit_width()) - 1
277    }
278}
279
280#[cfg(test)]
281mod test {
282    use vortex_array::IntoArray;
283    use vortex_array::ToCanonical;
284    use vortex_array::arrays::PrimitiveArray;
285    use vortex_buffer::Buffer;
286
287    use crate::BitPackedArray;
288
289    // #[cfg_attr(miri, ignore)]
290    // #[test]
291    // fn test_bitpacked_metadata() {
292    //     check_metadata(
293    //         "bitpacked.metadata",
294    //         RkyvMetadata(BitPackedMetadata {
295    //             patches: Some(PatchesMetadata::new(usize::MAX, usize::MAX, PType::U64)),
296    //             validity: ValidityMetadata::AllValid,
297    //             offset: u16::MAX,
298    //             bit_width: u8::MAX,
299    //         }),
300    //     );
301    // }
302
303    #[test]
304    fn test_encode() {
305        let values = [Some(1), None, Some(1), None, Some(1), None, Some(u64::MAX)];
306        let uncompressed = PrimitiveArray::from_option_iter(values);
307        let packed = BitPackedArray::encode(uncompressed.as_ref(), 1).unwrap();
308        let expected = &[1, 0, 1, 0, 1, 0, u64::MAX];
309        let results = packed.to_primitive().as_slice::<u64>().to_vec();
310        assert_eq!(results, expected);
311    }
312
313    #[test]
314    fn test_encode_too_wide() {
315        let values = [Some(1u8), None, Some(1), None, Some(1), None];
316        let uncompressed = PrimitiveArray::from_option_iter(values);
317        let _packed = BitPackedArray::encode(uncompressed.as_ref(), 8)
318            .expect_err("Cannot pack value into the same width");
319        let _packed = BitPackedArray::encode(uncompressed.as_ref(), 9)
320            .expect_err("Cannot pack value into larger width");
321    }
322
323    #[test]
324    fn signed_with_patches() {
325        let values: Buffer<i32> = (0i32..=512).collect();
326        let parray = values.clone().into_array();
327
328        let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap();
329        assert!(packed_with_patches.patches().is_some());
330        assert_eq!(
331            packed_with_patches.to_primitive().as_slice::<i32>(),
332            values.as_slice()
333        );
334    }
335}