vortex_fastlanes/bitpacking/
mod.rs

1use std::fmt::Debug;
2
3pub use compress::*;
4use fastlanes::BitPacking;
5use vortex_array::arrays::PrimitiveArray;
6use vortex_array::builders::ArrayBuilder;
7use vortex_array::patches::Patches;
8use vortex_array::stats::{ArrayStats, StatsSetRef};
9use vortex_array::validity::Validity;
10use vortex_array::variants::PrimitiveArrayTrait;
11use vortex_array::vtable::VTableRef;
12use vortex_array::{
13    Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayRef, ArrayStatisticsImpl,
14    ArrayValidityImpl, ArrayVariantsImpl, Canonical, Encoding, RkyvMetadata, try_from_array_ref,
15};
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype_with_unsigned_type};
18use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
19use vortex_mask::Mask;
20
21use crate::bitpacking::serde::BitPackedMetadata;
22
23mod compress;
24mod compute;
25mod serde;
26
27#[derive(Clone, Debug)]
28pub struct BitPackedArray {
29    offset: u16,
30    len: usize,
31    dtype: DType,
32    bit_width: u8,
33    packed: ByteBuffer,
34    patches: Option<Patches>,
35    validity: Validity,
36    stats_set: ArrayStats,
37}
38
39try_from_array_ref!(BitPackedArray);
40
41pub struct BitPackedEncoding;
42impl Encoding for BitPackedEncoding {
43    type Array = BitPackedArray;
44    type Metadata = RkyvMetadata<BitPackedMetadata>;
45}
46
47/// NB: All non-null values in the patches array are considered patches
48impl BitPackedArray {
49    /// Create a new bitpacked array using a buffer of packed data.
50    ///
51    /// The packed data should be interpreted as a sequence of values with size `bit_width`.
52    ///
53    /// # Errors
54    ///
55    /// This method returns errors if any of the metadata is inconsistent, for example the packed
56    /// buffer provided does not have the right size according to the supplied length and target
57    /// PType.
58    ///
59    /// # Safety
60    ///
61    /// For signed arrays, it is the caller's responsibility to ensure that there are no values
62    /// that can be interpreted once unpacked to the provided PType.
63    ///
64    /// This invariant is upheld by the compressor, but callers must ensure this if they wish to
65    /// construct a new `BitPackedArray` from parts.
66    ///
67    /// See also the [`encode`][Self::encode] method on this type for a safe path to create a new
68    /// bit-packed array.
69    pub unsafe fn new_unchecked(
70        packed: ByteBuffer,
71        ptype: PType,
72        validity: Validity,
73        patches: Option<Patches>,
74        bit_width: u8,
75        len: usize,
76    ) -> VortexResult<Self> {
77        // SAFETY: checked by caller.
78        unsafe {
79            Self::new_unchecked_with_offset(packed, ptype, validity, patches, bit_width, len, 0)
80        }
81    }
82
83    /// An unsafe constructor for a `BitPackedArray` that also specifies a slicing offset.
84    ///
85    /// See also [`new_unchecked`][Self::new_unchecked].
86    pub(crate) unsafe fn new_unchecked_with_offset(
87        packed: ByteBuffer,
88        ptype: PType,
89        validity: Validity,
90        patches: Option<Patches>,
91        bit_width: u8,
92        length: usize,
93        offset: u16,
94    ) -> VortexResult<Self> {
95        let dtype = DType::Primitive(ptype, validity.nullability());
96        if !dtype.is_int() {
97            vortex_bail!(MismatchedTypes: "integer", dtype);
98        }
99
100        if bit_width > u64::BITS as u8 {
101            vortex_bail!("Unsupported bit width {}", bit_width);
102        }
103        if offset > 1023 {
104            vortex_bail!(
105                "Offset must be less than full block, i.e. 1024, got {}",
106                offset
107            );
108        }
109
110        if let Some(ref patches) = patches {
111            // Ensure that array and patches have same PType
112            if !patches.dtype().eq_ignore_nullability(ptype.into()) {
113                vortex_bail!(
114                    "Patches DType {} does not match BitPackedArray dtype {}",
115                    patches.dtype().as_nonnullable(),
116                    ptype
117                )
118            }
119        }
120
121        // expected packed size is in bytes
122        let expected_packed_size =
123            (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
124        if packed.len() != expected_packed_size {
125            return Err(vortex_err!(
126                "Expected {} packed bytes, got {}",
127                expected_packed_size,
128                packed.len()
129            ));
130        }
131
132        // TODO(ngates): enforce 128 byte alignment once we have a BufferBuilder that can
133        //  enforce custom alignments.
134        // let packed = ByteBuffer::new_with_alignment(packed, FASTLANES_ALIGNMENT);
135
136        Ok(Self {
137            offset,
138            len: length,
139            dtype,
140            bit_width,
141            packed,
142            patches,
143            validity,
144            stats_set: Default::default(),
145        })
146    }
147
148    #[inline]
149    pub fn packed(&self) -> &ByteBuffer {
150        &self.packed
151    }
152
153    /// Access the slice of packed values as an array of `T`
154    #[inline]
155    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
156        let packed_bytes = self.packed();
157        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
158        // Return number of elements of type `T` packed in the buffer
159        let packed_len = packed_bytes.len() / size_of::<T>();
160
161        // SAFETY: as_slice points to buffer memory that outlives the lifetime of `self`.
162        //  Unfortunately Rust cannot understand this, so we reconstruct the slice from raw parts
163        //  to get it to reinterpret the lifetime.
164        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
165    }
166
167    #[inline]
168    pub fn bit_width(&self) -> u8 {
169        self.bit_width
170    }
171
172    /// Access the patches array.
173    ///
174    /// If present, patches MUST be a `SparseArray` with equal-length to this array, and whose
175    /// indices indicate the locations of patches. The indices must have non-zero length.
176    #[inline]
177    pub fn patches(&self) -> Option<&Patches> {
178        self.patches.as_ref()
179    }
180
181    pub fn replace_patches(&mut self, patches: Option<Patches>) {
182        self.patches = patches;
183    }
184
185    #[inline]
186    pub fn offset(&self) -> u16 {
187        self.offset
188    }
189
190    pub fn validity(&self) -> &Validity {
191        &self.validity
192    }
193
194    /// Bit-pack an array of primitive integers down to the target bit-width using the FastLanes
195    /// SIMD-accelerated packing kernels.
196    ///
197    /// # Errors
198    ///
199    /// If the provided array is not an integer type, an error will be returned.
200    ///
201    /// If the provided array contains negative values, an error will be returned.
202    ///
203    /// If the requested bit-width for packing is larger than the array's native width, an
204    /// error will be returned.
205    pub fn encode(array: &dyn Array, bit_width: u8) -> VortexResult<Self> {
206        if let Some(parray) = array.as_opt::<PrimitiveArray>() {
207            bitpack_encode(parray, bit_width)
208        } else {
209            vortex_bail!("Bitpacking can only encode primitive arrays");
210        }
211    }
212
213    /// Calculate the maximum value that **can** be contained by this array, given its bit-width.
214    ///
215    /// Note that this value need not actually be present in the array.
216    #[inline]
217    fn max_packed_value(&self) -> usize {
218        (1 << self.bit_width()) - 1
219    }
220}
221
222impl ArrayImpl for BitPackedArray {
223    type Encoding = BitPackedEncoding;
224
225    fn _len(&self) -> usize {
226        self.len
227    }
228
229    fn _dtype(&self) -> &DType {
230        &self.dtype
231    }
232
233    fn _vtable(&self) -> VTableRef {
234        VTableRef::new_ref(&BitPackedEncoding)
235    }
236
237    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
238        let patches = self.patches().map(|existing| {
239            let indices = children[0].clone();
240            let values = children[1].clone();
241            Patches::new(existing.array_len(), existing.offset(), indices, values)
242        });
243
244        let validity = if self.validity().is_array() {
245            Validity::Array(children[children.len() - 1].clone())
246        } else {
247            self.validity().clone()
248        };
249
250        unsafe {
251            Self::new_unchecked_with_offset(
252                self.packed().clone(),
253                self.ptype(),
254                validity,
255                patches,
256                self.bit_width(),
257                self.len(),
258                self.offset(),
259            )
260        }
261    }
262}
263
264impl ArrayCanonicalImpl for BitPackedArray {
265    fn _to_canonical(&self) -> VortexResult<Canonical> {
266        unpack(self).map(Canonical::Primitive)
267    }
268
269    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
270        match_each_integer_ptype_with_unsigned_type!(self.ptype(), |$T, $UnsignedT| {
271            unpack_into::<$T, $UnsignedT, _, _>(
272                self,
273                builder
274                    .as_any_mut()
275                    .downcast_mut()
276                    .vortex_expect("bit packed array must canonicalize into a primitive array"),
277                // SAFETY: UnsignedT is the unsigned version of T, reinterpreting &[UnsignedT] to
278                // &[T] is therefore safe.
279                |x| unsafe { std::mem::transmute(x) },
280                // SAFETY: UnsignedT is the unsigned version of T, reinterpreting &mut [T] to
281                // &mut [UnsignedT] is therefore safe.
282                |x| unsafe { std::mem::transmute(x) },
283            )
284        })
285    }
286}
287
288impl ArrayStatisticsImpl for BitPackedArray {
289    fn _stats_ref(&self) -> StatsSetRef<'_> {
290        self.stats_set.to_ref(self)
291    }
292}
293
294impl ArrayValidityImpl for BitPackedArray {
295    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
296        self.validity.is_valid(index)
297    }
298
299    fn _all_valid(&self) -> VortexResult<bool> {
300        self.validity.all_valid()
301    }
302
303    fn _all_invalid(&self) -> VortexResult<bool> {
304        self.validity.all_invalid()
305    }
306
307    fn _validity_mask(&self) -> VortexResult<Mask> {
308        self.validity.to_mask(self.len())
309    }
310}
311
312impl ArrayVariantsImpl for BitPackedArray {
313    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
314        Some(self)
315    }
316}
317
318impl PrimitiveArrayTrait for BitPackedArray {}
319
320#[cfg(test)]
321mod test {
322    use vortex_array::arrays::PrimitiveArray;
323    use vortex_array::{IntoArray, ToCanonical};
324    use vortex_buffer::Buffer;
325
326    use crate::BitPackedArray;
327
328    // #[cfg_attr(miri, ignore)]
329    // #[test]
330    // fn test_bitpacked_metadata() {
331    //     check_metadata(
332    //         "bitpacked.metadata",
333    //         RkyvMetadata(BitPackedMetadata {
334    //             patches: Some(PatchesMetadata::new(usize::MAX, usize::MAX, PType::U64)),
335    //             validity: ValidityMetadata::AllValid,
336    //             offset: u16::MAX,
337    //             bit_width: u8::MAX,
338    //         }),
339    //     );
340    // }
341
342    #[test]
343    fn test_encode() {
344        let values = [Some(1), None, Some(1), None, Some(1), None, Some(u64::MAX)];
345        let uncompressed = PrimitiveArray::from_option_iter(values);
346        let packed = BitPackedArray::encode(&uncompressed, 1).unwrap();
347        let expected = &[1, 0, 1, 0, 1, 0, u64::MAX];
348        let results = packed.to_primitive().unwrap().as_slice::<u64>().to_vec();
349        assert_eq!(results, expected);
350    }
351
352    #[test]
353    fn test_encode_too_wide() {
354        let values = [Some(1u8), None, Some(1), None, Some(1), None];
355        let uncompressed = PrimitiveArray::from_option_iter(values);
356        let _packed = BitPackedArray::encode(&uncompressed, 8)
357            .expect_err("Cannot pack value into the same width");
358        let _packed = BitPackedArray::encode(&uncompressed, 9)
359            .expect_err("Cannot pack value into larger width");
360    }
361
362    #[test]
363    fn signed_with_patches() {
364        let values: Buffer<i32> = (0i32..=512).collect();
365        let parray = values.clone().into_array();
366
367        let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap();
368        assert!(packed_with_patches.patches().is_some());
369        assert_eq!(
370            packed_with_patches
371                .to_primitive()
372                .unwrap()
373                .as_slice::<i32>(),
374            values.as_slice()
375        );
376    }
377}