vortex_fastlanes/bitpacking/
mod.rs1use std::fmt::Debug;
2
3pub use compress::*;
4use fastlanes::BitPacking;
5use vortex_array::arrays::PrimitiveArray;
6use vortex_array::builders::ArrayBuilder;
7use vortex_array::patches::Patches;
8use vortex_array::stats::{ArrayStats, StatsSetRef};
9use vortex_array::validity::Validity;
10use vortex_array::variants::PrimitiveArrayTrait;
11use vortex_array::vtable::VTableRef;
12use vortex_array::{
13    Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayRef, ArrayStatisticsImpl,
14    ArrayValidityImpl, ArrayVariantsImpl, Canonical, Encoding, RkyvMetadata, try_from_array_ref,
15};
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype_with_unsigned_type};
18use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
19use vortex_mask::Mask;
20
21use crate::bitpacking::serde::BitPackedMetadata;
22
23mod compress;
24mod compute;
25mod serde;
26
27#[derive(Clone, Debug)]
28pub struct BitPackedArray {
29    offset: u16,
30    len: usize,
31    dtype: DType,
32    bit_width: u8,
33    packed: ByteBuffer,
34    patches: Option<Patches>,
35    validity: Validity,
36    stats_set: ArrayStats,
37}
38
39try_from_array_ref!(BitPackedArray);
40
41pub struct BitPackedEncoding;
42impl Encoding for BitPackedEncoding {
43    type Array = BitPackedArray;
44    type Metadata = RkyvMetadata<BitPackedMetadata>;
45}
46
47impl BitPackedArray {
49    pub unsafe fn new_unchecked(
70        packed: ByteBuffer,
71        ptype: PType,
72        validity: Validity,
73        patches: Option<Patches>,
74        bit_width: u8,
75        len: usize,
76    ) -> VortexResult<Self> {
77        unsafe {
79            Self::new_unchecked_with_offset(packed, ptype, validity, patches, bit_width, len, 0)
80        }
81    }
82
83    pub(crate) unsafe fn new_unchecked_with_offset(
87        packed: ByteBuffer,
88        ptype: PType,
89        validity: Validity,
90        patches: Option<Patches>,
91        bit_width: u8,
92        length: usize,
93        offset: u16,
94    ) -> VortexResult<Self> {
95        let dtype = DType::Primitive(ptype, validity.nullability());
96        if !dtype.is_int() {
97            vortex_bail!(MismatchedTypes: "integer", dtype);
98        }
99
100        if bit_width > u64::BITS as u8 {
101            vortex_bail!("Unsupported bit width {}", bit_width);
102        }
103        if offset > 1023 {
104            vortex_bail!(
105                "Offset must be less than full block, i.e. 1024, got {}",
106                offset
107            );
108        }
109
110        if let Some(ref patches) = patches {
111            if !patches.dtype().eq_ignore_nullability(ptype.into()) {
113                vortex_bail!(
114                    "Patches DType {} does not match BitPackedArray dtype {}",
115                    patches.dtype().as_nonnullable(),
116                    ptype
117                )
118            }
119        }
120
121        let expected_packed_size =
123            (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
124        if packed.len() != expected_packed_size {
125            return Err(vortex_err!(
126                "Expected {} packed bytes, got {}",
127                expected_packed_size,
128                packed.len()
129            ));
130        }
131
132        Ok(Self {
137            offset,
138            len: length,
139            dtype,
140            bit_width,
141            packed,
142            patches,
143            validity,
144            stats_set: Default::default(),
145        })
146    }
147
148    #[inline]
149    pub fn packed(&self) -> &ByteBuffer {
150        &self.packed
151    }
152
153    #[inline]
155    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
156        let packed_bytes = self.packed();
157        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
158        let packed_len = packed_bytes.len() / size_of::<T>();
160
161        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
165    }
166
167    #[inline]
168    pub fn bit_width(&self) -> u8 {
169        self.bit_width
170    }
171
172    #[inline]
177    pub fn patches(&self) -> Option<&Patches> {
178        self.patches.as_ref()
179    }
180
181    pub fn replace_patches(&mut self, patches: Option<Patches>) {
182        self.patches = patches;
183    }
184
185    #[inline]
186    pub fn offset(&self) -> u16 {
187        self.offset
188    }
189
190    pub fn validity(&self) -> &Validity {
191        &self.validity
192    }
193
194    pub fn encode(array: &dyn Array, bit_width: u8) -> VortexResult<Self> {
206        if let Some(parray) = array.as_opt::<PrimitiveArray>() {
207            bitpack_encode(parray, bit_width)
208        } else {
209            vortex_bail!("Bitpacking can only encode primitive arrays");
210        }
211    }
212
213    #[inline]
217    fn max_packed_value(&self) -> usize {
218        (1 << self.bit_width()) - 1
219    }
220}
221
222impl ArrayImpl for BitPackedArray {
223    type Encoding = BitPackedEncoding;
224
225    fn _len(&self) -> usize {
226        self.len
227    }
228
229    fn _dtype(&self) -> &DType {
230        &self.dtype
231    }
232
233    fn _vtable(&self) -> VTableRef {
234        VTableRef::new_ref(&BitPackedEncoding)
235    }
236
237    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
238        let patches = self.patches().map(|existing| {
239            let indices = children[0].clone();
240            let values = children[1].clone();
241            Patches::new(existing.array_len(), existing.offset(), indices, values)
242        });
243
244        let validity = if self.validity().is_array() {
245            Validity::Array(children[children.len() - 1].clone())
246        } else {
247            self.validity().clone()
248        };
249
250        unsafe {
251            Self::new_unchecked_with_offset(
252                self.packed().clone(),
253                self.ptype(),
254                validity,
255                patches,
256                self.bit_width(),
257                self.len(),
258                self.offset(),
259            )
260        }
261    }
262}
263
264impl ArrayCanonicalImpl for BitPackedArray {
265    fn _to_canonical(&self) -> VortexResult<Canonical> {
266        unpack(self).map(Canonical::Primitive)
267    }
268
269    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
270        match_each_integer_ptype_with_unsigned_type!(self.ptype(), |$T, $UnsignedT| {
271            unpack_into::<$T, $UnsignedT, _, _>(
272                self,
273                builder
274                    .as_any_mut()
275                    .downcast_mut()
276                    .vortex_expect("bit packed array must canonicalize into a primitive array"),
277                |x| unsafe { std::mem::transmute(x) },
280                |x| unsafe { std::mem::transmute(x) },
283            )
284        })
285    }
286}
287
288impl ArrayStatisticsImpl for BitPackedArray {
289    fn _stats_ref(&self) -> StatsSetRef<'_> {
290        self.stats_set.to_ref(self)
291    }
292}
293
294impl ArrayValidityImpl for BitPackedArray {
295    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
296        self.validity.is_valid(index)
297    }
298
299    fn _all_valid(&self) -> VortexResult<bool> {
300        self.validity.all_valid()
301    }
302
303    fn _all_invalid(&self) -> VortexResult<bool> {
304        self.validity.all_invalid()
305    }
306
307    fn _validity_mask(&self) -> VortexResult<Mask> {
308        self.validity.to_mask(self.len())
309    }
310}
311
312impl ArrayVariantsImpl for BitPackedArray {
313    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
314        Some(self)
315    }
316}
317
318impl PrimitiveArrayTrait for BitPackedArray {}
319
320#[cfg(test)]
321mod test {
322    use vortex_array::arrays::PrimitiveArray;
323    use vortex_array::{IntoArray, ToCanonical};
324    use vortex_buffer::Buffer;
325
326    use crate::BitPackedArray;
327
328    #[test]
343    fn test_encode() {
344        let values = [Some(1), None, Some(1), None, Some(1), None, Some(u64::MAX)];
345        let uncompressed = PrimitiveArray::from_option_iter(values);
346        let packed = BitPackedArray::encode(&uncompressed, 1).unwrap();
347        let expected = &[1, 0, 1, 0, 1, 0, u64::MAX];
348        let results = packed.to_primitive().unwrap().as_slice::<u64>().to_vec();
349        assert_eq!(results, expected);
350    }
351
352    #[test]
353    fn test_encode_too_wide() {
354        let values = [Some(1u8), None, Some(1), None, Some(1), None];
355        let uncompressed = PrimitiveArray::from_option_iter(values);
356        let _packed = BitPackedArray::encode(&uncompressed, 8)
357            .expect_err("Cannot pack value into the same width");
358        let _packed = BitPackedArray::encode(&uncompressed, 9)
359            .expect_err("Cannot pack value into larger width");
360    }
361
362    #[test]
363    fn signed_with_patches() {
364        let values: Buffer<i32> = (0i32..=512).collect();
365        let parray = values.clone().into_array();
366
367        let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap();
368        assert!(packed_with_patches.patches().is_some());
369        assert_eq!(
370            packed_with_patches
371                .to_primitive()
372                .unwrap()
373                .as_slice::<i32>(),
374            values.as_slice()
375        );
376    }
377}