vortex_fastlanes/bitpacking/
mod.rs1use std::fmt::Debug;
2
3pub use compress::*;
4use fastlanes::BitPacking;
5use vortex_array::arrays::PrimitiveArray;
6use vortex_array::builders::ArrayBuilder;
7use vortex_array::patches::Patches;
8use vortex_array::stats::{ArrayStats, StatsSetRef};
9use vortex_array::validity::Validity;
10use vortex_array::variants::PrimitiveArrayTrait;
11use vortex_array::vtable::VTableRef;
12use vortex_array::{
13 Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayRef, ArrayStatisticsImpl,
14 ArrayValidityImpl, ArrayVariantsImpl, Canonical, Encoding, RkyvMetadata, try_from_array_ref,
15};
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::{DType, NativePType, PType, match_each_integer_ptype_with_unsigned_type};
18use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
19use vortex_mask::Mask;
20
21use crate::bitpacking::serde::BitPackedMetadata;
22
23mod compress;
24mod compute;
25mod serde;
26
27#[derive(Clone, Debug)]
28pub struct BitPackedArray {
29 offset: u16,
30 len: usize,
31 dtype: DType,
32 bit_width: u8,
33 packed: ByteBuffer,
34 patches: Option<Patches>,
35 validity: Validity,
36 stats_set: ArrayStats,
37}
38
39try_from_array_ref!(BitPackedArray);
40
41pub struct BitPackedEncoding;
42impl Encoding for BitPackedEncoding {
43 type Array = BitPackedArray;
44 type Metadata = RkyvMetadata<BitPackedMetadata>;
45}
46
47impl BitPackedArray {
49 pub unsafe fn new_unchecked(
70 packed: ByteBuffer,
71 ptype: PType,
72 validity: Validity,
73 patches: Option<Patches>,
74 bit_width: u8,
75 len: usize,
76 ) -> VortexResult<Self> {
77 unsafe {
79 Self::new_unchecked_with_offset(packed, ptype, validity, patches, bit_width, len, 0)
80 }
81 }
82
83 pub(crate) unsafe fn new_unchecked_with_offset(
87 packed: ByteBuffer,
88 ptype: PType,
89 validity: Validity,
90 patches: Option<Patches>,
91 bit_width: u8,
92 length: usize,
93 offset: u16,
94 ) -> VortexResult<Self> {
95 let dtype = DType::Primitive(ptype, validity.nullability());
96 if !dtype.is_int() {
97 vortex_bail!(MismatchedTypes: "integer", dtype);
98 }
99
100 if bit_width > u64::BITS as u8 {
101 vortex_bail!("Unsupported bit width {}", bit_width);
102 }
103 if offset > 1023 {
104 vortex_bail!(
105 "Offset must be less than full block, i.e. 1024, got {}",
106 offset
107 );
108 }
109
110 if let Some(ref patches) = patches {
111 if !patches.dtype().eq_ignore_nullability(ptype.into()) {
113 vortex_bail!(
114 "Patches DType {} does not match BitPackedArray dtype {}",
115 patches.dtype().as_nonnullable(),
116 ptype
117 )
118 }
119 }
120
121 let expected_packed_size =
123 (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
124 if packed.len() != expected_packed_size {
125 return Err(vortex_err!(
126 "Expected {} packed bytes, got {}",
127 expected_packed_size,
128 packed.len()
129 ));
130 }
131
132 Ok(Self {
137 offset,
138 len: length,
139 dtype,
140 bit_width,
141 packed,
142 patches,
143 validity,
144 stats_set: Default::default(),
145 })
146 }
147
148 #[inline]
149 pub fn packed(&self) -> &ByteBuffer {
150 &self.packed
151 }
152
153 #[inline]
155 pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
156 let packed_bytes = self.packed();
157 let packed_ptr: *const T = packed_bytes.as_ptr().cast();
158 let packed_len = packed_bytes.len() / size_of::<T>();
160
161 unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
165 }
166
167 #[inline]
168 pub fn bit_width(&self) -> u8 {
169 self.bit_width
170 }
171
172 #[inline]
177 pub fn patches(&self) -> Option<&Patches> {
178 self.patches.as_ref()
179 }
180
181 pub fn replace_patches(&mut self, patches: Option<Patches>) {
182 self.patches = patches;
183 }
184
185 #[inline]
186 pub fn offset(&self) -> u16 {
187 self.offset
188 }
189
190 pub fn validity(&self) -> &Validity {
191 &self.validity
192 }
193
194 pub fn encode(array: &dyn Array, bit_width: u8) -> VortexResult<Self> {
206 if let Some(parray) = array.as_opt::<PrimitiveArray>() {
207 bitpack_encode(parray, bit_width)
208 } else {
209 vortex_bail!("Bitpacking can only encode primitive arrays");
210 }
211 }
212
213 #[inline]
217 fn max_packed_value(&self) -> usize {
218 (1 << self.bit_width()) - 1
219 }
220}
221
222impl ArrayImpl for BitPackedArray {
223 type Encoding = BitPackedEncoding;
224
225 fn _len(&self) -> usize {
226 self.len
227 }
228
229 fn _dtype(&self) -> &DType {
230 &self.dtype
231 }
232
233 fn _vtable(&self) -> VTableRef {
234 VTableRef::new_ref(&BitPackedEncoding)
235 }
236
237 fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
238 let patches = self.patches().map(|existing| {
239 let indices = children[0].clone();
240 let values = children[1].clone();
241 Patches::new(existing.array_len(), existing.offset(), indices, values)
242 });
243
244 let validity = if self.validity().is_array() {
245 Validity::Array(children[children.len() - 1].clone())
246 } else {
247 self.validity().clone()
248 };
249
250 unsafe {
251 Self::new_unchecked_with_offset(
252 self.packed().clone(),
253 self.ptype(),
254 validity,
255 patches,
256 self.bit_width(),
257 self.len(),
258 self.offset(),
259 )
260 }
261 }
262}
263
264impl ArrayCanonicalImpl for BitPackedArray {
265 fn _to_canonical(&self) -> VortexResult<Canonical> {
266 unpack(self).map(Canonical::Primitive)
267 }
268
269 fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
270 match_each_integer_ptype_with_unsigned_type!(self.ptype(), |$T, $UnsignedT| {
271 unpack_into::<$T, $UnsignedT, _, _>(
272 self,
273 builder
274 .as_any_mut()
275 .downcast_mut()
276 .vortex_expect("bit packed array must canonicalize into a primitive array"),
277 |x| unsafe { std::mem::transmute(x) },
280 |x| unsafe { std::mem::transmute(x) },
283 )
284 })
285 }
286}
287
288impl ArrayStatisticsImpl for BitPackedArray {
289 fn _stats_ref(&self) -> StatsSetRef<'_> {
290 self.stats_set.to_ref(self)
291 }
292}
293
294impl ArrayValidityImpl for BitPackedArray {
295 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
296 self.validity.is_valid(index)
297 }
298
299 fn _all_valid(&self) -> VortexResult<bool> {
300 self.validity.all_valid()
301 }
302
303 fn _all_invalid(&self) -> VortexResult<bool> {
304 self.validity.all_invalid()
305 }
306
307 fn _validity_mask(&self) -> VortexResult<Mask> {
308 self.validity.to_mask(self.len())
309 }
310}
311
312impl ArrayVariantsImpl for BitPackedArray {
313 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
314 Some(self)
315 }
316}
317
318impl PrimitiveArrayTrait for BitPackedArray {}
319
#[cfg(test)]
mod test {
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::Buffer;

    use crate::BitPackedArray;

    /// Packing at one bit round-trips the values, including `u64::MAX`; null slots
    /// decode to `0`.
    #[test]
    fn test_encode() {
        let input = PrimitiveArray::from_option_iter([
            Some(1),
            None,
            Some(1),
            None,
            Some(1),
            None,
            Some(u64::MAX),
        ]);
        let encoded = BitPackedArray::encode(&input, 1).unwrap();

        let decoded = encoded.to_primitive().unwrap().as_slice::<u64>().to_vec();
        let expected: Vec<u64> = vec![1, 0, 1, 0, 1, 0, u64::MAX];
        assert_eq!(decoded, expected);
    }

    /// Encoding `u8` values at a width equal to or larger than the type's own is rejected.
    #[test]
    fn test_encode_too_wide() {
        let input = PrimitiveArray::from_option_iter([Some(1u8), None, Some(1), None, Some(1), None]);

        let rejected_widths = [
            (8, "Cannot pack value into the same width"),
            (9, "Cannot pack value into larger width"),
        ];
        for (width, reason) in rejected_widths {
            let _err = BitPackedArray::encode(&input, width).expect_err(reason);
        }
    }

    /// Signed values that overflow the chosen width are kept as patches and still
    /// round-trip.
    #[test]
    fn signed_with_patches() {
        let source: Buffer<i32> = (0i32..=512).collect();
        let array = source.clone().into_array();

        let encoded = BitPackedArray::encode(&array, 9).unwrap();
        assert!(encoded.patches().is_some());

        let decoded = encoded.to_primitive().unwrap();
        assert_eq!(decoded.as_slice::<i32>(), source.as_slice());
    }
}