vortex_fastlanes/bitpacking/array/
mod.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6
7use fastlanes::BitPacking;
8use vortex_array::ArrayRef;
9use vortex_array::ExecutionCtx;
10use vortex_array::TypedArrayRef;
11use vortex_array::array_slots;
12use vortex_array::arrays::Primitive;
13use vortex_array::arrays::PrimitiveArray;
14use vortex_array::buffer::BufferHandle;
15use vortex_array::dtype::DType;
16use vortex_array::dtype::NativePType;
17use vortex_array::dtype::PType;
18use vortex_array::patches::PatchSlotIndices;
19use vortex_array::patches::Patches;
20use vortex_array::patches::PatchesData;
21use vortex_array::validity::Validity;
22use vortex_array::vtable::child_to_validity;
23use vortex_error::VortexResult;
24use vortex_error::vortex_ensure;
25use vortex_error::vortex_err;
26
27pub mod bitpack_compress;
28pub mod bitpack_decompress;
29pub mod unpack_iter;
30
31use crate::BitPackedArray;
32use crate::bitpack_compress::bitpack_encode;
33use crate::unpack_iter::BitPacked as BitPackedIter;
34use crate::unpack_iter::BitUnpackedChunks;
35
36#[array_slots(crate::BitPacked)]
37pub struct BitPackedSlots {
38 pub patch_indices: Option<ArrayRef>,
40 pub patch_values: Option<ArrayRef>,
42 pub patch_chunk_offsets: Option<ArrayRef>,
44 pub validity_child: Option<ArrayRef>,
46}
47
48pub(crate) const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices {
49 indices: BitPackedSlots::PATCH_INDICES,
50 values: BitPackedSlots::PATCH_VALUES,
51 chunk_offsets: BitPackedSlots::PATCH_CHUNK_OFFSETS,
52};
53
54pub struct BitPackedDataParts {
55 pub offset: u16,
56 pub bit_width: u8,
57 pub len: usize,
58 pub packed: BufferHandle,
59 pub patches: Option<Patches>,
60 pub validity: Validity,
61}
62
63#[derive(Clone, Debug)]
64pub struct BitPackedData {
65 pub(super) offset: u16,
68 pub(super) bit_width: u8,
69 pub(super) packed: BufferHandle,
70 pub(super) patches_data: Option<PatchesData>,
72}
73
74impl Display for BitPackedData {
75 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
76 write!(f, "bit_width: {}, offset: {}", self.bit_width, self.offset)
77 }
78}
79
80impl BitPackedData {
81 pub fn try_new(
123 packed: BufferHandle,
124 patches: Option<Patches>,
125 bit_width: u8,
126 offset: u16,
127 ) -> VortexResult<Self> {
128 vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
129 vortex_ensure!(
130 offset < 1024,
131 "Offset must be less than the full block i.e., 1024, got {offset}"
132 );
133
134 Ok(Self {
135 offset,
136 bit_width,
137 packed,
138 patches_data: patches.as_ref().map(PatchesData::from_patches),
139 })
140 }
141
142 pub(crate) fn validate(
143 packed: &BufferHandle,
144 ptype: PType,
145 validity: &Validity,
146 patches: Option<&Patches>,
147 bit_width: u8,
148 length: usize,
149 offset: u16,
150 ) -> VortexResult<()> {
151 vortex_ensure!(ptype.is_int(), MismatchedTypes: "integer", ptype);
152 vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
153
154 if let Some(validity_len) = validity.maybe_len() {
155 vortex_ensure!(
156 validity_len == length,
157 "BitPackedArray validity length {validity_len} != array length {length}",
158 );
159 }
160
161 if let Some(patches) = patches {
163 Self::validate_patches(patches, ptype, length)?;
164 }
165
166 let expected_packed_len =
168 (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize);
169 vortex_ensure!(
170 packed.len() == expected_packed_len,
171 "Expected {} packed bytes, got {}",
172 expected_packed_len,
173 packed.len()
174 );
175
176 Ok(())
177 }
178
179 fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> {
180 vortex_ensure!(
182 patches.dtype().eq_ignore_nullability(ptype.into()),
183 "Patches DType {} does not match BitPackedArray dtype {}",
184 patches.dtype().as_nonnullable(),
185 ptype
186 );
187
188 vortex_ensure!(
189 patches.array_len() == len,
190 "BitPackedArray patches length {} != expected {len}",
191 patches.array_len(),
192 );
193
194 Ok(())
195 }
196
197 pub fn ptype(&self, dtype: &DType) -> PType {
198 dtype.as_ptype()
199 }
200
201 #[inline]
203 pub fn packed(&self) -> &BufferHandle {
204 &self.packed
205 }
206
207 #[inline]
209 pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
210 let packed_bytes = self.packed().as_host();
211 let packed_ptr: *const T = packed_bytes.as_ptr().cast();
212 let packed_len = packed_bytes.len() / size_of::<T>();
214
215 unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
219 }
220
221 pub fn unpacked_chunks<T: BitPackedIter>(
223 &self,
224 dtype: &DType,
225 len: usize,
226 ) -> VortexResult<BitUnpackedChunks<T>> {
227 assert_eq!(
228 T::PTYPE,
229 self.ptype(dtype),
230 "Requested type doesn't match the array ptype"
231 );
232 BitUnpackedChunks::try_new(self, len)
233 }
234
235 #[inline]
237 pub fn bit_width(&self) -> u8 {
238 self.bit_width
239 }
240
241 #[inline]
242 pub fn offset(&self) -> u16 {
243 self.offset
244 }
245
246 pub fn encode(
258 array: &ArrayRef,
259 bit_width: u8,
260 ctx: &mut ExecutionCtx,
261 ) -> VortexResult<BitPackedArray> {
262 let parray: PrimitiveArray = array
263 .clone()
264 .try_downcast::<Primitive>()
265 .map_err(|a| vortex_err!(InvalidArgument: "Bitpacking can only encode primitive arrays, got {}", a.encoding_id()))?;
266 bitpack_encode(&parray, bit_width, None, ctx)
267 }
268
269 #[inline]
273 pub fn max_packed_value(&self) -> usize {
274 (1 << self.bit_width()) - 1
275 }
276}
277
278pub trait BitPackedArrayExt: BitPackedArraySlotsExt {
279 #[inline]
280 fn packed(&self) -> &BufferHandle {
281 BitPackedData::packed(self)
282 }
283
284 #[inline]
285 fn bit_width(&self) -> u8 {
286 BitPackedData::bit_width(self)
287 }
288
289 #[inline]
290 fn offset(&self) -> u16 {
291 BitPackedData::offset(self)
292 }
293
294 #[inline]
295 fn patches(&self) -> Option<Patches> {
296 PatchesData::patches_from_slots(
297 self.patches_data.as_ref(),
298 self.as_ref().len(),
299 self.as_ref().slots(),
300 PATCH_SLOTS,
301 )
302 }
303
304 #[inline]
305 fn validity(&self) -> Validity {
306 child_to_validity(self.validity_child(), self.as_ref().dtype().nullability())
307 }
308
309 #[inline]
310 fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
311 BitPackedData::packed_slice::<T>(self)
312 }
313
314 #[inline]
315 fn unpacked_chunks<T: BitPackedIter>(&self) -> VortexResult<BitUnpackedChunks<T>> {
316 BitPackedData::unpacked_chunks::<T>(self, self.as_ref().dtype(), self.as_ref().len())
317 }
318}
319
320impl<T: TypedArrayRef<crate::BitPacked>> BitPackedArrayExt for T {}
321
322#[cfg(test)]
323mod test {
324 use std::sync::LazyLock;
325
326 use vortex_array::IntoArray;
327 use vortex_array::VortexSessionExecute;
328 use vortex_array::arrays::PrimitiveArray;
329 use vortex_array::assert_arrays_eq;
330 use vortex_array::session::ArraySession;
331 use vortex_buffer::Buffer;
332 use vortex_session::VortexSession;
333
334 use crate::BitPackedData;
335 use crate::bitpacking::array::BitPackedArrayExt;
336
337 static SESSION: LazyLock<VortexSession> =
338 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
339
340 #[test]
341 fn test_encode() {
342 let mut ctx = SESSION.create_execution_ctx();
343 let values = [
344 Some(1u64),
345 None,
346 Some(1),
347 None,
348 Some(1),
349 None,
350 Some(u64::MAX),
351 ];
352 let uncompressed = PrimitiveArray::from_option_iter(values);
353 let packed = BitPackedData::encode(&uncompressed.into_array(), 1, &mut ctx).unwrap();
354 let expected = PrimitiveArray::from_option_iter(values);
355 let packed_primitive = packed
356 .as_array()
357 .clone()
358 .execute::<PrimitiveArray>(&mut ctx)
359 .unwrap();
360 assert_arrays_eq!(packed_primitive, expected);
361 }
362
363 #[test]
364 fn test_encode_too_wide() {
365 let mut ctx = SESSION.create_execution_ctx();
366 let values = [Some(1u8), None, Some(1), None, Some(1), None];
367 let uncompressed = PrimitiveArray::from_option_iter(values);
368 let _packed = BitPackedData::encode(&uncompressed.clone().into_array(), 8, &mut ctx)
369 .expect_err("Cannot pack value into the same width");
370 let _packed = BitPackedData::encode(&uncompressed.into_array(), 9, &mut ctx)
371 .expect_err("Cannot pack value into larger width");
372 }
373
374 #[test]
375 fn signed_with_patches() {
376 let mut ctx = SESSION.create_execution_ctx();
377 let values: Buffer<i32> = (0i32..=512).collect();
378 let parray = values.clone().into_array();
379
380 let packed_with_patches = BitPackedData::encode(&parray, 9, &mut ctx).unwrap();
381 assert!(packed_with_patches.patches().is_some());
382 let packed_primitive = packed_with_patches
383 .as_array()
384 .clone()
385 .execute::<PrimitiveArray>(&mut ctx)
386 .unwrap();
387 assert_arrays_eq!(
388 packed_primitive,
389 PrimitiveArray::new(values, vortex_array::validity::Validity::NonNullable)
390 );
391 }
392}