use std::fmt::Display;
use std::fmt::Formatter;
use fastlanes::BitPacking;
use vortex_array::ArrayRef;
use vortex_array::TypedArrayRef;
use vortex_array::array_slots;
use vortex_array::arrays::Primitive;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::buffer::BufferHandle;
use vortex_array::dtype::DType;
use vortex_array::dtype::NativePType;
use vortex_array::dtype::PType;
use vortex_array::patches::Patches;
use vortex_array::validity::Validity;
use vortex_array::vtable::child_to_validity;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;
pub mod bitpack_compress;
pub mod bitpack_decompress;
pub mod unpack_iter;
use crate::BitPackedArray;
use crate::bitpack_compress::bitpack_encode;
use crate::unpack_iter::BitPacked as BitPackedIter;
use crate::unpack_iter::BitUnpackedChunks;
/// Optional child arrays of a bit-packed array, declared through the `array_slots` attribute.
///
/// In this file the three patch slots are read back by `BitPackedArrayExt::patches` and
/// `validity_child` is read back by `BitPackedArrayExt::validity`.
#[array_slots(crate::BitPacked)]
pub struct BitPackedSlots {
/// Indices child of the patches. `patches()` yields `None` unless both this slot and
/// `patch_values` are present.
pub patch_indices: Option<ArrayRef>,
/// Values child of the patches; paired with `patch_indices`.
pub patch_values: Option<ArrayRef>,
/// Optional chunk-offsets child, forwarded unchanged to `Patches::new_unchecked`.
pub patch_chunk_offsets: Option<ArrayRef>,
/// Validity child; combined with the dtype's nullability by `child_to_validity`.
pub validity_child: Option<ArrayRef>,
}
/// Plain bundle of the pieces that describe a bit-packed array.
///
/// NOTE(review): this struct is not constructed or consumed in this file; presumably it is the
/// result of taking a bit-packed array apart — confirm against its users.
pub struct BitPackedDataParts {
/// Offset value; same type as the `offset` accepted by `BitPackedData::try_new`.
pub offset: u16,
/// Bit width; same type as the `bit_width` accepted by `BitPackedData::try_new`.
pub bit_width: u8,
/// Length; presumably the logical element count of the array — TODO confirm.
pub len: usize,
/// Handle to the packed buffer.
pub packed: BufferHandle,
/// Patches, if any.
pub patches: Option<Patches>,
/// Validity of the array.
pub validity: Validity,
}
/// Encoding-specific state of a bit-packed array: the packed buffer plus the parameters needed
/// to interpret it. Child arrays (patches, validity) are not stored here.
#[derive(Clone, Debug)]
pub struct BitPackedData {
/// Offset into the packed data; `try_new` requires it to be less than 1024. `validate` adds
/// it to the array length when computing how many 1024-value blocks the buffer must hold.
pub(super) offset: u16,
/// Bits used per packed value; `try_new` requires it to be at most 64.
pub(super) bit_width: u8,
/// Handle to the buffer holding the packed bytes.
pub(super) packed: BufferHandle,
/// `Patches::offset()` of the patches passed to `try_new`, or `None` when there were none.
pub(super) patch_offset: Option<usize>,
/// `Patches::offset_within_chunk()` of the patches passed to `try_new`; `None` when there
/// were no patches or the patches reported none.
pub(super) patch_offset_within_chunk: Option<usize>,
}
/// Human-readable summary of the packing parameters: the bit width followed by the offset.
impl Display for BitPackedData {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Only the two scalar parameters are shown; the buffer and patch metadata are omitted.
        let Self {
            bit_width, offset, ..
        } = self;
        f.write_fmt(format_args!(
            "bit_width: {}, offset: {}",
            bit_width, offset
        ))
    }
}
impl BitPackedData {
    /// Builds the encoding state for a bit-packed array.
    ///
    /// Only `bit_width` and `offset` are checked here; the packed buffer length, validity and
    /// patches are checked separately by [`Self::validate`]. When `patches` is provided, only
    /// its `offset()` and `offset_within_chunk()` are recorded; the patches value itself is
    /// dropped.
    ///
    /// # Errors
    ///
    /// Returns an error if `bit_width > 64` or `offset >= 1024`.
    pub fn try_new(
        packed: BufferHandle,
        patches: Option<Patches>,
        bit_width: u8,
        offset: u16,
    ) -> VortexResult<Self> {
        vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
        vortex_ensure!(
            offset < 1024,
            "Offset must be less than the full block i.e., 1024, got {offset}"
        );
        let (patch_offset, patch_offset_within_chunk) = match &patches {
            Some(p) => (Some(p.offset()), p.offset_within_chunk()),
            None => (None, None),
        };
        Ok(Self {
            offset,
            bit_width,
            packed,
            patch_offset,
            patch_offset_within_chunk,
        })
    }

    /// Checks that the given components are mutually consistent for a bit-packed array of
    /// `length` elements of primitive type `ptype`.
    ///
    /// # Errors
    ///
    /// Returns an error if any of the following holds:
    /// - `ptype` is not an integer type,
    /// - `bit_width > 64`,
    /// - `offset >= 1024`,
    /// - `validity` reports a length different from `length`,
    /// - `patches` has a mismatching dtype or array length,
    /// - `packed` does not contain exactly the expected number of bytes.
    pub(crate) fn validate(
        packed: &BufferHandle,
        ptype: PType,
        validity: &Validity,
        patches: Option<&Patches>,
        bit_width: u8,
        length: usize,
        offset: u16,
    ) -> VortexResult<()> {
        vortex_ensure!(ptype.is_int(), MismatchedTypes: "integer", ptype);
        vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");
        // Same bound that `try_new` enforces; without it an out-of-range offset would only be
        // caught indirectly (or not at all) by the packed-length check below.
        vortex_ensure!(
            offset < 1024,
            "Offset must be less than the full block i.e., 1024, got {offset}"
        );
        if let Some(validity_len) = validity.maybe_len() {
            vortex_ensure!(
                validity_len == length,
                "BitPackedArray validity length {validity_len} != array length {length}",
            );
        }
        if let Some(patches) = patches {
            Self::validate_patches(patches, ptype, length)?;
        }
        // Values are packed in blocks of 1024; each block occupies 1024 * bit_width / 8
        // = 128 * bit_width bytes. The offset counts towards the number of blocks needed.
        let expected_packed_len =
            (length + usize::from(offset)).div_ceil(1024) * (128 * usize::from(bit_width));
        vortex_ensure!(
            packed.len() == expected_packed_len,
            "Expected {} packed bytes, got {}",
            expected_packed_len,
            packed.len()
        );
        Ok(())
    }

    /// Checks that `patches` matches the array's primitive type (ignoring nullability) and
    /// that it was built for an array of `len` elements.
    fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> {
        vortex_ensure!(
            patches.dtype().eq_ignore_nullability(ptype.into()),
            "Patches DType {} does not match BitPackedArray dtype {}",
            patches.dtype().as_nonnullable(),
            ptype
        );
        vortex_ensure!(
            patches.array_len() == len,
            "BitPackedArray patches length {} != expected {len}",
            patches.array_len(),
        );
        Ok(())
    }

    /// Returns the primitive type of `dtype`, as reported by `DType::as_ptype`.
    pub fn ptype(&self, dtype: &DType) -> PType {
        dtype.as_ptype()
    }

    /// Returns the handle to the packed buffer.
    #[inline]
    pub fn packed(&self) -> &BufferHandle {
        &self.packed
    }

    /// Reinterprets the host bytes of the packed buffer as a slice of `T`.
    ///
    /// The slice length is the byte length divided by `size_of::<T>()`, rounded down, so any
    /// trailing partial element is not exposed.
    #[inline]
    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
        let packed_bytes = self.packed().as_host();
        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
        let packed_len = packed_bytes.len() / size_of::<T>();
        // SAFETY: `packed_len * size_of::<T>()` never exceeds the byte length of the buffer, so
        // the slice stays inside the allocation borrowed from `self`.
        // NOTE(review): this also relies on the buffer being suitably aligned for `T` and on
        // every bit pattern being a valid `T` — confirm the buffer's alignment guarantee.
        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
    }

    /// Creates a chunked unpacking view over the first `len` values, typed as `T`.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the primitive type of `dtype`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `BitUnpackedChunks::try_new`.
    pub fn unpacked_chunks<T: BitPackedIter>(
        &self,
        dtype: &DType,
        len: usize,
    ) -> VortexResult<BitUnpackedChunks<T>> {
        assert_eq!(
            T::PTYPE,
            self.ptype(dtype),
            "Requested type doesn't match the array ptype"
        );
        BitUnpackedChunks::try_new(self, len)
    }

    /// Returns the number of bits used per packed value.
    #[inline]
    pub fn bit_width(&self) -> u8 {
        self.bit_width
    }

    /// Returns the offset into the packed data (always less than 1024 for values built via
    /// [`Self::try_new`]).
    #[inline]
    pub fn offset(&self) -> u16 {
        self.offset
    }

    /// Bit-packs a primitive array using `bit_width` bits per value.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidArgument` error if `array` is not a primitive array, and propagates
    /// any error from `bitpack_encode`.
    pub fn encode(array: &ArrayRef, bit_width: u8) -> VortexResult<BitPackedArray> {
        let parray: PrimitiveArray = array.clone().try_downcast::<Primitive>().map_err(|a| {
            vortex_err!(
                InvalidArgument: "Bitpacking can only encode primitive arrays, got {}",
                a.encoding_id()
            )
        })?;
        bitpack_encode(&parray, bit_width, None)
    }

    /// Returns the largest value representable in `bit_width` bits, i.e. `2^bit_width - 1`.
    ///
    /// When `bit_width` is at least the width of `usize` (for example 64 on a 64-bit target),
    /// the result saturates at `usize::MAX` instead of overflowing the shift.
    #[inline]
    pub fn max_packed_value(&self) -> usize {
        // `1 << bit_width` overflows once `bit_width` reaches `usize::BITS`: a plain shift
        // would panic in debug builds and wrap to a wrong result in release builds.
        match 1usize.checked_shl(u32::from(self.bit_width())) {
            Some(limit) => limit - 1,
            None => usize::MAX,
        }
    }
}
/// Accessors for typed references to a bit-packed array, combining the encoding state in
/// `BitPackedData` with the child slots exposed by `BitPackedArraySlotsExt`.
pub trait BitPackedArrayExt: BitPackedArraySlotsExt {
    /// Handle to the packed buffer; delegates to `BitPackedData::packed`.
    #[inline]
    fn packed(&self) -> &BufferHandle {
        BitPackedData::packed(self)
    }

    /// Bits used per packed value; delegates to `BitPackedData::bit_width`.
    #[inline]
    fn bit_width(&self) -> u8 {
        BitPackedData::bit_width(self)
    }

    /// Offset into the packed data; delegates to `BitPackedData::offset`.
    #[inline]
    fn offset(&self) -> u16 {
        BitPackedData::offset(self)
    }

    /// Reassembles the array's patches from its child slots.
    ///
    /// Returns `None` unless both the patch indices and patch values slots are present.
    ///
    /// # Panics
    ///
    /// Panics if the patch slots are present but no patch offset was recorded.
    #[inline]
    fn patches(&self) -> Option<Patches> {
        let (Some(indices), Some(values)) = (self.patch_indices(), self.patch_values()) else {
            return None;
        };
        let patch_offset = self
            .patch_offset
            .vortex_expect("has patch slots but no patch_offset");
        let array_len = self.as_ref().len();
        let chunk_offsets = self.patch_chunk_offsets().cloned();
        let offset_within_chunk = self.patch_offset_within_chunk;
        // NOTE(review): `new_unchecked` skips validation; this presumably relies on the slots
        // and recorded offsets having been validated when the array was built — confirm.
        let patches = unsafe {
            Patches::new_unchecked(
                array_len,
                patch_offset,
                indices.clone(),
                values.clone(),
                chunk_offsets,
                offset_within_chunk,
            )
        };
        Some(patches)
    }

    /// Validity of the array, derived from the validity child slot and the dtype's
    /// nullability.
    #[inline]
    fn validity(&self) -> Validity {
        let child = self.validity_child().cloned();
        let nullability = self.as_ref().dtype().nullability();
        child_to_validity(&child, nullability)
    }

    /// Validity rendered as a mask covering the whole array length.
    #[inline]
    fn validity_mask(&self) -> vortex_mask::Mask {
        let validity = self.validity();
        let len = self.as_ref().len();
        validity.to_mask(len)
    }

    /// Packed buffer viewed as a slice of `T`; delegates to `BitPackedData::packed_slice`.
    #[inline]
    fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
        BitPackedData::packed_slice::<T>(self)
    }

    /// Chunked unpacking view over the whole array, using the array's own dtype and length.
    #[inline]
    fn unpacked_chunks<T: BitPackedIter>(&self) -> VortexResult<BitUnpackedChunks<T>> {
        let dtype = self.as_ref().dtype();
        let len = self.as_ref().len();
        BitPackedData::unpacked_chunks::<T>(self, dtype, len)
    }
}
/// Every typed reference to a `BitPacked` array gets the extension methods for free.
impl<T> BitPackedArrayExt for T where T: TypedArrayRef<crate::BitPacked> {}
#[cfg(test)]
mod test {
    use vortex_array::IntoArray;
    use vortex_array::ToCanonical;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::validity::Validity;
    use vortex_buffer::Buffer;

    use crate::BitPackedData;
    use crate::bitpacking::array::BitPackedArrayExt;

    /// A nullable `u64` array packed at 1 bit must decode back to the original values,
    /// including `u64::MAX`, which cannot be represented in a single bit.
    #[test]
    fn test_encode() {
        let input = [
            Some(1u64),
            None,
            Some(1),
            None,
            Some(1),
            None,
            Some(u64::MAX),
        ];
        let source = PrimitiveArray::from_option_iter(input).into_array();
        let encoded = BitPackedData::encode(&source, 1).unwrap();
        let decoded = encoded.as_array().to_primitive();
        assert_arrays_eq!(decoded, PrimitiveArray::from_option_iter(input));
    }

    /// Packing `u8` values at 8 bits or more must be rejected.
    #[test]
    fn test_encode_too_wide() {
        let input = [Some(1u8), None, Some(1), None, Some(1), None];
        let source = PrimitiveArray::from_option_iter(input).into_array();
        let _same_width =
            BitPackedData::encode(&source, 8).expect_err("Cannot pack value into the same width");
        let _wider =
            BitPackedData::encode(&source, 9).expect_err("Cannot pack value into larger width");
    }

    /// Signed values `0..=512` packed at 9 bits: 512 exceeds the 9-bit maximum of 511, so the
    /// encoded array must carry patches and still decode to the original values.
    #[test]
    fn signed_with_patches() {
        let values: Buffer<i32> = (0i32..=512).collect();
        let source = values.clone().into_array();
        let encoded = BitPackedData::encode(&source, 9).unwrap();
        assert!(encoded.patches().is_some());
        let expected = PrimitiveArray::new(values, Validity::NonNullable);
        assert_arrays_eq!(encoded.as_array().to_primitive(), expected);
    }
}