use fastlanes::BitPacking;
use vortex_array::ArrayRef;
use vortex_array::arrays::PrimitiveVTable;
use vortex_array::buffer::BufferHandle;
use vortex_array::dtype::DType;
use vortex_array::dtype::NativePType;
use vortex_array::dtype::PType;
use vortex_array::patches::Patches;
use vortex_array::stats::ArrayStats;
use vortex_array::validity::Validity;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
pub mod bitpack_compress;
pub mod bitpack_decompress;
pub mod unpack_iter;
use crate::bitpack_compress::bitpack_encode;
use crate::unpack_iter::BitPacked;
use crate::unpack_iter::BitUnpackedChunks;
/// The owned components of a [`BitPackedArray`], as returned by
/// [`BitPackedArray::into_parts`].
///
/// The array's dtype and statistics are not carried over into this struct.
///
/// `Clone` and `Debug` are derived to match [`BitPackedArray`], which derives the same
/// traits over the same field types.
#[derive(Clone, Debug)]
pub struct BitPackedArrayParts {
    /// The array's `offset` field (`BitPackedArray::try_new` requires it to be below 1024).
    pub offset: u16,
    /// The array's `bit_width` field (`BitPackedArray::try_new` requires it to be at most 64).
    pub bit_width: u8,
    /// The array's `len` field.
    pub len: usize,
    /// Handle to the buffer containing the packed data.
    pub packed: BufferHandle,
    /// Optional patches that were attached to the array.
    pub patches: Option<Patches>,
    /// Validity of the array.
    pub validity: Validity,
}
/// An integer array whose values are stored in a bit-packed buffer.
///
/// Instances built through `try_new` satisfy the checks in `validate`: the ptype is an
/// integer type, `bit_width <= 64`, `offset < 1024`, validity and patches (when they
/// carry a length) agree with `len`, and `packed` has exactly the expected byte length.
#[derive(Clone, Debug)]
pub struct BitPackedArray {
/// Offset that is added to `len` when the expected size of `packed` is computed;
/// `try_new` requires it to be below 1024.
/// NOTE(review): presumably the position of the first logical value inside the first
/// 1024-value block — confirm against the slicing code.
pub(super) offset: u16,
/// Length of the array; validity and patches lengths are validated against it.
pub(super) len: usize,
/// Logical dtype; `try_new` sets it to `DType::Primitive(ptype, validity.nullability())`.
pub(super) dtype: DType,
/// Bit width of the packed values; `try_new` rejects anything above 64.
pub(super) bit_width: u8,
/// Handle to the buffer with the packed data. `try_new` requires its length to be
/// `ceil((len + offset) / 1024) * 128 * bit_width` bytes.
pub(super) packed: BufferHandle,
/// Optional patches; when present, `try_new` requires their dtype to match the array's
/// ptype (ignoring nullability) and their array length to equal `len`.
/// NOTE(review): presumably these hold the values that do not fit in `bit_width` bits —
/// confirm against the encoder.
pub(super) patches: Option<Patches>,
/// Validity of the array; it also determines the nullability of `dtype` in `try_new`.
pub(super) validity: Validity,
/// Statistics for this array; `new_unchecked` initialises it with `Default::default()`.
pub(super) stats_set: ArrayStats,
}
impl BitPackedArray {
    /// Assembles a [`BitPackedArray`] directly from its components without any checks.
    ///
    /// The dtype is stored as given and the statistics start out as their default value.
    ///
    /// # Safety
    ///
    /// Nothing is validated here. [`Self::try_new`] runs `validate` before delegating to
    /// this function; callers that bypass it are presumably expected to guarantee the same
    /// conditions themselves: an integer ptype, `bit_width <= 64`, `offset < 1024`,
    /// validity and patches lengths equal to `len`, and a `packed` buffer of exactly
    /// `ceil((len + offset) / 1024) * 128 * bit_width` bytes.
    pub(crate) unsafe fn new_unchecked(
        packed: BufferHandle,
        dtype: DType,
        validity: Validity,
        patches: Option<Patches>,
        bit_width: u8,
        len: usize,
        offset: u16,
    ) -> Self {
        Self {
            offset,
            len,
            dtype,
            bit_width,
            packed,
            patches,
            validity,
            stats_set: Default::default(),
        }
    }

    /// Validates the given components and builds a [`BitPackedArray`] from them.
    ///
    /// The dtype of the result is `DType::Primitive(ptype, validity.nullability())`.
    ///
    /// # Errors
    ///
    /// Returns an error when any of the checks in `validate` fails:
    /// - `ptype` is not an integer type,
    /// - `bit_width` is greater than 64,
    /// - `validity` reports a length different from `length`,
    /// - `offset` is 1024 or more,
    /// - `patches` have a different dtype (ignoring nullability) or array length,
    /// - `packed` does not have exactly the expected number of bytes.
    pub fn try_new(
        packed: BufferHandle,
        ptype: PType,
        validity: Validity,
        patches: Option<Patches>,
        bit_width: u8,
        length: usize,
        offset: u16,
    ) -> VortexResult<Self> {
        Self::validate(
            &packed,
            ptype,
            &validity,
            patches.as_ref(),
            bit_width,
            length,
            offset,
        )?;

        let dtype = DType::Primitive(ptype, validity.nullability());

        // SAFETY: `validate` has just accepted exactly these components.
        Ok(unsafe {
            Self::new_unchecked(packed, dtype, validity, patches, bit_width, length, offset)
        })
    }

    /// Checks that the components describe a consistent bit-packed array.
    ///
    /// See [`Self::try_new`] for the list of conditions. The expected packed size is
    /// computed with checked arithmetic, so absurdly large lengths produce an error
    /// instead of an overflow panic (debug builds) or a wrapped size (release builds).
    fn validate(
        packed: &BufferHandle,
        ptype: PType,
        validity: &Validity,
        patches: Option<&Patches>,
        bit_width: u8,
        length: usize,
        offset: u16,
    ) -> VortexResult<()> {
        vortex_ensure!(ptype.is_int(), MismatchedTypes: "integer", ptype);
        vortex_ensure!(bit_width <= 64, "Unsupported bit width {bit_width}");

        // Only validity representations that carry a length can be compared.
        if let Some(validity_len) = validity.maybe_len() {
            vortex_ensure!(
                validity_len == length,
                "BitPackedArray validity length {validity_len} != array length {length}",
            );
        }

        vortex_ensure!(
            offset < 1024,
            "Offset must be less than the full block i.e., 1024, got {offset}"
        );

        if let Some(patches) = patches {
            Self::validate_patches(patches, ptype, length)?;
        }

        // A block of 1024 values at `bit_width` bits each takes 1024 * bit_width / 8 bytes,
        // i.e. 128 * bit_width. `bit_width <= 64` was checked above, so this cannot overflow.
        let block_bytes = 128 * usize::from(bit_width);
        let expected = length
            .checked_add(usize::from(offset))
            .and_then(|total| total.div_ceil(1024).checked_mul(block_bytes));
        let Some(expected_packed_len) = expected else {
            vortex_bail!(
                "BitPackedArray length {length} with offset {offset} overflows the packed size"
            );
        };

        vortex_ensure!(
            packed.len() == expected_packed_len,
            "Expected {} packed bytes, got {}",
            expected_packed_len,
            packed.len()
        );

        Ok(())
    }

    /// Checks that `patches` match the array's `ptype` (ignoring nullability) and that
    /// their array length equals `len`.
    fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> {
        vortex_ensure!(
            patches.dtype().eq_ignore_nullability(ptype.into()),
            "Patches DType {} does not match BitPackedArray dtype {}",
            patches.dtype().as_nonnullable(),
            ptype
        );

        vortex_ensure!(
            patches.array_len() == len,
            "BitPackedArray patches length {} != expected {len}",
            patches.array_len(),
        );

        Ok(())
    }

    /// Returns the primitive type of the array, derived from its dtype.
    pub fn ptype(&self) -> PType {
        self.dtype.as_ptype()
    }

    /// Returns the handle of the buffer holding the packed data.
    #[inline]
    pub fn packed(&self) -> &BufferHandle {
        &self.packed
    }

    /// Reinterprets the host bytes of the packed buffer as a slice of `T`.
    ///
    /// The slice contains `byte_len / size_of::<T>()` elements; trailing bytes that do not
    /// fill a whole `T` are not included.
    #[inline]
    pub fn packed_slice<T: NativePType + BitPacking>(&self) -> &[T] {
        let packed_bytes = self.packed().as_host();
        let packed_ptr: *const T = packed_bytes.as_ptr().cast();
        // Number of whole `T` elements that fit in the buffer.
        let packed_len = packed_bytes.len() / size_of::<T>();

        // SAFETY: pointer and length are derived from bytes borrowed from `self.packed`, and
        // the element count is rounded down, so the slice never extends past those bytes.
        // NOTE(review): `from_raw_parts` also requires the pointer to be aligned for `T`;
        // nothing here checks that — confirm the buffer's alignment guarantee.
        unsafe { std::slice::from_raw_parts(packed_ptr, packed_len) }
    }

    /// Creates a [`BitUnpackedChunks`] over this array for the element type `T`.
    ///
    /// # Panics
    ///
    /// Panics if `T::PTYPE` differs from the array's ptype.
    pub fn unpacked_chunks<T: BitPacked>(&self) -> BitUnpackedChunks<T> {
        assert_eq!(
            T::PTYPE,
            self.ptype(),
            "Requested type doesn't match the array ptype"
        );
        BitUnpackedChunks::new(self)
    }

    /// Returns the bit width of the packed values.
    #[inline]
    pub fn bit_width(&self) -> u8 {
        self.bit_width
    }

    /// Returns the patches attached to this array, if any.
    #[inline]
    pub fn patches(&self) -> Option<&Patches> {
        self.patches.as_ref()
    }

    /// Replaces the patches of this array.
    ///
    /// No validation is performed: unlike [`Self::try_new`], the dtype and length of the
    /// new patches are not checked, so the caller is responsible for their consistency.
    pub fn replace_patches(&mut self, patches: Option<Patches>) {
        self.patches = patches;
    }

    /// Returns the array's `offset` field.
    #[inline]
    pub fn offset(&self) -> u16 {
        self.offset
    }

    /// Bit-packs a primitive array to the requested `bit_width`.
    ///
    /// Delegates to `bitpack_encode`, passing `None` as its third argument.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidArgument` error if `array` is not a primitive array, and
    /// propagates any error from `bitpack_encode`.
    pub fn encode(array: &ArrayRef, bit_width: u8) -> VortexResult<Self> {
        let Some(parray) = array.as_opt::<PrimitiveVTable>() else {
            vortex_bail!(InvalidArgument: "Bitpacking can only encode primitive arrays");
        };
        bitpack_encode(parray, bit_width, None)
    }

    /// Returns the largest value representable in `bit_width` bits, i.e. `2^bit_width - 1`.
    ///
    /// When `bit_width` is at least the width of `usize` (e.g. 64 on 64-bit targets, which
    /// `try_new` accepts) the result saturates to `usize::MAX`. The plain `1 << bit_width`
    /// would overflow the shift there: a panic in debug builds and a result of 0 in
    /// release builds.
    #[inline]
    pub fn max_packed_value(&self) -> usize {
        match 1usize.checked_shl(u32::from(self.bit_width())) {
            Some(limit) => limit - 1,
            None => usize::MAX,
        }
    }

    /// Consumes the array and returns its components as [`BitPackedArrayParts`].
    ///
    /// The dtype and the statistics are dropped.
    pub fn into_parts(self) -> BitPackedArrayParts {
        BitPackedArrayParts {
            offset: self.offset,
            bit_width: self.bit_width,
            len: self.len,
            packed: self.packed,
            patches: self.patches,
            validity: self.validity,
        }
    }
}
#[cfg(test)]
mod test {
    use vortex_array::IntoArray;
    use vortex_array::ToCanonical;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::validity::Validity;
    use vortex_buffer::Buffer;

    use crate::BitPackedArray;

    /// A nullable `u64` array containing a value far wider than the requested bit width
    /// must still decode back to the original values.
    #[test]
    fn test_encode() {
        let input = [
            Some(1u64),
            None,
            Some(1),
            None,
            Some(1),
            None,
            Some(u64::MAX),
        ];

        let source = PrimitiveArray::from_option_iter(input).to_array();
        let encoded = BitPackedArray::encode(&source, 1).unwrap();

        let roundtrip_target = PrimitiveArray::from_option_iter(input);
        assert_arrays_eq!(encoded.to_primitive(), roundtrip_target);
    }

    /// Encoding `u8` values with a bit width of 8 or more must be rejected.
    #[test]
    fn test_encode_too_wide() {
        let source =
            PrimitiveArray::from_option_iter([Some(1u8), None, Some(1), None, Some(1), None])
                .to_array();

        let rejected_widths = [
            (8u8, "Cannot pack value into the same width"),
            (9u8, "Cannot pack value into larger width"),
        ];
        for (width, reason) in rejected_widths {
            let _err = BitPackedArray::encode(&source, width).expect_err(reason);
        }
    }

    /// `0..=512` packed at 9 bits: 512 exceeds the 9-bit maximum of 511, so the encoded
    /// array must carry patches and still decode to the input.
    #[test]
    fn signed_with_patches() {
        let input: Buffer<i32> = (0i32..=512).collect();

        let encoded = BitPackedArray::encode(&input.clone().into_array(), 9).unwrap();
        assert!(encoded.patches().is_some());

        let roundtrip_target = PrimitiveArray::new(input, Validity::NonNullable);
        assert_arrays_eq!(encoded.to_primitive(), roundtrip_target);
    }
}