use std::{iter::FromIterator, ops::Deref, sync::Arc};
use either::Either;
use crate::{buffer::Bytes, error::Error, trusted_len::TrustedLen};
use super::{
chunk_iter_to_vec,
utils::{count_zeros, fmt, get_bit, get_bit_unchecked, BitChunk, BitChunks, BitmapIter},
IntoIter, MutableBitmap,
};
#[derive(Clone)]
pub struct Bitmap {
bytes: Arc<Bytes<u8>>,
offset: usize,
length: usize,
unset_bits: usize,
}
impl std::fmt::Debug for Bitmap {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let (bytes, offset, len) = self.as_slice();
fmt(bytes, offset, len, f)
}
}
impl Default for Bitmap {
fn default() -> Self {
MutableBitmap::new().into()
}
}
pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> Result<(), Error> {
if offset + length > bytes.len().saturating_mul(8) {
return Err(Error::InvalidArgumentError(format!(
"The offset + length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
offset + length,
bytes.len().saturating_mul(8)
)));
}
Ok(())
}
impl Bitmap {
#[inline]
pub fn new() -> Self {
Self::default()
}
#[inline]
pub fn try_new(bytes: Vec<u8>, length: usize) -> Result<Self, Error> {
check(&bytes, 0, length)?;
let unset_bits = count_zeros(&bytes, 0, length);
Ok(Self {
length,
offset: 0,
bytes: Arc::new(bytes.into()),
unset_bits,
})
}
#[inline]
pub fn len(&self) -> usize {
self.length
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn iter(&self) -> BitmapIter {
BitmapIter::new(&self.bytes, self.offset, self.length)
}
pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
BitChunks::new(&self.bytes, self.offset, self.length)
}
#[inline]
pub fn as_slice(&self) -> (&[u8], usize, usize) {
let start = self.offset / 8;
let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
(
&self.bytes[start..start + len],
self.offset % 8,
self.length,
)
}
pub const fn unset_bits(&self) -> usize {
self.unset_bits
}
#[inline]
#[deprecated(since = "0.13.0", note = "use `unset_bits` instead")]
pub fn null_count(&self) -> usize {
self.unset_bits
}
#[inline]
pub fn slice(&mut self, offset: usize, length: usize) {
assert!(offset + length <= self.length);
unsafe { self.slice_unchecked(offset, length) }
}
#[inline]
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
if !(offset == 0 && length == self.length) {
if length < self.length / 2 {
self.unset_bits = count_zeros(&self.bytes, self.offset + offset, length);
} else {
let start_end = self.offset + offset + length;
let head_count = count_zeros(&self.bytes, self.offset, offset);
let tail_count = count_zeros(&self.bytes, start_end, self.length - length - offset);
self.unset_bits -= head_count + tail_count;
}
self.offset += offset;
self.length = length;
}
}
#[inline]
#[must_use]
pub fn sliced(self, offset: usize, length: usize) -> Self {
assert!(offset + length <= self.length);
unsafe { self.sliced_unchecked(offset, length) }
}
#[inline]
#[must_use]
pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
self.slice_unchecked(offset, length);
self
}
#[inline]
pub fn get_bit(&self, i: usize) -> bool {
get_bit(&self.bytes, self.offset + i)
}
#[inline]
pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
get_bit_unchecked(&self.bytes, self.offset + i)
}
pub(crate) fn as_ptr(&self) -> *const u8 {
self.bytes.deref().as_ptr()
}
pub(crate) fn offset(&self) -> usize {
self.offset
}
pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
match (
self.offset,
Arc::get_mut(&mut self.bytes).and_then(|b| b.get_vec()),
) {
(0, Some(v)) => {
let data = std::mem::take(v);
Either::Right(MutableBitmap::from_vec(data, self.length))
}
_ => Either::Left(self),
}
}
pub fn make_mut(self) -> MutableBitmap {
match self.into_mut() {
Either::Left(data) => {
if data.offset > 0 {
let chunks = data.chunks::<u64>();
let remainder = chunks.remainder();
let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
MutableBitmap::from_vec(vec, data.length)
} else {
MutableBitmap::from_vec(data.bytes.as_ref().to_vec(), data.length)
}
}
Either::Right(data) => data,
}
}
#[inline]
pub fn new_zeroed(length: usize) -> Self {
let bytes = vec![0; length.saturating_add(7) / 8];
unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) }
}
#[inline]
pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
count_zeros(&self.bytes, self.offset + offset, length)
}
#[inline]
pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
}
#[inline]
pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
Bitmap::try_new(vec, length).unwrap()
}
#[inline]
pub fn get(&self, i: usize) -> Option<bool> {
if i < self.len() {
Some(unsafe { self.get_bit_unchecked(i) })
} else {
None
}
}
#[must_use]
pub fn into_inner(self) -> (Arc<Bytes<u8>>, usize, usize, usize) {
let Self {
bytes,
offset,
length,
unset_bits,
} = self;
(bytes, offset, length, unset_bits)
}
pub unsafe fn from_inner(
bytes: Arc<Bytes<u8>>,
offset: usize,
length: usize,
unset_bits: usize,
) -> Result<Self, Error> {
check(&bytes, offset, length)?;
Ok(Self {
bytes,
offset,
length,
unset_bits,
})
}
pub unsafe fn from_inner_unchecked(
bytes: Arc<Bytes<u8>>,
offset: usize,
length: usize,
unset_bits: usize,
) -> Self {
Self {
bytes,
offset,
length,
unset_bits,
}
}
}
impl<P: AsRef<[bool]>> From<P> for Bitmap {
fn from(slice: P) -> Self {
Self::from_trusted_len_iter(slice.as_ref().iter().copied())
}
}
impl FromIterator<bool> for Bitmap {
fn from_iter<I>(iter: I) -> Self
where
I: IntoIterator<Item = bool>,
{
MutableBitmap::from_iter(iter).into()
}
}
impl Bitmap {
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
}
#[inline]
pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
MutableBitmap::from_trusted_len_iter(iterator).into()
}
#[inline]
pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
}
#[inline]
pub unsafe fn try_from_trusted_len_iter_unchecked<
E,
I: Iterator<Item = std::result::Result<bool, E>>,
>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
}
#[cfg(feature = "arrow")]
pub fn from_null_buffer(value: arrow_buffer::buffer::NullBuffer) -> Self {
let offset = value.offset();
let length = value.len();
let unset_bits = value.null_count();
Self {
offset,
length,
unset_bits,
bytes: Arc::new(crate::buffer::to_bytes(value.buffer().clone())),
}
}
}
impl<'a> IntoIterator for &'a Bitmap {
type Item = bool;
type IntoIter = BitmapIter<'a>;
fn into_iter(self) -> Self::IntoIter {
BitmapIter::<'a>::new(&self.bytes, self.offset, self.length)
}
}
impl IntoIterator for Bitmap {
type Item = bool;
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
IntoIter::new(self)
}
}
#[cfg(feature = "arrow")]
impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
fn from(value: Bitmap) -> Self {
let null_count = value.unset_bits;
let buffer = crate::buffer::to_buffer(value.bytes);
let buffer = arrow_buffer::buffer::BooleanBuffer::new(buffer, value.offset, value.length);
unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(buffer, null_count) }
}
}