#![allow(unsafe_op_in_unsafe_fn)]
use std::ops::Deref;

use either::Either;
use polars_buffer::{Buffer, SharedStorage};
use polars_error::{PolarsResult, polars_bail};
use polars_utils::relaxed_cell::RelaxedCell;

use super::utils::{self, BitChunk, BitChunks, BitmapIter, count_zeros, fmt, get_bit_unchecked};
use super::{IntoIter, MutableBitmap, chunk_iter_to_vec, num_intersections_with};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
use crate::bitmap::iterator::{
    FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
};
use crate::bitmap::utils::bytes_for;
use crate::legacy::utils::FromTrustedLenIterator;
use crate::trusted_len::TrustedLen;

const UNKNOWN_BIT_COUNT: u64 = u64::MAX;

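/// An immutable, shareable container of bits, semantically equivalent to an
/// `Arc<Vec<bool>>` but bit-packed: each boolean occupies a single bit of the
/// underlying byte storage.
///
/// Cloning and slicing are `O(1)`, since they only adjust `offset` and
/// `length` over the shared storage.
///
/// # Examples
/// A minimal usage sketch; the import path assumes this module is re-exported
/// as `polars_arrow::bitmap`, adjust it to wherever the crate exposes
/// [`Bitmap`]:
/// ```
/// use polars_arrow::bitmap::Bitmap;
///
/// let bitmap = Bitmap::from([true, false, true]);
/// assert_eq!(bitmap.len(), 3);
/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
/// ```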
#[derive(Default, Clone)]
pub struct Bitmap {
    storage: SharedStorage<u8>,
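    // Both offset and length are measured in bits.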
    offset: usize,
    length: usize,

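    // A cache of the number of unset bits in this bitmap, stored in a
    // `RelaxedCell` so it can be filled in behind a shared reference. The
    // sentinel `UNKNOWN_BIT_COUNT` (top bit set) means "not yet computed";
    // see `has_cached_unset_bit_count`.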
    unset_bit_count_cache: RelaxedCell<u64>,
}

#[inline(always)]
fn has_cached_unset_bit_count(ubcc: u64) -> bool {
    ubcc >> 63 == 0
}

impl std::fmt::Debug for Bitmap {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (bytes, offset, len) = self.as_slice();
        fmt(bytes, offset, len, f)
    }
}

pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> PolarsResult<()> {
    if offset + length > bytes.len().saturating_mul(8) {
        polars_bail!(InvalidOperation:
            "The offset + length of the bitmap ({}) must not exceed the number of bytes times 8 ({})",
            offset + length,
            bytes.len().saturating_mul(8)
        );
    }
    Ok(())
}

impl Bitmap {
    /// Initializes an empty [`Bitmap`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

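    /// Initializes a new [`Bitmap`] from a vector of bytes and a length in bits.
    ///
    /// # Errors
    /// Errors iff `length > bytes.len() * 8`.
    ///
    /// A short sketch of constructing from raw bytes; bits are
    /// least-significant-bit first within each byte. The import path is an
    /// assumption (see the type-level example):
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let bitmap = Bitmap::try_new(vec![0b00001101], 5).unwrap();
    /// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
    /// ```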
    #[inline]
    pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
        check(&bytes, 0, length)?;
        Ok(Self {
            storage: SharedStorage::from_vec(bytes),
            length,
            offset: 0,
            unset_bit_count_cache: RelaxedCell::from(if length == 0 {
                0
            } else {
                UNKNOWN_BIT_COUNT
            }),
        })
    }

    /// Returns the length of the [`Bitmap`] in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the [`Bitmap`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a new iterator of `bool` over this bitmap.
    pub fn iter(&self) -> BitmapIter<'_> {
        BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the bits packed into [`BitChunk`]s, which is
    /// useful for bit-parallel operations over many bits at once.
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
        BitChunks::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator over this bitmap, 32 bits at a time.
    pub fn fast_iter_u32(&self) -> FastU32BitmapIter<'_> {
        FastU32BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator over this bitmap, 56 bits at a time.
    pub fn fast_iter_u56(&self) -> FastU56BitmapIter<'_> {
        FastU56BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator over this bitmap, 64 bits at a time.
    pub fn fast_iter_u64(&self) -> FastU64BitmapIter<'_> {
        FastU64BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the indices of this bitmap's set bits.
    pub fn true_idx_iter(&self) -> TrueIdxIter<'_> {
        TrueIdxIter::new(self.len(), Some(self))
    }

    /// Returns a machine-word-aligned view over this bitmap.
    pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
        AlignedBitmapSlice::new(&self.storage, self.offset, self.length)
    }

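    /// Returns the packed bytes of this [`Bitmap`], together with the bit
    /// offset into the first byte and the length in bits, i.e.
    /// `(slice, bit_offset, length)`. The slice is trimmed to the bytes that
    /// actually contain this bitmap's bits.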
    #[inline]
    pub fn as_slice(&self) -> (&[u8], usize, usize) {
        let start = self.offset / 8;
        let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
        (
            &self.storage[start..start + len],
            self.offset % 8,
            self.length,
        )
    }

    /// Returns the number of set bits in this [`Bitmap`].
    ///
    /// Triggers a count of the unset bits if it is not already cached.
    #[inline]
    pub fn set_bits(&self) -> usize {
        self.length - self.unset_bits()
    }

    /// Returns the number of set bits if the unset-bit count is already
    /// cached, without triggering a count.
    #[inline]
    pub fn lazy_set_bits(&self) -> Option<usize> {
        Some(self.length - self.lazy_unset_bits()?)
    }

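    /// Returns the number of unset bits in this [`Bitmap`].
    ///
    /// Guaranteed to be `<= self.len()`.
    ///
    /// # Implementation
    /// This is `O(self.len())` the first time it is called and `O(1)` on
    /// subsequent calls, as the result is cached (through interior
    /// mutability) once computed.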
    pub fn unset_bits(&self) -> usize {
        self.lazy_unset_bits().unwrap_or_else(|| {
            let zeros = count_zeros(&self.storage, self.offset, self.length);
            self.unset_bit_count_cache.store(zeros as u64);
            zeros
        })
    }

    /// Returns the number of unset bits if it is already cached, without
    /// triggering a count.
    pub fn lazy_unset_bits(&self) -> Option<usize> {
        let cache = self.unset_bit_count_cache.load();
        has_cached_unset_bit_count(cache).then_some(cache as usize)
    }

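    /// Updates the set-bit count cache of this [`Bitmap`].
    ///
    /// # Safety
    /// The caller must ensure `bits_set` is the exact number of set bits in
    /// this bitmap; an incorrect value makes the cached counts unsound.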
    pub unsafe fn update_bit_count(&mut self, bits_set: usize) {
        assert!(bits_set <= self.length);
        let zeros = self.length - bits_set;
        self.unset_bit_count_cache.store(zeros as u64);
    }

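    /// Slices this [`Bitmap`] to the window `[offset, offset + length)`.
    ///
    /// # Panics
    /// Panics iff `offset + length > self.len()`.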
    #[inline]
    pub fn slice(&mut self, offset: usize, length: usize) {
        assert!(offset + length <= self.length);
        unsafe { self.slice_unchecked(offset, length) }
    }

247
248 #[inline]
253 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
254 if offset == 0 && length == self.length {
256 return;
257 }
258
259 let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
261 if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
262 let new_count = if *unset_bit_count_cache > 0 {
263 length as u64
264 } else {
265 0
266 };
267 *unset_bit_count_cache = new_count;
268 self.offset += offset;
269 self.length = length;
270 return;
271 }
272
273 if has_cached_unset_bit_count(*unset_bit_count_cache) {
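            // If we keep all but a small portion of the bitmap it is worth
            // doing an eager re-count: the old cached count can be reused via
            // the inclusion-exclusion principle.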
            let small_portion = (self.length / 5).max(32);
            if length + small_portion >= self.length {
                // Subtract the unset-bit counts of the chunks we slice off.
                let slice_end = self.offset + offset + length;
                let head_count = count_zeros(&self.storage, self.offset, offset);
                let tail_count =
                    count_zeros(&self.storage, slice_end, self.length - length - offset);
                let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
                *unset_bit_count_cache = new_count;
            } else {
                *unset_bit_count_cache = UNKNOWN_BIT_COUNT;
            }
        }

        self.offset += offset;
        self.length = length;
    }

    /// Returns a new [`Bitmap`] sliced to the window `[offset, offset + length)`.
    ///
    /// # Panics
    /// Panics iff `offset + length > self.len()`.
    #[inline]
    #[must_use]
    pub fn sliced(self, offset: usize, length: usize) -> Self {
        assert!(offset + length <= self.length);
        unsafe { self.sliced_unchecked(offset, length) }
    }

    /// Returns a new [`Bitmap`] sliced without checking bounds.
    ///
    /// # Safety
    /// The caller must ensure `offset + length <= self.len()`.
    #[inline]
    #[must_use]
    pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
        self.slice_unchecked(offset, length);
        self
    }

    /// Returns whether the bit at position `i` is set.
    ///
    /// # Panics
    /// Panics iff `i >= self.len()`.
    #[inline]
    pub fn get_bit(&self, i: usize) -> bool {
        assert!(i < self.len());
        unsafe { self.get_bit_unchecked(i) }
    }

    /// Returns whether the bit at position `i` is set, without bound checks.
    ///
    /// # Safety
    /// The caller must ensure `i < self.len()`.
    #[inline]
    pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
        debug_assert!(i < self.len());
        get_bit_unchecked(&self.storage, self.offset + i)
    }

    /// Returns a pointer to the start of the underlying byte storage. The
    /// pointer is not adjusted for `self.offset`.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        self.storage.deref().as_ptr()
    }

    /// Returns the offset, in bits, of this bitmap into the byte storage.
    pub(crate) fn offset(&self) -> usize {
        self.offset
    }

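    /// Converts this [`Bitmap`] into a [`MutableBitmap`] when the underlying
    /// storage can be reclaimed as a `Vec<u8>`; otherwise returns
    /// `Either::Left(self)` unchanged.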
    pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
        match self.storage.try_into_vec() {
            Ok(v) => Either::Right(MutableBitmap::from_vec(v, self.length)),
            Err(storage) => {
                self.storage = storage;
                Either::Left(self)
            },
        }
    }

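    /// Converts this [`Bitmap`] into a [`MutableBitmap`], cloning (and
    /// re-aligning) its internal buffer if the storage is shared.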
    pub fn make_mut(self) -> MutableBitmap {
        match self.into_mut() {
            Either::Left(data) => {
                if data.offset > 0 {
                    // Re-align the bits, removing the offset.
                    let chunks = data.chunks::<u64>();
                    let remainder = chunks.remainder();
                    let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
                    MutableBitmap::from_vec(vec, data.length)
                } else {
                    let len = bytes_for(data.length);
                    MutableBitmap::from_vec(data.storage[0..len].to_vec(), data.length)
                }
            },
            Either::Right(data) => data,
        }
    }

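    /// Initializes a new [`Bitmap`] filled with `length` unset bits.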
    #[inline]
    pub fn new_zeroed(length: usize) -> Self {
        let bytes_needed = length.div_ceil(8);
        let storage = Buffer::zeroed(bytes_needed).into_storage();
        Self {
            storage,
            offset: 0,
            length,
            unset_bit_count_cache: RelaxedCell::from(length as u64),
        }
    }

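    /// Initializes a new [`Bitmap`] with `length` bits all set to `value`.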
    #[inline]
    pub fn new_with_value(value: bool, length: usize) -> Self {
        if !value {
            return Self::new_zeroed(length);
        }

        unsafe {
            Bitmap::from_inner_unchecked(
                SharedStorage::from_vec(vec![u8::MAX; length.saturating_add(7) / 8]),
                0,
                length,
                Some(0),
            )
        }
    }

    /// Counts the unset bits in the window `[offset, offset + length)`.
    #[inline]
    pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
        count_zeros(&self.storage, self.offset + offset, length)
    }

    /// Creates a new [`Bitmap`] from a slice of bytes and a length in bits.
    ///
    /// # Panics
    /// Panics iff `length > slice.len() * 8`.
    #[inline]
    pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
        Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
    }

    /// Creates a new [`Bitmap`] from a `Vec<u8>` and a length in bits,
    /// without copying the vector.
    ///
    /// # Panics
    /// Panics iff `length > vec.len() * 8`.
    #[inline]
    pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
        Bitmap::try_new(vec, length).unwrap()
    }

    /// Returns whether the bit at position `i` is set, or `None` if `i` is
    /// out of bounds.
    #[inline]
    pub fn get(&self, i: usize) -> Option<bool> {
        if i < self.len() {
            Some(unsafe { self.get_bit_unchecked(i) })
        } else {
            None
        }
    }

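    /// Creates a new [`Bitmap`] directly from its internal representation.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= storage.len() * 8` and
    /// that `unset_bits`, if `Some`, is the exact number of unset bits in the
    /// `[offset, offset + length)` window.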
    pub unsafe fn from_inner_unchecked(
        storage: SharedStorage<u8>,
        offset: usize,
        length: usize,
        unset_bits: Option<usize>,
    ) -> Self {
        debug_assert!(check(&storage[..], offset, length).is_ok());

        let unset_bit_count_cache = if let Some(n) = unset_bits {
            RelaxedCell::from(n as u64)
        } else {
            RelaxedCell::from(UNKNOWN_BIT_COUNT)
        };
        Self {
            storage,
            offset,
            length,
            unset_bit_count_cache,
        }
    }

    /// Checks whether two [`Bitmap`]s share any set bits.
    ///
    /// This is an optimized version of `(self & other).set_bits() != 0`.
    pub fn intersects_with(&self, other: &Self) -> bool {
        self.num_intersections_with(other) != 0
    }

    /// Calculates the number of set bits shared between two [`Bitmap`]s.
    pub fn num_intersections_with(&self, other: &Self) -> usize {
        num_intersections_with(
            super::bitmask::BitMask::from_bitmap(self),
            super::bitmask::BitMask::from_bitmap(other),
        )
    }

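    /// Returns a bitmap that takes each bit from `truthy` where `self` is set
    /// and from `falsy` where it is not.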
    pub fn select(&self, truthy: &Self, falsy: &Self) -> Self {
        super::bitmap_ops::select(self, truthy, falsy)
    }

    /// Like [`Bitmap::select`], but with a constant `falsy` value.
    pub fn select_constant(&self, truthy: &Self, falsy: bool) -> Self {
        super::bitmap_ops::select_constant(self, truthy, falsy)
    }

    /// Calculates the number of edges, i.e. the number of `0 -> 1` and
    /// `1 -> 0` transitions in this [`Bitmap`].
    pub fn num_edges(&self) -> usize {
        super::bitmap_ops::num_edges(self)
    }

    /// Returns the number of zero bits from the start before a one bit is seen.
    pub fn leading_zeros(&self) -> usize {
        utils::leading_zeros(&self.storage, self.offset, self.length)
    }

    /// Returns the number of one bits from the start before a zero bit is seen.
    pub fn leading_ones(&self) -> usize {
        utils::leading_ones(&self.storage, self.offset, self.length)
    }

    /// Returns the number of zero bits from the back before a one bit is seen.
    pub fn trailing_zeros(&self) -> usize {
        utils::trailing_zeros(&self.storage, self.offset, self.length)
    }

    /// Returns the number of one bits from the back before a zero bit is seen.
    pub fn trailing_ones(&self) -> usize {
        utils::trailing_ones(&self.storage, self.offset, self.length)
    }

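    /// Strips the leading zero bits off this [`Bitmap`] in-place and returns
    /// how many were removed.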
    pub fn take_leading_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let leading_zeros = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_zeros;
        }

        let leading_zeros = self.leading_zeros();
        self.offset += leading_zeros;
        self.length -= leading_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= leading_zeros as u64;
        }
        leading_zeros
    }

    /// Strips the leading one bits off this [`Bitmap`] in-place and returns
    /// how many were removed.
    pub fn take_leading_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let leading_ones = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_ones;
        }

        let leading_ones = self.leading_ones();
        self.offset += leading_ones;
        self.length -= leading_ones;
        // The unset-bit count is unaffected by removing ones.
        leading_ones
    }

    /// Strips the trailing zero bits off this [`Bitmap`] in-place and returns
    /// how many were removed.
    pub fn take_trailing_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let trailing_zeros = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_zeros;
        }

        let trailing_zeros = self.trailing_zeros();
        self.length -= trailing_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= trailing_zeros as u64;
        }
        trailing_zeros
    }

    /// Strips the trailing one bits off this [`Bitmap`] in-place and returns
    /// how many were removed.
    pub fn take_trailing_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let trailing_ones = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_ones;
        }

        let trailing_ones = self.trailing_ones();
        self.length -= trailing_ones;
        // The unset-bit count is unaffected by removing ones.
        trailing_ones
    }
}

impl<P: AsRef<[bool]>> From<P> for Bitmap {
    fn from(slice: P) -> Self {
        Self::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}

impl FromIterator<bool> for Bitmap {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        MutableBitmap::from_iter(iter).into()
    }
}

impl FromTrustedLenIterator<bool> for Bitmap {
    fn from_iter_trusted_length<T: IntoIterator<Item = bool>>(iter: T) -> Self
    where
        T::IntoIter: TrustedLen,
    {
        MutableBitmap::from_trusted_len_iter(iter.into_iter()).into()
    }
}

impl Bitmap {
    /// Creates a new [`Bitmap`] from an iterator of booleans.
    ///
    /// # Safety
    /// The iterator must report an accurate length.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
    }

    /// Creates a new [`Bitmap`] from an iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter(iterator).into()
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    #[inline]
    pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    ///
    /// # Safety
    /// The iterator must report an accurate length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<
        E,
        I: Iterator<Item = std::result::Result<bool, E>>,
    >(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
    }
}

impl<'a> IntoIterator for &'a Bitmap {
    type Item = bool;
    type IntoIter = BitmapIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        BitmapIter::<'a>::new(&self.storage, self.offset, self.length)
    }
}

impl IntoIterator for Bitmap {
    type Item = bool;
    type IntoIter = IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self)
    }
}

impl Splitable for Bitmap {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        if offset == 0 {
            return (Self::new(), self.clone());
        }
        if offset == self.len() {
            return (self.clone(), Self::new());
        }

        let ubcc = self.unset_bit_count_cache.load();

        let lhs_length = offset;
        let rhs_length = self.length - offset;

        let mut lhs_ubcc = UNKNOWN_BIT_COUNT;
        let mut rhs_ubcc = UNKNOWN_BIT_COUNT;

        if has_cached_unset_bit_count(ubcc) {
            if ubcc == 0 {
                lhs_ubcc = 0;
                rhs_ubcc = 0;
            } else if ubcc == self.length as u64 {
                lhs_ubcc = offset as u64;
                rhs_ubcc = (self.length - offset) as u64;
            } else {
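                // Eagerly count the smaller side only when one side keeps
                // nearly all of the bitmap; the other side's count then
                // follows from the cached total (inclusion-exclusion).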
                let small_portion = (self.length / 4).max(32);

                if lhs_length <= rhs_length {
                    if rhs_length + small_portion >= self.length {
                        let count = count_zeros(&self.storage, self.offset, lhs_length) as u64;
                        lhs_ubcc = count;
                        rhs_ubcc = ubcc - count;
                    }
                } else if lhs_length + small_portion >= self.length {
                    let count = count_zeros(&self.storage, self.offset + offset, rhs_length) as u64;
                    lhs_ubcc = ubcc - count;
                    rhs_ubcc = count;
                }
            }
        }

        debug_assert!(lhs_ubcc == UNKNOWN_BIT_COUNT || lhs_ubcc <= ubcc);
        debug_assert!(rhs_ubcc == UNKNOWN_BIT_COUNT || rhs_ubcc <= ubcc);

        (
            Self {
                storage: self.storage.clone(),
                offset: self.offset,
                length: lhs_length,
                unset_bit_count_cache: RelaxedCell::from(lhs_ubcc),
            },
            Self {
                storage: self.storage.clone(),
                offset: self.offset + offset,
                length: rhs_length,
                unset_bit_count_cache: RelaxedCell::from(rhs_ubcc),
            },
        )
    }
}