1#![allow(unsafe_op_in_unsafe_fn)]
2use std::ops::Deref;
3
4use either::Either;
5use polars_buffer::{Buffer, SharedStorage};
6use polars_error::{PolarsResult, polars_bail};
7use polars_utils::relaxed_cell::RelaxedCell;
8
9use super::utils::{self, BitChunk, BitChunks, BitmapIter, count_zeros, fmt, get_bit_unchecked};
10use super::{IntoIter, MutableBitmap, chunk_iter_to_vec, num_intersections_with};
11use crate::array::Splitable;
12use crate::bitmap::BitmapBuilder;
13use crate::bitmap::aligned::AlignedBitmapSlice;
14use crate::bitmap::iterator::{
15 FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
16};
17use crate::bitmap::utils::bytes_for;
18use crate::legacy::utils::FromTrustedLenIterator;
19use crate::trusted_len::TrustedLen;
20
21const UNKNOWN_BIT_COUNT: u64 = u64::MAX;
22
23#[derive(Default, Clone)]
56pub struct Bitmap {
57 storage: SharedStorage<u8>,
58 offset: usize,
61 length: usize,
62
63 unset_bit_count_cache: RelaxedCell<u64>,
68}
69
70#[inline(always)]
71fn has_cached_unset_bit_count(ubcc: u64) -> bool {
72 ubcc >> 63 == 0
73}
74
75impl std::fmt::Debug for Bitmap {
76 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77 let (bytes, offset, len) = self.as_slice();
78 fmt(bytes, offset, len, f)
79 }
80}
81
82pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> PolarsResult<()> {
83 if offset + length > bytes.len().saturating_mul(8) {
84 polars_bail!(InvalidOperation:
85 "The offset + length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
86 offset + length,
87 bytes.len().saturating_mul(8)
88 );
89 }
90 Ok(())
91}
92
93impl Bitmap {
94 #[inline]
96 pub fn new() -> Self {
97 Self::default()
98 }
99
100 #[inline]
104 pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
105 check(&bytes, 0, length)?;
106 Ok(Self {
107 storage: SharedStorage::from_vec(bytes),
108 length,
109 offset: 0,
110 unset_bit_count_cache: RelaxedCell::from(if length == 0 {
111 0
112 } else {
113 UNKNOWN_BIT_COUNT
114 }),
115 })
116 }
117
118 #[inline]
120 pub fn len(&self) -> usize {
121 self.length
122 }
123
124 #[inline]
126 pub fn is_empty(&self) -> bool {
127 self.len() == 0
128 }
129
130 pub fn iter(&self) -> BitmapIter<'_> {
132 BitmapIter::new(&self.storage, self.offset, self.length)
133 }
134
135 pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
139 BitChunks::new(&self.storage, self.offset, self.length)
140 }
141
142 pub fn fast_iter_u32(&self) -> FastU32BitmapIter<'_> {
145 FastU32BitmapIter::new(&self.storage, self.offset, self.length)
146 }
147
148 pub fn fast_iter_u56(&self) -> FastU56BitmapIter<'_> {
151 FastU56BitmapIter::new(&self.storage, self.offset, self.length)
152 }
153
154 pub fn fast_iter_u64(&self) -> FastU64BitmapIter<'_> {
157 FastU64BitmapIter::new(&self.storage, self.offset, self.length)
158 }
159
160 pub fn true_idx_iter(&self) -> TrueIdxIter<'_> {
162 TrueIdxIter::new(self.len(), Some(self))
163 }
164
165 pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
167 AlignedBitmapSlice::new(&self.storage, self.offset, self.length)
168 }
169
170 #[inline]
178 pub fn as_slice(&self) -> (&[u8], usize, usize) {
179 let start = self.offset / 8;
180 let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
181 (
182 &self.storage[start..start + len],
183 self.offset % 8,
184 self.length,
185 )
186 }
187
188 #[inline]
192 pub fn set_bits(&self) -> usize {
193 self.length - self.unset_bits()
194 }
195
196 #[inline]
200 pub fn lazy_set_bits(&self) -> Option<usize> {
201 Some(self.length - self.lazy_unset_bits()?)
202 }
203
204 pub fn unset_bits(&self) -> usize {
213 self.lazy_unset_bits().unwrap_or_else(|| {
214 let zeros = count_zeros(&self.storage, self.offset, self.length);
215 self.unset_bit_count_cache.store(zeros as u64);
216 zeros
217 })
218 }
219
220 pub fn lazy_unset_bits(&self) -> Option<usize> {
224 let cache = self.unset_bit_count_cache.load();
225 has_cached_unset_bit_count(cache).then_some(cache as usize)
226 }
227
228 pub unsafe fn update_bit_count(&mut self, bits_set: usize) {
234 assert!(bits_set <= self.length);
235 let zeros = self.length - bits_set;
236 self.unset_bit_count_cache.store(zeros as u64);
237 }
238
239 #[inline]
244 pub fn slice(&mut self, offset: usize, length: usize) {
245 assert!(offset + length <= self.length);
246 unsafe { self.slice_unchecked(offset, length) }
247 }
248
249 #[inline]
254 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
255 if offset == 0 && length == self.length {
257 return;
258 }
259
260 let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
262 if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
263 let new_count = if *unset_bit_count_cache > 0 {
264 length as u64
265 } else {
266 0
267 };
268 *unset_bit_count_cache = new_count;
269 self.offset += offset;
270 self.length = length;
271 return;
272 }
273
274 if has_cached_unset_bit_count(*unset_bit_count_cache) {
275 let small_portion = (self.length / 5).max(32);
279 if length + small_portion >= self.length {
280 let slice_end = self.offset + offset + length;
282 let head_count = count_zeros(&self.storage, self.offset, offset);
283 let tail_count =
284 count_zeros(&self.storage, slice_end, self.length - length - offset);
285 let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
286 *unset_bit_count_cache = new_count;
287 } else {
288 *unset_bit_count_cache = UNKNOWN_BIT_COUNT;
289 }
290 }
291
292 self.offset += offset;
293 self.length = length;
294 }
295
296 #[inline]
301 #[must_use]
302 pub fn sliced(self, offset: usize, length: usize) -> Self {
303 assert!(offset + length <= self.length);
304 unsafe { self.sliced_unchecked(offset, length) }
305 }
306
307 #[inline]
312 #[must_use]
313 pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
314 self.slice_unchecked(offset, length);
315 self
316 }
317
318 #[inline]
322 pub fn get_bit(&self, i: usize) -> bool {
323 assert!(i < self.len());
324 unsafe { self.get_bit_unchecked(i) }
325 }
326
327 #[inline]
332 pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
333 debug_assert!(i < self.len());
334 get_bit_unchecked(&self.storage, self.offset + i)
335 }
336
337 pub(crate) fn as_ptr(&self) -> *const u8 {
340 self.storage.deref().as_ptr()
341 }
342
343 pub(crate) fn offset(&self) -> usize {
346 self.offset
347 }
348
349 pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
357 match self.storage.try_into_vec() {
358 Ok(v) => Either::Right(MutableBitmap::from_vec(v, self.length)),
359 Err(storage) => {
360 self.storage = storage;
361 Either::Left(self)
362 },
363 }
364 }
365
366 pub fn make_mut(self) -> MutableBitmap {
369 match self.into_mut() {
370 Either::Left(data) => {
371 if data.offset > 0 {
372 let chunks = data.chunks::<u64>();
374 let remainder = chunks.remainder();
375 let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
376 MutableBitmap::from_vec(vec, data.length)
377 } else {
378 let len = bytes_for(data.length);
379 MutableBitmap::from_vec(data.storage[0..len].to_vec(), data.length)
380 }
381 },
382 Either::Right(data) => data,
383 }
384 }
385
386 #[inline]
388 pub fn new_zeroed(length: usize) -> Self {
389 let bytes_needed = length.div_ceil(8);
390 let storage = Buffer::zeroed(bytes_needed).into_storage();
391 Self {
392 storage,
393 offset: 0,
394 length,
395 unset_bit_count_cache: RelaxedCell::from(length as u64),
396 }
397 }
398
399 #[inline]
401 pub fn new_with_value(value: bool, length: usize) -> Self {
402 if !value {
403 return Self::new_zeroed(length);
404 }
405
406 unsafe {
407 Bitmap::from_inner_unchecked(
408 SharedStorage::from_vec(vec![u8::MAX; length.saturating_add(7) / 8]),
409 0,
410 length,
411 Some(0),
412 )
413 }
414 }
415
416 #[inline]
418 pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
419 count_zeros(&self.storage, self.offset + offset, length)
420 }
421
422 #[inline]
426 pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
427 Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
428 }
429
430 #[inline]
435 pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
436 Bitmap::try_new(vec, length).unwrap()
437 }
438
439 #[inline]
441 pub fn get(&self, i: usize) -> Option<bool> {
442 if i < self.len() {
443 Some(unsafe { self.get_bit_unchecked(i) })
444 } else {
445 None
446 }
447 }
448
449 pub unsafe fn from_inner_unchecked(
455 storage: SharedStorage<u8>,
456 offset: usize,
457 length: usize,
458 unset_bits: Option<usize>,
459 ) -> Self {
460 debug_assert!(check(&storage[..], offset, length).is_ok());
461
462 let unset_bit_count_cache = if let Some(n) = unset_bits {
463 RelaxedCell::from(n as u64)
464 } else {
465 RelaxedCell::from(UNKNOWN_BIT_COUNT)
466 };
467 Self {
468 storage,
469 offset,
470 length,
471 unset_bit_count_cache,
472 }
473 }
474
475 pub fn intersects_with(&self, other: &Self) -> bool {
479 self.num_intersections_with(other) != 0
480 }
481
482 pub fn num_intersections_with(&self, other: &Self) -> usize {
484 num_intersections_with(
485 super::bitmask::BitMask::from_bitmap(self),
486 super::bitmask::BitMask::from_bitmap(other),
487 )
488 }
489
490 pub fn select(&self, truthy: &Self, falsy: &Self) -> Self {
496 super::bitmap_ops::select(self, truthy, falsy)
497 }
498
499 pub fn select_constant(&self, truthy: &Self, falsy: bool) -> Self {
505 super::bitmap_ops::select_constant(self, truthy, falsy)
506 }
507
508 pub fn num_edges(&self) -> usize {
510 super::bitmap_ops::num_edges(self)
511 }
512
513 pub fn leading_zeros(&self) -> usize {
515 utils::leading_zeros(&self.storage, self.offset, self.length)
516 }
517 pub fn leading_ones(&self) -> usize {
519 utils::leading_ones(&self.storage, self.offset, self.length)
520 }
521 pub fn trailing_zeros(&self) -> usize {
523 utils::trailing_zeros(&self.storage, self.offset, self.length)
524 }
525 pub fn trailing_ones(&self) -> usize {
527 utils::trailing_ones(&self.storage, self.offset, self.length)
528 }
529
530 pub fn take_leading_zeros(&mut self) -> usize {
533 if self
534 .lazy_unset_bits()
535 .is_some_and(|unset_bits| unset_bits == self.length)
536 {
537 let leading_zeros = self.length;
538 self.offset += self.length;
539 self.length = 0;
540 *self.unset_bit_count_cache.get_mut() = 0;
541 return leading_zeros;
542 }
543
544 let leading_zeros = self.leading_zeros();
545 self.offset += leading_zeros;
546 self.length -= leading_zeros;
547 if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
548 *self.unset_bit_count_cache.get_mut() -= leading_zeros as u64;
549 }
550 leading_zeros
551 }
552 pub fn take_leading_ones(&mut self) -> usize {
555 if self
556 .lazy_unset_bits()
557 .is_some_and(|unset_bits| unset_bits == 0)
558 {
559 let leading_ones = self.length;
560 self.offset += self.length;
561 self.length = 0;
562 *self.unset_bit_count_cache.get_mut() = 0;
563 return leading_ones;
564 }
565
566 let leading_ones = self.leading_ones();
567 self.offset += leading_ones;
568 self.length -= leading_ones;
569 leading_ones
571 }
572 pub fn take_trailing_zeros(&mut self) -> usize {
575 if self
576 .lazy_unset_bits()
577 .is_some_and(|unset_bits| unset_bits == self.length)
578 {
579 let trailing_zeros = self.length;
580 self.length = 0;
581 *self.unset_bit_count_cache.get_mut() = 0;
582 return trailing_zeros;
583 }
584
585 let trailing_zeros = self.trailing_zeros();
586 self.length -= trailing_zeros;
587 if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
588 *self.unset_bit_count_cache.get_mut() -= trailing_zeros as u64;
589 }
590 trailing_zeros
591 }
592 pub fn take_trailing_ones(&mut self) -> usize {
595 if self
596 .lazy_unset_bits()
597 .is_some_and(|unset_bits| unset_bits == 0)
598 {
599 let trailing_ones = self.length;
600 self.length = 0;
601 *self.unset_bit_count_cache.get_mut() = 0;
602 return trailing_ones;
603 }
604
605 let trailing_ones = self.trailing_ones();
606 self.length -= trailing_ones;
607 trailing_ones
609 }
610}
611
612impl<P: AsRef<[bool]>> From<P> for Bitmap {
613 fn from(slice: P) -> Self {
614 Self::from_trusted_len_iter(slice.as_ref().iter().copied())
615 }
616}
617
618impl FromIterator<bool> for Bitmap {
619 fn from_iter<I>(iter: I) -> Self
620 where
621 I: IntoIterator<Item = bool>,
622 {
623 MutableBitmap::from_iter(iter).into()
624 }
625}
626
627impl FromTrustedLenIterator<bool> for Bitmap {
628 fn from_iter_trusted_length<T: IntoIterator<Item = bool>>(iter: T) -> Self
629 where
630 T::IntoIter: TrustedLen,
631 {
632 MutableBitmap::from_trusted_len_iter(iter.into_iter()).into()
633 }
634}
635
636impl Bitmap {
637 pub fn opt_from_iter<I: Iterator<Item = bool>>(mut iterator: I) -> Option<Self> {
639 let mut num_true = 0;
640 loop {
641 match iterator.next() {
642 Some(true) => num_true += 1,
643 Some(false) => break,
644 None => return None, }
646 }
647
648 let mut bm = BitmapBuilder::with_capacity(num_true + 1 + iterator.size_hint().0);
649 bm.extend_constant(num_true, true);
650 bm.push(false);
651 for x in iterator {
652 bm.push(x);
653 }
654 bm.into_opt_validity()
655 }
656
657 #[inline]
662 pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
663 MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
664 }
665
666 #[inline]
668 pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
669 MutableBitmap::from_trusted_len_iter(iterator).into()
670 }
671
672 #[inline]
674 pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
675 iterator: I,
676 ) -> std::result::Result<Self, E> {
677 Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
678 }
679
680 #[inline]
685 pub unsafe fn try_from_trusted_len_iter_unchecked<
686 E,
687 I: Iterator<Item = std::result::Result<bool, E>>,
688 >(
689 iterator: I,
690 ) -> std::result::Result<Self, E> {
691 Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
692 }
693}
694
695impl<'a> IntoIterator for &'a Bitmap {
696 type Item = bool;
697 type IntoIter = BitmapIter<'a>;
698
699 fn into_iter(self) -> Self::IntoIter {
700 BitmapIter::<'a>::new(&self.storage, self.offset, self.length)
701 }
702}
703
704impl IntoIterator for Bitmap {
705 type Item = bool;
706 type IntoIter = IntoIter;
707
708 fn into_iter(self) -> Self::IntoIter {
709 IntoIter::new(self)
710 }
711}
712
713impl Splitable for Bitmap {
714 #[inline(always)]
715 fn check_bound(&self, offset: usize) -> bool {
716 offset <= self.len()
717 }
718
719 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
720 if offset == 0 {
721 return (Self::new(), self.clone());
722 }
723 if offset == self.len() {
724 return (self.clone(), Self::new());
725 }
726
727 let ubcc = self.unset_bit_count_cache.load();
728
729 let lhs_length = offset;
730 let rhs_length = self.length - offset;
731
732 let mut lhs_ubcc = UNKNOWN_BIT_COUNT;
733 let mut rhs_ubcc = UNKNOWN_BIT_COUNT;
734
735 if has_cached_unset_bit_count(ubcc) {
736 if ubcc == 0 {
737 lhs_ubcc = 0;
738 rhs_ubcc = 0;
739 } else if ubcc == self.length as u64 {
740 lhs_ubcc = offset as u64;
741 rhs_ubcc = (self.length - offset) as u64;
742 } else {
743 let small_portion = (self.length / 4).max(32);
747
748 if lhs_length <= rhs_length {
749 if rhs_length + small_portion >= self.length {
750 let count = count_zeros(&self.storage, self.offset, lhs_length) as u64;
751 lhs_ubcc = count;
752 rhs_ubcc = ubcc - count;
753 }
754 } else if lhs_length + small_portion >= self.length {
755 let count = count_zeros(&self.storage, self.offset + offset, rhs_length) as u64;
756 lhs_ubcc = ubcc - count;
757 rhs_ubcc = count;
758 }
759 }
760 }
761
762 debug_assert!(lhs_ubcc == UNKNOWN_BIT_COUNT || lhs_ubcc <= ubcc);
763 debug_assert!(rhs_ubcc == UNKNOWN_BIT_COUNT || rhs_ubcc <= ubcc);
764
765 (
766 Self {
767 storage: self.storage.clone(),
768 offset: self.offset,
769 length: lhs_length,
770 unset_bit_count_cache: RelaxedCell::from(lhs_ubcc),
771 },
772 Self {
773 storage: self.storage.clone(),
774 offset: self.offset + offset,
775 length: rhs_length,
776 unset_bit_count_cache: RelaxedCell::from(rhs_ubcc),
777 },
778 )
779 }
780}