#![allow(unsafe_op_in_unsafe_fn)]
use std::ops::Deref;
use std::sync::LazyLock;

use either::Either;
use polars_error::{PolarsResult, polars_bail};
use polars_utils::relaxed_cell::RelaxedCell;

use super::utils::{self, BitChunk, BitChunks, BitmapIter, count_zeros, fmt, get_bit_unchecked};
use super::{IntoIter, MutableBitmap, chunk_iter_to_vec, num_intersections_with};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
use crate::bitmap::iterator::{
    FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
};
use crate::bitmap::utils::bytes_for;
use crate::legacy::utils::FromTrustedLenIterator;
use crate::storage::SharedStorage;
use crate::trusted_len::TrustedLen;

const UNKNOWN_BIT_COUNT: u64 = u64::MAX;

/// An immutable container of bits, semantically equivalent to `Arc<Vec<bool>>` but
/// bit-packed into a shared byte buffer, which makes cloning cheap and slicing zero-copy.
#[derive(Default, Clone)]
pub struct Bitmap {
    storage: SharedStorage<u8>,
    // Both offset and length are measured in bits.
    offset: usize,
    length: usize,

    // Lazily computed count of the unset bits in `[offset, offset + length)`,
    // or `UNKNOWN_BIT_COUNT` if the count has not been computed yet.
    unset_bit_count_cache: RelaxedCell<u64>,
}

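// The unset-bit count is cached lazily: `unset_bit_count_cache` holds either the actual
// count or the sentinel `UNKNOWN_BIT_COUNT` (`u64::MAX`) when it has not been computed.
// The helper below distinguishes the two by the top bit, which in practice is only ever
// set for the sentinel.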
#[inline(always)]
fn has_cached_unset_bit_count(ubcc: u64) -> bool {
    ubcc >> 63 == 0
}

impl std::fmt::Debug for Bitmap {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (bytes, offset, len) = self.as_slice();
        fmt(bytes, offset, len, f)
    }
}

/// Returns an error if the region `[offset, offset + length)` (in bits) does not fit in `bytes`.
pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> PolarsResult<()> {
    if offset + length > bytes.len().saturating_mul(8) {
        polars_bail!(InvalidOperation:
            "The offset + length of the bitmap ({}) must be `<=` the number of bytes times 8 ({})",
            offset + length,
            bytes.len().saturating_mul(8)
        );
    }
    Ok(())
}

impl Bitmap {
    /// Creates an empty [`Bitmap`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a new [`Bitmap`] from a byte buffer and a length in bits.
    ///
    /// # Errors
    /// Errors if `length` exceeds `bytes.len() * 8`.
    #[inline]
    pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
        check(&bytes, 0, length)?;
        Ok(Self {
            storage: SharedStorage::from_vec(bytes),
            length,
            offset: 0,
            unset_bit_count_cache: RelaxedCell::from(if length == 0 {
                0
            } else {
                UNKNOWN_BIT_COUNT
            }),
        })
    }

    /// Returns the length of the [`Bitmap`] in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the [`Bitmap`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a new iterator over the bits of this [`Bitmap`].
    pub fn iter(&self) -> BitmapIter<'_> {
        BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the bits of this [`Bitmap`] in chunks of type `T`,
    /// which is useful for operating on many bits at a time (e.g. with bitwise ops).
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
        BitChunks::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits of this [`Bitmap`] 32 at a time.
    pub fn fast_iter_u32(&self) -> FastU32BitmapIter<'_> {
        FastU32BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits of this [`Bitmap`] 56 at a time.
    pub fn fast_iter_u56(&self) -> FastU56BitmapIter<'_> {
        FastU56BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits of this [`Bitmap`] 64 at a time.
    pub fn fast_iter_u64(&self) -> FastU64BitmapIter<'_> {
        FastU64BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the indices of the set bits in this [`Bitmap`].
    pub fn true_idx_iter(&self) -> TrueIdxIter<'_> {
        TrueIdxIter::new(self.len(), Some(self))
    }

    /// Returns a word-aligned view of this [`Bitmap`] with chunks of type `T`.
    pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
        AlignedBitmapSlice::new(&self.storage, self.offset, self.length)
    }

    /// Returns the underlying bytes of this [`Bitmap`], together with the bit offset
    /// into the first byte and the length in bits.
    #[inline]
    pub fn as_slice(&self) -> (&[u8], usize, usize) {
        let start = self.offset / 8;
        let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
        (
            &self.storage[start..start + len],
            self.offset % 8,
            self.length,
        )
    }

    /// Returns the number of set bits in this [`Bitmap`].
    ///
    /// This is `O(1)` if the unset bit count is cached; otherwise the bits are counted
    /// (and the count cached) first.
    #[inline]
    pub fn set_bits(&self) -> usize {
        self.length - self.unset_bits()
    }

    /// Returns the number of set bits if it is already known, without counting.
    #[inline]
    pub fn lazy_set_bits(&self) -> Option<usize> {
        Some(self.length - self.lazy_unset_bits()?)
    }

    /// Returns the number of unset bits in this [`Bitmap`].
    ///
    /// Counts the bits and caches the result if the count is not yet known.
    pub fn unset_bits(&self) -> usize {
        self.lazy_unset_bits().unwrap_or_else(|| {
            let zeros = count_zeros(&self.storage, self.offset, self.length);
            self.unset_bit_count_cache.store(zeros as u64);
            zeros
        })
    }

    /// Returns the number of unset bits if it is already known, without counting.
    pub fn lazy_unset_bits(&self) -> Option<usize> {
        let cache = self.unset_bit_count_cache.load();
        has_cached_unset_bit_count(cache).then_some(cache as usize)
    }

    /// Updates the cached bit count.
    ///
    /// # Safety
    /// The caller must ensure that `bits_set` is the exact number of set bits in this bitmap.
    pub unsafe fn update_bit_count(&mut self, bits_set: usize) {
        assert!(bits_set <= self.length);
        let zeros = self.length - bits_set;
        self.unset_bit_count_cache.store(zeros as u64);
    }

    /// Slices this [`Bitmap`] in place to the region `[offset, offset + length)`.
    ///
    /// # Panics
    /// Panics if `offset + length` exceeds the bitmap's length.
    #[inline]
    pub fn slice(&mut self, offset: usize, length: usize) {
        assert!(offset + length <= self.length);
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Slices this [`Bitmap`] in place, without bound checks.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    #[inline]
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        // Fast path: the slice is a no-op.
        if offset == 0 && length == self.length {
            return;
        }

        let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
        // If all bits are set or all bits are unset the new cache is exact.
        if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
            let new_count = if *unset_bit_count_cache > 0 {
                length as u64
            } else {
                0
            };
            *unset_bit_count_cache = new_count;
            self.offset += offset;
            self.length = length;
            return;
        }

        if has_cached_unset_bit_count(*unset_bit_count_cache) {
            let small_portion = (self.length / 5).max(32);
            // If the slice keeps all but a small portion of the bitmap it is cheaper to
            // subtract the zeros dropped from the head and tail than to recount later.
            if length + small_portion >= self.length {
                let slice_end = self.offset + offset + length;
                let head_count = count_zeros(&self.storage, self.offset, offset);
                let tail_count =
                    count_zeros(&self.storage, slice_end, self.length - length - offset);
                let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
                *unset_bit_count_cache = new_count;
            } else {
                *unset_bit_count_cache = UNKNOWN_BIT_COUNT;
            }
        }

        self.offset += offset;
        self.length = length;
    }

    /// Returns a new [`Bitmap`] sliced to the region `[offset, offset + length)`.
    ///
    /// # Panics
    /// Panics if `offset + length` exceeds the bitmap's length.
    #[inline]
    #[must_use]
    pub fn sliced(self, offset: usize, length: usize) -> Self {
        assert!(offset + length <= self.length);
        unsafe { self.sliced_unchecked(offset, length) }
    }

    /// Returns a new [`Bitmap`] sliced to the region `[offset, offset + length)`,
    /// without bound checks.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length <= self.len()`.
    #[inline]
    #[must_use]
    pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
        self.slice_unchecked(offset, length);
        self
    }

    /// Returns the bit at position `i`.
    ///
    /// # Panics
    /// Panics if `i >= self.len()`.
    #[inline]
    pub fn get_bit(&self, i: usize) -> bool {
        assert!(i < self.len());
        unsafe { self.get_bit_unchecked(i) }
    }

    /// Returns the bit at position `i`, without bound checks.
    ///
    /// # Safety
    /// The caller must ensure that `i < self.len()`.
    #[inline]
    pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
        debug_assert!(i < self.len());
        get_bit_unchecked(&self.storage, self.offset + i)
    }

    /// Returns a pointer to the start of the underlying byte buffer.
    /// The bitmap's bit offset is not applied.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        self.storage.deref().as_ptr()
    }

    /// Returns this bitmap's bit offset into the underlying byte buffer.
    pub(crate) fn offset(&self) -> usize {
        self.offset
    }

    /// Converts this [`Bitmap`] into a [`MutableBitmap`] if the underlying storage can
    /// be reclaimed as a `Vec<u8>`; otherwise returns `Either::Left(self)` unchanged.
    pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
        match self.storage.try_into_vec() {
            Ok(v) => Either::Right(MutableBitmap::from_vec(v, self.length)),
            Err(storage) => {
                self.storage = storage;
                Either::Left(self)
            },
        }
    }

    /// Converts this [`Bitmap`] into a [`MutableBitmap`], copying the bits if the
    /// underlying storage cannot be reclaimed.
    pub fn make_mut(self) -> MutableBitmap {
        match self.into_mut() {
            Either::Left(data) => {
                if data.offset > 0 {
                    // With a non-zero offset the bits have to be re-packed into fresh bytes.
                    let chunks = data.chunks::<u64>();
                    let remainder = chunks.remainder();
                    let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
                    MutableBitmap::from_vec(vec, data.length)
                } else {
                    let len = bytes_for(data.length);
                    MutableBitmap::from_vec(data.storage[0..len].to_vec(), data.length)
                }
            },
            Either::Right(data) => data,
        }
    }

    /// Creates a new [`Bitmap`] of `length` unset bits.
    #[inline]
    pub fn new_zeroed(length: usize) -> Self {
        const GLOBAL_ZERO_SIZE: usize = 1024 * 1024;
        // Small bitmaps share a single leaked, zeroed buffer so creating them
        // does not allocate or zero memory.
        static GLOBAL_ZEROES: LazyLock<SharedStorage<u8>> = LazyLock::new(|| {
            let mut ss = SharedStorage::from_vec(vec![0; GLOBAL_ZERO_SIZE]);
            ss.leak();
            ss
        });

        let bytes_needed = length.div_ceil(8);
        let storage = if bytes_needed <= GLOBAL_ZERO_SIZE {
            GLOBAL_ZEROES.clone()
        } else {
            SharedStorage::from_vec(vec![0; bytes_needed])
        };
        Self {
            storage,
            offset: 0,
            length,
            unset_bit_count_cache: RelaxedCell::from(length as u64),
        }
    }

    /// Creates a new [`Bitmap`] of `length` bits, all set to `value`.
    #[inline]
    pub fn new_with_value(value: bool, length: usize) -> Self {
        if !value {
            return Self::new_zeroed(length);
        }

        unsafe {
            Bitmap::from_inner_unchecked(
                SharedStorage::from_vec(vec![u8::MAX; length.saturating_add(7) / 8]),
                0,
                length,
                Some(0),
            )
        }
    }

    /// Counts the unset bits in the range `[offset, offset + length)` of this [`Bitmap`].
    #[inline]
    pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
        count_zeros(&self.storage, self.offset + offset, length)
    }

    /// Creates a new [`Bitmap`] from a slice of bytes and a length in bits.
    ///
    /// # Panics
    /// Panics if `length` exceeds `slice.len() * 8`.
    #[inline]
    pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
        Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
    }

    /// Creates a new [`Bitmap`] from a byte vector and a length in bits.
    ///
    /// # Panics
    /// Panics if `length` exceeds `vec.len() * 8`.
    #[inline]
    pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
        Bitmap::try_new(vec, length).unwrap()
    }

    /// Returns the bit at position `i`, or `None` if `i` is out of bounds.
    #[inline]
    pub fn get(&self, i: usize) -> Option<bool> {
        if i < self.len() {
            Some(unsafe { self.get_bit_unchecked(i) })
        } else {
            None
        }
    }

    /// Creates a new [`Bitmap`] directly from its internal parts.
    ///
    /// # Safety
    /// The caller must ensure that `offset + length` does not exceed `storage.len() * 8`
    /// and that `unset_bits`, if given, is the exact number of unset bits in the region.
    pub unsafe fn from_inner_unchecked(
        storage: SharedStorage<u8>,
        offset: usize,
        length: usize,
        unset_bits: Option<usize>,
    ) -> Self {
        debug_assert!(check(&storage[..], offset, length).is_ok());

        let unset_bit_count_cache = if let Some(n) = unset_bits {
            RelaxedCell::from(n as u64)
        } else {
            RelaxedCell::from(UNKNOWN_BIT_COUNT)
        };
        Self {
            storage,
            offset,
            length,
            unset_bit_count_cache,
        }
    }

    /// Returns whether the two [`Bitmap`]s have at least one set bit in common.
    pub fn intersects_with(&self, other: &Self) -> bool {
        self.num_intersections_with(other) != 0
    }

    /// Returns the number of positions at which both [`Bitmap`]s have a set bit.
    pub fn num_intersections_with(&self, other: &Self) -> usize {
        num_intersections_with(
            super::bitmask::BitMask::from_bitmap(self),
            super::bitmask::BitMask::from_bitmap(other),
        )
    }

    /// For each bit in `self`, selects the corresponding bit from `truthy` if it is set
    /// and from `falsy` otherwise.
    pub fn select(&self, truthy: &Self, falsy: &Self) -> Self {
        super::bitmap_ops::select(self, truthy, falsy)
    }

    /// Same as [`Bitmap::select`], but with a constant `falsy` value.
    pub fn select_constant(&self, truthy: &Self, falsy: bool) -> Self {
        super::bitmap_ops::select_constant(self, truthy, falsy)
    }

    /// Returns the number of edges (transitions between 0 and 1) in this [`Bitmap`].
    pub fn num_edges(&self) -> usize {
        super::bitmap_ops::num_edges(self)
    }

    /// Returns the number of unset bits at the start of this [`Bitmap`].
    pub fn leading_zeros(&self) -> usize {
        utils::leading_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of set bits at the start of this [`Bitmap`].
    pub fn leading_ones(&self) -> usize {
        utils::leading_ones(&self.storage, self.offset, self.length)
    }
    /// Returns the number of unset bits at the end of this [`Bitmap`].
    pub fn trailing_zeros(&self) -> usize {
        utils::trailing_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of set bits at the end of this [`Bitmap`].
    pub fn trailing_ones(&self) -> usize {
        utils::trailing_ones(&self.storage, self.offset, self.length)
    }

    /// Removes the leading unset bits from this [`Bitmap`] and returns how many were removed.
    pub fn take_leading_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let leading_zeros = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_zeros;
        }

        let leading_zeros = self.leading_zeros();
        self.offset += leading_zeros;
        self.length -= leading_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= leading_zeros as u64;
        }
        leading_zeros
    }
    /// Removes the leading set bits from this [`Bitmap`] and returns how many were removed.
    pub fn take_leading_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let leading_ones = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_ones;
        }

        let leading_ones = self.leading_ones();
        self.offset += leading_ones;
        self.length -= leading_ones;
        // Removing set bits does not change the number of unset bits,
        // so the cached count stays valid.
        leading_ones
    }
    /// Removes the trailing unset bits from this [`Bitmap`] and returns how many were removed.
    pub fn take_trailing_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let trailing_zeros = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_zeros;
        }

        let trailing_zeros = self.trailing_zeros();
        self.length -= trailing_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= trailing_zeros as u64;
        }
        trailing_zeros
    }
    /// Removes the trailing set bits from this [`Bitmap`] and returns how many were removed.
    pub fn take_trailing_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let trailing_ones = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_ones;
        }

        let trailing_ones = self.trailing_ones();
        self.length -= trailing_ones;
        // Removing set bits does not change the number of unset bits,
        // so the cached count stays valid.
        trailing_ones
    }
}

impl<P: AsRef<[bool]>> From<P> for Bitmap {
    fn from(slice: P) -> Self {
        Self::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}

impl FromIterator<bool> for Bitmap {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        MutableBitmap::from_iter(iter).into()
    }
}

impl FromTrustedLenIterator<bool> for Bitmap {
    fn from_iter_trusted_length<T: IntoIterator<Item = bool>>(iter: T) -> Self
    where
        T::IntoIter: TrustedLen,
    {
        MutableBitmap::from_trusted_len_iter(iter.into_iter()).into()
    }
}

impl Bitmap {
    /// Creates a new [`Bitmap`] from an iterator of booleans whose length is trusted.
    ///
    /// # Safety
    /// The caller must ensure that the iterator reports an accurate length.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
    }

    /// Creates a new [`Bitmap`] from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter(iterator).into()
    }

    /// Fallibly creates a new [`Bitmap`] from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
    }

    /// Fallibly creates a new [`Bitmap`] from an iterator of booleans whose length is trusted.
    ///
    /// # Safety
    /// The caller must ensure that the iterator reports an accurate length.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<
        E,
        I: Iterator<Item = std::result::Result<bool, E>>,
    >(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
    }
}

impl<'a> IntoIterator for &'a Bitmap {
    type Item = bool;
    type IntoIter = BitmapIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        BitmapIter::<'a>::new(&self.storage, self.offset, self.length)
    }
}

impl IntoIterator for Bitmap {
    type Item = bool;
    type IntoIter = IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self)
    }
}

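// Splitting a Bitmap is zero-copy: both halves share the same storage and only get new
// offsets/lengths. The unset-bit cache is propagated where that is cheap: exactly for
// all-set or all-unset bitmaps, by counting only the smaller half when it is at most a
// small portion of the whole, and otherwise it is reset to UNKNOWN_BIT_COUNT.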
impl Splitable for Bitmap {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        if offset == 0 {
            return (Self::new(), self.clone());
        }
        if offset == self.len() {
            return (self.clone(), Self::new());
        }

        let ubcc = self.unset_bit_count_cache.load();

        let lhs_length = offset;
        let rhs_length = self.length - offset;

        let mut lhs_ubcc = UNKNOWN_BIT_COUNT;
        let mut rhs_ubcc = UNKNOWN_BIT_COUNT;

        if has_cached_unset_bit_count(ubcc) {
            if ubcc == 0 {
                lhs_ubcc = 0;
                rhs_ubcc = 0;
            } else if ubcc == self.length as u64 {
                lhs_ubcc = offset as u64;
                rhs_ubcc = (self.length - offset) as u64;
            } else {
                let small_portion = (self.length / 4).max(32);

                if lhs_length <= rhs_length {
                    if rhs_length + small_portion >= self.length {
                        let count = count_zeros(&self.storage, self.offset, lhs_length) as u64;
                        lhs_ubcc = count;
                        rhs_ubcc = ubcc - count;
                    }
                } else if lhs_length + small_portion >= self.length {
                    let count = count_zeros(&self.storage, self.offset + offset, rhs_length) as u64;
                    lhs_ubcc = ubcc - count;
                    rhs_ubcc = count;
                }
            }
        }

        debug_assert!(lhs_ubcc == UNKNOWN_BIT_COUNT || lhs_ubcc <= ubcc);
        debug_assert!(rhs_ubcc == UNKNOWN_BIT_COUNT || rhs_ubcc <= ubcc);

        (
            Self {
                storage: self.storage.clone(),
                offset: self.offset,
                length: lhs_length,
                unset_bit_count_cache: RelaxedCell::from(lhs_ubcc),
            },
            Self {
                storage: self.storage.clone(),
                offset: self.offset + offset,
                length: rhs_length,
                unset_bit_count_cache: RelaxedCell::from(rhs_ubcc),
            },
        )
    }
}
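
// A minimal, illustrative test sketch for the API above; it assumes the LSB-first bit
// order used by the Arrow format and is meant as usage documentation rather than an
// exhaustive test suite.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn construct_and_query() {
        // 0b0000_1101 sets bits 0, 2 and 3 (LSB-first).
        let bitmap = Bitmap::from_u8_vec(vec![0b0000_1101], 4);
        assert_eq!(bitmap.len(), 4);
        assert!(bitmap.get_bit(0));
        assert!(!bitmap.get_bit(1));
        assert_eq!(bitmap.set_bits(), 3);
        assert_eq!(bitmap.unset_bits(), 1);
        assert_eq!(bitmap.get(10), None);
    }

    #[test]
    fn constants_and_slicing() {
        let ones = Bitmap::new_with_value(true, 10);
        assert_eq!(ones.unset_bits(), 0);

        // Slicing shares the backing storage and only adjusts offset/length.
        let sliced = ones.sliced(2, 5);
        assert_eq!(sliced.len(), 5);
        assert_eq!(sliced.set_bits(), 5);

        let zeroes = Bitmap::new_zeroed(17);
        assert_eq!(zeroes.unset_bits(), 17);
    }

    #[test]
    fn from_bools_and_iterators() {
        let bitmap = Bitmap::from([true, false, true, true]);
        let bits: Vec<bool> = bitmap.iter().collect();
        assert_eq!(bits, vec![true, false, true, true]);

        let collected: Bitmap = (0..8).map(|i| i % 2 == 0).collect();
        assert_eq!(collected.set_bits(), 4);
    }
}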