use std::ops::Deref;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::LazyLock;

use either::Either;
use polars_error::{polars_bail, PolarsResult};

use super::utils::{self, count_zeros, fmt, get_bit_unchecked, BitChunk, BitChunks, BitmapIter};
use super::{chunk_iter_to_vec, intersects_with, num_intersections_with, IntoIter, MutableBitmap};
use crate::array::Splitable;
use crate::bitmap::aligned::AlignedBitmapSlice;
use crate::bitmap::iterator::{
    FastU32BitmapIter, FastU56BitmapIter, FastU64BitmapIter, TrueIdxIter,
};
use crate::legacy::utils::FromTrustedLenIterator;
use crate::storage::SharedStorage;
use crate::trusted_len::TrustedLen;

/// Sentinel stored in `unset_bit_count_cache` while the number of unset bits is unknown.
const UNKNOWN_BIT_COUNT: u64 = u64::MAX;

/// An immutable bitmap: a sequence of bits packed into a shared byte buffer.
///
/// Cloning and slicing are O(1) and never copy the underlying bits.
pub struct Bitmap {
    storage: SharedStorage<u8>,
    // Both the offset and the length are expressed in bits.
    offset: usize,
    length: usize,

    // Cached number of unset (zero) bits in `[offset, offset + length)`.
    // If the highest bit is set, the count is unknown (see `UNKNOWN_BIT_COUNT`).
    unset_bit_count_cache: AtomicU64,
}

/// Returns whether `ubcc` holds a real count rather than the "unknown" sentinel.
#[inline(always)]
fn has_cached_unset_bit_count(ubcc: u64) -> bool {
    ubcc >> 63 == 0
}

impl Clone for Bitmap {
    fn clone(&self) -> Self {
        Self {
            storage: self.storage.clone(),
            offset: self.offset,
            length: self.length,
            unset_bit_count_cache: AtomicU64::new(
                self.unset_bit_count_cache.load(Ordering::Relaxed),
            ),
        }
    }
}

impl std::fmt::Debug for Bitmap {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (bytes, offset, len) = self.as_slice();
        fmt(bytes, offset, len, f)
    }
}

impl Default for Bitmap {
    fn default() -> Self {
        MutableBitmap::new().into()
    }
}

/// Validates that `offset + length` bits fit within `bytes`.
pub(super) fn check(bytes: &[u8], offset: usize, length: usize) -> PolarsResult<()> {
    if offset + length > bytes.len().saturating_mul(8) {
        polars_bail!(InvalidOperation:
            "The offset + length of the bitmap ({}) must be `<=` the number of bits in the buffer ({})",
            offset + length,
            bytes.len().saturating_mul(8)
        );
    }
    Ok(())
}

impl Bitmap {
    /// Initializes an empty [`Bitmap`].
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

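    /// Initializes a new [`Bitmap`] from a byte buffer and a length in bits,
    /// erroring if `length` exceeds the number of bits in `bytes`.
    ///
    /// A minimal usage sketch; the `polars_arrow::bitmap::Bitmap` path is an
    /// assumption of the example, not something this module defines:
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// // 0b0000_0101 packs the bits [true, false, true, ...]; keep the first 3.
    /// let bitmap = Bitmap::try_new(vec![0b0000_0101], 3).unwrap();
    /// assert_eq!(bitmap.len(), 3);
    /// assert_eq!(bitmap.set_bits(), 2);
    ///
    /// // Asking for more bits than the buffer holds fails.
    /// assert!(Bitmap::try_new(vec![0b0000_0101], 9).is_err());
    /// ```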
    #[inline]
    pub fn try_new(bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
        check(&bytes, 0, length)?;
        Ok(Self {
            storage: SharedStorage::from_vec(bytes),
            length,
            offset: 0,
            unset_bit_count_cache: AtomicU64::new(if length == 0 { 0 } else { UNKNOWN_BIT_COUNT }),
        })
    }

    /// Returns the length of the [`Bitmap`] in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns whether the [`Bitmap`] is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

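    /// Returns a new iterator of `bool` over this bitmap.
    ///
    /// A minimal usage sketch (the `polars_arrow` path is an assumption of this
    /// example):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let bitmap = Bitmap::from([true, false, true]);
    /// let bits: Vec<bool> = bitmap.iter().collect();
    /// assert_eq!(bits, vec![true, false, true]);
    /// ```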
    pub fn iter(&self) -> BitmapIter {
        BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the bits in chunks of [`BitChunk`],
    /// useful for operating on many bits at once.
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
        BitChunks::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits 32 at a time, packed in `u32`s.
    pub fn fast_iter_u32(&self) -> FastU32BitmapIter<'_> {
        FastU32BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits 56 at a time, packed in `u64`s.
    pub fn fast_iter_u56(&self) -> FastU56BitmapIter<'_> {
        FastU56BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns a fast iterator that yields the bits 64 at a time, packed in `u64`s.
    pub fn fast_iter_u64(&self) -> FastU64BitmapIter<'_> {
        FastU64BitmapIter::new(&self.storage, self.offset, self.length)
    }

    /// Returns an iterator over the indices of the set (true) bits.
    pub fn true_idx_iter(&self) -> TrueIdxIter<'_> {
        TrueIdxIter::new(self.len(), Some(self))
    }

    /// Returns the bits of this [`Bitmap`] as an [`AlignedBitmapSlice`].
    pub fn aligned<T: BitChunk>(&self) -> AlignedBitmapSlice<'_, T> {
        AlignedBitmapSlice::new(&self.storage, self.offset, self.length)
    }

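    /// Returns the byte slice of this [`Bitmap`] together with the bit offset into
    /// its first byte and the length in bits.
    ///
    /// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let bitmap = Bitmap::from_u8_vec(vec![0b1111_0000, 0b0000_1111], 16).sliced(4, 8);
    /// let (bytes, offset, len) = bitmap.as_slice();
    /// // The slice still spans both backing bytes; the offset points into the first one.
    /// assert_eq!(bytes.len(), 2);
    /// assert_eq!(offset, 4);
    /// assert_eq!(len, 8);
    /// ```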
    #[inline]
    pub fn as_slice(&self) -> (&[u8], usize, usize) {
        let start = self.offset / 8;
        let len = (self.offset % 8 + self.length).saturating_add(7) / 8;
        (
            &self.storage[start..start + len],
            self.offset % 8,
            self.length,
        )
    }

    /// Returns the number of set (true) bits.
    ///
    /// This triggers a full count of the bits if the count is not cached yet.
    #[inline]
    pub fn set_bits(&self) -> usize {
        self.length - self.unset_bits()
    }

    /// Returns the number of set bits if it is already cached, without counting.
    #[inline]
    pub fn lazy_set_bits(&self) -> Option<usize> {
        Some(self.length - self.lazy_unset_bits()?)
    }

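    /// Returns the number of unset (zero) bits.
    ///
    /// The count is computed on first use and then cached, so later calls (and
    /// [`Bitmap::lazy_unset_bits`]) reuse the stored value.
    ///
    /// A minimal sketch of the caching behaviour (the `polars_arrow` path is assumed):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let bitmap = Bitmap::try_new(vec![0b0000_1101], 5).unwrap();
    /// // Freshly constructed from raw bytes, the count is not known yet.
    /// assert_eq!(bitmap.lazy_unset_bits(), None);
    /// // Counting caches the result.
    /// assert_eq!(bitmap.unset_bits(), 2);
    /// assert_eq!(bitmap.lazy_unset_bits(), Some(2));
    /// ```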
    pub fn unset_bits(&self) -> usize {
        self.lazy_unset_bits().unwrap_or_else(|| {
            let zeros = count_zeros(&self.storage, self.offset, self.length);
            self.unset_bit_count_cache
                .store(zeros as u64, Ordering::Relaxed);
            zeros
        })
    }

    /// Returns the number of unset bits if it is already cached, without counting.
    pub fn lazy_unset_bits(&self) -> Option<usize> {
        let cache = self.unset_bit_count_cache.load(Ordering::Relaxed);
        has_cached_unset_bit_count(cache).then_some(cache as usize)
    }

    /// Updates the cached unset-bit count from a known number of set bits.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that exactly `bits_set` bits are set in this bitmap;
    /// an incorrect value corrupts every subsequent count-based fast path.
    pub unsafe fn update_bit_count(&mut self, bits_set: usize) {
        assert!(bits_set <= self.length);
        let zeros = self.length - bits_set;
        self.unset_bit_count_cache
            .store(zeros as u64, Ordering::Relaxed);
    }

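    /// Slices `self` in place, offsetting by `offset` and truncating to `length` bits.
    ///
    /// # Panics
    ///
    /// Panics iff `offset + length > self.length`.
    ///
    /// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let mut bitmap = Bitmap::from([true, true, false, true]);
    /// bitmap.slice(1, 2);
    /// assert_eq!(bitmap.len(), 2);
    /// assert!(bitmap.get_bit(0));
    /// assert!(!bitmap.get_bit(1));
    /// ```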
    #[inline]
    pub fn slice(&mut self, offset: usize, length: usize) {
        assert!(offset + length <= self.length);
        unsafe { self.slice_unchecked(offset, length) }
    }

    /// Slices `self` in place, offsetting by `offset` and truncating to `length` bits,
    /// without bound checks.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `offset + length <= self.length`.
    #[inline]
    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
        // Fast path: the slice is the entire bitmap.
        if offset == 0 && length == self.length {
            return;
        }

        let unset_bit_count_cache = self.unset_bit_count_cache.get_mut();
        if *unset_bit_count_cache == 0 || *unset_bit_count_cache == self.length as u64 {
            // An all-ones or all-zeros bitmap stays all-ones or all-zeros after slicing.
            let new_count = if *unset_bit_count_cache > 0 {
                length as u64
            } else {
                0
            };
            *unset_bit_count_cache = new_count;
            self.offset += offset;
            self.length = length;
            return;
        }

        if has_cached_unset_bit_count(*unset_bit_count_cache) {
            // If the slice keeps all but a small portion of the bitmap, it is cheaper to
            // count the zeros in the dropped head and tail now than to recount the whole
            // slice later; otherwise mark the count as unknown.
            let small_portion = (self.length / 5).max(32);
            if length + small_portion >= self.length {
                let slice_end = self.offset + offset + length;
                let head_count = count_zeros(&self.storage, self.offset, offset);
                let tail_count =
                    count_zeros(&self.storage, slice_end, self.length - length - offset);
                let new_count = *unset_bit_count_cache - head_count as u64 - tail_count as u64;
                *unset_bit_count_cache = new_count;
            } else {
                *unset_bit_count_cache = UNKNOWN_BIT_COUNT;
            }
        }

        self.offset += offset;
        self.length = length;
    }

    /// Slices this [`Bitmap`] by `offset` and `length`, consuming it.
    ///
    /// # Panics
    ///
    /// Panics iff `offset + length > self.length`.
    #[inline]
    #[must_use]
    pub fn sliced(self, offset: usize, length: usize) -> Self {
        assert!(offset + length <= self.length);
        unsafe { self.sliced_unchecked(offset, length) }
    }

    /// Slices this [`Bitmap`] by `offset` and `length`, consuming it, without bound checks.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `offset + length <= self.length`.
    #[inline]
    #[must_use]
    pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
        self.slice_unchecked(offset, length);
        self
    }

    /// Returns the value of the bit at position `i`.
    ///
    /// # Panics
    ///
    /// Panics iff `i >= self.len()`.
    #[inline]
    pub fn get_bit(&self, i: usize) -> bool {
        assert!(i < self.len());
        unsafe { self.get_bit_unchecked(i) }
    }

    /// Returns the value of the bit at position `i`, without bound checks.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `i < self.len()`.
    #[inline]
    pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
        debug_assert!(i < self.len());
        get_bit_unchecked(&self.storage, self.offset + i)
    }

    /// Returns a pointer to the start of the underlying byte buffer.
    ///
    /// Note that this ignores `self.offset`.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        self.storage.deref().as_ptr()
    }

    /// Returns the offset, in bits, of this [`Bitmap`] into the underlying buffer.
    pub(crate) fn offset(&self) -> usize {
        self.offset
    }

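    /// Attempts to convert this [`Bitmap`] into a [`MutableBitmap`] without copying,
    /// returning `Either::Left(self)` when the underlying buffer cannot be reclaimed
    /// as a uniquely owned `Vec<u8>`.
    ///
    /// A minimal sketch of the usual round-trip via the infallible
    /// [`Bitmap::make_mut`] (the `polars_arrow` path is assumed by the example):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let bitmap = Bitmap::from([true, false]);
    /// // `make_mut` copies only when the buffer cannot be reused directly.
    /// let mut mutable = bitmap.make_mut();
    /// mutable.push(true);
    /// let bitmap: Bitmap = mutable.into();
    /// assert_eq!(bitmap.len(), 3);
    /// assert_eq!(bitmap.set_bits(), 2);
    /// ```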
    pub fn into_mut(mut self) -> Either<Self, MutableBitmap> {
        match self.storage.try_into_vec() {
            Ok(v) => Either::Right(MutableBitmap::from_vec(v, self.length)),
            Err(storage) => {
                self.storage = storage;
                Either::Left(self)
            },
        }
    }

    /// Converts this [`Bitmap`] into a [`MutableBitmap`], copying the bits if the
    /// underlying buffer cannot be reused directly.
    pub fn make_mut(self) -> MutableBitmap {
        match self.into_mut() {
            Either::Left(data) => {
                if data.offset > 0 {
                    // Re-align the bits to offset 0, chunk by chunk.
                    let chunks = data.chunks::<u64>();
                    let remainder = chunks.remainder();
                    let vec = chunk_iter_to_vec(chunks.chain(std::iter::once(remainder)));
                    MutableBitmap::from_vec(vec, data.length)
                } else {
                    MutableBitmap::from_vec(data.storage.as_ref().to_vec(), data.length)
                }
            },
            Either::Right(data) => data,
        }
    }

    /// Initializes a new [`Bitmap`] of `length` bits, all unset.
    #[inline]
    pub fn new_zeroed(length: usize) -> Self {
        // Reuse a global, immutable buffer of zeroes for all small bitmaps to avoid
        // allocating and zeroing memory over and over.
        const GLOBAL_ZERO_SIZE: usize = 1024 * 1024;
        static GLOBAL_ZEROES: LazyLock<SharedStorage<u8>> =
            LazyLock::new(|| SharedStorage::from_static(vec![0; GLOBAL_ZERO_SIZE].leak()));

        let bytes_needed = length.div_ceil(8);
        let storage = if bytes_needed <= GLOBAL_ZERO_SIZE {
            GLOBAL_ZEROES.clone()
        } else {
            SharedStorage::from_vec(vec![0; bytes_needed])
        };
        Self {
            storage,
            offset: 0,
            length,
            unset_bit_count_cache: AtomicU64::new(length as u64),
        }
    }

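    /// Initializes a new [`Bitmap`] of `length` bits, all set to `value`.
    ///
    /// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let ones = Bitmap::new_with_value(true, 10);
    /// assert_eq!(ones.unset_bits(), 0);
    ///
    /// let zeros = Bitmap::new_zeroed(4);
    /// assert_eq!(zeros.set_bits(), 0);
    /// ```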
    #[inline]
    pub fn new_with_value(value: bool, length: usize) -> Self {
        if !value {
            return Self::new_zeroed(length);
        }

        unsafe {
            Bitmap::from_inner_unchecked(
                SharedStorage::from_vec(vec![u8::MAX; length.saturating_add(7) / 8]),
                0,
                length,
                Some(0),
            )
        }
    }

    /// Counts the unset bits of the sub-slice `[offset, offset + length)` of this [`Bitmap`].
    #[inline]
    pub fn null_count_range(&self, offset: usize, length: usize) -> usize {
        count_zeros(&self.storage, self.offset + offset, length)
    }

    /// Creates a new [`Bitmap`] from a slice of bytes and a length in bits.
    ///
    /// # Panics
    ///
    /// Panics iff `length > slice.len() * 8`.
    #[inline]
    pub fn from_u8_slice<T: AsRef<[u8]>>(slice: T, length: usize) -> Self {
        Bitmap::try_new(slice.as_ref().to_vec(), length).unwrap()
    }

    /// Creates a new [`Bitmap`] from a byte vector and a length in bits.
    ///
    /// # Panics
    ///
    /// Panics iff `length > vec.len() * 8`.
    #[inline]
    pub fn from_u8_vec(vec: Vec<u8>, length: usize) -> Self {
        Bitmap::try_new(vec, length).unwrap()
    }

    /// Returns the value of the bit at position `i`, or `None` if `i` is out of bounds.
    #[inline]
    pub fn get(&self, i: usize) -> Option<bool> {
        if i < self.len() {
            Some(unsafe { self.get_bit_unchecked(i) })
        } else {
            None
        }
    }

    /// Creates a [`Bitmap`] from its internal representation.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `offset + length` does not exceed the number of
    /// bits in `storage`, and that `unset_bits`, if given, is the exact number of unset
    /// bits in the described range.
    pub unsafe fn from_inner_unchecked(
        storage: SharedStorage<u8>,
        offset: usize,
        length: usize,
        unset_bits: Option<usize>,
    ) -> Self {
        debug_assert!(check(&storage[..], offset, length).is_ok());

        let unset_bit_count_cache = if let Some(n) = unset_bits {
            AtomicU64::new(n as u64)
        } else {
            AtomicU64::new(UNKNOWN_BIT_COUNT)
        };
        Self {
            storage,
            offset,
            length,
            unset_bit_count_cache,
        }
    }

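    /// Returns whether the two [`Bitmap`]s have at least one set bit at the same position.
    ///
    /// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let a = Bitmap::from([true, false, true, false]);
    /// let b = Bitmap::from([false, false, true, true]);
    /// assert!(a.intersects_with(&b));
    /// assert_eq!(a.num_intersections_with(&b), 1);
    /// ```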
    pub fn intersects_with(&self, other: &Self) -> bool {
        intersects_with(self, other)
    }

    /// Calculates the number of positions at which both [`Bitmap`]s have a set bit.
    pub fn num_intersections_with(&self, other: &Self) -> usize {
        num_intersections_with(self, other)
    }

    /// Selects the bits from `truthy` where `self` is set and from `falsy` where it is not.
    ///
    /// `truthy` and `falsy` are expected to have the same length as `self`.
    pub fn select(&self, truthy: &Self, falsy: &Self) -> Self {
        super::bitmap_ops::select(self, truthy, falsy)
    }

    /// Like [`Bitmap::select`], but with a constant `falsy` value.
    pub fn select_constant(&self, truthy: &Self, falsy: bool) -> Self {
        super::bitmap_ops::select_constant(self, truthy, falsy)
    }

    /// Calculates the number of edges, i.e. transitions between consecutive bits.
    pub fn num_edges(&self) -> usize {
        super::bitmap_ops::num_edges(self)
    }

    /// Returns the number of zero bits at the start, before the first set bit.
    pub fn leading_zeros(&self) -> usize {
        utils::leading_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of one bits at the start, before the first unset bit.
    pub fn leading_ones(&self) -> usize {
        utils::leading_ones(&self.storage, self.offset, self.length)
    }
    /// Returns the number of zero bits at the end, after the last set bit.
    pub fn trailing_zeros(&self) -> usize {
        utils::trailing_zeros(&self.storage, self.offset, self.length)
    }
    /// Returns the number of one bits at the end, after the last unset bit.
    pub fn trailing_ones(&self) -> usize {
        utils::trailing_ones(&self.storage, self.offset, self.length)
    }

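    /// Removes the leading zeros from this [`Bitmap`] and returns how many there were.
    ///
    /// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
    ///
    /// ```
    /// use polars_arrow::bitmap::Bitmap;
    ///
    /// let mut bitmap = Bitmap::from([false, false, true, true]);
    /// assert_eq!(bitmap.take_leading_zeros(), 2);
    /// assert_eq!(bitmap.len(), 2);
    /// assert!(bitmap.get_bit(0));
    /// ```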
    pub fn take_leading_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let leading_zeros = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_zeros;
        }

        let leading_zeros = self.leading_zeros();
        self.offset += leading_zeros;
        self.length -= leading_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= leading_zeros as u64;
        }
        leading_zeros
    }

    /// Removes the leading ones from this [`Bitmap`] and returns how many there were.
    pub fn take_leading_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let leading_ones = self.length;
            self.offset += self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return leading_ones;
        }

        let leading_ones = self.leading_ones();
        self.offset += leading_ones;
        self.length -= leading_ones;
        // The removed bits were all ones, so the cached unset-bit count is unchanged.
        leading_ones
    }

    /// Removes the trailing zeros from this [`Bitmap`] and returns how many there were.
    pub fn take_trailing_zeros(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == self.length)
        {
            let trailing_zeros = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_zeros;
        }

        let trailing_zeros = self.trailing_zeros();
        self.length -= trailing_zeros;
        if has_cached_unset_bit_count(*self.unset_bit_count_cache.get_mut()) {
            *self.unset_bit_count_cache.get_mut() -= trailing_zeros as u64;
        }
        trailing_zeros
    }

    /// Removes the trailing ones from this [`Bitmap`] and returns how many there were.
    pub fn take_trailing_ones(&mut self) -> usize {
        if self
            .lazy_unset_bits()
            .is_some_and(|unset_bits| unset_bits == 0)
        {
            let trailing_ones = self.length;
            self.length = 0;
            *self.unset_bit_count_cache.get_mut() = 0;
            return trailing_ones;
        }

        let trailing_ones = self.trailing_ones();
        self.length -= trailing_ones;
        // The removed bits were all ones, so the cached unset-bit count is unchanged.
        trailing_ones
    }
}

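/// Creates a [`Bitmap`] from anything that can be viewed as a slice of booleans.
///
/// A minimal usage sketch (assuming the `polars_arrow::bitmap::Bitmap` path):
///
/// ```
/// use polars_arrow::bitmap::Bitmap;
///
/// let bitmap = Bitmap::from([true, false, true]);
/// assert_eq!(bitmap.set_bits(), 2);
///
/// // `FromIterator` is also implemented:
/// let collected: Bitmap = (0..4).map(|i| i % 2 == 0).collect();
/// assert_eq!(collected.unset_bits(), 2);
/// ```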
impl<P: AsRef<[bool]>> From<P> for Bitmap {
    fn from(slice: P) -> Self {
        Self::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}

impl FromIterator<bool> for Bitmap {
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = bool>,
    {
        MutableBitmap::from_iter(iter).into()
    }
}

impl FromTrustedLenIterator<bool> for Bitmap {
    fn from_iter_trusted_length<T: IntoIterator<Item = bool>>(iter: T) -> Self
    where
        T::IntoIter: TrustedLen,
    {
        MutableBitmap::from_trusted_len_iter(iter.into_iter()).into()
    }
}

impl Bitmap {
    /// Creates a new [`Bitmap`] from an iterator of booleans.
    ///
    /// # Safety
    ///
    /// The iterator's reported length must be correct.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter_unchecked(iterator).into()
    }

    /// Creates a new [`Bitmap`] from an iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {
        MutableBitmap::from_trusted_len_iter(iterator).into()
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    #[inline]
    pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<bool, E>>>(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter(iterator)?.into())
    }

    /// Creates a new [`Bitmap`] from a fallible iterator of booleans.
    ///
    /// # Safety
    ///
    /// The iterator's reported length must be correct.
    #[inline]
    pub unsafe fn try_from_trusted_len_iter_unchecked<
        E,
        I: Iterator<Item = std::result::Result<bool, E>>,
    >(
        iterator: I,
    ) -> std::result::Result<Self, E> {
        Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into())
    }
}

impl<'a> IntoIterator for &'a Bitmap {
    type Item = bool;
    type IntoIter = BitmapIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        BitmapIter::<'a>::new(&self.storage, self.offset, self.length)
    }
}

impl IntoIterator for Bitmap {
    type Item = bool;
    type IntoIter = IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self)
    }
}

impl Splitable for Bitmap {
    #[inline(always)]
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        if offset == 0 {
            return (Self::new(), self.clone());
        }
        if offset == self.len() {
            return (self.clone(), Self::new());
        }

        let ubcc = self.unset_bit_count_cache.load(Ordering::Relaxed);

        let lhs_length = offset;
        let rhs_length = self.length - offset;

        let mut lhs_ubcc = UNKNOWN_BIT_COUNT;
        let mut rhs_ubcc = UNKNOWN_BIT_COUNT;

        if has_cached_unset_bit_count(ubcc) {
            if ubcc == 0 {
                lhs_ubcc = 0;
                rhs_ubcc = 0;
            } else if ubcc == self.length as u64 {
                lhs_ubcc = offset as u64;
                rhs_ubcc = (self.length - offset) as u64;
            } else {
                // If the split is sufficiently lopsided, count the zeros in the smaller
                // half and derive the other half from the cached total; otherwise leave
                // both counts unknown rather than scanning half the bitmap.
                let small_portion = (self.length / 4).max(32);

                if lhs_length <= rhs_length {
                    if rhs_length + small_portion >= self.length {
                        let count = count_zeros(&self.storage, self.offset, lhs_length) as u64;
                        lhs_ubcc = count;
                        rhs_ubcc = ubcc - count;
                    }
                } else if lhs_length + small_portion >= self.length {
                    let count = count_zeros(&self.storage, self.offset + offset, rhs_length) as u64;
                    lhs_ubcc = ubcc - count;
                    rhs_ubcc = count;
                }
            }
        }

        debug_assert!(lhs_ubcc == UNKNOWN_BIT_COUNT || lhs_ubcc <= ubcc);
        debug_assert!(rhs_ubcc == UNKNOWN_BIT_COUNT || rhs_ubcc <= ubcc);

        (
            Self {
                storage: self.storage.clone(),
                offset: self.offset,
                length: lhs_length,
                unset_bit_count_cache: AtomicU64::new(lhs_ubcc),
            },
            Self {
                storage: self.storage.clone(),
                offset: self.offset + offset,
                length: rhs_length,
                unset_bit_count_cache: AtomicU64::new(rhs_ubcc),
            },
        )
    }
}
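
#[cfg(test)]
mod tests {
    use super::*;

    // A few sanity checks sketching how the cached unset-bit count behaves; they only
    // exercise APIs defined in this module, so the expected values follow directly
    // from the code above.
    #[test]
    fn unset_bit_count_is_cached_and_survives_slicing() {
        let bitmap: Bitmap = (0..10).map(|i| i % 2 == 0).collect();
        assert_eq!(bitmap.len(), 10);

        // Counting stores the result, so the lazy accessor can return it afterwards.
        assert_eq!(bitmap.unset_bits(), 5);
        assert_eq!(bitmap.lazy_unset_bits(), Some(5));

        // Slicing a bitmap with a known count keeps the counts consistent.
        let sliced = bitmap.sliced(0, 4);
        assert_eq!(sliced.set_bits(), 2);
        assert_eq!(sliced.unset_bits(), 2);
    }
}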