1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_error::{PolarsResult, polars_bail};
5use polars_utils::vec::PushUnchecked;
6
7use super::bitmask::BitMask;
8use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
9use super::{Bitmap, intersects_with_mut};
10use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
11use crate::storage::SharedStorage;
12use crate::trusted_len::TrustedLen;
13
/// A growable container of booleans stored as a packed bitmask: 8 bits per
/// byte, least-significant bit first. Can be frozen into an immutable `Bitmap`.
#[derive(Clone)]
pub struct MutableBitmap {
    // Bytes holding the bits, LSB-first within each byte.
    buffer: Vec<u8>,
    // Number of valid bits; invariant: `buffer.len() == length.div_ceil(8)`.
    length: usize,
}
53
54impl std::fmt::Debug for MutableBitmap {
55 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56 fmt(&self.buffer, 0, self.len(), f)
57 }
58}
59
60impl PartialEq for MutableBitmap {
61 fn eq(&self, other: &Self) -> bool {
62 self.iter().eq(other.iter())
63 }
64}
65
66impl MutableBitmap {
67 #[inline]
69 pub fn new() -> Self {
70 Self {
71 buffer: Vec::new(),
72 length: 0,
73 }
74 }
75
76 #[inline]
80 pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81 if length > bytes.len().saturating_mul(8) {
82 polars_bail!(InvalidOperation:
83 "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84 length,
85 bytes.len().saturating_mul(8)
86 )
87 }
88
89 let min_byte_length_needed = length.div_ceil(8);
91 bytes.drain(min_byte_length_needed..);
92 Ok(Self {
93 length,
94 buffer: bytes,
95 })
96 }
97
98 #[inline]
103 pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104 Self::try_new(buffer, length).unwrap()
105 }
106
107 #[inline]
109 pub fn with_capacity(capacity: usize) -> Self {
110 Self {
111 buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112 length: 0,
113 }
114 }
115
116 #[inline]
118 pub fn push(&mut self, value: bool) {
119 if self.length % 8 == 0 {
120 self.buffer.push(0);
121 }
122 let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123 *byte = set_bit_in_byte(*byte, self.length % 8, value);
124 self.length += 1;
125 }
126
127 #[inline]
130 pub fn pop(&mut self) -> Option<bool> {
131 if self.is_empty() {
132 return None;
133 }
134
135 self.length -= 1;
136 let value = unsafe { self.get_unchecked(self.length) };
137 if self.length % 8 == 0 {
138 self.buffer.pop();
139 }
140 Some(value)
141 }
142
    /// Returns the bit at `index`.
    ///
    /// # Panics
    /// Panics iff `index >= self.len()`.
    #[inline]
    pub fn get(&self, index: usize) -> bool {
        assert!(index < self.len());
        // SAFETY: bounds checked by the assert above.
        unsafe { self.get_unchecked(index) }
    }
151
    /// Returns the bit at `index` without bound checks.
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
        get_bit_unchecked(&self.buffer, index)
    }
160
    /// Sets the bit at `index` to `value`.
    ///
    /// # Panics
    /// Panics iff `index >= self.len()`.
    #[inline]
    pub fn set(&mut self, index: usize, value: bool) {
        assert!(index < self.len());
        // SAFETY: bounds checked by the assert above.
        unsafe {
            self.set_unchecked(index, value);
        }
    }
171
    /// Bitwise-ORs the bit at `index` with `value`: sets the bit when `value`
    /// is true and leaves it (and all other bits of the byte) unchanged otherwise.
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
    }
180
181 #[inline]
186 pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
187 *self.buffer.get_unchecked_mut(index / 8) &= (value as u8) << (index % 8);
188 }
189
190 pub fn iter(&self) -> BitmapIter {
192 BitmapIter::new(&self.buffer, 0, self.length)
193 }
194
195 #[inline]
197 pub fn clear(&mut self) {
198 self.length = 0;
199 self.buffer.clear();
200 }
201
202 #[inline]
206 pub fn extend_constant(&mut self, additional: usize, value: bool) {
207 if additional == 0 {
208 return;
209 }
210
211 if value {
212 self.extend_set(additional)
213 } else {
214 self.extend_unset(additional)
215 }
216 }
217
218 pub fn resize(&mut self, length: usize, value: bool) {
221 if let Some(additional) = length.checked_sub(self.len()) {
222 self.extend_constant(additional, value);
223 } else {
224 self.buffer.truncate(length.saturating_add(7) / 8);
225 self.length = length;
226 }
227 }
228
229 #[inline]
231 pub fn from_len_zeroed(length: usize) -> Self {
232 Self {
233 buffer: vec![0; length.saturating_add(7) / 8],
234 length,
235 }
236 }
237
238 #[inline]
240 pub fn from_len_set(length: usize) -> Self {
241 Self {
242 buffer: vec![u8::MAX; length.saturating_add(7) / 8],
243 length,
244 }
245 }
246
    /// Reserves capacity for at least `additional` more bits.
    #[inline(always)]
    pub fn reserve(&mut self, additional: usize) {
        // Bytes needed for the final length minus bytes already allocated for;
        // `buffer.len() == length.div_ceil(8)`, so the subtraction cannot underflow.
        self.buffer
            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
    }
253
254 #[inline]
256 pub fn capacity(&self) -> usize {
257 self.buffer.capacity() * 8
258 }
259
    /// Appends a single bit without checking the buffer's capacity.
    ///
    /// # Safety
    /// The caller must ensure the buffer has spare byte capacity when a new
    /// byte is needed (i.e. when `self.len() % 8 == 0`), since `push_unchecked`
    /// on the inner `Vec` performs no growth check.
    #[inline]
    pub unsafe fn push_unchecked(&mut self, value: bool) {
        if self.length % 8 == 0 {
            self.buffer.push_unchecked(0);
        }
        let byte = self.buffer.last_mut().unwrap_unchecked();
        *byte = set_bit_in_byte(*byte, self.length % 8, value);
        self.length += 1;
    }
273
274 pub fn unset_bits(&self) -> usize {
280 count_zeros(&self.buffer, 0, self.length)
281 }
282
283 pub fn set_bits(&self) -> usize {
289 self.length - self.unset_bits()
290 }
291
    /// Returns the number of bits in the bitmap.
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }
297
298 #[inline]
300 pub fn is_empty(&self) -> bool {
301 self.len() == 0
302 }
303
    /// Sets the bit length, resizing the byte buffer to match.
    ///
    /// # Safety
    /// The caller must ensure `len.div_ceil(8)` does not exceed the buffer's
    /// capacity and that all bytes up to that point are initialized.
    #[inline]
    pub(crate) unsafe fn set_len(&mut self, len: usize) {
        self.buffer.set_len(len.saturating_add(7) / 8);
        self.length = len;
    }
311
    /// Extends the bitmap with `additional` set (`true`) bits.
    fn extend_set(&mut self, mut additional: usize) {
        let offset = self.length % 8;
        let added = if offset != 0 {
            // The last byte is partially used: fill (some of) its free high
            // bits with ones.
            let last_index = self.buffer.len() - 1;
            let last = &mut self.buffer[last_index];

            let remaining = 0b11111111u8;
            // Keep only the low `additional` bits (all 8 when additional >= 8)...
            let remaining = remaining >> 8usize.saturating_sub(additional);
            // ...then move them past the `offset` bits already in use.
            let remaining = remaining << offset;
            *last |= remaining;
            std::cmp::min(additional, 8 - offset)
        } else {
            0
        };
        self.length += added;
        additional = additional.saturating_sub(added);
        if additional > 0 {
            // Now byte-aligned: append whole 0xFF bytes for the rest.
            debug_assert_eq!(self.length % 8, 0);
            let existing = self.length.saturating_add(7) / 8;
            let required = (self.length + additional).saturating_add(7) / 8;
            self.buffer
                .extend(std::iter::repeat_n(0b11111111u8, required - existing));
            self.length += additional;
        }
    }
339
    /// Extends the bitmap with `additional` unset (`false`) bits.
    fn extend_unset(&mut self, mut additional: usize) {
        let offset = self.length % 8;
        let added = if offset != 0 {
            // The last byte is partially used: zero its unused high bits so
            // they read back as `false`.
            let last_index = self.buffer.len() - 1;
            let last = &mut self.buffer[last_index];
            // Keep only the `offset` live low bits.
            *last &= 0b11111111u8 >> (8 - offset);
            std::cmp::min(additional, 8 - offset)
        } else {
            0
        };
        self.length += added;
        additional = additional.saturating_sub(added);
        if additional > 0 {
            // Now byte-aligned: append zeroed bytes for the rest.
            debug_assert_eq!(self.length % 8, 0);
            self.buffer
                .resize((self.length + additional).saturating_add(7) / 8, 0);
            self.length += additional;
        }
    }
360
    /// Sets the bit at `index` to `value` without bound checks.
    ///
    /// # Safety
    /// The caller must ensure `index < self.len()`.
    #[inline]
    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
        debug_assert!(index < self.len());
        let byte = self.buffer.get_unchecked_mut(index / 8);
        *byte = set_bit_in_byte(*byte, index % 8, value);
    }
371
    /// Shrinks the underlying byte buffer's capacity to fit its length.
    pub fn shrink_to_fit(&mut self) {
        self.buffer.shrink_to_fit();
    }
376
    /// Returns an iterator over the bits of the bitmap, `T::BITS` at a time.
    pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
        BitChunks::new(&self.buffer, 0, self.length)
    }
383
    /// Returns a mutable iterator over exact-sized chunks of the buffer.
    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<T> {
        BitChunksExactMut::new(&mut self.buffer, self.length)
    }
388
    // Delegates to `intersects_with_mut`; presumably true when `self` and
    // `other` share at least one set bit — confirm in the bitmap module.
    pub fn intersects_with(&self, other: &Self) -> bool {
        intersects_with_mut(self, other)
    }
392
393 pub fn freeze(self) -> Bitmap {
394 self.into()
395 }
396}
397
398impl From<MutableBitmap> for Bitmap {
399 #[inline]
400 fn from(buffer: MutableBitmap) -> Self {
401 Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
402 }
403}
404
405impl From<MutableBitmap> for Option<Bitmap> {
406 #[inline]
407 fn from(buffer: MutableBitmap) -> Self {
408 let unset_bits = buffer.unset_bits();
409 if unset_bits > 0 {
410 let bitmap = unsafe {
412 Bitmap::from_inner_unchecked(
413 SharedStorage::from_vec(buffer.buffer),
414 0,
415 buffer.length,
416 Some(unset_bits),
417 )
418 };
419 Some(bitmap)
420 } else {
421 None
422 }
423 }
424}
425
/// Builds a [`MutableBitmap`] out of anything viewable as a slice of booleans.
impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
    #[inline]
    fn from(slice: P) -> Self {
        // A slice iterator has an exact length, so the trusted-len path applies.
        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
    }
}
432
433impl Extend<bool> for MutableBitmap {
434 fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
435 let mut iterator = iter.into_iter();
436
437 let mut buffer = std::mem::take(&mut self.buffer);
438 let mut length = std::mem::take(&mut self.length);
439
440 let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
441 buffer.reserve(byte_capacity);
442
443 loop {
444 let mut exhausted = false;
445 let mut byte_accum: u8 = 0;
446 let mut mask: u8 = 1;
447
448 while mask != 0 {
450 if let Some(value) = iterator.next() {
451 length += 1;
452 byte_accum |= match value {
453 true => mask,
454 false => 0,
455 };
456 mask <<= 1;
457 } else {
458 exhausted = true;
459 break;
460 }
461 }
462
463 if exhausted && mask == 1 {
465 break;
466 }
467
468 if buffer.len() == buffer.capacity() {
470 let additional_byte_capacity = 1usize.saturating_add(
472 iterator.size_hint().0.saturating_add(7) / 8, );
474 buffer.reserve(additional_byte_capacity)
475 }
476
477 buffer.push(byte_accum);
479 if exhausted {
480 break;
481 }
482 }
483
484 self.buffer = buffer;
485 self.length = length;
486 }
487}
488
489impl FromIterator<bool> for MutableBitmap {
490 fn from_iter<I>(iter: I) -> Self
491 where
492 I: IntoIterator<Item = bool>,
493 {
494 let mut bm = Self::new();
495 bm.extend(iter);
496 bm
497 }
498}
499
/// Packs the next 64 items of `iterator` into a `u64`, LSB first: bit `i` of
/// the result is the `i`-th item.
///
/// # Safety
/// The caller must guarantee the iterator yields at least 64 more items.
#[inline]
unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
    let mut chunk = 0u64;
    for i in 0..64 {
        let value = match iterator.next() {
            Some(value) => value,
            // SAFETY: the caller guarantees at least 64 remaining items.
            None => unsafe { unreachable_unchecked() },
        };
        chunk |= (value as u64) << i;
    }
    chunk
}
525
/// Packs the next `len` (at most 8) items of `iterator` into a `u8`, LSB
/// first: bit `i` of the result is the `i`-th item.
///
/// # Safety
/// The caller must guarantee the iterator yields at least `len` more items.
#[inline]
unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
    let mut byte = 0u8;
    for i in 0..len {
        let value = match iterator.next() {
            Some(value) => value,
            // SAFETY: the caller guarantees at least `len` remaining items.
            None => unsafe { unreachable_unchecked() },
        };
        byte |= (value as u8) << i;
    }
    byte
}
546
/// Appends all bits of `iterator` to `buffer`, assuming `buffer` currently
/// ends on a byte boundary. Returns the number of bits appended.
///
/// # Safety
/// The iterator's upper size hint must be `Some` and exact (trusted length).
#[inline]
unsafe fn extend_aligned_trusted_iter_unchecked(
    buffer: &mut Vec<u8>,
    mut iterator: impl Iterator<Item = bool>,
) -> usize {
    let additional_bits = iterator.size_hint().1.unwrap();
    let chunks = additional_bits / 64;
    let remainder = additional_bits % 64;

    let additional = additional_bits.div_ceil(8);
    assert_eq!(
        additional,
        chunks * 8 + remainder / 8 + (remainder % 8 > 0) as usize
    );
    buffer.reserve(additional);

    // Bulk path: 64 bits at a time, appended as little-endian bytes.
    for _ in 0..chunks {
        let chunk = get_chunk_unchecked(&mut iterator);
        buffer.extend_from_slice(&chunk.to_le_bytes());
    }

    // Whole bytes of the remainder.
    for _ in 0..(remainder / 8) {
        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
        buffer.push(byte)
    }

    // Trailing partial byte, if any.
    let remainder = remainder % 8;
    if remainder > 0 {
        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
        buffer.push(byte)
    }
    additional_bits
}
587
588impl MutableBitmap {
    /// Extends `self` from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
        // SAFETY: `TrustedLen` guarantees the size hint is exact.
        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
    }
595
596 #[inline]
601 pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
602 &mut self,
603 mut iterator: I,
604 ) {
605 let mut length = iterator.size_hint().1.unwrap();
607
608 let bit_offset = self.length % 8;
609
610 if length < 8 - bit_offset {
611 if bit_offset == 0 {
612 self.buffer.push(0);
613 }
614 let byte = self.buffer.last_mut().unwrap();
616 let mut i = bit_offset;
617 for value in iterator {
618 *byte = set_bit_in_byte(*byte, i, value);
619 i += 1;
620 }
621 self.length += length;
622 return;
623 }
624
625 if bit_offset != 0 {
629 let byte = self.buffer.last_mut().unwrap();
631 (bit_offset..8).for_each(|i| {
632 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
633 });
634 self.length += 8 - bit_offset;
635 length -= 8 - bit_offset;
636 }
637
638 debug_assert_eq!(self.length % 8, 0);
640
641 unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
642 self.length += length;
643 }
644
    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
    ///
    /// # Safety
    /// The iterator's upper size hint must be `Some` and exact.
    #[inline]
    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
    where
        I: Iterator<Item = bool>,
    {
        let mut buffer = Vec::<u8>::new();

        // A fresh buffer is trivially byte-aligned, so the aligned path applies.
        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);

        Self { buffer, length }
    }
660
    /// Creates a new [`MutableBitmap`] from a [`TrustedLen`] iterator of booleans.
    #[inline]
    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
    where
        I: TrustedLen<Item = bool>,
    {
        // SAFETY: `TrustedLen` guarantees the size hint is exact.
        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
    }
670
    /// Creates a new [`MutableBitmap`] from a fallible [`TrustedLen`] iterator,
    /// short-circuiting on the first error.
    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
    where
        I: TrustedLen<Item = std::result::Result<bool, E>>,
    {
        // SAFETY: `TrustedLen` guarantees the size hint is exact.
        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
    }
678
    /// Creates a new [`MutableBitmap`] from a fallible iterator of booleans,
    /// short-circuiting on the first error.
    ///
    /// # Safety
    /// The iterator's upper size hint must be `Some` and exact.
    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
        mut iterator: I,
    ) -> std::result::Result<Self, E>
    where
        I: Iterator<Item = std::result::Result<bool, E>>,
    {
        let length = iterator.size_hint().1.unwrap();

        let mut buffer = vec![0u8; length.div_ceil(8)];

        let chunks = length / 8;
        let reminder = length % 8;

        let data = buffer.as_mut_slice();
        // Fill every whole byte, 8 bits at a time, LSB first.
        data[..chunks].iter_mut().try_for_each(|byte| {
            (0..8).try_for_each(|i| {
                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
                Ok(())
            })
        })?;

        // Fill the trailing partial byte, if any, from the remaining items.
        if reminder != 0 {
            let last = &mut data[chunks];
            iterator.enumerate().try_for_each(|(i, value)| {
                *last = set_bit_in_byte(*last, i, value?);
                Ok(())
            })?;
        }

        Ok(Self { buffer, length })
    }
714
    /// Extends `self` from `length` bits of `slice` starting at bit `offset`,
    /// for the case where the source is byte-aligned but `self.length` is not.
    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
        let aligned_offset = offset / 8;
        let own_offset = self.length % 8;
        // Preconditions: the source is byte-aligned, we are not.
        debug_assert_eq!(offset % 8, 0);
        debug_assert!(own_offset != 0);
        let bytes_len = length.saturating_add(7) / 8;
        let items = &slice[aligned_offset..aligned_offset + bytes_len];
        let buffer = self.buffer.as_mut_slice();
        let last = &mut buffer[buffer.len() - 1];

        // Zero the unused high bits of our last byte, then splice in the low
        // bits of the first source byte.
        *last &= 0b11111111u8 >> (8 - own_offset);
        *last |= items[0] << own_offset;

        if length + own_offset <= 8 {
            // Everything fit in the existing last byte.
            self.length += length;
            return;
        }
        let additional = length - (8 - own_offset);

        // Merge consecutive source bytes pairwise, shifted by the offset, to
        // produce the remaining destination bytes; a zero byte is appended so
        // the final window has a right-hand neighbor.
        let remaining = [items[items.len() - 1], 0];
        let bytes = items
            .windows(2)
            .chain(std::iter::once(remaining.as_ref()))
            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
            .take(additional.saturating_add(7) / 8);
        self.buffer.extend(bytes);

        self.length += length;
    }
754
755 fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
756 let aligned_offset = offset / 8;
757 let bytes_len = length.saturating_add(7) / 8;
758 let items = &slice[aligned_offset..aligned_offset + bytes_len];
759 self.buffer.extend_from_slice(items);
760 self.length += length;
761 }
762
    /// Extends the [`MutableBitmap`] from a slice of bits; `offset` and
    /// `length` are measured in bits, not bytes.
    ///
    /// # Safety
    /// The caller must ensure `offset + length <= slice.len() * 8`.
    #[inline]
    pub unsafe fn extend_from_slice_unchecked(
        &mut self,
        slice: &[u8],
        offset: usize,
        length: usize,
    ) {
        if length == 0 {
            return;
        };
        let is_aligned = self.length % 8 == 0;
        let other_is_aligned = offset % 8 == 0;
        match (is_aligned, other_is_aligned) {
            // Both byte-aligned: plain byte copy.
            (true, true) => self.extend_aligned(slice, offset, length),
            // Source aligned, destination not: shifted byte merge.
            (false, true) => self.extend_unaligned(slice, offset, length),
            // Source unaligned: fall back to a bit-by-bit iterator.
            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
        }
        // Invariant: the buffer holds exactly the bytes needed for `length` bits.
        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
    }
792
    /// Extends the [`MutableBitmap`] from a slice of bits; `offset` and
    /// `length` are measured in bits, not bytes.
    ///
    /// # Panics
    /// Panics iff `offset + length > slice.len() * 8`.
    #[inline]
    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
        assert!(offset + length <= slice.len() * 8);
        // SAFETY: bounds checked by the assert above.
        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
    }
804
    /// Extends the [`MutableBitmap`] with all bits of a [`BitMask`].
    #[inline]
    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
        let (slice, offset, length) = bitmask.inner();
        self.extend_from_slice(slice, offset, length)
    }
810
    /// Extends the [`MutableBitmap`] with all bits of a [`Bitmap`].
    #[inline]
    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
        let (slice, offset, length) = bitmap.as_slice();
        // SAFETY: `Bitmap::as_slice` is presumed to uphold
        // `offset + length <= slice.len() * 8` — confirm in the Bitmap impl.
        unsafe {
            self.extend_from_slice_unchecked(slice, offset, length);
        }
    }
820
821 #[inline]
824 pub fn as_slice(&self) -> &[u8] {
825 let len = (self.length).saturating_add(7) / 8;
826 &self.buffer[..len]
827 }
828
829 #[inline]
832 pub fn as_mut_slice(&mut self) -> &mut [u8] {
833 let len = (self.length).saturating_add(7) / 8;
834 &mut self.buffer[..len]
835 }
836}
837
838impl Default for MutableBitmap {
839 fn default() -> Self {
840 Self::new()
841 }
842}
843
844impl<'a> IntoIterator for &'a MutableBitmap {
845 type Item = bool;
846 type IntoIter = BitmapIter<'a>;
847
848 fn into_iter(self) -> Self::IntoIter {
849 BitmapIter::<'a>::new(&self.buffer, 0, self.length)
850 }
851}