1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_error::{PolarsResult, polars_bail};
5use polars_utils::vec::PushUnchecked;
6
7use super::bitmask::BitMask;
8use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
9use super::{Bitmap, intersects_with_mut};
10use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
11use crate::storage::SharedStorage;
12use crate::trusted_len::TrustedLen;
13
14#[derive(Clone)]
48pub struct MutableBitmap {
49 buffer: Vec<u8>,
50 length: usize,
52}
53
54impl std::fmt::Debug for MutableBitmap {
55 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56 fmt(&self.buffer, 0, self.len(), f)
57 }
58}
59
60impl PartialEq for MutableBitmap {
61 fn eq(&self, other: &Self) -> bool {
62 self.iter().eq(other.iter())
63 }
64}
65
66impl MutableBitmap {
67 #[inline]
69 pub fn new() -> Self {
70 Self {
71 buffer: Vec::new(),
72 length: 0,
73 }
74 }
75
76 #[inline]
80 pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81 if length > bytes.len().saturating_mul(8) {
82 polars_bail!(InvalidOperation:
83 "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84 length,
85 bytes.len().saturating_mul(8)
86 )
87 }
88
89 let min_byte_length_needed = length.div_ceil(8);
91 bytes.drain(min_byte_length_needed..);
92 Ok(Self {
93 length,
94 buffer: bytes,
95 })
96 }
97
98 #[inline]
103 pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104 Self::try_new(buffer, length).unwrap()
105 }
106
107 #[inline]
109 pub fn with_capacity(capacity: usize) -> Self {
110 Self {
111 buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112 length: 0,
113 }
114 }
115
116 #[inline]
118 pub fn push(&mut self, value: bool) {
119 if self.length.is_multiple_of(8) {
120 self.buffer.push(0);
121 }
122 let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123 *byte = set_bit_in_byte(*byte, self.length % 8, value);
124 self.length += 1;
125 }
126
127 #[inline]
130 pub fn pop(&mut self) -> Option<bool> {
131 if self.is_empty() {
132 return None;
133 }
134
135 self.length -= 1;
136 let value = unsafe { self.get_unchecked(self.length) };
137 if self.length.is_multiple_of(8) {
138 self.buffer.pop();
139 }
140 Some(value)
141 }
142
143 #[inline]
147 pub fn get(&self, index: usize) -> bool {
148 assert!(index < self.len());
149 unsafe { self.get_unchecked(index) }
150 }
151
152 #[inline]
157 pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158 get_bit_unchecked(&self.buffer, index)
159 }
160
161 #[inline]
165 pub fn set(&mut self, index: usize, value: bool) {
166 assert!(index < self.len());
167 unsafe {
168 self.set_unchecked(index, value);
169 }
170 }
171
172 #[inline]
177 pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
178 *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
179 }
180
181 #[inline]
186 pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
187 *self.buffer.get_unchecked_mut(index / 8) &=
188 (0xFE | u8::from(value)).rotate_left(index as u32);
189 }
190
191 #[inline]
196 pub unsafe fn xor_pos_unchecked(&mut self, index: usize, value: bool) {
197 *self.buffer.get_unchecked_mut(index / 8) ^= (value as u8) << (index % 8);
198 }
199
200 pub fn iter(&self) -> BitmapIter<'_> {
202 BitmapIter::new(&self.buffer, 0, self.length)
203 }
204
205 #[inline]
207 pub fn clear(&mut self) {
208 self.length = 0;
209 self.buffer.clear();
210 }
211
212 #[inline]
216 pub fn extend_constant(&mut self, additional: usize, value: bool) {
217 if additional == 0 {
218 return;
219 }
220
221 if value {
222 self.extend_set(additional)
223 } else {
224 self.extend_unset(additional)
225 }
226 }
227
228 pub fn resize(&mut self, length: usize, value: bool) {
231 if let Some(additional) = length.checked_sub(self.len()) {
232 self.extend_constant(additional, value);
233 } else {
234 self.buffer.truncate(length.saturating_add(7) / 8);
235 self.length = length;
236 }
237 }
238
239 #[inline]
241 pub fn from_len_zeroed(length: usize) -> Self {
242 Self {
243 buffer: vec![0; length.saturating_add(7) / 8],
244 length,
245 }
246 }
247
248 #[inline]
250 pub fn from_len_set(length: usize) -> Self {
251 Self {
252 buffer: vec![u8::MAX; length.saturating_add(7) / 8],
253 length,
254 }
255 }
256
257 #[inline(always)]
259 pub fn reserve(&mut self, additional: usize) {
260 self.buffer
261 .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
262 }
263
264 #[inline]
266 pub fn capacity(&self) -> usize {
267 self.buffer.capacity() * 8
268 }
269
270 #[inline]
275 pub unsafe fn push_unchecked(&mut self, value: bool) {
276 if self.length.is_multiple_of(8) {
277 self.buffer.push_unchecked(0);
278 }
279 let byte = self.buffer.last_mut().unwrap_unchecked();
280 *byte = set_bit_in_byte(*byte, self.length % 8, value);
281 self.length += 1;
282 }
283
284 pub fn unset_bits(&self) -> usize {
290 count_zeros(&self.buffer, 0, self.length)
291 }
292
293 pub fn set_bits(&self) -> usize {
299 self.length - self.unset_bits()
300 }
301
302 #[inline]
304 pub fn len(&self) -> usize {
305 self.length
306 }
307
308 #[inline]
310 pub fn is_empty(&self) -> bool {
311 self.len() == 0
312 }
313
314 #[inline]
317 pub(crate) unsafe fn set_len(&mut self, len: usize) {
318 self.buffer.set_len(len.saturating_add(7) / 8);
319 self.length = len;
320 }
321
322 fn extend_set(&mut self, mut additional: usize) {
323 let offset = self.length % 8;
324 let added = if offset != 0 {
325 let last_index = self.buffer.len() - 1;
327 let last = &mut self.buffer[last_index];
328
329 let remaining = 0b11111111u8;
330 let remaining = remaining >> 8usize.saturating_sub(additional);
331 let remaining = remaining << offset;
332 *last |= remaining;
333 std::cmp::min(additional, 8 - offset)
334 } else {
335 0
336 };
337 self.length += added;
338 additional = additional.saturating_sub(added);
339 if additional > 0 {
340 debug_assert_eq!(self.length % 8, 0);
341 let existing = self.length.saturating_add(7) / 8;
342 let required = (self.length + additional).saturating_add(7) / 8;
343 self.buffer
345 .extend(std::iter::repeat_n(0b11111111u8, required - existing));
346 self.length += additional;
347 }
348 }
349
350 fn extend_unset(&mut self, mut additional: usize) {
351 let offset = self.length % 8;
352 let added = if offset != 0 {
353 let last_index = self.buffer.len() - 1;
355 let last = &mut self.buffer[last_index];
356 *last &= 0b11111111u8 >> (8 - offset); std::cmp::min(additional, 8 - offset)
358 } else {
359 0
360 };
361 self.length += added;
362 additional = additional.saturating_sub(added);
363 if additional > 0 {
364 debug_assert_eq!(self.length % 8, 0);
365 self.buffer
366 .resize((self.length + additional).saturating_add(7) / 8, 0);
367 self.length += additional;
368 }
369 }
370
371 #[inline]
376 pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
377 debug_assert!(index < self.len());
378 let byte = self.buffer.get_unchecked_mut(index / 8);
379 *byte = set_bit_in_byte(*byte, index % 8, value);
380 }
381
382 pub fn shrink_to_fit(&mut self) {
384 self.buffer.shrink_to_fit();
385 }
386
387 pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
391 BitChunks::new(&self.buffer, 0, self.length)
392 }
393
394 pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<'_, T> {
396 BitChunksExactMut::new(&mut self.buffer, self.length)
397 }
398
399 pub fn intersects_with(&self, other: &Self) -> bool {
400 intersects_with_mut(self, other)
401 }
402
403 pub fn freeze(self) -> Bitmap {
404 self.into()
405 }
406}
407
408impl From<MutableBitmap> for Bitmap {
409 #[inline]
410 fn from(buffer: MutableBitmap) -> Self {
411 Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
412 }
413}
414
415impl From<MutableBitmap> for Option<Bitmap> {
416 #[inline]
417 fn from(buffer: MutableBitmap) -> Self {
418 let unset_bits = buffer.unset_bits();
419 if unset_bits > 0 {
420 let bitmap = unsafe {
422 Bitmap::from_inner_unchecked(
423 SharedStorage::from_vec(buffer.buffer),
424 0,
425 buffer.length,
426 Some(unset_bits),
427 )
428 };
429 Some(bitmap)
430 } else {
431 None
432 }
433 }
434}
435
436impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
437 #[inline]
438 fn from(slice: P) -> Self {
439 MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
440 }
441}
442
443impl Extend<bool> for MutableBitmap {
444 fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
445 let mut iterator = iter.into_iter();
446
447 let mut buffer = std::mem::take(&mut self.buffer);
448 let mut length = std::mem::take(&mut self.length);
449
450 let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
451 buffer.reserve(byte_capacity);
452
453 loop {
454 let mut exhausted = false;
455 let mut byte_accum: u8 = 0;
456 let mut mask: u8 = 1;
457
458 while mask != 0 {
460 if let Some(value) = iterator.next() {
461 length += 1;
462 byte_accum |= match value {
463 true => mask,
464 false => 0,
465 };
466 mask <<= 1;
467 } else {
468 exhausted = true;
469 break;
470 }
471 }
472
473 if exhausted && mask == 1 {
475 break;
476 }
477
478 if buffer.len() == buffer.capacity() {
480 let additional_byte_capacity = 1usize.saturating_add(
482 iterator.size_hint().0.saturating_add(7) / 8, );
484 buffer.reserve(additional_byte_capacity)
485 }
486
487 buffer.push(byte_accum);
489 if exhausted {
490 break;
491 }
492 }
493
494 self.buffer = buffer;
495 self.length = length;
496 }
497}
498
499impl FromIterator<bool> for MutableBitmap {
500 fn from_iter<I>(iter: I) -> Self
501 where
502 I: IntoIterator<Item = bool>,
503 {
504 let mut bm = Self::new();
505 bm.extend(iter);
506 bm
507 }
508}
509
510#[inline]
515unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
516 let mut byte = 0u64;
517 let mut mask;
518 for i in 0..8 {
519 mask = 1u64 << (8 * i);
520 for _ in 0..8 {
521 let value = match iterator.next() {
522 Some(value) => value,
523 None => unsafe { unreachable_unchecked() },
524 };
525
526 byte |= match value {
527 true => mask,
528 false => 0,
529 };
530 mask <<= 1;
531 }
532 }
533 byte
534}
535
536#[inline]
539unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
540 let mut byte_accum: u8 = 0;
541 let mut mask: u8 = 1;
542 for _ in 0..len {
543 let value = match iterator.next() {
544 Some(value) => value,
545 None => unsafe { unreachable_unchecked() },
546 };
547
548 byte_accum |= match value {
549 true => mask,
550 false => 0,
551 };
552 mask <<= 1;
553 }
554 byte_accum
555}
556
557#[inline]
561unsafe fn extend_aligned_trusted_iter_unchecked(
562 buffer: &mut Vec<u8>,
563 mut iterator: impl Iterator<Item = bool>,
564) -> usize {
565 let additional_bits = iterator.size_hint().1.unwrap();
566 let chunks = additional_bits / 64;
567 let remainder = additional_bits % 64;
568
569 let additional = additional_bits.div_ceil(8);
570 assert_eq!(
571 additional,
572 chunks * 8 + remainder / 8 + !remainder.is_multiple_of(8) as usize
574 );
575 buffer.reserve(additional);
576
577 for _ in 0..chunks {
579 let chunk = get_chunk_unchecked(&mut iterator);
580 buffer.extend_from_slice(&chunk.to_le_bytes());
581 }
582
583 for _ in 0..(remainder / 8) {
585 let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
586 buffer.push(byte)
587 }
588
589 let remainder = remainder % 8;
591 if remainder > 0 {
592 let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
593 buffer.push(byte)
594 }
595 additional_bits
596}
597
598impl MutableBitmap {
599 #[inline]
601 pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
602 unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
604 }
605
606 #[inline]
611 pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
612 &mut self,
613 mut iterator: I,
614 ) {
615 let mut length = iterator.size_hint().1.unwrap();
617
618 let bit_offset = self.length % 8;
619
620 if length < 8 - bit_offset {
621 if bit_offset == 0 {
622 self.buffer.push(0);
623 }
624 let byte = self.buffer.last_mut().unwrap();
626 let mut i = bit_offset;
627 for value in iterator {
628 *byte = set_bit_in_byte(*byte, i, value);
629 i += 1;
630 }
631 self.length += length;
632 return;
633 }
634
635 if bit_offset != 0 {
639 let byte = self.buffer.last_mut().unwrap();
641 (bit_offset..8).for_each(|i| {
642 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
643 });
644 self.length += 8 - bit_offset;
645 length -= 8 - bit_offset;
646 }
647
648 debug_assert_eq!(self.length % 8, 0);
650
651 unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
652 self.length += length;
653 }
654
655 #[inline]
660 pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
661 where
662 I: Iterator<Item = bool>,
663 {
664 let mut buffer = Vec::<u8>::new();
665
666 let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
667
668 Self { buffer, length }
669 }
670
671 #[inline]
673 pub fn from_trusted_len_iter<I>(iterator: I) -> Self
674 where
675 I: TrustedLen<Item = bool>,
676 {
677 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
679 }
680
681 pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
683 where
684 I: TrustedLen<Item = std::result::Result<bool, E>>,
685 {
686 unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
687 }
688
689 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
694 mut iterator: I,
695 ) -> std::result::Result<Self, E>
696 where
697 I: Iterator<Item = std::result::Result<bool, E>>,
698 {
699 let length = iterator.size_hint().1.unwrap();
700
701 let mut buffer = vec![0u8; length.div_ceil(8)];
702
703 let chunks = length / 8;
704 let reminder = length % 8;
705
706 let data = buffer.as_mut_slice();
707 data[..chunks].iter_mut().try_for_each(|byte| {
708 (0..8).try_for_each(|i| {
709 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
710 Ok(())
711 })
712 })?;
713
714 if reminder != 0 {
715 let last = &mut data[chunks];
716 iterator.enumerate().try_for_each(|(i, value)| {
717 *last = set_bit_in_byte(*last, i, value?);
718 Ok(())
719 })?;
720 }
721
722 Ok(Self { buffer, length })
723 }
724
725 fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
726 let aligned_offset = offset / 8;
732 let own_offset = self.length % 8;
733 debug_assert_eq!(offset % 8, 0); debug_assert!(own_offset != 0); let bytes_len = length.saturating_add(7) / 8;
737 let items = &slice[aligned_offset..aligned_offset + bytes_len];
738 let buffer = self.buffer.as_mut_slice();
740 let last = &mut buffer[buffer.len() - 1];
741
742 *last &= 0b11111111u8 >> (8 - own_offset); *last |= items[0] << own_offset;
746
747 if length + own_offset <= 8 {
748 self.length += length;
750 return;
751 }
752 let additional = length - (8 - own_offset);
753
754 let remaining = [items[items.len() - 1], 0];
755 let bytes = items
756 .windows(2)
757 .chain(std::iter::once(remaining.as_ref()))
758 .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
759 .take(additional.saturating_add(7) / 8);
760 self.buffer.extend(bytes);
761
762 self.length += length;
763 }
764
765 fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
766 let aligned_offset = offset / 8;
767 let bytes_len = length.saturating_add(7) / 8;
768 let items = &slice[aligned_offset..aligned_offset + bytes_len];
769 self.buffer.extend_from_slice(items);
770 self.length += length;
771 }
772
773 #[inline]
782 pub unsafe fn extend_from_slice_unchecked(
783 &mut self,
784 slice: &[u8],
785 offset: usize,
786 length: usize,
787 ) {
788 if length == 0 {
789 return;
790 };
791 let is_aligned = self.length.is_multiple_of(8);
792 let other_is_aligned = offset.is_multiple_of(8);
793 match (is_aligned, other_is_aligned) {
794 (true, true) => self.extend_aligned(slice, offset, length),
795 (false, true) => self.extend_unaligned(slice, offset, length),
796 _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
798 }
799 debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
801 }
802
803 #[inline]
809 pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
810 assert!(offset + length <= slice.len() * 8);
811 unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
813 }
814
815 #[inline]
816 pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
817 let (slice, offset, length) = bitmask.inner();
818 self.extend_from_slice(slice, offset, length)
819 }
820
821 #[inline]
823 pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
824 let (slice, offset, length) = bitmap.as_slice();
825 unsafe {
827 self.extend_from_slice_unchecked(slice, offset, length);
828 }
829 }
830
831 #[inline]
834 pub fn as_slice(&self) -> &[u8] {
835 let len = (self.length).saturating_add(7) / 8;
836 &self.buffer[..len]
837 }
838
839 #[inline]
842 pub fn as_mut_slice(&mut self) -> &mut [u8] {
843 let len = (self.length).saturating_add(7) / 8;
844 &mut self.buffer[..len]
845 }
846}
847
848impl Default for MutableBitmap {
849 fn default() -> Self {
850 Self::new()
851 }
852}
853
854impl<'a> IntoIterator for &'a MutableBitmap {
855 type Item = bool;
856 type IntoIter = BitmapIter<'a>;
857
858 fn into_iter(self) -> Self::IntoIter {
859 BitmapIter::<'a>::new(&self.buffer, 0, self.length)
860 }
861}