1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_buffer::SharedStorage;
5use polars_error::{PolarsResult, polars_bail};
6use polars_utils::vec::PushUnchecked;
7
8use super::bitmask::BitMask;
9use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
10use super::{Bitmap, intersects_with_mut};
11use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
12use crate::trusted_len::TrustedLen;
13
14#[derive(Clone)]
48pub struct MutableBitmap {
49 buffer: Vec<u8>,
50 length: usize,
52}
53
54impl std::fmt::Debug for MutableBitmap {
55 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56 fmt(&self.buffer, 0, self.len(), f)
57 }
58}
59
60impl PartialEq for MutableBitmap {
61 fn eq(&self, other: &Self) -> bool {
62 self.iter().eq(other.iter())
63 }
64}
65
66impl MutableBitmap {
67 #[inline]
69 pub fn new() -> Self {
70 Self {
71 buffer: Vec::new(),
72 length: 0,
73 }
74 }
75
76 #[inline]
80 pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81 if length > bytes.len().saturating_mul(8) {
82 polars_bail!(InvalidOperation:
83 "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84 length,
85 bytes.len().saturating_mul(8)
86 )
87 }
88
89 let min_byte_length_needed = length.div_ceil(8);
91 bytes.drain(min_byte_length_needed..);
92 Ok(Self {
93 length,
94 buffer: bytes,
95 })
96 }
97
98 #[inline]
103 pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104 Self::try_new(buffer, length).unwrap()
105 }
106
107 #[inline]
109 pub fn with_capacity(capacity: usize) -> Self {
110 Self {
111 buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112 length: 0,
113 }
114 }
115
116 #[inline]
118 pub fn push(&mut self, value: bool) {
119 if self.length.is_multiple_of(8) {
120 self.buffer.push(0);
121 }
122 let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123 *byte = set_bit_in_byte(*byte, self.length % 8, value);
124 self.length += 1;
125 }
126
127 #[inline]
130 pub fn pop(&mut self) -> Option<bool> {
131 if self.is_empty() {
132 return None;
133 }
134
135 self.length -= 1;
136 let value = unsafe { self.get_unchecked(self.length) };
137 if self.length.is_multiple_of(8) {
138 self.buffer.pop();
139 }
140 Some(value)
141 }
142
143 #[inline]
147 pub fn get(&self, index: usize) -> bool {
148 assert!(index < self.len());
149 unsafe { self.get_unchecked(index) }
150 }
151
152 #[inline]
157 pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158 get_bit_unchecked(&self.buffer, index)
159 }
160
161 #[inline]
165 pub fn set(&mut self, index: usize, value: bool) {
166 assert!(index < self.len());
167 unsafe {
168 self.set_unchecked(index, value);
169 }
170 }
171
172 #[inline]
174 pub fn or_pos(&mut self, index: usize, value: bool) {
175 assert!(index < self.len());
176 unsafe {
177 self.or_pos_unchecked(index, value);
178 }
179 }
180
181 #[inline]
183 pub fn and_pos(&mut self, index: usize, value: bool) {
184 assert!(index < self.len());
185 unsafe {
186 self.and_pos_unchecked(index, value);
187 }
188 }
189
190 #[inline]
195 pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
196 *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
197 }
198
199 #[inline]
204 pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
205 *self.buffer.get_unchecked_mut(index / 8) &=
206 (0xFE | u8::from(value)).rotate_left(index as u32);
207 }
208
209 #[inline]
214 pub unsafe fn xor_pos_unchecked(&mut self, index: usize, value: bool) {
215 *self.buffer.get_unchecked_mut(index / 8) ^= (value as u8) << (index % 8);
216 }
217
218 pub fn iter(&self) -> BitmapIter<'_> {
220 BitmapIter::new(&self.buffer, 0, self.length)
221 }
222
223 #[inline]
225 pub fn clear(&mut self) {
226 self.length = 0;
227 self.buffer.clear();
228 }
229
230 #[inline]
234 pub fn extend_constant(&mut self, additional: usize, value: bool) {
235 if additional == 0 {
236 return;
237 }
238
239 if value {
240 self.extend_set(additional)
241 } else {
242 self.extend_unset(additional)
243 }
244 }
245
246 pub fn resize(&mut self, length: usize, value: bool) {
249 if let Some(additional) = length.checked_sub(self.len()) {
250 self.extend_constant(additional, value);
251 } else {
252 self.buffer.truncate(length.saturating_add(7) / 8);
253 self.length = length;
254 }
255 }
256
257 #[inline]
259 pub fn from_len_zeroed(length: usize) -> Self {
260 Self {
261 buffer: vec![0; length.saturating_add(7) / 8],
262 length,
263 }
264 }
265
266 #[inline]
268 pub fn from_len_set(length: usize) -> Self {
269 Self {
270 buffer: vec![u8::MAX; length.saturating_add(7) / 8],
271 length,
272 }
273 }
274
275 #[inline(always)]
277 pub fn reserve(&mut self, additional: usize) {
278 self.buffer
279 .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
280 }
281
282 #[inline]
284 pub fn capacity(&self) -> usize {
285 self.buffer.capacity() * 8
286 }
287
288 #[inline]
293 pub unsafe fn push_unchecked(&mut self, value: bool) {
294 if self.length.is_multiple_of(8) {
295 self.buffer.push_unchecked(0);
296 }
297 let byte = self.buffer.last_mut().unwrap_unchecked();
298 *byte = set_bit_in_byte(*byte, self.length % 8, value);
299 self.length += 1;
300 }
301
302 pub fn unset_bits(&self) -> usize {
308 count_zeros(&self.buffer, 0, self.length)
309 }
310
311 pub fn set_bits(&self) -> usize {
317 self.length - self.unset_bits()
318 }
319
320 #[inline]
322 pub fn len(&self) -> usize {
323 self.length
324 }
325
326 #[inline]
328 pub fn is_empty(&self) -> bool {
329 self.len() == 0
330 }
331
332 #[inline]
335 pub(crate) unsafe fn set_len(&mut self, len: usize) {
336 self.buffer.set_len(len.saturating_add(7) / 8);
337 self.length = len;
338 }
339
340 fn extend_set(&mut self, mut additional: usize) {
341 let offset = self.length % 8;
342 let added = if offset != 0 {
343 let last_index = self.buffer.len() - 1;
345 let last = &mut self.buffer[last_index];
346
347 let remaining = 0b11111111u8;
348 let remaining = remaining >> 8usize.saturating_sub(additional);
349 let remaining = remaining << offset;
350 *last |= remaining;
351 std::cmp::min(additional, 8 - offset)
352 } else {
353 0
354 };
355 self.length += added;
356 additional = additional.saturating_sub(added);
357 if additional > 0 {
358 debug_assert_eq!(self.length % 8, 0);
359 let existing = self.length.saturating_add(7) / 8;
360 let required = (self.length + additional).saturating_add(7) / 8;
361 self.buffer
363 .extend(std::iter::repeat_n(0b11111111u8, required - existing));
364 self.length += additional;
365 }
366 }
367
368 fn extend_unset(&mut self, mut additional: usize) {
369 let offset = self.length % 8;
370 let added = if offset != 0 {
371 let last_index = self.buffer.len() - 1;
373 let last = &mut self.buffer[last_index];
374 *last &= 0b11111111u8 >> (8 - offset); std::cmp::min(additional, 8 - offset)
376 } else {
377 0
378 };
379 self.length += added;
380 additional = additional.saturating_sub(added);
381 if additional > 0 {
382 debug_assert_eq!(self.length % 8, 0);
383 self.buffer
384 .resize((self.length + additional).saturating_add(7) / 8, 0);
385 self.length += additional;
386 }
387 }
388
389 #[inline]
394 pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
395 debug_assert!(index < self.len());
396 let byte = self.buffer.get_unchecked_mut(index / 8);
397 *byte = set_bit_in_byte(*byte, index % 8, value);
398 }
399
400 pub fn shrink_to_fit(&mut self) {
402 self.buffer.shrink_to_fit();
403 }
404
405 pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
409 BitChunks::new(&self.buffer, 0, self.length)
410 }
411
412 pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<'_, T> {
414 BitChunksExactMut::new(&mut self.buffer, self.length)
415 }
416
417 pub fn intersects_with(&self, other: &Self) -> bool {
418 intersects_with_mut(self, other)
419 }
420
421 pub fn freeze(self) -> Bitmap {
422 self.into()
423 }
424}
425
426impl From<MutableBitmap> for Bitmap {
427 #[inline]
428 fn from(buffer: MutableBitmap) -> Self {
429 Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
430 }
431}
432
433impl From<MutableBitmap> for Option<Bitmap> {
434 #[inline]
435 fn from(buffer: MutableBitmap) -> Self {
436 let unset_bits = buffer.unset_bits();
437 if unset_bits > 0 {
438 let bitmap = unsafe {
440 Bitmap::from_inner_unchecked(
441 SharedStorage::from_vec(buffer.buffer),
442 0,
443 buffer.length,
444 Some(unset_bits),
445 )
446 };
447 Some(bitmap)
448 } else {
449 None
450 }
451 }
452}
453
454impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
455 #[inline]
456 fn from(slice: P) -> Self {
457 MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
458 }
459}
460
461impl Extend<bool> for MutableBitmap {
462 fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
463 let mut iterator = iter.into_iter();
464
465 let mut buffer = std::mem::take(&mut self.buffer);
466 let mut length = std::mem::take(&mut self.length);
467
468 let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
469 buffer.reserve(byte_capacity);
470
471 loop {
472 let mut exhausted = false;
473 let mut byte_accum: u8 = 0;
474 let mut mask: u8 = 1;
475
476 while mask != 0 {
478 if let Some(value) = iterator.next() {
479 length += 1;
480 byte_accum |= match value {
481 true => mask,
482 false => 0,
483 };
484 mask <<= 1;
485 } else {
486 exhausted = true;
487 break;
488 }
489 }
490
491 if exhausted && mask == 1 {
493 break;
494 }
495
496 if buffer.len() == buffer.capacity() {
498 let additional_byte_capacity = 1usize.saturating_add(
500 iterator.size_hint().0.saturating_add(7) / 8, );
502 buffer.reserve(additional_byte_capacity)
503 }
504
505 buffer.push(byte_accum);
507 if exhausted {
508 break;
509 }
510 }
511
512 self.buffer = buffer;
513 self.length = length;
514 }
515}
516
517impl FromIterator<bool> for MutableBitmap {
518 fn from_iter<I>(iter: I) -> Self
519 where
520 I: IntoIterator<Item = bool>,
521 {
522 let mut bm = Self::new();
523 bm.extend(iter);
524 bm
525 }
526}
527
528#[inline]
533unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
534 let mut byte = 0u64;
535 let mut mask;
536 for i in 0..8 {
537 mask = 1u64 << (8 * i);
538 for _ in 0..8 {
539 let value = match iterator.next() {
540 Some(value) => value,
541 None => unsafe { unreachable_unchecked() },
542 };
543
544 byte |= match value {
545 true => mask,
546 false => 0,
547 };
548 mask <<= 1;
549 }
550 }
551 byte
552}
553
554#[inline]
557unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
558 let mut byte_accum: u8 = 0;
559 let mut mask: u8 = 1;
560 for _ in 0..len {
561 let value = match iterator.next() {
562 Some(value) => value,
563 None => unsafe { unreachable_unchecked() },
564 };
565
566 byte_accum |= match value {
567 true => mask,
568 false => 0,
569 };
570 mask <<= 1;
571 }
572 byte_accum
573}
574
575#[inline]
579unsafe fn extend_aligned_trusted_iter_unchecked(
580 buffer: &mut Vec<u8>,
581 mut iterator: impl Iterator<Item = bool>,
582) -> usize {
583 let additional_bits = iterator.size_hint().1.unwrap();
584 let chunks = additional_bits / 64;
585 let remainder = additional_bits % 64;
586
587 let additional = additional_bits.div_ceil(8);
588 assert_eq!(
589 additional,
590 chunks * 8 + remainder / 8 + !remainder.is_multiple_of(8) as usize
592 );
593 buffer.reserve(additional);
594
595 for _ in 0..chunks {
597 let chunk = get_chunk_unchecked(&mut iterator);
598 buffer.extend_from_slice(&chunk.to_le_bytes());
599 }
600
601 for _ in 0..(remainder / 8) {
603 let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
604 buffer.push(byte)
605 }
606
607 let remainder = remainder % 8;
609 if remainder > 0 {
610 let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
611 buffer.push(byte)
612 }
613 additional_bits
614}
615
616impl MutableBitmap {
617 #[inline]
619 pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
620 unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
622 }
623
624 #[inline]
629 pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
630 &mut self,
631 mut iterator: I,
632 ) {
633 let mut length = iterator.size_hint().1.unwrap();
635
636 let bit_offset = self.length % 8;
637
638 if length < 8 - bit_offset {
639 if bit_offset == 0 {
640 self.buffer.push(0);
641 }
642 let byte = self.buffer.last_mut().unwrap();
644 for (i, value) in (bit_offset..).zip(iterator) {
645 *byte = set_bit_in_byte(*byte, i, value);
646 }
647 self.length += length;
648 return;
649 }
650
651 if bit_offset != 0 {
655 let byte = self.buffer.last_mut().unwrap();
657 (bit_offset..8).for_each(|i| {
658 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
659 });
660 self.length += 8 - bit_offset;
661 length -= 8 - bit_offset;
662 }
663
664 debug_assert_eq!(self.length % 8, 0);
666
667 unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
668 self.length += length;
669 }
670
671 #[inline]
676 pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
677 where
678 I: Iterator<Item = bool>,
679 {
680 let mut buffer = Vec::<u8>::new();
681
682 let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
683
684 Self { buffer, length }
685 }
686
687 #[inline]
689 pub fn from_trusted_len_iter<I>(iterator: I) -> Self
690 where
691 I: TrustedLen<Item = bool>,
692 {
693 unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
695 }
696
697 pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
699 where
700 I: TrustedLen<Item = std::result::Result<bool, E>>,
701 {
702 unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
703 }
704
705 pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
710 mut iterator: I,
711 ) -> std::result::Result<Self, E>
712 where
713 I: Iterator<Item = std::result::Result<bool, E>>,
714 {
715 let length = iterator.size_hint().1.unwrap();
716
717 let mut buffer = vec![0u8; length.div_ceil(8)];
718
719 let chunks = length / 8;
720 let reminder = length % 8;
721
722 let data = buffer.as_mut_slice();
723 data[..chunks].iter_mut().try_for_each(|byte| {
724 (0..8).try_for_each(|i| {
725 *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
726 Ok(())
727 })
728 })?;
729
730 if reminder != 0 {
731 let last = &mut data[chunks];
732 iterator.enumerate().try_for_each(|(i, value)| {
733 *last = set_bit_in_byte(*last, i, value?);
734 Ok(())
735 })?;
736 }
737
738 Ok(Self { buffer, length })
739 }
740
741 fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
742 let aligned_offset = offset / 8;
748 let own_offset = self.length % 8;
749 debug_assert_eq!(offset % 8, 0); debug_assert!(own_offset != 0); let bytes_len = length.saturating_add(7) / 8;
753 let items = &slice[aligned_offset..aligned_offset + bytes_len];
754 let buffer = self.buffer.as_mut_slice();
756 let last = &mut buffer[buffer.len() - 1];
757
758 *last &= 0b11111111u8 >> (8 - own_offset); *last |= items[0] << own_offset;
762
763 if length + own_offset <= 8 {
764 self.length += length;
766 return;
767 }
768 let additional = length - (8 - own_offset);
769
770 let remaining = [items[items.len() - 1], 0];
771 let bytes = items
772 .windows(2)
773 .chain(std::iter::once(remaining.as_ref()))
774 .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
775 .take(additional.saturating_add(7) / 8);
776 self.buffer.extend(bytes);
777
778 self.length += length;
779 }
780
781 fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
782 let aligned_offset = offset / 8;
783 let bytes_len = length.saturating_add(7) / 8;
784 let items = &slice[aligned_offset..aligned_offset + bytes_len];
785 self.buffer.extend_from_slice(items);
786 self.length += length;
787 }
788
789 #[inline]
798 pub unsafe fn extend_from_slice_unchecked(
799 &mut self,
800 slice: &[u8],
801 offset: usize,
802 length: usize,
803 ) {
804 if length == 0 {
805 return;
806 };
807 let is_aligned = self.length.is_multiple_of(8);
808 let other_is_aligned = offset.is_multiple_of(8);
809 match (is_aligned, other_is_aligned) {
810 (true, true) => self.extend_aligned(slice, offset, length),
811 (false, true) => self.extend_unaligned(slice, offset, length),
812 _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
814 }
815 debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
817 }
818
819 #[inline]
825 pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
826 assert!(offset + length <= slice.len() * 8);
827 unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
829 }
830
831 #[inline]
832 pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
833 let (slice, offset, length) = bitmask.inner();
834 self.extend_from_slice(slice, offset, length)
835 }
836
837 #[inline]
839 pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
840 let (slice, offset, length) = bitmap.as_slice();
841 unsafe {
843 self.extend_from_slice_unchecked(slice, offset, length);
844 }
845 }
846
847 #[inline]
850 pub fn as_slice(&self) -> &[u8] {
851 let len = (self.length).saturating_add(7) / 8;
852 &self.buffer[..len]
853 }
854
855 #[inline]
858 pub fn as_mut_slice(&mut self) -> &mut [u8] {
859 let len = (self.length).saturating_add(7) / 8;
860 &mut self.buffer[..len]
861 }
862}
863
864impl Default for MutableBitmap {
865 fn default() -> Self {
866 Self::new()
867 }
868}
869
870impl<'a> IntoIterator for &'a MutableBitmap {
871 type Item = bool;
872 type IntoIter = BitmapIter<'a>;
873
874 fn into_iter(self) -> Self::IntoIter {
875 BitmapIter::<'a>::new(&self.buffer, 0, self.length)
876 }
877}