Skip to main content

polars_arrow/bitmap/
mutable.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_buffer::SharedStorage;
5use polars_error::{PolarsResult, polars_bail};
6use polars_utils::vec::PushUnchecked;
7
8use super::bitmask::BitMask;
9use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
10use super::{Bitmap, intersects_with_mut};
11use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
12use crate::trusted_len::TrustedLen;
13
14/// A container of booleans. [`MutableBitmap`] is semantically equivalent
15/// to [`Vec<bool>`].
16///
17/// The two main differences against [`Vec<bool>`] is that each element stored as a single bit,
18/// thereby:
19/// * it uses 8x less memory
20/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetics).
21///
22/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
23/// # Examples
24/// ```
25/// use polars_arrow::bitmap::MutableBitmap;
26///
27/// let bitmap = MutableBitmap::from([true, false, true]);
28/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
29///
30/// // creation directly from bytes
31/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap();
32/// // note: the first bit is the left-most of the first byte
33/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
34/// // we can also get the slice:
35/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
36/// // debug helps :)
37/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
38///
39/// // It supports mutation in place
40/// bitmap.set(0, false);
41/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
42/// // and `O(1)` random access
43/// assert_eq!(bitmap.get(0), false);
44/// ```
45/// # Implementation
46/// This container is internally a [`Vec<u8>`].
47#[derive(Clone)]
48pub struct MutableBitmap {
49    buffer: Vec<u8>,
50    // invariant: length.saturating_add(7) / 8 == buffer.len();
51    length: usize,
52}
53
54impl std::fmt::Debug for MutableBitmap {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        fmt(&self.buffer, 0, self.len(), f)
57    }
58}
59
60impl PartialEq for MutableBitmap {
61    fn eq(&self, other: &Self) -> bool {
62        self.iter().eq(other.iter())
63    }
64}
65
66impl MutableBitmap {
67    /// Initializes an empty [`MutableBitmap`].
68    #[inline]
69    pub fn new() -> Self {
70        Self {
71            buffer: Vec::new(),
72            length: 0,
73        }
74    }
75
76    /// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length.
77    /// # Errors
78    /// This function errors iff `length > bytes.len() * 8`
79    #[inline]
80    pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81        if length > bytes.len().saturating_mul(8) {
82            polars_bail!(InvalidOperation:
83                "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84                length,
85                bytes.len().saturating_mul(8)
86            )
87        }
88
89        // Ensure invariant holds.
90        let min_byte_length_needed = length.div_ceil(8);
91        bytes.drain(min_byte_length_needed..);
92        Ok(Self {
93            length,
94            buffer: bytes,
95        })
96    }
97
98    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length.
99    /// This function is `O(1)`.
100    /// # Panic
101    /// Panics iff the length is larger than the length of the buffer times 8.
102    #[inline]
103    pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104        Self::try_new(buffer, length).unwrap()
105    }
106
107    /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
108    #[inline]
109    pub fn with_capacity(capacity: usize) -> Self {
110        Self {
111            buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112            length: 0,
113        }
114    }
115
116    /// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary.
117    #[inline]
118    pub fn push(&mut self, value: bool) {
119        if self.length.is_multiple_of(8) {
120            self.buffer.push(0);
121        }
122        let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123        *byte = set_bit_in_byte(*byte, self.length % 8, value);
124        self.length += 1;
125    }
126
127    /// Pop the last bit from the [`MutableBitmap`].
128    /// Note if the [`MutableBitmap`] is empty, this method will return None.
129    #[inline]
130    pub fn pop(&mut self) -> Option<bool> {
131        if self.is_empty() {
132            return None;
133        }
134
135        self.length -= 1;
136        let value = unsafe { self.get_unchecked(self.length) };
137        if self.length.is_multiple_of(8) {
138            self.buffer.pop();
139        }
140        Some(value)
141    }
142
143    /// Returns whether the position `index` is set.
144    /// # Panics
145    /// Panics iff `index >= self.len()`.
146    #[inline]
147    pub fn get(&self, index: usize) -> bool {
148        assert!(index < self.len());
149        unsafe { self.get_unchecked(index) }
150    }
151
152    /// Returns whether the position `index` is set.
153    ///
154    /// # Safety
155    /// The caller must ensure `index < self.len()`.
156    #[inline]
157    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158        get_bit_unchecked(&self.buffer, index)
159    }
160
161    /// Sets the position `index` to `value`
162    /// # Panics
163    /// Panics iff `index >= self.len()`.
164    #[inline]
165    pub fn set(&mut self, index: usize, value: bool) {
166        assert!(index < self.len());
167        unsafe {
168            self.set_unchecked(index, value);
169        }
170    }
171
172    /// Sets the position `index` to the OR of its original value and `value`.
173    #[inline]
174    pub fn or_pos(&mut self, index: usize, value: bool) {
175        assert!(index < self.len());
176        unsafe {
177            self.or_pos_unchecked(index, value);
178        }
179    }
180
181    /// Sets the position `index` to the AND of its original value and `value`.
182    #[inline]
183    pub fn and_pos(&mut self, index: usize, value: bool) {
184        assert!(index < self.len());
185        unsafe {
186            self.and_pos_unchecked(index, value);
187        }
188    }
189
190    /// Sets the position `index` to the OR of its original value and `value`.
191    ///
192    /// # Safety
193    /// It's undefined behavior if index >= self.len().
194    #[inline]
195    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
196        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
197    }
198
199    /// Sets the position `index` to the AND of its original value and `value`.
200    ///
201    /// # Safety
202    /// It's undefined behavior if index >= self.len().
203    #[inline]
204    pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
205        *self.buffer.get_unchecked_mut(index / 8) &=
206            (0xFE | u8::from(value)).rotate_left(index as u32);
207    }
208
209    /// Sets the position `index` to the XOR of its original value and `value`.
210    ///
211    /// # Safety
212    /// It's undefined behavior if index >= self.len().
213    #[inline]
214    pub unsafe fn xor_pos_unchecked(&mut self, index: usize, value: bool) {
215        *self.buffer.get_unchecked_mut(index / 8) ^= (value as u8) << (index % 8);
216    }
217
218    /// constructs a new iterator over the bits of [`MutableBitmap`].
219    pub fn iter(&self) -> BitmapIter<'_> {
220        BitmapIter::new(&self.buffer, 0, self.length)
221    }
222
223    /// Empties the [`MutableBitmap`].
224    #[inline]
225    pub fn clear(&mut self) {
226        self.length = 0;
227        self.buffer.clear();
228    }
229
230    /// Extends [`MutableBitmap`] by `additional` values of constant `value`.
231    /// # Implementation
232    /// This function is an order of magnitude faster than pushing element by element.
233    #[inline]
234    pub fn extend_constant(&mut self, additional: usize, value: bool) {
235        if additional == 0 {
236            return;
237        }
238
239        if value {
240            self.extend_set(additional)
241        } else {
242            self.extend_unset(additional)
243        }
244    }
245
246    /// Resizes the [`MutableBitmap`] to the specified length, inserting value
247    /// if the length is bigger than the current length.
248    pub fn resize(&mut self, length: usize, value: bool) {
249        if let Some(additional) = length.checked_sub(self.len()) {
250            self.extend_constant(additional, value);
251        } else {
252            self.buffer.truncate(length.saturating_add(7) / 8);
253            self.length = length;
254        }
255    }
256
257    /// Initializes a zeroed [`MutableBitmap`].
258    #[inline]
259    pub fn from_len_zeroed(length: usize) -> Self {
260        Self {
261            buffer: vec![0; length.saturating_add(7) / 8],
262            length,
263        }
264    }
265
266    /// Initializes a [`MutableBitmap`] with all values set to valid/ true.
267    #[inline]
268    pub fn from_len_set(length: usize) -> Self {
269        Self {
270            buffer: vec![u8::MAX; length.saturating_add(7) / 8],
271            length,
272        }
273    }
274
275    /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
276    #[inline(always)]
277    pub fn reserve(&mut self, additional: usize) {
278        self.buffer
279            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
280    }
281
282    /// Returns the capacity of [`MutableBitmap`] in number of bits.
283    #[inline]
284    pub fn capacity(&self) -> usize {
285        self.buffer.capacity() * 8
286    }
287
288    /// Pushes a new bit to the [`MutableBitmap`]
289    ///
290    /// # Safety
291    /// The caller must ensure that the [`MutableBitmap`] has sufficient capacity.
292    #[inline]
293    pub unsafe fn push_unchecked(&mut self, value: bool) {
294        if self.length.is_multiple_of(8) {
295            self.buffer.push_unchecked(0);
296        }
297        let byte = self.buffer.last_mut().unwrap_unchecked();
298        *byte = set_bit_in_byte(*byte, self.length % 8, value);
299        self.length += 1;
300    }
301
302    /// Returns the number of unset bits on this [`MutableBitmap`].
303    ///
304    /// Guaranteed to be `<= self.len()`.
305    /// # Implementation
306    /// This function is `O(N)`
307    pub fn unset_bits(&self) -> usize {
308        count_zeros(&self.buffer, 0, self.length)
309    }
310
311    /// Returns the number of set bits on this [`MutableBitmap`].
312    ///
313    /// Guaranteed to be `<= self.len()`.
314    /// # Implementation
315    /// This function is `O(N)`
316    pub fn set_bits(&self) -> usize {
317        self.length - self.unset_bits()
318    }
319
320    /// Returns the length of the [`MutableBitmap`].
321    #[inline]
322    pub fn len(&self) -> usize {
323        self.length
324    }
325
326    /// Returns whether [`MutableBitmap`] is empty.
327    #[inline]
328    pub fn is_empty(&self) -> bool {
329        self.len() == 0
330    }
331
332    /// # Safety
333    /// The caller must ensure that the [`MutableBitmap`] was properly initialized up to `len`.
334    #[inline]
335    pub(crate) unsafe fn set_len(&mut self, len: usize) {
336        self.buffer.set_len(len.saturating_add(7) / 8);
337        self.length = len;
338    }
339
340    fn extend_set(&mut self, mut additional: usize) {
341        let offset = self.length % 8;
342        let added = if offset != 0 {
343            // offset != 0 => at least one byte in the buffer
344            let last_index = self.buffer.len() - 1;
345            let last = &mut self.buffer[last_index];
346
347            let remaining = 0b11111111u8;
348            let remaining = remaining >> 8usize.saturating_sub(additional);
349            let remaining = remaining << offset;
350            *last |= remaining;
351            std::cmp::min(additional, 8 - offset)
352        } else {
353            0
354        };
355        self.length += added;
356        additional = additional.saturating_sub(added);
357        if additional > 0 {
358            debug_assert_eq!(self.length % 8, 0);
359            let existing = self.length.saturating_add(7) / 8;
360            let required = (self.length + additional).saturating_add(7) / 8;
361            // add remaining as full bytes
362            self.buffer
363                .extend(std::iter::repeat_n(0b11111111u8, required - existing));
364            self.length += additional;
365        }
366    }
367
368    fn extend_unset(&mut self, mut additional: usize) {
369        let offset = self.length % 8;
370        let added = if offset != 0 {
371            // offset != 0 => at least one byte in the buffer
372            let last_index = self.buffer.len() - 1;
373            let last = &mut self.buffer[last_index];
374            *last &= 0b11111111u8 >> (8 - offset); // unset them
375            std::cmp::min(additional, 8 - offset)
376        } else {
377            0
378        };
379        self.length += added;
380        additional = additional.saturating_sub(added);
381        if additional > 0 {
382            debug_assert_eq!(self.length % 8, 0);
383            self.buffer
384                .resize((self.length + additional).saturating_add(7) / 8, 0);
385            self.length += additional;
386        }
387    }
388
389    /// Sets the position `index` to `value`
390    ///
391    /// # Safety
392    /// Caller must ensure that `index < self.len()`
393    #[inline]
394    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
395        debug_assert!(index < self.len());
396        let byte = self.buffer.get_unchecked_mut(index / 8);
397        *byte = set_bit_in_byte(*byte, index % 8, value);
398    }
399
400    /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
401    pub fn shrink_to_fit(&mut self) {
402        self.buffer.shrink_to_fit();
403    }
404
405    /// Returns an iterator over bits in bit chunks [`BitChunk`].
406    ///
407    /// This iterator is useful to operate over multiple bits via e.g. bitwise.
408    pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
409        BitChunks::new(&self.buffer, 0, self.length)
410    }
411
412    /// Returns an iterator over mutable slices, [`BitChunksExactMut`]
413    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<'_, T> {
414        BitChunksExactMut::new(&mut self.buffer, self.length)
415    }
416
417    pub fn intersects_with(&self, other: &Self) -> bool {
418        intersects_with_mut(self, other)
419    }
420
421    pub fn freeze(self) -> Bitmap {
422        self.into()
423    }
424}
425
426impl From<MutableBitmap> for Bitmap {
427    #[inline]
428    fn from(buffer: MutableBitmap) -> Self {
429        Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
430    }
431}
432
433impl From<MutableBitmap> for Option<Bitmap> {
434    #[inline]
435    fn from(buffer: MutableBitmap) -> Self {
436        let unset_bits = buffer.unset_bits();
437        if unset_bits > 0 {
438            // SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
439            let bitmap = unsafe {
440                Bitmap::from_inner_unchecked(
441                    SharedStorage::from_vec(buffer.buffer),
442                    0,
443                    buffer.length,
444                    Some(unset_bits),
445                )
446            };
447            Some(bitmap)
448        } else {
449            None
450        }
451    }
452}
453
454impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
455    #[inline]
456    fn from(slice: P) -> Self {
457        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
458    }
459}
460
461impl Extend<bool> for MutableBitmap {
462    fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
463        let mut iterator = iter.into_iter();
464
465        let mut buffer = std::mem::take(&mut self.buffer);
466        let mut length = std::mem::take(&mut self.length);
467
468        let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
469        buffer.reserve(byte_capacity);
470
471        loop {
472            let mut exhausted = false;
473            let mut byte_accum: u8 = 0;
474            let mut mask: u8 = 1;
475
476            //collect (up to) 8 bits into a byte
477            while mask != 0 {
478                if let Some(value) = iterator.next() {
479                    length += 1;
480                    byte_accum |= match value {
481                        true => mask,
482                        false => 0,
483                    };
484                    mask <<= 1;
485                } else {
486                    exhausted = true;
487                    break;
488                }
489            }
490
491            // break if the iterator was exhausted before it provided a bool for this byte
492            if exhausted && mask == 1 {
493                break;
494            }
495
496            //ensure we have capacity to write the byte
497            if buffer.len() == buffer.capacity() {
498                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
499                let additional_byte_capacity = 1usize.saturating_add(
500                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
501                );
502                buffer.reserve(additional_byte_capacity)
503            }
504
505            // Soundness: capacity was allocated above
506            buffer.push(byte_accum);
507            if exhausted {
508                break;
509            }
510        }
511
512        self.buffer = buffer;
513        self.length = length;
514    }
515}
516
517impl FromIterator<bool> for MutableBitmap {
518    fn from_iter<I>(iter: I) -> Self
519    where
520        I: IntoIterator<Item = bool>,
521    {
522        let mut bm = Self::new();
523        bm.extend(iter);
524        bm
525    }
526}
527
528// [7, 6, 5, 4, 3, 2, 1, 0], [15, 14, 13, 12, 11, 10, 9, 8]
529// [00000001_00000000_00000000_00000000_...] // u64
530/// # Safety
531/// The iterator must be trustedLen and its len must be least `len`.
532#[inline]
533unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
534    let mut byte = 0u64;
535    let mut mask;
536    for i in 0..8 {
537        mask = 1u64 << (8 * i);
538        for _ in 0..8 {
539            let value = match iterator.next() {
540                Some(value) => value,
541                None => unsafe { unreachable_unchecked() },
542            };
543
544            byte |= match value {
545                true => mask,
546                false => 0,
547            };
548            mask <<= 1;
549        }
550    }
551    byte
552}
553
554/// # Safety
555/// The iterator must be trustedLen and its len must be least `len`.
556#[inline]
557unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
558    let mut byte_accum: u8 = 0;
559    let mut mask: u8 = 1;
560    for _ in 0..len {
561        let value = match iterator.next() {
562            Some(value) => value,
563            None => unsafe { unreachable_unchecked() },
564        };
565
566        byte_accum |= match value {
567            true => mask,
568            false => 0,
569        };
570        mask <<= 1;
571    }
572    byte_accum
573}
574
575/// Extends the [`Vec<u8>`] from `iterator`
576/// # Safety
577/// The iterator MUST be [`TrustedLen`].
578#[inline]
579unsafe fn extend_aligned_trusted_iter_unchecked(
580    buffer: &mut Vec<u8>,
581    mut iterator: impl Iterator<Item = bool>,
582) -> usize {
583    let additional_bits = iterator.size_hint().1.unwrap();
584    let chunks = additional_bits / 64;
585    let remainder = additional_bits % 64;
586
587    let additional = additional_bits.div_ceil(8);
588    assert_eq!(
589        additional,
590        // a hint of how the following calculation will be done
591        chunks * 8 + remainder / 8 + !remainder.is_multiple_of(8) as usize
592    );
593    buffer.reserve(additional);
594
595    // chunks of 64 bits
596    for _ in 0..chunks {
597        let chunk = get_chunk_unchecked(&mut iterator);
598        buffer.extend_from_slice(&chunk.to_le_bytes());
599    }
600
601    // remaining complete bytes
602    for _ in 0..(remainder / 8) {
603        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
604        buffer.push(byte)
605    }
606
607    // remaining bits
608    let remainder = remainder % 8;
609    if remainder > 0 {
610        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
611        buffer.push(byte)
612    }
613    additional_bits
614}
615
616impl MutableBitmap {
617    /// Extends `self` from a [`TrustedLen`] iterator.
618    #[inline]
619    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
620        // SAFETY: I: TrustedLen
621        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
622    }
623
624    /// Extends `self` from an iterator of trusted len.
625    ///
626    /// # Safety
627    /// The caller must guarantee that the iterator has a trusted len.
628    #[inline]
629    pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
630        &mut self,
631        mut iterator: I,
632    ) {
633        // the length of the iterator throughout this function.
634        let mut length = iterator.size_hint().1.unwrap();
635
636        let bit_offset = self.length % 8;
637
638        if length < 8 - bit_offset {
639            if bit_offset == 0 {
640                self.buffer.push(0);
641            }
642            // the iterator will not fill the last byte
643            let byte = self.buffer.last_mut().unwrap();
644            for (i, value) in (bit_offset..).zip(iterator) {
645                *byte = set_bit_in_byte(*byte, i, value);
646            }
647            self.length += length;
648            return;
649        }
650
651        // at this point we know that length will hit a byte boundary and thus
652        // increase the buffer.
653
654        if bit_offset != 0 {
655            // we are in the middle of a byte; lets finish it
656            let byte = self.buffer.last_mut().unwrap();
657            (bit_offset..8).for_each(|i| {
658                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
659            });
660            self.length += 8 - bit_offset;
661            length -= 8 - bit_offset;
662        }
663
664        // everything is aligned; proceed with the bulk operation
665        debug_assert_eq!(self.length % 8, 0);
666
667        unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
668        self.length += length;
669    }
670
671    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
672    ///
673    /// # Safety
674    /// The iterator must report an accurate length.
675    #[inline]
676    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
677    where
678        I: Iterator<Item = bool>,
679    {
680        let mut buffer = Vec::<u8>::new();
681
682        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
683
684        Self { buffer, length }
685    }
686
687    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
688    #[inline]
689    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
690    where
691        I: TrustedLen<Item = bool>,
692    {
693        // SAFETY: Iterator is `TrustedLen`
694        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
695    }
696
697    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
698    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
699    where
700        I: TrustedLen<Item = std::result::Result<bool, E>>,
701    {
702        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
703    }
704
705    /// Creates a new [`MutableBitmap`] from an falible iterator of booleans.
706    ///
707    /// # Safety
708    /// The caller must guarantee that the iterator is `TrustedLen`.
709    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
710        mut iterator: I,
711    ) -> std::result::Result<Self, E>
712    where
713        I: Iterator<Item = std::result::Result<bool, E>>,
714    {
715        let length = iterator.size_hint().1.unwrap();
716
717        let mut buffer = vec![0u8; length.div_ceil(8)];
718
719        let chunks = length / 8;
720        let reminder = length % 8;
721
722        let data = buffer.as_mut_slice();
723        data[..chunks].iter_mut().try_for_each(|byte| {
724            (0..8).try_for_each(|i| {
725                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
726                Ok(())
727            })
728        })?;
729
730        if reminder != 0 {
731            let last = &mut data[chunks];
732            iterator.enumerate().try_for_each(|(i, value)| {
733                *last = set_bit_in_byte(*last, i, value?);
734                Ok(())
735            })?;
736        }
737
738        Ok(Self { buffer, length })
739    }
740
741    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
742        // e.g.
743        // [a, b, --101010]     <- to be extended
744        // [00111111, 11010101] <- to extend
745        // [a, b, 11101010, --001111] expected result
746
747        let aligned_offset = offset / 8;
748        let own_offset = self.length % 8;
749        debug_assert_eq!(offset % 8, 0); // assumed invariant
750        debug_assert!(own_offset != 0); // assumed invariant
751
752        let bytes_len = length.saturating_add(7) / 8;
753        let items = &slice[aligned_offset..aligned_offset + bytes_len];
754        // self has some offset => we need to shift all `items`, and merge the first
755        let buffer = self.buffer.as_mut_slice();
756        let last = &mut buffer[buffer.len() - 1];
757
758        // --101010 | 00111111 << 6 = 11101010
759        // erase previous
760        *last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
761        *last |= items[0] << own_offset;
762
763        if length + own_offset <= 8 {
764            // no new bytes needed
765            self.length += length;
766            return;
767        }
768        let additional = length - (8 - own_offset);
769
770        let remaining = [items[items.len() - 1], 0];
771        let bytes = items
772            .windows(2)
773            .chain(std::iter::once(remaining.as_ref()))
774            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
775            .take(additional.saturating_add(7) / 8);
776        self.buffer.extend(bytes);
777
778        self.length += length;
779    }
780
781    fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
782        let aligned_offset = offset / 8;
783        let bytes_len = length.saturating_add(7) / 8;
784        let items = &slice[aligned_offset..aligned_offset + bytes_len];
785        self.buffer.extend_from_slice(items);
786        self.length += length;
787    }
788
789    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
790    /// This is the fastest way to extend a [`MutableBitmap`].
791    /// # Implementation
792    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
793    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
794    ///
795    /// # Safety
796    /// Caller must ensure `offset + length <= slice.len() * 8`
797    #[inline]
798    pub unsafe fn extend_from_slice_unchecked(
799        &mut self,
800        slice: &[u8],
801        offset: usize,
802        length: usize,
803    ) {
804        if length == 0 {
805            return;
806        };
807        let is_aligned = self.length.is_multiple_of(8);
808        let other_is_aligned = offset.is_multiple_of(8);
809        match (is_aligned, other_is_aligned) {
810            (true, true) => self.extend_aligned(slice, offset, length),
811            (false, true) => self.extend_unaligned(slice, offset, length),
812            // todo: further optimize the other branches.
813            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
814        }
815        // internal invariant:
816        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
817    }
818
819    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
820    /// This is the fastest way to extend a [`MutableBitmap`].
821    /// # Implementation
822    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
823    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
824    #[inline]
825    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
826        assert!(offset + length <= slice.len() * 8);
827        // SAFETY: invariant is asserted
828        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
829    }
830
831    #[inline]
832    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
833        let (slice, offset, length) = bitmask.inner();
834        self.extend_from_slice(slice, offset, length)
835    }
836
837    /// Extends the [`MutableBitmap`] from a [`Bitmap`].
838    #[inline]
839    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
840        let (slice, offset, length) = bitmap.as_slice();
841        // SAFETY: bitmap.as_slice adheres to the invariant
842        unsafe {
843            self.extend_from_slice_unchecked(slice, offset, length);
844        }
845    }
846
847    /// Returns the slice of bytes of this [`MutableBitmap`].
848    /// Note that the last byte may not be fully used.
849    #[inline]
850    pub fn as_slice(&self) -> &[u8] {
851        let len = (self.length).saturating_add(7) / 8;
852        &self.buffer[..len]
853    }
854
855    /// Returns the slice of bytes of this [`MutableBitmap`].
856    /// Note that the last byte may not be fully used.
857    #[inline]
858    pub fn as_mut_slice(&mut self) -> &mut [u8] {
859        let len = (self.length).saturating_add(7) / 8;
860        &mut self.buffer[..len]
861    }
862}
863
864impl Default for MutableBitmap {
865    fn default() -> Self {
866        Self::new()
867    }
868}
869
870impl<'a> IntoIterator for &'a MutableBitmap {
871    type Item = bool;
872    type IntoIter = BitmapIter<'a>;
873
874    fn into_iter(self) -> Self::IntoIter {
875        BitmapIter::<'a>::new(&self.buffer, 0, self.length)
876    }
877}