polars_arrow/bitmap/
mutable.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_error::{PolarsResult, polars_bail};
5use polars_utils::vec::PushUnchecked;
6
7use super::bitmask::BitMask;
8use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
9use super::{Bitmap, intersects_with_mut};
10use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
11use crate::storage::SharedStorage;
12use crate::trusted_len::TrustedLen;
13
14/// A container of booleans. [`MutableBitmap`] is semantically equivalent
15/// to [`Vec<bool>`].
16///
17/// The two main differences against [`Vec<bool>`] is that each element stored as a single bit,
18/// thereby:
19/// * it uses 8x less memory
20/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetics).
21///
22/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
23/// # Examples
24/// ```
25/// use polars_arrow::bitmap::MutableBitmap;
26///
27/// let bitmap = MutableBitmap::from([true, false, true]);
28/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
29///
30/// // creation directly from bytes
31/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap();
32/// // note: the first bit is the left-most of the first byte
33/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
34/// // we can also get the slice:
35/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
36/// // debug helps :)
37/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
38///
39/// // It supports mutation in place
40/// bitmap.set(0, false);
41/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
42/// // and `O(1)` random access
43/// assert_eq!(bitmap.get(0), false);
44/// ```
45/// # Implementation
46/// This container is internally a [`Vec<u8>`].
47#[derive(Clone)]
48pub struct MutableBitmap {
49    buffer: Vec<u8>,
50    // invariant: length.saturating_add(7) / 8 == buffer.len();
51    length: usize,
52}
53
54impl std::fmt::Debug for MutableBitmap {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        fmt(&self.buffer, 0, self.len(), f)
57    }
58}
59
60impl PartialEq for MutableBitmap {
61    fn eq(&self, other: &Self) -> bool {
62        self.iter().eq(other.iter())
63    }
64}
65
66impl MutableBitmap {
67    /// Initializes an empty [`MutableBitmap`].
68    #[inline]
69    pub fn new() -> Self {
70        Self {
71            buffer: Vec::new(),
72            length: 0,
73        }
74    }
75
76    /// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length.
77    /// # Errors
78    /// This function errors iff `length > bytes.len() * 8`
79    #[inline]
80    pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81        if length > bytes.len().saturating_mul(8) {
82            polars_bail!(InvalidOperation:
83                "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84                length,
85                bytes.len().saturating_mul(8)
86            )
87        }
88
89        // Ensure invariant holds.
90        let min_byte_length_needed = length.div_ceil(8);
91        bytes.drain(min_byte_length_needed..);
92        Ok(Self {
93            length,
94            buffer: bytes,
95        })
96    }
97
98    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length.
99    /// This function is `O(1)`.
100    /// # Panic
101    /// Panics iff the length is larger than the length of the buffer times 8.
102    #[inline]
103    pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104        Self::try_new(buffer, length).unwrap()
105    }
106
107    /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
108    #[inline]
109    pub fn with_capacity(capacity: usize) -> Self {
110        Self {
111            buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112            length: 0,
113        }
114    }
115
116    /// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary.
117    #[inline]
118    pub fn push(&mut self, value: bool) {
119        if self.length.is_multiple_of(8) {
120            self.buffer.push(0);
121        }
122        let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123        *byte = set_bit_in_byte(*byte, self.length % 8, value);
124        self.length += 1;
125    }
126
127    /// Pop the last bit from the [`MutableBitmap`].
128    /// Note if the [`MutableBitmap`] is empty, this method will return None.
129    #[inline]
130    pub fn pop(&mut self) -> Option<bool> {
131        if self.is_empty() {
132            return None;
133        }
134
135        self.length -= 1;
136        let value = unsafe { self.get_unchecked(self.length) };
137        if self.length.is_multiple_of(8) {
138            self.buffer.pop();
139        }
140        Some(value)
141    }
142
143    /// Returns whether the position `index` is set.
144    /// # Panics
145    /// Panics iff `index >= self.len()`.
146    #[inline]
147    pub fn get(&self, index: usize) -> bool {
148        assert!(index < self.len());
149        unsafe { self.get_unchecked(index) }
150    }
151
152    /// Returns whether the position `index` is set.
153    ///
154    /// # Safety
155    /// The caller must ensure `index < self.len()`.
156    #[inline]
157    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158        get_bit_unchecked(&self.buffer, index)
159    }
160
161    /// Sets the position `index` to `value`
162    /// # Panics
163    /// Panics iff `index >= self.len()`.
164    #[inline]
165    pub fn set(&mut self, index: usize, value: bool) {
166        assert!(index < self.len());
167        unsafe {
168            self.set_unchecked(index, value);
169        }
170    }
171
172    /// Sets the position `index` to the OR of its original value and `value`.
173    ///
174    /// # Safety
175    /// It's undefined behavior if index >= self.len().
176    #[inline]
177    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
178        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
179    }
180
181    /// Sets the position `index` to the AND of its original value and `value`.
182    ///
183    /// # Safety
184    /// It's undefined behavior if index >= self.len().
185    #[inline]
186    pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
187        *self.buffer.get_unchecked_mut(index / 8) &=
188            (0xFE | u8::from(value)).rotate_left(index as u32);
189    }
190
191    /// Sets the position `index` to the XOR of its original value and `value`.
192    ///
193    /// # Safety
194    /// It's undefined behavior if index >= self.len().
195    #[inline]
196    pub unsafe fn xor_pos_unchecked(&mut self, index: usize, value: bool) {
197        *self.buffer.get_unchecked_mut(index / 8) ^= (value as u8) << (index % 8);
198    }
199
200    /// constructs a new iterator over the bits of [`MutableBitmap`].
201    pub fn iter(&self) -> BitmapIter<'_> {
202        BitmapIter::new(&self.buffer, 0, self.length)
203    }
204
205    /// Empties the [`MutableBitmap`].
206    #[inline]
207    pub fn clear(&mut self) {
208        self.length = 0;
209        self.buffer.clear();
210    }
211
212    /// Extends [`MutableBitmap`] by `additional` values of constant `value`.
213    /// # Implementation
214    /// This function is an order of magnitude faster than pushing element by element.
215    #[inline]
216    pub fn extend_constant(&mut self, additional: usize, value: bool) {
217        if additional == 0 {
218            return;
219        }
220
221        if value {
222            self.extend_set(additional)
223        } else {
224            self.extend_unset(additional)
225        }
226    }
227
228    /// Resizes the [`MutableBitmap`] to the specified length, inserting value
229    /// if the length is bigger than the current length.
230    pub fn resize(&mut self, length: usize, value: bool) {
231        if let Some(additional) = length.checked_sub(self.len()) {
232            self.extend_constant(additional, value);
233        } else {
234            self.buffer.truncate(length.saturating_add(7) / 8);
235            self.length = length;
236        }
237    }
238
239    /// Initializes a zeroed [`MutableBitmap`].
240    #[inline]
241    pub fn from_len_zeroed(length: usize) -> Self {
242        Self {
243            buffer: vec![0; length.saturating_add(7) / 8],
244            length,
245        }
246    }
247
248    /// Initializes a [`MutableBitmap`] with all values set to valid/ true.
249    #[inline]
250    pub fn from_len_set(length: usize) -> Self {
251        Self {
252            buffer: vec![u8::MAX; length.saturating_add(7) / 8],
253            length,
254        }
255    }
256
257    /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
258    #[inline(always)]
259    pub fn reserve(&mut self, additional: usize) {
260        self.buffer
261            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
262    }
263
264    /// Returns the capacity of [`MutableBitmap`] in number of bits.
265    #[inline]
266    pub fn capacity(&self) -> usize {
267        self.buffer.capacity() * 8
268    }
269
270    /// Pushes a new bit to the [`MutableBitmap`]
271    ///
272    /// # Safety
273    /// The caller must ensure that the [`MutableBitmap`] has sufficient capacity.
274    #[inline]
275    pub unsafe fn push_unchecked(&mut self, value: bool) {
276        if self.length.is_multiple_of(8) {
277            self.buffer.push_unchecked(0);
278        }
279        let byte = self.buffer.last_mut().unwrap_unchecked();
280        *byte = set_bit_in_byte(*byte, self.length % 8, value);
281        self.length += 1;
282    }
283
284    /// Returns the number of unset bits on this [`MutableBitmap`].
285    ///
286    /// Guaranteed to be `<= self.len()`.
287    /// # Implementation
288    /// This function is `O(N)`
289    pub fn unset_bits(&self) -> usize {
290        count_zeros(&self.buffer, 0, self.length)
291    }
292
293    /// Returns the number of set bits on this [`MutableBitmap`].
294    ///
295    /// Guaranteed to be `<= self.len()`.
296    /// # Implementation
297    /// This function is `O(N)`
298    pub fn set_bits(&self) -> usize {
299        self.length - self.unset_bits()
300    }
301
302    /// Returns the length of the [`MutableBitmap`].
303    #[inline]
304    pub fn len(&self) -> usize {
305        self.length
306    }
307
308    /// Returns whether [`MutableBitmap`] is empty.
309    #[inline]
310    pub fn is_empty(&self) -> bool {
311        self.len() == 0
312    }
313
314    /// # Safety
315    /// The caller must ensure that the [`MutableBitmap`] was properly initialized up to `len`.
316    #[inline]
317    pub(crate) unsafe fn set_len(&mut self, len: usize) {
318        self.buffer.set_len(len.saturating_add(7) / 8);
319        self.length = len;
320    }
321
322    fn extend_set(&mut self, mut additional: usize) {
323        let offset = self.length % 8;
324        let added = if offset != 0 {
325            // offset != 0 => at least one byte in the buffer
326            let last_index = self.buffer.len() - 1;
327            let last = &mut self.buffer[last_index];
328
329            let remaining = 0b11111111u8;
330            let remaining = remaining >> 8usize.saturating_sub(additional);
331            let remaining = remaining << offset;
332            *last |= remaining;
333            std::cmp::min(additional, 8 - offset)
334        } else {
335            0
336        };
337        self.length += added;
338        additional = additional.saturating_sub(added);
339        if additional > 0 {
340            debug_assert_eq!(self.length % 8, 0);
341            let existing = self.length.saturating_add(7) / 8;
342            let required = (self.length + additional).saturating_add(7) / 8;
343            // add remaining as full bytes
344            self.buffer
345                .extend(std::iter::repeat_n(0b11111111u8, required - existing));
346            self.length += additional;
347        }
348    }
349
350    fn extend_unset(&mut self, mut additional: usize) {
351        let offset = self.length % 8;
352        let added = if offset != 0 {
353            // offset != 0 => at least one byte in the buffer
354            let last_index = self.buffer.len() - 1;
355            let last = &mut self.buffer[last_index];
356            *last &= 0b11111111u8 >> (8 - offset); // unset them
357            std::cmp::min(additional, 8 - offset)
358        } else {
359            0
360        };
361        self.length += added;
362        additional = additional.saturating_sub(added);
363        if additional > 0 {
364            debug_assert_eq!(self.length % 8, 0);
365            self.buffer
366                .resize((self.length + additional).saturating_add(7) / 8, 0);
367            self.length += additional;
368        }
369    }
370
371    /// Sets the position `index` to `value`
372    ///
373    /// # Safety
374    /// Caller must ensure that `index < self.len()`
375    #[inline]
376    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
377        debug_assert!(index < self.len());
378        let byte = self.buffer.get_unchecked_mut(index / 8);
379        *byte = set_bit_in_byte(*byte, index % 8, value);
380    }
381
382    /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
383    pub fn shrink_to_fit(&mut self) {
384        self.buffer.shrink_to_fit();
385    }
386
387    /// Returns an iterator over bits in bit chunks [`BitChunk`].
388    ///
389    /// This iterator is useful to operate over multiple bits via e.g. bitwise.
390    pub fn chunks<T: BitChunk>(&self) -> BitChunks<'_, T> {
391        BitChunks::new(&self.buffer, 0, self.length)
392    }
393
394    /// Returns an iterator over mutable slices, [`BitChunksExactMut`]
395    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<'_, T> {
396        BitChunksExactMut::new(&mut self.buffer, self.length)
397    }
398
399    pub fn intersects_with(&self, other: &Self) -> bool {
400        intersects_with_mut(self, other)
401    }
402
403    pub fn freeze(self) -> Bitmap {
404        self.into()
405    }
406}
407
408impl From<MutableBitmap> for Bitmap {
409    #[inline]
410    fn from(buffer: MutableBitmap) -> Self {
411        Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
412    }
413}
414
415impl From<MutableBitmap> for Option<Bitmap> {
416    #[inline]
417    fn from(buffer: MutableBitmap) -> Self {
418        let unset_bits = buffer.unset_bits();
419        if unset_bits > 0 {
420            // SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
421            let bitmap = unsafe {
422                Bitmap::from_inner_unchecked(
423                    SharedStorage::from_vec(buffer.buffer),
424                    0,
425                    buffer.length,
426                    Some(unset_bits),
427                )
428            };
429            Some(bitmap)
430        } else {
431            None
432        }
433    }
434}
435
436impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
437    #[inline]
438    fn from(slice: P) -> Self {
439        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
440    }
441}
442
443impl Extend<bool> for MutableBitmap {
444    fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
445        let mut iterator = iter.into_iter();
446
447        let mut buffer = std::mem::take(&mut self.buffer);
448        let mut length = std::mem::take(&mut self.length);
449
450        let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
451        buffer.reserve(byte_capacity);
452
453        loop {
454            let mut exhausted = false;
455            let mut byte_accum: u8 = 0;
456            let mut mask: u8 = 1;
457
458            //collect (up to) 8 bits into a byte
459            while mask != 0 {
460                if let Some(value) = iterator.next() {
461                    length += 1;
462                    byte_accum |= match value {
463                        true => mask,
464                        false => 0,
465                    };
466                    mask <<= 1;
467                } else {
468                    exhausted = true;
469                    break;
470                }
471            }
472
473            // break if the iterator was exhausted before it provided a bool for this byte
474            if exhausted && mask == 1 {
475                break;
476            }
477
478            //ensure we have capacity to write the byte
479            if buffer.len() == buffer.capacity() {
480                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
481                let additional_byte_capacity = 1usize.saturating_add(
482                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
483                );
484                buffer.reserve(additional_byte_capacity)
485            }
486
487            // Soundness: capacity was allocated above
488            buffer.push(byte_accum);
489            if exhausted {
490                break;
491            }
492        }
493
494        self.buffer = buffer;
495        self.length = length;
496    }
497}
498
499impl FromIterator<bool> for MutableBitmap {
500    fn from_iter<I>(iter: I) -> Self
501    where
502        I: IntoIterator<Item = bool>,
503    {
504        let mut bm = Self::new();
505        bm.extend(iter);
506        bm
507    }
508}
509
510// [7, 6, 5, 4, 3, 2, 1, 0], [15, 14, 13, 12, 11, 10, 9, 8]
511// [00000001_00000000_00000000_00000000_...] // u64
512/// # Safety
513/// The iterator must be trustedLen and its len must be least `len`.
514#[inline]
515unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
516    let mut byte = 0u64;
517    let mut mask;
518    for i in 0..8 {
519        mask = 1u64 << (8 * i);
520        for _ in 0..8 {
521            let value = match iterator.next() {
522                Some(value) => value,
523                None => unsafe { unreachable_unchecked() },
524            };
525
526            byte |= match value {
527                true => mask,
528                false => 0,
529            };
530            mask <<= 1;
531        }
532    }
533    byte
534}
535
536/// # Safety
537/// The iterator must be trustedLen and its len must be least `len`.
538#[inline]
539unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
540    let mut byte_accum: u8 = 0;
541    let mut mask: u8 = 1;
542    for _ in 0..len {
543        let value = match iterator.next() {
544            Some(value) => value,
545            None => unsafe { unreachable_unchecked() },
546        };
547
548        byte_accum |= match value {
549            true => mask,
550            false => 0,
551        };
552        mask <<= 1;
553    }
554    byte_accum
555}
556
557/// Extends the [`Vec<u8>`] from `iterator`
558/// # Safety
559/// The iterator MUST be [`TrustedLen`].
560#[inline]
561unsafe fn extend_aligned_trusted_iter_unchecked(
562    buffer: &mut Vec<u8>,
563    mut iterator: impl Iterator<Item = bool>,
564) -> usize {
565    let additional_bits = iterator.size_hint().1.unwrap();
566    let chunks = additional_bits / 64;
567    let remainder = additional_bits % 64;
568
569    let additional = additional_bits.div_ceil(8);
570    assert_eq!(
571        additional,
572        // a hint of how the following calculation will be done
573        chunks * 8 + remainder / 8 + !remainder.is_multiple_of(8) as usize
574    );
575    buffer.reserve(additional);
576
577    // chunks of 64 bits
578    for _ in 0..chunks {
579        let chunk = get_chunk_unchecked(&mut iterator);
580        buffer.extend_from_slice(&chunk.to_le_bytes());
581    }
582
583    // remaining complete bytes
584    for _ in 0..(remainder / 8) {
585        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
586        buffer.push(byte)
587    }
588
589    // remaining bits
590    let remainder = remainder % 8;
591    if remainder > 0 {
592        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
593        buffer.push(byte)
594    }
595    additional_bits
596}
597
598impl MutableBitmap {
599    /// Extends `self` from a [`TrustedLen`] iterator.
600    #[inline]
601    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
602        // SAFETY: I: TrustedLen
603        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
604    }
605
606    /// Extends `self` from an iterator of trusted len.
607    ///
608    /// # Safety
609    /// The caller must guarantee that the iterator has a trusted len.
610    #[inline]
611    pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
612        &mut self,
613        mut iterator: I,
614    ) {
615        // the length of the iterator throughout this function.
616        let mut length = iterator.size_hint().1.unwrap();
617
618        let bit_offset = self.length % 8;
619
620        if length < 8 - bit_offset {
621            if bit_offset == 0 {
622                self.buffer.push(0);
623            }
624            // the iterator will not fill the last byte
625            let byte = self.buffer.last_mut().unwrap();
626            let mut i = bit_offset;
627            for value in iterator {
628                *byte = set_bit_in_byte(*byte, i, value);
629                i += 1;
630            }
631            self.length += length;
632            return;
633        }
634
635        // at this point we know that length will hit a byte boundary and thus
636        // increase the buffer.
637
638        if bit_offset != 0 {
639            // we are in the middle of a byte; lets finish it
640            let byte = self.buffer.last_mut().unwrap();
641            (bit_offset..8).for_each(|i| {
642                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
643            });
644            self.length += 8 - bit_offset;
645            length -= 8 - bit_offset;
646        }
647
648        // everything is aligned; proceed with the bulk operation
649        debug_assert_eq!(self.length % 8, 0);
650
651        unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
652        self.length += length;
653    }
654
655    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
656    ///
657    /// # Safety
658    /// The iterator must report an accurate length.
659    #[inline]
660    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
661    where
662        I: Iterator<Item = bool>,
663    {
664        let mut buffer = Vec::<u8>::new();
665
666        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
667
668        Self { buffer, length }
669    }
670
671    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
672    #[inline]
673    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
674    where
675        I: TrustedLen<Item = bool>,
676    {
677        // SAFETY: Iterator is `TrustedLen`
678        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
679    }
680
681    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
682    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
683    where
684        I: TrustedLen<Item = std::result::Result<bool, E>>,
685    {
686        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
687    }
688
689    /// Creates a new [`MutableBitmap`] from an falible iterator of booleans.
690    ///
691    /// # Safety
692    /// The caller must guarantee that the iterator is `TrustedLen`.
693    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
694        mut iterator: I,
695    ) -> std::result::Result<Self, E>
696    where
697        I: Iterator<Item = std::result::Result<bool, E>>,
698    {
699        let length = iterator.size_hint().1.unwrap();
700
701        let mut buffer = vec![0u8; length.div_ceil(8)];
702
703        let chunks = length / 8;
704        let reminder = length % 8;
705
706        let data = buffer.as_mut_slice();
707        data[..chunks].iter_mut().try_for_each(|byte| {
708            (0..8).try_for_each(|i| {
709                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
710                Ok(())
711            })
712        })?;
713
714        if reminder != 0 {
715            let last = &mut data[chunks];
716            iterator.enumerate().try_for_each(|(i, value)| {
717                *last = set_bit_in_byte(*last, i, value?);
718                Ok(())
719            })?;
720        }
721
722        Ok(Self { buffer, length })
723    }
724
725    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
726        // e.g.
727        // [a, b, --101010]     <- to be extended
728        // [00111111, 11010101] <- to extend
729        // [a, b, 11101010, --001111] expected result
730
731        let aligned_offset = offset / 8;
732        let own_offset = self.length % 8;
733        debug_assert_eq!(offset % 8, 0); // assumed invariant
734        debug_assert!(own_offset != 0); // assumed invariant
735
736        let bytes_len = length.saturating_add(7) / 8;
737        let items = &slice[aligned_offset..aligned_offset + bytes_len];
738        // self has some offset => we need to shift all `items`, and merge the first
739        let buffer = self.buffer.as_mut_slice();
740        let last = &mut buffer[buffer.len() - 1];
741
742        // --101010 | 00111111 << 6 = 11101010
743        // erase previous
744        *last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
745        *last |= items[0] << own_offset;
746
747        if length + own_offset <= 8 {
748            // no new bytes needed
749            self.length += length;
750            return;
751        }
752        let additional = length - (8 - own_offset);
753
754        let remaining = [items[items.len() - 1], 0];
755        let bytes = items
756            .windows(2)
757            .chain(std::iter::once(remaining.as_ref()))
758            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
759            .take(additional.saturating_add(7) / 8);
760        self.buffer.extend(bytes);
761
762        self.length += length;
763    }
764
765    fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
766        let aligned_offset = offset / 8;
767        let bytes_len = length.saturating_add(7) / 8;
768        let items = &slice[aligned_offset..aligned_offset + bytes_len];
769        self.buffer.extend_from_slice(items);
770        self.length += length;
771    }
772
773    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
774    /// This is the fastest way to extend a [`MutableBitmap`].
775    /// # Implementation
776    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
777    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
778    ///
779    /// # Safety
780    /// Caller must ensure `offset + length <= slice.len() * 8`
781    #[inline]
782    pub unsafe fn extend_from_slice_unchecked(
783        &mut self,
784        slice: &[u8],
785        offset: usize,
786        length: usize,
787    ) {
788        if length == 0 {
789            return;
790        };
791        let is_aligned = self.length.is_multiple_of(8);
792        let other_is_aligned = offset.is_multiple_of(8);
793        match (is_aligned, other_is_aligned) {
794            (true, true) => self.extend_aligned(slice, offset, length),
795            (false, true) => self.extend_unaligned(slice, offset, length),
796            // todo: further optimize the other branches.
797            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
798        }
799        // internal invariant:
800        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
801    }
802
803    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
804    /// This is the fastest way to extend a [`MutableBitmap`].
805    /// # Implementation
806    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
807    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
808    #[inline]
809    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
810        assert!(offset + length <= slice.len() * 8);
811        // SAFETY: invariant is asserted
812        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
813    }
814
815    #[inline]
816    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
817        let (slice, offset, length) = bitmask.inner();
818        self.extend_from_slice(slice, offset, length)
819    }
820
821    /// Extends the [`MutableBitmap`] from a [`Bitmap`].
822    #[inline]
823    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
824        let (slice, offset, length) = bitmap.as_slice();
825        // SAFETY: bitmap.as_slice adheres to the invariant
826        unsafe {
827            self.extend_from_slice_unchecked(slice, offset, length);
828        }
829    }
830
831    /// Returns the slice of bytes of this [`MutableBitmap`].
832    /// Note that the last byte may not be fully used.
833    #[inline]
834    pub fn as_slice(&self) -> &[u8] {
835        let len = (self.length).saturating_add(7) / 8;
836        &self.buffer[..len]
837    }
838
839    /// Returns the slice of bytes of this [`MutableBitmap`].
840    /// Note that the last byte may not be fully used.
841    #[inline]
842    pub fn as_mut_slice(&mut self) -> &mut [u8] {
843        let len = (self.length).saturating_add(7) / 8;
844        &mut self.buffer[..len]
845    }
846}
847
848impl Default for MutableBitmap {
849    fn default() -> Self {
850        Self::new()
851    }
852}
853
854impl<'a> IntoIterator for &'a MutableBitmap {
855    type Item = bool;
856    type IntoIter = BitmapIter<'a>;
857
858    fn into_iter(self) -> Self::IntoIter {
859        BitmapIter::<'a>::new(&self.buffer, 0, self.length)
860    }
861}