polars_arrow/bitmap/
mutable.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use std::hint::unreachable_unchecked;
3
4use polars_error::{PolarsResult, polars_bail};
5use polars_utils::vec::PushUnchecked;
6
7use super::bitmask::BitMask;
8use super::utils::{BitChunk, BitChunks, BitChunksExactMut, BitmapIter, count_zeros, fmt};
9use super::{Bitmap, intersects_with_mut};
10use crate::bitmap::utils::{get_bit_unchecked, merge_reversed, set_bit_in_byte};
11use crate::storage::SharedStorage;
12use crate::trusted_len::TrustedLen;
13
14/// A container of booleans. [`MutableBitmap`] is semantically equivalent
15/// to [`Vec<bool>`].
16///
17/// The two main differences against [`Vec<bool>`] is that each element stored as a single bit,
18/// thereby:
19/// * it uses 8x less memory
20/// * it cannot be represented as `&[bool]` (i.e. no pointer arithmetics).
21///
22/// A [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`.
23/// # Examples
24/// ```
25/// use polars_arrow::bitmap::MutableBitmap;
26///
27/// let bitmap = MutableBitmap::from([true, false, true]);
28/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true]);
29///
30/// // creation directly from bytes
31/// let mut bitmap = MutableBitmap::try_new(vec![0b00001101], 5).unwrap();
32/// // note: the first bit is the left-most of the first byte
33/// assert_eq!(bitmap.iter().collect::<Vec<_>>(), vec![true, false, true, true, false]);
34/// // we can also get the slice:
35/// assert_eq!(bitmap.as_slice(), [0b00001101u8].as_ref());
36/// // debug helps :)
37/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01101] }");
38///
39/// // It supports mutation in place
40/// bitmap.set(0, false);
41/// assert_eq!(format!("{:?}", bitmap), "Bitmap { len: 5, offset: 0, bytes: [0b___01100] }");
42/// // and `O(1)` random access
43/// assert_eq!(bitmap.get(0), false);
44/// ```
45/// # Implementation
46/// This container is internally a [`Vec<u8>`].
47#[derive(Clone)]
48pub struct MutableBitmap {
49    buffer: Vec<u8>,
50    // invariant: length.saturating_add(7) / 8 == buffer.len();
51    length: usize,
52}
53
54impl std::fmt::Debug for MutableBitmap {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        fmt(&self.buffer, 0, self.len(), f)
57    }
58}
59
60impl PartialEq for MutableBitmap {
61    fn eq(&self, other: &Self) -> bool {
62        self.iter().eq(other.iter())
63    }
64}
65
66impl MutableBitmap {
67    /// Initializes an empty [`MutableBitmap`].
68    #[inline]
69    pub fn new() -> Self {
70        Self {
71            buffer: Vec::new(),
72            length: 0,
73        }
74    }
75
76    /// Initializes a new [`MutableBitmap`] from a [`Vec<u8>`] and a length.
77    /// # Errors
78    /// This function errors iff `length > bytes.len() * 8`
79    #[inline]
80    pub fn try_new(mut bytes: Vec<u8>, length: usize) -> PolarsResult<Self> {
81        if length > bytes.len().saturating_mul(8) {
82            polars_bail!(InvalidOperation:
83                "The length of the bitmap ({}) must be `<=` to the number of bytes times 8 ({})",
84                length,
85                bytes.len().saturating_mul(8)
86            )
87        }
88
89        // Ensure invariant holds.
90        let min_byte_length_needed = length.div_ceil(8);
91        bytes.drain(min_byte_length_needed..);
92        Ok(Self {
93            length,
94            buffer: bytes,
95        })
96    }
97
98    /// Initializes a [`MutableBitmap`] from a [`Vec<u8>`] and a length.
99    /// This function is `O(1)`.
100    /// # Panic
101    /// Panics iff the length is larger than the length of the buffer times 8.
102    #[inline]
103    pub fn from_vec(buffer: Vec<u8>, length: usize) -> Self {
104        Self::try_new(buffer, length).unwrap()
105    }
106
107    /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
108    #[inline]
109    pub fn with_capacity(capacity: usize) -> Self {
110        Self {
111            buffer: Vec::with_capacity(capacity.saturating_add(7) / 8),
112            length: 0,
113        }
114    }
115
116    /// Pushes a new bit to the [`MutableBitmap`], re-sizing it if necessary.
117    #[inline]
118    pub fn push(&mut self, value: bool) {
119        if self.length % 8 == 0 {
120            self.buffer.push(0);
121        }
122        let byte = unsafe { self.buffer.last_mut().unwrap_unchecked() };
123        *byte = set_bit_in_byte(*byte, self.length % 8, value);
124        self.length += 1;
125    }
126
127    /// Pop the last bit from the [`MutableBitmap`].
128    /// Note if the [`MutableBitmap`] is empty, this method will return None.
129    #[inline]
130    pub fn pop(&mut self) -> Option<bool> {
131        if self.is_empty() {
132            return None;
133        }
134
135        self.length -= 1;
136        let value = unsafe { self.get_unchecked(self.length) };
137        if self.length % 8 == 0 {
138            self.buffer.pop();
139        }
140        Some(value)
141    }
142
143    /// Returns whether the position `index` is set.
144    /// # Panics
145    /// Panics iff `index >= self.len()`.
146    #[inline]
147    pub fn get(&self, index: usize) -> bool {
148        assert!(index < self.len());
149        unsafe { self.get_unchecked(index) }
150    }
151
152    /// Returns whether the position `index` is set.
153    ///
154    /// # Safety
155    /// The caller must ensure `index < self.len()`.
156    #[inline]
157    pub unsafe fn get_unchecked(&self, index: usize) -> bool {
158        get_bit_unchecked(&self.buffer, index)
159    }
160
161    /// Sets the position `index` to `value`
162    /// # Panics
163    /// Panics iff `index >= self.len()`.
164    #[inline]
165    pub fn set(&mut self, index: usize, value: bool) {
166        assert!(index < self.len());
167        unsafe {
168            self.set_unchecked(index, value);
169        }
170    }
171
172    /// Sets the position `index` to the OR of its original value and `value`.
173    ///
174    /// # Safety
175    /// It's undefined behavior if index >= self.len().
176    #[inline]
177    pub unsafe fn or_pos_unchecked(&mut self, index: usize, value: bool) {
178        *self.buffer.get_unchecked_mut(index / 8) |= (value as u8) << (index % 8);
179    }
180
181    /// Sets the position `index` to the AND of its original value and `value`.
182    ///
183    /// # Safety
184    /// It's undefined behavior if index >= self.len().
185    #[inline]
186    pub unsafe fn and_pos_unchecked(&mut self, index: usize, value: bool) {
187        *self.buffer.get_unchecked_mut(index / 8) &= (value as u8) << (index % 8);
188    }
189
190    /// constructs a new iterator over the bits of [`MutableBitmap`].
191    pub fn iter(&self) -> BitmapIter {
192        BitmapIter::new(&self.buffer, 0, self.length)
193    }
194
195    /// Empties the [`MutableBitmap`].
196    #[inline]
197    pub fn clear(&mut self) {
198        self.length = 0;
199        self.buffer.clear();
200    }
201
202    /// Extends [`MutableBitmap`] by `additional` values of constant `value`.
203    /// # Implementation
204    /// This function is an order of magnitude faster than pushing element by element.
205    #[inline]
206    pub fn extend_constant(&mut self, additional: usize, value: bool) {
207        if additional == 0 {
208            return;
209        }
210
211        if value {
212            self.extend_set(additional)
213        } else {
214            self.extend_unset(additional)
215        }
216    }
217
218    /// Resizes the [`MutableBitmap`] to the specified length, inserting value
219    /// if the length is bigger than the current length.
220    pub fn resize(&mut self, length: usize, value: bool) {
221        if let Some(additional) = length.checked_sub(self.len()) {
222            self.extend_constant(additional, value);
223        } else {
224            self.buffer.truncate(length.saturating_add(7) / 8);
225            self.length = length;
226        }
227    }
228
229    /// Initializes a zeroed [`MutableBitmap`].
230    #[inline]
231    pub fn from_len_zeroed(length: usize) -> Self {
232        Self {
233            buffer: vec![0; length.saturating_add(7) / 8],
234            length,
235        }
236    }
237
238    /// Initializes a [`MutableBitmap`] with all values set to valid/ true.
239    #[inline]
240    pub fn from_len_set(length: usize) -> Self {
241        Self {
242            buffer: vec![u8::MAX; length.saturating_add(7) / 8],
243            length,
244        }
245    }
246
247    /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
248    #[inline(always)]
249    pub fn reserve(&mut self, additional: usize) {
250        self.buffer
251            .reserve((self.length + additional).saturating_add(7) / 8 - self.buffer.len())
252    }
253
254    /// Returns the capacity of [`MutableBitmap`] in number of bits.
255    #[inline]
256    pub fn capacity(&self) -> usize {
257        self.buffer.capacity() * 8
258    }
259
260    /// Pushes a new bit to the [`MutableBitmap`]
261    ///
262    /// # Safety
263    /// The caller must ensure that the [`MutableBitmap`] has sufficient capacity.
264    #[inline]
265    pub unsafe fn push_unchecked(&mut self, value: bool) {
266        if self.length % 8 == 0 {
267            self.buffer.push_unchecked(0);
268        }
269        let byte = self.buffer.last_mut().unwrap_unchecked();
270        *byte = set_bit_in_byte(*byte, self.length % 8, value);
271        self.length += 1;
272    }
273
274    /// Returns the number of unset bits on this [`MutableBitmap`].
275    ///
276    /// Guaranteed to be `<= self.len()`.
277    /// # Implementation
278    /// This function is `O(N)`
279    pub fn unset_bits(&self) -> usize {
280        count_zeros(&self.buffer, 0, self.length)
281    }
282
283    /// Returns the number of set bits on this [`MutableBitmap`].
284    ///
285    /// Guaranteed to be `<= self.len()`.
286    /// # Implementation
287    /// This function is `O(N)`
288    pub fn set_bits(&self) -> usize {
289        self.length - self.unset_bits()
290    }
291
292    /// Returns the length of the [`MutableBitmap`].
293    #[inline]
294    pub fn len(&self) -> usize {
295        self.length
296    }
297
298    /// Returns whether [`MutableBitmap`] is empty.
299    #[inline]
300    pub fn is_empty(&self) -> bool {
301        self.len() == 0
302    }
303
304    /// # Safety
305    /// The caller must ensure that the [`MutableBitmap`] was properly initialized up to `len`.
306    #[inline]
307    pub(crate) unsafe fn set_len(&mut self, len: usize) {
308        self.buffer.set_len(len.saturating_add(7) / 8);
309        self.length = len;
310    }
311
312    fn extend_set(&mut self, mut additional: usize) {
313        let offset = self.length % 8;
314        let added = if offset != 0 {
315            // offset != 0 => at least one byte in the buffer
316            let last_index = self.buffer.len() - 1;
317            let last = &mut self.buffer[last_index];
318
319            let remaining = 0b11111111u8;
320            let remaining = remaining >> 8usize.saturating_sub(additional);
321            let remaining = remaining << offset;
322            *last |= remaining;
323            std::cmp::min(additional, 8 - offset)
324        } else {
325            0
326        };
327        self.length += added;
328        additional = additional.saturating_sub(added);
329        if additional > 0 {
330            debug_assert_eq!(self.length % 8, 0);
331            let existing = self.length.saturating_add(7) / 8;
332            let required = (self.length + additional).saturating_add(7) / 8;
333            // add remaining as full bytes
334            self.buffer
335                .extend(std::iter::repeat_n(0b11111111u8, required - existing));
336            self.length += additional;
337        }
338    }
339
340    fn extend_unset(&mut self, mut additional: usize) {
341        let offset = self.length % 8;
342        let added = if offset != 0 {
343            // offset != 0 => at least one byte in the buffer
344            let last_index = self.buffer.len() - 1;
345            let last = &mut self.buffer[last_index];
346            *last &= 0b11111111u8 >> (8 - offset); // unset them
347            std::cmp::min(additional, 8 - offset)
348        } else {
349            0
350        };
351        self.length += added;
352        additional = additional.saturating_sub(added);
353        if additional > 0 {
354            debug_assert_eq!(self.length % 8, 0);
355            self.buffer
356                .resize((self.length + additional).saturating_add(7) / 8, 0);
357            self.length += additional;
358        }
359    }
360
361    /// Sets the position `index` to `value`
362    ///
363    /// # Safety
364    /// Caller must ensure that `index < self.len()`
365    #[inline]
366    pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
367        debug_assert!(index < self.len());
368        let byte = self.buffer.get_unchecked_mut(index / 8);
369        *byte = set_bit_in_byte(*byte, index % 8, value);
370    }
371
372    /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length.
373    pub fn shrink_to_fit(&mut self) {
374        self.buffer.shrink_to_fit();
375    }
376
377    /// Returns an iterator over bits in bit chunks [`BitChunk`].
378    ///
379    /// This iterator is useful to operate over multiple bits via e.g. bitwise.
380    pub fn chunks<T: BitChunk>(&self) -> BitChunks<T> {
381        BitChunks::new(&self.buffer, 0, self.length)
382    }
383
384    /// Returns an iterator over mutable slices, [`BitChunksExactMut`]
385    pub(crate) fn bitchunks_exact_mut<T: BitChunk>(&mut self) -> BitChunksExactMut<T> {
386        BitChunksExactMut::new(&mut self.buffer, self.length)
387    }
388
389    pub fn intersects_with(&self, other: &Self) -> bool {
390        intersects_with_mut(self, other)
391    }
392
393    pub fn freeze(self) -> Bitmap {
394        self.into()
395    }
396}
397
398impl From<MutableBitmap> for Bitmap {
399    #[inline]
400    fn from(buffer: MutableBitmap) -> Self {
401        Bitmap::try_new(buffer.buffer, buffer.length).unwrap()
402    }
403}
404
405impl From<MutableBitmap> for Option<Bitmap> {
406    #[inline]
407    fn from(buffer: MutableBitmap) -> Self {
408        let unset_bits = buffer.unset_bits();
409        if unset_bits > 0 {
410            // SAFETY: invariants of the `MutableBitmap` equal that of `Bitmap`.
411            let bitmap = unsafe {
412                Bitmap::from_inner_unchecked(
413                    SharedStorage::from_vec(buffer.buffer),
414                    0,
415                    buffer.length,
416                    Some(unset_bits),
417                )
418            };
419            Some(bitmap)
420        } else {
421            None
422        }
423    }
424}
425
426impl<P: AsRef<[bool]>> From<P> for MutableBitmap {
427    #[inline]
428    fn from(slice: P) -> Self {
429        MutableBitmap::from_trusted_len_iter(slice.as_ref().iter().copied())
430    }
431}
432
433impl Extend<bool> for MutableBitmap {
434    fn extend<T: IntoIterator<Item = bool>>(&mut self, iter: T) {
435        let mut iterator = iter.into_iter();
436
437        let mut buffer = std::mem::take(&mut self.buffer);
438        let mut length = std::mem::take(&mut self.length);
439
440        let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8;
441        buffer.reserve(byte_capacity);
442
443        loop {
444            let mut exhausted = false;
445            let mut byte_accum: u8 = 0;
446            let mut mask: u8 = 1;
447
448            //collect (up to) 8 bits into a byte
449            while mask != 0 {
450                if let Some(value) = iterator.next() {
451                    length += 1;
452                    byte_accum |= match value {
453                        true => mask,
454                        false => 0,
455                    };
456                    mask <<= 1;
457                } else {
458                    exhausted = true;
459                    break;
460                }
461            }
462
463            // break if the iterator was exhausted before it provided a bool for this byte
464            if exhausted && mask == 1 {
465                break;
466            }
467
468            //ensure we have capacity to write the byte
469            if buffer.len() == buffer.capacity() {
470                //no capacity for new byte, allocate 1 byte more (plus however many more the iterator advertises)
471                let additional_byte_capacity = 1usize.saturating_add(
472                    iterator.size_hint().0.saturating_add(7) / 8, //convert bit count to byte count, rounding up
473                );
474                buffer.reserve(additional_byte_capacity)
475            }
476
477            // Soundness: capacity was allocated above
478            buffer.push(byte_accum);
479            if exhausted {
480                break;
481            }
482        }
483
484        self.buffer = buffer;
485        self.length = length;
486    }
487}
488
489impl FromIterator<bool> for MutableBitmap {
490    fn from_iter<I>(iter: I) -> Self
491    where
492        I: IntoIterator<Item = bool>,
493    {
494        let mut bm = Self::new();
495        bm.extend(iter);
496        bm
497    }
498}
499
500// [7, 6, 5, 4, 3, 2, 1, 0], [15, 14, 13, 12, 11, 10, 9, 8]
501// [00000001_00000000_00000000_00000000_...] // u64
502/// # Safety
503/// The iterator must be trustedLen and its len must be least `len`.
504#[inline]
505unsafe fn get_chunk_unchecked(iterator: &mut impl Iterator<Item = bool>) -> u64 {
506    let mut byte = 0u64;
507    let mut mask;
508    for i in 0..8 {
509        mask = 1u64 << (8 * i);
510        for _ in 0..8 {
511            let value = match iterator.next() {
512                Some(value) => value,
513                None => unsafe { unreachable_unchecked() },
514            };
515
516            byte |= match value {
517                true => mask,
518                false => 0,
519            };
520            mask <<= 1;
521        }
522    }
523    byte
524}
525
526/// # Safety
527/// The iterator must be trustedLen and its len must be least `len`.
528#[inline]
529unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
530    let mut byte_accum: u8 = 0;
531    let mut mask: u8 = 1;
532    for _ in 0..len {
533        let value = match iterator.next() {
534            Some(value) => value,
535            None => unsafe { unreachable_unchecked() },
536        };
537
538        byte_accum |= match value {
539            true => mask,
540            false => 0,
541        };
542        mask <<= 1;
543    }
544    byte_accum
545}
546
547/// Extends the [`Vec<u8>`] from `iterator`
548/// # Safety
549/// The iterator MUST be [`TrustedLen`].
550#[inline]
551unsafe fn extend_aligned_trusted_iter_unchecked(
552    buffer: &mut Vec<u8>,
553    mut iterator: impl Iterator<Item = bool>,
554) -> usize {
555    let additional_bits = iterator.size_hint().1.unwrap();
556    let chunks = additional_bits / 64;
557    let remainder = additional_bits % 64;
558
559    let additional = additional_bits.div_ceil(8);
560    assert_eq!(
561        additional,
562        // a hint of how the following calculation will be done
563        chunks * 8 + remainder / 8 + (remainder % 8 > 0) as usize
564    );
565    buffer.reserve(additional);
566
567    // chunks of 64 bits
568    for _ in 0..chunks {
569        let chunk = get_chunk_unchecked(&mut iterator);
570        buffer.extend_from_slice(&chunk.to_le_bytes());
571    }
572
573    // remaining complete bytes
574    for _ in 0..(remainder / 8) {
575        let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
576        buffer.push(byte)
577    }
578
579    // remaining bits
580    let remainder = remainder % 8;
581    if remainder > 0 {
582        let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
583        buffer.push(byte)
584    }
585    additional_bits
586}
587
588impl MutableBitmap {
589    /// Extends `self` from a [`TrustedLen`] iterator.
590    #[inline]
591    pub fn extend_from_trusted_len_iter<I: TrustedLen<Item = bool>>(&mut self, iterator: I) {
592        // SAFETY: I: TrustedLen
593        unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) }
594    }
595
596    /// Extends `self` from an iterator of trusted len.
597    ///
598    /// # Safety
599    /// The caller must guarantee that the iterator has a trusted len.
600    #[inline]
601    pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
602        &mut self,
603        mut iterator: I,
604    ) {
605        // the length of the iterator throughout this function.
606        let mut length = iterator.size_hint().1.unwrap();
607
608        let bit_offset = self.length % 8;
609
610        if length < 8 - bit_offset {
611            if bit_offset == 0 {
612                self.buffer.push(0);
613            }
614            // the iterator will not fill the last byte
615            let byte = self.buffer.last_mut().unwrap();
616            let mut i = bit_offset;
617            for value in iterator {
618                *byte = set_bit_in_byte(*byte, i, value);
619                i += 1;
620            }
621            self.length += length;
622            return;
623        }
624
625        // at this point we know that length will hit a byte boundary and thus
626        // increase the buffer.
627
628        if bit_offset != 0 {
629            // we are in the middle of a byte; lets finish it
630            let byte = self.buffer.last_mut().unwrap();
631            (bit_offset..8).for_each(|i| {
632                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap());
633            });
634            self.length += 8 - bit_offset;
635            length -= 8 - bit_offset;
636        }
637
638        // everything is aligned; proceed with the bulk operation
639        debug_assert_eq!(self.length % 8, 0);
640
641        unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
642        self.length += length;
643    }
644
645    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
646    ///
647    /// # Safety
648    /// The iterator must report an accurate length.
649    #[inline]
650    pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
651    where
652        I: Iterator<Item = bool>,
653    {
654        let mut buffer = Vec::<u8>::new();
655
656        let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);
657
658        Self { buffer, length }
659    }
660
661    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
662    #[inline]
663    pub fn from_trusted_len_iter<I>(iterator: I) -> Self
664    where
665        I: TrustedLen<Item = bool>,
666    {
667        // SAFETY: Iterator is `TrustedLen`
668        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
669    }
670
671    /// Creates a new [`MutableBitmap`] from an iterator of booleans.
672    pub fn try_from_trusted_len_iter<E, I>(iterator: I) -> std::result::Result<Self, E>
673    where
674        I: TrustedLen<Item = std::result::Result<bool, E>>,
675    {
676        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
677    }
678
679    /// Creates a new [`MutableBitmap`] from an falible iterator of booleans.
680    ///
681    /// # Safety
682    /// The caller must guarantee that the iterator is `TrustedLen`.
683    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I>(
684        mut iterator: I,
685    ) -> std::result::Result<Self, E>
686    where
687        I: Iterator<Item = std::result::Result<bool, E>>,
688    {
689        let length = iterator.size_hint().1.unwrap();
690
691        let mut buffer = vec![0u8; length.div_ceil(8)];
692
693        let chunks = length / 8;
694        let reminder = length % 8;
695
696        let data = buffer.as_mut_slice();
697        data[..chunks].iter_mut().try_for_each(|byte| {
698            (0..8).try_for_each(|i| {
699                *byte = set_bit_in_byte(*byte, i, iterator.next().unwrap()?);
700                Ok(())
701            })
702        })?;
703
704        if reminder != 0 {
705            let last = &mut data[chunks];
706            iterator.enumerate().try_for_each(|(i, value)| {
707                *last = set_bit_in_byte(*last, i, value?);
708                Ok(())
709            })?;
710        }
711
712        Ok(Self { buffer, length })
713    }
714
715    fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
716        // e.g.
717        // [a, b, --101010]     <- to be extended
718        // [00111111, 11010101] <- to extend
719        // [a, b, 11101010, --001111] expected result
720
721        let aligned_offset = offset / 8;
722        let own_offset = self.length % 8;
723        debug_assert_eq!(offset % 8, 0); // assumed invariant
724        debug_assert!(own_offset != 0); // assumed invariant
725
726        let bytes_len = length.saturating_add(7) / 8;
727        let items = &slice[aligned_offset..aligned_offset + bytes_len];
728        // self has some offset => we need to shift all `items`, and merge the first
729        let buffer = self.buffer.as_mut_slice();
730        let last = &mut buffer[buffer.len() - 1];
731
732        // --101010 | 00111111 << 6 = 11101010
733        // erase previous
734        *last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
735        *last |= items[0] << own_offset;
736
737        if length + own_offset <= 8 {
738            // no new bytes needed
739            self.length += length;
740            return;
741        }
742        let additional = length - (8 - own_offset);
743
744        let remaining = [items[items.len() - 1], 0];
745        let bytes = items
746            .windows(2)
747            .chain(std::iter::once(remaining.as_ref()))
748            .map(|w| merge_reversed(w[0], w[1], 8 - own_offset))
749            .take(additional.saturating_add(7) / 8);
750        self.buffer.extend(bytes);
751
752        self.length += length;
753    }
754
755    fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
756        let aligned_offset = offset / 8;
757        let bytes_len = length.saturating_add(7) / 8;
758        let items = &slice[aligned_offset..aligned_offset + bytes_len];
759        self.buffer.extend_from_slice(items);
760        self.length += length;
761    }
762
763    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
764    /// This is the fastest way to extend a [`MutableBitmap`].
765    /// # Implementation
766    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
767    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
768    ///
769    /// # Safety
770    /// Caller must ensure `offset + length <= slice.len() * 8`
771    #[inline]
772    pub unsafe fn extend_from_slice_unchecked(
773        &mut self,
774        slice: &[u8],
775        offset: usize,
776        length: usize,
777    ) {
778        if length == 0 {
779            return;
780        };
781        let is_aligned = self.length % 8 == 0;
782        let other_is_aligned = offset % 8 == 0;
783        match (is_aligned, other_is_aligned) {
784            (true, true) => self.extend_aligned(slice, offset, length),
785            (false, true) => self.extend_unaligned(slice, offset, length),
786            // todo: further optimize the other branches.
787            _ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
788        }
789        // internal invariant:
790        debug_assert_eq!(self.length.saturating_add(7) / 8, self.buffer.len());
791    }
792
793    /// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
794    /// This is the fastest way to extend a [`MutableBitmap`].
795    /// # Implementation
796    /// When both [`MutableBitmap`]'s length and `offset` are both multiples of 8,
797    /// this function performs a memcopy. Else, it first aligns bit by bit and then performs a memcopy.
798    #[inline]
799    pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
800        assert!(offset + length <= slice.len() * 8);
801        // SAFETY: invariant is asserted
802        unsafe { self.extend_from_slice_unchecked(slice, offset, length) }
803    }
804
805    #[inline]
806    pub fn extend_from_bitmask(&mut self, bitmask: BitMask<'_>) {
807        let (slice, offset, length) = bitmask.inner();
808        self.extend_from_slice(slice, offset, length)
809    }
810
811    /// Extends the [`MutableBitmap`] from a [`Bitmap`].
812    #[inline]
813    pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
814        let (slice, offset, length) = bitmap.as_slice();
815        // SAFETY: bitmap.as_slice adheres to the invariant
816        unsafe {
817            self.extend_from_slice_unchecked(slice, offset, length);
818        }
819    }
820
821    /// Returns the slice of bytes of this [`MutableBitmap`].
822    /// Note that the last byte may not be fully used.
823    #[inline]
824    pub fn as_slice(&self) -> &[u8] {
825        let len = (self.length).saturating_add(7) / 8;
826        &self.buffer[..len]
827    }
828
829    /// Returns the slice of bytes of this [`MutableBitmap`].
830    /// Note that the last byte may not be fully used.
831    #[inline]
832    pub fn as_mut_slice(&mut self) -> &mut [u8] {
833        let len = (self.length).saturating_add(7) / 8;
834        &mut self.buffer[..len]
835    }
836}
837
838impl Default for MutableBitmap {
839    fn default() -> Self {
840        Self::new()
841    }
842}
843
844impl<'a> IntoIterator for &'a MutableBitmap {
845    type Item = bool;
846    type IntoIter = BitmapIter<'a>;
847
848    fn into_iter(self) -> Self::IntoIter {
849        BitmapIter::<'a>::new(&self.buffer, 0, self.length)
850    }
851}