vortex_buffer/bit/
buf.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::{BitAnd, BitOr, BitXor, Not, Range};
5
6use crate::bit::ops::{bitwise_and, bitwise_not, bitwise_or, bitwise_unary_op, bitwise_xor};
7use crate::bit::{
8    BitChunks, BitIndexIterator, BitIterator, BitSliceIterator, UnalignedBitChunk,
9    get_bit_unchecked,
10};
11use crate::{Alignment, BitBufferMut, Buffer, BufferMut, ByteBuffer, buffer};
12
13/// An immutable bitset stored as a packed byte buffer.
14#[derive(Clone, Debug, Eq)]
15pub struct BitBuffer {
16    buffer: ByteBuffer,
17    len: usize,
18    offset: usize,
19}
20
21impl PartialEq for BitBuffer {
22    fn eq(&self, other: &Self) -> bool {
23        if self.len != other.len {
24            return false;
25        }
26
27        self.chunks()
28            .iter()
29            .zip(other.chunks())
30            .all(|(a, b)| a == b)
31    }
32}
33
34impl BitBuffer {
35    /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view.
36    ///
37    /// Panics if the buffer is not large enough to hold `len` bits.
38    pub fn new(buffer: ByteBuffer, len: usize) -> Self {
39        assert!(
40            buffer.len() * 8 >= len,
41            "provided ByteBuffer not large enough to back BoolBuffer with len {len}"
42        );
43
44        Self {
45            buffer,
46            len,
47            offset: 0,
48        }
49    }
50
51    /// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view, starting at the
52    /// given `offset` (in bits).
53    ///
54    /// Panics if the buffer is not large enough to hold `len` bits or if the offset is greater than
55    pub fn new_with_offset(buffer: ByteBuffer, len: usize, offset: usize) -> Self {
56        assert!(
57            len.saturating_add(offset) <= buffer.len().saturating_mul(8),
58            "provided ByteBuffer (len={}) not large enough to back BoolBuffer with offset {offset} len {len}",
59            buffer.len()
60        );
61
62        Self {
63            buffer,
64            len,
65            offset,
66        }
67    }
68
69    /// Create a new `BoolBuffer` of length `len` where all bits are set (true).
70    pub fn new_set(len: usize) -> Self {
71        let words = len.div_ceil(8);
72        let buffer = buffer![0xFF; words];
73
74        Self {
75            buffer,
76            len,
77            offset: 0,
78        }
79    }
80
81    /// Create a new `BoolBuffer` of length `len` where all bits are unset (false).
82    pub fn new_unset(len: usize) -> Self {
83        let words = len.div_ceil(8);
84        let buffer = Buffer::zeroed(words);
85
86        Self {
87            buffer,
88            len,
89            offset: 0,
90        }
91    }
92
93    /// Create a new empty `BitBuffer`.
94    pub fn empty() -> Self {
95        Self::new_set(0)
96    }
97
98    /// Create a new `BitBuffer` of length `len` where all bits are set to `value`.
99    pub fn full(value: bool, len: usize) -> Self {
100        if value {
101            Self::new_set(len)
102        } else {
103            Self::new_unset(len)
104        }
105    }
106
107    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BitBuffer`
108    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
109        let mut buffer = BufferMut::with_capacity(len.div_ceil(64) * 8);
110
111        let chunks = len / 64;
112        let remainder = len % 64;
113        for chunk in 0..chunks {
114            let mut packed = 0;
115            for bit_idx in 0..64 {
116                let i = bit_idx + chunk * 64;
117                packed |= (f(i) as u64) << bit_idx;
118            }
119
120            // SAFETY: Already allocated sufficient capacity
121            unsafe { buffer.push_unchecked(packed) }
122        }
123
124        if remainder != 0 {
125            let mut packed = 0;
126            for bit_idx in 0..remainder {
127                let i = bit_idx + chunks * 64;
128                packed |= (f(i) as u64) << bit_idx;
129            }
130
131            // SAFETY: Already allocated sufficient capacity
132            unsafe { buffer.push_unchecked(packed) }
133        }
134
135        buffer.truncate(len.div_ceil(8));
136
137        Self::new(
138            buffer
139                .freeze()
140                .into_byte_buffer()
141                .aligned(Alignment::of::<u8>()),
142            len,
143        )
144    }
145
146    /// Get the logical length of this `BoolBuffer`.
147    ///
148    /// This may differ from the physical length of the backing buffer, for example if it was
149    /// created using the `new_with_offset` constructor, or if it was sliced.
150    #[inline]
151    pub fn len(&self) -> usize {
152        self.len
153    }
154
155    /// Returns `true` if the `BoolBuffer` is empty.
156    #[inline]
157    pub fn is_empty(&self) -> bool {
158        self.len() == 0
159    }
160
161    /// Offset of the start of the buffer in bits.
162    #[inline]
163    pub fn offset(&self) -> usize {
164        self.offset
165    }
166
167    /// Get a reference to the underlying buffer.
168    #[inline]
169    pub fn inner(&self) -> &ByteBuffer {
170        &self.buffer
171    }
172
173    /// Retrieve the value at the given index.
174    ///
175    /// Panics if the index is out of bounds.
176    #[inline]
177    pub fn value(&self, index: usize) -> bool {
178        assert!(index < self.len);
179        unsafe { self.value_unchecked(index) }
180    }
181
182    /// Retrieve the value at the given index without bounds checking
183    ///
184    /// # SAFETY
185    /// Caller must ensure that index is within the range of the buffer
186    #[inline]
187    pub unsafe fn value_unchecked(&self, index: usize) -> bool {
188        unsafe { get_bit_unchecked(self.buffer.as_ptr(), index + self.offset) }
189    }
190
191    /// Create a new zero-copy slice of this BoolBuffer that begins at the `start` index and extends
192    /// for `len` bits.
193    ///
194    /// Panics if the slice would extend beyond the end of the buffer.
195    pub fn slice(&self, range: Range<usize>) -> Self {
196        assert!(
197            range.len() <= self.len,
198            "slice from {} to {} exceeds len {}",
199            range.start,
200            range.end,
201            range.len()
202        );
203
204        Self::new_with_offset(self.buffer.clone(), range.len(), self.offset + range.start)
205    }
206
207    /// Slice any full bytes from the buffer, leaving the offset < 8.
208    pub fn shrink_offset(self) -> Self {
209        let bit_offset = self.offset % 8;
210        let len = self.len;
211        let buffer = self.into_inner();
212        BitBuffer::new_with_offset(buffer, len, bit_offset)
213    }
214
215    /// Access chunks of the buffer aligned to 8 byte boundary as [prefix, \<full chunks\>, suffix]
216    pub fn unaligned_chunks(&self) -> UnalignedBitChunk<'_> {
217        UnalignedBitChunk::new(self.buffer.as_slice(), self.offset, self.len)
218    }
219
220    /// Access chunks of the underlying buffer as 8 byte chunks with a final trailer
221    ///
222    /// If you're performing operations on a single buffer, prefer [BitBuffer::unaligned_chunks]
223    pub fn chunks(&self) -> BitChunks<'_> {
224        BitChunks::new(self.buffer.as_slice(), self.offset, self.len)
225    }
226
227    /// Get the number of set bits in the buffer.
228    pub fn true_count(&self) -> usize {
229        self.unaligned_chunks().count_ones()
230    }
231
232    /// Get the number of unset bits in the buffer.
233    pub fn false_count(&self) -> usize {
234        self.len - self.true_count()
235    }
236
237    /// Iterator over bits in the buffer
238    pub fn iter(&self) -> BitIterator<'_> {
239        BitIterator::new(self.buffer.as_slice(), self.offset, self.len)
240    }
241
242    /// Iterator over set indices of the underlying buffer
243    pub fn set_indices(&self) -> BitIndexIterator<'_> {
244        BitIndexIterator::new(self.buffer.as_slice(), self.offset, self.len)
245    }
246
247    /// Iterator over set slices of the underlying buffer
248    pub fn set_slices(&self) -> BitSliceIterator<'_> {
249        BitSliceIterator::new(self.buffer.as_slice(), self.offset, self.len)
250    }
251
252    /// Created a new BitBuffer with offset reset to 0
253    pub fn sliced(&self) -> Self {
254        if self.offset % 8 == 0 {
255            return Self::new(
256                self.buffer.slice(self.offset / 8..self.len.div_ceil(8)),
257                self.len,
258            );
259        }
260
261        Self::new(
262            bitwise_unary_op(self.buffer.clone(), self.offset, self.len, |a| a),
263            self.len,
264        )
265    }
266}
267
268// Conversions
269
270impl BitBuffer {
271    /// Consumes this `BoolBuffer` and returns the backing `Buffer<u8>` with any offset
272    /// and length information applied.
273    pub fn into_inner(self) -> ByteBuffer {
274        let word_start = self.offset / 8;
275        let word_end = (self.offset + self.len).div_ceil(8);
276
277        self.buffer.slice(word_start..word_end)
278    }
279
280    /// Get a mutable version of this `BitBuffer` along with bit offset in the first byte.
281    ///
282    /// If the caller doesn't hold only reference to the underlying buffer, a copy is created.
283    /// The second value of the tuple is a bit_offset of the first value in the first byte
284    pub fn into_mut(self) -> BitBufferMut {
285        let bit_offset = self.offset % 8;
286        let len = self.len;
287        // TODO(robert): if we are copying here we can strip offset bits
288        let shrunk = self.into_inner().into_mut();
289        BitBufferMut::from_buffer(shrunk, bit_offset, len)
290    }
291}
292
293impl From<&[bool]> for BitBuffer {
294    fn from(value: &[bool]) -> Self {
295        BitBufferMut::from(value).freeze()
296    }
297}
298
299impl From<Vec<bool>> for BitBuffer {
300    fn from(value: Vec<bool>) -> Self {
301        BitBufferMut::from(value).freeze()
302    }
303}
304
305impl FromIterator<bool> for BitBuffer {
306    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
307        BitBufferMut::from_iter(iter).freeze()
308    }
309}
310
311impl BitOr for &BitBuffer {
312    type Output = BitBuffer;
313
314    fn bitor(self, rhs: Self) -> Self::Output {
315        self.clone() | rhs.clone()
316    }
317}
318
319impl BitOr for BitBuffer {
320    type Output = BitBuffer;
321
322    fn bitor(self, rhs: Self) -> Self::Output {
323        assert_eq!(self.len, rhs.len);
324        BitBuffer::new_with_offset(
325            bitwise_or(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
326            self.len,
327            0,
328        )
329    }
330}
331
332impl BitAnd for &BitBuffer {
333    type Output = BitBuffer;
334
335    fn bitand(self, rhs: Self) -> Self::Output {
336        self.clone() & rhs.clone()
337    }
338}
339
340impl BitAnd for BitBuffer {
341    type Output = BitBuffer;
342
343    fn bitand(self, rhs: Self) -> Self::Output {
344        assert_eq!(self.len, rhs.len);
345        BitBuffer::new_with_offset(
346            bitwise_and(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
347            self.len,
348            0,
349        )
350    }
351}
352
353impl Not for &BitBuffer {
354    type Output = BitBuffer;
355
356    fn not(self) -> Self::Output {
357        !self.clone()
358    }
359}
360
361impl Not for BitBuffer {
362    type Output = BitBuffer;
363
364    fn not(self) -> Self::Output {
365        BitBuffer::new_with_offset(bitwise_not(self.buffer, self.offset, self.len), self.len, 0)
366    }
367}
368
369impl BitXor for &BitBuffer {
370    type Output = BitBuffer;
371
372    fn bitxor(self, rhs: Self) -> Self::Output {
373        self.clone() ^ rhs.clone()
374    }
375}
376
377impl BitXor for BitBuffer {
378    type Output = BitBuffer;
379
380    fn bitxor(self, rhs: Self) -> Self::Output {
381        assert_eq!(self.len, rhs.len);
382        BitBuffer::new_with_offset(
383            bitwise_xor(self.buffer, self.offset, rhs.buffer, rhs.offset, self.len),
384            self.len,
385            0,
386        )
387    }
388}
389
390impl<'a> IntoIterator for &'a BitBuffer {
391    type Item = bool;
392    type IntoIter = BitIterator<'a>;
393
394    fn into_iter(self) -> Self::IntoIter {
395        self.iter()
396    }
397}
398
399#[cfg(test)]
400mod tests {
401    use crate::bit::BitBuffer;
402    use crate::{ByteBuffer, buffer};
403
404    #[test]
405    fn test_bool() {
406        // Create a new Buffer<u64> of length 1024 where the 8th bit is set.
407        let buffer: ByteBuffer = buffer![1 << 7; 1024];
408        let bools = BitBuffer::new(buffer, 1024 * 8);
409
410        // sanity checks
411        assert_eq!(bools.len(), 1024 * 8);
412        assert!(!bools.is_empty());
413        assert_eq!(bools.true_count(), 1024);
414        assert_eq!(bools.false_count(), 1024 * 7);
415
416        // Check all the values
417        for word in 0..1024 {
418            for bit in 0..8 {
419                if bit == 7 {
420                    assert!(bools.value(word * 8 + bit));
421                } else {
422                    assert!(!bools.value(word * 8 + bit));
423                }
424            }
425        }
426
427        // Slice the buffer to create a new subset view.
428        let sliced = bools.slice(64..72);
429
430        // sanity checks
431        assert_eq!(sliced.len(), 8);
432        assert!(!sliced.is_empty());
433        assert_eq!(sliced.true_count(), 1);
434        assert_eq!(sliced.false_count(), 7);
435
436        // Check all of the values like before
437        for bit in 0..8 {
438            if bit == 7 {
439                assert!(sliced.value(bit));
440            } else {
441                assert!(!sliced.value(bit));
442            }
443        }
444    }
445}