Skip to main content

arrow_buffer/buffer/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_chunk_iterator::BitChunks;
19use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
20use crate::bit_util::read_u64;
21use crate::{
22    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
23    buffer_bin_xor,
24};
25
26use std::ops::{BitAnd, BitOr, BitXor, Not};
27
28/// A slice-able [`Buffer`] containing bit-packed booleans
29///
30/// This structure represents a sequence of boolean values packed into a
31/// byte-aligned [`Buffer`]. Both the offset and length are represented in bits.
32///
33/// # Layout
34///
35/// The values are represented as little endian bit-packed values, where the
36/// least significant bit of each byte represents the first boolean value and
37/// then proceeding to the most significant bit.
38///
39/// For example, the 10 bit bitmask `0b0111001101` has length 10, and is
40/// represented using 2 bytes with offset 0 like this:
41///
42/// ```text
43///        ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
44///        │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
45///        ││ 1 │ 0 │ 1 │ 1 │ 0 │ 0 │ 1 │ 1 ││    ││ 1 │ 0 │ ? │ ? │ ? │ ? │ ? │ ? ││
46///        │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
47/// bit    └─────────────────────────────────┘    └─────────────────────────────────┘
48/// offset  0             Byte 0             7    0              Byte 1            7
49///
50///         length = 10 bits, offset = 0
51/// ```
52///
53/// The same bitmask with length 10 and offset 3 would be represented using 2
54/// bytes like this:
55///
56/// ```text
57///       ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
58///       │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
59///       ││ ? │ ? │ ? │ 1 │ 0 │ 1 │ 1 │ 0 ││    ││ 0 │ 1 │ 1 │ 1 │ 0 │ ? │ ? │ ? ││
60///       │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
61/// bit   └─────────────────────────────────┘    └─────────────────────────────────┘
62/// offset 0             Byte 0             7    0              Byte 1            7
63///
64///        length = 10 bits, offset = 3
65/// ```
66///
67/// Note that the bits marked `?` are not logically part of the mask and may
68/// contain either `0` or `1`
69///
70/// # See Also
71/// * [`BooleanBufferBuilder`] for building [`BooleanBuffer`] instances
72/// * [`NullBuffer`] for representing null values in Arrow arrays
73///
74/// [`NullBuffer`]: crate::NullBuffer
75#[derive(Debug, Clone, Eq)]
76pub struct BooleanBuffer {
77    /// Underlying buffer (byte aligned)
78    buffer: Buffer,
79    /// Offset in bits (not bytes)
80    bit_offset: usize,
81    /// Length in bits (not bytes)
82    bit_len: usize,
83}
84
85impl PartialEq for BooleanBuffer {
86    fn eq(&self, other: &Self) -> bool {
87        if self.bit_len != other.bit_len {
88            return false;
89        }
90
91        let lhs = self.bit_chunks().iter_padded();
92        let rhs = other.bit_chunks().iter_padded();
93        lhs.zip(rhs).all(|(a, b)| a == b)
94    }
95}
96
97impl BooleanBuffer {
98    /// Create a new [`BooleanBuffer`] from a [`Buffer`], `bit_offset` offset and `bit_len` length
99    ///
100    /// # Panics
101    ///
102    /// This method will panic if `buffer` is not large enough
103    pub fn new(buffer: Buffer, bit_offset: usize, bit_len: usize) -> Self {
104        let total_len = bit_offset.saturating_add(bit_len);
105        let buffer_len = buffer.len();
106        let buffer_bit_len = buffer_len.saturating_mul(8);
107        assert!(
108            total_len <= buffer_bit_len,
109            "buffer not large enough (bit_offset: {bit_offset}, bit_len: {bit_len}, buffer_len: {buffer_len})"
110        );
111        Self {
112            buffer,
113            bit_offset,
114            bit_len,
115        }
116    }
117
118    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `true`
119    pub fn new_set(length: usize) -> Self {
120        let mut builder = BooleanBufferBuilder::new(length);
121        builder.append_n(length, true);
122        builder.finish()
123    }
124
125    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `false`
126    pub fn new_unset(length: usize) -> Self {
127        let buffer = MutableBuffer::new_null(length).into_buffer();
128        Self {
129            buffer,
130            bit_offset: 0,
131            bit_len: length,
132        }
133    }
134
135    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
136    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
137        let buffer = MutableBuffer::collect_bool(len, f);
138        Self::new(buffer.into(), 0, len)
139    }
140
141    /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
142    /// input buffer.
143    ///
144    /// # Notes:
145    /// * The new `BooleanBuffer` may have non zero offset
146    ///   and/or padding bits outside the logical range.
147    ///
148    /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from in input slice
149    /// ```
150    /// # use arrow_buffer::BooleanBuffer;
151    /// let input = [0b11001100u8, 0b10111010u8];
152    /// // // Copy bits 4..16 from input
153    /// let result = BooleanBuffer::from_bits(&input, 4, 12);
154    /// // output is 12 bits long starting from bit offset 4
155    /// assert_eq!(result.len(), 12);
156    /// assert_eq!(result.offset(), 4);
157    /// // the expected 12 bits are 0b101110101100 (bits 4..16 of the input)
158    /// let expected_bits = [false, false, true, true, false, true, false, true, true, true, false, true];
159    /// for (i, v) in expected_bits.into_iter().enumerate() {
160    ///    assert_eq!(result.value(i), v);
161    /// }
162    /// // However, underlying buffer has (ignored) bits set outside the requested range
163    /// assert_eq!(result.values(), &[0b11001100u8, 0b10111010, 0, 0, 0, 0, 0, 0]);
164    pub fn from_bits(src: impl AsRef<[u8]>, offset_in_bits: usize, len_in_bits: usize) -> Self {
165        Self::from_bitwise_unary_op(src, offset_in_bits, len_in_bits, |a| a)
166    }
167
168    /// Create a new [`BooleanBuffer`] by applying the bitwise operation to `op`
169    /// to an input buffer.
170    ///
171    /// This function is faster than applying the operation bit by bit as
172    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
173    ///
174    /// # Notes:
175    /// * `op` takes a single `u64` inputs and produces one `u64` output.
176    /// * `op` must only apply bitwise operations
177    ///   on the relevant bits; the input `u64` may contain irrelevant bits
178    ///   and may be processed differently on different endian architectures.
179    /// * `op` may be called with input bits outside the requested range
180    /// * Returned `BooleanBuffer` may have non zero offset
181    /// * Returned `BooleanBuffer` may have bits set outside the requested range
182    ///
183    /// # See Also
184    /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
185    /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
186    ///
187    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT`
188    /// ```
189    /// # use arrow_buffer::BooleanBuffer;
190    /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
191    /// // NOT of bits 4..16
192    /// let result = BooleanBuffer::from_bitwise_unary_op(
193    ///  &input, 4, 12, |a| !a
194    /// );
195    /// // output is 12 bits long starting from bit offset 4
196    /// assert_eq!(result.len(), 12);
197    /// assert_eq!(result.offset(), 4);
198    /// // the expected 12 bits are 0b001100110101, (NOT of the requested bits)
199    /// let expected_bits = [true, true, false, false, true, false, true, false, false, false, true, false];
200    /// for (i, v) in expected_bits.into_iter().enumerate() {
201    ///     assert_eq!(result.value(i), v);
202    /// }
203    /// // However, underlying buffer has (ignored) bits set outside the requested range
204    /// let expected = [0b00110011u8, 0b01000101u8, 255, 255, 255, 255, 255, 255];
205    /// assert_eq!(result.values(), &expected);
206    /// ```
207    pub fn from_bitwise_unary_op<F>(
208        src: impl AsRef<[u8]>,
209        offset_in_bits: usize,
210        len_in_bits: usize,
211        mut op: F,
212    ) -> Self
213    where
214        F: FnMut(u64) -> u64,
215    {
216        let end = offset_in_bits + len_in_bits;
217        // Align start and end to 64 bit (8 byte) boundaries if possible to allow using the
218        // optimized code path as much as possible.
219        let aligned_offset = offset_in_bits & !63;
220        let aligned_end_bytes = bit_util::ceil(end, 64) * 8;
221        let src_len = src.as_ref().len();
222        let slice_end = aligned_end_bytes.min(src_len);
223
224        let aligned_start = &src.as_ref()[aligned_offset / 8..slice_end];
225
226        let (prefix, aligned_u64s, suffix) = unsafe { aligned_start.as_ref().align_to::<u64>() };
227        match (prefix, suffix) {
228            ([], []) => {
229                // the buffer is word (64 bit) aligned, so use optimized Vec code.
230                let result_u64s: Vec<u64> = aligned_u64s.iter().map(|l| op(*l)).collect();
231                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 64, len_in_bits);
232            }
233            ([], suffix) => {
234                let suffix = read_u64(suffix);
235                let result_u64s: Vec<u64> = aligned_u64s
236                    .iter()
237                    .cloned()
238                    .chain(std::iter::once(suffix))
239                    .map(&mut op)
240                    .collect();
241                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 64, len_in_bits);
242            }
243            _ => {}
244        }
245
246        // align to byte boundaries
247        // Use unaligned code path, handle remainder bytes
248        let chunks = aligned_start.chunks_exact(8);
249        let remainder = chunks.remainder();
250        let iter = chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
251        let vec_u64s: Vec<u64> = if remainder.is_empty() {
252            iter.map(&mut op).collect()
253        } else {
254            iter.chain(Some(read_u64(remainder))).map(&mut op).collect()
255        };
256
257        BooleanBuffer::new(vec_u64s.into(), offset_in_bits % 64, len_in_bits)
258    }
259
260    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
261    /// the relevant bits from two input buffers.
262    ///
263    /// This function is faster than applying the operation bit by bit as
264    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
265    ///
266    /// # Notes:
267    /// * `op` takes two `u64` inputs and produces one `u64` output.
268    /// * `op` must only apply bitwise operations
269    ///   on the relevant bits; the input `u64` values may contain irrelevant bits
270    ///   and may be processed differently on different endian architectures.
271    /// * `op` may be called with input bits outside the requested range.
272    /// * The returned `BooleanBuffer` always has zero offset.
273    ///
274    /// # See Also
275    /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
276    /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
277    ///
278    /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
279    /// ```
280    /// # use arrow_buffer::{Buffer, BooleanBuffer};
281    /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
282    /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
283    /// // AND of the first 12 bits
284    /// let result = BooleanBuffer::from_bitwise_binary_op(
285    ///   &left, 0, &right, 0, 12, |a, b| a & b
286    /// );
287    /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
288    /// ```
289    ///
290    /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
291    /// ```
292    /// # use arrow_buffer::BooleanBuffer;
293    /// let left = [0b11001100u8, 0b10111010u8];
294    /// let right = [0b10101010u8, 0b11011100u8];
295    /// // OR of bits 4..16 from left and bits 0..12 from right
296    /// let result = BooleanBuffer::from_bitwise_binary_op(
297    ///  &left, 4, &right, 0, 12, |a, b| a | b
298    /// );
299    /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
300    /// ```
301    pub fn from_bitwise_binary_op<F>(
302        left: impl AsRef<[u8]>,
303        left_offset_in_bits: usize,
304        right: impl AsRef<[u8]>,
305        right_offset_in_bits: usize,
306        len_in_bits: usize,
307        mut op: F,
308    ) -> Self
309    where
310        F: FnMut(u64, u64) -> u64,
311    {
312        let left = left.as_ref();
313        let right = right.as_ref();
314        // try fast path for aligned input
315        // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices
316        // to improve performance.
317        if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
318            // align to byte boundary
319            let left = &left[left_offset_in_bits / 8..];
320            let right = &right[right_offset_in_bits / 8..];
321
322            unsafe {
323                let (left_prefix, left_u64s, left_suffix) = left.align_to::<u64>();
324                let (right_prefix, right_u64s, right_suffix) = right.align_to::<u64>();
325                // if there is no prefix or suffix, both buffers are aligned and
326                // we can do the operation directly on u64s.
327                // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88.
328                // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
329                if left_prefix.is_empty()
330                    && right_prefix.is_empty()
331                    && left_suffix.is_empty()
332                    && right_suffix.is_empty()
333                {
334                    let result_u64s = left_u64s
335                        .iter()
336                        .zip(right_u64s.iter())
337                        .map(|(l, r)| op(*l, *r))
338                        .collect::<Vec<u64>>();
339                    return BooleanBuffer {
340                        buffer: Buffer::from(result_u64s),
341                        bit_offset: 0,
342                        bit_len: len_in_bits,
343                    };
344                }
345            }
346        }
347        let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
348        let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);
349
350        let chunks = left_chunks
351            .iter()
352            .zip(right_chunks.iter())
353            .map(|(left, right)| op(left, right));
354        // Soundness: `BitChunks` is a `BitChunks` trusted length iterator which
355        // correctly reports its upper bound
356        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
357
358        let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
359        let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
360        // we are counting its starting from the least significant bit, to to_le_bytes should be correct
361        let rem = &rem.to_le_bytes()[0..remainder_bytes];
362        buffer.extend_from_slice(rem);
363
364        BooleanBuffer {
365            buffer: Buffer::from(buffer),
366            bit_offset: 0,
367            bit_len: len_in_bits,
368        }
369    }
370
371    /// Returns the number of set bits in this buffer
372    pub fn count_set_bits(&self) -> usize {
373        self.buffer
374            .count_set_bits_offset(self.bit_offset, self.bit_len)
375    }
376
377    /// Finds the position of the n-th set bit (1-based) starting from `start` index.
378    /// If fewer than `n` set bits are found, returns the length of the buffer.
379    pub fn find_nth_set_bit_position(&self, start: usize, n: usize) -> usize {
380        if n == 0 {
381            return start;
382        }
383
384        self.slice(start, self.bit_len - start)
385            .set_indices()
386            .nth(n - 1)
387            .map(|idx| start + idx + 1)
388            .unwrap_or(self.bit_len)
389    }
390
391    /// Returns a [`BitChunks`] instance which can be used to iterate over
392    /// this buffer's bits in `u64` chunks
393    #[inline]
394    pub fn bit_chunks(&self) -> BitChunks<'_> {
395        BitChunks::new(self.values(), self.bit_offset, self.bit_len)
396    }
397
398    /// Returns the offset of this [`BooleanBuffer`] in bits (not bytes)
399    #[inline]
400    pub fn offset(&self) -> usize {
401        self.bit_offset
402    }
403
404    /// Returns the length of this [`BooleanBuffer`] in bits (not bytes)
405    #[inline]
406    pub fn len(&self) -> usize {
407        self.bit_len
408    }
409
410    /// Returns true if this [`BooleanBuffer`] is empty
411    #[inline]
412    pub fn is_empty(&self) -> bool {
413        self.bit_len == 0
414    }
415
416    /// Free up unused memory.
417    pub fn shrink_to_fit(&mut self) {
418        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
419        self.buffer.shrink_to_fit();
420    }
421
422    /// Returns the boolean value at index `i`.
423    ///
424    /// # Panics
425    ///
426    /// Panics if `i >= self.len()`
427    #[inline]
428    pub fn value(&self, idx: usize) -> bool {
429        assert!(idx < self.bit_len);
430        unsafe { self.value_unchecked(idx) }
431    }
432
433    /// Returns the boolean value at index `i`.
434    ///
435    /// # Safety
436    /// This doesn't check bounds, the caller must ensure that index < self.len()
437    #[inline]
438    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
439        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.bit_offset) }
440    }
441
442    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
443    #[inline]
444    pub fn values(&self) -> &[u8] {
445        &self.buffer
446    }
447
448    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
449    pub fn slice(&self, offset: usize, len: usize) -> Self {
450        assert!(
451            offset.saturating_add(len) <= self.bit_len,
452            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
453        );
454        Self {
455            buffer: self.buffer.clone(),
456            bit_offset: self.bit_offset + offset,
457            bit_len: len,
458        }
459    }
460
461    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
462    ///
463    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
464    pub fn sliced(&self) -> Buffer {
465        self.buffer.bit_slice(self.bit_offset, self.bit_len)
466    }
467
468    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
469    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
470    /// return false when the arrays are logically equal
471    pub fn ptr_eq(&self, other: &Self) -> bool {
472        self.buffer.as_ptr() == other.buffer.as_ptr()
473            && self.bit_offset == other.bit_offset
474            && self.bit_len == other.bit_len
475    }
476
477    /// Returns the inner [`Buffer`]
478    ///
479    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
480    #[inline]
481    pub fn inner(&self) -> &Buffer {
482        &self.buffer
483    }
484
485    /// Returns the inner [`Buffer`], consuming self
486    ///
487    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
488    pub fn into_inner(self) -> Buffer {
489        self.buffer
490    }
491
492    /// Returns an iterator over the bits in this [`BooleanBuffer`]
493    pub fn iter(&self) -> BitIterator<'_> {
494        self.into_iter()
495    }
496
497    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
498    pub fn set_indices(&self) -> BitIndexIterator<'_> {
499        BitIndexIterator::new(self.values(), self.bit_offset, self.bit_len)
500    }
501
502    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
503    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
504        BitIndexU32Iterator::new(self.values(), self.bit_offset, self.bit_len)
505    }
506
507    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
508    pub fn set_slices(&self) -> BitSliceIterator<'_> {
509        BitSliceIterator::new(self.values(), self.bit_offset, self.bit_len)
510    }
511}
512
513impl Not for &BooleanBuffer {
514    type Output = BooleanBuffer;
515
516    fn not(self) -> Self::Output {
517        BooleanBuffer::from_bitwise_unary_op(&self.buffer, self.bit_offset, self.bit_len, |a| !a)
518    }
519}
520
521impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
522    type Output = BooleanBuffer;
523
524    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
525        assert_eq!(self.bit_len, rhs.bit_len);
526        BooleanBuffer {
527            buffer: buffer_bin_and(
528                &self.buffer,
529                self.bit_offset,
530                &rhs.buffer,
531                rhs.bit_offset,
532                self.bit_len,
533            ),
534            bit_offset: 0,
535            bit_len: self.bit_len,
536        }
537    }
538}
539
540impl BitOr<&BooleanBuffer> for &BooleanBuffer {
541    type Output = BooleanBuffer;
542
543    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
544        assert_eq!(self.bit_len, rhs.bit_len);
545        BooleanBuffer {
546            buffer: buffer_bin_or(
547                &self.buffer,
548                self.bit_offset,
549                &rhs.buffer,
550                rhs.bit_offset,
551                self.bit_len,
552            ),
553            bit_offset: 0,
554            bit_len: self.bit_len,
555        }
556    }
557}
558
559impl BitXor<&BooleanBuffer> for &BooleanBuffer {
560    type Output = BooleanBuffer;
561
562    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
563        assert_eq!(self.bit_len, rhs.bit_len);
564        BooleanBuffer {
565            buffer: buffer_bin_xor(
566                &self.buffer,
567                self.bit_offset,
568                &rhs.buffer,
569                rhs.bit_offset,
570                self.bit_len,
571            ),
572            bit_offset: 0,
573            bit_len: self.bit_len,
574        }
575    }
576}
577
578impl<'a> IntoIterator for &'a BooleanBuffer {
579    type Item = bool;
580    type IntoIter = BitIterator<'a>;
581
582    fn into_iter(self) -> Self::IntoIter {
583        BitIterator::new(self.values(), self.bit_offset, self.bit_len)
584    }
585}
586
587impl From<&[bool]> for BooleanBuffer {
588    fn from(value: &[bool]) -> Self {
589        let mut builder = BooleanBufferBuilder::new(value.len());
590        builder.append_slice(value);
591        builder.finish()
592    }
593}
594
595impl From<Vec<bool>> for BooleanBuffer {
596    fn from(value: Vec<bool>) -> Self {
597        value.as_slice().into()
598    }
599}
600
601impl FromIterator<bool> for BooleanBuffer {
602    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
603        let iter = iter.into_iter();
604        let (hint, _) = iter.size_hint();
605        let mut builder = BooleanBufferBuilder::new(hint);
606        iter.for_each(|b| builder.append(b));
607        builder.finish()
608    }
609}
610
611#[cfg(test)]
612mod tests {
613    use super::*;
614
615    #[test]
616    fn test_boolean_new() {
617        let bytes = &[0, 1, 2, 3, 4];
618        let buf = Buffer::from(bytes);
619        let offset = 0;
620        let len = 24;
621
622        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
623        assert_eq!(bytes, boolean_buf.values());
624        assert_eq!(offset, boolean_buf.offset());
625        assert_eq!(len, boolean_buf.len());
626
627        assert_eq!(2, boolean_buf.count_set_bits());
628        assert_eq!(&buf, boolean_buf.inner());
629        assert_eq!(buf, boolean_buf.clone().into_inner());
630
631        assert!(!boolean_buf.is_empty())
632    }
633
634    #[test]
635    fn test_boolean_data_equality() {
636        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
637        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
638        assert_eq!(boolean_buf1, boolean_buf2);
639
640        // slice with same offset and same length should still preserve equality
641        let boolean_buf3 = boolean_buf1.slice(8, 16);
642        assert_ne!(boolean_buf1, boolean_buf3);
643        let boolean_buf4 = boolean_buf1.slice(0, 32);
644        assert_eq!(boolean_buf1, boolean_buf4);
645
646        // unequal because of different elements
647        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
648        assert_ne!(boolean_buf1, boolean_buf2);
649
650        // unequal because of different length
651        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
652        assert_ne!(boolean_buf1, boolean_buf2);
653
654        // ptr_eq
655        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
656        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
657        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
658    }
659
660    #[test]
661    fn test_boolean_slice() {
662        let bytes = &[0, 3, 2, 6, 2];
663        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
664        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
665
666        let boolean_slice1 = boolean_buf1.slice(16, 16);
667        let boolean_slice2 = boolean_buf2.slice(0, 16);
668        assert_eq!(boolean_slice1.values(), boolean_slice2.values());
669
670        assert_eq!(bytes, boolean_slice1.values());
671        assert_eq!(16, boolean_slice1.bit_offset);
672        assert_eq!(16, boolean_slice1.bit_len);
673
674        assert_eq!(bytes, boolean_slice2.values());
675        assert_eq!(0, boolean_slice2.bit_offset);
676        assert_eq!(16, boolean_slice2.bit_len);
677    }
678
679    #[test]
680    fn test_boolean_bitand() {
681        let offset = 0;
682        let len = 40;
683
684        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
685        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
686
687        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
688        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
689
690        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
691        assert_eq!(boolean_buf1 & boolean_buf2, expected);
692    }
693
694    #[test]
695    fn test_boolean_bitor() {
696        let offset = 0;
697        let len = 40;
698
699        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
700        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
701
702        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
703        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
704
705        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
706        assert_eq!(boolean_buf1 | boolean_buf2, expected);
707    }
708
709    #[test]
710    fn test_boolean_bitxor() {
711        let offset = 0;
712        let len = 40;
713
714        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
715        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
716
717        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
718        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
719
720        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
721        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
722    }
723
724    #[test]
725    fn test_boolean_not() {
726        let offset = 0;
727        let len = 40;
728
729        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
730        let boolean_buf = &BooleanBuffer::new(buf, offset, len);
731
732        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
733        assert_eq!(!boolean_buf, expected);
734
735        // Demonstrate that Non-zero offsets are preserved
736        let sliced = boolean_buf.slice(3, 20);
737        let result = !&sliced;
738        assert_eq!(result.offset(), 3);
739        assert_eq!(result.len(), sliced.len());
740        for i in 0..sliced.len() {
741            assert_eq!(result.value(i), !sliced.value(i));
742        }
743    }
744
745    #[test]
746    fn test_boolean_from_slice_bool() {
747        let v = [true, false, false];
748        let buf = BooleanBuffer::from(&v[..]);
749        assert_eq!(buf.offset(), 0);
750        assert_eq!(buf.len(), 3);
751        assert_eq!(buf.values().len(), 1);
752        assert!(buf.value(0));
753    }
754
755    #[test]
756    fn test_from_bitwise_unary_op() {
757        // Use 1024 boolean values so that at least some of the tests cover multiple u64 chunks and
758        // perfect alignment
759        let input_bools = (0..1024)
760            .map(|_| rand::random::<bool>())
761            .collect::<Vec<bool>>();
762        let input_buffer = BooleanBuffer::from(&input_bools[..]);
763
764        // Note ensure we test offsets over 100 to cover multiple u64 chunks
765        for offset in 0..1024 {
766            let result = BooleanBuffer::from_bitwise_unary_op(
767                input_buffer.values(),
768                offset,
769                input_buffer.len() - offset,
770                |a| !a,
771            );
772            let expected = input_bools[offset..]
773                .iter()
774                .map(|b| !*b)
775                .collect::<BooleanBuffer>();
776            assert_eq!(result, expected);
777        }
778
779        // Also test when the input doesn't cover the entire buffer
780        for offset in 0..512 {
781            let len = 512 - offset; // fixed length less than total
782            let result =
783                BooleanBuffer::from_bitwise_unary_op(input_buffer.values(), offset, len, |a| !a);
784            let expected = input_bools[offset..]
785                .iter()
786                .take(len)
787                .map(|b| !*b)
788                .collect::<BooleanBuffer>();
789            assert_eq!(result, expected);
790        }
791    }
792
793    #[test]
794    fn test_from_bitwise_unary_op_unaligned_fallback() {
795        // Deterministic affine sequence over u8: b[i] = 37*i + 11 (mod 256).
796        // This yields a non-trivial mix of bits (prefix: 11, 48, 85, 122, 159, 196, 233, 14, ...)
797        // so unary bit operations are exercised on varied input patterns.
798        let bytes = (0..80)
799            .map(|i| (i as u8).wrapping_mul(37).wrapping_add(11))
800            .collect::<Vec<_>>();
801        let base = bytes.as_ptr() as usize;
802        let shift = (0..8).find(|s| (base + s) % 8 != 0).unwrap();
803        let misaligned = &bytes[shift..];
804
805        // Case 1: fallback path with `remainder.is_empty() == true`
806        let src = &misaligned[..24];
807        let offset = 7;
808        let len = 96;
809        let result = BooleanBuffer::from_bitwise_unary_op(src, offset, len, |a| !a);
810        let expected = (0..len)
811            .map(|i| !bit_util::get_bit(src, offset + i))
812            .collect::<BooleanBuffer>();
813        assert_eq!(result, expected);
814        assert_eq!(result.offset(), offset % 64);
815
816        // Case 2: fallback path with `remainder.is_empty() == false`
817        let src = &misaligned[..13];
818        let offset = 3;
819        let len = 100;
820        let result = BooleanBuffer::from_bitwise_unary_op(src, offset, len, |a| !a);
821        let expected = (0..len)
822            .map(|i| !bit_util::get_bit(src, offset + i))
823            .collect::<BooleanBuffer>();
824        assert_eq!(result, expected);
825        assert_eq!(result.offset(), offset % 64);
826    }
827
828    #[test]
829    fn test_from_bitwise_binary_op() {
830        // pick random boolean inputs
831        let input_bools_left = (0..1024)
832            .map(|_| rand::random::<bool>())
833            .collect::<Vec<bool>>();
834        let input_bools_right = (0..1024)
835            .map(|_| rand::random::<bool>())
836            .collect::<Vec<bool>>();
837        let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
838        let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
839
840        for left_offset in 0..200 {
841            for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
842                for len_offset in [0, 1, 44, 100, 256, 300, 512] {
843                    let len = 1024 - len_offset - left_offset.max(right_offset); // ensure we don't go out of bounds
844                    // compute with AND
845                    let result = BooleanBuffer::from_bitwise_binary_op(
846                        input_buffer_left.values(),
847                        left_offset,
848                        input_buffer_right.values(),
849                        right_offset,
850                        len,
851                        |a, b| a & b,
852                    );
853                    // compute directly from bools
854                    let expected = input_bools_left[left_offset..]
855                        .iter()
856                        .zip(&input_bools_right[right_offset..])
857                        .take(len)
858                        .map(|(a, b)| *a & *b)
859                        .collect::<BooleanBuffer>();
860                    assert_eq!(result, expected);
861                }
862            }
863        }
864    }
865
866    #[test]
867    fn test_extend_trusted_len_sets_byte_len() {
868        // Ensures extend_trusted_len keeps the underlying byte length in sync with bit length.
869        let mut builder = BooleanBufferBuilder::new(0);
870        let bools: Vec<_> = (0..10).map(|i| i % 2 == 0).collect();
871        unsafe { builder.extend_trusted_len(bools.into_iter()) };
872        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
873    }
874
875    #[test]
876    fn test_extend_trusted_len_then_append() {
877        // Exercises append after extend_trusted_len to validate byte length and values.
878        let mut builder = BooleanBufferBuilder::new(0);
879        let bools: Vec<_> = (0..9).map(|i| i % 3 == 0).collect();
880        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
881        builder.append(true);
882        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
883        let finished = builder.finish();
884        for (i, v) in bools.into_iter().chain(std::iter::once(true)).enumerate() {
885            assert_eq!(finished.value(i), v, "at index {}", i);
886        }
887    }
888
889    #[test]
890    fn test_find_nth_set_bit_position() {
891        let bools = vec![true, false, true, true, false, true];
892        let buffer = BooleanBuffer::from(bools);
893
894        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 1);
895        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 3);
896        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 4);
897        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 6);
898        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 5), 6);
899
900        assert_eq!(buffer.clone().find_nth_set_bit_position(1, 1), 3);
901        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 1), 4);
902        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 2), 6);
903    }
904
905    #[test]
906    fn test_find_nth_set_bit_position_large() {
907        let mut bools = vec![false; 1000];
908        bools[100] = true;
909        bools[500] = true;
910        bools[999] = true;
911        let buffer = BooleanBuffer::from(bools);
912
913        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 101);
914        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 501);
915        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 1000);
916        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 1000);
917
918        assert_eq!(buffer.clone().find_nth_set_bit_position(101, 1), 501);
919    }
920
921    #[test]
922    fn test_find_nth_set_bit_position_sliced() {
923        let bools = vec![false, true, false, true, true, false, true]; // [F, T, F, T, T, F, T]
924        let buffer = BooleanBuffer::from(bools);
925        let slice = buffer.slice(1, 6); // [T, F, T, T, F, T]
926
927        assert_eq!(slice.len(), 6);
928        // Logical indices: 0, 1, 2, 3, 4, 5
929        // Logical values: T, F, T, T, F, T
930
931        assert_eq!(slice.clone().find_nth_set_bit_position(0, 1), 1);
932        assert_eq!(slice.clone().find_nth_set_bit_position(0, 2), 3);
933        assert_eq!(slice.clone().find_nth_set_bit_position(0, 3), 4);
934        assert_eq!(slice.clone().find_nth_set_bit_position(0, 4), 6);
935    }
936
937    #[test]
938    fn test_find_nth_set_bit_position_all_set() {
939        let buffer = BooleanBuffer::new_set(100);
940        for i in 1..=100 {
941            assert_eq!(buffer.clone().find_nth_set_bit_position(0, i), i);
942        }
943        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 101), 100);
944    }
945
946    #[test]
947    fn test_find_nth_set_bit_position_none_set() {
948        let buffer = BooleanBuffer::new_unset(100);
949        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 100);
950    }
951}