Skip to main content

arrow_buffer/buffer/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
19use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
20use crate::bit_util::read_u64;
21use crate::{
22    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
23    buffer_bin_xor,
24};
25
26use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
27
28/// A slice-able [`Buffer`] containing bit-packed booleans
29///
30/// This structure represents a sequence of boolean values packed into a
31/// byte-aligned [`Buffer`]. Both the offset and length are represented in bits.
32///
33/// # Layout
34///
35/// The values are represented as little endian bit-packed values, where the
36/// least significant bit of each byte represents the first boolean value and
37/// then proceeding to the most significant bit.
38///
39/// For example, the 10 bit bitmask `0b0111001101` has length 10, and is
40/// represented using 2 bytes with offset 0 like this:
41///
42/// ```text
43///        ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
44///        │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
45///        ││ 1 │ 0 │ 1 │ 1 │ 0 │ 0 │ 1 │ 1 ││    ││ 1 │ 0 │ ? │ ? │ ? │ ? │ ? │ ? ││
46///        │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
47/// bit    └─────────────────────────────────┘    └─────────────────────────────────┘
48/// offset  0             Byte 0             7    0              Byte 1            7
49///
50///         length = 10 bits, offset = 0
51/// ```
52///
53/// The same bitmask with length 10 and offset 3 would be represented using 2
54/// bytes like this:
55///
56/// ```text
57///       ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
58///       │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
59///       ││ ? │ ? │ ? │ 1 │ 0 │ 1 │ 1 │ 0 ││    ││ 0 │ 1 │ 1 │ 1 │ 0 │ ? │ ? │ ? ││
60///       │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
61/// bit   └─────────────────────────────────┘    └─────────────────────────────────┘
62/// offset 0             Byte 0             7    0              Byte 1            7
63///
64///        length = 10 bits, offset = 3
65/// ```
66///
67/// Note that the bits marked `?` are not logically part of the mask and may
68/// contain either `0` or `1`
69///
70/// # Bitwise Operations
71///
72/// `BooleanBuffer` implements the standard bitwise traits for creating a new
73/// buffer ([`BitAnd`], [`BitOr`], [`BitXor`], [`Not`]) as well as the assign variants
74/// for updating an existing buffer in place when possible ([`BitAndAssign`],
75/// [`BitOrAssign`], [`BitXorAssign`]).
76///
77/// ```
78/// # use arrow_buffer::BooleanBuffer;
79/// let mut left = BooleanBuffer::from(&[true, false, true, true] as &[bool]);
80/// let right = BooleanBuffer::from(&[true, true, false, true] as &[bool]);
81///
82/// // Create a new buffer by applying bitwise AND
83/// let anded = &left & &right;
84/// assert_eq!(anded, BooleanBuffer::from(&[true, false, false, true] as &[bool]));
85///
86/// // Update `left` in place by applying bitwise AND in place
87/// left &= &right;
88/// assert_eq!(left, BooleanBuffer::from(&[true, false, false, true] as &[bool]));
89/// ```
90///
91/// # See Also
92/// * [`BooleanBufferBuilder`] for building [`BooleanBuffer`] instances
93/// * [`NullBuffer`] for representing null values in Arrow arrays
94///
95/// [`NullBuffer`]: crate::NullBuffer
96#[derive(Debug, Clone, Eq)]
97pub struct BooleanBuffer {
98    /// Underlying buffer (byte aligned)
99    buffer: Buffer,
100    /// Offset in bits (not bytes)
101    bit_offset: usize,
102    /// Length in bits (not bytes)
103    bit_len: usize,
104}
105
106impl PartialEq for BooleanBuffer {
107    fn eq(&self, other: &Self) -> bool {
108        if self.bit_len != other.bit_len {
109            return false;
110        }
111
112        let lhs = self.bit_chunks().iter_padded();
113        let rhs = other.bit_chunks().iter_padded();
114        lhs.zip(rhs).all(|(a, b)| a == b)
115    }
116}
117
118impl BooleanBuffer {
119    /// Create a new [`BooleanBuffer`] from a [`Buffer`], `bit_offset` offset and `bit_len` length
120    ///
121    /// # Panics
122    ///
123    /// This method will panic if `buffer` is not large enough
124    pub fn new(buffer: Buffer, bit_offset: usize, bit_len: usize) -> Self {
125        let total_len = bit_offset.saturating_add(bit_len);
126        let buffer_len = buffer.len();
127        let buffer_bit_len = buffer_len.saturating_mul(8);
128        assert!(
129            total_len <= buffer_bit_len,
130            "buffer not large enough (bit_offset: {bit_offset}, bit_len: {bit_len}, buffer_len: {buffer_len})"
131        );
132        Self {
133            buffer,
134            bit_offset,
135            bit_len,
136        }
137    }
138
139    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `true`
140    pub fn new_set(length: usize) -> Self {
141        let mut builder = BooleanBufferBuilder::new(length);
142        builder.append_n(length, true);
143        builder.finish()
144    }
145
146    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `false`
147    pub fn new_unset(length: usize) -> Self {
148        let buffer = MutableBuffer::new_null(length).into_buffer();
149        Self {
150            buffer,
151            bit_offset: 0,
152            bit_len: length,
153        }
154    }
155
156    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
157    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
158        let buffer = MutableBuffer::collect_bool(len, f);
159        Self::new(buffer.into(), 0, len)
160    }
161
162    /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
163    /// input buffer.
164    ///
165    /// # Notes:
166    /// * The new `BooleanBuffer` may have non zero offset
167    ///   and/or padding bits outside the logical range.
168    ///
169    /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from in input slice
170    /// ```
171    /// # use arrow_buffer::BooleanBuffer;
172    /// let input = [0b11001100u8, 0b10111010u8];
173    /// // // Copy bits 4..16 from input
174    /// let result = BooleanBuffer::from_bits(&input, 4, 12);
175    /// // output is 12 bits long starting from bit offset 4
176    /// assert_eq!(result.len(), 12);
177    /// assert_eq!(result.offset(), 4);
178    /// // the expected 12 bits are 0b101110101100 (bits 4..16 of the input)
179    /// let expected_bits = [false, false, true, true, false, true, false, true, true, true, false, true];
180    /// for (i, v) in expected_bits.into_iter().enumerate() {
181    ///    assert_eq!(result.value(i), v);
182    /// }
183    /// // However, underlying buffer has (ignored) bits set outside the requested range
184    /// assert_eq!(result.values(), &[0b11001100u8, 0b10111010, 0, 0, 0, 0, 0, 0]);
185    pub fn from_bits(src: impl AsRef<[u8]>, offset_in_bits: usize, len_in_bits: usize) -> Self {
186        Self::from_bitwise_unary_op(src, offset_in_bits, len_in_bits, |a| a)
187    }
188
189    /// Create a new [`BooleanBuffer`] by applying the bitwise operation to `op`
190    /// to an input buffer.
191    ///
192    /// This function is faster than applying the operation bit by bit as
193    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
194    ///
195    /// # Notes:
196    /// * `op` takes a single `u64` inputs and produces one `u64` output.
197    /// * `op` must only apply bitwise operations
198    ///   on the relevant bits; the input `u64` may contain irrelevant bits
199    ///   and may be processed differently on different endian architectures.
200    /// * `op` may be called with input bits outside the requested range
201    /// * Returned `BooleanBuffer` may have non zero offset
202    /// * Returned `BooleanBuffer` may have bits set outside the requested range
203    ///
204    /// # See Also
205    /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
206    /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
207    ///
208    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT`
209    /// ```
210    /// # use arrow_buffer::BooleanBuffer;
211    /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
212    /// // NOT of bits 4..16
213    /// let result = BooleanBuffer::from_bitwise_unary_op(
214    ///  &input, 4, 12, |a| !a
215    /// );
216    /// // output is 12 bits long starting from bit offset 4
217    /// assert_eq!(result.len(), 12);
218    /// assert_eq!(result.offset(), 4);
219    /// // the expected 12 bits are 0b001100110101, (NOT of the requested bits)
220    /// let expected_bits = [true, true, false, false, true, false, true, false, false, false, true, false];
221    /// for (i, v) in expected_bits.into_iter().enumerate() {
222    ///     assert_eq!(result.value(i), v);
223    /// }
224    /// // However, underlying buffer has (ignored) bits set outside the requested range
225    /// let expected = [0b00110011u8, 0b01000101u8, 255, 255, 255, 255, 255, 255];
226    /// assert_eq!(result.values(), &expected);
227    /// ```
228    pub fn from_bitwise_unary_op<F>(
229        src: impl AsRef<[u8]>,
230        offset_in_bits: usize,
231        len_in_bits: usize,
232        mut op: F,
233    ) -> Self
234    where
235        F: FnMut(u64) -> u64,
236    {
237        let end = offset_in_bits + len_in_bits;
238        // Align start and end to 64 bit (8 byte) boundaries if possible to allow using the
239        // optimized code path as much as possible.
240        let aligned_offset = offset_in_bits & !63;
241        let aligned_end_bytes = bit_util::ceil(end, 64) * 8;
242        let src_len = src.as_ref().len();
243        let slice_end = aligned_end_bytes.min(src_len);
244
245        let aligned_start = &src.as_ref()[aligned_offset / 8..slice_end];
246
247        let (prefix, aligned_u64s, suffix) = unsafe { aligned_start.as_ref().align_to::<u64>() };
248        match (prefix, suffix) {
249            ([], []) => {
250                // the buffer is word (64 bit) aligned, so use optimized Vec code.
251                let result_u64s: Vec<u64> = aligned_u64s.iter().map(|l| op(*l)).collect();
252                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 64, len_in_bits);
253            }
254            ([], suffix) => {
255                let suffix = read_u64(suffix);
256                let result_u64s: Vec<u64> = aligned_u64s
257                    .iter()
258                    .cloned()
259                    .chain(std::iter::once(suffix))
260                    .map(&mut op)
261                    .collect();
262                return BooleanBuffer::new(result_u64s.into(), offset_in_bits % 64, len_in_bits);
263            }
264            _ => {}
265        }
266
267        // align to byte boundaries
268        // Use unaligned code path, handle remainder bytes
269        let chunks = aligned_start.chunks_exact(8);
270        let remainder = chunks.remainder();
271        let iter = chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
272        let vec_u64s: Vec<u64> = if remainder.is_empty() {
273            iter.map(&mut op).collect()
274        } else {
275            iter.chain(Some(read_u64(remainder))).map(&mut op).collect()
276        };
277
278        BooleanBuffer::new(vec_u64s.into(), offset_in_bits % 64, len_in_bits)
279    }
280
281    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
282    /// the relevant bits from two input buffers.
283    ///
284    /// This function is faster than applying the operation bit by bit as
285    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
286    ///
287    /// # Notes:
288    /// * `op` takes two `u64` inputs and produces one `u64` output.
289    /// * `op` must only apply bitwise operations
290    ///   on the relevant bits; the input `u64` values may contain irrelevant bits
291    ///   and may be processed differently on different endian architectures.
292    /// * `op` may be called with input bits outside the requested range.
293    /// * Returned `BooleanBuffer` may have non zero offset
294    /// * Returned `BooleanBuffer` may have bits set outside the requested range
295    ///
296    /// # See Also
297    /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
298    /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
299    ///
300    /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
301    /// ```
302    /// # use arrow_buffer::{Buffer, BooleanBuffer};
303    /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
304    /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
305    /// // AND of the first 12 bits
306    /// let result = BooleanBuffer::from_bitwise_binary_op(
307    ///   &left, 0, &right, 0, 12, |a, b| a & b
308    /// );
309    /// assert_eq!(result.len(), 12);
310    /// for i in 0..12 {
311    ///     assert_eq!(result.value(i), left.as_slice()[i / 8] >> (i % 8) & 1 == 1
312    ///         && right.as_slice()[i / 8] >> (i % 8) & 1 == 1);
313    /// }
314    /// ```
315    ///
316    /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
317    /// ```
318    /// # use arrow_buffer::{BooleanBuffer, bit_util};
319    /// let left = [0b11001100u8, 0b10111010u8];
320    /// let right = [0b10101010u8, 0b11011100u8];
321    /// // OR of bits 4..16 from left and bits 0..12 from right
322    /// let result = BooleanBuffer::from_bitwise_binary_op(
323    ///  &left, 4, &right, 0, 12, |a, b| a | b
324    /// );
325    /// assert_eq!(result.len(), 12);
326    /// for i in 0..12 {
327    ///     let l = bit_util::get_bit(&left, 4 + i);
328    ///     let r = bit_util::get_bit(&right, i);
329    ///     assert_eq!(result.value(i), l | r);
330    /// }
331    /// ```
332    pub fn from_bitwise_binary_op<F>(
333        left: impl AsRef<[u8]>,
334        left_offset_in_bits: usize,
335        right: impl AsRef<[u8]>,
336        right_offset_in_bits: usize,
337        len_in_bits: usize,
338        mut op: F,
339    ) -> Self
340    where
341        F: FnMut(u64, u64) -> u64,
342    {
343        let left = left.as_ref();
344        let right = right.as_ref();
345
346        // When both offsets share the same sub-64-bit alignment, we can
347        // align both to 64-bit boundaries and zip u64s directly,
348        // avoiding BitChunks bit-shifting entirely.
349        if left_offset_in_bits % 64 == right_offset_in_bits % 64 {
350            let bit_offset = left_offset_in_bits % 64;
351            let left_end = left_offset_in_bits + len_in_bits;
352            let right_end = right_offset_in_bits + len_in_bits;
353
354            let left_aligned = left_offset_in_bits & !63;
355            let right_aligned = right_offset_in_bits & !63;
356
357            let left_end_bytes = (bit_util::ceil(left_end, 64) * 8).min(left.len());
358            let right_end_bytes = (bit_util::ceil(right_end, 64) * 8).min(right.len());
359
360            let left_slice = &left[left_aligned / 8..left_end_bytes];
361            let right_slice = &right[right_aligned / 8..right_end_bytes];
362
363            let (lp, left_u64s, ls) = unsafe { left_slice.align_to::<u64>() };
364            let (rp, right_u64s, rs) = unsafe { right_slice.align_to::<u64>() };
365
366            match (lp, ls, rp, rs) {
367                ([], [], [], []) => {
368                    let result_u64s: Vec<u64> = left_u64s
369                        .iter()
370                        .zip(right_u64s.iter())
371                        .map(|(l, r)| op(*l, *r))
372                        .collect();
373                    return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
374                }
375                ([], left_suf, [], right_suf) => {
376                    let left_iter = left_u64s
377                        .iter()
378                        .cloned()
379                        .chain((!left_suf.is_empty()).then(|| read_u64(left_suf)));
380                    let right_iter = right_u64s
381                        .iter()
382                        .cloned()
383                        .chain((!right_suf.is_empty()).then(|| read_u64(right_suf)));
384                    let result_u64s: Vec<u64> =
385                        left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect();
386                    return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
387                }
388                _ => {}
389            }
390
391            // Memory not u64-aligned, use chunks_exact fallback
392            let left_chunks = left_slice.chunks_exact(8);
393            let left_rem = left_chunks.remainder();
394            let right_chunks = right_slice.chunks_exact(8);
395            let right_rem = right_chunks.remainder();
396
397            let left_iter = left_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
398            let right_iter = right_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
399
400            let result_u64s: Vec<u64> = if left_rem.is_empty() && right_rem.is_empty() {
401                left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect()
402            } else {
403                left_iter
404                    .chain(Some(read_u64(left_rem)))
405                    .zip(right_iter.chain(Some(read_u64(right_rem))))
406                    .map(|(l, r)| op(l, r))
407                    .collect()
408            };
409            return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
410        }
411
412        // Different sub-64-bit alignments: bit-shifting unavoidable
413        let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
414        let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);
415
416        let chunks = left_chunks
417            .iter()
418            .zip(right_chunks.iter())
419            .map(|(left, right)| op(left, right));
420        // Soundness: `BitChunks` is a `BitChunks` trusted length iterator which
421        // correctly reports its upper bound
422        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
423
424        let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
425        let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
426        // we are counting its starting from the least significant bit, to to_le_bytes should be correct
427        let rem = &rem.to_le_bytes()[0..remainder_bytes];
428        buffer.extend_from_slice(rem);
429
430        BooleanBuffer {
431            buffer: Buffer::from(buffer),
432            bit_offset: 0,
433            bit_len: len_in_bits,
434        }
435    }
436
437    /// Returns the number of set bits in this buffer
438    pub fn count_set_bits(&self) -> usize {
439        self.buffer
440            .count_set_bits_offset(self.bit_offset, self.bit_len)
441    }
442
443    /// Finds the position of the n-th set bit (1-based) starting from `start` index.
444    /// If fewer than `n` set bits are found, returns the length of the buffer.
445    pub fn find_nth_set_bit_position(&self, start: usize, n: usize) -> usize {
446        if n == 0 {
447            return start;
448        }
449
450        self.slice(start, self.bit_len - start)
451            .set_indices()
452            .nth(n - 1)
453            .map(|idx| start + idx + 1)
454            .unwrap_or(self.bit_len)
455    }
456
457    /// Returns a [`BitChunks`] instance which can be used to iterate over
458    /// this buffer's bits in `u64` chunks
459    #[inline]
460    pub fn bit_chunks(&self) -> BitChunks<'_> {
461        BitChunks::new(self.values(), self.bit_offset, self.bit_len)
462    }
463
464    /// Returns the offset of this [`BooleanBuffer`] in bits (not bytes)
465    #[inline]
466    pub fn offset(&self) -> usize {
467        self.bit_offset
468    }
469
470    /// Returns the length of this [`BooleanBuffer`] in bits (not bytes)
471    #[inline]
472    pub fn len(&self) -> usize {
473        self.bit_len
474    }
475
476    /// Returns true if this [`BooleanBuffer`] is empty
477    #[inline]
478    pub fn is_empty(&self) -> bool {
479        self.bit_len == 0
480    }
481
482    /// Free up unused memory.
483    pub fn shrink_to_fit(&mut self) {
484        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
485        self.buffer.shrink_to_fit();
486    }
487
488    /// Returns the boolean value at index `i`.
489    ///
490    /// # Panics
491    ///
492    /// Panics if `i >= self.len()`
493    #[inline]
494    pub fn value(&self, idx: usize) -> bool {
495        assert!(idx < self.bit_len);
496        unsafe { self.value_unchecked(idx) }
497    }
498
499    /// Returns the boolean value at index `i`.
500    ///
501    /// # Safety
502    /// This doesn't check bounds, the caller must ensure that index < self.len()
503    #[inline]
504    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
505        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.bit_offset) }
506    }
507
508    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
509    #[inline]
510    pub fn values(&self) -> &[u8] {
511        &self.buffer
512    }
513
514    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
515    pub fn slice(&self, offset: usize, len: usize) -> Self {
516        assert!(
517            offset.saturating_add(len) <= self.bit_len,
518            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
519        );
520        Self {
521            buffer: self.buffer.clone(),
522            bit_offset: self.bit_offset + offset,
523            bit_len: len,
524        }
525    }
526
527    /// Returns a new [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
528    ///
529    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
530    pub fn sliced(&self) -> Buffer {
531        self.buffer.bit_slice(self.bit_offset, self.bit_len)
532    }
533
534    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
535    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
536    /// return false when the arrays are logically equal
537    pub fn ptr_eq(&self, other: &Self) -> bool {
538        self.buffer.as_ptr() == other.buffer.as_ptr()
539            && self.bit_offset == other.bit_offset
540            && self.bit_len == other.bit_len
541    }
542
543    /// Returns the inner [`Buffer`]
544    ///
545    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
546    #[inline]
547    pub fn inner(&self) -> &Buffer {
548        &self.buffer
549    }
550
551    /// Returns the inner [`Buffer`], consuming self
552    ///
553    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
554    pub fn into_inner(self) -> Buffer {
555        self.buffer
556    }
557
558    /// Claim memory used by this buffer in the provided memory pool.
559    ///
560    /// See [`Buffer::claim`] for details.
561    #[cfg(feature = "pool")]
562    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
563        self.buffer.claim(pool);
564    }
565
566    /// Apply a bitwise binary operation to `self`.
567    ///
568    /// If the underlying buffer is uniquely owned, reuses the allocation
569    /// and updates the bytes in place. If the underlying buffer is shared,
570    /// returns a newly allocated buffer.
571    ///
572    /// # API Notes
573    ///
574    /// If the buffer is reused, the result preserves the existing offset, which
575    /// may be non-zero.
576    fn bitwise_bin_op_assign<F>(&mut self, rhs: &BooleanBuffer, op: F)
577    where
578        F: FnMut(u64, u64) -> u64,
579    {
580        assert_eq!(self.bit_len, rhs.bit_len);
581        // Try to mutate in place if the buffer is uniquely owned
582        let buffer = std::mem::take(&mut self.buffer);
583        match buffer.into_mutable() {
584            Ok(mut buf) => {
585                bit_util::apply_bitwise_binary_op(
586                    &mut buf,
587                    self.bit_offset,
588                    &rhs.buffer,
589                    rhs.bit_offset,
590                    self.bit_len,
591                    op,
592                );
593                self.buffer = buf.into();
594            }
595            Err(buf) => {
596                self.buffer = buf;
597                *self = BooleanBuffer::from_bitwise_binary_op(
598                    self.values(),
599                    self.bit_offset,
600                    rhs.values(),
601                    rhs.bit_offset,
602                    self.bit_len,
603                    op,
604                );
605            }
606        }
607    }
608
609    /// Returns an iterator over the bits in this [`BooleanBuffer`]
610    pub fn iter(&self) -> BitIterator<'_> {
611        self.into_iter()
612    }
613
614    /// Returns an [`UnalignedBitChunk`] over this buffer's values.
615    fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> {
616        UnalignedBitChunk::new(self.values(), self.offset(), self.len())
617    }
618
619    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
620    pub fn set_indices(&self) -> BitIndexIterator<'_> {
621        BitIndexIterator::new(self.values(), self.bit_offset, self.bit_len)
622    }
623
624    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
625    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
626        BitIndexU32Iterator::new(self.values(), self.bit_offset, self.bit_len)
627    }
628
629    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
630    pub fn set_slices(&self) -> BitSliceIterator<'_> {
631        BitSliceIterator::new(self.values(), self.bit_offset, self.bit_len)
632    }
633
634    /// Block size for chunked fold operations in [`Self::has_true`] and [`Self::has_false`].
635    /// Using `chunks_exact` with this size lets the compiler fully unroll the inner
636    /// fold (no inner branch/loop), enabling short-circuit exits every N chunks.
637    const CHUNK_FOLD_BLOCK_SIZE: usize = 16;
638
639    /// Returns whether there is at least one `true` value in this buffer.
640    ///
641    /// This is more efficient than `count_set_bits() > 0` because it can short-circuit
642    /// as soon as a `true` value is found, without counting all set bits.
643    ///
644    /// Returns `false` for empty buffer.
645    pub fn has_true(&self) -> bool {
646        let bit_chunks = self.unaligned_bit_chunks();
647        let chunks = bit_chunks.chunks();
648        let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
649        let found = bit_chunks.prefix().unwrap_or(0) != 0
650            || exact.any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0);
651        found || exact.remainder().iter().any(|&c| c != 0) || bit_chunks.suffix().unwrap_or(0) != 0
652    }
653
654    /// Returns whether there is at least one `false` value in this buffer.
655    ///
656    /// This is more efficient than `len() > count_set_bits()` because it can short-circuit
657    /// as soon as a `false` value is found, without counting all set bits.
658    ///
659    /// Returns `false` for empty buffer.
660    pub fn has_false(&self) -> bool {
661        let bit_chunks = self.unaligned_bit_chunks();
662        // UnalignedBitChunk zeros padding bits; fill them with 1s so
663        // they don't appear as false values.
664        let lead_mask = !((1u64 << bit_chunks.lead_padding()) - 1);
665        let trail_mask = if bit_chunks.trailing_padding() == 0 {
666            u64::MAX
667        } else {
668            (1u64 << (64 - bit_chunks.trailing_padding())) - 1
669        };
670        let (prefix_fill, suffix_fill) = match (bit_chunks.prefix(), bit_chunks.suffix()) {
671            (Some(_), Some(_)) => (!lead_mask, !trail_mask),
672            (Some(_), None) => (!lead_mask | !trail_mask, 0),
673            (None, Some(_)) => (0, !trail_mask),
674            (None, None) => (0, 0),
675        };
676        let chunks = bit_chunks.chunks();
677        let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
678        let found = bit_chunks
679            .prefix()
680            .is_some_and(|v| (v | prefix_fill) != u64::MAX)
681            || exact.any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX);
682        found
683            || exact.remainder().iter().any(|&c| c != u64::MAX)
684            || bit_chunks
685                .suffix()
686                .is_some_and(|v| (v | suffix_fill) != u64::MAX)
687    }
688}
689
690impl Not for &BooleanBuffer {
691    type Output = BooleanBuffer;
692
693    fn not(self) -> Self::Output {
694        BooleanBuffer::from_bitwise_unary_op(&self.buffer, self.bit_offset, self.bit_len, |a| !a)
695    }
696}
697
698impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
699    type Output = BooleanBuffer;
700
701    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
702        assert_eq!(self.bit_len, rhs.bit_len);
703        BooleanBuffer {
704            buffer: buffer_bin_and(
705                &self.buffer,
706                self.bit_offset,
707                &rhs.buffer,
708                rhs.bit_offset,
709                self.bit_len,
710            ),
711            bit_offset: 0,
712            bit_len: self.bit_len,
713        }
714    }
715}
716
717impl BitOr<&BooleanBuffer> for &BooleanBuffer {
718    type Output = BooleanBuffer;
719
720    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
721        assert_eq!(self.bit_len, rhs.bit_len);
722        BooleanBuffer {
723            buffer: buffer_bin_or(
724                &self.buffer,
725                self.bit_offset,
726                &rhs.buffer,
727                rhs.bit_offset,
728                self.bit_len,
729            ),
730            bit_offset: 0,
731            bit_len: self.bit_len,
732        }
733    }
734}
735
736impl BitXor<&BooleanBuffer> for &BooleanBuffer {
737    type Output = BooleanBuffer;
738
739    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
740        assert_eq!(self.bit_len, rhs.bit_len);
741        BooleanBuffer {
742            buffer: buffer_bin_xor(
743                &self.buffer,
744                self.bit_offset,
745                &rhs.buffer,
746                rhs.bit_offset,
747                self.bit_len,
748            ),
749            bit_offset: 0,
750            bit_len: self.bit_len,
751        }
752    }
753}
754
755impl BitAndAssign<&BooleanBuffer> for BooleanBuffer {
756    fn bitand_assign(&mut self, rhs: &BooleanBuffer) {
757        self.bitwise_bin_op_assign(rhs, |a, b| a & b);
758    }
759}
760
761impl BitOrAssign<&BooleanBuffer> for BooleanBuffer {
762    fn bitor_assign(&mut self, rhs: &BooleanBuffer) {
763        self.bitwise_bin_op_assign(rhs, |a, b| a | b);
764    }
765}
766
767impl BitXorAssign<&BooleanBuffer> for BooleanBuffer {
768    fn bitxor_assign(&mut self, rhs: &BooleanBuffer) {
769        self.bitwise_bin_op_assign(rhs, |a, b| a ^ b);
770    }
771}
772
773impl<'a> IntoIterator for &'a BooleanBuffer {
774    type Item = bool;
775    type IntoIter = BitIterator<'a>;
776
777    fn into_iter(self) -> Self::IntoIter {
778        BitIterator::new(self.values(), self.bit_offset, self.bit_len)
779    }
780}
781
782impl From<&[bool]> for BooleanBuffer {
783    fn from(value: &[bool]) -> Self {
784        let mut builder = BooleanBufferBuilder::new(value.len());
785        builder.append_slice(value);
786        builder.finish()
787    }
788}
789
790impl From<Vec<bool>> for BooleanBuffer {
791    fn from(value: Vec<bool>) -> Self {
792        value.as_slice().into()
793    }
794}
795
796impl FromIterator<bool> for BooleanBuffer {
797    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
798        let iter = iter.into_iter();
799        let (hint, _) = iter.size_hint();
800        let mut builder = BooleanBufferBuilder::new(hint);
801        iter.for_each(|b| builder.append(b));
802        builder.finish()
803    }
804}
805
806#[cfg(test)]
807mod tests {
808    use super::*;
809
810    #[test]
811    fn test_boolean_new() {
812        let bytes = &[0, 1, 2, 3, 4];
813        let buf = Buffer::from(bytes);
814        let offset = 0;
815        let len = 24;
816
817        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
818        assert_eq!(bytes, boolean_buf.values());
819        assert_eq!(offset, boolean_buf.offset());
820        assert_eq!(len, boolean_buf.len());
821
822        assert_eq!(2, boolean_buf.count_set_bits());
823        assert_eq!(&buf, boolean_buf.inner());
824        assert_eq!(buf, boolean_buf.clone().into_inner());
825
826        assert!(!boolean_buf.is_empty())
827    }
828
829    #[test]
830    fn test_boolean_data_equality() {
831        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
832        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
833        assert_eq!(boolean_buf1, boolean_buf2);
834
835        // slice with same offset and same length should still preserve equality
836        let boolean_buf3 = boolean_buf1.slice(8, 16);
837        assert_ne!(boolean_buf1, boolean_buf3);
838        let boolean_buf4 = boolean_buf1.slice(0, 32);
839        assert_eq!(boolean_buf1, boolean_buf4);
840
841        // unequal because of different elements
842        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
843        assert_ne!(boolean_buf1, boolean_buf2);
844
845        // unequal because of different length
846        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
847        assert_ne!(boolean_buf1, boolean_buf2);
848
849        // ptr_eq
850        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
851        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
852        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
853    }
854
855    #[test]
856    fn test_boolean_slice() {
857        let bytes = &[0, 3, 2, 6, 2];
858        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
859        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
860
861        let boolean_slice1 = boolean_buf1.slice(16, 16);
862        let boolean_slice2 = boolean_buf2.slice(0, 16);
863        assert_eq!(boolean_slice1.values(), boolean_slice2.values());
864
865        assert_eq!(bytes, boolean_slice1.values());
866        assert_eq!(16, boolean_slice1.bit_offset);
867        assert_eq!(16, boolean_slice1.bit_len);
868
869        assert_eq!(bytes, boolean_slice2.values());
870        assert_eq!(0, boolean_slice2.bit_offset);
871        assert_eq!(16, boolean_slice2.bit_len);
872    }
873
874    #[test]
875    fn test_boolean_bitand() {
876        let offset = 0;
877        let len = 40;
878
879        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
880        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
881
882        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
883        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
884
885        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
886        assert_eq!(boolean_buf1 & boolean_buf2, expected);
887    }
888
889    #[test]
890    fn test_boolean_bitor() {
891        let offset = 0;
892        let len = 40;
893
894        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
895        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
896
897        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
898        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
899
900        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
901        assert_eq!(boolean_buf1 | boolean_buf2, expected);
902    }
903
904    #[test]
905    fn test_boolean_bitxor() {
906        let offset = 0;
907        let len = 40;
908
909        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
910        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
911
912        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
913        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
914
915        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
916        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
917    }
918
919    #[test]
920    fn test_boolean_bitand_assign_shared_and_unshared() {
921        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
922        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
923
924        let mut unshared = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
925        unshared &= &rhs;
926
927        let mut shared = original.clone();
928        let _shared_owner = shared.clone();
929        shared &= &rhs;
930
931        let expected = &original & &rhs;
932        assert_eq!(unshared, expected);
933        assert_eq!(shared, expected);
934    }
935
936    #[test]
937    fn test_boolean_bitor_assign() {
938        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
939        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
940
941        let mut actual = original.clone();
942        actual |= &rhs;
943
944        let expected = &original | &rhs;
945        assert_eq!(actual, expected);
946    }
947
948    #[test]
949    fn test_boolean_bitxor_assign() {
950        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
951        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
952
953        let mut actual = original.clone();
954        actual ^= &rhs;
955
956        let expected = &original ^ &rhs;
957        assert_eq!(actual, expected);
958    }
959
960    #[test]
961    fn test_boolean_not() {
962        let offset = 0;
963        let len = 40;
964
965        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
966        let boolean_buf = &BooleanBuffer::new(buf, offset, len);
967
968        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
969        assert_eq!(!boolean_buf, expected);
970
971        // Demonstrate that Non-zero offsets are preserved
972        let sliced = boolean_buf.slice(3, 20);
973        let result = !&sliced;
974        assert_eq!(result.offset(), 3);
975        assert_eq!(result.len(), sliced.len());
976        for i in 0..sliced.len() {
977            assert_eq!(result.value(i), !sliced.value(i));
978        }
979    }
980
981    #[test]
982    fn test_boolean_from_slice_bool() {
983        let v = [true, false, false];
984        let buf = BooleanBuffer::from(&v[..]);
985        assert_eq!(buf.offset(), 0);
986        assert_eq!(buf.len(), 3);
987        assert_eq!(buf.values().len(), 1);
988        assert!(buf.value(0));
989    }
990
991    #[test]
992    fn test_from_bitwise_unary_op() {
993        // Use 1024 boolean values so that at least some of the tests cover multiple u64 chunks and
994        // perfect alignment
995        let input_bools = (0..1024)
996            .map(|_| rand::random::<bool>())
997            .collect::<Vec<bool>>();
998        let input_buffer = BooleanBuffer::from(&input_bools[..]);
999
1000        // Note ensure we test offsets over 100 to cover multiple u64 chunks
1001        for offset in 0..1024 {
1002            let result = BooleanBuffer::from_bitwise_unary_op(
1003                input_buffer.values(),
1004                offset,
1005                input_buffer.len() - offset,
1006                |a| !a,
1007            );
1008            let expected = input_bools[offset..]
1009                .iter()
1010                .map(|b| !*b)
1011                .collect::<BooleanBuffer>();
1012            assert_eq!(result, expected);
1013        }
1014
1015        // Also test when the input doesn't cover the entire buffer
1016        for offset in 0..512 {
1017            let len = 512 - offset; // fixed length less than total
1018            let result =
1019                BooleanBuffer::from_bitwise_unary_op(input_buffer.values(), offset, len, |a| !a);
1020            let expected = input_bools[offset..]
1021                .iter()
1022                .take(len)
1023                .map(|b| !*b)
1024                .collect::<BooleanBuffer>();
1025            assert_eq!(result, expected);
1026        }
1027    }
1028
1029    #[test]
1030    fn test_from_bitwise_unary_op_unaligned_fallback() {
1031        // Deterministic affine sequence over u8: b[i] = 37*i + 11 (mod 256).
1032        // This yields a non-trivial mix of bits (prefix: 11, 48, 85, 122, 159, 196, 233, 14, ...)
1033        // so unary bit operations are exercised on varied input patterns.
1034        let bytes = (0..80)
1035            .map(|i| (i as u8).wrapping_mul(37).wrapping_add(11))
1036            .collect::<Vec<_>>();
1037        let base = bytes.as_ptr() as usize;
1038        let shift = (0..8).find(|s| (base + s) % 8 != 0).unwrap();
1039        let misaligned = &bytes[shift..];
1040
1041        // Case 1: fallback path with `remainder.is_empty() == true`
1042        let src = &misaligned[..24];
1043        let offset = 7;
1044        let len = 96;
1045        let result = BooleanBuffer::from_bitwise_unary_op(src, offset, len, |a| !a);
1046        let expected = (0..len)
1047            .map(|i| !bit_util::get_bit(src, offset + i))
1048            .collect::<BooleanBuffer>();
1049        assert_eq!(result, expected);
1050        assert_eq!(result.offset(), offset % 64);
1051
1052        // Case 2: fallback path with `remainder.is_empty() == false`
1053        let src = &misaligned[..13];
1054        let offset = 3;
1055        let len = 100;
1056        let result = BooleanBuffer::from_bitwise_unary_op(src, offset, len, |a| !a);
1057        let expected = (0..len)
1058            .map(|i| !bit_util::get_bit(src, offset + i))
1059            .collect::<BooleanBuffer>();
1060        assert_eq!(result, expected);
1061        assert_eq!(result.offset(), offset % 64);
1062    }
1063
1064    #[test]
1065    fn test_from_bitwise_binary_op() {
1066        // pick random boolean inputs
1067        let input_bools_left = (0..1024)
1068            .map(|_| rand::random::<bool>())
1069            .collect::<Vec<bool>>();
1070        let input_bools_right = (0..1024)
1071            .map(|_| rand::random::<bool>())
1072            .collect::<Vec<bool>>();
1073        let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
1074        let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
1075
1076        #[cfg(miri)] // Takes too long otherwise
1077        let left_offsets = [0, 1, 7, 8, 63, 64, 65];
1078        #[cfg(not(miri))]
1079        let left_offsets = 0..200;
1080
1081        for left_offset in left_offsets {
1082            for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
1083                for len_offset in [0, 1, 44, 100, 256, 300, 512] {
1084                    let len = 1024 - len_offset - left_offset.max(right_offset); // ensure we don't go out of bounds
1085                    // compute with AND
1086                    let result = BooleanBuffer::from_bitwise_binary_op(
1087                        input_buffer_left.values(),
1088                        left_offset,
1089                        input_buffer_right.values(),
1090                        right_offset,
1091                        len,
1092                        |a, b| a & b,
1093                    );
1094                    // compute directly from bools
1095                    let expected = input_bools_left[left_offset..]
1096                        .iter()
1097                        .zip(&input_bools_right[right_offset..])
1098                        .take(len)
1099                        .map(|(a, b)| *a & *b)
1100                        .collect::<BooleanBuffer>();
1101                    assert_eq!(result, expected);
1102                }
1103            }
1104        }
1105    }
1106
1107    #[test]
1108    fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback() {
1109        // Exercise the shared-alignment fast path when both inputs are misaligned in memory,
1110        // forcing the chunks_exact fallback instead of align_to::<u64>().
1111        let left_bytes = [
1112            0,           // dropped so `&left_bytes[1..]` is not u64-aligned in memory
1113            0b1101_0010, // logical left bits start at bit 3 of this byte
1114            0b0110_1101,
1115            0b1010_0111,
1116            0b0001_1110,
1117            0b1110_0001,
1118            0b0101_1010,
1119            0b1001_0110,
1120            0b0011_1100,
1121            0b1011_0001,
1122            0b0100_1110,
1123            0b1100_0011,
1124            0b0111_1000,
1125        ];
1126        let right_bytes = [
1127            0,           // dropped so `&right_bytes[1..]` is not u64-aligned in memory
1128            0b1010_1100, // logical right bits start at bit 67 == bit 3 of the second 64-bit block
1129            0b0101_0011,
1130            0b1111_0000,
1131            0b0011_1010,
1132            0b1000_1111,
1133            0b0110_0101,
1134            0b1101_1000,
1135            0b0001_0111,
1136            0b1110_0100,
1137            0b0010_1101,
1138            0b1001_1010,
1139            0b0111_0001,
1140        ];
1141
1142        let left = &left_bytes[1..];
1143        let right = &right_bytes[1..];
1144
1145        let left_offset = 3;
1146        let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path
1147        let len = 24; // leaves a partial trailing chunk, so this covers the non-empty remainder branch
1148
1149        let result = BooleanBuffer::from_bitwise_binary_op(
1150            left,
1151            left_offset,
1152            right,
1153            right_offset,
1154            len,
1155            |a, b| a & b,
1156        );
1157        let expected = (0..len)
1158            .map(|i| {
1159                bit_util::get_bit(left, left_offset + i)
1160                    & bit_util::get_bit(right, right_offset + i)
1161            })
1162            .collect::<BooleanBuffer>();
1163
1164        assert_eq!(result, expected);
1165        assert_eq!(result.offset(), left_offset % 64);
1166    }
1167
1168    #[test]
1169    fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback_no_remainder() {
1170        // Force the chunks_exact fallback with an exact 8-byte chunk so both remainders are empty.
1171        let left_bytes = [
1172            0,           // dropped so `&left_bytes[1..]` is not u64-aligned in memory
1173            0b1010_1100, // logical left bits start at bit 3 of this byte
1174            0b0110_1001,
1175            0b1101_0011,
1176            0b0001_1110,
1177            0b1110_0101,
1178            0b0101_1000,
1179            0b1001_0111,
1180            0b0011_1101,
1181        ];
1182        let right_bytes = [
1183            0,           // dropped so `&right_bytes[1..]` is not u64-aligned in memory
1184            0b0111_0010, // logical right bits start at bit 67 == bit 3 of the second 64-bit block
1185            0b1010_1001,
1186            0b0101_1110,
1187            0b1100_0011,
1188            0b0011_1011,
1189            0b1000_1110,
1190            0b1111_0001,
1191            0b0100_1101,
1192            0b1011_0110,
1193            0b0001_1011,
1194            0b1101_0100,
1195            0b0110_0011,
1196            0b1001_1110,
1197            0b0010_1001,
1198            0b1110_0110,
1199            0b0101_0001,
1200        ];
1201
1202        let left = &left_bytes[1..];
1203        let right = &right_bytes[1..];
1204
1205        let left_offset = 3;
1206        let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path
1207        let len = 61; // 3 + 61 = 64, so the aligned slices are exactly one 8-byte chunk with empty remainders
1208
1209        let result = BooleanBuffer::from_bitwise_binary_op(
1210            left,
1211            left_offset,
1212            right,
1213            right_offset,
1214            len,
1215            |a, b| a | b,
1216        );
1217        let expected = (0..len)
1218            .map(|i| {
1219                bit_util::get_bit(left, left_offset + i)
1220                    | bit_util::get_bit(right, right_offset + i)
1221            })
1222            .collect::<BooleanBuffer>();
1223
1224        assert_eq!(result, expected);
1225        assert_eq!(result.offset(), left_offset % 64);
1226    }
1227
1228    #[test]
1229    fn test_extend_trusted_len_sets_byte_len() {
1230        // Ensures extend_trusted_len keeps the underlying byte length in sync with bit length.
1231        let mut builder = BooleanBufferBuilder::new(0);
1232        let bools: Vec<_> = (0..10).map(|i| i % 2 == 0).collect();
1233        unsafe { builder.extend_trusted_len(bools.into_iter()) };
1234        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
1235    }
1236
1237    #[test]
1238    fn test_extend_trusted_len_then_append() {
1239        // Exercises append after extend_trusted_len to validate byte length and values.
1240        let mut builder = BooleanBufferBuilder::new(0);
1241        let bools: Vec<_> = (0..9).map(|i| i % 3 == 0).collect();
1242        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
1243        builder.append(true);
1244        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
1245        let finished = builder.finish();
1246        for (i, v) in bools.into_iter().chain(std::iter::once(true)).enumerate() {
1247            assert_eq!(finished.value(i), v, "at index {}", i);
1248        }
1249    }
1250
1251    #[test]
1252    fn test_find_nth_set_bit_position() {
1253        let bools = vec![true, false, true, true, false, true];
1254        let buffer = BooleanBuffer::from(bools);
1255
1256        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 1);
1257        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 3);
1258        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 4);
1259        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 6);
1260        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 5), 6);
1261
1262        assert_eq!(buffer.clone().find_nth_set_bit_position(1, 1), 3);
1263        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 1), 4);
1264        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 2), 6);
1265    }
1266
1267    #[test]
1268    fn test_find_nth_set_bit_position_large() {
1269        let mut bools = vec![false; 1000];
1270        bools[100] = true;
1271        bools[500] = true;
1272        bools[999] = true;
1273        let buffer = BooleanBuffer::from(bools);
1274
1275        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 101);
1276        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 501);
1277        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 1000);
1278        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 1000);
1279
1280        assert_eq!(buffer.clone().find_nth_set_bit_position(101, 1), 501);
1281    }
1282
1283    #[test]
1284    fn test_find_nth_set_bit_position_sliced() {
1285        let bools = vec![false, true, false, true, true, false, true]; // [F, T, F, T, T, F, T]
1286        let buffer = BooleanBuffer::from(bools);
1287        let slice = buffer.slice(1, 6); // [T, F, T, T, F, T]
1288
1289        assert_eq!(slice.len(), 6);
1290        // Logical indices: 0, 1, 2, 3, 4, 5
1291        // Logical values: T, F, T, T, F, T
1292
1293        assert_eq!(slice.clone().find_nth_set_bit_position(0, 1), 1);
1294        assert_eq!(slice.clone().find_nth_set_bit_position(0, 2), 3);
1295        assert_eq!(slice.clone().find_nth_set_bit_position(0, 3), 4);
1296        assert_eq!(slice.clone().find_nth_set_bit_position(0, 4), 6);
1297    }
1298
1299    #[test]
1300    fn test_find_nth_set_bit_position_all_set() {
1301        let buffer = BooleanBuffer::new_set(100);
1302        for i in 1..=100 {
1303            assert_eq!(buffer.clone().find_nth_set_bit_position(0, i), i);
1304        }
1305        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 101), 100);
1306    }
1307
1308    #[test]
1309    fn test_find_nth_set_bit_position_none_set() {
1310        let buffer = BooleanBuffer::new_unset(100);
1311        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 100);
1312    }
1313
1314    #[test]
1315    fn test_has_true_has_false_all_true() {
1316        let arr = BooleanBuffer::from(vec![true, true, true]);
1317        assert!(arr.has_true());
1318        assert!(!arr.has_false());
1319    }
1320
1321    #[test]
1322    fn test_has_true_has_false_all_false() {
1323        let arr = BooleanBuffer::from(vec![false, false, false]);
1324        assert!(!arr.has_true());
1325        assert!(arr.has_false());
1326    }
1327
1328    #[test]
1329    fn test_has_true_has_false_mixed() {
1330        let arr = BooleanBuffer::from(vec![true, false, true]);
1331        assert!(arr.has_true());
1332        assert!(arr.has_false());
1333    }
1334
1335    #[test]
1336    fn test_has_true_has_false_empty() {
1337        let arr = BooleanBuffer::from(Vec::<bool>::new());
1338        assert!(!arr.has_true());
1339        assert!(!arr.has_false());
1340    }
1341
1342    #[test]
1343    fn test_has_false_aligned_suffix_all_true() {
1344        let arr = BooleanBuffer::from(vec![true; 129]);
1345        assert!(arr.has_true());
1346        assert!(!arr.has_false());
1347    }
1348
1349    #[test]
1350    fn test_has_false_non_aligned_all_true() {
1351        // 65 elements: exercises the remainder path in has_false
1352        let arr = BooleanBuffer::from(vec![true; 65]);
1353        assert!(arr.has_true());
1354        assert!(!arr.has_false());
1355    }
1356
1357    #[test]
1358    fn test_has_false_non_aligned_last_false() {
1359        // 64 trues + 1 false: remainder path should find the false
1360        let mut values = vec![true; 64];
1361        values.push(false);
1362        let arr = BooleanBuffer::from(values);
1363        assert!(arr.has_true());
1364        assert!(arr.has_false());
1365    }
1366
1367    #[test]
1368    fn test_has_false_exact_64_all_true() {
1369        // Exactly 64 elements, no remainder
1370        let arr = BooleanBuffer::from(vec![true; 64]);
1371        assert!(arr.has_true());
1372        assert!(!arr.has_false());
1373    }
1374
1375    #[test]
1376    fn test_has_true_has_false_unaligned_slices() {
1377        let cases = [
1378            (1, 129, true, false),
1379            (3, 130, true, false),
1380            (5, 65, true, false),
1381            (7, 64, true, false),
1382        ];
1383
1384        let base = BooleanBuffer::from(vec![true; 300]);
1385
1386        for (offset, len, expected_has_true, expected_has_false) in cases {
1387            let arr = base.slice(offset, len);
1388            assert_eq!(
1389                arr.has_true(),
1390                expected_has_true,
1391                "offset={offset} len={len}"
1392            );
1393            assert_eq!(
1394                arr.has_false(),
1395                expected_has_false,
1396                "offset={offset} len={len}"
1397            );
1398        }
1399    }
1400
1401    #[test]
1402    fn test_has_true_has_false_exact_multiples_of_64() {
1403        let cases = [
1404            (64, true, false),
1405            (128, true, false),
1406            (192, true, false),
1407            (256, true, false),
1408        ];
1409
1410        for (len, expected_has_true, expected_has_false) in cases {
1411            let arr = BooleanBuffer::from(vec![true; len]);
1412            assert_eq!(arr.has_true(), expected_has_true, "len={len}");
1413            assert_eq!(arr.has_false(), expected_has_false, "len={len}");
1414        }
1415    }
1416}