arrow_buffer/buffer/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_chunk_iterator::BitChunks;
19use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
20use crate::{
21    bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
22    BooleanBufferBuilder, Buffer, MutableBuffer,
23};
24
25use std::ops::{BitAnd, BitOr, BitXor, Not};
26
27/// A slice-able [`Buffer`] containing bit-packed booleans
28#[derive(Debug, Clone, Eq)]
29pub struct BooleanBuffer {
30    buffer: Buffer,
31    offset: usize,
32    len: usize,
33}
34
35impl PartialEq for BooleanBuffer {
36    fn eq(&self, other: &Self) -> bool {
37        if self.len != other.len {
38            return false;
39        }
40
41        let lhs = self.bit_chunks().iter_padded();
42        let rhs = other.bit_chunks().iter_padded();
43        lhs.zip(rhs).all(|(a, b)| a == b)
44    }
45}
46
47impl BooleanBuffer {
48    /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
49    ///
50    /// # Panics
51    ///
52    /// This method will panic if `buffer` is not large enough
53    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
54        let total_len = offset.saturating_add(len);
55        let bit_len = buffer.len().saturating_mul(8);
56        assert!(total_len <= bit_len);
57        Self {
58            buffer,
59            offset,
60            len,
61        }
62    }
63
64    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
65    pub fn new_set(length: usize) -> Self {
66        let mut builder = BooleanBufferBuilder::new(length);
67        builder.append_n(length, true);
68        builder.finish()
69    }
70
71    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
72    pub fn new_unset(length: usize) -> Self {
73        let buffer = MutableBuffer::new_null(length).into_buffer();
74        Self {
75            buffer,
76            offset: 0,
77            len: length,
78        }
79    }
80
81    /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BooleanBuffer`
82    pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, f: F) -> Self {
83        let buffer = MutableBuffer::collect_bool(len, f);
84        Self::new(buffer.into(), 0, len)
85    }
86
87    /// Returns the number of set bits in this buffer
88    pub fn count_set_bits(&self) -> usize {
89        self.buffer.count_set_bits_offset(self.offset, self.len)
90    }
91
92    /// Returns a `BitChunks` instance which can be used to iterate over
93    /// this buffer's bits in `u64` chunks
94    #[inline]
95    pub fn bit_chunks(&self) -> BitChunks {
96        BitChunks::new(self.values(), self.offset, self.len)
97    }
98
99    /// Returns `true` if the bit at index `i` is set
100    ///
101    /// # Panics
102    ///
103    /// Panics if `i >= self.len()`
104    #[inline]
105    #[deprecated(note = "use BooleanBuffer::value")]
106    pub fn is_set(&self, i: usize) -> bool {
107        self.value(i)
108    }
109
110    /// Returns the offset of this [`BooleanBuffer`] in bits
111    #[inline]
112    pub fn offset(&self) -> usize {
113        self.offset
114    }
115
116    /// Returns the length of this [`BooleanBuffer`] in bits
117    #[inline]
118    pub fn len(&self) -> usize {
119        self.len
120    }
121
122    /// Returns true if this [`BooleanBuffer`] is empty
123    #[inline]
124    pub fn is_empty(&self) -> bool {
125        self.len == 0
126    }
127
128    /// Free up unused memory.
129    pub fn shrink_to_fit(&mut self) {
130        // TODO(emilk): we could shrink even more in the case where we are a small sub-slice of the full buffer
131        self.buffer.shrink_to_fit();
132    }
133
134    /// Returns the boolean value at index `i`.
135    ///
136    /// # Panics
137    ///
138    /// Panics if `i >= self.len()`
139    #[inline]
140    pub fn value(&self, idx: usize) -> bool {
141        assert!(idx < self.len);
142        unsafe { self.value_unchecked(idx) }
143    }
144
145    /// Returns the boolean value at index `i`.
146    ///
147    /// # Safety
148    /// This doesn't check bounds, the caller must ensure that index < self.len()
149    #[inline]
150    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
151        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
152    }
153
154    /// Returns the packed values of this [`BooleanBuffer`] not including any offset
155    #[inline]
156    pub fn values(&self) -> &[u8] {
157        &self.buffer
158    }
159
160    /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
161    pub fn slice(&self, offset: usize, len: usize) -> Self {
162        assert!(
163            offset.saturating_add(len) <= self.len,
164            "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
165        );
166        Self {
167            buffer: self.buffer.clone(),
168            offset: self.offset + offset,
169            len,
170        }
171    }
172
173    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
174    ///
175    /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
176    pub fn sliced(&self) -> Buffer {
177        self.buffer.bit_slice(self.offset, self.len)
178    }
179
180    /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
181    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
182    /// return false when the arrays are logically equal
183    pub fn ptr_eq(&self, other: &Self) -> bool {
184        self.buffer.as_ptr() == other.buffer.as_ptr()
185            && self.offset == other.offset
186            && self.len == other.len
187    }
188
189    /// Returns the inner [`Buffer`]
190    #[inline]
191    pub fn inner(&self) -> &Buffer {
192        &self.buffer
193    }
194
195    /// Returns the inner [`Buffer`], consuming self
196    pub fn into_inner(self) -> Buffer {
197        self.buffer
198    }
199
200    /// Returns an iterator over the bits in this [`BooleanBuffer`]
201    pub fn iter(&self) -> BitIterator<'_> {
202        self.into_iter()
203    }
204
205    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
206    pub fn set_indices(&self) -> BitIndexIterator<'_> {
207        BitIndexIterator::new(self.values(), self.offset, self.len)
208    }
209
210    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
211    pub fn set_slices(&self) -> BitSliceIterator<'_> {
212        BitSliceIterator::new(self.values(), self.offset, self.len)
213    }
214}
215
216impl Not for &BooleanBuffer {
217    type Output = BooleanBuffer;
218
219    fn not(self) -> Self::Output {
220        BooleanBuffer {
221            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
222            offset: 0,
223            len: self.len,
224        }
225    }
226}
227
228impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
229    type Output = BooleanBuffer;
230
231    fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
232        assert_eq!(self.len, rhs.len);
233        BooleanBuffer {
234            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
235            offset: 0,
236            len: self.len,
237        }
238    }
239}
240
241impl BitOr<&BooleanBuffer> for &BooleanBuffer {
242    type Output = BooleanBuffer;
243
244    fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
245        assert_eq!(self.len, rhs.len);
246        BooleanBuffer {
247            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
248            offset: 0,
249            len: self.len,
250        }
251    }
252}
253
254impl BitXor<&BooleanBuffer> for &BooleanBuffer {
255    type Output = BooleanBuffer;
256
257    fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
258        assert_eq!(self.len, rhs.len);
259        BooleanBuffer {
260            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
261            offset: 0,
262            len: self.len,
263        }
264    }
265}
266
267impl<'a> IntoIterator for &'a BooleanBuffer {
268    type Item = bool;
269    type IntoIter = BitIterator<'a>;
270
271    fn into_iter(self) -> Self::IntoIter {
272        BitIterator::new(self.values(), self.offset, self.len)
273    }
274}
275
276impl From<&[bool]> for BooleanBuffer {
277    fn from(value: &[bool]) -> Self {
278        let mut builder = BooleanBufferBuilder::new(value.len());
279        builder.append_slice(value);
280        builder.finish()
281    }
282}
283
284impl From<Vec<bool>> for BooleanBuffer {
285    fn from(value: Vec<bool>) -> Self {
286        value.as_slice().into()
287    }
288}
289
290impl FromIterator<bool> for BooleanBuffer {
291    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
292        let iter = iter.into_iter();
293        let (hint, _) = iter.size_hint();
294        let mut builder = BooleanBufferBuilder::new(hint);
295        iter.for_each(|b| builder.append(b));
296        builder.finish()
297    }
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303
304    #[test]
305    fn test_boolean_new() {
306        let bytes = &[0, 1, 2, 3, 4];
307        let buf = Buffer::from(bytes);
308        let offset = 0;
309        let len = 24;
310
311        let boolean_buf = BooleanBuffer::new(buf.clone(), offset, len);
312        assert_eq!(bytes, boolean_buf.values());
313        assert_eq!(offset, boolean_buf.offset());
314        assert_eq!(len, boolean_buf.len());
315
316        assert_eq!(2, boolean_buf.count_set_bits());
317        assert_eq!(&buf, boolean_buf.inner());
318        assert_eq!(buf, boolean_buf.clone().into_inner());
319
320        assert!(!boolean_buf.is_empty())
321    }
322
323    #[test]
324    fn test_boolean_data_equality() {
325        let boolean_buf1 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
326        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 32);
327        assert_eq!(boolean_buf1, boolean_buf2);
328
329        // slice with same offset and same length should still preserve equality
330        let boolean_buf3 = boolean_buf1.slice(8, 16);
331        assert_ne!(boolean_buf1, boolean_buf3);
332        let boolean_buf4 = boolean_buf1.slice(0, 32);
333        assert_eq!(boolean_buf1, boolean_buf4);
334
335        // unequal because of different elements
336        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 0, 2, 3, 4]), 0, 32);
337        assert_ne!(boolean_buf1, boolean_buf2);
338
339        // unequal because of different length
340        let boolean_buf2 = BooleanBuffer::new(Buffer::from(&[0, 1, 4, 3, 5]), 0, 24);
341        assert_ne!(boolean_buf1, boolean_buf2);
342
343        // ptr_eq
344        assert!(boolean_buf1.ptr_eq(&boolean_buf1));
345        assert!(boolean_buf2.ptr_eq(&boolean_buf2));
346        assert!(!boolean_buf1.ptr_eq(&boolean_buf2));
347    }
348
349    #[test]
350    fn test_boolean_slice() {
351        let bytes = &[0, 3, 2, 6, 2];
352        let boolean_buf1 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
353        let boolean_buf2 = BooleanBuffer::new(Buffer::from(bytes), 0, 32);
354
355        let boolean_slice1 = boolean_buf1.slice(16, 16);
356        let boolean_slice2 = boolean_buf2.slice(0, 16);
357        assert_eq!(boolean_slice1.values(), boolean_slice2.values());
358
359        assert_eq!(bytes, boolean_slice1.values());
360        assert_eq!(16, boolean_slice1.offset);
361        assert_eq!(16, boolean_slice1.len);
362
363        assert_eq!(bytes, boolean_slice2.values());
364        assert_eq!(0, boolean_slice2.offset);
365        assert_eq!(16, boolean_slice2.len);
366    }
367
368    #[test]
369    fn test_boolean_bitand() {
370        let offset = 0;
371        let len = 40;
372
373        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
374        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
375
376        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
377        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
378
379        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 0, 0]), offset, len);
380        assert_eq!(boolean_buf1 & boolean_buf2, expected);
381    }
382
383    #[test]
384    fn test_boolean_bitor() {
385        let offset = 0;
386        let len = 40;
387
388        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
389        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
390
391        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
392        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
393
394        let expected = BooleanBuffer::new(Buffer::from(&[0, 1, 1, 1, 0]), offset, len);
395        assert_eq!(boolean_buf1 | boolean_buf2, expected);
396    }
397
398    #[test]
399    fn test_boolean_bitxor() {
400        let offset = 0;
401        let len = 40;
402
403        let buf1 = Buffer::from(&[0, 1, 1, 0, 0]);
404        let boolean_buf1 = &BooleanBuffer::new(buf1, offset, len);
405
406        let buf2 = Buffer::from(&[0, 1, 1, 1, 0]);
407        let boolean_buf2 = &BooleanBuffer::new(buf2, offset, len);
408
409        let expected = BooleanBuffer::new(Buffer::from(&[0, 0, 0, 1, 0]), offset, len);
410        assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
411    }
412
413    #[test]
414    fn test_boolean_not() {
415        let offset = 0;
416        let len = 40;
417
418        let buf = Buffer::from(&[0, 1, 1, 0, 0]);
419        let boolean_buf = &BooleanBuffer::new(buf, offset, len);
420
421        let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
422        assert_eq!(!boolean_buf, expected);
423    }
424
425    #[test]
426    fn test_boolean_from_slice_bool() {
427        let v = [true, false, false];
428        let buf = BooleanBuffer::from(&v[..]);
429        assert_eq!(buf.offset(), 0);
430        assert_eq!(buf.len(), 3);
431        assert_eq!(buf.values().len(), 1);
432        assert!(buf.value(0));
433    }
434}