arrow_buffer/builder/
boolean.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::bit_util::apply_bitwise_binary_op;
19use crate::{BooleanBuffer, Buffer, MutableBuffer, bit_util};
20use std::ops::Range;
21
22/// Builder for [`BooleanBuffer`]
23///
24/// # See Also
25///
26/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
27///
28/// [`NullBuffer`]: crate::NullBuffer
29#[derive(Debug)]
30pub struct BooleanBufferBuilder {
31    buffer: MutableBuffer,
32    len: usize,
33}
34
35impl BooleanBufferBuilder {
36    /// Creates a new `BooleanBufferBuilder` with sufficient space for
37    /// `capacity` bits (not bytes).
38    ///
39    /// The capacity is rounded up to the nearest multiple of 8 for the
40    /// allocation.
41    #[inline]
42    pub fn new(capacity: usize) -> Self {
43        let byte_capacity = bit_util::ceil(capacity, 8);
44        let buffer = MutableBuffer::new(byte_capacity);
45        Self { buffer, len: 0 }
46    }
47
48    /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len`
49    pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
50        assert!(len <= buffer.len() * 8);
51        let mut s = Self {
52            len: buffer.len() * 8,
53            buffer,
54        };
55        s.truncate(len);
56        s
57    }
58
59    /// Returns the length of the buffer
60    #[inline]
61    pub fn len(&self) -> usize {
62        self.len
63    }
64
65    /// Sets a bit in the buffer at `index`
66    #[inline]
67    pub fn set_bit(&mut self, index: usize, v: bool) {
68        if v {
69            bit_util::set_bit(self.buffer.as_mut(), index);
70        } else {
71            bit_util::unset_bit(self.buffer.as_mut(), index);
72        }
73    }
74
75    /// Gets a bit in the buffer at `index`
76    #[inline]
77    pub fn get_bit(&self, index: usize) -> bool {
78        bit_util::get_bit(self.buffer.as_slice(), index)
79    }
80
81    /// Returns true if empty
82    #[inline]
83    pub fn is_empty(&self) -> bool {
84        self.len == 0
85    }
86
87    /// Returns the capacity of the buffer, in bits (not bytes)
88    ///
89    /// Note this
90    ///
91    /// # Example
92    /// ```
93    /// # use arrow_buffer::builder::BooleanBufferBuilder;
94    /// // empty requires 0 bytes
95    /// let b = BooleanBufferBuilder::new(0);
96    /// assert_eq!(0, b.capacity());
97    /// // Creating space for 1 bit results in 64 bytes (space for 512 bits)
98    /// // (64 is the minimum allocation size for 64 bit architectures)
99    /// let mut b = BooleanBufferBuilder::new(1);
100    /// assert_eq!(512, b.capacity());
101    /// // 1000 bits requires 128 bytes (space for 1024 bits)
102    /// b.append_n(1000, true);
103    /// assert_eq!(1024, b.capacity());
104    /// ```
105    #[inline]
106    pub fn capacity(&self) -> usize {
107        self.buffer.capacity() * 8
108    }
109
110    /// Advances the buffer by `additional` bits
111    #[inline]
112    pub fn advance(&mut self, additional: usize) {
113        let new_len = self.len + additional;
114        let new_len_bytes = bit_util::ceil(new_len, 8);
115        if new_len_bytes > self.buffer.len() {
116            self.buffer.resize(new_len_bytes, 0);
117        }
118        self.len = new_len;
119    }
120
121    /// Truncates the builder to the given length
122    ///
123    /// If `len` is greater than the buffer's current length, this has no effect
124    #[inline]
125    pub fn truncate(&mut self, len: usize) {
126        if len > self.len {
127            return;
128        }
129
130        let new_len_bytes = bit_util::ceil(len, 8);
131        self.buffer.truncate(new_len_bytes);
132        self.len = len;
133
134        let remainder = self.len % 8;
135        if remainder != 0 {
136            let mask = (1_u8 << remainder).wrapping_sub(1);
137            *self.buffer.as_mut().last_mut().unwrap() &= mask;
138        }
139    }
140
141    /// Reserve space to at least `additional` new bits.
142    /// Capacity will be `>= self.len() + additional`.
143    /// New bytes are uninitialized and reading them is undefined behavior.
144    #[inline]
145    pub fn reserve(&mut self, additional: usize) {
146        let capacity = self.len + additional;
147        if capacity > self.capacity() {
148            // convert differential to bytes
149            let additional = bit_util::ceil(capacity, 8) - self.buffer.len();
150            self.buffer.reserve(additional);
151        }
152    }
153
154    /// Resizes the buffer, either truncating its contents (with no change in capacity), or
155    /// growing it (potentially reallocating it) and writing `false` in the newly available bits.
156    #[inline]
157    pub fn resize(&mut self, len: usize) {
158        match len.checked_sub(self.len) {
159            Some(delta) => self.advance(delta),
160            None => self.truncate(len),
161        }
162    }
163
164    /// Appends a boolean `v` into the buffer
165    #[inline]
166    pub fn append(&mut self, v: bool) {
167        self.advance(1);
168        if v {
169            unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) };
170        }
171    }
172
173    /// Appends n `additional` bits of value `v` into the buffer
174    #[inline]
175    pub fn append_n(&mut self, additional: usize, v: bool) {
176        match v {
177            true => {
178                let new_len = self.len + additional;
179                let new_len_bytes = bit_util::ceil(new_len, 8);
180                let cur_remainder = self.len % 8;
181                let new_remainder = new_len % 8;
182
183                if cur_remainder != 0 {
184                    // Pad last byte with 1s
185                    *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
186                }
187                self.buffer.resize(new_len_bytes, 0xFF);
188                if new_remainder != 0 {
189                    // Clear remaining bits
190                    *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
191                }
192                self.len = new_len;
193            }
194            false => self.advance(additional),
195        }
196    }
197
198    /// Appends a slice of booleans into the buffer
199    #[inline]
200    pub fn append_slice(&mut self, slice: &[bool]) {
201        let additional = slice.len();
202        self.advance(additional);
203
204        let offset = self.len() - additional;
205        for (i, v) in slice.iter().enumerate() {
206            if *v {
207                unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) }
208            }
209        }
210    }
211
212    /// Append `range` bits from `to_set`
213    ///
214    /// `to_set` is a slice of bits packed LSB-first into `[u8]`
215    ///
216    /// # Panics
217    ///
218    /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes
219    pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
220        let offset_write = self.len;
221        let len = range.end - range.start;
222        // allocate new bits as 0
223        self.advance(len);
224        // copy bits from to_set into self.buffer a word at a time
225        apply_bitwise_binary_op(
226            self.buffer.as_slice_mut(),
227            offset_write,
228            to_set,
229            range.start,
230            len,
231            |_a, b| b, // copy bits from to_set
232        );
233    }
234
235    /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`]
236    pub fn append_buffer(&mut self, buffer: &BooleanBuffer) {
237        let range = buffer.offset()..buffer.offset() + buffer.len();
238        self.append_packed_range(range, buffer.values())
239    }
240
241    /// Returns the packed bits
242    pub fn as_slice(&self) -> &[u8] {
243        self.buffer.as_slice()
244    }
245
246    /// Returns the packed bits
247    pub fn as_slice_mut(&mut self) -> &mut [u8] {
248        self.buffer.as_slice_mut()
249    }
250
251    /// Creates a [`BooleanBuffer`]
252    #[inline]
253    pub fn finish(&mut self) -> BooleanBuffer {
254        let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
255        let len = std::mem::replace(&mut self.len, 0);
256        BooleanBuffer::new(buf.into(), 0, len)
257    }
258
259    /// Builds the [BooleanBuffer] without resetting the builder.
260    pub fn finish_cloned(&self) -> BooleanBuffer {
261        BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
262    }
263}
264
265impl From<BooleanBufferBuilder> for Buffer {
266    #[inline]
267    fn from(builder: BooleanBufferBuilder) -> Self {
268        builder.buffer.into()
269    }
270}
271
272impl From<BooleanBufferBuilder> for BooleanBuffer {
273    #[inline]
274    fn from(builder: BooleanBufferBuilder) -> Self {
275        BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
276    }
277}
278
279#[cfg(test)]
280mod tests {
281    use super::*;
282
283    #[test]
284    fn test_boolean_buffer_builder_write_bytes() {
285        let mut b = BooleanBufferBuilder::new(4);
286        b.append(false);
287        b.append(true);
288        b.append(false);
289        b.append(true);
290        assert_eq!(4, b.len());
291        assert_eq!(512, b.capacity());
292        let buffer = b.finish();
293        assert_eq!(4, buffer.len());
294
295        // Overallocate capacity
296        let mut b = BooleanBufferBuilder::new(8);
297        b.append_slice(&[false, true, false, true]);
298        assert_eq!(4, b.len());
299        assert_eq!(512, b.capacity());
300        let buffer = b.finish();
301        assert_eq!(4, buffer.len());
302    }
303
304    #[test]
305    fn test_boolean_buffer_builder_unset_first_bit() {
306        let mut buffer = BooleanBufferBuilder::new(4);
307        buffer.append(true);
308        buffer.append(true);
309        buffer.append(false);
310        buffer.append(true);
311        buffer.set_bit(0, false);
312        assert_eq!(buffer.len(), 4);
313        assert_eq!(buffer.finish().values(), &[0b1010_u8]);
314    }
315
316    #[test]
317    fn test_boolean_buffer_builder_unset_last_bit() {
318        let mut buffer = BooleanBufferBuilder::new(4);
319        buffer.append(true);
320        buffer.append(true);
321        buffer.append(false);
322        buffer.append(true);
323        buffer.set_bit(3, false);
324        assert_eq!(buffer.len(), 4);
325        assert_eq!(buffer.finish().values(), &[0b0011_u8]);
326    }
327
328    #[test]
329    fn test_boolean_buffer_builder_unset_an_inner_bit() {
330        let mut buffer = BooleanBufferBuilder::new(5);
331        buffer.append(true);
332        buffer.append(true);
333        buffer.append(false);
334        buffer.append(true);
335        buffer.set_bit(1, false);
336        assert_eq!(buffer.len(), 4);
337        assert_eq!(buffer.finish().values(), &[0b1001_u8]);
338    }
339
340    #[test]
341    fn test_boolean_buffer_builder_unset_several_bits() {
342        let mut buffer = BooleanBufferBuilder::new(5);
343        buffer.append(true);
344        buffer.append(true);
345        buffer.append(true);
346        buffer.append(false);
347        buffer.append(true);
348        buffer.set_bit(1, false);
349        buffer.set_bit(2, false);
350        assert_eq!(buffer.len(), 5);
351        assert_eq!(buffer.finish().values(), &[0b10001_u8]);
352    }
353
354    #[test]
355    fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() {
356        let mut buffer = BooleanBufferBuilder::new(16);
357        buffer.append_n(10, true);
358        buffer.set_bit(0, false);
359        buffer.set_bit(3, false);
360        buffer.set_bit(9, false);
361        assert_eq!(buffer.len(), 10);
362        assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]);
363    }
364
365    #[test]
366    fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() {
367        let mut buffer = BooleanBufferBuilder::new(16);
368        buffer.append_n(5, true);
369        buffer.append_n(5, false);
370        buffer.append_n(5, true);
371        buffer.set_bit(0, false);
372        buffer.set_bit(3, false);
373        buffer.set_bit(9, false);
374        buffer.set_bit(6, true);
375        buffer.set_bit(14, true);
376        buffer.set_bit(13, false);
377        assert_eq!(buffer.len(), 15);
378        assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]);
379    }
380
381    #[test]
382    fn test_bool_buffer_builder_get_first_bit() {
383        let mut buffer = BooleanBufferBuilder::new(16);
384        buffer.append_n(8, true);
385        buffer.append_n(8, false);
386        assert!(buffer.get_bit(0));
387    }
388
389    #[test]
390    fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() {
391        let buffer = {
392            let mut buffer = BooleanBufferBuilder::new(16);
393            buffer.append_n(8, true);
394            buffer
395        };
396
397        assert!(buffer.get_bit(0));
398    }
399
400    #[test]
401    fn test_bool_buffer_builder_get_last_bit() {
402        let mut buffer = BooleanBufferBuilder::new(16);
403        buffer.append_n(8, true);
404        buffer.append_n(8, false);
405        assert!(!buffer.get_bit(15));
406    }
407
408    #[test]
409    fn test_bool_buffer_builder_get_an_inner_bit() {
410        let mut buffer = BooleanBufferBuilder::new(16);
411        buffer.append_n(4, false);
412        buffer.append_n(8, true);
413        buffer.append_n(4, false);
414        assert!(buffer.get_bit(11));
415    }
416
417    #[test]
418    fn test_bool_buffer_fuzz() {
419        use rand::prelude::*;
420
421        let mut buffer = BooleanBufferBuilder::new(12);
422        let mut all_bools = vec![];
423        let mut rng = rand::rng();
424
425        let src_len = 32;
426        let (src, compacted_src) = {
427            let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0))
428                .take(src_len)
429                .collect();
430
431            let mut compacted_src = BooleanBufferBuilder::new(src_len);
432            compacted_src.append_slice(&src);
433            (src, compacted_src.finish())
434        };
435
436        for _ in 0..100 {
437            let a = rng.next_u32() as usize % src_len;
438            let b = rng.next_u32() as usize % src_len;
439
440            let start = a.min(b);
441            let end = a.max(b);
442
443            buffer.append_packed_range(start..end, compacted_src.values());
444            all_bools.extend_from_slice(&src[start..end]);
445        }
446
447        let mut compacted = BooleanBufferBuilder::new(all_bools.len());
448        compacted.append_slice(&all_bools);
449
450        assert_eq!(buffer.finish(), compacted.finish())
451    }
452
453    #[test]
454    fn test_boolean_array_builder_resize() {
455        let mut builder = BooleanBufferBuilder::new(20);
456        builder.append_n(4, true);
457        builder.append_n(7, false);
458        builder.append_n(2, true);
459        builder.resize(20);
460
461        assert_eq!(builder.len(), 20);
462        assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]);
463
464        builder.resize(5);
465        assert_eq!(builder.len(), 5);
466        assert_eq!(builder.as_slice(), &[0b00001111]);
467
468        builder.append_n(4, true);
469        assert_eq!(builder.len(), 9);
470        assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]);
471    }
472
473    #[test]
474    fn test_truncate() {
475        let b = MutableBuffer::from_iter([true, true, true, true]);
476        let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2);
477        builder.advance(2);
478        let finished = builder.finish();
479        assert_eq!(finished.values(), &[0b00000011]);
480
481        let mut builder = BooleanBufferBuilder::new(10);
482        builder.append_n(5, true);
483        builder.resize(3);
484        builder.advance(2);
485        let finished = builder.finish();
486        assert_eq!(finished.values(), &[0b00000111]);
487
488        let mut builder = BooleanBufferBuilder::new(10);
489        builder.append_n(16, true);
490        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
491        builder.truncate(20);
492        assert_eq!(builder.as_slice(), &[0xFF, 0xFF]);
493        builder.truncate(14);
494        assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]);
495        builder.append(false);
496        builder.append(true);
497        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]);
498        builder.append_packed_range(0..3, &[0xFF]);
499        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]);
500        builder.truncate(17);
501        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]);
502        builder.append_packed_range(0..2, &[2]);
503        assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]);
504        builder.truncate(8);
505        assert_eq!(builder.as_slice(), &[0xFF]);
506        builder.resize(14);
507        assert_eq!(builder.as_slice(), &[0xFF, 0x00]);
508        builder.truncate(0);
509        assert_eq!(builder.as_slice(), &[]);
510    }
511
512    #[test]
513    fn test_boolean_builder_increases_buffer_len() {
514        // 00000010 01001000
515        let buf = Buffer::from([72_u8, 2_u8]);
516        let mut builder = BooleanBufferBuilder::new(8);
517
518        for i in 0..16 {
519            if i == 3 || i == 6 || i == 9 {
520                builder.append(true);
521            } else {
522                builder.append(false);
523            }
524        }
525        let buf2 = builder.finish();
526
527        assert_eq!(buf.len(), buf2.inner().len());
528        assert_eq!(buf.as_slice(), buf2.values());
529    }
530}