vortex_array/arrays/bool/
array.rs

1use arrow_array::BooleanArray;
2use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
3use vortex_dtype::DType;
4use vortex_error::{VortexResult, vortex_panic};
5use vortex_mask::Mask;
6
7use super::serde::BoolMetadata;
8use crate::array::{Array, ArrayCanonicalImpl, ArrayValidityImpl, ArrayVariantsImpl};
9use crate::arrays::bool;
10use crate::builders::ArrayBuilder;
11use crate::stats::{ArrayStats, StatsSetRef};
12use crate::validity::Validity;
13use crate::variants::BoolArrayTrait;
14use crate::vtable::VTableRef;
15use crate::{ArrayImpl, ArrayRef, ArrayStatisticsImpl, Canonical, Encoding, RkyvMetadata};
16
17#[derive(Clone, Debug)]
18pub struct BoolArray {
19    dtype: DType,
20    buffer: BooleanBuffer,
21    pub(crate) validity: Validity,
22    // TODO(ngates): do we want a stats set to be shared across all arrays?
23    pub(crate) stats_set: ArrayStats,
24}
25
26pub struct BoolEncoding;
27impl Encoding for BoolEncoding {
28    type Array = BoolArray;
29    type Metadata = RkyvMetadata<BoolMetadata>;
30}
31
32impl BoolArray {
33    /// Create a new BoolArray from a set of indices and a length.
34    /// All indices must be less than the length.
35    pub fn from_indices<I: IntoIterator<Item = usize>>(length: usize, indices: I) -> Self {
36        let mut buffer = MutableBuffer::new_null(length);
37        indices
38            .into_iter()
39            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
40        Self::new(
41            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
42            Validity::NonNullable,
43        )
44    }
45
46    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
47    /// any invariants.
48    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
49        if let Some(len) = validity.maybe_len() {
50            if buffer.len() != len {
51                vortex_panic!(
52                    "Buffer and validity length mismatch: buffer={}, validity={}",
53                    buffer.len(),
54                    len
55                );
56            }
57        }
58
59        // Shrink the buffer to remove any whole bytes.
60        let buffer = buffer.shrink_offset();
61        Self {
62            dtype: DType::Bool(validity.nullability()),
63            buffer,
64            validity,
65            stats_set: ArrayStats::default(),
66        }
67    }
68
69    /// Returns the underlying [`BooleanBuffer`] of the array.
70    pub fn boolean_buffer(&self) -> &BooleanBuffer {
71        assert!(
72            self.buffer.offset() < 8,
73            "Offset must be <8, did we forget to call shrink_offset? Found {}",
74            self.buffer.offset()
75        );
76        &self.buffer
77    }
78
79    /// Returns the underlying [`Validity`] of the array.
80    pub fn validity(&self) -> &Validity {
81        &self.validity
82    }
83
84    /// Get a mutable version of this array.
85    ///
86    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
87    /// otherwise a copy is created.
88    ///
89    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
90    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
91        let offset = self.buffer.offset();
92        let len = self.buffer.len();
93        let arrow_buffer = self.buffer.into_inner();
94        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
95            arrow_buffer.into_mutable().unwrap_or_else(|b| {
96                let mut buf = MutableBuffer::with_capacity(b.len());
97                buf.extend_from_slice(b.as_slice());
98                buf
99            })
100        } else {
101            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
102            buf.extend_from_slice(arrow_buffer.as_slice());
103            buf
104        };
105
106        (
107            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
108            offset,
109        )
110    }
111}
112
113impl ArrayImpl for BoolArray {
114    type Encoding = BoolEncoding;
115
116    #[inline]
117    fn _len(&self) -> usize {
118        self.buffer.len()
119    }
120
121    #[inline]
122    fn _dtype(&self) -> &DType {
123        &self.dtype
124    }
125
126    #[inline]
127    fn _vtable(&self) -> VTableRef {
128        VTableRef::new_ref(&BoolEncoding)
129    }
130
131    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
132        let validity = if self.validity().is_array() {
133            Validity::Array(children[0].clone())
134        } else {
135            self.validity().clone()
136        };
137
138        Ok(Self::new(self.boolean_buffer().clone(), validity))
139    }
140}
141
142impl From<BooleanBuffer> for BoolArray {
143    fn from(value: BooleanBuffer) -> Self {
144        Self::new(value, Validity::NonNullable)
145    }
146}
147
148impl FromIterator<bool> for BoolArray {
149    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
150        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
151    }
152}
153
154impl FromIterator<Option<bool>> for BoolArray {
155    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
156        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
157
158        Self::new(
159            buffer,
160            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
161        )
162    }
163}
164
165impl ArrayCanonicalImpl for BoolArray {
166    #[inline]
167    fn _to_canonical(&self) -> VortexResult<Canonical> {
168        Ok(Canonical::Bool(self.clone()))
169    }
170
171    #[inline]
172    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
173        builder.extend_from_array(self)
174    }
175}
176
177impl ArrayStatisticsImpl for BoolArray {
178    fn _stats_ref(&self) -> StatsSetRef<'_> {
179        self.stats_set.to_ref(self)
180    }
181}
182
183impl ArrayValidityImpl for BoolArray {
184    #[inline]
185    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
186        self.validity.is_valid(index)
187    }
188
189    #[inline]
190    fn _all_valid(&self) -> VortexResult<bool> {
191        self.validity.all_valid()
192    }
193
194    #[inline]
195    fn _all_invalid(&self) -> VortexResult<bool> {
196        self.validity.all_invalid()
197    }
198
199    #[inline]
200    fn _validity_mask(&self) -> VortexResult<Mask> {
201        self.validity.to_mask(self.len())
202    }
203}
204
205impl ArrayVariantsImpl for BoolArray {
206    fn _as_bool_typed(&self) -> Option<&dyn BoolArrayTrait> {
207        Some(self)
208    }
209}
210
211impl BoolArrayTrait for BoolArray {}
212
/// Extension methods for boolean bit buffers.
pub trait BooleanBufferExt {
    /// Drops every whole byte covered by the buffer's bit offset, so that the remaining
    /// offset is less than 8. The logical length is unchanged.
    fn shrink_offset(self) -> Self;
}
217
218impl BooleanBufferExt for BooleanBuffer {
219    fn shrink_offset(self) -> Self {
220        let byte_offset = self.offset() / 8;
221        let bit_offset = self.offset() % 8;
222        let len = self.len();
223        let buffer = self.into_inner().slice(byte_offset);
224        BooleanBuffer::new(buffer, bit_offset, len)
225    }
226}