vortex_array/arrays/bool/
array.rs1use std::ops::BitAnd;
5
6use arrow_array::BooleanArray;
7use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
8use vortex_buffer::ByteBuffer;
9use vortex_dtype::DType;
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use crate::Canonical;
14use crate::arrays::{BoolVTable, bool};
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
19
20#[derive(Clone, Debug)]
50pub struct BoolArray {
51 dtype: DType,
52 buffer: BooleanBuffer,
53 pub(crate) validity: Validity,
54 pub(crate) stats_set: ArrayStats,
55}
56
57impl BoolArray {
58 fn validate(
59 buffer: &ByteBuffer,
60 offset: usize,
61 len: usize,
62 validity: &Validity,
63 ) -> VortexResult<()> {
64 vortex_ensure!(
65 offset < 8,
66 "offset must be less than whole byte, was {offset} bits"
67 );
68
69 let required_bytes = offset.saturating_add(len).div_ceil(8);
71 vortex_ensure!(
72 buffer.len() >= required_bytes,
73 "BoolArray with offset={offset} len={len} cannot be built from buffer of size {}",
74 buffer.len()
75 );
76
77 if let Some(validity_len) = validity.maybe_len() {
79 vortex_ensure!(
80 validity_len == len,
81 "BoolArray of size {len} cannot be built with validity of size {validity_len}"
82 );
83 }
84
85 Ok(())
86 }
87}
88
89impl BoolArray {
90 pub fn try_new(
106 buffer: ByteBuffer,
107 offset: usize,
108 len: usize,
109 validity: Validity,
110 ) -> VortexResult<Self> {
111 Self::validate(&buffer, offset, len, &validity)?;
112
113 Ok(Self::new(
114 BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len),
115 validity,
116 ))
117 }
118
119 pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
123 if let Some(validity_len) = validity.maybe_len() {
124 assert_eq!(buffer.len(), validity_len);
125 }
126
127 let buffer = buffer.shrink_offset();
129 Self {
130 dtype: DType::Bool(validity.nullability()),
131 buffer,
132 validity,
133 stats_set: ArrayStats::default(),
134 }
135 }
136
137 pub fn from_indices<I: IntoIterator<Item = usize>>(
141 length: usize,
142 indices: I,
143 validity: Validity,
144 ) -> Self {
145 let mut buffer = MutableBuffer::new_null(length);
146 let buffer_slice = buffer.as_slice_mut();
147 indices
148 .into_iter()
149 .for_each(|idx| arrow_buffer::bit_util::set_bit(buffer_slice, idx));
150 Self::new(
151 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
152 validity,
153 )
154 }
155
156 pub fn boolean_buffer(&self) -> &BooleanBuffer {
158 assert!(
159 self.buffer.offset() < 8,
160 "Offset must be <8, did we forget to call shrink_offset? Found {}",
161 self.buffer.offset()
162 );
163 &self.buffer
164 }
165
166 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
173 let offset = self.buffer.offset();
174 let len = self.buffer.len();
175 let arrow_buffer = self.buffer.into_inner();
176 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
177 arrow_buffer.into_mutable().unwrap_or_else(|b| {
178 let mut buf = MutableBuffer::with_capacity(b.len());
179 buf.extend_from_slice(b.as_slice());
180 buf
181 })
182 } else {
183 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
184 buf.extend_from_slice(arrow_buffer.as_slice());
185 buf
186 };
187
188 (
189 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
190 offset,
191 )
192 }
193
194 pub fn to_mask(&self) -> Mask {
195 self.maybe_to_mask()
196 .vortex_expect("cannot convert nullable boolean array to mask")
197 }
198
199 pub fn maybe_to_mask(&self) -> Option<Mask> {
200 self.all_valid()
201 .then(|| Mask::from_buffer(self.boolean_buffer().clone()))
202 }
203
204 pub fn to_mask_fill_null_false(&self) -> Mask {
205 if let Some(constant) = self.as_constant() {
206 let bool_constant = constant.as_bool();
207 if bool_constant.value().unwrap_or(false) {
208 return Mask::new_true(self.len());
209 } else {
210 return Mask::new_false(self.len());
211 }
212 }
213 let buffer = match self.validity_mask() {
215 Mask::AllTrue(_) => self.boolean_buffer().clone(),
216 Mask::AllFalse(_) => return Mask::new_false(self.len()),
217 Mask::Values(validity) => validity.boolean_buffer().bitand(self.boolean_buffer()),
218 };
219 Mask::from_buffer(buffer)
220 }
221}
222
223impl From<BooleanBuffer> for BoolArray {
224 fn from(value: BooleanBuffer) -> Self {
225 Self::new(value, Validity::NonNullable)
226 }
227}
228
229impl FromIterator<bool> for BoolArray {
230 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
231 Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
232 }
233}
234
235impl FromIterator<Option<bool>> for BoolArray {
236 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
237 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
238
239 Self::new(
240 buffer,
241 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
242 )
243 }
244}
245
246impl ValidityHelper for BoolArray {
247 fn validity(&self) -> &Validity {
248 &self.validity
249 }
250}
251
252impl ArrayVTable<BoolVTable> for BoolVTable {
253 fn len(array: &BoolArray) -> usize {
254 array.buffer.len()
255 }
256
257 fn dtype(array: &BoolArray) -> &DType {
258 &array.dtype
259 }
260
261 fn stats(array: &BoolArray) -> StatsSetRef<'_> {
262 array.stats_set.to_ref(array.as_ref())
263 }
264}
265
266impl CanonicalVTable<BoolVTable> for BoolVTable {
267 fn canonicalize(array: &BoolArray) -> Canonical {
268 Canonical::Bool(array.clone())
269 }
270
271 fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) {
272 builder.extend_from_array(array.as_ref())
273 }
274}
275
/// Extension methods for bit-packed boolean buffers.
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns one whose bit offset has been reduced.
    ///
    /// The implementation for `BooleanBuffer` drops whole leading bytes so that the remaining
    /// offset is less than 8 while the logical length is unchanged.
    fn shrink_offset(self) -> Self;
}
280
281impl BooleanBufferExt for BooleanBuffer {
282 fn shrink_offset(self) -> Self {
283 let byte_offset = self.offset() / 8;
284 let bit_offset = self.offset() % 8;
285 let len = self.len();
286 let buffer = self
287 .into_inner()
288 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
289 BooleanBuffer::new(buffer, bit_offset, len)
290 }
291}
292
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads the value at `idx` as a plain `bool`, panicking if it cannot be converted.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx)).unwrap()
    }

    /// A non-nullable array built from plain bools exposes its first value.
    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    /// Collecting only `Some` values yields `AllValid` validity and the expected values.
    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    /// Collecting a mix of `Some` and `None` preserves both values and nulls.
    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).is_null());
    }

    /// Patching the parent array must not change the bytes seen through an earlier slice.
    #[test]
    fn patch_sliced_bools() {
        // Twelve bits: the first is false, the remaining eleven are true.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let (slice_bits, slice_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        // 254 = 0b1111_1110, 15 = 0b0000_1111.
        assert_eq!(slice_bits.as_slice(), &[254, 15]);

        // Overwrite index 4 of the parent with `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        let arr_len = arr.len();
        let (patched_bits, patched_offset) = arr.to_bool().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), arr_len + patched_offset);
        // Bit 4 is now cleared: 254 - 16 = 238.
        assert_eq!(patched_bits.as_slice(), &[238, 15]);

        // The slice taken before patching still sees the original bytes.
        let (slice_bits, slice_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.len(), sliced_len + slice_offset);
        assert_eq!(slice_bits.as_slice(), &[254, 15]);
    }

    /// Slicing inside a byte keeps the bit offset and exposes the full backing bytes.
    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4..12);
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[255, 15]);
    }

    /// NOTE(review): this test is expected to panic; presumably the pointer-equality assertion
    /// fails because patching does not reuse the original allocation — confirm.
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        // Overwrite index 0 with `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let arr = arr.patch(&patches);
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (patched_bits, _byte_bit_offset) = arr.to_bool().into_boolean_builder();
        assert_eq!(patched_bits.as_slice(), &[254, 127]);
    }
}