vortex_array/arrays/bool/
array.rs1use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::Canonical;
10use crate::arrays::{BoolVTable, bool};
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15
16#[derive(Clone, Debug)]
46pub struct BoolArray {
47 dtype: DType,
48 buffer: BooleanBuffer,
49 pub(crate) validity: Validity,
50 pub(crate) stats_set: ArrayStats,
51}
52
53impl BoolArray {
54 pub fn from_indices<I: IntoIterator<Item = usize>>(
57 length: usize,
58 indices: I,
59 validity: Validity,
60 ) -> Self {
61 let mut buffer = MutableBuffer::new_null(length);
62 indices
63 .into_iter()
64 .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
65 Self::new(
66 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
67 validity,
68 )
69 }
70
71 pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
74 if let Some(len) = validity.maybe_len() {
75 if buffer.len() != len {
76 vortex_panic!(
77 "Buffer and validity length mismatch: buffer={}, validity={}",
78 buffer.len(),
79 len
80 );
81 }
82 }
83
84 let buffer = buffer.shrink_offset();
86 Self {
87 dtype: DType::Bool(validity.nullability()),
88 buffer,
89 validity,
90 stats_set: ArrayStats::default(),
91 }
92 }
93
94 pub fn boolean_buffer(&self) -> &BooleanBuffer {
96 assert!(
97 self.buffer.offset() < 8,
98 "Offset must be <8, did we forget to call shrink_offset? Found {}",
99 self.buffer.offset()
100 );
101 &self.buffer
102 }
103
104 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
111 let offset = self.buffer.offset();
112 let len = self.buffer.len();
113 let arrow_buffer = self.buffer.into_inner();
114 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
115 arrow_buffer.into_mutable().unwrap_or_else(|b| {
116 let mut buf = MutableBuffer::with_capacity(b.len());
117 buf.extend_from_slice(b.as_slice());
118 buf
119 })
120 } else {
121 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
122 buf.extend_from_slice(arrow_buffer.as_slice());
123 buf
124 };
125
126 (
127 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
128 offset,
129 )
130 }
131}
132
133impl From<BooleanBuffer> for BoolArray {
134 fn from(value: BooleanBuffer) -> Self {
135 Self::new(value, Validity::NonNullable)
136 }
137}
138
139impl FromIterator<bool> for BoolArray {
140 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
141 Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
142 }
143}
144
145impl FromIterator<Option<bool>> for BoolArray {
146 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
147 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
148
149 Self::new(
150 buffer,
151 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
152 )
153 }
154}
155
156impl ValidityHelper for BoolArray {
157 fn validity(&self) -> &Validity {
158 &self.validity
159 }
160}
161
162impl ArrayVTable<BoolVTable> for BoolVTable {
163 fn len(array: &BoolArray) -> usize {
164 array.buffer.len()
165 }
166
167 fn dtype(array: &BoolArray) -> &DType {
168 &array.dtype
169 }
170
171 fn stats(array: &BoolArray) -> StatsSetRef<'_> {
172 array.stats_set.to_ref(array.as_ref())
173 }
174}
175
176impl CanonicalVTable<BoolVTable> for BoolVTable {
177 fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
178 Ok(Canonical::Bool(array.clone()))
179 }
180
181 fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
182 builder.extend_from_array(array.as_ref())
183 }
184}
185
186pub trait BooleanBufferExt {
187 fn shrink_offset(self) -> Self;
189}
190
191impl BooleanBufferExt for BooleanBuffer {
192 fn shrink_offset(self) -> Self {
193 let byte_offset = self.offset() / 8;
194 let bit_offset = self.offset() % 8;
195 let len = self.len();
196 let buffer = self
197 .into_inner()
198 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
199 BooleanBuffer::new(buffer, bit_offset, len)
200 }
201}
202
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads the scalar at `idx` and converts it to a plain `bool`, panicking on failure.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx).unwrap()).unwrap()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        // No `None` was supplied, so the validity is `AllValid`.
        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).unwrap().is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).unwrap().is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (slice_bits, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.as_slice(), &[254, 15]);

        // Patch index 4 of the full array to `false`.
        let patches = Patches::new(
            arr.len(),
            0,
            buffer![4u32].into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = arr.patch(&patches).unwrap();
        let patched_len = patched.len();
        let (patched_bits, patched_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(patched_offset, 0);
        assert_eq!(patched_bits.len(), patched_len + patched_offset);
        assert_eq!(patched_bits.as_slice(), &[238, 15]);

        // The slice taken before patching still reports the original bits.
        let (slice_bits, slice_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(slice_offset, 4);
        assert_eq!(slice_bits.len(), sliced_len + slice_offset);
        assert_eq!(slice_bits.as_slice(), &[254, 15]);
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (bits, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(bits.len(), sliced_len + bit_offset);
        assert_eq!(bits.as_slice(), &[255, 15]);
    }

    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bool_buf = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bool_buf, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            arr.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = arr.patch(&patches).unwrap();
        // Checks that the patched array still points at the original allocation.
        assert_eq!(patched.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (bits, _byte_bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bits.as_slice(), &[254, 127]);
    }
}