vortex_array/arrays/bool/
array.rs1use arrow_array::BooleanArray;
5use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
6use vortex_dtype::DType;
7use vortex_error::{VortexResult, vortex_panic};
8
9use crate::Canonical;
10use crate::arrays::{BoolVTable, bool};
11use crate::builders::ArrayBuilder;
12use crate::stats::{ArrayStats, StatsSetRef};
13use crate::validity::Validity;
14use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
15
16#[derive(Clone, Debug)]
17pub struct BoolArray {
18 dtype: DType,
19 buffer: BooleanBuffer,
20 pub(crate) validity: Validity,
21 pub(crate) stats_set: ArrayStats,
22}
23
24impl BoolArray {
25 pub fn from_indices<I: IntoIterator<Item = usize>>(
28 length: usize,
29 indices: I,
30 validity: Validity,
31 ) -> Self {
32 let mut buffer = MutableBuffer::new_null(length);
33 indices
34 .into_iter()
35 .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
36 Self::new(
37 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
38 validity,
39 )
40 }
41
42 pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
45 if let Some(len) = validity.maybe_len() {
46 if buffer.len() != len {
47 vortex_panic!(
48 "Buffer and validity length mismatch: buffer={}, validity={}",
49 buffer.len(),
50 len
51 );
52 }
53 }
54
55 let buffer = buffer.shrink_offset();
57 Self {
58 dtype: DType::Bool(validity.nullability()),
59 buffer,
60 validity,
61 stats_set: ArrayStats::default(),
62 }
63 }
64
65 pub fn boolean_buffer(&self) -> &BooleanBuffer {
67 assert!(
68 self.buffer.offset() < 8,
69 "Offset must be <8, did we forget to call shrink_offset? Found {}",
70 self.buffer.offset()
71 );
72 &self.buffer
73 }
74
75 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
82 let offset = self.buffer.offset();
83 let len = self.buffer.len();
84 let arrow_buffer = self.buffer.into_inner();
85 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
86 arrow_buffer.into_mutable().unwrap_or_else(|b| {
87 let mut buf = MutableBuffer::with_capacity(b.len());
88 buf.extend_from_slice(b.as_slice());
89 buf
90 })
91 } else {
92 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
93 buf.extend_from_slice(arrow_buffer.as_slice());
94 buf
95 };
96
97 (
98 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
99 offset,
100 )
101 }
102}
103
104impl From<BooleanBuffer> for BoolArray {
105 fn from(value: BooleanBuffer) -> Self {
106 Self::new(value, Validity::NonNullable)
107 }
108}
109
110impl FromIterator<bool> for BoolArray {
111 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
112 Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
113 }
114}
115
116impl FromIterator<Option<bool>> for BoolArray {
117 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
118 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
119
120 Self::new(
121 buffer,
122 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
123 )
124 }
125}
126
127impl ValidityHelper for BoolArray {
128 fn validity(&self) -> &Validity {
129 &self.validity
130 }
131}
132
133impl ArrayVTable<BoolVTable> for BoolVTable {
134 fn len(array: &BoolArray) -> usize {
135 array.buffer.len()
136 }
137
138 fn dtype(array: &BoolArray) -> &DType {
139 &array.dtype
140 }
141
142 fn stats(array: &BoolArray) -> StatsSetRef<'_> {
143 array.stats_set.to_ref(array.as_ref())
144 }
145}
146
147impl CanonicalVTable<BoolVTable> for BoolVTable {
148 fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
149 Ok(Canonical::Bool(array.clone()))
150 }
151
152 fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
153 builder.extend_from_array(array.as_ref())
154 }
155}
156
/// Extension methods for Arrow's [`BooleanBuffer`].
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns one of the same type whose leading whole
    /// bytes have been sliced away, leaving only a sub-byte bit offset.
    fn shrink_offset(self) -> Self;
}
161
162impl BooleanBufferExt for BooleanBuffer {
163 fn shrink_offset(self) -> Self {
164 let byte_offset = self.offset() / 8;
165 let bit_offset = self.offset() % 8;
166 let len = self.len();
167 let buffer = self
168 .into_inner()
169 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
170 BooleanBuffer::new(buffer, bit_offset, len)
171 }
172}
173
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    /// Reads the scalar at `idx` and converts it to `bool`, panicking on failure.
    fn bool_at(arr: &BoolArray, idx: usize) -> bool {
        bool::try_from(&arr.scalar_at(idx).unwrap()).unwrap()
    }

    #[test]
    fn bool_array() {
        let arr = BoolArray::from_iter([true, false, true]);
        assert!(bool_at(&arr, 0));
    }

    #[test]
    fn test_all_some_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(arr.validity(), Validity::AllValid));

        assert!(bool_at(&arr, 0));
        assert!(!bool_at(&arr, 1));
    }

    #[test]
    fn test_bool_from_iter() {
        let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]);

        assert!(bool_at(&arr, 0));
        assert!(bool_at(&arr, 1));
        assert!(arr.scalar_at(2).unwrap().is_null());
        assert!(!bool_at(&arr, 3));
        assert!(arr.scalar_at(4).unwrap().is_null());
    }

    #[test]
    fn patch_sliced_bools() {
        // One `false` followed by eleven `true` values.
        let arr = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.as_slice(), &[254, 15]);

        let patch_indices = PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);

        let arr = arr.patch(&patches).unwrap();
        let arr_len = arr.len();
        let (values, offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 0);
        assert_eq!(values.len(), arr_len + offset);
        assert_eq!(values.as_slice(), &[238, 15]);

        // The slice taken before patching must still report the original bits.
        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[254, 15]);
    }

    #[test]
    fn slice_array_in_middle() {
        let arr = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = arr.slice(4, 12).unwrap();
        let sliced_len = sliced.len();

        let (values, offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(offset, 4);
        assert_eq!(values.len(), sliced_len + offset);
        assert_eq!(values.as_slice(), &[255, 15]);
    }

    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let arr = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = arr.boolean_buffer().sliced().as_ptr();

        let patch_indices = PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array();
        let patch_values = BoolArray::from(BooleanBuffer::new_unset(1)).into_array();
        let patches = Patches::new(arr.len(), 0, patch_indices, patch_values);

        let arr = arr.patch(&patches).unwrap();
        assert_eq!(arr.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (values, _byte_bit_offset) = arr.to_bool().unwrap().into_boolean_builder();
        assert_eq!(values.as_slice(), &[254, 127]);
    }

    #[test]
    fn test_mask_primitive_array() {
        let arr = BoolArray::from_iter([true, false, true, true, false]);
        test_mask(arr.as_ref());
    }
}