vortex_array/arrays/bool/
array.rs1use arrow_array::BooleanArray;
2use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
3use vortex_dtype::DType;
4use vortex_error::{VortexResult, vortex_panic};
5
6use crate::Canonical;
7use crate::arrays::{BoolVTable, bool};
8use crate::builders::ArrayBuilder;
9use crate::stats::{ArrayStats, StatsSetRef};
10use crate::validity::Validity;
11use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
12
/// An array of boolean values stored as bits in an Arrow [`BooleanBuffer`],
/// together with its validity, logical type and statistics.
#[derive(Clone, Debug)]
pub struct BoolArray {
    // Logical type; `BoolArray::new` sets it to `DType::Bool` with the
    // nullability reported by the validity.
    dtype: DType,
    // The bit values; `BoolArray::new` stores the buffer after `shrink_offset`.
    buffer: BooleanBuffer,
    // Validity of the elements.
    pub(crate) validity: Validity,
    // Statistics holder; `BoolArray::new` initialises it with `ArrayStats::default()`.
    pub(crate) stats_set: ArrayStats,
}
20
21impl BoolArray {
22 pub fn from_indices<I: IntoIterator<Item = usize>>(length: usize, indices: I) -> Self {
25 let mut buffer = MutableBuffer::new_null(length);
26 indices
27 .into_iter()
28 .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
29 Self::new(
30 BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
31 Validity::NonNullable,
32 )
33 }
34
35 pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
38 if let Some(len) = validity.maybe_len() {
39 if buffer.len() != len {
40 vortex_panic!(
41 "Buffer and validity length mismatch: buffer={}, validity={}",
42 buffer.len(),
43 len
44 );
45 }
46 }
47
48 let buffer = buffer.shrink_offset();
50 Self {
51 dtype: DType::Bool(validity.nullability()),
52 buffer,
53 validity,
54 stats_set: ArrayStats::default(),
55 }
56 }
57
58 pub fn boolean_buffer(&self) -> &BooleanBuffer {
60 assert!(
61 self.buffer.offset() < 8,
62 "Offset must be <8, did we forget to call shrink_offset? Found {}",
63 self.buffer.offset()
64 );
65 &self.buffer
66 }
67
68 pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
75 let offset = self.buffer.offset();
76 let len = self.buffer.len();
77 let arrow_buffer = self.buffer.into_inner();
78 let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
79 arrow_buffer.into_mutable().unwrap_or_else(|b| {
80 let mut buf = MutableBuffer::with_capacity(b.len());
81 buf.extend_from_slice(b.as_slice());
82 buf
83 })
84 } else {
85 let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
86 buf.extend_from_slice(arrow_buffer.as_slice());
87 buf
88 };
89
90 (
91 BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
92 offset,
93 )
94 }
95}
96
/// Wraps a [`BooleanBuffer`] as a non-nullable [`BoolArray`].
impl From<BooleanBuffer> for BoolArray {
    fn from(value: BooleanBuffer) -> Self {
        // Delegates to `BoolArray::new` with `Validity::NonNullable`.
        Self::new(value, Validity::NonNullable)
    }
}
102
103impl FromIterator<bool> for BoolArray {
104 fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
105 Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
106 }
107}
108
109impl FromIterator<Option<bool>> for BoolArray {
110 fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
111 let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
112
113 Self::new(
114 buffer,
115 nulls.map(Validity::from).unwrap_or(Validity::AllValid),
116 )
117 }
118}
119
/// Exposes the array's validity through the [`ValidityHelper`] trait.
impl ValidityHelper for BoolArray {
    fn validity(&self) -> &Validity {
        // Borrow of the `validity` field; nothing is computed here.
        &self.validity
    }
}
125
/// Basic array accessors (`len`, `dtype`, `stats`) for [`BoolArray`].
impl ArrayVTable<BoolVTable> for BoolVTable {
    /// Number of elements, taken from the length of the bit buffer.
    fn len(array: &BoolArray) -> usize {
        array.buffer.len()
    }

    /// The dtype stored on the array.
    fn dtype(array: &BoolArray) -> &DType {
        &array.dtype
    }

    /// A [`StatsSetRef`] built from the array's `stats_set` and the array itself.
    fn stats(array: &BoolArray) -> StatsSetRef<'_> {
        array.stats_set.to_ref(array.as_ref())
    }
}
139
/// Canonicalization hooks for [`BoolArray`].
impl CanonicalVTable<BoolVTable> for BoolVTable {
    /// Returns a clone of the array wrapped in `Canonical::Bool`.
    fn canonicalize(array: &BoolArray) -> VortexResult<Canonical> {
        Ok(Canonical::Bool(array.clone()))
    }

    /// Appends the array to `builder` by delegating to `extend_from_array`.
    fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
        builder.extend_from_array(array.as_ref())
    }
}
149
/// Extension methods for boolean bit buffers.
pub trait BooleanBufferExt {
    /// Consumes the buffer and returns one of the same type.
    ///
    /// The `BooleanBuffer` implementation in this file slices off whole leading
    /// bytes, so the remaining bit offset is `offset % 8` and the length is
    /// unchanged.
    fn shrink_offset(self) -> Self;
}
154
155impl BooleanBufferExt for BooleanBuffer {
156 fn shrink_offset(self) -> Self {
157 let byte_offset = self.offset() / 8;
158 let bit_offset = self.offset() % 8;
159 let len = self.len();
160 let buffer = self
161 .into_inner()
162 .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
163 BooleanBuffer::new(buffer, bit_offset, len)
164 }
165}
166
#[cfg(test)]
mod tests {
    use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, PrimitiveArray};
    use crate::compute::conformance::mask::test_mask;
    use crate::patches::Patches;
    use crate::validity::Validity;
    use crate::vtable::ValidityHelper;
    use crate::{Array, IntoArray, ToCanonical};

    // The first element of a plain bool array reads back as `true`.
    #[test]
    fn bool_array() {
        let array = BoolArray::from_iter([true, false, true]);
        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
    }

    // Collecting only `Some` values yields `Validity::AllValid`.
    #[test]
    fn test_all_some_iter() {
        let array = BoolArray::from_iter([Some(true), Some(false)]);

        assert!(matches!(array.validity(), Validity::AllValid));

        let first = array.scalar_at(0).unwrap();
        assert!(bool::try_from(&first).unwrap());
        let second = array.scalar_at(1).unwrap();
        assert!(!bool::try_from(&second).unwrap());
    }

    // Every position reads back as the value (or null) it was built from.
    #[test]
    fn test_bool_from_iter() {
        let expected = [Some(true), Some(true), None, Some(false), None];
        let array = BoolArray::from_iter(expected);

        for (idx, want) in expected.iter().copied().enumerate() {
            let scalar = array.scalar_at(idx).unwrap();
            match want {
                Some(value) => assert_eq!(bool::try_from(&scalar).unwrap(), value),
                None => assert!(scalar.is_null()),
            }
        }
    }

    // Patching the parent array must leave a previously taken slice untouched.
    #[test]
    fn patch_sliced_bools() {
        let original = {
            let mut bits = BooleanBufferBuilder::new(12);
            bits.append(false);
            bits.append_n(11, true);
            BoolArray::from(bits.finish())
        };
        let sliced = original.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.as_slice(), &[254, 15]);

        // Clear the bit at index 4 of the parent array.
        let patches = Patches::new(
            original.len(),
            0,
            PrimitiveArray::new(buffer![4u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = original.patch(&patches).unwrap();
        let patched_len = patched.len();
        let (builder, bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 0);
        assert_eq!(builder.len(), patched_len + bit_offset);
        assert_eq!(builder.as_slice(), &[238, 15]);

        // The slice taken before patching still shows the original bits.
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[254, 15]);
    }

    // Slicing in the middle of a byte reports the bit offset and covers it in the builder.
    #[test]
    fn slice_array_in_middle() {
        let all_set = BoolArray::from(BooleanBuffer::new_set(16));
        let sliced = all_set.slice(4, 12).unwrap();
        let sliced_len = sliced.len();
        let (builder, bit_offset) = sliced.to_bool().unwrap().into_boolean_builder();
        assert_eq!(bit_offset, 4);
        assert_eq!(builder.len(), sliced_len + bit_offset);
        assert_eq!(builder.as_slice(), &[255, 15]);
    }

    // This test is expected to panic (see `#[should_panic]`).
    #[test]
    #[should_panic]
    fn patch_bools_owned() {
        let bytes = buffer![255u8; 2];
        let bits = BooleanBuffer::new(bytes.into_arrow_buffer(), 0, 15);
        let array = BoolArray::new(bits, Validity::NonNullable);
        let original_ptr = array.boolean_buffer().sliced().as_ptr();

        let patches = Patches::new(
            array.len(),
            0,
            PrimitiveArray::new(buffer![0u32], Validity::AllValid).into_array(),
            BoolArray::from(BooleanBuffer::new_unset(1)).into_array(),
        );
        let patched = array.patch(&patches).unwrap();
        assert_eq!(patched.boolean_buffer().sliced().as_ptr(), original_ptr);

        let (builder, _byte_bit_offset) = patched.to_bool().unwrap().into_boolean_builder();
        assert_eq!(builder.as_slice(), &[254, 127]);
    }

    // Runs the shared mask conformance checks against a small bool array.
    #[test]
    fn test_mask_primitive_array() {
        let array = BoolArray::from_iter([true, false, true, true, false]);
        test_mask(array.as_ref());
    }
}