vortex_array/arrays/bool/
array.rs

1use arrow_array::BooleanArray;
2use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
3use vortex_dtype::DType;
4use vortex_error::{VortexResult, vortex_panic};
5use vortex_mask::Mask;
6
7use super::serde::BoolMetadata;
8use crate::array::{Array, ArrayCanonicalImpl, ArrayValidityImpl, ArrayVariantsImpl};
9use crate::arrays::bool;
10use crate::builders::ArrayBuilder;
11use crate::stats::{ArrayStats, StatsSetRef};
12use crate::validity::Validity;
13use crate::variants::BoolArrayTrait;
14use crate::vtable::VTableRef;
15use crate::{ArrayImpl, ArrayRef, ArrayStatisticsImpl, Canonical, Encoding, ProstMetadata};
16
17#[derive(Clone, Debug)]
18pub struct BoolArray {
19    dtype: DType,
20    buffer: BooleanBuffer,
21    pub(crate) validity: Validity,
22    // TODO(ngates): do we want a stats set to be shared across all arrays?
23    pub(crate) stats_set: ArrayStats,
24}
25
/// Marker type for the encoding whose array type is [`BoolArray`].
#[derive(Debug)]
pub struct BoolEncoding;
28
29impl Encoding for BoolEncoding {
30    type Array = BoolArray;
31    type Metadata = ProstMetadata<BoolMetadata>;
32}
33
34impl BoolArray {
35    /// Create a new BoolArray from a set of indices and a length.
36    /// All indices must be less than the length.
37    pub fn from_indices<I: IntoIterator<Item = usize>>(length: usize, indices: I) -> Self {
38        let mut buffer = MutableBuffer::new_null(length);
39        indices
40            .into_iter()
41            .for_each(|idx| arrow_buffer::bit_util::set_bit(&mut buffer, idx));
42        Self::new(
43            BooleanBufferBuilder::new_from_buffer(buffer, length).finish(),
44            Validity::NonNullable,
45        )
46    }
47
48    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
49    /// any invariants.
50    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
51        if let Some(len) = validity.maybe_len() {
52            if buffer.len() != len {
53                vortex_panic!(
54                    "Buffer and validity length mismatch: buffer={}, validity={}",
55                    buffer.len(),
56                    len
57                );
58            }
59        }
60
61        // Shrink the buffer to remove any whole bytes.
62        let buffer = buffer.shrink_offset();
63        Self {
64            dtype: DType::Bool(validity.nullability()),
65            buffer,
66            validity,
67            stats_set: ArrayStats::default(),
68        }
69    }
70
71    /// Returns the underlying [`BooleanBuffer`] of the array.
72    pub fn boolean_buffer(&self) -> &BooleanBuffer {
73        assert!(
74            self.buffer.offset() < 8,
75            "Offset must be <8, did we forget to call shrink_offset? Found {}",
76            self.buffer.offset()
77        );
78        &self.buffer
79    }
80
81    /// Returns the underlying [`Validity`] of the array.
82    pub fn validity(&self) -> &Validity {
83        &self.validity
84    }
85
86    /// Get a mutable version of this array.
87    ///
88    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
89    /// otherwise a copy is created.
90    ///
91    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
92    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
93        let offset = self.buffer.offset();
94        let len = self.buffer.len();
95        let arrow_buffer = self.buffer.into_inner();
96        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
97            arrow_buffer.into_mutable().unwrap_or_else(|b| {
98                let mut buf = MutableBuffer::with_capacity(b.len());
99                buf.extend_from_slice(b.as_slice());
100                buf
101            })
102        } else {
103            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
104            buf.extend_from_slice(arrow_buffer.as_slice());
105            buf
106        };
107
108        (
109            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
110            offset,
111        )
112    }
113}
114
115impl ArrayImpl for BoolArray {
116    type Encoding = BoolEncoding;
117
118    #[inline]
119    fn _len(&self) -> usize {
120        self.buffer.len()
121    }
122
123    #[inline]
124    fn _dtype(&self) -> &DType {
125        &self.dtype
126    }
127
128    #[inline]
129    fn _vtable(&self) -> VTableRef {
130        VTableRef::new_ref(&BoolEncoding)
131    }
132
133    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
134        let validity = if self.validity().is_array() {
135            Validity::Array(children[0].clone())
136        } else {
137            self.validity().clone()
138        };
139
140        Ok(Self::new(self.boolean_buffer().clone(), validity))
141    }
142}
143
144impl From<BooleanBuffer> for BoolArray {
145    fn from(value: BooleanBuffer) -> Self {
146        Self::new(value, Validity::NonNullable)
147    }
148}
149
150impl FromIterator<bool> for BoolArray {
151    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
152        Self::new(BooleanBuffer::from_iter(iter), Validity::NonNullable)
153    }
154}
155
156impl FromIterator<Option<bool>> for BoolArray {
157    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
158        let (buffer, nulls) = BooleanArray::from_iter(iter).into_parts();
159
160        Self::new(
161            buffer,
162            nulls.map(Validity::from).unwrap_or(Validity::AllValid),
163        )
164    }
165}
166
167impl ArrayCanonicalImpl for BoolArray {
168    #[inline]
169    fn _to_canonical(&self) -> VortexResult<Canonical> {
170        Ok(Canonical::Bool(self.clone()))
171    }
172
173    #[inline]
174    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
175        builder.extend_from_array(self)
176    }
177}
178
179impl ArrayStatisticsImpl for BoolArray {
180    fn _stats_ref(&self) -> StatsSetRef<'_> {
181        self.stats_set.to_ref(self)
182    }
183}
184
185impl ArrayValidityImpl for BoolArray {
186    #[inline]
187    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
188        self.validity.is_valid(index)
189    }
190
191    #[inline]
192    fn _all_valid(&self) -> VortexResult<bool> {
193        self.validity.all_valid()
194    }
195
196    #[inline]
197    fn _all_invalid(&self) -> VortexResult<bool> {
198        self.validity.all_invalid()
199    }
200
201    #[inline]
202    fn _validity_mask(&self) -> VortexResult<Mask> {
203        self.validity.to_mask(self.len())
204    }
205}
206
207impl ArrayVariantsImpl for BoolArray {
208    fn _as_bool_typed(&self) -> Option<&dyn BoolArrayTrait> {
209        Some(self)
210    }
211}
212
213impl BoolArrayTrait for BoolArray {}
214
/// Extension methods for boolean buffers.
pub trait BooleanBufferExt {
    /// Slices away every whole byte covered by the buffer's bit offset, so that the returned
    /// buffer has an offset below 8.
    fn shrink_offset(self) -> Self;
}
219
220impl BooleanBufferExt for BooleanBuffer {
221    fn shrink_offset(self) -> Self {
222        let byte_offset = self.offset() / 8;
223        let bit_offset = self.offset() % 8;
224        let len = self.len();
225        let buffer = self
226            .into_inner()
227            .slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
228        BooleanBuffer::new(buffer, bit_offset, len)
229    }
230}