vortex_array/arrays/bool/
array.rs

1use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
2use vortex_dtype::DType;
3use vortex_error::{VortexResult, vortex_panic};
4use vortex_mask::Mask;
5
6use crate::array::{Array, ArrayCanonicalImpl, ArrayValidityImpl, ArrayVariantsImpl};
7use crate::arrays::bool;
8use crate::arrays::bool::serde::BoolMetadata;
9use crate::builders::ArrayBuilder;
10use crate::stats::{ArrayStats, StatsSetRef};
11use crate::validity::Validity;
12use crate::variants::BoolArrayTrait;
13use crate::vtable::{EncodingVTable, VTableRef};
14use crate::{ArrayImpl, ArrayStatisticsImpl, Canonical, Encoding, EncodingId, RkyvMetadata};
15
16#[derive(Clone, Debug)]
17pub struct BoolArray {
18    dtype: DType,
19    buffer: BooleanBuffer,
20    pub(crate) validity: Validity,
21    // TODO(ngates): do we want a stats set to be shared across all arrays?
22    pub(crate) stats_set: ArrayStats,
23}
24
25pub struct BoolEncoding;
26impl Encoding for BoolEncoding {
27    type Array = BoolArray;
28    type Metadata = RkyvMetadata<BoolMetadata>;
29}
30
31impl EncodingVTable for BoolEncoding {
32    fn id(&self) -> EncodingId {
33        EncodingId::new_ref("vortex.bool")
34    }
35}
36
37impl BoolArray {
38    /// Creates a new [`BoolArray`] from a [`BooleanBuffer`] and [`Validity`], without checking
39    /// any invariants.
40    pub fn new(buffer: BooleanBuffer, validity: Validity) -> Self {
41        if let Some(len) = validity.maybe_len() {
42            if buffer.len() != len {
43                vortex_panic!(
44                    "Buffer and validity length mismatch: buffer={}, validity={}",
45                    buffer.len(),
46                    len
47                );
48            }
49        }
50
51        // Shrink the buffer to remove any whole bytes.
52        let buffer = buffer.shrink_offset();
53
54        Self {
55            dtype: DType::Bool(validity.nullability()),
56            buffer,
57            validity,
58            stats_set: ArrayStats::default(),
59        }
60    }
61
62    /// Returns the underlying [`BooleanBuffer`] of the array.
63    pub fn boolean_buffer(&self) -> &BooleanBuffer {
64        assert!(
65            self.buffer.offset() < 8,
66            "Offset must be <8, did we forget to call shrink_offset? Found {}",
67            self.buffer.offset()
68        );
69        &self.buffer
70    }
71
72    /// Returns the underlying [`Validity`] of the array.
73    pub fn validity(&self) -> &Validity {
74        &self.validity
75    }
76
77    /// Get a mutable version of this array.
78    ///
79    /// If the caller holds the only reference to the underlying buffer the underlying buffer is returned
80    /// otherwise a copy is created.
81    ///
82    /// The second value of the tuple is a bit_offset of first value in first byte of the returned builder
83    pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
84        let offset = self.buffer.offset();
85        let len = self.buffer.len();
86        let arrow_buffer = self.buffer.into_inner();
87        let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
88            arrow_buffer.into_mutable().unwrap_or_else(|b| {
89                let mut buf = MutableBuffer::with_capacity(b.len());
90                buf.extend_from_slice(b.as_slice());
91                buf
92            })
93        } else {
94            let mut buf = MutableBuffer::with_capacity(arrow_buffer.len());
95            buf.extend_from_slice(arrow_buffer.as_slice());
96            buf
97        };
98
99        (
100            BooleanBufferBuilder::new_from_buffer(mutable_buf, offset + len),
101            offset,
102        )
103    }
104}
105
106impl ArrayImpl for BoolArray {
107    type Encoding = BoolEncoding;
108
109    #[inline]
110    fn _len(&self) -> usize {
111        self.buffer.len()
112    }
113
114    #[inline]
115    fn _dtype(&self) -> &DType {
116        &self.dtype
117    }
118
119    #[inline]
120    fn _vtable(&self) -> VTableRef {
121        VTableRef::new_ref(&BoolEncoding)
122    }
123}
124
125impl ArrayCanonicalImpl for BoolArray {
126    #[inline]
127    fn _to_canonical(&self) -> VortexResult<Canonical> {
128        Ok(Canonical::Bool(self.clone()))
129    }
130
131    #[inline]
132    fn _append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
133        builder.extend_from_array(self)
134    }
135}
136
137impl ArrayStatisticsImpl for BoolArray {
138    fn _stats_ref(&self) -> StatsSetRef<'_> {
139        self.stats_set.to_ref(self)
140    }
141}
142
143impl ArrayValidityImpl for BoolArray {
144    #[inline]
145    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
146        self.validity.is_valid(index)
147    }
148
149    #[inline]
150    fn _all_valid(&self) -> VortexResult<bool> {
151        self.validity.all_valid()
152    }
153
154    #[inline]
155    fn _all_invalid(&self) -> VortexResult<bool> {
156        self.validity.all_invalid()
157    }
158
159    #[inline]
160    fn _validity_mask(&self) -> VortexResult<Mask> {
161        self.validity.to_logical(self.len())
162    }
163}
164
165impl ArrayVariantsImpl for BoolArray {
166    fn _as_bool_typed(&self) -> Option<&dyn BoolArrayTrait> {
167        Some(self)
168    }
169}
170
171impl BoolArrayTrait for BoolArray {}
172
/// Extension trait for normalizing the bit offset of a boolean buffer.
pub trait BooleanBufferExt {
    /// Drops every whole byte covered by the buffer's bit offset, so the resulting offset is
    /// less than 8.
    fn shrink_offset(self) -> Self;
}
177
178impl BooleanBufferExt for BooleanBuffer {
179    fn shrink_offset(self) -> Self {
180        let byte_offset = self.offset() / 8;
181        let bit_offset = self.offset() % 8;
182        let len = self.len();
183        let buffer = self.into_inner().slice(byte_offset);
184        BooleanBuffer::new(buffer, bit_offset, len)
185    }
186}