vortex_bytebool/
array.rs

1use std::fmt::Debug;
2
3use arrow_buffer::BooleanBuffer;
4use vortex_array::arrays::BoolArray;
5use vortex_array::stats::{ArrayStats, StatsSetRef};
6use vortex_array::validity::Validity;
7use vortex_array::vtable::{
8    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
9    ValidityVTableFromValidityHelper,
10};
11use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
12use vortex_buffer::ByteBuffer;
13use vortex_dtype::DType;
14use vortex_error::{VortexResult, vortex_panic};
15use vortex_scalar::Scalar;
16
17vtable!(ByteBool);
18
19impl VTable for ByteBoolVTable {
20    type Array = ByteBoolArray;
21    type Encoding = ByteBoolEncoding;
22
23    type ArrayVTable = Self;
24    type CanonicalVTable = Self;
25    type OperationsVTable = Self;
26    type ValidityVTable = ValidityVTableFromValidityHelper;
27    type VisitorVTable = Self;
28    type ComputeVTable = NotSupported;
29    type EncodeVTable = NotSupported;
30    type SerdeVTable = Self;
31
32    fn id(_encoding: &Self::Encoding) -> EncodingId {
33        EncodingId::new_ref("vortex.bytebool")
34    }
35
36    fn encoding(_array: &Self::Array) -> EncodingRef {
37        EncodingRef::new_ref(ByteBoolEncoding.as_ref())
38    }
39}
40
41#[derive(Clone, Debug)]
42pub struct ByteBoolArray {
43    dtype: DType,
44    buffer: ByteBuffer,
45    validity: Validity,
46    stats_set: ArrayStats,
47}
48
/// Marker type for the `ByteBool` encoding, identified as `"vortex.bytebool"`.
#[derive(Debug, Clone)]
pub struct ByteBoolEncoding;
51
52impl ByteBoolArray {
53    pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
54        let length = buffer.len();
55        if let Some(vlen) = validity.maybe_len() {
56            if length != vlen {
57                vortex_panic!(
58                    "Buffer length ({}) does not match validity length ({})",
59                    length,
60                    vlen
61                );
62            }
63        }
64        Self {
65            dtype: DType::Bool(validity.nullability()),
66            buffer,
67            validity,
68            stats_set: Default::default(),
69        }
70    }
71
72    // TODO(ngates): deprecate construction from vec
73    pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> Self {
74        let validity = validity.into();
75        // SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
76        let data: Vec<u8> = unsafe { std::mem::transmute(data) };
77        Self::new(ByteBuffer::from(data), validity)
78    }
79
80    pub fn buffer(&self) -> &ByteBuffer {
81        &self.buffer
82    }
83
84    pub fn as_slice(&self) -> &[bool] {
85        // Safety: The internal buffer contains byte-sized bools
86        unsafe { std::mem::transmute(self.buffer().as_slice()) }
87    }
88}
89
90impl ValidityHelper for ByteBoolArray {
91    fn validity(&self) -> &Validity {
92        &self.validity
93    }
94}
95
96impl ArrayVTable<ByteBoolVTable> for ByteBoolVTable {
97    fn len(array: &ByteBoolArray) -> usize {
98        array.buffer.len()
99    }
100
101    fn dtype(array: &ByteBoolArray) -> &DType {
102        &array.dtype
103    }
104
105    fn stats(array: &ByteBoolArray) -> StatsSetRef<'_> {
106        array.stats_set.to_ref(array.as_ref())
107    }
108}
109
110impl CanonicalVTable<ByteBoolVTable> for ByteBoolVTable {
111    fn canonicalize(array: &ByteBoolArray) -> VortexResult<Canonical> {
112        let boolean_buffer = BooleanBuffer::from(array.as_slice());
113        let validity = array.validity().clone();
114        Ok(Canonical::Bool(BoolArray::new(boolean_buffer, validity)))
115    }
116}
117
118impl OperationsVTable<ByteBoolVTable> for ByteBoolVTable {
119    fn slice(array: &ByteBoolArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
120        Ok(ByteBoolArray::new(
121            array.buffer().slice(start..stop),
122            array.validity().slice(start, stop)?,
123        )
124        .into_array())
125    }
126
127    fn scalar_at(array: &ByteBoolArray, index: usize) -> VortexResult<Scalar> {
128        Ok(Scalar::bool(
129            array.buffer()[index] == 1,
130            array.dtype().nullability(),
131        ))
132    }
133}
134
135impl From<Vec<bool>> for ByteBoolArray {
136    fn from(value: Vec<bool>) -> Self {
137        Self::from_vec(value, Validity::AllValid)
138    }
139}
140
141impl From<Vec<Option<bool>>> for ByteBoolArray {
142    fn from(value: Vec<Option<bool>>) -> Self {
143        let validity = Validity::from_iter(value.iter().map(|v| v.is_some()));
144
145        // This doesn't reallocate, and the compiler even vectorizes it
146        let data = value.into_iter().map(Option::unwrap_or_default).collect();
147
148        Self::from_vec(data, validity)
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: the metadata test below is disabled; restore or remove it.
    // #[cfg_attr(miri, ignore)]
    // #[test]
    // fn test_bytebool_metadata() {
    //     check_metadata(
    //         "bytebool.metadata",
    //         SerdeMetadata(ByteBoolMetadata {
    //             validity: ValidityMetadata::AllValid,
    //         }),
    //     );
    // }

    /// Checks length and per-element validity for all-valid, mixed, and all-null inputs.
    #[test]
    fn test_validity_construction() {
        // Plain bools: every element is valid.
        let bools = vec![true, false];
        let expected_len = bools.len();
        let arr = ByteBoolArray::from(bools);
        assert_eq!(expected_len, arr.len());
        for idx in 0..arr.len() {
            assert!(arr.is_valid(idx).unwrap());
        }

        // Mixed options: only the `None` slot is invalid.
        let mixed = vec![Some(true), None, Some(false)];
        let arr = ByteBoolArray::from(mixed);
        assert!(arr.is_valid(0).unwrap());
        assert!(!arr.is_valid(1).unwrap());
        assert!(arr.is_valid(2).unwrap());
        assert_eq!(arr.len(), 3);

        // All `None`: every element is invalid.
        let nulls: Vec<Option<bool>> = vec![None, None];
        let expected_len = nulls.len();
        let arr = ByteBoolArray::from(nulls);
        assert_eq!(expected_len, arr.len());
        for idx in 0..arr.len() {
            assert!(!arr.is_valid(idx).unwrap());
        }
        assert_eq!(arr.len(), 2);
    }
}
197}