vortex_bytebool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use arrow_buffer::BooleanBuffer;
7use vortex_array::arrays::BoolArray;
8use vortex_array::stats::{ArrayStats, StatsSetRef};
9use vortex_array::validity::Validity;
10use vortex_array::vtable::{
11    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
12    ValidityVTableFromValidityHelper,
13};
14use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
15use vortex_buffer::ByteBuffer;
16use vortex_dtype::DType;
17use vortex_error::{VortexResult, vortex_panic};
18use vortex_scalar::Scalar;
19
// Invokes the `vtable!` macro for `ByteBool`. The macro definition is not visible in this file;
// presumably it declares the `ByteBoolVTable` type implemented below — confirm in `vortex_array`.
vtable!(ByteBool);
21
22impl VTable for ByteBoolVTable {
23    type Array = ByteBoolArray;
24    type Encoding = ByteBoolEncoding;
25
26    type ArrayVTable = Self;
27    type CanonicalVTable = Self;
28    type OperationsVTable = Self;
29    type ValidityVTable = ValidityVTableFromValidityHelper;
30    type VisitorVTable = Self;
31    type ComputeVTable = NotSupported;
32    type EncodeVTable = NotSupported;
33    type SerdeVTable = Self;
34    type PipelineVTable = NotSupported;
35
36    fn id(_encoding: &Self::Encoding) -> EncodingId {
37        EncodingId::new_ref("vortex.bytebool")
38    }
39
40    fn encoding(_array: &Self::Array) -> EncodingRef {
41        EncodingRef::new_ref(ByteBoolEncoding.as_ref())
42    }
43}
44
45#[derive(Clone, Debug)]
46pub struct ByteBoolArray {
47    dtype: DType,
48    buffer: ByteBuffer,
49    validity: Validity,
50    stats_set: ArrayStats,
51}
52
/// Unit type naming the byte-bool encoding; it carries no state.
#[derive(Debug, Clone)]
pub struct ByteBoolEncoding;
55
56impl ByteBoolArray {
57    pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
58        let length = buffer.len();
59        if let Some(vlen) = validity.maybe_len()
60            && length != vlen
61        {
62            vortex_panic!(
63                "Buffer length ({}) does not match validity length ({})",
64                length,
65                vlen
66            );
67        }
68        Self {
69            dtype: DType::Bool(validity.nullability()),
70            buffer,
71            validity,
72            stats_set: Default::default(),
73        }
74    }
75
76    // TODO(ngates): deprecate construction from vec
77    pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> Self {
78        let validity = validity.into();
79        // SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
80        let data: Vec<u8> = unsafe { std::mem::transmute(data) };
81        Self::new(ByteBuffer::from(data), validity)
82    }
83
84    pub fn buffer(&self) -> &ByteBuffer {
85        &self.buffer
86    }
87
88    pub fn as_slice(&self) -> &[bool] {
89        // Safety: The internal buffer contains byte-sized bools
90        unsafe { std::mem::transmute(self.buffer().as_slice()) }
91    }
92}
93
94impl ValidityHelper for ByteBoolArray {
95    fn validity(&self) -> &Validity {
96        &self.validity
97    }
98}
99
100impl ArrayVTable<ByteBoolVTable> for ByteBoolVTable {
101    fn len(array: &ByteBoolArray) -> usize {
102        array.buffer.len()
103    }
104
105    fn dtype(array: &ByteBoolArray) -> &DType {
106        &array.dtype
107    }
108
109    fn stats(array: &ByteBoolArray) -> StatsSetRef<'_> {
110        array.stats_set.to_ref(array.as_ref())
111    }
112}
113
114impl CanonicalVTable<ByteBoolVTable> for ByteBoolVTable {
115    fn canonicalize(array: &ByteBoolArray) -> VortexResult<Canonical> {
116        let boolean_buffer = BooleanBuffer::from(array.as_slice());
117        let validity = array.validity().clone();
118        Ok(Canonical::Bool(BoolArray::new(boolean_buffer, validity)))
119    }
120}
121
122impl OperationsVTable<ByteBoolVTable> for ByteBoolVTable {
123    fn slice(array: &ByteBoolArray, start: usize, stop: usize) -> ArrayRef {
124        ByteBoolArray::new(
125            array.buffer().slice(start..stop),
126            array.validity().slice(start, stop),
127        )
128        .into_array()
129    }
130
131    fn scalar_at(array: &ByteBoolArray, index: usize) -> Scalar {
132        Scalar::bool(array.buffer()[index] == 1, array.dtype().nullability())
133    }
134}
135
136impl From<Vec<bool>> for ByteBoolArray {
137    fn from(value: Vec<bool>) -> Self {
138        Self::from_vec(value, Validity::AllValid)
139    }
140}
141
142impl From<Vec<Option<bool>>> for ByteBoolArray {
143    fn from(value: Vec<Option<bool>>) -> Self {
144        let validity = Validity::from_iter(value.iter().map(|v| v.is_some()));
145
146        // This doesn't reallocate, and the compiler even vectorizes it
147        let data = value.into_iter().map(Option::unwrap_or_default).collect();
148
149        Self::from_vec(data, validity)
150    }
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the metadata test below is disabled and kept as found; the helpers it
    // references are not imported in this module — confirm whether it should be restored or removed.
    //
    // #[cfg_attr(miri, ignore)]
    // #[test]
    // fn test_bytebool_metadata() {
    //     check_metadata(
    //         "bytebool.metadata",
    //         SerdeMetadata(ByteBoolMetadata {
    //             validity: ValidityMetadata::AllValid,
    //         }),
    //     );
    // }

    /// Checks length and per-element validity for the `From<Vec<bool>>` and
    /// `From<Vec<Option<bool>>>` constructors.
    #[test]
    fn test_validity_construction() {
        // Plain bools: every element is valid.
        let plain = vec![true, false];
        let expected_len = plain.len();

        let arr = ByteBoolArray::from(plain);
        assert_eq!(expected_len, arr.len());
        assert!((0..arr.len()).all(|idx| arr.is_valid(idx).unwrap()));

        // Mixed options: only the `Some` slots are valid.
        let arr = ByteBoolArray::from(vec![Some(true), None, Some(false)]);
        assert!(arr.is_valid(0).unwrap());
        assert!(!arr.is_valid(1).unwrap());
        assert!(arr.is_valid(2).unwrap());
        assert_eq!(arr.len(), 3);

        // All `None`: no element is valid.
        let absent: Vec<Option<bool>> = vec![None, None];
        let expected_len = absent.len();

        let arr = ByteBoolArray::from(absent);
        assert_eq!(expected_len, arr.len());
        assert!((0..arr.len()).all(|idx| !arr.is_valid(idx).unwrap()));
        assert_eq!(arr.len(), 2);
    }
}
198}