vortex_bytebool/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::ops::Range;
6
7use arrow_buffer::BooleanBuffer;
8use vortex_array::arrays::BoolArray;
9use vortex_array::stats::{ArrayStats, StatsSetRef};
10use vortex_array::validity::Validity;
11use vortex_array::vtable::{
12    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityHelper,
13    ValidityVTableFromValidityHelper,
14};
15use vortex_array::{ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, vtable};
16use vortex_buffer::ByteBuffer;
17use vortex_dtype::DType;
18use vortex_error::vortex_panic;
19use vortex_scalar::Scalar;
20
// Invokes the `vtable!` macro (imported from `vortex_array`) for the `ByteBool` encoding.
// NOTE(review): `ByteBoolVTable` is used throughout this file but never declared here, so it is
// presumably generated by this macro invocation — confirm against the macro definition.
vtable!(ByteBool);
22
23impl VTable for ByteBoolVTable {
24    type Array = ByteBoolArray;
25    type Encoding = ByteBoolEncoding;
26
27    type ArrayVTable = Self;
28    type CanonicalVTable = Self;
29    type OperationsVTable = Self;
30    type ValidityVTable = ValidityVTableFromValidityHelper;
31    type VisitorVTable = Self;
32    type ComputeVTable = NotSupported;
33    type EncodeVTable = NotSupported;
34    type SerdeVTable = Self;
35    type PipelineVTable = NotSupported;
36
37    fn id(_encoding: &Self::Encoding) -> EncodingId {
38        EncodingId::new_ref("vortex.bytebool")
39    }
40
41    fn encoding(_array: &Self::Array) -> EncodingRef {
42        EncodingRef::new_ref(ByteBoolEncoding.as_ref())
43    }
44}
45
/// A boolean array that stores each element in its own byte.
#[derive(Clone, Debug)]
pub struct ByteBoolArray {
    /// Logical type of the array; set to `DType::Bool` by [`ByteBoolArray::new`].
    dtype: DType,
    /// Backing bytes, one per element; the array length is this buffer's length.
    buffer: ByteBuffer,
    /// Validity of the elements.
    validity: Validity,
    /// Statistics holder; starts out as `Default::default()` in [`ByteBoolArray::new`].
    stats_set: ArrayStats,
}
53
/// Marker type for the `ByteBool` encoding (used as `VTable::Encoding` for `ByteBoolVTable`).
#[derive(Clone, Debug)]
pub struct ByteBoolEncoding;
56
57impl ByteBoolArray {
58    pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
59        let length = buffer.len();
60        if let Some(vlen) = validity.maybe_len()
61            && length != vlen
62        {
63            vortex_panic!(
64                "Buffer length ({}) does not match validity length ({})",
65                length,
66                vlen
67            );
68        }
69        Self {
70            dtype: DType::Bool(validity.nullability()),
71            buffer,
72            validity,
73            stats_set: Default::default(),
74        }
75    }
76
77    // TODO(ngates): deprecate construction from vec
78    pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> Self {
79        let validity = validity.into();
80        // SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
81        let data: Vec<u8> = unsafe { std::mem::transmute(data) };
82        Self::new(ByteBuffer::from(data), validity)
83    }
84
85    pub fn buffer(&self) -> &ByteBuffer {
86        &self.buffer
87    }
88
89    pub fn as_slice(&self) -> &[bool] {
90        // Safety: The internal buffer contains byte-sized bools
91        unsafe { std::mem::transmute(self.buffer().as_slice()) }
92    }
93}
94
95impl ValidityHelper for ByteBoolArray {
96    fn validity(&self) -> &Validity {
97        &self.validity
98    }
99}
100
101impl ArrayVTable<ByteBoolVTable> for ByteBoolVTable {
102    fn len(array: &ByteBoolArray) -> usize {
103        array.buffer.len()
104    }
105
106    fn dtype(array: &ByteBoolArray) -> &DType {
107        &array.dtype
108    }
109
110    fn stats(array: &ByteBoolArray) -> StatsSetRef<'_> {
111        array.stats_set.to_ref(array.as_ref())
112    }
113}
114
115impl CanonicalVTable<ByteBoolVTable> for ByteBoolVTable {
116    fn canonicalize(array: &ByteBoolArray) -> Canonical {
117        let boolean_buffer = BooleanBuffer::from(array.as_slice());
118        let validity = array.validity().clone();
119        Canonical::Bool(BoolArray::new(boolean_buffer, validity))
120    }
121}
122
123impl OperationsVTable<ByteBoolVTable> for ByteBoolVTable {
124    fn slice(array: &ByteBoolArray, range: Range<usize>) -> ArrayRef {
125        ByteBoolArray::new(
126            array.buffer().slice(range.clone()),
127            array.validity().slice(range),
128        )
129        .into_array()
130    }
131
132    fn scalar_at(array: &ByteBoolArray, index: usize) -> Scalar {
133        Scalar::bool(array.buffer()[index] == 1, array.dtype().nullability())
134    }
135}
136
137impl From<Vec<bool>> for ByteBoolArray {
138    fn from(value: Vec<bool>) -> Self {
139        Self::from_vec(value, Validity::AllValid)
140    }
141}
142
143impl From<Vec<Option<bool>>> for ByteBoolArray {
144    fn from(value: Vec<Option<bool>>) -> Self {
145        let validity = Validity::from_iter(value.iter().map(|v| v.is_some()));
146
147        // This doesn't reallocate, and the compiler even vectorizes it
148        let data = value.into_iter().map(Option::unwrap_or_default).collect();
149
150        Self::from_vec(data, validity)
151    }
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): disabled test kept for reference; `check_metadata`, `SerdeMetadata` and
    // `ByteBoolMetadata` are not imported in the visible part of this file.
    // #[cfg_attr(miri, ignore)]
    // #[test]
    // fn test_bytebool_metadata() {
    //     check_metadata(
    //         "bytebool.metadata",
    //         SerdeMetadata(ByteBoolMetadata {
    //             validity: ValidityMetadata::AllValid,
    //         }),
    //     );
    // }

    /// Checks length and per-element validity for the three `From` construction shapes:
    /// plain bools, a mix of `Some`/`None`, and all `None`.
    #[test]
    fn test_validity_construction() {
        // Plain bools: every element is valid.
        let bools = vec![true, false];
        let expected_len = bools.len();
        let array = ByteBoolArray::from(bools);
        assert_eq!(expected_len, array.len());
        assert!((0..array.len()).all(|i| array.is_valid(i)));

        // Mixed options: only the `None` slot is invalid.
        let mixed = vec![Some(true), None, Some(false)];
        let array = ByteBoolArray::from(mixed);
        assert!(array.is_valid(0));
        assert!(!array.is_valid(1));
        assert!(array.is_valid(2));
        assert_eq!(array.len(), 3);

        // All `None`: no element is valid.
        let nulls: Vec<Option<bool>> = vec![None, None];
        let expected_len = nulls.len();
        let array = ByteBoolArray::from(nulls);
        assert_eq!(expected_len, array.len());
        assert!((0..array.len()).all(|i| !array.is_valid(i)));
        assert_eq!(array.len(), 2);
    }
}
199}