vortex_bytebool/
array.rs

1use std::fmt::Debug;
2
3use arrow_buffer::BooleanBuffer;
4use vortex_array::arrays::BoolArray;
5use vortex_array::stats::{ArrayStats, StatsSetRef};
6use vortex_array::validity::Validity;
7use vortex_array::variants::BoolArrayTrait;
8use vortex_array::vtable::VTableRef;
9use vortex_array::{
10    Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
11    ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, try_from_array_ref,
12};
13use vortex_buffer::ByteBuffer;
14use vortex_dtype::DType;
15use vortex_error::{VortexResult, vortex_panic};
16use vortex_mask::Mask;
17
18#[derive(Clone, Debug)]
19pub struct ByteBoolArray {
20    dtype: DType,
21    buffer: ByteBuffer,
22    validity: Validity,
23    stats_set: ArrayStats,
24}
25
26try_from_array_ref!(ByteBoolArray);
27
28#[derive(Debug)]
29pub struct ByteBoolEncoding;
30impl Encoding for ByteBoolEncoding {
31    type Array = ByteBoolArray;
32    type Metadata = EmptyMetadata;
33}
34
35impl ByteBoolArray {
36    pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
37        let length = buffer.len();
38        if let Some(vlen) = validity.maybe_len() {
39            if length != vlen {
40                vortex_panic!(
41                    "Buffer length ({}) does not match validity length ({})",
42                    length,
43                    vlen
44                );
45            }
46        }
47        Self {
48            dtype: DType::Bool(validity.nullability()),
49            buffer,
50            validity,
51            stats_set: Default::default(),
52        }
53    }
54
55    // TODO(ngates): deprecate construction from vec
56    pub fn from_vec<V: Into<Validity>>(data: Vec<bool>, validity: V) -> Self {
57        let validity = validity.into();
58        // SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
59        let data: Vec<u8> = unsafe { std::mem::transmute(data) };
60        Self::new(ByteBuffer::from(data), validity)
61    }
62
63    pub fn buffer(&self) -> &ByteBuffer {
64        &self.buffer
65    }
66
67    pub fn validity(&self) -> &Validity {
68        &self.validity
69    }
70
71    pub fn as_slice(&self) -> &[bool] {
72        // Safety: The internal buffer contains byte-sized bools
73        unsafe { std::mem::transmute(self.buffer().as_slice()) }
74    }
75}
76
77impl ArrayImpl for ByteBoolArray {
78    type Encoding = ByteBoolEncoding;
79
80    fn _len(&self) -> usize {
81        self.buffer.len()
82    }
83
84    fn _dtype(&self) -> &DType {
85        &self.dtype
86    }
87
88    fn _vtable(&self) -> VTableRef {
89        VTableRef::new_ref(&ByteBoolEncoding)
90    }
91
92    fn _with_children(&self, children: &[ArrayRef]) -> VortexResult<Self> {
93        let validity = if self.validity().is_array() {
94            Validity::Array(children[0].clone())
95        } else {
96            self.validity().clone()
97        };
98
99        Ok(Self::new(self.buffer().clone(), validity))
100    }
101}
102
103impl ArrayCanonicalImpl for ByteBoolArray {
104    fn _to_canonical(&self) -> VortexResult<Canonical> {
105        let boolean_buffer = BooleanBuffer::from(self.as_slice());
106        let validity = self.validity().clone();
107        Ok(Canonical::Bool(BoolArray::new(boolean_buffer, validity)))
108    }
109}
110
111impl ArrayStatisticsImpl for ByteBoolArray {
112    fn _stats_ref(&self) -> StatsSetRef<'_> {
113        self.stats_set.to_ref(self)
114    }
115}
116
117impl ArrayValidityImpl for ByteBoolArray {
118    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
119        self.validity.is_valid(index)
120    }
121
122    fn _all_valid(&self) -> VortexResult<bool> {
123        self.validity.all_valid()
124    }
125
126    fn _all_invalid(&self) -> VortexResult<bool> {
127        self.validity.all_invalid()
128    }
129
130    fn _validity_mask(&self) -> VortexResult<Mask> {
131        self.validity.to_mask(self.len())
132    }
133}
134
135impl ArrayVariantsImpl for ByteBoolArray {
136    fn _as_bool_typed(&self) -> Option<&dyn BoolArrayTrait> {
137        Some(self)
138    }
139}
140
141impl BoolArrayTrait for ByteBoolArray {}
142
143impl From<Vec<bool>> for ByteBoolArray {
144    fn from(value: Vec<bool>) -> Self {
145        Self::from_vec(value, Validity::AllValid)
146    }
147}
148
149impl From<Vec<Option<bool>>> for ByteBoolArray {
150    fn from(value: Vec<Option<bool>>) -> Self {
151        let validity = Validity::from_iter(value.iter().map(|v| v.is_some()));
152
153        // This doesn't reallocate, and the compiler even vectorizes it
154        let data = value.into_iter().map(Option::unwrap_or_default).collect();
155
156        Self::from_vec(data, validity)
157    }
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): the test below is disabled. It refers to `check_metadata`,
    // `SerdeMetadata`, `ByteBoolMetadata` and `ValidityMetadata`, none of which
    // are imported in this file; restore or delete it.
    //
    // #[cfg_attr(miri, ignore)]
    // #[test]
    // fn test_bytebool_metadata() {
    //     check_metadata(
    //         "bytebool.metadata",
    //         SerdeMetadata(ByteBoolMetadata {
    //             validity: ValidityMetadata::AllValid,
    //         }),
    //     );
    // }

    /// Checks length and per-element validity for arrays built from
    /// `Vec<bool>` and from `Vec<Option<bool>>`.
    #[test]
    fn test_validity_construction() {
        // Plain booleans: every element must be valid.
        let values = vec![true, false];
        let expected_len = values.len();

        let array = ByteBoolArray::from(values);
        assert_eq!(expected_len, array.len());

        for i in 0..array.len() {
            assert!(array.is_valid(i).unwrap());
        }

        // Mixed input: only the `None` slot is invalid.
        let array = ByteBoolArray::from(vec![Some(true), None, Some(false)]);
        assert!(array.is_valid(0).unwrap());
        assert!(!array.is_valid(1).unwrap());
        assert!(array.is_valid(2).unwrap());
        assert_eq!(array.len(), 3);

        // All `None`: every element must be invalid.
        let values: Vec<Option<bool>> = vec![None, None];
        let expected_len = values.len();

        let array = ByteBoolArray::from(values);
        assert_eq!(expected_len, array.len());

        for i in 0..array.len() {
            assert!(!array.is_valid(i).unwrap());
        }
        assert_eq!(array.len(), 2);
    }
}
206}