vortex_roaring/boolean/
mod.rs1use std::fmt::{Debug, Display};
2use std::sync::Arc;
3
4use arrow_buffer::{BooleanBuffer, MutableBuffer};
5pub use compress::*;
6use croaring::Native;
7pub use croaring::{Bitmap, Portable};
8use serde::{Deserialize, Serialize};
9use vortex_array::array::BoolArray;
10use vortex_array::encoding::ids;
11use vortex_array::stats::StatsSet;
12use vortex_array::validity::{LogicalValidity, ValidityVTable};
13use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
14use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
15use vortex_array::{
16 impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
17};
18use vortex_buffer::Buffer;
19use vortex_dtype::{DType, Nullability};
20use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult};
21
22mod compress;
23mod compute;
24mod stats;
25
26impl_encoding!("vortex.roaring_bool", ids::ROARING_BOOL, RoaringBool);
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct RoaringBoolMetadata;
30
31impl Display for RoaringBoolMetadata {
32 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33 Debug::fmt(self, f)
34 }
35}
36
37impl RoaringBoolArray {
38 pub fn try_new(bitmap: Bitmap, length: usize) -> VortexResult<Self> {
39 let max_set = bitmap.maximum().unwrap_or(0) as usize;
40 if length < max_set {
41 vortex_bail!(
42 "RoaringBoolArray length is less than bitmap maximum {}",
43 max_set
44 )
45 }
46
47 let stats = StatsSet::bools_with_true_and_null_count(
48 bitmap.statistics().cardinality as usize,
49 0,
50 length,
51 );
52
53 ArrayData::try_new_owned(
54 &RoaringBoolEncoding,
55 DType::Bool(Nullability::NonNullable),
56 length,
57 Arc::new(RoaringBoolMetadata),
58 Some(Buffer::from(bitmap.serialize::<Native>())),
59 vec![].into(),
60 stats,
61 )?
62 .try_into()
63 }
64
65 pub fn bitmap(&self) -> Bitmap {
66 Bitmap::deserialize::<Native>(self.buffer().as_ref())
68 }
69
70 pub fn encode(array: ArrayData) -> VortexResult<ArrayData> {
71 if let Ok(bools) = BoolArray::try_from(array) {
72 roaring_bool_encode(bools).map(|a| a.into_array())
73 } else {
74 vortex_bail!("RoaringBool can only encode boolean arrays")
75 }
76 }
77
78 pub fn buffer(&self) -> &Buffer {
79 self.as_ref()
80 .buffer()
81 .vortex_expect("Missing buffer in PrimitiveArray")
82 }
83}
84
85impl ArrayTrait for RoaringBoolArray {}
86
87impl VariantsVTable<RoaringBoolArray> for RoaringBoolEncoding {
88 fn as_bool_array<'a>(&self, array: &'a RoaringBoolArray) -> Option<&'a dyn BoolArrayTrait> {
89 Some(array)
90 }
91}
92
93impl BoolArrayTrait for RoaringBoolArray {}
94
95impl VisitorVTable<RoaringBoolArray> for RoaringBoolEncoding {
96 fn accept(&self, array: &RoaringBoolArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
97 visitor.visit_buffer(array.buffer())
102 }
103}
104
105impl ValidityVTable<RoaringBoolArray> for RoaringBoolEncoding {
106 fn is_valid(&self, _array: &RoaringBoolArray, _index: usize) -> bool {
107 true
108 }
109
110 fn logical_validity(&self, array: &RoaringBoolArray) -> LogicalValidity {
111 LogicalValidity::AllValid(array.len())
112 }
113}
114
115impl IntoCanonical for RoaringBoolArray {
116 fn into_canonical(self) -> VortexResult<Canonical> {
117 let bitset = self
120 .bitmap()
121 .to_bitset()
122 .ok_or_else(|| vortex_err!("Failed to convert RoaringBitmap to Bitset"))?;
123
124 let byte_length = (self.len() + 7) / 8;
125 let mut buffer = MutableBuffer::with_capacity(byte_length);
126 buffer.extend_from_slice(bitset.as_slice());
127 if byte_length > bitset.size_in_bytes() {
128 buffer.extend_zeros(byte_length - bitset.size_in_bytes());
129 }
130 Ok(Canonical::Bool(BoolArray::new(
131 BooleanBuffer::new(buffer.into(), 0, self.len()),
132 Nullability::NonNullable,
133 )))
134 }
135}
136
137#[cfg(test)]
138mod test {
139 use std::iter;
140
141 use vortex_array::array::BoolArray;
142 use vortex_array::{ArrayLen, IntoArrayData, IntoArrayVariant};
143
144 use crate::RoaringBoolArray;
145
146 #[test]
147 #[cfg_attr(miri, ignore)]
148 pub fn iter() {
149 let bool: BoolArray = BoolArray::from_iter([true, false, true, true]);
150 let array = RoaringBoolArray::encode(bool.into_array()).unwrap();
151 let round_trip = RoaringBoolArray::try_from(array).unwrap();
152 let values = round_trip.bitmap().to_vec();
153 assert_eq!(values, vec![0, 2, 3]);
154 }
155
156 #[test]
157 #[cfg_attr(miri, ignore)]
158 pub fn trailing_false() {
159 let bool: BoolArray = BoolArray::from_iter(
160 [true, true]
161 .into_iter()
162 .chain(iter::repeat(false).take(100)),
163 );
164 let array = RoaringBoolArray::encode(bool.into_array()).unwrap();
165 let round_trip = RoaringBoolArray::try_from(array).unwrap();
166 let bool_arr = round_trip.into_bool().unwrap();
167 assert_eq!(bool_arr.len(), 102);
168 }
169}