vortex_roaring/integer/
mod.rs1use std::fmt::{Debug, Display};
2use std::sync::Arc;
3
4pub use compress::*;
5use croaring::{Bitmap, Portable};
6use serde::{Deserialize, Serialize};
7use vortex_array::array::PrimitiveArray;
8use vortex_array::compute::try_cast;
9use vortex_array::encoding::ids;
10use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
11use vortex_array::validity::{LogicalValidity, Validity, ValidityVTable};
12use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
13use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
14use vortex_array::{
15 impl_encoding, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
16 IntoCanonical,
17};
18use vortex_buffer::Buffer;
19use vortex_dtype::Nullability::NonNullable;
20use vortex_dtype::{DType, PType};
21use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
22
23mod compress;
24mod compute;
25
26impl_encoding!("vortex.roaring_int", ids::ROARING_INT, RoaringInt);
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct RoaringIntMetadata {
30 ptype: PType,
31}
32
33impl Display for RoaringIntMetadata {
34 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35 Debug::fmt(self, f)
36 }
37}
38
39impl RoaringIntArray {
40 pub fn try_new(bitmap: Bitmap, ptype: PType) -> VortexResult<Self> {
41 if !ptype.is_unsigned_int() {
42 vortex_bail!(MismatchedTypes: "unsigned int", ptype);
43 }
44
45 let length = bitmap.statistics().cardinality as usize;
46 let max = bitmap.maximum();
47 if max
48 .map(|mv| mv as u64 > ptype.max_value_as_u64())
49 .unwrap_or(false)
50 {
51 vortex_bail!(
52 "Bitmap's maximum value ({}) is greater than the maximum value for the primitive type ({})",
53 max.vortex_expect("Bitmap has no maximum value despite having just checked"),
54 ptype
55 );
56 }
57
58 let mut stats = StatsSet::default();
59 stats.set(Stat::NullCount, 0);
60 stats.set(Stat::Max, max);
61 stats.set(Stat::Min, bitmap.minimum());
62 stats.set(Stat::IsConstant, length <= 1);
63 stats.set(Stat::IsSorted, true);
64 stats.set(Stat::IsStrictSorted, true);
65
66 ArrayData::try_new_owned(
67 &RoaringIntEncoding,
68 DType::Primitive(ptype, NonNullable),
69 length,
70 Arc::new(RoaringIntMetadata { ptype }),
71 Some(Buffer::from(bitmap.serialize::<Portable>())),
72 vec![].into(),
73 stats,
74 )?
75 .try_into()
76 }
77
78 pub fn owned_bitmap(&self) -> Bitmap {
79 Bitmap::deserialize::<Portable>(
80 self.as_ref()
81 .buffer()
82 .vortex_expect("RoaringBoolArray buffer is missing")
83 .as_ref(),
84 )
85 }
86
87 pub fn cached_ptype(&self) -> PType {
88 self.metadata().ptype
89 }
90
91 pub fn encode(array: ArrayData) -> VortexResult<ArrayData> {
92 if let Ok(parray) = PrimitiveArray::try_from(array) {
93 Ok(roaring_int_encode(parray)?.into_array())
94 } else {
95 vortex_bail!("RoaringInt can only encode primitive arrays")
96 }
97 }
98}
99
100impl ArrayTrait for RoaringIntArray {}
101
102impl VariantsVTable<RoaringIntArray> for RoaringIntEncoding {
103 fn as_primitive_array<'a>(
104 &self,
105 array: &'a RoaringIntArray,
106 ) -> Option<&'a dyn PrimitiveArrayTrait> {
107 Some(array)
108 }
109}
110
111impl PrimitiveArrayTrait for RoaringIntArray {}
112
113impl ValidityVTable<RoaringIntArray> for RoaringIntEncoding {
114 fn is_valid(&self, _array: &RoaringIntArray, _index: usize) -> bool {
115 true
116 }
117
118 fn logical_validity(&self, array: &RoaringIntArray) -> LogicalValidity {
119 LogicalValidity::AllValid(array.len())
120 }
121}
122
123impl IntoCanonical for RoaringIntArray {
124 fn into_canonical(self) -> VortexResult<Canonical> {
125 try_cast(
126 PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable),
127 self.dtype(),
128 )
129 .and_then(ArrayData::into_canonical)
130 }
131}
132
133impl VisitorVTable<RoaringIntArray> for RoaringIntEncoding {
134 fn accept(&self, array: &RoaringIntArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
135 visitor.visit_buffer(
136 array
137 .as_ref()
138 .buffer()
139 .vortex_expect("Missing buffer in RoaringIntArray"),
140 )
141 }
142}
143
144impl StatisticsVTable<RoaringIntArray> for RoaringIntEncoding {
145 fn compute_statistics(&self, array: &RoaringIntArray, stat: Stat) -> VortexResult<StatsSet> {
146 if stat == Stat::TrailingZeroFreq || stat == Stat::BitWidthFreq || stat == Stat::RunCount {
148 let primitive =
149 PrimitiveArray::from_vec(array.owned_bitmap().to_vec(), Validity::NonNullable);
150 primitive.statistics().compute_all(&[
151 Stat::TrailingZeroFreq,
152 Stat::BitWidthFreq,
153 Stat::RunCount,
154 ])
155 } else {
156 Ok(StatsSet::default())
157 }
158 }
159}