vortex_roaring/integer/
mod.rs

1use std::fmt::{Debug, Display};
2use std::sync::Arc;
3
4pub use compress::*;
5use croaring::{Bitmap, Portable};
6use serde::{Deserialize, Serialize};
7use vortex_array::array::PrimitiveArray;
8use vortex_array::compute::try_cast;
9use vortex_array::encoding::ids;
10use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
11use vortex_array::validity::{LogicalValidity, Validity, ValidityVTable};
12use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
13use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
14use vortex_array::{
15    impl_encoding, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
16    IntoCanonical,
17};
18use vortex_buffer::Buffer;
19use vortex_dtype::Nullability::NonNullable;
20use vortex_dtype::{DType, PType};
21use vortex_error::{vortex_bail, VortexExpect as _, VortexResult};
22
23mod compress;
24mod compute;
25
// Registers this encoding under the string id "vortex.roaring_int" and the numeric code
// `ids::ROARING_INT`.
// NOTE(review): presumably this macro is what generates the `RoaringIntArray`,
// `RoaringIntEncoding` and `RoaringIntMetadata` plumbing used below — confirm against
// the `impl_encoding!` definition in `vortex_array`.
26impl_encoding!("vortex.roaring_int", ids::ROARING_INT, RoaringInt);
27
/// Serializable metadata attached to every roaring-int array.
///
/// `RoaringIntArray::try_new` wraps one of these in an `Arc` and hands it to
/// `ArrayData::try_new_owned` next to the serialized bitmap buffer.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct RoaringIntMetadata {
    /// Primitive type the array presents its values as; `try_new` only accepts
    /// unsigned integer types here.
30    ptype: PType,
31}
32
33impl Display for RoaringIntMetadata {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        Debug::fmt(self, f)
36    }
37}
38
39impl RoaringIntArray {
40    pub fn try_new(bitmap: Bitmap, ptype: PType) -> VortexResult<Self> {
41        if !ptype.is_unsigned_int() {
42            vortex_bail!(MismatchedTypes: "unsigned int", ptype);
43        }
44
45        let length = bitmap.statistics().cardinality as usize;
46        let max = bitmap.maximum();
47        if max
48            .map(|mv| mv as u64 > ptype.max_value_as_u64())
49            .unwrap_or(false)
50        {
51            vortex_bail!(
52                "Bitmap's maximum value ({}) is greater than the maximum value for the primitive type ({})",
53                max.vortex_expect("Bitmap has no maximum value despite having just checked"),
54                ptype
55            );
56        }
57
58        let mut stats = StatsSet::default();
59        stats.set(Stat::NullCount, 0);
60        stats.set(Stat::Max, max);
61        stats.set(Stat::Min, bitmap.minimum());
62        stats.set(Stat::IsConstant, length <= 1);
63        stats.set(Stat::IsSorted, true);
64        stats.set(Stat::IsStrictSorted, true);
65
66        ArrayData::try_new_owned(
67            &RoaringIntEncoding,
68            DType::Primitive(ptype, NonNullable),
69            length,
70            Arc::new(RoaringIntMetadata { ptype }),
71            Some(Buffer::from(bitmap.serialize::<Portable>())),
72            vec![].into(),
73            stats,
74        )?
75        .try_into()
76    }
77
78    pub fn owned_bitmap(&self) -> Bitmap {
79        Bitmap::deserialize::<Portable>(
80            self.as_ref()
81                .buffer()
82                .vortex_expect("RoaringBoolArray buffer is missing")
83                .as_ref(),
84        )
85    }
86
87    pub fn cached_ptype(&self) -> PType {
88        self.metadata().ptype
89    }
90
91    pub fn encode(array: ArrayData) -> VortexResult<ArrayData> {
92        if let Ok(parray) = PrimitiveArray::try_from(array) {
93            Ok(roaring_int_encode(parray)?.into_array())
94        } else {
95            vortex_bail!("RoaringInt can only encode primitive arrays")
96        }
97    }
98}
99
// Empty impl: nothing is overridden here, so `RoaringIntArray` relies entirely on whatever
// `ArrayTrait` provides by default.
100impl ArrayTrait for RoaringIntArray {}
101
102impl VariantsVTable<RoaringIntArray> for RoaringIntEncoding {
103    fn as_primitive_array<'a>(
104        &self,
105        array: &'a RoaringIntArray,
106    ) -> Option<&'a dyn PrimitiveArrayTrait> {
107        Some(array)
108    }
109}
110
// Empty impl: this is what lets `as_primitive_array` hand a `RoaringIntArray` back as a
// `&dyn PrimitiveArrayTrait`; no trait items are overridden.
111impl PrimitiveArrayTrait for RoaringIntArray {}
112
113impl ValidityVTable<RoaringIntArray> for RoaringIntEncoding {
114    fn is_valid(&self, _array: &RoaringIntArray, _index: usize) -> bool {
115        true
116    }
117
118    fn logical_validity(&self, array: &RoaringIntArray) -> LogicalValidity {
119        LogicalValidity::AllValid(array.len())
120    }
121}
122
123impl IntoCanonical for RoaringIntArray {
124    fn into_canonical(self) -> VortexResult<Canonical> {
125        try_cast(
126            PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable),
127            self.dtype(),
128        )
129        .and_then(ArrayData::into_canonical)
130    }
131}
132
133impl VisitorVTable<RoaringIntArray> for RoaringIntEncoding {
134    fn accept(&self, array: &RoaringIntArray, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
135        visitor.visit_buffer(
136            array
137                .as_ref()
138                .buffer()
139                .vortex_expect("Missing buffer in RoaringIntArray"),
140        )
141    }
142}
143
144impl StatisticsVTable<RoaringIntArray> for RoaringIntEncoding {
145    fn compute_statistics(&self, array: &RoaringIntArray, stat: Stat) -> VortexResult<StatsSet> {
146        // possibly faster to write an accumulator over the iterator, though not necessarily
147        if stat == Stat::TrailingZeroFreq || stat == Stat::BitWidthFreq || stat == Stat::RunCount {
148            let primitive =
149                PrimitiveArray::from_vec(array.owned_bitmap().to_vec(), Validity::NonNullable);
150            primitive.statistics().compute_all(&[
151                Stat::TrailingZeroFreq,
152                Stat::BitWidthFreq,
153                Stat::RunCount,
154            ])
155        } else {
156            Ok(StatsSet::default())
157        }
158    }
159}