vortex_zigzag/
array.rs

1use vortex_array::stats::{ArrayStats, StatsSetRef};
2use vortex_array::vtable::{
3    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityChild,
4    ValidityVTableFromChild,
5};
6use vortex_array::{
7    Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable,
8};
9use vortex_dtype::{DType, PType, match_each_unsigned_integer_ptype};
10use vortex_error::{VortexResult, vortex_bail, vortex_err};
11use vortex_scalar::{PrimitiveScalar, Scalar};
12use zigzag::ZigZag as ExternalZigZag;
13
14use crate::compute::ZigZagEncoded;
15use crate::zigzag_decode;
16
// Invokes the `vtable!` macro imported from `vortex_array` for the ZigZag encoding.
// NOTE(review): `ZigZagVTable` is used throughout this file but never declared here, so
// presumably this macro generates it (plus related glue) — confirm against the macro definition.
vtable!(ZigZag);
18
19impl VTable for ZigZagVTable {
20    type Array = ZigZagArray;
21    type Encoding = ZigZagEncoding;
22
23    type ArrayVTable = Self;
24    type CanonicalVTable = Self;
25    type OperationsVTable = Self;
26    type ValidityVTable = ValidityVTableFromChild;
27    type VisitorVTable = Self;
28    type ComputeVTable = NotSupported;
29    type EncodeVTable = Self;
30    type SerdeVTable = Self;
31
32    fn id(_encoding: &Self::Encoding) -> EncodingId {
33        EncodingId::new_ref("vortex.zigzag")
34    }
35
36    fn encoding(_array: &Self::Array) -> EncodingRef {
37        EncodingRef::new_ref(ZigZagEncoding.as_ref())
38    }
39}
40
/// An array that exposes a signed-integer dtype on top of an unsigned-integer `encoded`
/// child array.
///
/// Instances are built through [`ZigZagArray::try_new`], which derives `dtype` from the
/// child's dtype.
#[derive(Clone, Debug)]
pub struct ZigZagArray {
    /// Signed dtype derived from the child's unsigned dtype, with the same nullability.
    dtype: DType,
    /// The unsigned-integer child array holding the encoded values.
    encoded: ArrayRef,
    /// Statistics holder for this array; starts out as the default value.
    stats_set: ArrayStats,
}
47
/// Unit type naming the ZigZag encoding; it is the `Encoding` associated type of
/// `ZigZagVTable`.
#[derive(Clone, Debug)]
pub struct ZigZagEncoding;
50
51impl ZigZagArray {
52    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
53        let encoded_dtype = encoded.dtype().clone();
54        if !encoded_dtype.is_unsigned_int() {
55            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
56        }
57
58        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
59            .with_nullability(encoded_dtype.nullability());
60
61        Ok(Self {
62            dtype,
63            encoded,
64            stats_set: Default::default(),
65        })
66    }
67
68    pub fn ptype(&self) -> PType {
69        self.dtype().as_ptype()
70    }
71
72    pub fn encoded(&self) -> &ArrayRef {
73        &self.encoded
74    }
75}
76
77impl ArrayVTable<ZigZagVTable> for ZigZagVTable {
78    fn len(array: &ZigZagArray) -> usize {
79        array.encoded.len()
80    }
81
82    fn dtype(array: &ZigZagArray) -> &DType {
83        &array.dtype
84    }
85
86    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
87        array.stats_set.to_ref(array.as_ref())
88    }
89}
90
91impl CanonicalVTable<ZigZagVTable> for ZigZagVTable {
92    fn canonicalize(array: &ZigZagArray) -> VortexResult<Canonical> {
93        zigzag_decode(array.encoded().to_primitive()?).map(Canonical::Primitive)
94    }
95}
96
97impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
98    fn slice(array: &ZigZagArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
99        Ok(ZigZagArray::try_new(array.encoded().slice(start, stop)?)?.into_array())
100    }
101
102    fn scalar_at(array: &ZigZagArray, index: usize) -> VortexResult<Scalar> {
103        let scalar = array.encoded().scalar_at(index)?;
104        if scalar.is_null() {
105            return Ok(scalar.reinterpret_cast(array.ptype()));
106        }
107
108        let pscalar = PrimitiveScalar::try_from(&scalar)?;
109        match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
110            Ok(Scalar::primitive(
111                <<P as ZigZagEncoded>::Int>::decode(pscalar.typed_value::<P>().ok_or_else(
112                    || {
113                        vortex_err!(
114                            "Cannot decode provided scalar: expected {}, got ptype {}",
115                            std::any::type_name::<P>(),
116                            pscalar.ptype()
117                        )
118                    },
119                )?),
120                array.dtype().nullability(),
121            ))
122        })
123    }
124}
125
126impl ValidityChild<ZigZagVTable> for ZigZagVTable {
127    fn validity_child(array: &ZigZagArray) -> &dyn Array {
128        array.encoded()
129    }
130}
131
#[cfg(test)]
mod test {
    use vortex_array::IntoArray;
    use vortex_buffer::buffer;
    use vortex_scalar::Scalar;

    use super::*;

    /// Statistics computed on the ZigZag-encoded array, and on a slice of it, must agree
    /// with those computed on the original signed array.
    #[test]
    fn test_compute_statistics() {
        let plain = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
        let canonical = plain.to_canonical().unwrap();
        let encoded = ZigZagEncoding.encode(&canonical, None).unwrap().unwrap();

        // Whole-array statistics.
        let expected_max = plain.statistics().compute_max::<i32>();
        assert_eq!(encoded.statistics().compute_max::<i32>(), expected_max);

        let expected_null_count = plain.statistics().compute_null_count();
        assert_eq!(
            encoded.statistics().compute_null_count(),
            expected_null_count
        );

        let expected_is_constant = plain.statistics().compute_is_constant();
        assert_eq!(
            encoded.statistics().compute_is_constant(),
            expected_is_constant
        );

        // The first two elements are `[1, -5]`, so the slice contains the overall minimum.
        let sliced = encoded.slice(0, 2).unwrap();
        let sliced = sliced.as_::<ZigZagVTable>();
        let last = sliced.len() - 1;
        assert_eq!(sliced.scalar_at(last).unwrap(), Scalar::from(-5i32));

        // Slice statistics, compared against the full original array.
        let expected_min = plain.statistics().compute_min::<i32>();
        assert_eq!(sliced.statistics().compute_min::<i32>(), expected_min);

        let expected_null_count = plain.statistics().compute_null_count();
        assert_eq!(
            sliced.statistics().compute_null_count(),
            expected_null_count
        );

        let expected_is_constant = plain.statistics().compute_is_constant();
        assert_eq!(
            sliced.statistics().compute_is_constant(),
            expected_is_constant
        );
    }
}
179}