vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::stats::{ArrayStats, StatsSetRef};
5use vortex_array::vtable::{
6    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityChild,
7    ValidityVTableFromChild,
8};
9use vortex_array::{
10    Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable,
11};
12use vortex_dtype::{DType, PType, match_each_unsigned_integer_ptype};
13use vortex_error::{VortexResult, vortex_bail, vortex_err};
14use vortex_scalar::{PrimitiveScalar, Scalar};
15use zigzag::ZigZag as ExternalZigZag;
16
17use crate::compute::ZigZagEncoded;
18use crate::zigzag_decode;
19
20vtable!(ZigZag);
21
22impl VTable for ZigZagVTable {
23    type Array = ZigZagArray;
24    type Encoding = ZigZagEncoding;
25
26    type ArrayVTable = Self;
27    type CanonicalVTable = Self;
28    type OperationsVTable = Self;
29    type ValidityVTable = ValidityVTableFromChild;
30    type VisitorVTable = Self;
31    type ComputeVTable = NotSupported;
32    type EncodeVTable = Self;
33    type SerdeVTable = Self;
34
35    fn id(_encoding: &Self::Encoding) -> EncodingId {
36        EncodingId::new_ref("vortex.zigzag")
37    }
38
39    fn encoding(_array: &Self::Array) -> EncodingRef {
40        EncodingRef::new_ref(ZigZagEncoding.as_ref())
41    }
42}
43
44#[derive(Clone, Debug)]
45pub struct ZigZagArray {
46    dtype: DType,
47    encoded: ArrayRef,
48    stats_set: ArrayStats,
49}
50
/// Unit type used as the `Encoding` associated type of `ZigZagVTable`.
#[derive(Debug, Clone)]
pub struct ZigZagEncoding;
53
54impl ZigZagArray {
55    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
56        let encoded_dtype = encoded.dtype().clone();
57        if !encoded_dtype.is_unsigned_int() {
58            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
59        }
60
61        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
62            .with_nullability(encoded_dtype.nullability());
63
64        Ok(Self {
65            dtype,
66            encoded,
67            stats_set: Default::default(),
68        })
69    }
70
71    pub fn ptype(&self) -> PType {
72        self.dtype().as_ptype()
73    }
74
75    pub fn encoded(&self) -> &ArrayRef {
76        &self.encoded
77    }
78}
79
80impl ArrayVTable<ZigZagVTable> for ZigZagVTable {
81    fn len(array: &ZigZagArray) -> usize {
82        array.encoded.len()
83    }
84
85    fn dtype(array: &ZigZagArray) -> &DType {
86        &array.dtype
87    }
88
89    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
90        array.stats_set.to_ref(array.as_ref())
91    }
92}
93
94impl CanonicalVTable<ZigZagVTable> for ZigZagVTable {
95    fn canonicalize(array: &ZigZagArray) -> VortexResult<Canonical> {
96        zigzag_decode(array.encoded().to_primitive()?).map(Canonical::Primitive)
97    }
98}
99
100impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
101    fn slice(array: &ZigZagArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
102        Ok(ZigZagArray::try_new(array.encoded().slice(start, stop)?)?.into_array())
103    }
104
105    fn scalar_at(array: &ZigZagArray, index: usize) -> VortexResult<Scalar> {
106        let scalar = array.encoded().scalar_at(index)?;
107        if scalar.is_null() {
108            return Ok(scalar.reinterpret_cast(array.ptype()));
109        }
110
111        let pscalar = PrimitiveScalar::try_from(&scalar)?;
112        match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
113            Ok(Scalar::primitive(
114                <<P as ZigZagEncoded>::Int>::decode(pscalar.typed_value::<P>().ok_or_else(
115                    || {
116                        vortex_err!(
117                            "Cannot decode provided scalar: expected {}, got ptype {}",
118                            std::any::type_name::<P>(),
119                            pscalar.ptype()
120                        )
121                    },
122                )?),
123                array.dtype().nullability(),
124            ))
125        })
126    }
127}
128
129impl ValidityChild<ZigZagVTable> for ZigZagVTable {
130    fn validity_child(array: &ZigZagArray) -> &dyn Array {
131        array.encoded()
132    }
133}
134
#[cfg(test)]
mod test {
    use vortex_array::IntoArray;
    use vortex_buffer::buffer;
    use vortex_scalar::Scalar;

    use super::*;

    /// Statistics computed on the zigzag-encoded array (and on a slice of it) must agree
    /// with those computed on the plain input array.
    #[test]
    fn test_compute_statistics() {
        let plain = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
        let plain_canonical = plain.to_canonical().unwrap();
        let encoded = ZigZagEncoding
            .encode(&plain_canonical, None)
            .unwrap()
            .unwrap();

        // Whole-array statistics.
        assert_eq!(
            encoded.statistics().compute_max::<i32>(),
            plain.statistics().compute_max::<i32>()
        );
        assert_eq!(
            encoded.statistics().compute_null_count(),
            plain.statistics().compute_null_count()
        );
        assert_eq!(
            encoded.statistics().compute_is_constant(),
            plain.statistics().compute_is_constant()
        );

        // The first two elements are [1, -5], so the slice's last element is -5.
        let head = encoded.slice(0, 2).unwrap();
        let head = head.as_::<ZigZagVTable>();
        let last_index = head.len() - 1;
        assert_eq!(head.scalar_at(last_index).unwrap(), Scalar::from(-5i32));

        // The slice contains the full array's minimum (-5), so the minimums agree.
        assert_eq!(
            head.statistics().compute_min::<i32>(),
            plain.statistics().compute_min::<i32>()
        );
        assert_eq!(
            head.statistics().compute_null_count(),
            plain.statistics().compute_null_count()
        );
        assert_eq!(
            head.statistics().compute_is_constant(),
            plain.statistics().compute_is_constant()
        );
    }
}