vortex_zigzag/
array.rs

1use vortex_array::arrays::PrimitiveArray;
2use vortex_array::stats::{ArrayStats, Precision, Stat, StatsSet, StatsSetRef};
3use vortex_array::variants::PrimitiveArrayTrait;
4use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
5use vortex_array::{
6    Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
7    ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, EncodingId, ToCanonical,
8    try_from_array_ref,
9};
10use vortex_dtype::{DType, PType};
11use vortex_error::{VortexResult, vortex_bail, vortex_err};
12use vortex_mask::Mask;
13use zigzag::ZigZag as ExternalZigZag;
14
15use crate::compress::zigzag_encode;
16use crate::zigzag_decode;
17
18#[derive(Clone, Debug)]
19pub struct ZigZagArray {
20    dtype: DType,
21    encoded: ArrayRef,
22    stats_set: ArrayStats,
23}
24
25try_from_array_ref!(ZigZagArray);
26
27pub struct ZigZagEncoding;
28impl Encoding for ZigZagEncoding {
29    type Array = ZigZagArray;
30    type Metadata = EmptyMetadata;
31}
32
33impl EncodingVTable for ZigZagEncoding {
34    fn id(&self) -> EncodingId {
35        EncodingId::new_ref("vortex.zigzag")
36    }
37}
38
39impl ZigZagArray {
40    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
41        let encoded_dtype = encoded.dtype().clone();
42        if !encoded_dtype.is_unsigned_int() {
43            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
44        }
45
46        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
47            .with_nullability(encoded_dtype.nullability());
48
49        Ok(Self {
50            dtype,
51            encoded,
52            stats_set: Default::default(),
53        })
54    }
55
56    pub fn encode(array: &dyn Array) -> VortexResult<ZigZagArray> {
57        PrimitiveArray::try_from(array.to_array())
58            .map_err(|_| vortex_err!("ZigZag can only encoding primitive arrays"))
59            .and_then(zigzag_encode)
60    }
61
62    pub fn encoded(&self) -> &ArrayRef {
63        &self.encoded
64    }
65}
66
67impl ArrayImpl for ZigZagArray {
68    type Encoding = ZigZagEncoding;
69
70    fn _len(&self) -> usize {
71        self.encoded.len()
72    }
73
74    fn _dtype(&self) -> &DType {
75        &self.dtype
76    }
77
78    fn _vtable(&self) -> VTableRef {
79        VTableRef::new_ref(&ZigZagEncoding)
80    }
81}
82
83impl ArrayCanonicalImpl for ZigZagArray {
84    fn _to_canonical(&self) -> VortexResult<Canonical> {
85        zigzag_decode(self.encoded().to_primitive()?).map(Canonical::Primitive)
86    }
87}
88
89impl ArrayStatisticsImpl for ZigZagArray {
90    fn _stats_ref(&self) -> StatsSetRef<'_> {
91        self.stats_set.to_ref(self)
92    }
93}
94
95impl ArrayValidityImpl for ZigZagArray {
96    fn _is_valid(&self, index: usize) -> VortexResult<bool> {
97        self.encoded.is_valid(index)
98    }
99
100    fn _all_valid(&self) -> VortexResult<bool> {
101        self.encoded.all_valid()
102    }
103
104    fn _all_invalid(&self) -> VortexResult<bool> {
105        self.encoded.all_invalid()
106    }
107
108    fn _validity_mask(&self) -> VortexResult<Mask> {
109        self.encoded.validity_mask()
110    }
111}
112
113impl ArrayVariantsImpl for ZigZagArray {
114    fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
115        Some(self)
116    }
117}
118
119impl PrimitiveArrayTrait for ZigZagArray {}
120
121impl StatisticsVTable<&ZigZagArray> for ZigZagEncoding {
122    fn compute_statistics(&self, array: &ZigZagArray, stat: Stat) -> VortexResult<StatsSet> {
123        let mut stats = StatsSet::default();
124
125        // these stats are the same for array and array.encoded()
126        if matches!(stat, Stat::IsConstant | Stat::NullCount) {
127            if let Some(val) = array.encoded().statistics().compute_stat(stat)? {
128                stats.set(stat, Precision::exact(val));
129            }
130        } else if matches!(stat, Stat::Min | Stat::Max) {
131            let encoded_max = array.encoded().statistics().compute_as::<u64>(Stat::Max);
132            if let Some(val) = encoded_max {
133                // the max of the encoded array is the element with the highest absolute value (so either min if negative, or max if positive)
134                let decoded = <i64 as ExternalZigZag>::decode(val);
135                let decoded_stat = if decoded < 0 { Stat::Min } else { Stat::Max };
136                stats.set(decoded_stat, Precision::exact(decoded));
137            }
138        }
139
140        Ok(stats)
141    }
142}
143
144#[cfg(test)]
145mod test {
146    use vortex_array::IntoArray;
147    use vortex_array::compute::{scalar_at, slice};
148    use vortex_buffer::buffer;
149    use vortex_scalar::Scalar;
150
151    use super::*;
152
153    #[test]
154    fn test_compute_statistics() {
155        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
156        let zigzag = ZigZagArray::encode(&array).unwrap();
157
158        assert_eq!(
159            zigzag.statistics().compute_max::<i32>(),
160            array.statistics().compute_max::<i32>()
161        );
162        assert_eq!(
163            zigzag.statistics().compute_null_count(),
164            array.statistics().compute_null_count()
165        );
166        assert_eq!(
167            zigzag.statistics().compute_is_constant(),
168            array.statistics().compute_is_constant()
169        );
170
171        let sliced = ZigZagArray::try_from(slice(&zigzag, 0, 2).unwrap()).unwrap();
172        assert_eq!(
173            scalar_at(&sliced, sliced.len() - 1).unwrap(),
174            Scalar::from(-5i32)
175        );
176
177        assert_eq!(
178            sliced.statistics().compute_min::<i32>(),
179            array.statistics().compute_min::<i32>()
180        );
181        assert_eq!(
182            sliced.statistics().compute_null_count(),
183            array.statistics().compute_null_count()
184        );
185        assert_eq!(
186            sliced.statistics().compute_is_constant(),
187            array.statistics().compute_is_constant()
188        );
189    }
190}