1use vortex_array::arrays::PrimitiveArray;
2use vortex_array::stats::{ArrayStats, Precision, Stat, StatsSet, StatsSetRef};
3use vortex_array::variants::PrimitiveArrayTrait;
4use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
5use vortex_array::{
6 Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
7 ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, EncodingId, ToCanonical,
8 try_from_array_ref,
9};
10use vortex_dtype::{DType, PType};
11use vortex_error::{VortexResult, vortex_bail, vortex_err};
12use vortex_mask::Mask;
13use zigzag::ZigZag as ExternalZigZag;
14
15use crate::compress::zigzag_encode;
16use crate::zigzag_decode;
17
18#[derive(Clone, Debug)]
19pub struct ZigZagArray {
20 dtype: DType,
21 encoded: ArrayRef,
22 stats_set: ArrayStats,
23}
24
25try_from_array_ref!(ZigZagArray);
26
27pub struct ZigZagEncoding;
28impl Encoding for ZigZagEncoding {
29 type Array = ZigZagArray;
30 type Metadata = EmptyMetadata;
31}
32
33impl EncodingVTable for ZigZagEncoding {
34 fn id(&self) -> EncodingId {
35 EncodingId::new_ref("vortex.zigzag")
36 }
37}
38
39impl ZigZagArray {
40 pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
41 let encoded_dtype = encoded.dtype().clone();
42 if !encoded_dtype.is_unsigned_int() {
43 vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
44 }
45
46 let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
47 .with_nullability(encoded_dtype.nullability());
48
49 Ok(Self {
50 dtype,
51 encoded,
52 stats_set: Default::default(),
53 })
54 }
55
56 pub fn encode(array: &dyn Array) -> VortexResult<ZigZagArray> {
57 PrimitiveArray::try_from(array.to_array())
58 .map_err(|_| vortex_err!("ZigZag can only encoding primitive arrays"))
59 .and_then(zigzag_encode)
60 }
61
62 pub fn encoded(&self) -> &ArrayRef {
63 &self.encoded
64 }
65}
66
67impl ArrayImpl for ZigZagArray {
68 type Encoding = ZigZagEncoding;
69
70 fn _len(&self) -> usize {
71 self.encoded.len()
72 }
73
74 fn _dtype(&self) -> &DType {
75 &self.dtype
76 }
77
78 fn _vtable(&self) -> VTableRef {
79 VTableRef::new_ref(&ZigZagEncoding)
80 }
81}
82
83impl ArrayCanonicalImpl for ZigZagArray {
84 fn _to_canonical(&self) -> VortexResult<Canonical> {
85 zigzag_decode(self.encoded().to_primitive()?).map(Canonical::Primitive)
86 }
87}
88
89impl ArrayStatisticsImpl for ZigZagArray {
90 fn _stats_ref(&self) -> StatsSetRef<'_> {
91 self.stats_set.to_ref(self)
92 }
93}
94
95impl ArrayValidityImpl for ZigZagArray {
96 fn _is_valid(&self, index: usize) -> VortexResult<bool> {
97 self.encoded.is_valid(index)
98 }
99
100 fn _all_valid(&self) -> VortexResult<bool> {
101 self.encoded.all_valid()
102 }
103
104 fn _all_invalid(&self) -> VortexResult<bool> {
105 self.encoded.all_invalid()
106 }
107
108 fn _validity_mask(&self) -> VortexResult<Mask> {
109 self.encoded.validity_mask()
110 }
111}
112
113impl ArrayVariantsImpl for ZigZagArray {
114 fn _as_primitive_typed(&self) -> Option<&dyn PrimitiveArrayTrait> {
115 Some(self)
116 }
117}
118
119impl PrimitiveArrayTrait for ZigZagArray {}
120
121impl StatisticsVTable<&ZigZagArray> for ZigZagEncoding {
122 fn compute_statistics(&self, array: &ZigZagArray, stat: Stat) -> VortexResult<StatsSet> {
123 let mut stats = StatsSet::default();
124
125 if matches!(stat, Stat::IsConstant | Stat::NullCount) {
127 if let Some(val) = array.encoded().statistics().compute_stat(stat)? {
128 stats.set(stat, Precision::exact(val));
129 }
130 } else if matches!(stat, Stat::Min | Stat::Max) {
131 let encoded_max = array.encoded().statistics().compute_as::<u64>(Stat::Max);
132 if let Some(val) = encoded_max {
133 let decoded = <i64 as ExternalZigZag>::decode(val);
135 let decoded_stat = if decoded < 0 { Stat::Min } else { Stat::Max };
136 stats.set(decoded_stat, Precision::exact(decoded));
137 }
138 }
139
140 Ok(stats)
141 }
142}
143
144#[cfg(test)]
145mod test {
146 use vortex_array::IntoArray;
147 use vortex_array::compute::{scalar_at, slice};
148 use vortex_buffer::buffer;
149 use vortex_scalar::Scalar;
150
151 use super::*;
152
153 #[test]
154 fn test_compute_statistics() {
155 let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
156 let zigzag = ZigZagArray::encode(&array).unwrap();
157
158 assert_eq!(
159 zigzag.statistics().compute_max::<i32>(),
160 array.statistics().compute_max::<i32>()
161 );
162 assert_eq!(
163 zigzag.statistics().compute_null_count(),
164 array.statistics().compute_null_count()
165 );
166 assert_eq!(
167 zigzag.statistics().compute_is_constant(),
168 array.statistics().compute_is_constant()
169 );
170
171 let sliced = ZigZagArray::try_from(slice(&zigzag, 0, 2).unwrap()).unwrap();
172 assert_eq!(
173 scalar_at(&sliced, sliced.len() - 1).unwrap(),
174 Scalar::from(-5i32)
175 );
176
177 assert_eq!(
178 sliced.statistics().compute_min::<i32>(),
179 array.statistics().compute_min::<i32>()
180 );
181 assert_eq!(
182 sliced.statistics().compute_null_count(),
183 array.statistics().compute_null_count()
184 );
185 assert_eq!(
186 sliced.statistics().compute_is_constant(),
187 array.statistics().compute_is_constant()
188 );
189 }
190}