1use std::hash::Hash;
5use std::ops::Range;
6
7use vortex_array::Array;
8use vortex_array::ArrayBufferVisitor;
9use vortex_array::ArrayChildVisitor;
10use vortex_array::ArrayEq;
11use vortex_array::ArrayHash;
12use vortex_array::ArrayRef;
13use vortex_array::Canonical;
14use vortex_array::EmptyMetadata;
15use vortex_array::IntoArray;
16use vortex_array::Precision;
17use vortex_array::ToCanonical;
18use vortex_array::buffer::BufferHandle;
19use vortex_array::serde::ArrayChildren;
20use vortex_array::stats::ArrayStats;
21use vortex_array::stats::StatsSetRef;
22use vortex_array::vtable;
23use vortex_array::vtable::ArrayId;
24use vortex_array::vtable::ArrayVTable;
25use vortex_array::vtable::ArrayVTableExt;
26use vortex_array::vtable::BaseArrayVTable;
27use vortex_array::vtable::CanonicalVTable;
28use vortex_array::vtable::EncodeVTable;
29use vortex_array::vtable::NotSupported;
30use vortex_array::vtable::OperationsVTable;
31use vortex_array::vtable::VTable;
32use vortex_array::vtable::ValidityChild;
33use vortex_array::vtable::ValidityVTableFromChild;
34use vortex_array::vtable::VisitorVTable;
35use vortex_dtype::DType;
36use vortex_dtype::PType;
37use vortex_dtype::match_each_unsigned_integer_ptype;
38use vortex_error::VortexExpect;
39use vortex_error::VortexResult;
40use vortex_error::vortex_bail;
41use vortex_error::vortex_ensure;
42use vortex_scalar::Scalar;
43use zigzag::ZigZag as ExternalZigZag;
44
45use crate::compute::ZigZagEncoded;
46use crate::zigzag_decode;
47use crate::zigzag_encode;
48
49vtable!(ZigZag);
50
51impl VTable for ZigZagVTable {
52 type Array = ZigZagArray;
53
54 type Metadata = EmptyMetadata;
55
56 type ArrayVTable = Self;
57 type CanonicalVTable = Self;
58 type OperationsVTable = Self;
59 type ValidityVTable = ValidityVTableFromChild;
60 type VisitorVTable = Self;
61 type ComputeVTable = NotSupported;
62 type EncodeVTable = Self;
63
64 fn id(&self) -> ArrayId {
65 ArrayId::new_ref("vortex.zigzag")
66 }
67
68 fn encoding(_array: &Self::Array) -> ArrayVTable {
69 ZigZagVTable.as_vtable()
70 }
71
72 fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
73 Ok(EmptyMetadata)
74 }
75
76 fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
77 Ok(Some(vec![]))
78 }
79
80 fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
81 Ok(EmptyMetadata)
82 }
83
84 fn build(
85 &self,
86 dtype: &DType,
87 len: usize,
88 _metadata: &Self::Metadata,
89 _buffers: &[BufferHandle],
90 children: &dyn ArrayChildren,
91 ) -> VortexResult<ZigZagArray> {
92 if children.len() != 1 {
93 vortex_bail!("Expected 1 child, got {}", children.len());
94 }
95
96 let ptype = PType::try_from(dtype)?;
97 let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
98
99 let encoded = children.get(0, &encoded_type, len)?;
100 ZigZagArray::try_new(encoded)
101 }
102
103 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
104 vortex_ensure!(
105 children.len() == 1,
106 "ZigZagArray expects exactly 1 child (encoded), got {}",
107 children.len()
108 );
109 array.encoded = children.into_iter().next().vortex_expect("checked");
110 Ok(())
111 }
112}
113
114#[derive(Clone, Debug)]
115pub struct ZigZagArray {
116 dtype: DType,
117 encoded: ArrayRef,
118 stats_set: ArrayStats,
119}
120
121#[derive(Debug)]
122pub struct ZigZagVTable;
123
124impl ZigZagArray {
125 pub fn new(encoded: ArrayRef) -> Self {
126 Self::try_new(encoded).vortex_expect("ZigZigArray new")
127 }
128
129 pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
130 let encoded_dtype = encoded.dtype().clone();
131 if !encoded_dtype.is_unsigned_int() {
132 vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
133 }
134
135 let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
136 .with_nullability(encoded_dtype.nullability());
137
138 Ok(Self {
139 dtype,
140 encoded,
141 stats_set: Default::default(),
142 })
143 }
144
145 pub fn ptype(&self) -> PType {
146 self.dtype().as_ptype()
147 }
148
149 pub fn encoded(&self) -> &ArrayRef {
150 &self.encoded
151 }
152}
153
154impl BaseArrayVTable<ZigZagVTable> for ZigZagVTable {
155 fn len(array: &ZigZagArray) -> usize {
156 array.encoded.len()
157 }
158
159 fn dtype(array: &ZigZagArray) -> &DType {
160 &array.dtype
161 }
162
163 fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
164 array.stats_set.to_ref(array.as_ref())
165 }
166
167 fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
168 array.dtype.hash(state);
169 array.encoded.array_hash(state, precision);
170 }
171
172 fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
173 array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
174 }
175}
176
177impl CanonicalVTable<ZigZagVTable> for ZigZagVTable {
178 fn canonicalize(array: &ZigZagArray) -> Canonical {
179 Canonical::Primitive(zigzag_decode(array.encoded().to_primitive()))
180 }
181}
182
183impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
184 fn slice(array: &ZigZagArray, range: Range<usize>) -> ArrayRef {
185 ZigZagArray::new(array.encoded().slice(range)).into_array()
186 }
187
188 fn scalar_at(array: &ZigZagArray, index: usize) -> Scalar {
189 let scalar = array.encoded().scalar_at(index);
190 if scalar.is_null() {
191 return scalar.reinterpret_cast(array.ptype());
192 }
193
194 let pscalar = scalar.as_primitive();
195 match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
196 Scalar::primitive(
197 <<P as ZigZagEncoded>::Int>::decode(
198 pscalar
199 .typed_value::<P>()
200 .vortex_expect("zigzag corruption"),
201 ),
202 array.dtype().nullability(),
203 )
204 })
205 }
206}
207
208impl ValidityChild<ZigZagVTable> for ZigZagVTable {
209 fn validity_child(array: &ZigZagArray) -> &ArrayRef {
210 array.encoded()
211 }
212}
213
214impl EncodeVTable<ZigZagVTable> for ZigZagVTable {
215 fn encode(
216 encoding: &ZigZagVTable,
217 canonical: &Canonical,
218 _like: Option<&ZigZagArray>,
219 ) -> VortexResult<Option<ZigZagArray>> {
220 let parray = canonical.clone().into_primitive();
221
222 if !parray.ptype().is_signed_int() {
223 vortex_bail!(
224 "only signed integers can be encoded into {}, got {}",
225 encoding.id(),
226 parray.ptype()
227 )
228 }
229
230 Ok(Some(zigzag_encode(parray)?))
231 }
232}
233
234impl VisitorVTable<ZigZagVTable> for ZigZagVTable {
235 fn visit_buffers(_array: &ZigZagArray, _visitor: &mut dyn ArrayBufferVisitor) {}
236
237 fn visit_children(array: &ZigZagArray, visitor: &mut dyn ArrayChildVisitor) {
238 visitor.visit_child("encoded", array.encoded())
239 }
240}
241
242#[cfg(test)]
243mod test {
244 use vortex_array::IntoArray;
245 use vortex_buffer::buffer;
246 use vortex_scalar::Scalar;
247
248 use super::*;
249
250 #[test]
251 fn test_compute_statistics() {
252 let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
253 let canonical = array.to_canonical();
254 let zigzag = ZigZagVTable
255 .as_vtable()
256 .encode(&canonical, None)
257 .unwrap()
258 .unwrap();
259
260 assert_eq!(
261 zigzag.statistics().compute_max::<i32>(),
262 array.statistics().compute_max::<i32>()
263 );
264 assert_eq!(
265 zigzag.statistics().compute_null_count(),
266 array.statistics().compute_null_count()
267 );
268 assert_eq!(
269 zigzag.statistics().compute_is_constant(),
270 array.statistics().compute_is_constant()
271 );
272
273 let sliced = zigzag.slice(0..2);
274 let sliced = sliced.as_::<ZigZagVTable>();
275 assert_eq!(sliced.scalar_at(sliced.len() - 1), Scalar::from(-5i32));
276
277 assert_eq!(
278 sliced.statistics().compute_min::<i32>(),
279 array.statistics().compute_min::<i32>()
280 );
281 assert_eq!(
282 sliced.statistics().compute_null_count(),
283 array.statistics().compute_null_count()
284 );
285 assert_eq!(
286 sliced.statistics().compute_is_constant(),
287 array.statistics().compute_is_constant()
288 );
289 }
290}