vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::ops::Range;
6
7use vortex_array::Array;
8use vortex_array::ArrayBufferVisitor;
9use vortex_array::ArrayChildVisitor;
10use vortex_array::ArrayEq;
11use vortex_array::ArrayHash;
12use vortex_array::ArrayRef;
13use vortex_array::Canonical;
14use vortex_array::EmptyMetadata;
15use vortex_array::IntoArray;
16use vortex_array::Precision;
17use vortex_array::ToCanonical;
18use vortex_array::buffer::BufferHandle;
19use vortex_array::serde::ArrayChildren;
20use vortex_array::stats::ArrayStats;
21use vortex_array::stats::StatsSetRef;
22use vortex_array::vtable;
23use vortex_array::vtable::ArrayId;
24use vortex_array::vtable::ArrayVTable;
25use vortex_array::vtable::ArrayVTableExt;
26use vortex_array::vtable::BaseArrayVTable;
27use vortex_array::vtable::CanonicalVTable;
28use vortex_array::vtable::EncodeVTable;
29use vortex_array::vtable::NotSupported;
30use vortex_array::vtable::OperationsVTable;
31use vortex_array::vtable::VTable;
32use vortex_array::vtable::ValidityChild;
33use vortex_array::vtable::ValidityVTableFromChild;
34use vortex_array::vtable::VisitorVTable;
35use vortex_dtype::DType;
36use vortex_dtype::PType;
37use vortex_dtype::match_each_unsigned_integer_ptype;
38use vortex_error::VortexExpect;
39use vortex_error::VortexResult;
40use vortex_error::vortex_bail;
41use vortex_error::vortex_ensure;
42use vortex_scalar::Scalar;
43use zigzag::ZigZag as ExternalZigZag;
44
45use crate::compute::ZigZagEncoded;
46use crate::zigzag_decode;
47use crate::zigzag_encode;
48
49vtable!(ZigZag);
50
51impl VTable for ZigZagVTable {
52    type Array = ZigZagArray;
53
54    type Metadata = EmptyMetadata;
55
56    type ArrayVTable = Self;
57    type CanonicalVTable = Self;
58    type OperationsVTable = Self;
59    type ValidityVTable = ValidityVTableFromChild;
60    type VisitorVTable = Self;
61    type ComputeVTable = NotSupported;
62    type EncodeVTable = Self;
63
64    fn id(&self) -> ArrayId {
65        ArrayId::new_ref("vortex.zigzag")
66    }
67
68    fn encoding(_array: &Self::Array) -> ArrayVTable {
69        ZigZagVTable.as_vtable()
70    }
71
72    fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
73        Ok(EmptyMetadata)
74    }
75
76    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
77        Ok(Some(vec![]))
78    }
79
80    fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
81        Ok(EmptyMetadata)
82    }
83
84    fn build(
85        &self,
86        dtype: &DType,
87        len: usize,
88        _metadata: &Self::Metadata,
89        _buffers: &[BufferHandle],
90        children: &dyn ArrayChildren,
91    ) -> VortexResult<ZigZagArray> {
92        if children.len() != 1 {
93            vortex_bail!("Expected 1 child, got {}", children.len());
94        }
95
96        let ptype = PType::try_from(dtype)?;
97        let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
98
99        let encoded = children.get(0, &encoded_type, len)?;
100        ZigZagArray::try_new(encoded)
101    }
102
103    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
104        vortex_ensure!(
105            children.len() == 1,
106            "ZigZagArray expects exactly 1 child (encoded), got {}",
107            children.len()
108        );
109        array.encoded = children.into_iter().next().vortex_expect("checked");
110        Ok(())
111    }
112}
113
114#[derive(Clone, Debug)]
115pub struct ZigZagArray {
116    dtype: DType,
117    encoded: ArrayRef,
118    stats_set: ArrayStats,
119}
120
121#[derive(Debug)]
122pub struct ZigZagVTable;
123
124impl ZigZagArray {
125    pub fn new(encoded: ArrayRef) -> Self {
126        Self::try_new(encoded).vortex_expect("ZigZigArray new")
127    }
128
129    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
130        let encoded_dtype = encoded.dtype().clone();
131        if !encoded_dtype.is_unsigned_int() {
132            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
133        }
134
135        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
136            .with_nullability(encoded_dtype.nullability());
137
138        Ok(Self {
139            dtype,
140            encoded,
141            stats_set: Default::default(),
142        })
143    }
144
145    pub fn ptype(&self) -> PType {
146        self.dtype().as_ptype()
147    }
148
149    pub fn encoded(&self) -> &ArrayRef {
150        &self.encoded
151    }
152}
153
154impl BaseArrayVTable<ZigZagVTable> for ZigZagVTable {
155    fn len(array: &ZigZagArray) -> usize {
156        array.encoded.len()
157    }
158
159    fn dtype(array: &ZigZagArray) -> &DType {
160        &array.dtype
161    }
162
163    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
164        array.stats_set.to_ref(array.as_ref())
165    }
166
167    fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
168        array.dtype.hash(state);
169        array.encoded.array_hash(state, precision);
170    }
171
172    fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
173        array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
174    }
175}
176
177impl CanonicalVTable<ZigZagVTable> for ZigZagVTable {
178    fn canonicalize(array: &ZigZagArray) -> Canonical {
179        Canonical::Primitive(zigzag_decode(array.encoded().to_primitive()))
180    }
181}
182
183impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
184    fn slice(array: &ZigZagArray, range: Range<usize>) -> ArrayRef {
185        ZigZagArray::new(array.encoded().slice(range)).into_array()
186    }
187
188    fn scalar_at(array: &ZigZagArray, index: usize) -> Scalar {
189        let scalar = array.encoded().scalar_at(index);
190        if scalar.is_null() {
191            return scalar.reinterpret_cast(array.ptype());
192        }
193
194        let pscalar = scalar.as_primitive();
195        match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
196            Scalar::primitive(
197                <<P as ZigZagEncoded>::Int>::decode(
198                    pscalar
199                        .typed_value::<P>()
200                        .vortex_expect("zigzag corruption"),
201                ),
202                array.dtype().nullability(),
203            )
204        })
205    }
206}
207
208impl ValidityChild<ZigZagVTable> for ZigZagVTable {
209    fn validity_child(array: &ZigZagArray) -> &ArrayRef {
210        array.encoded()
211    }
212}
213
214impl EncodeVTable<ZigZagVTable> for ZigZagVTable {
215    fn encode(
216        encoding: &ZigZagVTable,
217        canonical: &Canonical,
218        _like: Option<&ZigZagArray>,
219    ) -> VortexResult<Option<ZigZagArray>> {
220        let parray = canonical.clone().into_primitive();
221
222        if !parray.ptype().is_signed_int() {
223            vortex_bail!(
224                "only signed integers can be encoded into {}, got {}",
225                encoding.id(),
226                parray.ptype()
227            )
228        }
229
230        Ok(Some(zigzag_encode(parray)?))
231    }
232}
233
234impl VisitorVTable<ZigZagVTable> for ZigZagVTable {
235    fn visit_buffers(_array: &ZigZagArray, _visitor: &mut dyn ArrayBufferVisitor) {}
236
237    fn visit_children(array: &ZigZagArray, visitor: &mut dyn ArrayChildVisitor) {
238        visitor.visit_child("encoded", array.encoded())
239    }
240}
241
242#[cfg(test)]
243mod test {
244    use vortex_array::IntoArray;
245    use vortex_buffer::buffer;
246    use vortex_scalar::Scalar;
247
248    use super::*;
249
250    #[test]
251    fn test_compute_statistics() {
252        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
253        let canonical = array.to_canonical();
254        let zigzag = ZigZagVTable
255            .as_vtable()
256            .encode(&canonical, None)
257            .unwrap()
258            .unwrap();
259
260        assert_eq!(
261            zigzag.statistics().compute_max::<i32>(),
262            array.statistics().compute_max::<i32>()
263        );
264        assert_eq!(
265            zigzag.statistics().compute_null_count(),
266            array.statistics().compute_null_count()
267        );
268        assert_eq!(
269            zigzag.statistics().compute_is_constant(),
270            array.statistics().compute_is_constant()
271        );
272
273        let sliced = zigzag.slice(0..2);
274        let sliced = sliced.as_::<ZigZagVTable>();
275        assert_eq!(sliced.scalar_at(sliced.len() - 1), Scalar::from(-5i32));
276
277        assert_eq!(
278            sliced.statistics().compute_min::<i32>(),
279            array.statistics().compute_min::<i32>()
280        );
281        assert_eq!(
282            sliced.statistics().compute_null_count(),
283            array.statistics().compute_null_count()
284        );
285        assert_eq!(
286            sliced.statistics().compute_is_constant(),
287            array.statistics().compute_is_constant()
288        );
289    }
290}