Skip to main content

vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::sync::Arc;
6
7use vortex_array::ArrayEq;
8use vortex_array::ArrayHash;
9use vortex_array::ArrayRef;
10use vortex_array::DynArray;
11use vortex_array::EmptyMetadata;
12use vortex_array::ExecutionCtx;
13use vortex_array::ExecutionResult;
14use vortex_array::IntoArray;
15use vortex_array::Precision;
16use vortex_array::buffer::BufferHandle;
17use vortex_array::dtype::DType;
18use vortex_array::dtype::PType;
19use vortex_array::match_each_unsigned_integer_ptype;
20use vortex_array::scalar::Scalar;
21use vortex_array::serde::ArrayChildren;
22use vortex_array::stats::ArrayStats;
23use vortex_array::stats::StatsSetRef;
24use vortex_array::vtable;
25use vortex_array::vtable::Array;
26use vortex_array::vtable::ArrayId;
27use vortex_array::vtable::OperationsVTable;
28use vortex_array::vtable::VTable;
29use vortex_array::vtable::ValidityChild;
30use vortex_array::vtable::ValidityVTableFromChild;
31use vortex_error::VortexExpect;
32use vortex_error::VortexResult;
33use vortex_error::vortex_bail;
34use vortex_error::vortex_ensure;
35use vortex_error::vortex_panic;
36use vortex_session::VortexSession;
37use zigzag::ZigZag as ExternalZigZag;
38
39use crate::compute::ZigZagEncoded;
40use crate::kernel::PARENT_KERNELS;
41use crate::rules::RULES;
42use crate::zigzag_decode;
43
44vtable!(ZigZag);
45
46impl VTable for ZigZag {
47    type Array = ZigZagArray;
48
49    type Metadata = EmptyMetadata;
50    type OperationsVTable = Self;
51    type ValidityVTable = ValidityVTableFromChild;
52
53    fn vtable(_array: &Self::Array) -> &Self {
54        &ZigZag
55    }
56
57    fn id(&self) -> ArrayId {
58        Self::ID
59    }
60
61    fn len(array: &ZigZagArray) -> usize {
62        array.encoded.len()
63    }
64
65    fn dtype(array: &ZigZagArray) -> &DType {
66        &array.dtype
67    }
68
69    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
70        array.stats_set.to_ref(array.as_ref())
71    }
72
73    fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
74        array.dtype.hash(state);
75        array.encoded.array_hash(state, precision);
76    }
77
78    fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
79        array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
80    }
81
82    fn nbuffers(_array: &ZigZagArray) -> usize {
83        0
84    }
85
86    fn buffer(_array: &ZigZagArray, idx: usize) -> BufferHandle {
87        vortex_panic!("ZigZagArray buffer index {idx} out of bounds")
88    }
89
90    fn buffer_name(_array: &ZigZagArray, idx: usize) -> Option<String> {
91        vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds")
92    }
93
94    fn nchildren(_array: &ZigZagArray) -> usize {
95        1
96    }
97
98    fn child(array: &ZigZagArray, idx: usize) -> ArrayRef {
99        match idx {
100            0 => array.encoded().clone(),
101            _ => vortex_panic!("ZigZagArray child index {idx} out of bounds"),
102        }
103    }
104
105    fn child_name(_array: &ZigZagArray, idx: usize) -> String {
106        match idx {
107            0 => "encoded".to_string(),
108            _ => vortex_panic!("ZigZagArray child_name index {idx} out of bounds"),
109        }
110    }
111
112    fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
113        Ok(EmptyMetadata)
114    }
115
116    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
117        Ok(Some(vec![]))
118    }
119
120    fn deserialize(
121        _bytes: &[u8],
122        _dtype: &DType,
123        _len: usize,
124        _buffers: &[BufferHandle],
125        _session: &VortexSession,
126    ) -> VortexResult<Self::Metadata> {
127        Ok(EmptyMetadata)
128    }
129
130    fn build(
131        dtype: &DType,
132        len: usize,
133        _metadata: &Self::Metadata,
134        _buffers: &[BufferHandle],
135        children: &dyn ArrayChildren,
136    ) -> VortexResult<ZigZagArray> {
137        if children.len() != 1 {
138            vortex_bail!("Expected 1 child, got {}", children.len());
139        }
140
141        let ptype = PType::try_from(dtype)?;
142        let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
143
144        let encoded = children.get(0, &encoded_type, len)?;
145        ZigZagArray::try_new(encoded)
146    }
147
148    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
149        vortex_ensure!(
150            children.len() == 1,
151            "ZigZagArray expects exactly 1 child (encoded), got {}",
152            children.len()
153        );
154        array.encoded = children.into_iter().next().vortex_expect("checked");
155        Ok(())
156    }
157
158    fn execute(array: Arc<Array<Self>>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
159        Ok(ExecutionResult::done(
160            zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(),
161        ))
162    }
163
164    fn reduce_parent(
165        array: &Array<Self>,
166        parent: &ArrayRef,
167        child_idx: usize,
168    ) -> VortexResult<Option<ArrayRef>> {
169        RULES.evaluate(array, parent, child_idx)
170    }
171
172    fn execute_parent(
173        array: &Array<Self>,
174        parent: &ArrayRef,
175        child_idx: usize,
176        ctx: &mut ExecutionCtx,
177    ) -> VortexResult<Option<ArrayRef>> {
178        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
179    }
180}
181
182#[derive(Clone, Debug)]
183pub struct ZigZagArray {
184    dtype: DType,
185    encoded: ArrayRef,
186    stats_set: ArrayStats,
187}
188
189#[derive(Clone, Debug)]
190pub struct ZigZag;
191
192impl ZigZag {
193    pub const ID: ArrayId = ArrayId::new_ref("vortex.zigzag");
194}
195
196impl ZigZagArray {
197    pub fn new(encoded: ArrayRef) -> Self {
198        Self::try_new(encoded).vortex_expect("ZigZagArray new")
199    }
200
201    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
202        let encoded_dtype = encoded.dtype().clone();
203        if !encoded_dtype.is_unsigned_int() {
204            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
205        }
206
207        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
208            .with_nullability(encoded_dtype.nullability());
209
210        Ok(Self {
211            dtype,
212            encoded,
213            stats_set: Default::default(),
214        })
215    }
216
217    pub fn ptype(&self) -> PType {
218        self.dtype().as_ptype()
219    }
220
221    pub fn encoded(&self) -> &ArrayRef {
222        &self.encoded
223    }
224}
225
226impl OperationsVTable<ZigZag> for ZigZag {
227    fn scalar_at(
228        array: &ZigZagArray,
229        index: usize,
230        _ctx: &mut ExecutionCtx,
231    ) -> VortexResult<Scalar> {
232        let scalar = array.encoded().scalar_at(index)?;
233        if scalar.is_null() {
234            return scalar.primitive_reinterpret_cast(array.ptype());
235        }
236
237        let pscalar = scalar.as_primitive();
238        Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
239            Scalar::primitive(
240                <<P as ZigZagEncoded>::Int>::decode(
241                    pscalar
242                        .typed_value::<P>()
243                        .vortex_expect("zigzag corruption"),
244                ),
245                array.dtype().nullability(),
246            )
247        }))
248    }
249}
250
251impl ValidityChild<ZigZag> for ZigZag {
252    fn validity_child(array: &ZigZagArray) -> &ArrayRef {
253        array.encoded()
254    }
255}
256
257#[cfg(test)]
258mod test {
259    use vortex_array::IntoArray;
260    use vortex_array::ToCanonical;
261    use vortex_array::scalar::Scalar;
262    use vortex_buffer::buffer;
263
264    use super::*;
265    use crate::zigzag_encode;
266
267    #[test]
268    fn test_compute_statistics() -> VortexResult<()> {
269        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]
270            .into_array()
271            .to_primitive();
272        let zigzag = zigzag_encode(array.clone())?;
273
274        assert_eq!(
275            zigzag.statistics().compute_max::<i32>(),
276            array.statistics().compute_max::<i32>()
277        );
278        assert_eq!(
279            zigzag.statistics().compute_null_count(),
280            array.statistics().compute_null_count()
281        );
282        assert_eq!(
283            zigzag.statistics().compute_is_constant(),
284            array.statistics().compute_is_constant()
285        );
286
287        let sliced = zigzag.slice(0..2).unwrap();
288        let sliced = sliced.as_::<ZigZag>();
289        assert_eq!(
290            sliced.scalar_at(sliced.len() - 1).unwrap(),
291            Scalar::from(-5i32)
292        );
293
294        assert_eq!(
295            sliced.statistics().compute_min::<i32>(),
296            array.statistics().compute_min::<i32>()
297        );
298        assert_eq!(
299            sliced.statistics().compute_null_count(),
300            array.statistics().compute_null_count()
301        );
302        assert_eq!(
303            sliced.statistics().compute_is_constant(),
304            array.statistics().compute_is_constant()
305        );
306        Ok(())
307    }
308}