Skip to main content

vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5
6use vortex_array::ArrayEq;
7use vortex_array::ArrayHash;
8use vortex_array::ArrayRef;
9use vortex_array::DynArray;
10use vortex_array::EmptyMetadata;
11use vortex_array::ExecutionCtx;
12use vortex_array::ExecutionStep;
13use vortex_array::IntoArray;
14use vortex_array::Precision;
15use vortex_array::buffer::BufferHandle;
16use vortex_array::dtype::DType;
17use vortex_array::dtype::PType;
18use vortex_array::match_each_unsigned_integer_ptype;
19use vortex_array::scalar::Scalar;
20use vortex_array::serde::ArrayChildren;
21use vortex_array::stats::ArrayStats;
22use vortex_array::stats::StatsSetRef;
23use vortex_array::vtable;
24use vortex_array::vtable::ArrayId;
25use vortex_array::vtable::OperationsVTable;
26use vortex_array::vtable::VTable;
27use vortex_array::vtable::ValidityChild;
28use vortex_array::vtable::ValidityVTableFromChild;
29use vortex_error::VortexExpect;
30use vortex_error::VortexResult;
31use vortex_error::vortex_bail;
32use vortex_error::vortex_ensure;
33use vortex_error::vortex_panic;
34use vortex_session::VortexSession;
35use zigzag::ZigZag as ExternalZigZag;
36
37use crate::compute::ZigZagEncoded;
38use crate::kernel::PARENT_KERNELS;
39use crate::rules::RULES;
40use crate::zigzag_decode;
41
42vtable!(ZigZag);
43
44impl VTable for ZigZagVTable {
45    type Array = ZigZagArray;
46
47    type Metadata = EmptyMetadata;
48    type OperationsVTable = Self;
49    type ValidityVTable = ValidityVTableFromChild;
50
51    fn id(_array: &Self::Array) -> ArrayId {
52        Self::ID
53    }
54
55    fn len(array: &ZigZagArray) -> usize {
56        array.encoded.len()
57    }
58
59    fn dtype(array: &ZigZagArray) -> &DType {
60        &array.dtype
61    }
62
63    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
64        array.stats_set.to_ref(array.as_ref())
65    }
66
67    fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
68        array.dtype.hash(state);
69        array.encoded.array_hash(state, precision);
70    }
71
72    fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
73        array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
74    }
75
76    fn nbuffers(_array: &ZigZagArray) -> usize {
77        0
78    }
79
80    fn buffer(_array: &ZigZagArray, idx: usize) -> BufferHandle {
81        vortex_panic!("ZigZagArray buffer index {idx} out of bounds")
82    }
83
84    fn buffer_name(_array: &ZigZagArray, idx: usize) -> Option<String> {
85        vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds")
86    }
87
88    fn nchildren(_array: &ZigZagArray) -> usize {
89        1
90    }
91
92    fn child(array: &ZigZagArray, idx: usize) -> ArrayRef {
93        match idx {
94            0 => array.encoded().clone(),
95            _ => vortex_panic!("ZigZagArray child index {idx} out of bounds"),
96        }
97    }
98
99    fn child_name(_array: &ZigZagArray, idx: usize) -> String {
100        match idx {
101            0 => "encoded".to_string(),
102            _ => vortex_panic!("ZigZagArray child_name index {idx} out of bounds"),
103        }
104    }
105
106    fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
107        Ok(EmptyMetadata)
108    }
109
110    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
111        Ok(Some(vec![]))
112    }
113
114    fn deserialize(
115        _bytes: &[u8],
116        _dtype: &DType,
117        _len: usize,
118        _buffers: &[BufferHandle],
119        _session: &VortexSession,
120    ) -> VortexResult<Self::Metadata> {
121        Ok(EmptyMetadata)
122    }
123
124    fn build(
125        dtype: &DType,
126        len: usize,
127        _metadata: &Self::Metadata,
128        _buffers: &[BufferHandle],
129        children: &dyn ArrayChildren,
130    ) -> VortexResult<ZigZagArray> {
131        if children.len() != 1 {
132            vortex_bail!("Expected 1 child, got {}", children.len());
133        }
134
135        let ptype = PType::try_from(dtype)?;
136        let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
137
138        let encoded = children.get(0, &encoded_type, len)?;
139        ZigZagArray::try_new(encoded)
140    }
141
142    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
143        vortex_ensure!(
144            children.len() == 1,
145            "ZigZagArray expects exactly 1 child (encoded), got {}",
146            children.len()
147        );
148        array.encoded = children.into_iter().next().vortex_expect("checked");
149        Ok(())
150    }
151
152    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionStep> {
153        Ok(ExecutionStep::Done(
154            zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(),
155        ))
156    }
157
158    fn reduce_parent(
159        array: &Self::Array,
160        parent: &ArrayRef,
161        child_idx: usize,
162    ) -> VortexResult<Option<ArrayRef>> {
163        RULES.evaluate(array, parent, child_idx)
164    }
165
166    fn execute_parent(
167        array: &Self::Array,
168        parent: &ArrayRef,
169        child_idx: usize,
170        ctx: &mut ExecutionCtx,
171    ) -> VortexResult<Option<ArrayRef>> {
172        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
173    }
174}
175
176#[derive(Clone, Debug)]
177pub struct ZigZagArray {
178    dtype: DType,
179    encoded: ArrayRef,
180    stats_set: ArrayStats,
181}
182
183#[derive(Debug)]
184pub struct ZigZagVTable;
185
186impl ZigZagVTable {
187    pub const ID: ArrayId = ArrayId::new_ref("vortex.zigzag");
188}
189
190impl ZigZagArray {
191    pub fn new(encoded: ArrayRef) -> Self {
192        Self::try_new(encoded).vortex_expect("ZigZagArray new")
193    }
194
195    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
196        let encoded_dtype = encoded.dtype().clone();
197        if !encoded_dtype.is_unsigned_int() {
198            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
199        }
200
201        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
202            .with_nullability(encoded_dtype.nullability());
203
204        Ok(Self {
205            dtype,
206            encoded,
207            stats_set: Default::default(),
208        })
209    }
210
211    pub fn ptype(&self) -> PType {
212        self.dtype().as_ptype()
213    }
214
215    pub fn encoded(&self) -> &ArrayRef {
216        &self.encoded
217    }
218}
219
220impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
221    fn scalar_at(array: &ZigZagArray, index: usize) -> VortexResult<Scalar> {
222        let scalar = array.encoded().scalar_at(index)?;
223        if scalar.is_null() {
224            return scalar.primitive_reinterpret_cast(array.ptype());
225        }
226
227        let pscalar = scalar.as_primitive();
228        Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
229            Scalar::primitive(
230                <<P as ZigZagEncoded>::Int>::decode(
231                    pscalar
232                        .typed_value::<P>()
233                        .vortex_expect("zigzag corruption"),
234                ),
235                array.dtype().nullability(),
236            )
237        }))
238    }
239}
240
241impl ValidityChild<ZigZagVTable> for ZigZagVTable {
242    fn validity_child(array: &ZigZagArray) -> &ArrayRef {
243        array.encoded()
244    }
245}
246
247#[cfg(test)]
248mod test {
249    use vortex_array::IntoArray;
250    use vortex_array::ToCanonical;
251    use vortex_array::scalar::Scalar;
252    use vortex_buffer::buffer;
253
254    use super::*;
255    use crate::zigzag_encode;
256
257    #[test]
258    fn test_compute_statistics() -> VortexResult<()> {
259        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]
260            .into_array()
261            .to_primitive();
262        let zigzag = zigzag_encode(array.clone())?;
263
264        assert_eq!(
265            zigzag.statistics().compute_max::<i32>(),
266            array.statistics().compute_max::<i32>()
267        );
268        assert_eq!(
269            zigzag.statistics().compute_null_count(),
270            array.statistics().compute_null_count()
271        );
272        assert_eq!(
273            zigzag.statistics().compute_is_constant(),
274            array.statistics().compute_is_constant()
275        );
276
277        let sliced = zigzag.slice(0..2).unwrap();
278        let sliced = sliced.as_::<ZigZagVTable>();
279        assert_eq!(
280            sliced.scalar_at(sliced.len() - 1).unwrap(),
281            Scalar::from(-5i32)
282        );
283
284        assert_eq!(
285            sliced.statistics().compute_min::<i32>(),
286            array.statistics().compute_min::<i32>()
287        );
288        assert_eq!(
289            sliced.statistics().compute_null_count(),
290            array.statistics().compute_null_count()
291        );
292        assert_eq!(
293            sliced.statistics().compute_is_constant(),
294            array.statistics().compute_is_constant()
295        );
296        Ok(())
297    }
298}