Skip to main content

vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::sync::Arc;
6
7use vortex_array::ArrayEq;
8use vortex_array::ArrayHash;
9use vortex_array::ArrayRef;
10use vortex_array::DynArray;
11use vortex_array::EmptyMetadata;
12use vortex_array::ExecutionCtx;
13use vortex_array::ExecutionResult;
14use vortex_array::IntoArray;
15use vortex_array::Precision;
16use vortex_array::buffer::BufferHandle;
17use vortex_array::dtype::DType;
18use vortex_array::dtype::PType;
19use vortex_array::match_each_unsigned_integer_ptype;
20use vortex_array::scalar::Scalar;
21use vortex_array::serde::ArrayChildren;
22use vortex_array::stats::ArrayStats;
23use vortex_array::stats::StatsSetRef;
24use vortex_array::vtable;
25use vortex_array::vtable::ArrayId;
26use vortex_array::vtable::OperationsVTable;
27use vortex_array::vtable::VTable;
28use vortex_array::vtable::ValidityChild;
29use vortex_array::vtable::ValidityVTableFromChild;
30use vortex_error::VortexExpect;
31use vortex_error::VortexResult;
32use vortex_error::vortex_bail;
33use vortex_error::vortex_ensure;
34use vortex_error::vortex_panic;
35use vortex_session::VortexSession;
36use zigzag::ZigZag as ExternalZigZag;
37
38use crate::compute::ZigZagEncoded;
39use crate::kernel::PARENT_KERNELS;
40use crate::rules::RULES;
41use crate::zigzag_decode;
42
43vtable!(ZigZag);
44
45impl VTable for ZigZag {
46    type Array = ZigZagArray;
47
48    type Metadata = EmptyMetadata;
49    type OperationsVTable = Self;
50    type ValidityVTable = ValidityVTableFromChild;
51
52    fn vtable(_array: &Self::Array) -> &Self {
53        &ZigZag
54    }
55
56    fn id(&self) -> ArrayId {
57        Self::ID
58    }
59
60    fn len(array: &ZigZagArray) -> usize {
61        array.encoded.len()
62    }
63
64    fn dtype(array: &ZigZagArray) -> &DType {
65        &array.dtype
66    }
67
68    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
69        array.stats_set.to_ref(array.as_ref())
70    }
71
72    fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
73        array.dtype.hash(state);
74        array.encoded.array_hash(state, precision);
75    }
76
77    fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
78        array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
79    }
80
81    fn nbuffers(_array: &ZigZagArray) -> usize {
82        0
83    }
84
85    fn buffer(_array: &ZigZagArray, idx: usize) -> BufferHandle {
86        vortex_panic!("ZigZagArray buffer index {idx} out of bounds")
87    }
88
89    fn buffer_name(_array: &ZigZagArray, idx: usize) -> Option<String> {
90        vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds")
91    }
92
93    fn nchildren(_array: &ZigZagArray) -> usize {
94        1
95    }
96
97    fn child(array: &ZigZagArray, idx: usize) -> ArrayRef {
98        match idx {
99            0 => array.encoded().clone(),
100            _ => vortex_panic!("ZigZagArray child index {idx} out of bounds"),
101        }
102    }
103
104    fn child_name(_array: &ZigZagArray, idx: usize) -> String {
105        match idx {
106            0 => "encoded".to_string(),
107            _ => vortex_panic!("ZigZagArray child_name index {idx} out of bounds"),
108        }
109    }
110
111    fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
112        Ok(EmptyMetadata)
113    }
114
115    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
116        Ok(Some(vec![]))
117    }
118
119    fn deserialize(
120        _bytes: &[u8],
121        _dtype: &DType,
122        _len: usize,
123        _buffers: &[BufferHandle],
124        _session: &VortexSession,
125    ) -> VortexResult<Self::Metadata> {
126        Ok(EmptyMetadata)
127    }
128
129    fn build(
130        dtype: &DType,
131        len: usize,
132        _metadata: &Self::Metadata,
133        _buffers: &[BufferHandle],
134        children: &dyn ArrayChildren,
135    ) -> VortexResult<ZigZagArray> {
136        if children.len() != 1 {
137            vortex_bail!("Expected 1 child, got {}", children.len());
138        }
139
140        let ptype = PType::try_from(dtype)?;
141        let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
142
143        let encoded = children.get(0, &encoded_type, len)?;
144        ZigZagArray::try_new(encoded)
145    }
146
147    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
148        vortex_ensure!(
149            children.len() == 1,
150            "ZigZagArray expects exactly 1 child (encoded), got {}",
151            children.len()
152        );
153        array.encoded = children.into_iter().next().vortex_expect("checked");
154        Ok(())
155    }
156
157    fn execute(array: Arc<Self::Array>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
158        Ok(ExecutionResult::done(
159            zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(),
160        ))
161    }
162
163    fn reduce_parent(
164        array: &Self::Array,
165        parent: &ArrayRef,
166        child_idx: usize,
167    ) -> VortexResult<Option<ArrayRef>> {
168        RULES.evaluate(array, parent, child_idx)
169    }
170
171    fn execute_parent(
172        array: &Self::Array,
173        parent: &ArrayRef,
174        child_idx: usize,
175        ctx: &mut ExecutionCtx,
176    ) -> VortexResult<Option<ArrayRef>> {
177        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
178    }
179}
180
181#[derive(Clone, Debug)]
182pub struct ZigZagArray {
183    dtype: DType,
184    encoded: ArrayRef,
185    stats_set: ArrayStats,
186}
187
188#[derive(Clone, Debug)]
189pub struct ZigZag;
190
191impl ZigZag {
192    pub const ID: ArrayId = ArrayId::new_ref("vortex.zigzag");
193}
194
195impl ZigZagArray {
196    pub fn new(encoded: ArrayRef) -> Self {
197        Self::try_new(encoded).vortex_expect("ZigZagArray new")
198    }
199
200    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
201        let encoded_dtype = encoded.dtype().clone();
202        if !encoded_dtype.is_unsigned_int() {
203            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
204        }
205
206        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
207            .with_nullability(encoded_dtype.nullability());
208
209        Ok(Self {
210            dtype,
211            encoded,
212            stats_set: Default::default(),
213        })
214    }
215
216    pub fn ptype(&self) -> PType {
217        self.dtype().as_ptype()
218    }
219
220    pub fn encoded(&self) -> &ArrayRef {
221        &self.encoded
222    }
223}
224
225impl OperationsVTable<ZigZag> for ZigZag {
226    fn scalar_at(array: &ZigZagArray, index: usize) -> VortexResult<Scalar> {
227        let scalar = array.encoded().scalar_at(index)?;
228        if scalar.is_null() {
229            return scalar.primitive_reinterpret_cast(array.ptype());
230        }
231
232        let pscalar = scalar.as_primitive();
233        Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
234            Scalar::primitive(
235                <<P as ZigZagEncoded>::Int>::decode(
236                    pscalar
237                        .typed_value::<P>()
238                        .vortex_expect("zigzag corruption"),
239                ),
240                array.dtype().nullability(),
241            )
242        }))
243    }
244}
245
246impl ValidityChild<ZigZag> for ZigZag {
247    fn validity_child(array: &ZigZagArray) -> &ArrayRef {
248        array.encoded()
249    }
250}
251
252#[cfg(test)]
253mod test {
254    use vortex_array::IntoArray;
255    use vortex_array::ToCanonical;
256    use vortex_array::scalar::Scalar;
257    use vortex_buffer::buffer;
258
259    use super::*;
260    use crate::zigzag_encode;
261
262    #[test]
263    fn test_compute_statistics() -> VortexResult<()> {
264        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]
265            .into_array()
266            .to_primitive();
267        let zigzag = zigzag_encode(array.clone())?;
268
269        assert_eq!(
270            zigzag.statistics().compute_max::<i32>(),
271            array.statistics().compute_max::<i32>()
272        );
273        assert_eq!(
274            zigzag.statistics().compute_null_count(),
275            array.statistics().compute_null_count()
276        );
277        assert_eq!(
278            zigzag.statistics().compute_is_constant(),
279            array.statistics().compute_is_constant()
280        );
281
282        let sliced = zigzag.slice(0..2).unwrap();
283        let sliced = sliced.as_::<ZigZag>();
284        assert_eq!(
285            sliced.scalar_at(sliced.len() - 1).unwrap(),
286            Scalar::from(-5i32)
287        );
288
289        assert_eq!(
290            sliced.statistics().compute_min::<i32>(),
291            array.statistics().compute_min::<i32>()
292        );
293        assert_eq!(
294            sliced.statistics().compute_null_count(),
295            array.statistics().compute_null_count()
296        );
297        assert_eq!(
298            sliced.statistics().compute_is_constant(),
299            array.statistics().compute_is_constant()
300        );
301        Ok(())
302    }
303}