Skip to main content

vortex_zigzag/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5
6use vortex_array::Array;
7use vortex_array::ArrayEq;
8use vortex_array::ArrayHash;
9use vortex_array::ArrayRef;
10use vortex_array::EmptyMetadata;
11use vortex_array::ExecutionCtx;
12use vortex_array::IntoArray;
13use vortex_array::Precision;
14use vortex_array::buffer::BufferHandle;
15use vortex_array::dtype::DType;
16use vortex_array::dtype::PType;
17use vortex_array::match_each_unsigned_integer_ptype;
18use vortex_array::scalar::Scalar;
19use vortex_array::serde::ArrayChildren;
20use vortex_array::stats::ArrayStats;
21use vortex_array::stats::StatsSetRef;
22use vortex_array::vtable;
23use vortex_array::vtable::ArrayId;
24use vortex_array::vtable::OperationsVTable;
25use vortex_array::vtable::VTable;
26use vortex_array::vtable::ValidityChild;
27use vortex_array::vtable::ValidityVTableFromChild;
28use vortex_error::VortexExpect;
29use vortex_error::VortexResult;
30use vortex_error::vortex_bail;
31use vortex_error::vortex_ensure;
32use vortex_error::vortex_panic;
33use vortex_session::VortexSession;
34use zigzag::ZigZag as ExternalZigZag;
35
36use crate::compute::ZigZagEncoded;
37use crate::kernel::PARENT_KERNELS;
38use crate::rules::RULES;
39use crate::zigzag_decode;
40
41vtable!(ZigZag);
42
43impl VTable for ZigZagVTable {
44    type Array = ZigZagArray;
45
46    type Metadata = EmptyMetadata;
47    type OperationsVTable = Self;
48    type ValidityVTable = ValidityVTableFromChild;
49
50    fn id(_array: &Self::Array) -> ArrayId {
51        Self::ID
52    }
53
54    fn len(array: &ZigZagArray) -> usize {
55        array.encoded.len()
56    }
57
58    fn dtype(array: &ZigZagArray) -> &DType {
59        &array.dtype
60    }
61
62    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
63        array.stats_set.to_ref(array.as_ref())
64    }
65
66    fn array_hash<H: std::hash::Hasher>(array: &ZigZagArray, state: &mut H, precision: Precision) {
67        array.dtype.hash(state);
68        array.encoded.array_hash(state, precision);
69    }
70
71    fn array_eq(array: &ZigZagArray, other: &ZigZagArray, precision: Precision) -> bool {
72        array.dtype == other.dtype && array.encoded.array_eq(&other.encoded, precision)
73    }
74
75    fn nbuffers(_array: &ZigZagArray) -> usize {
76        0
77    }
78
79    fn buffer(_array: &ZigZagArray, idx: usize) -> BufferHandle {
80        vortex_panic!("ZigZagArray buffer index {idx} out of bounds")
81    }
82
83    fn buffer_name(_array: &ZigZagArray, idx: usize) -> Option<String> {
84        vortex_panic!("ZigZagArray buffer_name index {idx} out of bounds")
85    }
86
87    fn nchildren(_array: &ZigZagArray) -> usize {
88        1
89    }
90
91    fn child(array: &ZigZagArray, idx: usize) -> ArrayRef {
92        match idx {
93            0 => array.encoded().clone(),
94            _ => vortex_panic!("ZigZagArray child index {idx} out of bounds"),
95        }
96    }
97
98    fn child_name(_array: &ZigZagArray, idx: usize) -> String {
99        match idx {
100            0 => "encoded".to_string(),
101            _ => vortex_panic!("ZigZagArray child_name index {idx} out of bounds"),
102        }
103    }
104
105    fn metadata(_array: &ZigZagArray) -> VortexResult<Self::Metadata> {
106        Ok(EmptyMetadata)
107    }
108
109    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
110        Ok(Some(vec![]))
111    }
112
113    fn deserialize(
114        _bytes: &[u8],
115        _dtype: &DType,
116        _len: usize,
117        _buffers: &[BufferHandle],
118        _session: &VortexSession,
119    ) -> VortexResult<Self::Metadata> {
120        Ok(EmptyMetadata)
121    }
122
123    fn build(
124        dtype: &DType,
125        len: usize,
126        _metadata: &Self::Metadata,
127        _buffers: &[BufferHandle],
128        children: &dyn ArrayChildren,
129    ) -> VortexResult<ZigZagArray> {
130        if children.len() != 1 {
131            vortex_bail!("Expected 1 child, got {}", children.len());
132        }
133
134        let ptype = PType::try_from(dtype)?;
135        let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability());
136
137        let encoded = children.get(0, &encoded_type, len)?;
138        ZigZagArray::try_new(encoded)
139    }
140
141    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
142        vortex_ensure!(
143            children.len() == 1,
144            "ZigZagArray expects exactly 1 child (encoded), got {}",
145            children.len()
146        );
147        array.encoded = children.into_iter().next().vortex_expect("checked");
148        Ok(())
149    }
150
151    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
152        Ok(zigzag_decode(array.encoded().clone().execute(ctx)?).into_array())
153    }
154
155    fn reduce_parent(
156        array: &Self::Array,
157        parent: &ArrayRef,
158        child_idx: usize,
159    ) -> VortexResult<Option<ArrayRef>> {
160        RULES.evaluate(array, parent, child_idx)
161    }
162
163    fn execute_parent(
164        array: &Self::Array,
165        parent: &ArrayRef,
166        child_idx: usize,
167        ctx: &mut ExecutionCtx,
168    ) -> VortexResult<Option<ArrayRef>> {
169        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
170    }
171}
172
173#[derive(Clone, Debug)]
174pub struct ZigZagArray {
175    dtype: DType,
176    encoded: ArrayRef,
177    stats_set: ArrayStats,
178}
179
180#[derive(Debug)]
181pub struct ZigZagVTable;
182
183impl ZigZagVTable {
184    pub const ID: ArrayId = ArrayId::new_ref("vortex.zigzag");
185}
186
187impl ZigZagArray {
188    pub fn new(encoded: ArrayRef) -> Self {
189        Self::try_new(encoded).vortex_expect("ZigZagArray new")
190    }
191
192    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
193        let encoded_dtype = encoded.dtype().clone();
194        if !encoded_dtype.is_unsigned_int() {
195            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
196        }
197
198        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
199            .with_nullability(encoded_dtype.nullability());
200
201        Ok(Self {
202            dtype,
203            encoded,
204            stats_set: Default::default(),
205        })
206    }
207
208    pub fn ptype(&self) -> PType {
209        self.dtype().as_ptype()
210    }
211
212    pub fn encoded(&self) -> &ArrayRef {
213        &self.encoded
214    }
215}
216
217impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
218    fn scalar_at(array: &ZigZagArray, index: usize) -> VortexResult<Scalar> {
219        let scalar = array.encoded().scalar_at(index)?;
220        if scalar.is_null() {
221            return scalar.primitive_reinterpret_cast(array.ptype());
222        }
223
224        let pscalar = scalar.as_primitive();
225        Ok(match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
226            Scalar::primitive(
227                <<P as ZigZagEncoded>::Int>::decode(
228                    pscalar
229                        .typed_value::<P>()
230                        .vortex_expect("zigzag corruption"),
231                ),
232                array.dtype().nullability(),
233            )
234        }))
235    }
236}
237
238impl ValidityChild<ZigZagVTable> for ZigZagVTable {
239    fn validity_child(array: &ZigZagArray) -> &ArrayRef {
240        array.encoded()
241    }
242}
243
244#[cfg(test)]
245mod test {
246    use vortex_array::IntoArray;
247    use vortex_array::ToCanonical;
248    use vortex_array::scalar::Scalar;
249    use vortex_buffer::buffer;
250
251    use super::*;
252    use crate::zigzag_encode;
253
254    #[test]
255    fn test_compute_statistics() -> VortexResult<()> {
256        let array = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]
257            .into_array()
258            .to_primitive();
259        let zigzag = zigzag_encode(array.clone())?;
260
261        assert_eq!(
262            zigzag.statistics().compute_max::<i32>(),
263            array.statistics().compute_max::<i32>()
264        );
265        assert_eq!(
266            zigzag.statistics().compute_null_count(),
267            array.statistics().compute_null_count()
268        );
269        assert_eq!(
270            zigzag.statistics().compute_is_constant(),
271            array.statistics().compute_is_constant()
272        );
273
274        let sliced = zigzag.slice(0..2).unwrap();
275        let sliced = sliced.as_::<ZigZagVTable>();
276        assert_eq!(
277            sliced.scalar_at(sliced.len() - 1).unwrap(),
278            Scalar::from(-5i32)
279        );
280
281        assert_eq!(
282            sliced.statistics().compute_min::<i32>(),
283            array.statistics().compute_min::<i32>()
284        );
285        assert_eq!(
286            sliced.statistics().compute_null_count(),
287            array.statistics().compute_null_count()
288        );
289        assert_eq!(
290            sliced.statistics().compute_is_constant(),
291            array.statistics().compute_is_constant()
292        );
293        Ok(())
294    }
295}