// vortex_zigzag/array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Range;
5
6use vortex_array::stats::{ArrayStats, StatsSetRef};
7use vortex_array::vtable::{
8    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityChild,
9    ValidityVTableFromChild,
10};
11use vortex_array::{
12    Array, ArrayRef, Canonical, EncodingId, EncodingRef, IntoArray, ToCanonical, vtable,
13};
14use vortex_dtype::{DType, PType, match_each_unsigned_integer_ptype};
15use vortex_error::{VortexExpect, VortexResult, vortex_bail};
16use vortex_scalar::Scalar;
17use zigzag::ZigZag as ExternalZigZag;
18
19use crate::compute::ZigZagEncoded;
20use crate::zigzag_decode;
21
22vtable!(ZigZag);
23
24impl VTable for ZigZagVTable {
25    type Array = ZigZagArray;
26    type Encoding = ZigZagEncoding;
27
28    type ArrayVTable = Self;
29    type CanonicalVTable = Self;
30    type OperationsVTable = Self;
31    type ValidityVTable = ValidityVTableFromChild;
32    type VisitorVTable = Self;
33    type ComputeVTable = NotSupported;
34    type EncodeVTable = Self;
35    type SerdeVTable = Self;
36    type PipelineVTable = NotSupported;
37
38    fn id(_encoding: &Self::Encoding) -> EncodingId {
39        EncodingId::new_ref("vortex.zigzag")
40    }
41
42    fn encoding(_array: &Self::Array) -> EncodingRef {
43        EncodingRef::new_ref(ZigZagEncoding.as_ref())
44    }
45}
46
47#[derive(Clone, Debug)]
48pub struct ZigZagArray {
49    dtype: DType,
50    encoded: ArrayRef,
51    stats_set: ArrayStats,
52}
53
/// Unit type naming the ZigZag encoding; used as `VTable::Encoding` for `ZigZagVTable`.
#[derive(Debug, Clone)]
pub struct ZigZagEncoding;
56
57impl ZigZagArray {
58    pub fn new(encoded: ArrayRef) -> Self {
59        Self::try_new(encoded).vortex_expect("ZigZigArray new")
60    }
61
62    pub fn try_new(encoded: ArrayRef) -> VortexResult<Self> {
63        let encoded_dtype = encoded.dtype().clone();
64        if !encoded_dtype.is_unsigned_int() {
65            vortex_bail!(MismatchedTypes: "unsigned int", encoded_dtype);
66        }
67
68        let dtype = DType::from(PType::try_from(&encoded_dtype)?.to_signed())
69            .with_nullability(encoded_dtype.nullability());
70
71        Ok(Self {
72            dtype,
73            encoded,
74            stats_set: Default::default(),
75        })
76    }
77
78    pub fn ptype(&self) -> PType {
79        self.dtype().as_ptype()
80    }
81
82    pub fn encoded(&self) -> &ArrayRef {
83        &self.encoded
84    }
85}
86
87impl ArrayVTable<ZigZagVTable> for ZigZagVTable {
88    fn len(array: &ZigZagArray) -> usize {
89        array.encoded.len()
90    }
91
92    fn dtype(array: &ZigZagArray) -> &DType {
93        &array.dtype
94    }
95
96    fn stats(array: &ZigZagArray) -> StatsSetRef<'_> {
97        array.stats_set.to_ref(array.as_ref())
98    }
99}
100
101impl CanonicalVTable<ZigZagVTable> for ZigZagVTable {
102    fn canonicalize(array: &ZigZagArray) -> Canonical {
103        Canonical::Primitive(zigzag_decode(array.encoded().to_primitive()))
104    }
105}
106
107impl OperationsVTable<ZigZagVTable> for ZigZagVTable {
108    fn slice(array: &ZigZagArray, range: Range<usize>) -> ArrayRef {
109        ZigZagArray::new(array.encoded().slice(range)).into_array()
110    }
111
112    fn scalar_at(array: &ZigZagArray, index: usize) -> Scalar {
113        let scalar = array.encoded().scalar_at(index);
114        if scalar.is_null() {
115            return scalar.reinterpret_cast(array.ptype());
116        }
117
118        let pscalar = scalar.as_primitive();
119        match_each_unsigned_integer_ptype!(pscalar.ptype(), |P| {
120            Scalar::primitive(
121                <<P as ZigZagEncoded>::Int>::decode(
122                    pscalar
123                        .typed_value::<P>()
124                        .vortex_expect("zigzag corruption"),
125                ),
126                array.dtype().nullability(),
127            )
128        })
129    }
130}
131
132impl ValidityChild<ZigZagVTable> for ZigZagVTable {
133    fn validity_child(array: &ZigZagArray) -> &dyn Array {
134        array.encoded()
135    }
136}
137
#[cfg(test)]
mod test {
    use vortex_array::IntoArray;
    use vortex_buffer::buffer;
    use vortex_scalar::Scalar;

    use super::*;

    /// Statistics computed on a zigzag-encoded array (and on a slice of it) must agree with
    /// those computed on the plain source array.
    #[test]
    fn test_compute_statistics() {
        let plain = buffer![1i32, -5i32, 2, 3, 4, 5, 6, 7, 8, 9, 10].into_array();
        let plain_canonical = plain.to_canonical();
        let encoded = ZigZagEncoding
            .encode(&plain_canonical, None)
            .unwrap()
            .unwrap();

        // Whole-array statistics.
        assert_eq!(
            encoded.statistics().compute_max::<i32>(),
            plain.statistics().compute_max::<i32>()
        );
        assert_eq!(
            encoded.statistics().compute_null_count(),
            plain.statistics().compute_null_count()
        );
        assert_eq!(
            encoded.statistics().compute_is_constant(),
            plain.statistics().compute_is_constant()
        );

        // The first two elements are `1` and `-5`, so the slice's last element is `-5`.
        let head = encoded.slice(0..2);
        let head_zigzag = head.as_::<ZigZagVTable>();
        let last_index = head_zigzag.len() - 1;
        assert_eq!(head_zigzag.scalar_at(last_index), Scalar::from(-5i32));

        // Statistics on the slice, compared against the full plain array.
        assert_eq!(
            head_zigzag.statistics().compute_min::<i32>(),
            plain.statistics().compute_min::<i32>()
        );
        assert_eq!(
            head_zigzag.statistics().compute_null_count(),
            plain.statistics().compute_null_count()
        );
        assert_eq!(
            head_zigzag.statistics().compute_is_constant(),
            plain.statistics().compute_is_constant()
        );
    }
}