Skip to main content

vortex_sequence/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Add;
5
6use num_traits::CheckedAdd;
7use num_traits::CheckedSub;
8use vortex_array::ArrayRef;
9use vortex_array::ArrayView;
10use vortex_array::IntoArray;
11use vortex_array::LEGACY_SESSION;
12use vortex_array::VortexSessionExecute;
13use vortex_array::arrays::Primitive;
14use vortex_array::arrays::PrimitiveArray;
15use vortex_array::dtype::NativePType;
16use vortex_array::dtype::Nullability;
17use vortex_array::match_each_integer_ptype;
18use vortex_array::match_each_native_ptype;
19use vortex_array::scalar::PValue;
20use vortex_array::validity::Validity;
21use vortex_buffer::BufferMut;
22use vortex_buffer::trusted_len::TrustedLen;
23use vortex_error::VortexResult;
24
25use crate::Sequence;
26use crate::SequenceArray;
27use crate::SequenceData;
28/// An iterator that yields `base, base + step, base + 2*step, ...` via repeated addition.
29struct SequenceIter<T> {
30    acc: T,
31    step: T,
32    remaining: usize,
33}
34
35impl<T: Copy + Add<Output = T>> Iterator for SequenceIter<T> {
36    type Item = T;
37
38    #[inline]
39    fn next(&mut self) -> Option<T> {
40        if self.remaining == 0 {
41            return None;
42        }
43        let val = self.acc;
44        self.remaining -= 1;
45        if self.remaining > 0 {
46            self.acc = self.acc + self.step;
47        }
48        Some(val)
49    }
50
51    #[inline]
52    fn size_hint(&self) -> (usize, Option<usize>) {
53        (self.remaining, Some(self.remaining))
54    }
55}
56
57// SAFETY: `size_hint` returns an exact count and `next` yields exactly that many items.
58unsafe impl<T: Copy + Add<Output = T>> TrustedLen for SequenceIter<T> {}
59
60/// Decompresses a [`SequenceArray`] into a [`PrimitiveArray`].
61#[inline]
62pub fn sequence_decompress(array: &SequenceArray) -> VortexResult<ArrayRef> {
63    fn decompress_inner<P: NativePType>(
64        base: P,
65        multiplier: P,
66        len: usize,
67        nullability: Nullability,
68    ) -> PrimitiveArray {
69        let values = BufferMut::from_trusted_len_iter(SequenceIter {
70            acc: base,
71            step: multiplier,
72            remaining: len,
73        });
74        PrimitiveArray::new(values, Validity::from(nullability))
75    }
76
77    let prim = match_each_native_ptype!(array.ptype(), |P| {
78        let base = array.base().cast::<P>()?;
79        let multiplier = array.multiplier().cast::<P>()?;
80        decompress_inner(base, multiplier, array.len(), array.dtype().nullability())
81    });
82    Ok(prim.into_array())
83}
84
85/// Encodes a primitive array into a sequence array, this is possible if:
86/// 1. The array is not empty, and contains no nulls
87/// 2. The array is not a float array. (This is due to precision issues, how it will stack well with ALP).
88/// 3. The array is representable as a sequence `A[i] = base + i * multiplier` for multiplier != 0.
89/// 4. The sequence has no deviations from the equation, this could be fixed with patches. However,
90///    we might want a different array for that since sequence provide fast access.
91pub fn sequence_encode(
92    primitive_array: ArrayView<'_, Primitive>,
93) -> VortexResult<Option<ArrayRef>> {
94    if primitive_array.is_empty() {
95        // we cannot encode an empty array
96        return Ok(None);
97    }
98
99    if !primitive_array
100        .array()
101        .all_valid(&mut LEGACY_SESSION.create_execution_ctx())?
102    {
103        return Ok(None);
104    }
105
106    if primitive_array.ptype().is_float() {
107        // for now, we don't handle float arrays, due to possible precision issues
108        return Ok(None);
109    }
110
111    match_each_integer_ptype!(primitive_array.ptype(), |P| {
112        encode_primitive_array(
113            primitive_array.as_slice::<P>(),
114            primitive_array.dtype().nullability(),
115        )
116    })
117}
118
119fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
120    slice: &[P],
121    nullability: Nullability,
122) -> VortexResult<Option<ArrayRef>> {
123    if slice.len() == 1 {
124        // The multiplier here can be any value, zero is chosen
125        return Sequence::try_new_typed(slice[0], P::zero(), nullability, 1)
126            .map(|a| Some(a.into_array()));
127    }
128    let base = slice[0];
129    let Some(multiplier) = slice[1].checked_sub(&base) else {
130        return Ok(None);
131    };
132
133    if multiplier == P::zero() {
134        return Ok(None);
135    }
136
137    if SequenceData::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
138        // If the last value is out of range, we cannot encode
139        return Ok(None);
140    }
141
142    slice
143        .windows(2)
144        .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
145        .then_some(
146            Sequence::try_new_typed(base, multiplier, nullability, slice.len())
147                .map(|a| a.into_array()),
148        )
149        .transpose()
150}
151
152#[cfg(test)]
153mod tests {
154    #[expect(unused_imports)]
155    use itertools::Itertools;
156    use vortex_array::ToCanonical;
157    use vortex_array::arrays::PrimitiveArray;
158    use vortex_array::assert_arrays_eq;
159
160    use crate::sequence_encode;
161
162    #[test]
163    fn test_encode_array_success() {
164        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
165        let encoded = sequence_encode(primitive_array.as_view()).unwrap();
166        assert!(encoded.is_some());
167        let decoded = encoded.unwrap().to_primitive();
168        assert_arrays_eq!(decoded, primitive_array);
169    }
170
171    #[test]
172    fn test_encode_array_1_success() {
173        let primitive_array = PrimitiveArray::from_iter([0]);
174        let encoded = sequence_encode(primitive_array.as_view()).unwrap();
175        assert!(encoded.is_some());
176        let decoded = encoded.unwrap().to_primitive();
177        assert_arrays_eq!(decoded, primitive_array);
178    }
179
180    #[test]
181    fn test_encode_array_fail() {
182        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
183
184        let encoded = sequence_encode(primitive_array.as_view()).unwrap();
185        assert!(encoded.is_none());
186    }
187
188    #[test]
189    fn test_encode_array_fail_oob() {
190        let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
191
192        let encoded = sequence_encode(primitive_array.as_view()).unwrap();
193        assert!(encoded.is_none());
194    }
195
196    #[test]
197    fn test_encode_all_u8_values() {
198        let primitive_array = PrimitiveArray::from_iter(0u8..=255);
199        let encoded = sequence_encode(primitive_array.as_view()).unwrap();
200        assert!(encoded.is_some());
201        let decoded = encoded.unwrap().to_primitive();
202        assert_arrays_eq!(decoded, primitive_array);
203    }
204}