Skip to main content

vortex_sequence/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Add;
5
6use num_traits::CheckedAdd;
7use num_traits::CheckedSub;
8use vortex_array::ArrayRef;
9use vortex_array::ArrayView;
10use vortex_array::ExecutionCtx;
11use vortex_array::IntoArray;
12use vortex_array::arrays::Primitive;
13use vortex_array::arrays::PrimitiveArray;
14use vortex_array::dtype::NativePType;
15use vortex_array::dtype::Nullability;
16use vortex_array::match_each_integer_ptype;
17use vortex_array::match_each_native_ptype;
18use vortex_array::scalar::PValue;
19use vortex_array::validity::Validity;
20use vortex_buffer::BufferMut;
21use vortex_buffer::trusted_len::TrustedLen;
22use vortex_error::VortexResult;
23
24use crate::Sequence;
25use crate::SequenceArray;
26use crate::SequenceData;
27/// An iterator that yields `base, base + step, base + 2*step, ...` via repeated addition.
28struct SequenceIter<T> {
29    acc: T,
30    step: T,
31    remaining: usize,
32}
33
34impl<T: Copy + Add<Output = T>> Iterator for SequenceIter<T> {
35    type Item = T;
36
37    #[inline]
38    fn next(&mut self) -> Option<T> {
39        if self.remaining == 0 {
40            return None;
41        }
42        let val = self.acc;
43        self.remaining -= 1;
44        if self.remaining > 0 {
45            self.acc = self.acc + self.step;
46        }
47        Some(val)
48    }
49
50    #[inline]
51    fn size_hint(&self) -> (usize, Option<usize>) {
52        (self.remaining, Some(self.remaining))
53    }
54}
55
56// SAFETY: `size_hint` returns an exact count and `next` yields exactly that many items.
57unsafe impl<T: Copy + Add<Output = T>> TrustedLen for SequenceIter<T> {}
58
59/// Decompresses a [`SequenceArray`] into a [`PrimitiveArray`].
60#[inline]
61pub fn sequence_decompress(array: &SequenceArray) -> VortexResult<ArrayRef> {
62    fn decompress_inner<P: NativePType>(
63        base: P,
64        multiplier: P,
65        len: usize,
66        nullability: Nullability,
67    ) -> PrimitiveArray {
68        let values = BufferMut::from_trusted_len_iter(SequenceIter {
69            acc: base,
70            step: multiplier,
71            remaining: len,
72        });
73        PrimitiveArray::new(values, Validity::from(nullability))
74    }
75
76    let prim = match_each_native_ptype!(array.ptype(), |P| {
77        let base = array.base().cast::<P>()?;
78        let multiplier = array.multiplier().cast::<P>()?;
79        decompress_inner(base, multiplier, array.len(), array.dtype().nullability())
80    });
81    Ok(prim.into_array())
82}
83
84/// Encodes a primitive array into a sequence array, this is possible if:
85/// 1. The array is not empty, and contains no nulls
86/// 2. The array is not a float array. (This is due to precision issues, how it will stack well with ALP).
87/// 3. The array is representable as a sequence `A[i] = base + i * multiplier` for multiplier != 0.
88/// 4. The sequence has no deviations from the equation, this could be fixed with patches. However,
89///    we might want a different array for that since sequence provide fast access.
90pub fn sequence_encode(
91    primitive_array: ArrayView<'_, Primitive>,
92    ctx: &mut ExecutionCtx,
93) -> VortexResult<Option<ArrayRef>> {
94    if primitive_array.is_empty() {
95        // we cannot encode an empty array
96        return Ok(None);
97    }
98
99    if !primitive_array.array().all_valid(ctx)? {
100        return Ok(None);
101    }
102
103    if primitive_array.ptype().is_float() {
104        // for now, we don't handle float arrays, due to possible precision issues
105        return Ok(None);
106    }
107
108    match_each_integer_ptype!(primitive_array.ptype(), |P| {
109        encode_primitive_array(
110            primitive_array.as_slice::<P>(),
111            primitive_array.dtype().nullability(),
112        )
113    })
114}
115
116fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
117    slice: &[P],
118    nullability: Nullability,
119) -> VortexResult<Option<ArrayRef>> {
120    if slice.len() == 1 {
121        // The multiplier here can be any value, zero is chosen
122        return Sequence::try_new_typed(slice[0], P::zero(), nullability, 1)
123            .map(|a| Some(a.into_array()));
124    }
125    let base = slice[0];
126    let Some(multiplier) = slice[1].checked_sub(&base) else {
127        return Ok(None);
128    };
129
130    if multiplier == P::zero() {
131        return Ok(None);
132    }
133
134    if SequenceData::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
135        // If the last value is out of range, we cannot encode
136        return Ok(None);
137    }
138
139    slice
140        .windows(2)
141        .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
142        .then_some(
143            Sequence::try_new_typed(base, multiplier, nullability, slice.len())
144                .map(|a| a.into_array()),
145        )
146        .transpose()
147}
148
149#[cfg(test)]
150mod tests {
151    #[expect(unused_imports)]
152    use itertools::Itertools;
153    use vortex_array::LEGACY_SESSION;
154    use vortex_array::VortexSessionExecute;
155    use vortex_array::arrays::PrimitiveArray;
156    use vortex_array::assert_arrays_eq;
157
158    use crate::sequence_encode;
159
160    #[test]
161    fn test_encode_array_success() {
162        let mut ctx = LEGACY_SESSION.create_execution_ctx();
163        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
164        let encoded = sequence_encode(primitive_array.as_view(), &mut ctx).unwrap();
165        assert!(encoded.is_some());
166        let decoded = encoded
167            .unwrap()
168            .execute::<PrimitiveArray>(&mut ctx)
169            .unwrap();
170        assert_arrays_eq!(decoded, primitive_array);
171    }
172
173    #[test]
174    fn test_encode_array_1_success() {
175        let mut ctx = LEGACY_SESSION.create_execution_ctx();
176        let primitive_array = PrimitiveArray::from_iter([0]);
177        let encoded = sequence_encode(primitive_array.as_view(), &mut ctx).unwrap();
178        assert!(encoded.is_some());
179        let decoded = encoded
180            .unwrap()
181            .execute::<PrimitiveArray>(&mut ctx)
182            .unwrap();
183        assert_arrays_eq!(decoded, primitive_array);
184    }
185
186    #[test]
187    fn test_encode_array_fail() {
188        let mut ctx = LEGACY_SESSION.create_execution_ctx();
189        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
190
191        let encoded = sequence_encode(primitive_array.as_view(), &mut ctx).unwrap();
192        assert!(encoded.is_none());
193    }
194
195    #[test]
196    fn test_encode_array_fail_oob() {
197        let mut ctx = LEGACY_SESSION.create_execution_ctx();
198        let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
199
200        let encoded = sequence_encode(primitive_array.as_view(), &mut ctx).unwrap();
201        assert!(encoded.is_none());
202    }
203
204    #[test]
205    fn test_encode_all_u8_values() {
206        let mut ctx = LEGACY_SESSION.create_execution_ctx();
207        let primitive_array = PrimitiveArray::from_iter(0u8..=255);
208        let encoded = sequence_encode(primitive_array.as_view(), &mut ctx).unwrap();
209        assert!(encoded.is_some());
210        let decoded = encoded
211            .unwrap()
212            .execute::<PrimitiveArray>(&mut ctx)
213            .unwrap();
214        assert_arrays_eq!(decoded, primitive_array);
215    }
216}