Skip to main content

vortex_sequence/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::ops::Add;
5
6use num_traits::CheckedAdd;
7use num_traits::CheckedSub;
8use vortex_array::ArrayRef;
9use vortex_array::IntoArray;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::dtype::NativePType;
12use vortex_array::dtype::Nullability;
13use vortex_array::match_each_integer_ptype;
14use vortex_array::match_each_native_ptype;
15use vortex_array::scalar::PValue;
16use vortex_array::validity::Validity;
17use vortex_buffer::BufferMut;
18use vortex_buffer::trusted_len::TrustedLen;
19use vortex_error::VortexResult;
20
21use crate::Sequence;
22use crate::SequenceArray;
23use crate::SequenceData;
24/// An iterator that yields `base, base + step, base + 2*step, ...` via repeated addition.
25struct SequenceIter<T> {
26    acc: T,
27    step: T,
28    remaining: usize,
29}
30
31impl<T: Copy + Add<Output = T>> Iterator for SequenceIter<T> {
32    type Item = T;
33
34    #[inline]
35    fn next(&mut self) -> Option<T> {
36        if self.remaining == 0 {
37            return None;
38        }
39        let val = self.acc;
40        self.remaining -= 1;
41        if self.remaining > 0 {
42            self.acc = self.acc + self.step;
43        }
44        Some(val)
45    }
46
47    #[inline]
48    fn size_hint(&self) -> (usize, Option<usize>) {
49        (self.remaining, Some(self.remaining))
50    }
51}
52
53// SAFETY: `size_hint` returns an exact count and `next` yields exactly that many items.
54unsafe impl<T: Copy + Add<Output = T>> TrustedLen for SequenceIter<T> {}
55
56/// Decompresses a [`SequenceArray`] into a [`PrimitiveArray`].
57#[inline]
58pub fn sequence_decompress(array: &SequenceArray) -> VortexResult<ArrayRef> {
59    fn decompress_inner<P: NativePType>(
60        base: P,
61        multiplier: P,
62        len: usize,
63        nullability: Nullability,
64    ) -> PrimitiveArray {
65        let values = BufferMut::from_trusted_len_iter(SequenceIter {
66            acc: base,
67            step: multiplier,
68            remaining: len,
69        });
70        PrimitiveArray::new(values, Validity::from(nullability))
71    }
72
73    let prim = match_each_native_ptype!(array.ptype(), |P| {
74        let base = array.base().cast::<P>()?;
75        let multiplier = array.multiplier().cast::<P>()?;
76        decompress_inner(base, multiplier, array.len(), array.dtype().nullability())
77    });
78    Ok(prim.into_array())
79}
80
81/// Encodes a primitive array into a sequence array, this is possible if:
82/// 1. The array is not empty, and contains no nulls
83/// 2. The array is not a float array. (This is due to precision issues, how it will stack well with ALP).
84/// 3. The array is representable as a sequence `A[i] = base + i * multiplier` for multiplier != 0.
85/// 4. The sequence has no deviations from the equation, this could be fixed with patches. However,
86///    we might want a different array for that since sequence provide fast access.
87pub fn sequence_encode(primitive_array: &PrimitiveArray) -> VortexResult<Option<ArrayRef>> {
88    if primitive_array.is_empty() {
89        // we cannot encode an empty array
90        return Ok(None);
91    }
92
93    if !primitive_array.all_valid()? {
94        return Ok(None);
95    }
96
97    if primitive_array.ptype().is_float() {
98        // for now, we don't handle float arrays, due to possible precision issues
99        return Ok(None);
100    }
101
102    match_each_integer_ptype!(primitive_array.ptype(), |P| {
103        encode_primitive_array(
104            primitive_array.as_slice::<P>(),
105            primitive_array.dtype().nullability(),
106        )
107    })
108}
109
110fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
111    slice: &[P],
112    nullability: Nullability,
113) -> VortexResult<Option<ArrayRef>> {
114    if slice.len() == 1 {
115        // The multiplier here can be any value, zero is chosen
116        return Sequence::try_new_typed(slice[0], P::zero(), nullability, 1)
117            .map(|a| Some(a.into_array()));
118    }
119    let base = slice[0];
120    let Some(multiplier) = slice[1].checked_sub(&base) else {
121        return Ok(None);
122    };
123
124    if multiplier == P::zero() {
125        return Ok(None);
126    }
127
128    if SequenceData::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
129        // If the last value is out of range, we cannot encode
130        return Ok(None);
131    }
132
133    slice
134        .windows(2)
135        .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
136        .then_some(
137            Sequence::try_new_typed(base, multiplier, nullability, slice.len())
138                .map(|a| a.into_array()),
139        )
140        .transpose()
141}
142
143#[cfg(test)]
144mod tests {
145    #[allow(unused_imports)]
146    use itertools::Itertools;
147    use vortex_array::ToCanonical;
148    use vortex_array::arrays::PrimitiveArray;
149    use vortex_array::assert_arrays_eq;
150
151    use crate::sequence_encode;
152
153    #[test]
154    fn test_encode_array_success() {
155        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
156        let encoded = sequence_encode(&primitive_array).unwrap();
157        assert!(encoded.is_some());
158        let decoded = encoded.unwrap().to_primitive();
159        assert_arrays_eq!(decoded, primitive_array);
160    }
161
162    #[test]
163    fn test_encode_array_1_success() {
164        let primitive_array = PrimitiveArray::from_iter([0]);
165        let encoded = sequence_encode(&primitive_array).unwrap();
166        assert!(encoded.is_some());
167        let decoded = encoded.unwrap().to_primitive();
168        assert_arrays_eq!(decoded, primitive_array);
169    }
170
171    #[test]
172    fn test_encode_array_fail() {
173        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
174
175        let encoded = sequence_encode(&primitive_array).unwrap();
176        assert!(encoded.is_none());
177    }
178
179    #[test]
180    fn test_encode_array_fail_oob() {
181        let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
182
183        let encoded = sequence_encode(&primitive_array).unwrap();
184        assert!(encoded.is_none());
185    }
186
187    #[test]
188    fn test_encode_all_u8_values() {
189        let primitive_array = PrimitiveArray::from_iter(0u8..=255);
190        let encoded = sequence_encode(&primitive_array).unwrap();
191        assert!(encoded.is_some());
192        let decoded = encoded.unwrap().to_primitive();
193        assert_arrays_eq!(decoded, primitive_array);
194    }
195}