vortex_sequence/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::CheckedAdd;
5use num_traits::CheckedSub;
6use vortex_array::ArrayRef;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_dtype::NativePType;
9use vortex_dtype::Nullability;
10use vortex_dtype::match_each_integer_ptype;
11use vortex_error::VortexResult;
12use vortex_scalar::PValue;
13
14use crate::SequenceArray;
15
16/// Encodes a primitive array into a sequence array, this is possible if:
17/// 1. The array is not empty, and contains no nulls
18/// 2. The array is not a float array. (This is due to precision issues, how it will stack well with ALP).
19/// 3. The array is representable as a sequence `A[i] = base + i * multiplier` for multiplier != 0.
20/// 4. The sequence has no deviations from the equation, this could be fixed with patches. However,
21///    we might want a different array for that since sequence provide fast access.
22pub fn sequence_encode(primitive_array: &PrimitiveArray) -> VortexResult<Option<ArrayRef>> {
23    if primitive_array.is_empty() {
24        // we cannot encode an empty array
25        return Ok(None);
26    }
27
28    if !primitive_array.all_valid() {
29        return Ok(None);
30    }
31
32    if primitive_array.ptype().is_float() {
33        // for now, we don't handle float arrays, due to possible precision issues
34        return Ok(None);
35    }
36
37    match_each_integer_ptype!(primitive_array.ptype(), |P| {
38        encode_primitive_array(
39            primitive_array.as_slice::<P>(),
40            primitive_array.dtype().nullability(),
41        )
42    })
43}
44
45fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
46    slice: &[P],
47    nullability: Nullability,
48) -> VortexResult<Option<ArrayRef>> {
49    if slice.len() == 1 {
50        // The multiplier here can be any value, zero is chosen
51        return SequenceArray::typed_new(slice[0], P::zero(), nullability, 1)
52            .map(|a| Some(a.to_array()));
53    }
54    let base = slice[0];
55    let Some(multiplier) = slice[1].checked_sub(&base) else {
56        return Ok(None);
57    };
58
59    if multiplier == P::zero() {
60        return Ok(None);
61    }
62
63    if SequenceArray::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
64        // If the last value is out of range, we cannot encode
65        return Ok(None);
66    }
67
68    slice
69        .windows(2)
70        .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
71        .then_some(
72            SequenceArray::typed_new(base, multiplier, nullability, slice.len())
73                .map(|a| a.to_array()),
74        )
75        .transpose()
76}
77
78#[cfg(test)]
79mod tests {
80    #[allow(unused_imports)]
81    use itertools::Itertools;
82    use vortex_array::ToCanonical;
83    use vortex_array::arrays::PrimitiveArray;
84    use vortex_array::assert_arrays_eq;
85
86    use crate::sequence_encode;
87
88    #[test]
89    fn test_encode_array_success() {
90        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
91        let encoded = sequence_encode(&primitive_array).unwrap();
92        assert!(encoded.is_some());
93        let decoded = encoded.unwrap().to_primitive();
94        assert_arrays_eq!(decoded, primitive_array);
95    }
96
97    #[test]
98    fn test_encode_array_1_success() {
99        let primitive_array = PrimitiveArray::from_iter([0]);
100        let encoded = sequence_encode(&primitive_array).unwrap();
101        assert!(encoded.is_some());
102        let decoded = encoded.unwrap().to_primitive();
103        assert_arrays_eq!(decoded, primitive_array);
104    }
105
106    #[test]
107    fn test_encode_array_fail() {
108        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
109
110        let encoded = sequence_encode(&primitive_array).unwrap();
111        assert!(encoded.is_none());
112    }
113
114    #[test]
115    fn test_encode_array_fail_oob() {
116        let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
117
118        let encoded = sequence_encode(&primitive_array).unwrap();
119        assert!(encoded.is_none());
120    }
121}