vortex_sequence/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::{CheckedAdd, CheckedSub};
5use vortex_array::ArrayRef;
6use vortex_array::arrays::PrimitiveArray;
7use vortex_dtype::{NativePType, Nullability, match_each_integer_ptype};
8use vortex_error::VortexResult;
9use vortex_scalar::PValue;
10
11use crate::SequenceArray;
12
13/// Encodes a primitive array into a sequence array, this is possible if:
14/// 1. The array is not empty, and contains no nulls
15/// 2. The array is not a float array. (This is due to precision issues, how it will stack well with ALP).
16/// 3. The array is representable as a sequence `A[i] = base + i * multiplier` for multiplier != 0.
17/// 4. The sequence has no deviations from the equation, this could be fixed with patches. However,
18///    we might want a different array for that since sequence provide fast access.
19pub fn sequence_encode(primitive_array: &PrimitiveArray) -> VortexResult<Option<ArrayRef>> {
20    if primitive_array.is_empty() {
21        // we cannot encode an empty array
22        return Ok(None);
23    }
24
25    if !primitive_array.all_valid()? {
26        return Ok(None);
27    }
28
29    if primitive_array.ptype().is_float() {
30        // for now, we don't handle float arrays, due to possible precision issues
31        return Ok(None);
32    }
33
34    match_each_integer_ptype!(primitive_array.ptype(), |P| {
35        encode_primitive_array(
36            primitive_array.as_slice::<P>(),
37            primitive_array.dtype().nullability(),
38        )
39    })
40}
41
42fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
43    slice: &[P],
44    nullability: Nullability,
45) -> VortexResult<Option<ArrayRef>> {
46    if slice.len() == 1 {
47        // The multiplier here can be any value, zero is chosen
48        return SequenceArray::typed_new(slice[0], P::zero(), nullability, 1)
49            .map(|a| Some(a.to_array()));
50    }
51    let base = slice[0];
52    let Some(multiplier) = slice[1].checked_sub(&base) else {
53        return Ok(None);
54    };
55
56    if multiplier == P::zero() {
57        return Ok(None);
58    }
59
60    if SequenceArray::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
61        // If the last value is out of range, we cannot encode
62        return Ok(None);
63    }
64
65    slice
66        .windows(2)
67        .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
68        .then_some(
69            SequenceArray::typed_new(base, multiplier, nullability, slice.len())
70                .map(|a| a.to_array()),
71        )
72        .transpose()
73}
74
75#[cfg(test)]
76mod tests {
77    use vortex_array::ToCanonical;
78    use vortex_array::arrays::PrimitiveArray;
79
80    use crate::sequence_encode;
81
82    #[test]
83    fn test_encode_array_success() {
84        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
85        let encoded = sequence_encode(&primitive_array).unwrap();
86        assert!(encoded.is_some());
87        let decoded = encoded.unwrap().to_primitive().unwrap();
88        assert_eq!(decoded.as_slice::<i32>(), primitive_array.as_slice::<i32>());
89    }
90
91    #[test]
92    fn test_encode_array_1_success() {
93        let primitive_array = PrimitiveArray::from_iter([0]);
94        let encoded = sequence_encode(&primitive_array).unwrap();
95        assert!(encoded.is_some());
96        let decoded = encoded.unwrap().to_primitive().unwrap();
97        assert_eq!(decoded.as_slice::<i32>(), primitive_array.as_slice::<i32>());
98    }
99
100    #[test]
101    fn test_encode_array_fail() {
102        let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
103
104        let encoded = sequence_encode(&primitive_array).unwrap();
105        assert!(encoded.is_none());
106    }
107
108    #[test]
109    fn test_encode_array_fail_oob() {
110        let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
111
112        let encoded = sequence_encode(&primitive_array).unwrap();
113        assert!(encoded.is_none());
114    }
115}