vortex_fastlanes/for/array/
for_compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::PrimInt;
5use num_traits::WrappingSub;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::expr::stats::Stat;
9use vortex_dtype::NativePType;
10use vortex_dtype::match_each_integer_ptype;
11use vortex_error::VortexResult;
12use vortex_error::vortex_err;
13
14use crate::FoRArray;
15
16impl FoRArray {
17    pub fn encode(array: PrimitiveArray) -> VortexResult<FoRArray> {
18        let min = array
19            .statistics()
20            .compute_stat(Stat::Min)?
21            .ok_or_else(|| vortex_err!("Min stat not found"))?;
22
23        let encoded = match_each_integer_ptype!(array.ptype(), |T| {
24            compress_primitive::<T>(array, T::try_from(&min)?)?.into_array()
25        });
26        FoRArray::try_new(encoded, min)
27    }
28}
29
30fn compress_primitive<T: NativePType + WrappingSub + PrimInt>(
31    parray: PrimitiveArray,
32    min: T,
33) -> VortexResult<PrimitiveArray> {
34    // Set null values to the min value, ensuring that decompress into a value in the primitive
35    // range (and stop them wrapping around).
36    parray.map_each_with_validity::<T, _, _>(|(v, bool)| {
37        if bool {
38            v.wrapping_sub(&min)
39        } else {
40            T::zero()
41        }
42    })
43}
44
#[cfg(test)]
mod test {
    use itertools::Itertools;
    use vortex_array::ToCanonical;
    use vortex_array::assert_arrays_eq;
    use vortex_array::expr::stats::StatsProvider;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;
    use vortex_dtype::PType;
    use vortex_scalar::Scalar;

    use super::*;
    use crate::BitPackedArray;
    use crate::r#for::array::for_decompress::decompress;
    use crate::r#for::array::for_decompress::fused_decompress;

    /// A short signed range round-trips and picks 1 as its reference.
    #[test]
    fn test_compress_round_trip_small() {
        let values: vortex_buffer::Buffer<i32> = (1i32..10).collect();
        let original = PrimitiveArray::new(values, Validity::NonNullable);

        let for_array = FoRArray::encode(original.clone()).unwrap();
        assert_eq!(i32::try_from(for_array.reference_scalar()).unwrap(), 1);

        let round_tripped = for_array.to_primitive();
        assert_arrays_eq!(round_tripped, original);
    }

    /// The reference of an offset range is the offset itself.
    #[test]
    fn test_compress() {
        // Ten thousand consecutive values starting at one million.
        let values: vortex_buffer::Buffer<u32> =
            (0u32..10_000).map(|v| v + 1_000_000).collect();
        let original = PrimitiveArray::new(values, Validity::NonNullable);

        let for_array = FoRArray::encode(original).unwrap();
        let reference = u32::try_from(for_array.reference_scalar()).unwrap();
        assert_eq!(reference, 1_000_000u32);
    }

    /// An all-zero signed array keeps its dtype and encodes to a constant zero child.
    #[test]
    fn test_zeros() {
        let zeros = PrimitiveArray::new(buffer![0i32; 100], Validity::NonNullable);
        // No statistics are present before encoding.
        assert_eq!(zeros.statistics().len(), 0);

        let expected_dtype = zeros.dtype().clone();
        let for_array = FoRArray::encode(zeros).unwrap();
        let reference = for_array.reference_scalar();
        assert_eq!(reference.dtype(), &expected_dtype);
        assert!(reference.dtype().is_signed_int());
        assert!(for_array.encoded().dtype().is_signed_int());

        let constant = for_array.encoded().as_constant().unwrap();
        assert_eq!(constant, Scalar::from(0i32));
    }

    /// A sparse, offset range round-trips through encode and canonicalization.
    #[test]
    fn test_decompress() {
        // Every 1024th value below 100_000, offset by one million.
        let original =
            PrimitiveArray::from_iter((0u32..100_000).step_by(1024).map(|v| v + 1_000_000));
        let for_array = FoRArray::encode(original.clone()).unwrap();
        let round_tripped = for_array.to_primitive();
        assert_arrays_eq!(round_tripped, original);
    }

    /// A FoR array over a bit-packed child decodes to child values plus the reference.
    #[test]
    fn test_decompress_fused() {
        // Bit-pack `x % 7` at width 3, then wrap it in a FoR array with reference 10.
        let expected = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10));
        let residuals = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7));
        let packed = BitPackedArray::encode(residuals.as_ref(), 3).unwrap();
        let for_array = FoRArray::try_new(packed.into_array(), 10u32.into()).unwrap();
        let decoded = for_array.to_primitive();
        assert_arrays_eq!(decoded, expected);
    }

    /// Calls `fused_decompress` directly on a child bit-packed at width 2.
    #[test]
    fn test_decompress_fused_patches() {
        // Same values as the fused test but packed at width 2; going by the test name, the
        // values that do not fit are expected to be carried as patches.
        let expected = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10));
        let residuals = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7));
        let packed = BitPackedArray::encode(residuals.as_ref(), 2).unwrap();
        let for_array = FoRArray::try_new(packed.clone().into_array(), 10u32.into()).unwrap();
        let decoded = fused_decompress::<u32>(&for_array, &packed);
        assert_arrays_eq!(decoded, expected);
    }

    /// The full `i8` range encodes via wrapping subtraction and decodes losslessly.
    #[test]
    fn test_overflow() {
        let original = PrimitiveArray::from_iter(i8::MIN..=i8::MAX);
        let for_array = FoRArray::encode(original.clone()).unwrap();

        let reference = for_array
            .reference_scalar()
            .as_primitive()
            .typed_value::<i8>()
            .unwrap();
        assert_eq!(i8::MIN, reference);

        // Viewed as unsigned bytes, the encoded child must be exactly 0..=255.
        let encoded_as_u8 = for_array
            .encoded()
            .to_primitive()
            .reinterpret_cast(PType::U8);
        let all_bytes: Vec<u8> = (0..=u8::MAX).collect_vec();
        let expected_unsigned = PrimitiveArray::from_iter(all_bytes);
        assert_arrays_eq!(encoded_as_u8, expected_unsigned);

        let decoded = decompress(&for_array);
        // Element-wise scalar access must agree with the source values.
        for (i, expected) in original.as_slice::<i8>().iter().enumerate() {
            assert_eq!(
                *expected,
                i8::try_from(for_array.scalar_at(i).as_ref()).unwrap()
            );
        }
        assert_arrays_eq!(decoded, original);
    }
}