vortex_fastlanes/for/array/
for_compress.rs1use num_traits::PrimInt;
5use num_traits::WrappingSub;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::expr::stats::Stat;
9use vortex_dtype::NativePType;
10use vortex_dtype::match_each_integer_ptype;
11use vortex_error::VortexResult;
12use vortex_error::vortex_err;
13
14use crate::FoRArray;
15
16impl FoRArray {
17 pub fn encode(array: PrimitiveArray) -> VortexResult<FoRArray> {
18 let min = array
19 .statistics()
20 .compute_stat(Stat::Min)?
21 .ok_or_else(|| vortex_err!("Min stat not found"))?;
22
23 let encoded = match_each_integer_ptype!(array.ptype(), |T| {
24 compress_primitive::<T>(array, T::try_from(&min)?)?.into_array()
25 });
26 FoRArray::try_new(encoded, min)
27 }
28}
29
30fn compress_primitive<T: NativePType + WrappingSub + PrimInt>(
31 parray: PrimitiveArray,
32 min: T,
33) -> VortexResult<PrimitiveArray> {
34 parray.map_each_with_validity::<T, _, _>(|(v, bool)| {
37 if bool {
38 v.wrapping_sub(&min)
39 } else {
40 T::zero()
41 }
42 })
43}
44
#[cfg(test)]
mod test {
    use itertools::Itertools;
    use vortex_array::ToCanonical;
    use vortex_array::assert_arrays_eq;
    use vortex_array::expr::stats::StatsProvider;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;
    use vortex_dtype::PType;
    use vortex_scalar::Scalar;

    use super::*;
    use crate::BitPackedArray;
    use crate::r#for::array::for_decompress::decompress;
    use crate::r#for::array::for_decompress::fused_decompress;

    // Builds the 1024-element `u32` fixture `x % 7 + offset` shared by the fused-decompress tests.
    fn residues_mod_7(offset: u32) -> PrimitiveArray {
        PrimitiveArray::from_iter((0u32..1024).map(move |x| x % 7 + offset))
    }

    // A short signed range must pick its minimum as the reference and survive a round trip.
    #[test]
    fn test_compress_round_trip_small() {
        let values: vortex_buffer::Buffer<i32> = (1i32..10).collect();
        let original = PrimitiveArray::new(values, Validity::NonNullable);

        let for_array = FoRArray::encode(original.clone()).unwrap();
        let reference = i32::try_from(for_array.reference_scalar()).unwrap();
        assert_eq!(reference, 1);

        let round_tripped = for_array.to_primitive();
        assert_arrays_eq!(round_tripped, original);
    }

    // A range shifted up by one million must report one million as its reference.
    #[test]
    fn test_compress() {
        let values: vortex_buffer::Buffer<u32> =
            (0u32..10_000).map(|offset| offset + 1_000_000).collect();
        let input = PrimitiveArray::new(values, Validity::NonNullable);

        let for_array = FoRArray::encode(input).unwrap();
        let reference = u32::try_from(for_array.reference_scalar()).unwrap();
        assert_eq!(reference, 1_000_000u32);
    }

    // An all-zero signed array keeps its signed dtype and encodes to a constant zero.
    #[test]
    fn test_zeros() {
        let zeros = PrimitiveArray::new(buffer![0i32; 100], Validity::NonNullable);
        // The freshly built array holds no statistics before `encode` is called.
        assert_eq!(zeros.statistics().len(), 0);
        let expected_dtype = zeros.dtype().clone();

        let for_array = FoRArray::encode(zeros).unwrap();
        let reference = for_array.reference_scalar();
        assert_eq!(reference.dtype(), &expected_dtype);
        assert!(reference.dtype().is_signed_int());
        assert!(for_array.encoded().dtype().is_signed_int());

        let encoded_constant = for_array.encoded().as_constant().unwrap();
        assert_eq!(encoded_constant, Scalar::from(0i32));
    }

    // Sparse, strided values offset by one million round-trip through encode / to_primitive.
    #[test]
    fn test_decompress() {
        let strided = (0u32..100_000).step_by(1024).map(|v| v + 1_000_000);
        let original = PrimitiveArray::from_iter(strided);

        let round_tripped = FoRArray::encode(original.clone())
            .unwrap()
            .to_primitive();
        assert_arrays_eq!(round_tripped, original);
    }

    // FoR layered over a bit-packed child (3 bits hold every value 0..=6) decodes correctly.
    #[test]
    fn test_decompress_fused() {
        let expected = residues_mod_7(10);
        let residues = residues_mod_7(0);

        let packed = BitPackedArray::encode(residues.as_ref(), 3).unwrap();
        let for_array = FoRArray::try_new(packed.into_array(), 10u32.into()).unwrap();

        let decoded = for_array.to_primitive();
        assert_arrays_eq!(decoded, expected);
    }

    // Same fixture packed at 2 bits: values above 3 do not fit, so (per the test name) the
    // bit-packed child is expected to carry patches. The fused decompress path is called directly.
    #[test]
    fn test_decompress_fused_patches() {
        let expected = residues_mod_7(10);
        let residues = residues_mod_7(0);

        let packed = BitPackedArray::encode(residues.as_ref(), 2).unwrap();
        let for_array = FoRArray::try_new(packed.clone().into_array(), 10u32.into()).unwrap();

        let decoded = fused_decompress::<u32>(&for_array, &packed);
        assert_arrays_eq!(decoded, expected);
    }

    // The full `i8` range spans more than `i8::MAX`; the differences must wrap rather than
    // overflow, and decoding must restore the original values.
    #[test]
    fn test_overflow() {
        let full_range = PrimitiveArray::from_iter(i8::MIN..=i8::MAX);
        let for_array = FoRArray::encode(full_range.clone()).unwrap();

        let reference = for_array
            .reference_scalar()
            .as_primitive()
            .typed_value::<i8>()
            .unwrap();
        assert_eq!(i8::MIN, reference);

        // Reinterpreted as unsigned bytes, the encoded values are exactly 0..=255.
        let encoded_signed = for_array.encoded().to_primitive();
        let encoded_bits = encoded_signed.reinterpret_cast(PType::U8);
        let expected_bits = PrimitiveArray::from_iter((0..=u8::MAX).collect_vec());
        assert_arrays_eq!(encoded_bits, expected_bits);

        let decoded = decompress(&for_array);
        // Element-wise access through `scalar_at` must agree with the source values too.
        for (idx, expected) in full_range.as_slice::<i8>().iter().enumerate() {
            let actual = i8::try_from(for_array.scalar_at(idx).as_ref()).unwrap();
            assert_eq!(*expected, actual);
        }
        assert_arrays_eq!(decoded, full_range);
    }
}