vortex_sequence/
compress.rs1use std::ops::Add;
5
6use num_traits::CheckedAdd;
7use num_traits::CheckedSub;
8use vortex_array::ArrayRef;
9use vortex_array::IntoArray;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::dtype::NativePType;
12use vortex_array::dtype::Nullability;
13use vortex_array::match_each_integer_ptype;
14use vortex_array::match_each_native_ptype;
15use vortex_array::scalar::PValue;
16use vortex_array::validity::Validity;
17use vortex_buffer::BufferMut;
18use vortex_buffer::trusted_len::TrustedLen;
19use vortex_error::VortexResult;
20
21use crate::Sequence;
22use crate::SequenceArray;
23use crate::SequenceData;
24struct SequenceIter<T> {
26 acc: T,
27 step: T,
28 remaining: usize,
29}
30
31impl<T: Copy + Add<Output = T>> Iterator for SequenceIter<T> {
32 type Item = T;
33
34 #[inline]
35 fn next(&mut self) -> Option<T> {
36 if self.remaining == 0 {
37 return None;
38 }
39 let val = self.acc;
40 self.remaining -= 1;
41 if self.remaining > 0 {
42 self.acc = self.acc + self.step;
43 }
44 Some(val)
45 }
46
47 #[inline]
48 fn size_hint(&self) -> (usize, Option<usize>) {
49 (self.remaining, Some(self.remaining))
50 }
51}
52
53unsafe impl<T: Copy + Add<Output = T>> TrustedLen for SequenceIter<T> {}
55
56#[inline]
58pub fn sequence_decompress(array: &SequenceArray) -> VortexResult<ArrayRef> {
59 fn decompress_inner<P: NativePType>(
60 base: P,
61 multiplier: P,
62 len: usize,
63 nullability: Nullability,
64 ) -> PrimitiveArray {
65 let values = BufferMut::from_trusted_len_iter(SequenceIter {
66 acc: base,
67 step: multiplier,
68 remaining: len,
69 });
70 PrimitiveArray::new(values, Validity::from(nullability))
71 }
72
73 let prim = match_each_native_ptype!(array.ptype(), |P| {
74 let base = array.base().cast::<P>()?;
75 let multiplier = array.multiplier().cast::<P>()?;
76 decompress_inner(base, multiplier, array.len(), array.dtype().nullability())
77 });
78 Ok(prim.into_array())
79}
80
81pub fn sequence_encode(primitive_array: &PrimitiveArray) -> VortexResult<Option<ArrayRef>> {
88 if primitive_array.is_empty() {
89 return Ok(None);
91 }
92
93 if !primitive_array.all_valid()? {
94 return Ok(None);
95 }
96
97 if primitive_array.ptype().is_float() {
98 return Ok(None);
100 }
101
102 match_each_integer_ptype!(primitive_array.ptype(), |P| {
103 encode_primitive_array(
104 primitive_array.as_slice::<P>(),
105 primitive_array.dtype().nullability(),
106 )
107 })
108}
109
110fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSub>(
111 slice: &[P],
112 nullability: Nullability,
113) -> VortexResult<Option<ArrayRef>> {
114 if slice.len() == 1 {
115 return Sequence::try_new_typed(slice[0], P::zero(), nullability, 1)
117 .map(|a| Some(a.into_array()));
118 }
119 let base = slice[0];
120 let Some(multiplier) = slice[1].checked_sub(&base) else {
121 return Ok(None);
122 };
123
124 if multiplier == P::zero() {
125 return Ok(None);
126 }
127
128 if SequenceData::try_last(base.into(), multiplier.into(), P::PTYPE, slice.len()).is_err() {
129 return Ok(None);
131 }
132
133 slice
134 .windows(2)
135 .all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
136 .then_some(
137 Sequence::try_new_typed(base, multiplier, nullability, slice.len())
138 .map(|a| a.into_array()),
139 )
140 .transpose()
141}
142
143#[cfg(test)]
144mod tests {
145 #[allow(unused_imports)]
146 use itertools::Itertools;
147 use vortex_array::ToCanonical;
148 use vortex_array::arrays::PrimitiveArray;
149 use vortex_array::assert_arrays_eq;
150
151 use crate::sequence_encode;
152
153 #[test]
154 fn test_encode_array_success() {
155 let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
156 let encoded = sequence_encode(&primitive_array).unwrap();
157 assert!(encoded.is_some());
158 let decoded = encoded.unwrap().to_primitive();
159 assert_arrays_eq!(decoded, primitive_array);
160 }
161
162 #[test]
163 fn test_encode_array_1_success() {
164 let primitive_array = PrimitiveArray::from_iter([0]);
165 let encoded = sequence_encode(&primitive_array).unwrap();
166 assert!(encoded.is_some());
167 let decoded = encoded.unwrap().to_primitive();
168 assert_arrays_eq!(decoded, primitive_array);
169 }
170
171 #[test]
172 fn test_encode_array_fail() {
173 let primitive_array = PrimitiveArray::from_iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
174
175 let encoded = sequence_encode(&primitive_array).unwrap();
176 assert!(encoded.is_none());
177 }
178
179 #[test]
180 fn test_encode_array_fail_oob() {
181 let primitive_array = PrimitiveArray::from_iter(vec![100i8; 1000]);
182
183 let encoded = sequence_encode(&primitive_array).unwrap();
184 assert!(encoded.is_none());
185 }
186
187 #[test]
188 fn test_encode_all_u8_values() {
189 let primitive_array = PrimitiveArray::from_iter(0u8..=255);
190 let encoded = sequence_encode(&primitive_array).unwrap();
191 assert!(encoded.is_some());
192 let decoded = encoded.unwrap().to_primitive();
193 assert_arrays_eq!(decoded, primitive_array);
194 }
195}