1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::mem::size_of;
7use std::mem::transmute;
8use std::mem::transmute_copy;
9
10use itertools::Itertools;
11use num_traits::CheckedSub;
12use num_traits::Float;
13use num_traits::PrimInt;
14use num_traits::ToPrimitive;
15
16mod array;
17mod compress;
18mod compute;
19mod decompress;
20mod ops;
21mod rules;
22
23#[cfg(test)]
24mod tests {
25 use vortex_array::ProstMetadata;
26 use vortex_array::dtype::PType;
27 use vortex_array::patches::PatchesMetadata;
28 use vortex_array::test_harness::check_metadata;
29
30 use crate::alp::array::ALPMetadata;
31
32 #[cfg_attr(miri, ignore)]
33 #[test]
34 fn test_alp_metadata() {
35 check_metadata(
36 "alp.metadata",
37 ProstMetadata(ALPMetadata {
38 patches: Some(PatchesMetadata::new(
39 usize::MAX,
40 usize::MAX,
41 PType::U64,
42 None,
43 None,
44 None,
45 )),
46 exp_e: u32::MAX,
47 exp_f: u32::MAX,
48 }),
49 );
50 }
51}
52
53pub use array::*;
54pub use compress::alp_encode;
55pub use decompress::decompress_into_array;
56use vortex_array::dtype::NativePType;
57use vortex_array::scalar::PValue;
58use vortex_buffer::Buffer;
59use vortex_buffer::BufferMut;
60
61const SAMPLE_SIZE: usize = 32;
62
63#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
64pub struct Exponents {
65 pub e: u8,
66 pub f: u8,
67}
68
69impl Display for Exponents {
70 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
71 write!(f, "e: {}, f: {}", self.e, self.f)
72 }
73}
74
75mod private {
76 pub trait Sealed {}
77
78 impl Sealed for f32 {}
79 impl Sealed for f64 {}
80}
81
82pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
83 type ALPInt: PrimInt + Display + ToPrimitive + Copy + NativePType + Into<PValue>;
84
85 const FRACTIONAL_BITS: u8;
86 const MAX_EXPONENT: u8;
87 const SWEET: Self;
88 const F10: &'static [Self];
89 const IF10: &'static [Self];
90
91 #[inline]
93 fn fast_round(self) -> Self {
94 (self + Self::SWEET) - Self::SWEET
95 }
96
97 fn as_int(self) -> Self::ALPInt;
99
100 fn from_int(n: Self::ALPInt) -> Self;
102
103 fn find_best_exponents(values: &[Self]) -> Exponents {
104 let mut best_exp = Exponents { e: 0, f: 0 };
105 let mut best_nbytes: usize = usize::MAX;
106
107 let sample = (values.len() > SAMPLE_SIZE).then(|| {
108 values
109 .iter()
110 .step_by(values.len() / SAMPLE_SIZE)
111 .cloned()
112 .collect_vec()
113 });
114
115 for e in (0..Self::MAX_EXPONENT).rev() {
116 for f in 0..e {
117 let (_, encoded, _, exc_patches, _) = Self::encode(
118 sample.as_deref().unwrap_or(values),
119 Some(Exponents { e, f }),
120 );
121
122 let size = Self::estimate_encoded_size(&encoded, &exc_patches);
123 if size < best_nbytes {
124 best_nbytes = size;
125 best_exp = Exponents { e, f };
126 } else if size == best_nbytes && e - f < best_exp.e - best_exp.f {
127 best_exp = Exponents { e, f };
128 }
129 }
130 }
131
132 best_exp
133 }
134
135 #[inline]
136 fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize {
137 let bits_per_encoded = encoded
138 .iter()
139 .minmax()
140 .into_option()
141 .and_then(|(min, max)| max.checked_sub(min))
143 .and_then(|range_size: <Self as ALPFloat>::ALPInt| range_size.to_u64())
144 .and_then(|range_size| {
145 range_size
146 .checked_ilog2()
147 .map(|bits| (bits + 1) as usize)
148 .or(Some(0))
149 })
150 .unwrap_or(size_of::<Self::ALPInt>() * 8);
151
152 let encoded_bytes = (encoded.len() * bits_per_encoded).div_ceil(8);
153 let patch_bytes = patches.len() * (size_of::<Self>() + size_of::<u16>());
156
157 encoded_bytes + patch_bytes
158 }
159
160 #[expect(
161 clippy::type_complexity,
162 reason = "tuple return type is appropriate for multiple encoding outputs"
163 )]
164 fn encode(
165 values: &[Self],
166 exponents: Option<Exponents>,
167 ) -> (
168 Exponents,
169 Buffer<Self::ALPInt>,
170 Buffer<u64>,
171 Buffer<Self>,
172 BufferMut<u64>,
173 ) {
174 let exp = exponents.unwrap_or_else(|| Self::find_best_exponents(values));
175
176 let mut encoded_output = BufferMut::<Self::ALPInt>::with_capacity(values.len());
177
178 let mut patch_indices = BufferMut::<u64>::with_capacity(values.len() / 32);
180 let mut patch_values = BufferMut::<Self>::with_capacity(values.len() / 32);
181
182 let mut chunk_offsets = BufferMut::<u64>::with_capacity(values.len().div_ceil(1024));
184 let mut fill_value: Option<Self::ALPInt> = None;
185
186 for chunk in values.chunks(1024) {
187 chunk_offsets.push(patch_indices.len() as u64);
188 encode_chunk_unchecked(
189 chunk,
190 exp,
191 &mut encoded_output,
192 &mut patch_indices,
193 &mut patch_values,
194 &mut fill_value,
195 );
196 }
197
198 (
199 exp,
200 encoded_output.freeze(),
201 patch_indices.freeze(),
202 patch_values.freeze(),
203 chunk_offsets,
204 )
205 }
206
207 #[inline]
208 fn encode_single(value: Self, exponents: Exponents) -> Option<Self::ALPInt> {
209 let encoded = Self::encode_single_unchecked(value, exponents);
210 let decoded = Self::decode_single(encoded, exponents);
211 if decoded.is_eq(value) {
212 return Some(encoded);
213 }
214 None
215 }
216
217 fn encode_above(value: Self, exponents: Exponents) -> Self::ALPInt {
218 (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
219 .ceil()
220 .as_int()
221 }
222
223 fn encode_below(value: Self, exponents: Exponents) -> Self::ALPInt {
224 (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
225 .floor()
226 .as_int()
227 }
228
229 fn decode(encoded: &[Self::ALPInt], exponents: Exponents) -> Vec<Self> {
230 let mut values = Vec::with_capacity(encoded.len());
231 for encoded in encoded {
232 values.push(Self::decode_single(*encoded, exponents));
233 }
234 values
235 }
236
237 fn decode_buffer(encoded: BufferMut<Self::ALPInt>, exponents: Exponents) -> BufferMut<Self> {
238 encoded.map_each_in_place(move |encoded| Self::decode_single(encoded, exponents))
239 }
240
241 fn decode_into(encoded: &[Self::ALPInt], exponents: Exponents, output: &mut [Self]) {
242 assert_eq!(encoded.len(), output.len());
243
244 for i in 0..encoded.len() {
245 output[i] = Self::decode_single(encoded[i], exponents)
246 }
247 }
248
249 fn decode_slice_inplace(encoded: &mut [Self::ALPInt], exponents: Exponents) {
250 let decoded: &mut [Self] = unsafe { transmute(encoded) };
251 decoded.iter_mut().for_each(|v| {
252 *v = Self::decode_single(
253 unsafe { transmute_copy::<Self, Self::ALPInt>(v) },
254 exponents,
255 )
256 })
257 }
258
259 #[inline(always)]
260 fn decode_single(encoded: Self::ALPInt, exponents: Exponents) -> Self {
261 Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
262 }
263
264 #[inline(always)]
267 fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
268 (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
269 .fast_round()
270 .as_int()
271 }
272}
273
274#[expect(
275 clippy::cast_possible_truncation,
276 reason = "intentional truncation for ALP encoding"
277)]
278fn encode_chunk_unchecked<T: ALPFloat>(
279 chunk: &[T],
280 exp: Exponents,
281 encoded_output: &mut BufferMut<T::ALPInt>,
282 patch_indices: &mut BufferMut<u64>,
283 patch_values: &mut BufferMut<T>,
284 fill_value: &mut Option<T::ALPInt>,
285) {
286 let num_prev_encoded = encoded_output.len();
287 let num_prev_patches = patch_indices.len();
288 assert_eq!(patch_indices.len(), patch_values.len());
289 let has_filled = fill_value.is_some();
290
291 let mut chunk_patch_count = 0;
293 encoded_output.extend_trusted(chunk.iter().map(|&v| {
294 let encoded = T::encode_single_unchecked(v, exp);
295 let decoded = T::decode_single(encoded, exp);
296 let neq = !decoded.is_eq(v) as usize;
297 chunk_patch_count += neq;
298 encoded
299 }));
300 let chunk_patch_count = chunk_patch_count; assert_eq!(encoded_output.len(), num_prev_encoded + chunk.len());
302
303 if chunk_patch_count > 0 {
304 patch_indices.reserve(chunk_patch_count + 1);
307 patch_values.reserve(chunk_patch_count + 1);
308
309 let patch_indices_mut = patch_indices.spare_capacity_mut();
311 let patch_values_mut = patch_values.spare_capacity_mut();
312 let mut chunk_patch_index = 0;
313 for i in num_prev_encoded..encoded_output.len() {
314 let decoded = T::decode_single(encoded_output[i], exp);
315 patch_indices_mut[chunk_patch_index].write(i as u64);
317 patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
318 chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
319 }
320 assert_eq!(chunk_patch_index, chunk_patch_count);
321 unsafe {
322 patch_indices.set_len(num_prev_patches + chunk_patch_count);
323 patch_values.set_len(num_prev_patches + chunk_patch_count);
324 }
325 }
326
327 if fill_value.is_none() && (num_prev_encoded + chunk_patch_count < encoded_output.len()) {
330 assert_eq!(num_prev_encoded, num_prev_patches);
331 for i in num_prev_encoded..encoded_output.len() {
332 if i >= patch_indices.len() || patch_indices[i] != i as u64 {
333 *fill_value = Some(encoded_output[i]);
334 break;
335 }
336 }
337 }
338
339 if let Some(fill_value) = fill_value {
342 let start_patch = if !has_filled { 0 } else { num_prev_patches };
344 for patch_idx in &patch_indices[start_patch..] {
345 encoded_output[*patch_idx as usize] = *fill_value;
346 }
347 }
348}
349
350impl ALPFloat for f32 {
351 type ALPInt = i32;
352 const FRACTIONAL_BITS: u8 = 23;
353 const MAX_EXPONENT: u8 = 10;
354 const SWEET: Self =
355 (1 << Self::FRACTIONAL_BITS) as Self + (1 << (Self::FRACTIONAL_BITS - 1)) as Self;
356
357 const F10: &'static [Self] = &[
358 1.0,
359 10.0,
360 100.0,
361 1000.0,
362 10000.0,
363 100000.0,
364 1000000.0,
365 10000000.0,
366 100000000.0,
367 1000000000.0,
368 10000000000.0, ];
370 const IF10: &'static [Self] = &[
371 1.0,
372 0.1,
373 0.01,
374 0.001,
375 0.0001,
376 0.00001,
377 0.000001,
378 0.0000001,
379 0.00000001,
380 0.000000001,
381 0.0000000001, ];
383
384 #[inline(always)]
385 #[expect(
386 clippy::cast_possible_truncation,
387 reason = "intentional float to int truncation for ALP encoding"
388 )]
389 fn as_int(self) -> Self::ALPInt {
390 self as _
391 }
392
393 #[inline(always)]
394 fn from_int(n: Self::ALPInt) -> Self {
395 n as _
396 }
397}
398
399impl ALPFloat for f64 {
400 type ALPInt = i64;
401 const FRACTIONAL_BITS: u8 = 52;
402 const MAX_EXPONENT: u8 = 18; const SWEET: Self =
404 (1u64 << Self::FRACTIONAL_BITS) as Self + (1u64 << (Self::FRACTIONAL_BITS - 1)) as Self;
405 const F10: &'static [Self] = &[
406 1.0,
407 10.0,
408 100.0,
409 1000.0,
410 10000.0,
411 100000.0,
412 1000000.0,
413 10000000.0,
414 100000000.0,
415 1000000000.0,
416 10000000000.0,
417 100000000000.0,
418 1000000000000.0,
419 10000000000000.0,
420 100000000000000.0,
421 1000000000000000.0,
422 10000000000000000.0,
423 100000000000000000.0,
424 1000000000000000000.0,
425 10000000000000000000.0,
426 100000000000000000000.0,
427 1000000000000000000000.0,
428 10000000000000000000000.0,
429 100000000000000000000000.0, ];
431
432 const IF10: &'static [Self] = &[
433 1.0,
434 0.1,
435 0.01,
436 0.001,
437 0.0001,
438 0.00001,
439 0.000001,
440 0.0000001,
441 0.00000001,
442 0.000000001,
443 0.0000000001,
444 0.00000000001,
445 0.000000000001,
446 0.0000000000001,
447 0.00000000000001,
448 0.000000000000001,
449 0.0000000000000001,
450 0.00000000000000001,
451 0.000000000000000001,
452 0.0000000000000000001,
453 0.00000000000000000001,
454 0.000000000000000000001,
455 0.0000000000000000000001,
456 0.00000000000000000000001, ];
458
459 #[inline(always)]
460 #[expect(
461 clippy::cast_possible_truncation,
462 reason = "intentional float to int truncation for ALP encoding"
463 )]
464 fn as_int(self) -> Self::ALPInt {
465 self as _
466 }
467
468 #[inline(always)]
469 fn from_int(n: Self::ALPInt) -> Self {
470 n as _
471 }
472}