1use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::dtype::PType;
9use vortex_array::patches::Patches;
10use vortex_array::validity::Validity;
11use vortex_array::vtable::ValidityHelper;
12use vortex_buffer::Buffer;
13use vortex_buffer::BufferMut;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_mask::Mask;
17
18use crate::Exponents;
19use crate::alp::ALPArray;
20use crate::alp::ALPFloat;
21
22#[macro_export]
23macro_rules! match_each_alp_float_ptype {
24 ($self:expr, | $enc:ident | $body:block) => {{
25 use vortex_array::dtype::PType;
26 use vortex_error::vortex_panic;
27 let ptype = $self;
28 match ptype {
29 PType::F32 => {
30 type $enc = f32;
31 $body
32 }
33 PType::F64 => {
34 type $enc = f64;
35 $body
36 }
37 _ => vortex_panic!("ALP can only encode f32 and f64, got {}", ptype),
38 }
39 }};
40}
41
42pub fn alp_encode(parray: &PrimitiveArray, exponents: Option<Exponents>) -> VortexResult<ALPArray> {
43 let (exponents, encoded, patches) = match parray.ptype() {
44 PType::F32 => alp_encode_components_typed::<f32>(parray, exponents)?,
45 PType::F64 => alp_encode_components_typed::<f64>(parray, exponents)?,
46 _ => vortex_bail!("ALP can only encode f32 and f64"),
47 };
48
49 unsafe {
51 Ok(ALPArray::new_unchecked(
52 encoded,
53 exponents,
54 patches,
55 parray.dtype().clone(),
56 ))
57 }
58}
59
60#[expect(
61 clippy::cast_possible_truncation,
62 reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65 values: &PrimitiveArray,
66 exponents: Option<Exponents>,
67) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
68where
69 T: ALPFloat,
70{
71 let values_slice = values.as_slice::<T>();
72
73 let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
74 T::encode(values_slice, exponents);
75
76 let encoded_array = PrimitiveArray::new(encoded, values.validity().clone()).into_array();
77
78 let validity = values.validity_mask()?;
79 let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
82 match validity {
83 Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
84 Mask::AllFalse(_) => {
85 (Buffer::empty(), Buffer::empty())
87 }
88 Mask::Values(is_valid) => {
89 let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
90 .into_iter()
91 .zip_eq(exceptional_values)
92 .filter(|(index, _)| {
93 let is_valid = is_valid.value(*index as usize);
94 if !is_valid {
95 let patch_chunk = *index as usize / 1024;
96 for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
97 chunk_offsets[chunk_idx] -= 1;
98 }
99 }
100 is_valid
101 })
102 .unzip();
103 (pos.freeze(), vals.freeze())
104 }
105 };
106 let patches = if valid_exceptional_positions.is_empty() {
107 None
108 } else {
109 let patches_validity = if values.dtype().is_nullable() {
110 Validity::AllValid
111 } else {
112 Validity::NonNullable
113 };
114 let valid_exceptional_values =
115 PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
116
117 Some(Patches::new(
118 values_slice.len(),
119 0,
120 valid_exceptional_positions.into_array(),
121 valid_exceptional_values,
122 Some(chunk_offsets.into_array()),
123 )?)
124 };
125 Ok((exponents, encoded_array, patches))
126}
127
128#[cfg(test)]
129mod tests {
130 use core::f64;
131
132 use f64::consts::E;
133 use f64::consts::PI;
134 use vortex_array::ToCanonical;
135 use vortex_array::assert_arrays_eq;
136 use vortex_array::dtype::NativePType;
137 use vortex_array::validity::Validity;
138 use vortex_buffer::Buffer;
139 use vortex_buffer::buffer;
140
141 use super::*;
142 use crate::decompress_into_array;
143
144 #[test]
145 fn test_compress() {
146 let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
147 let encoded = alp_encode(&array, None).unwrap();
148 assert!(encoded.patches().is_none());
149 let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
150 assert_arrays_eq!(encoded.encoded(), expected_encoded);
151 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
152
153 let decoded = decompress_into_array(encoded).unwrap();
154 assert_arrays_eq!(decoded, array);
155 }
156
157 #[test]
158 fn test_nullable_compress() {
159 let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
160 let encoded = alp_encode(&array, None).unwrap();
161 assert!(encoded.patches().is_none());
162 let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
163 assert_arrays_eq!(encoded.encoded(), expected_encoded);
164 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
165
166 let decoded = decompress_into_array(encoded).unwrap();
167 let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
168 assert_arrays_eq!(decoded, expected);
169 }
170
171 #[test]
172 #[allow(clippy::approx_constant)] fn test_patched_compress() {
174 let values = buffer![1.234f64, 2.718, PI, 4.0];
175 let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
176 let encoded = alp_encode(&array, None).unwrap();
177 assert!(encoded.patches().is_some());
178 let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
179 assert_arrays_eq!(encoded.encoded(), expected_encoded);
180 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
181
182 let decoded = decompress_into_array(encoded).unwrap();
183 let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
184 assert_arrays_eq!(decoded, expected_decoded);
185 }
186
187 #[test]
188 #[allow(clippy::approx_constant)] fn test_compress_ignores_invalid_exceptional_values() {
190 let values = buffer![1.234f64, 2.718, PI, 4.0];
191 let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
192 let encoded = alp_encode(&array, None).unwrap();
193 assert!(encoded.patches().is_none());
194 let expected_encoded =
195 PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
196 assert_arrays_eq!(encoded.encoded(), expected_encoded);
197 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
198
199 let decoded = decompress_into_array(encoded).unwrap();
200 assert_arrays_eq!(decoded, array);
201 }
202
203 #[test]
204 #[allow(clippy::approx_constant)] fn test_nullable_patched_scalar_at() {
206 let array = PrimitiveArray::from_option_iter([
207 Some(1.234f64),
208 Some(2.718),
209 Some(PI),
210 Some(4.0),
211 None,
212 ]);
213 let encoded = alp_encode(&array, None).unwrap();
214 assert!(encoded.patches().is_some());
215
216 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
217
218 assert_arrays_eq!(encoded, array);
219
220 let _decoded = decompress_into_array(encoded).unwrap();
221 }
222
223 #[test]
224 fn roundtrips_close_fractional() {
225 let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
226 let alp_arr = alp_encode(&original, None).unwrap();
227 assert_arrays_eq!(alp_arr, original);
228 }
229
230 #[test]
231 fn roundtrips_all_null() {
232 let original =
233 PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
234 let alp_arr = alp_encode(&original, None).unwrap();
235 let decompressed = alp_arr.to_primitive();
236
237 assert_eq!(
238 [195.26274, 195.26274, 195.26274],
240 decompressed.as_slice::<f64>()
241 );
242
243 assert_arrays_eq!(decompressed, original);
244 }
245
246 #[test]
247 fn non_finite_numbers() {
248 let original = PrimitiveArray::new(
249 buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
250 Validity::NonNullable,
251 );
252 let encoded = alp_encode(&original, None).unwrap();
253 let decoded = encoded.to_primitive();
254 for idx in 0..original.len() {
255 let decoded_val = decoded.as_slice::<f32>()[idx];
256 let original_val = original.as_slice::<f32>()[idx];
257 assert!(
258 decoded_val.is_eq(original_val),
259 "Expected {original_val} but got {decoded_val}"
260 );
261 }
262 }
263
264 #[test]
265 fn test_chunk_offsets() {
266 let mut values = vec![1.0f64; 3072];
267
268 values[1023] = PI;
269 values[1024] = E;
270 values[1025] = PI;
271
272 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
273 let encoded = alp_encode(&array, None).unwrap();
274 let patches = encoded.patches().unwrap();
275
276 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
277 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
278 assert_arrays_eq!(chunk_offsets, expected_offsets);
279
280 let patch_indices = patches.indices().to_primitive();
281 let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
282 assert_arrays_eq!(patch_indices, expected_indices);
283
284 let patch_values = patches.values().to_primitive();
285 let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
286 assert_arrays_eq!(patch_values, expected_values);
287 }
288
289 #[test]
290 fn test_chunk_offsets_no_patches_in_middle() {
291 let mut values = vec![1.0f64; 3072];
292 values[0] = PI;
293 values[2048] = E;
294
295 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
296 let encoded = alp_encode(&array, None).unwrap();
297 let patches = encoded.patches().unwrap();
298
299 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
300 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
301 assert_arrays_eq!(chunk_offsets, expected_offsets);
302
303 let patch_indices = patches.indices().to_primitive();
304 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
305 assert_arrays_eq!(patch_indices, expected_indices);
306
307 let patch_values = patches.values().to_primitive();
308 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
309 assert_arrays_eq!(patch_values, expected_values);
310 }
311
312 #[test]
313 fn test_chunk_offsets_trailing_empty_chunks() {
314 let mut values = vec![1.0f64; 3072];
315 values[0] = PI;
316
317 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
318 let encoded = alp_encode(&array, None).unwrap();
319 let patches = encoded.patches().unwrap();
320
321 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
322 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
323 assert_arrays_eq!(chunk_offsets, expected_offsets);
324
325 let patch_indices = patches.indices().to_primitive();
326 let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
327 assert_arrays_eq!(patch_indices, expected_indices);
328
329 let patch_values = patches.values().to_primitive();
330 let expected_values = PrimitiveArray::from_iter(vec![PI]);
331 assert_arrays_eq!(patch_values, expected_values);
332 }
333
334 #[test]
335 fn test_chunk_offsets_single_chunk() {
336 let mut values = vec![1.0f64; 512];
337 values[0] = PI;
338 values[100] = E;
339
340 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
341 let encoded = alp_encode(&array, None).unwrap();
342 let patches = encoded.patches().unwrap();
343
344 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
345 let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
346 assert_arrays_eq!(chunk_offsets, expected_offsets);
347
348 let patch_indices = patches.indices().to_primitive();
349 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
350 assert_arrays_eq!(patch_indices, expected_indices);
351
352 let patch_values = patches.values().to_primitive();
353 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
354 assert_arrays_eq!(patch_values, expected_values);
355 }
356
357 #[test]
358 fn test_slice_half_chunk_f32_roundtrip() {
359 let values = vec![1.234f32; 1024];
361 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
362 let encoded = alp_encode(&original, None).unwrap();
363
364 let sliced_alp = encoded.slice(512..1024).unwrap();
365
366 let expected_slice = original.slice(512..1024).unwrap();
367 assert_arrays_eq!(sliced_alp, expected_slice);
368 }
369
370 #[test]
371 fn test_slice_half_chunk_f64_roundtrip() {
372 let values = vec![5.678f64; 1024];
373 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
374 let encoded = alp_encode(&original, None).unwrap();
375
376 let sliced_alp = encoded.slice(512..1024).unwrap();
377
378 let expected_slice = original.slice(512..1024).unwrap();
379 assert_arrays_eq!(sliced_alp, expected_slice);
380 }
381
382 #[test]
383 fn test_slice_half_chunk_with_patches_roundtrip() {
384 let mut values = vec![1.0f64; 1024];
385 values[100] = PI;
386 values[200] = E;
387 values[600] = 42.42;
388
389 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
390 let encoded = alp_encode(&original, None).unwrap();
391
392 let sliced_alp = encoded.slice(512..1024).unwrap();
393
394 let expected_slice = original.slice(512..1024).unwrap();
395 assert_arrays_eq!(sliced_alp, expected_slice);
396 assert!(encoded.patches().is_some());
397 }
398
399 #[test]
400 fn test_slice_across_chunks_with_patches_roundtrip() {
401 let mut values = vec![1.0f64; 2048];
402 values[100] = PI;
403 values[200] = E;
404 values[600] = 42.42;
405 values[800] = 42.42;
406 values[1000] = 42.42;
407 values[1023] = 42.42;
408
409 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
410 let encoded = alp_encode(&original, None).unwrap();
411
412 let sliced_alp = encoded.slice(1023..1025).unwrap();
413
414 let expected_slice = original.slice(1023..1025).unwrap();
415 assert_arrays_eq!(sliced_alp, expected_slice);
416 assert!(encoded.patches().is_some());
417 }
418
419 #[test]
420 fn test_slice_half_chunk_nullable_roundtrip() {
421 let values = (0..1024)
422 .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
423 .collect::<Vec<_>>();
424
425 let original = PrimitiveArray::from_option_iter(values);
426 let encoded = alp_encode(&original, None).unwrap();
427
428 let sliced_alp = encoded.slice(512..1024).unwrap();
429 let decoded = sliced_alp.to_primitive();
430
431 let expected_slice = original.slice(512..1024).unwrap();
432 assert_arrays_eq!(decoded, expected_slice);
433 }
434
435 #[test]
436 fn test_large_f32_array_uniform_values() {
437 let size = 10_000;
438 let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
439 let encoded = alp_encode(&array, None).unwrap();
440
441 assert!(encoded.patches().is_none());
442 let decoded = decompress_into_array(encoded).unwrap();
443 assert_arrays_eq!(decoded, array);
444 }
445
446 #[test]
447 fn test_large_f64_array_uniform_values() {
448 let size = 50_000;
449 let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
450 let encoded = alp_encode(&array, None).unwrap();
451
452 assert!(encoded.patches().is_none());
453 let decoded = decompress_into_array(encoded).unwrap();
454 assert_arrays_eq!(decoded, array);
455 }
456
457 #[test]
458 fn test_large_f32_array_with_patches() {
459 let size = 5_000;
460 let mut values = vec![1.5f32; size];
461 values[100] = std::f32::consts::PI;
462 values[1500] = std::f32::consts::E;
463 values[3000] = f32::NEG_INFINITY;
464 values[4500] = f32::INFINITY;
465
466 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
467 let encoded = alp_encode(&array, None).unwrap();
468
469 assert!(encoded.patches().is_some());
470 let decoded = decompress_into_array(encoded).unwrap();
471 assert_arrays_eq!(decoded, array);
472 }
473
474 #[test]
475 fn test_large_f64_array_with_patches() {
476 let size = 8_000;
477 let mut values = vec![2.2184f64; size];
478 values[0] = PI;
479 values[1000] = E;
480 values[2000] = f64::NAN;
481 values[3000] = f64::INFINITY;
482 values[4000] = f64::NEG_INFINITY;
483 values[5000] = 0.0;
484 values[6000] = -0.0;
485 values[7000] = 999.999999999;
486
487 let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
488 let encoded = alp_encode(&array, None).unwrap();
489
490 assert!(encoded.patches().is_some());
491 let decoded = decompress_into_array(encoded).unwrap();
492
493 for idx in 0..size {
494 let decoded_val = decoded.as_slice::<f64>()[idx];
495 let original_val = values[idx];
496 assert!(
497 decoded_val.is_eq(original_val),
498 "At index {idx}: Expected {original_val} but got {decoded_val}"
499 );
500 }
501 }
502
503 #[test]
504 fn test_large_nullable_array() {
505 let size = 12_000;
506 let values: Vec<Option<f32>> = (0..size)
507 .map(|i| {
508 if i % 7 == 0 {
509 None
510 } else {
511 Some((i as f32) * 0.1)
512 }
513 })
514 .collect();
515
516 let array = PrimitiveArray::from_option_iter(values);
517 let encoded = alp_encode(&array, None).unwrap();
518 let decoded = decompress_into_array(encoded).unwrap();
519
520 assert_arrays_eq!(decoded, array);
521 }
522
523 #[test]
524 fn test_large_mixed_validity_with_patches() {
525 let size = 6_000;
526 let mut values = vec![10.125f64; size];
527
528 values[500] = PI;
529 values[1500] = E;
530 values[2500] = f64::INFINITY;
531 values[3500] = f64::NEG_INFINITY;
532 values[4500] = f64::NAN;
533
534 let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
535
536 let array = PrimitiveArray::new(Buffer::from(values), validity);
537 let encoded = alp_encode(&array, None).unwrap();
538 let decoded = decompress_into_array(encoded).unwrap();
539
540 assert_arrays_eq!(decoded, array);
541 }
542}