1use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::dtype::PType;
9use vortex_array::patches::Patches;
10use vortex_array::validity::Validity;
11use vortex_buffer::Buffer;
12use vortex_buffer::BufferMut;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_mask::Mask;
16
17use crate::ALP;
18use crate::Exponents;
19use crate::alp::ALPArray;
20use crate::alp::ALPFloat;
21
22#[macro_export]
23macro_rules! match_each_alp_float_ptype {
24 ($self:expr, | $enc:ident | $body:block) => {{
25 use vortex_array::dtype::PType;
26 use vortex_error::vortex_panic;
27 let ptype = $self;
28 match ptype {
29 PType::F32 => {
30 type $enc = f32;
31 $body
32 }
33 PType::F64 => {
34 type $enc = f64;
35 $body
36 }
37 _ => vortex_panic!("ALP can only encode f32 and f64, got {}", ptype),
38 }
39 }};
40}
41
42pub fn alp_encode(parray: &PrimitiveArray, exponents: Option<Exponents>) -> VortexResult<ALPArray> {
43 let (exponents, encoded, patches) = match parray.ptype() {
44 PType::F32 => alp_encode_components_typed::<f32>(parray, exponents)?,
45 PType::F64 => alp_encode_components_typed::<f64>(parray, exponents)?,
46 _ => vortex_bail!("ALP can only encode f32 and f64"),
47 };
48
49 unsafe { Ok(ALP::new_unchecked(encoded, exponents, patches)) }
51}
52
53#[expect(
54 clippy::cast_possible_truncation,
55 reason = "u64 index cast to usize is safe for reasonable array sizes"
56)]
57fn alp_encode_components_typed<T>(
58 values: &PrimitiveArray,
59 exponents: Option<Exponents>,
60) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
61where
62 T: ALPFloat,
63{
64 let values_slice = values.as_slice::<T>();
65
66 let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
67 T::encode(values_slice, exponents);
68
69 let encoded_array = PrimitiveArray::new(encoded, values.validity()?).into_array();
70
71 let validity = values.validity_mask()?;
72 let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
75 match validity {
76 Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
77 Mask::AllFalse(_) => {
78 (Buffer::empty(), Buffer::empty())
80 }
81 Mask::Values(is_valid) => {
82 let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
83 .into_iter()
84 .zip_eq(exceptional_values)
85 .filter(|(index, _)| {
86 let is_valid = is_valid.value(*index as usize);
87 if !is_valid {
88 let patch_chunk = *index as usize / 1024;
89 for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
90 chunk_offsets[chunk_idx] -= 1;
91 }
92 }
93 is_valid
94 })
95 .unzip();
96 (pos.freeze(), vals.freeze())
97 }
98 };
99 let patches = if valid_exceptional_positions.is_empty() {
100 None
101 } else {
102 let patches_validity = if values.dtype().is_nullable() {
103 Validity::AllValid
104 } else {
105 Validity::NonNullable
106 };
107 let valid_exceptional_values =
108 PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
109
110 Some(Patches::new(
111 values_slice.len(),
112 0,
113 valid_exceptional_positions.into_array(),
114 valid_exceptional_values,
115 Some(chunk_offsets.into_array()),
116 )?)
117 };
118 Ok((exponents, encoded_array, patches))
119}
120
121#[cfg(test)]
122mod tests {
123 use core::f64;
124
125 use f64::consts::E;
126 use f64::consts::PI;
127 use vortex_array::LEGACY_SESSION;
128 use vortex_array::ToCanonical;
129 use vortex_array::VortexSessionExecute;
130 use vortex_array::assert_arrays_eq;
131 use vortex_array::dtype::NativePType;
132 use vortex_array::validity::Validity;
133 use vortex_buffer::Buffer;
134 use vortex_buffer::buffer;
135
136 use super::*;
137 use crate::alp::array::ALPArrayExt;
138 use crate::alp::array::ALPArraySlotsExt;
139 use crate::decompress_into_array;
140
141 #[test]
142 fn test_compress() {
143 let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
144 let encoded = alp_encode(&array, None).unwrap();
145 assert!(encoded.patches().is_none());
146 let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
147 assert_arrays_eq!(encoded.encoded(), expected_encoded);
148 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
149
150 let decoded =
151 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
152 assert_arrays_eq!(decoded, array);
153 }
154
155 #[test]
156 fn test_nullable_compress() {
157 let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
158 let encoded = alp_encode(&array, None).unwrap();
159 assert!(encoded.patches().is_none());
160 let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
161 assert_arrays_eq!(encoded.encoded(), expected_encoded);
162 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
163
164 let decoded =
165 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
166 let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
167 assert_arrays_eq!(decoded, expected);
168 }
169
170 #[test]
171 #[allow(clippy::approx_constant)] fn test_patched_compress() {
173 let values = buffer![1.234f64, 2.718, PI, 4.0];
174 let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
175 let encoded = alp_encode(&array, None).unwrap();
176 assert!(encoded.patches().is_some());
177 let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
178 assert_arrays_eq!(encoded.encoded(), expected_encoded);
179 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
180
181 let decoded =
182 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
183 let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
184 assert_arrays_eq!(decoded, expected_decoded);
185 }
186
187 #[test]
188 #[allow(clippy::approx_constant)] fn test_compress_ignores_invalid_exceptional_values() {
190 let values = buffer![1.234f64, 2.718, PI, 4.0];
191 let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
192 let encoded = alp_encode(&array, None).unwrap();
193 assert!(encoded.patches().is_none());
194 let expected_encoded =
195 PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
196 assert_arrays_eq!(encoded.encoded(), expected_encoded);
197 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
198
199 let decoded =
200 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
201 assert_arrays_eq!(decoded, array);
202 }
203
204 #[test]
205 #[allow(clippy::approx_constant)] fn test_nullable_patched_scalar_at() {
207 let array = PrimitiveArray::from_option_iter([
208 Some(1.234f64),
209 Some(2.718),
210 Some(PI),
211 Some(4.0),
212 None,
213 ]);
214 let encoded = alp_encode(&array, None).unwrap();
215 assert!(encoded.patches().is_some());
216
217 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
218
219 assert_arrays_eq!(encoded, array);
220
221 let _decoded =
222 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
223 }
224
225 #[test]
226 fn roundtrips_close_fractional() {
227 let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
228 let alp_arr = alp_encode(&original, None).unwrap();
229 assert_arrays_eq!(alp_arr, original);
230 }
231
232 #[test]
233 fn roundtrips_all_null() {
234 let original =
235 PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
236 let alp_arr = alp_encode(&original, None).unwrap();
237 let decompressed = alp_arr.into_array().to_primitive();
238
239 assert_eq!(
240 [195.26274, 195.26274, 195.26274],
242 decompressed.as_slice::<f64>()
243 );
244
245 assert_arrays_eq!(decompressed, original);
246 }
247
248 #[test]
249 fn non_finite_numbers() {
250 let original = PrimitiveArray::new(
251 buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
252 Validity::NonNullable,
253 );
254 let encoded = alp_encode(&original, None).unwrap();
255 let decoded = encoded.as_array().to_primitive();
256 for idx in 0..original.len() {
257 let decoded_val = decoded.as_slice::<f32>()[idx];
258 let original_val = original.as_slice::<f32>()[idx];
259 assert!(
260 decoded_val.is_eq(original_val),
261 "Expected {original_val} but got {decoded_val}"
262 );
263 }
264 }
265
266 #[test]
267 fn test_chunk_offsets() {
268 let mut values = vec![1.0f64; 3072];
269
270 values[1023] = PI;
271 values[1024] = E;
272 values[1025] = PI;
273
274 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
275 let encoded = alp_encode(&array, None).unwrap();
276 let patches = encoded.patches().unwrap();
277
278 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
279 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
280 assert_arrays_eq!(chunk_offsets, expected_offsets);
281
282 let patch_indices = patches.indices().to_primitive();
283 let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
284 assert_arrays_eq!(patch_indices, expected_indices);
285
286 let patch_values = patches.values().to_primitive();
287 let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
288 assert_arrays_eq!(patch_values, expected_values);
289 }
290
291 #[test]
292 fn test_chunk_offsets_no_patches_in_middle() {
293 let mut values = vec![1.0f64; 3072];
294 values[0] = PI;
295 values[2048] = E;
296
297 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
298 let encoded = alp_encode(&array, None).unwrap();
299 let patches = encoded.patches().unwrap();
300
301 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
302 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
303 assert_arrays_eq!(chunk_offsets, expected_offsets);
304
305 let patch_indices = patches.indices().to_primitive();
306 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
307 assert_arrays_eq!(patch_indices, expected_indices);
308
309 let patch_values = patches.values().to_primitive();
310 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
311 assert_arrays_eq!(patch_values, expected_values);
312 }
313
314 #[test]
315 fn test_chunk_offsets_trailing_empty_chunks() {
316 let mut values = vec![1.0f64; 3072];
317 values[0] = PI;
318
319 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
320 let encoded = alp_encode(&array, None).unwrap();
321 let patches = encoded.patches().unwrap();
322
323 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
324 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
325 assert_arrays_eq!(chunk_offsets, expected_offsets);
326
327 let patch_indices = patches.indices().to_primitive();
328 let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
329 assert_arrays_eq!(patch_indices, expected_indices);
330
331 let patch_values = patches.values().to_primitive();
332 let expected_values = PrimitiveArray::from_iter(vec![PI]);
333 assert_arrays_eq!(patch_values, expected_values);
334 }
335
336 #[test]
337 fn test_chunk_offsets_single_chunk() {
338 let mut values = vec![1.0f64; 512];
339 values[0] = PI;
340 values[100] = E;
341
342 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
343 let encoded = alp_encode(&array, None).unwrap();
344 let patches = encoded.patches().unwrap();
345
346 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
347 let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
348 assert_arrays_eq!(chunk_offsets, expected_offsets);
349
350 let patch_indices = patches.indices().to_primitive();
351 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
352 assert_arrays_eq!(patch_indices, expected_indices);
353
354 let patch_values = patches.values().to_primitive();
355 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
356 assert_arrays_eq!(patch_values, expected_values);
357 }
358
359 #[test]
360 fn test_slice_half_chunk_f32_roundtrip() {
361 let values = vec![1.234f32; 1024];
363 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
364 let encoded = alp_encode(&original, None).unwrap();
365
366 let sliced_alp = encoded.slice(512..1024).unwrap();
367
368 let expected_slice = original.slice(512..1024).unwrap();
369 assert_arrays_eq!(sliced_alp, expected_slice);
370 }
371
372 #[test]
373 fn test_slice_half_chunk_f64_roundtrip() {
374 let values = vec![5.678f64; 1024];
375 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
376 let encoded = alp_encode(&original, None).unwrap();
377
378 let sliced_alp = encoded.slice(512..1024).unwrap();
379
380 let expected_slice = original.slice(512..1024).unwrap();
381 assert_arrays_eq!(sliced_alp, expected_slice);
382 }
383
384 #[test]
385 fn test_slice_half_chunk_with_patches_roundtrip() {
386 let mut values = vec![1.0f64; 1024];
387 values[100] = PI;
388 values[200] = E;
389 values[600] = 42.42;
390
391 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
392 let encoded = alp_encode(&original, None).unwrap();
393
394 let sliced_alp = encoded.slice(512..1024).unwrap();
395
396 let expected_slice = original.slice(512..1024).unwrap();
397 assert_arrays_eq!(sliced_alp, expected_slice);
398 assert!(encoded.patches().is_some());
399 }
400
401 #[test]
402 fn test_slice_across_chunks_with_patches_roundtrip() {
403 let mut values = vec![1.0f64; 2048];
404 values[100] = PI;
405 values[200] = E;
406 values[600] = 42.42;
407 values[800] = 42.42;
408 values[1000] = 42.42;
409 values[1023] = 42.42;
410
411 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
412 let encoded = alp_encode(&original, None).unwrap();
413
414 let sliced_alp = encoded.slice(1023..1025).unwrap();
415
416 let expected_slice = original.slice(1023..1025).unwrap();
417 assert_arrays_eq!(sliced_alp, expected_slice);
418 assert!(encoded.patches().is_some());
419 }
420
421 #[test]
422 fn test_slice_half_chunk_nullable_roundtrip() {
423 let values = (0..1024)
424 .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
425 .collect::<Vec<_>>();
426
427 let original = PrimitiveArray::from_option_iter(values);
428 let encoded = alp_encode(&original, None).unwrap();
429
430 let sliced_alp = encoded.slice(512..1024).unwrap();
431 let decoded = sliced_alp.to_primitive();
432
433 let expected_slice = original.slice(512..1024).unwrap();
434 assert_arrays_eq!(decoded, expected_slice);
435 }
436
437 #[test]
438 fn test_large_f32_array_uniform_values() {
439 let size = 10_000;
440 let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
441 let encoded = alp_encode(&array, None).unwrap();
442
443 assert!(encoded.patches().is_none());
444 let decoded =
445 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
446 assert_arrays_eq!(decoded, array);
447 }
448
449 #[test]
450 fn test_large_f64_array_uniform_values() {
451 let size = 50_000;
452 let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
453 let encoded = alp_encode(&array, None).unwrap();
454
455 assert!(encoded.patches().is_none());
456 let decoded =
457 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
458 assert_arrays_eq!(decoded, array);
459 }
460
461 #[test]
462 fn test_large_f32_array_with_patches() {
463 let size = 5_000;
464 let mut values = vec![1.5f32; size];
465 values[100] = std::f32::consts::PI;
466 values[1500] = std::f32::consts::E;
467 values[3000] = f32::NEG_INFINITY;
468 values[4500] = f32::INFINITY;
469
470 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
471 let encoded = alp_encode(&array, None).unwrap();
472
473 assert!(encoded.patches().is_some());
474 let decoded =
475 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
476 assert_arrays_eq!(decoded, array);
477 }
478
479 #[test]
480 fn test_large_f64_array_with_patches() {
481 let size = 8_000;
482 let mut values = vec![2.2184f64; size];
483 values[0] = PI;
484 values[1000] = E;
485 values[2000] = f64::NAN;
486 values[3000] = f64::INFINITY;
487 values[4000] = f64::NEG_INFINITY;
488 values[5000] = 0.0;
489 values[6000] = -0.0;
490 values[7000] = 999.999999999;
491
492 let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
493 let encoded = alp_encode(&array, None).unwrap();
494
495 assert!(encoded.patches().is_some());
496 let decoded =
497 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
498
499 for idx in 0..size {
500 let decoded_val = decoded.as_slice::<f64>()[idx];
501 let original_val = values[idx];
502 assert!(
503 decoded_val.is_eq(original_val),
504 "At index {idx}: Expected {original_val} but got {decoded_val}"
505 );
506 }
507 }
508
509 #[test]
510 fn test_large_nullable_array() {
511 let size = 12_000;
512 let values: Vec<Option<f32>> = (0..size)
513 .map(|i| {
514 if i % 7 == 0 {
515 None
516 } else {
517 Some((i as f32) * 0.1)
518 }
519 })
520 .collect();
521
522 let array = PrimitiveArray::from_option_iter(values);
523 let encoded = alp_encode(&array, None).unwrap();
524 let decoded =
525 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
526
527 assert_arrays_eq!(decoded, array);
528 }
529
530 #[test]
531 fn test_large_mixed_validity_with_patches() {
532 let size = 6_000;
533 let mut values = vec![10.125f64; size];
534
535 values[500] = PI;
536 values[1500] = E;
537 values[2500] = f64::INFINITY;
538 values[3500] = f64::NEG_INFINITY;
539 values[4500] = f64::NAN;
540
541 let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
542
543 let array = PrimitiveArray::new(Buffer::from(values), validity);
544 let encoded = alp_encode(&array, None).unwrap();
545 let decoded =
546 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
547
548 assert_arrays_eq!(decoded, array);
549 }
550
551 #[test]
559 fn test_slice_mid_chunk_with_patches_in_trailing_chunk() {
560 let mut values = vec![1.0f64; 3072];
562 values[100] = PI;
564 values[500] = E;
565 values[1100] = PI;
567 values[1500] = E;
568 values[1900] = PI;
569 values[2100] = PI;
571 values[2500] = E;
572 values[2900] = PI;
573
574 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
575 let encoded = alp_encode(&original, None).unwrap();
576 assert!(encoded.patches().is_some());
577
578 let sliced_alp = encoded.slice(0..2500).unwrap();
582 let expected = original.slice(0..2500).unwrap();
583 assert_arrays_eq!(sliced_alp, expected);
584
585 let sliced_alp = encoded.slice(500..2500).unwrap();
587 let expected = original.slice(500..2500).unwrap();
588 assert_arrays_eq!(sliced_alp, expected);
589 }
590}