1use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::dtype::PType;
9use vortex_array::patches::Patches;
10use vortex_array::validity::Validity;
11use vortex_array::vtable::ValidityHelper;
12use vortex_buffer::Buffer;
13use vortex_buffer::BufferMut;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_mask::Mask;
17
18use crate::Exponents;
19use crate::alp::ALPArray;
20use crate::alp::ALPFloat;
21
22#[macro_export]
23macro_rules! match_each_alp_float_ptype {
24 ($self:expr, | $enc:ident | $body:block) => {{
25 use vortex_array::dtype::PType;
26 use vortex_error::vortex_panic;
27 let ptype = $self;
28 match ptype {
29 PType::F32 => {
30 type $enc = f32;
31 $body
32 }
33 PType::F64 => {
34 type $enc = f64;
35 $body
36 }
37 _ => vortex_panic!("ALP can only encode f32 and f64, got {}", ptype),
38 }
39 }};
40}
41
42pub fn alp_encode(parray: &PrimitiveArray, exponents: Option<Exponents>) -> VortexResult<ALPArray> {
43 let (exponents, encoded, patches) = match parray.ptype() {
44 PType::F32 => alp_encode_components_typed::<f32>(parray, exponents)?,
45 PType::F64 => alp_encode_components_typed::<f64>(parray, exponents)?,
46 _ => vortex_bail!("ALP can only encode f32 and f64"),
47 };
48
49 unsafe {
51 Ok(ALPArray::new_unchecked(
52 encoded,
53 exponents,
54 patches,
55 parray.dtype().clone(),
56 ))
57 }
58}
59
60#[expect(
61 clippy::cast_possible_truncation,
62 reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65 values: &PrimitiveArray,
66 exponents: Option<Exponents>,
67) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
68where
69 T: ALPFloat,
70{
71 let values_slice = values.as_slice::<T>();
72
73 let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
74 T::encode(values_slice, exponents);
75
76 let encoded_array = PrimitiveArray::new(encoded, values.validity().clone()).into_array();
77
78 let validity = values.validity_mask()?;
79 let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
82 match validity {
83 Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
84 Mask::AllFalse(_) => {
85 (Buffer::empty(), Buffer::empty())
87 }
88 Mask::Values(is_valid) => {
89 let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
90 .into_iter()
91 .zip_eq(exceptional_values)
92 .filter(|(index, _)| {
93 let is_valid = is_valid.value(*index as usize);
94 if !is_valid {
95 let patch_chunk = *index as usize / 1024;
96 for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
97 chunk_offsets[chunk_idx] -= 1;
98 }
99 }
100 is_valid
101 })
102 .unzip();
103 (pos.freeze(), vals.freeze())
104 }
105 };
106 let patches = if valid_exceptional_positions.is_empty() {
107 None
108 } else {
109 let patches_validity = if values.dtype().is_nullable() {
110 Validity::AllValid
111 } else {
112 Validity::NonNullable
113 };
114 let valid_exceptional_values =
115 PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
116
117 Some(Patches::new(
118 values_slice.len(),
119 0,
120 valid_exceptional_positions.into_array(),
121 valid_exceptional_values,
122 Some(chunk_offsets.into_array()),
123 )?)
124 };
125 Ok((exponents, encoded_array, patches))
126}
127
128#[cfg(test)]
129mod tests {
130 use core::f64;
131
132 use f64::consts::E;
133 use f64::consts::PI;
134 use vortex_array::LEGACY_SESSION;
135 use vortex_array::ToCanonical;
136 use vortex_array::VortexSessionExecute;
137 use vortex_array::assert_arrays_eq;
138 use vortex_array::dtype::NativePType;
139 use vortex_array::validity::Validity;
140 use vortex_buffer::Buffer;
141 use vortex_buffer::buffer;
142
143 use super::*;
144 use crate::decompress_into_array;
145
146 #[test]
147 fn test_compress() {
148 let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
149 let encoded = alp_encode(&array, None).unwrap();
150 assert!(encoded.patches().is_none());
151 let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
152 assert_arrays_eq!(encoded.encoded(), expected_encoded);
153 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
154
155 let decoded =
156 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
157 assert_arrays_eq!(decoded, array);
158 }
159
160 #[test]
161 fn test_nullable_compress() {
162 let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
163 let encoded = alp_encode(&array, None).unwrap();
164 assert!(encoded.patches().is_none());
165 let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
166 assert_arrays_eq!(encoded.encoded(), expected_encoded);
167 assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
168
169 let decoded =
170 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
171 let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
172 assert_arrays_eq!(decoded, expected);
173 }
174
175 #[test]
176 #[allow(clippy::approx_constant)] fn test_patched_compress() {
178 let values = buffer![1.234f64, 2.718, PI, 4.0];
179 let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
180 let encoded = alp_encode(&array, None).unwrap();
181 assert!(encoded.patches().is_some());
182 let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
183 assert_arrays_eq!(encoded.encoded(), expected_encoded);
184 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
185
186 let decoded =
187 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
188 let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
189 assert_arrays_eq!(decoded, expected_decoded);
190 }
191
192 #[test]
193 #[allow(clippy::approx_constant)] fn test_compress_ignores_invalid_exceptional_values() {
195 let values = buffer![1.234f64, 2.718, PI, 4.0];
196 let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
197 let encoded = alp_encode(&array, None).unwrap();
198 assert!(encoded.patches().is_none());
199 let expected_encoded =
200 PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
201 assert_arrays_eq!(encoded.encoded(), expected_encoded);
202 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
203
204 let decoded =
205 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
206 assert_arrays_eq!(decoded, array);
207 }
208
209 #[test]
210 #[allow(clippy::approx_constant)] fn test_nullable_patched_scalar_at() {
212 let array = PrimitiveArray::from_option_iter([
213 Some(1.234f64),
214 Some(2.718),
215 Some(PI),
216 Some(4.0),
217 None,
218 ]);
219 let encoded = alp_encode(&array, None).unwrap();
220 assert!(encoded.patches().is_some());
221
222 assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
223
224 assert_arrays_eq!(encoded, array);
225
226 let _decoded =
227 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
228 }
229
230 #[test]
231 fn roundtrips_close_fractional() {
232 let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
233 let alp_arr = alp_encode(&original, None).unwrap();
234 assert_arrays_eq!(alp_arr, original);
235 }
236
237 #[test]
238 fn roundtrips_all_null() {
239 let original =
240 PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
241 let alp_arr = alp_encode(&original, None).unwrap();
242 let decompressed = alp_arr.to_primitive();
243
244 assert_eq!(
245 [195.26274, 195.26274, 195.26274],
247 decompressed.as_slice::<f64>()
248 );
249
250 assert_arrays_eq!(decompressed, original);
251 }
252
253 #[test]
254 fn non_finite_numbers() {
255 let original = PrimitiveArray::new(
256 buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
257 Validity::NonNullable,
258 );
259 let encoded = alp_encode(&original, None).unwrap();
260 let decoded = encoded.to_primitive();
261 for idx in 0..original.len() {
262 let decoded_val = decoded.as_slice::<f32>()[idx];
263 let original_val = original.as_slice::<f32>()[idx];
264 assert!(
265 decoded_val.is_eq(original_val),
266 "Expected {original_val} but got {decoded_val}"
267 );
268 }
269 }
270
271 #[test]
272 fn test_chunk_offsets() {
273 let mut values = vec![1.0f64; 3072];
274
275 values[1023] = PI;
276 values[1024] = E;
277 values[1025] = PI;
278
279 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
280 let encoded = alp_encode(&array, None).unwrap();
281 let patches = encoded.patches().unwrap();
282
283 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
284 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
285 assert_arrays_eq!(chunk_offsets, expected_offsets);
286
287 let patch_indices = patches.indices().to_primitive();
288 let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
289 assert_arrays_eq!(patch_indices, expected_indices);
290
291 let patch_values = patches.values().to_primitive();
292 let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
293 assert_arrays_eq!(patch_values, expected_values);
294 }
295
296 #[test]
297 fn test_chunk_offsets_no_patches_in_middle() {
298 let mut values = vec![1.0f64; 3072];
299 values[0] = PI;
300 values[2048] = E;
301
302 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
303 let encoded = alp_encode(&array, None).unwrap();
304 let patches = encoded.patches().unwrap();
305
306 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
307 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
308 assert_arrays_eq!(chunk_offsets, expected_offsets);
309
310 let patch_indices = patches.indices().to_primitive();
311 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
312 assert_arrays_eq!(patch_indices, expected_indices);
313
314 let patch_values = patches.values().to_primitive();
315 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
316 assert_arrays_eq!(patch_values, expected_values);
317 }
318
319 #[test]
320 fn test_chunk_offsets_trailing_empty_chunks() {
321 let mut values = vec![1.0f64; 3072];
322 values[0] = PI;
323
324 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
325 let encoded = alp_encode(&array, None).unwrap();
326 let patches = encoded.patches().unwrap();
327
328 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
329 let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
330 assert_arrays_eq!(chunk_offsets, expected_offsets);
331
332 let patch_indices = patches.indices().to_primitive();
333 let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
334 assert_arrays_eq!(patch_indices, expected_indices);
335
336 let patch_values = patches.values().to_primitive();
337 let expected_values = PrimitiveArray::from_iter(vec![PI]);
338 assert_arrays_eq!(patch_values, expected_values);
339 }
340
341 #[test]
342 fn test_chunk_offsets_single_chunk() {
343 let mut values = vec![1.0f64; 512];
344 values[0] = PI;
345 values[100] = E;
346
347 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
348 let encoded = alp_encode(&array, None).unwrap();
349 let patches = encoded.patches().unwrap();
350
351 let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
352 let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
353 assert_arrays_eq!(chunk_offsets, expected_offsets);
354
355 let patch_indices = patches.indices().to_primitive();
356 let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
357 assert_arrays_eq!(patch_indices, expected_indices);
358
359 let patch_values = patches.values().to_primitive();
360 let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
361 assert_arrays_eq!(patch_values, expected_values);
362 }
363
364 #[test]
365 fn test_slice_half_chunk_f32_roundtrip() {
366 let values = vec![1.234f32; 1024];
368 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
369 let encoded = alp_encode(&original, None).unwrap();
370
371 let sliced_alp = encoded.slice(512..1024).unwrap();
372
373 let expected_slice = original.slice(512..1024).unwrap();
374 assert_arrays_eq!(sliced_alp, expected_slice);
375 }
376
377 #[test]
378 fn test_slice_half_chunk_f64_roundtrip() {
379 let values = vec![5.678f64; 1024];
380 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
381 let encoded = alp_encode(&original, None).unwrap();
382
383 let sliced_alp = encoded.slice(512..1024).unwrap();
384
385 let expected_slice = original.slice(512..1024).unwrap();
386 assert_arrays_eq!(sliced_alp, expected_slice);
387 }
388
389 #[test]
390 fn test_slice_half_chunk_with_patches_roundtrip() {
391 let mut values = vec![1.0f64; 1024];
392 values[100] = PI;
393 values[200] = E;
394 values[600] = 42.42;
395
396 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
397 let encoded = alp_encode(&original, None).unwrap();
398
399 let sliced_alp = encoded.slice(512..1024).unwrap();
400
401 let expected_slice = original.slice(512..1024).unwrap();
402 assert_arrays_eq!(sliced_alp, expected_slice);
403 assert!(encoded.patches().is_some());
404 }
405
406 #[test]
407 fn test_slice_across_chunks_with_patches_roundtrip() {
408 let mut values = vec![1.0f64; 2048];
409 values[100] = PI;
410 values[200] = E;
411 values[600] = 42.42;
412 values[800] = 42.42;
413 values[1000] = 42.42;
414 values[1023] = 42.42;
415
416 let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
417 let encoded = alp_encode(&original, None).unwrap();
418
419 let sliced_alp = encoded.slice(1023..1025).unwrap();
420
421 let expected_slice = original.slice(1023..1025).unwrap();
422 assert_arrays_eq!(sliced_alp, expected_slice);
423 assert!(encoded.patches().is_some());
424 }
425
426 #[test]
427 fn test_slice_half_chunk_nullable_roundtrip() {
428 let values = (0..1024)
429 .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
430 .collect::<Vec<_>>();
431
432 let original = PrimitiveArray::from_option_iter(values);
433 let encoded = alp_encode(&original, None).unwrap();
434
435 let sliced_alp = encoded.slice(512..1024).unwrap();
436 let decoded = sliced_alp.to_primitive();
437
438 let expected_slice = original.slice(512..1024).unwrap();
439 assert_arrays_eq!(decoded, expected_slice);
440 }
441
442 #[test]
443 fn test_large_f32_array_uniform_values() {
444 let size = 10_000;
445 let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
446 let encoded = alp_encode(&array, None).unwrap();
447
448 assert!(encoded.patches().is_none());
449 let decoded =
450 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
451 assert_arrays_eq!(decoded, array);
452 }
453
454 #[test]
455 fn test_large_f64_array_uniform_values() {
456 let size = 50_000;
457 let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
458 let encoded = alp_encode(&array, None).unwrap();
459
460 assert!(encoded.patches().is_none());
461 let decoded =
462 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
463 assert_arrays_eq!(decoded, array);
464 }
465
466 #[test]
467 fn test_large_f32_array_with_patches() {
468 let size = 5_000;
469 let mut values = vec![1.5f32; size];
470 values[100] = std::f32::consts::PI;
471 values[1500] = std::f32::consts::E;
472 values[3000] = f32::NEG_INFINITY;
473 values[4500] = f32::INFINITY;
474
475 let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
476 let encoded = alp_encode(&array, None).unwrap();
477
478 assert!(encoded.patches().is_some());
479 let decoded =
480 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
481 assert_arrays_eq!(decoded, array);
482 }
483
484 #[test]
485 fn test_large_f64_array_with_patches() {
486 let size = 8_000;
487 let mut values = vec![2.2184f64; size];
488 values[0] = PI;
489 values[1000] = E;
490 values[2000] = f64::NAN;
491 values[3000] = f64::INFINITY;
492 values[4000] = f64::NEG_INFINITY;
493 values[5000] = 0.0;
494 values[6000] = -0.0;
495 values[7000] = 999.999999999;
496
497 let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
498 let encoded = alp_encode(&array, None).unwrap();
499
500 assert!(encoded.patches().is_some());
501 let decoded =
502 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
503
504 for idx in 0..size {
505 let decoded_val = decoded.as_slice::<f64>()[idx];
506 let original_val = values[idx];
507 assert!(
508 decoded_val.is_eq(original_val),
509 "At index {idx}: Expected {original_val} but got {decoded_val}"
510 );
511 }
512 }
513
514 #[test]
515 fn test_large_nullable_array() {
516 let size = 12_000;
517 let values: Vec<Option<f32>> = (0..size)
518 .map(|i| {
519 if i % 7 == 0 {
520 None
521 } else {
522 Some((i as f32) * 0.1)
523 }
524 })
525 .collect();
526
527 let array = PrimitiveArray::from_option_iter(values);
528 let encoded = alp_encode(&array, None).unwrap();
529 let decoded =
530 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
531
532 assert_arrays_eq!(decoded, array);
533 }
534
535 #[test]
536 fn test_large_mixed_validity_with_patches() {
537 let size = 6_000;
538 let mut values = vec![10.125f64; size];
539
540 values[500] = PI;
541 values[1500] = E;
542 values[2500] = f64::INFINITY;
543 values[3500] = f64::NEG_INFINITY;
544 values[4500] = f64::NAN;
545
546 let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
547
548 let array = PrimitiveArray::new(Buffer::from(values), validity);
549 let encoded = alp_encode(&array, None).unwrap();
550 let decoded =
551 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
552
553 assert_arrays_eq!(decoded, array);
554 }
555}