1use std::fmt::Debug;
5use std::hash::Hash;
6
7use vortex_array::Array;
8use vortex_array::ArrayBufferVisitor;
9use vortex_array::ArrayChildVisitor;
10use vortex_array::ArrayEq;
11use vortex_array::ArrayHash;
12use vortex_array::ArrayRef;
13use vortex_array::DeserializeMetadata;
14use vortex_array::ExecutionCtx;
15use vortex_array::IntoArray;
16use vortex_array::Precision;
17use vortex_array::ProstMetadata;
18use vortex_array::SerializeMetadata;
19use vortex_array::buffer::BufferHandle;
20use vortex_array::patches::Patches;
21use vortex_array::patches::PatchesMetadata;
22use vortex_array::serde::ArrayChildren;
23use vortex_array::stats::ArrayStats;
24use vortex_array::stats::StatsSetRef;
25use vortex_array::vtable;
26use vortex_array::vtable::ArrayId;
27use vortex_array::vtable::BaseArrayVTable;
28use vortex_array::vtable::VTable;
29use vortex_array::vtable::ValidityChild;
30use vortex_array::vtable::ValidityVTableFromChild;
31use vortex_array::vtable::VisitorVTable;
32use vortex_dtype::DType;
33use vortex_dtype::PType;
34use vortex_error::VortexExpect;
35use vortex_error::VortexResult;
36use vortex_error::vortex_bail;
37use vortex_error::vortex_ensure;
38use vortex_error::vortex_err;
39use vortex_session::VortexSession;
40
41use crate::ALPFloat;
42use crate::alp::Exponents;
43use crate::alp::decompress::execute_decompress;
44use crate::alp::rules::PARENT_KERNELS;
45use crate::alp::rules::RULES;
46
47vtable!(ALP);
48
49impl VTable for ALPVTable {
50 type Array = ALPArray;
51
52 type Metadata = ProstMetadata<ALPMetadata>;
53
54 type ArrayVTable = Self;
55 type OperationsVTable = Self;
56 type ValidityVTable = ValidityVTableFromChild;
57 type VisitorVTable = Self;
58
59 fn id(_array: &Self::Array) -> ArrayId {
60 Self::ID
61 }
62
63 fn metadata(array: &ALPArray) -> VortexResult<Self::Metadata> {
64 let exponents = array.exponents();
65 Ok(ProstMetadata(ALPMetadata {
66 exp_e: exponents.e as u32,
67 exp_f: exponents.f as u32,
68 patches: array
69 .patches()
70 .map(|p| p.to_metadata(array.len(), array.dtype()))
71 .transpose()?,
72 }))
73 }
74
75 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
76 Ok(Some(metadata.serialize()))
77 }
78
79 fn deserialize(
80 bytes: &[u8],
81 _dtype: &DType,
82 _len: usize,
83 _buffers: &[BufferHandle],
84 _session: &VortexSession,
85 ) -> VortexResult<Self::Metadata> {
86 Ok(ProstMetadata(
87 <ProstMetadata<ALPMetadata> as DeserializeMetadata>::deserialize(bytes)?,
88 ))
89 }
90
91 fn build(
92 dtype: &DType,
93 len: usize,
94 metadata: &Self::Metadata,
95 _buffers: &[BufferHandle],
96 children: &dyn ArrayChildren,
97 ) -> VortexResult<ALPArray> {
98 let encoded_ptype = match &dtype {
99 DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
100 DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
101 d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
102 };
103 let encoded = children.get(0, &encoded_ptype, len)?;
104
105 let patches = metadata
106 .patches
107 .map(|p| {
108 let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
109 let values = children.get(2, dtype, p.len()?)?;
110 let chunk_offsets = p
111 .chunk_offsets_dtype()?
112 .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
113 .transpose()?;
114
115 Patches::new(len, p.offset()?, indices, values, chunk_offsets)
116 })
117 .transpose()?;
118
119 ALPArray::try_new(
120 encoded,
121 Exponents {
122 e: u8::try_from(metadata.exp_e)?,
123 f: u8::try_from(metadata.exp_f)?,
124 },
125 patches,
126 )
127 }
128
129 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
130 let patches_info = array
132 .patches
133 .as_ref()
134 .map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
135
136 let expected_children = match &patches_info {
137 Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
138 None => 1,
139 };
140
141 vortex_ensure!(
142 children.len() == expected_children,
143 "ALPArray expects {} children, got {}",
144 expected_children,
145 children.len()
146 );
147
148 let mut children_iter = children.into_iter();
149 array.encoded = children_iter
150 .next()
151 .ok_or_else(|| vortex_err!("Expected encoded child"))?;
152
153 if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
154 let indices = children_iter
155 .next()
156 .ok_or_else(|| vortex_err!("Expected patch indices child"))?;
157 let values = children_iter
158 .next()
159 .ok_or_else(|| vortex_err!("Expected patch values child"))?;
160 let chunk_offsets = children_iter.next();
161
162 array.patches = Some(Patches::new(
163 array_len,
164 offset,
165 indices,
166 values,
167 chunk_offsets,
168 )?);
169 }
170
171 Ok(())
172 }
173
174 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
175 Ok(execute_decompress(array.clone(), ctx)?.into_array())
177 }
178
179 fn reduce_parent(
180 array: &Self::Array,
181 parent: &ArrayRef,
182 child_idx: usize,
183 ) -> VortexResult<Option<ArrayRef>> {
184 RULES.evaluate(array, parent, child_idx)
185 }
186
187 fn execute_parent(
188 array: &Self::Array,
189 parent: &ArrayRef,
190 child_idx: usize,
191 ctx: &mut ExecutionCtx,
192 ) -> VortexResult<Option<ArrayRef>> {
193 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
194 }
195}
196
197#[derive(Clone, Debug)]
198pub struct ALPArray {
199 encoded: ArrayRef,
200 patches: Option<Patches>,
201 dtype: DType,
202 exponents: Exponents,
203 stats_set: ArrayStats,
204}
205
206#[derive(Debug)]
207pub struct ALPVTable;
208
209impl ALPVTable {
210 pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
211}
212
213#[derive(Clone, prost::Message)]
214pub struct ALPMetadata {
215 #[prost(uint32, tag = "1")]
216 pub(crate) exp_e: u32,
217 #[prost(uint32, tag = "2")]
218 pub(crate) exp_f: u32,
219 #[prost(message, optional, tag = "3")]
220 pub(crate) patches: Option<PatchesMetadata>,
221}
222
223impl ALPArray {
224 fn validate(
225 encoded: &dyn Array,
226 exponents: Exponents,
227 patches: Option<&Patches>,
228 ) -> VortexResult<()> {
229 vortex_ensure!(
230 matches!(
231 encoded.dtype(),
232 DType::Primitive(PType::I32 | PType::I64, _)
233 ),
234 "ALP encoded ints have invalid DType {}",
235 encoded.dtype(),
236 );
237
238 let Exponents { e, f } = exponents;
241 match encoded.dtype().as_ptype() {
242 PType::I32 => {
243 vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
244 vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
245 if let Some(patches) = patches {
246 Self::validate_patches::<f32>(patches, encoded)?;
247 }
248 }
249 PType::I64 => {
250 vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
251 vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
252
253 if let Some(patches) = patches {
254 Self::validate_patches::<f64>(patches, encoded)?;
255 }
256 }
257 _ => unreachable!(),
258 }
259
260 if let Some(patches) = patches {
262 vortex_ensure!(
263 patches.array_len() == encoded.len(),
264 "patches array_len != encoded len: {} != {}",
265 patches.array_len(),
266 encoded.len()
267 );
268
269 }
271
272 Ok(())
273 }
274
275 fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &dyn Array) -> VortexResult<()> {
277 vortex_ensure!(
278 patches.array_len() == encoded.len(),
279 "patches array_len != encoded len: {} != {}",
280 patches.array_len(),
281 encoded.len()
282 );
283
284 let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
285 vortex_ensure!(
286 patches.dtype() == &expected_type,
287 "Expected patches type {expected_type}, actual {}",
288 patches.dtype(),
289 );
290
291 Ok(())
292 }
293}
294
295impl ALPArray {
296 pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> Self {
301 Self::try_new(encoded, exponents, patches).vortex_expect("ALPArray new")
302 }
303
304 pub fn try_new(
356 encoded: ArrayRef,
357 exponents: Exponents,
358 patches: Option<Patches>,
359 ) -> VortexResult<Self> {
360 Self::validate(&encoded, exponents, patches.as_ref())?;
361
362 let dtype = match encoded.dtype() {
363 DType::Primitive(PType::I32, nullability) => DType::Primitive(PType::F32, *nullability),
364 DType::Primitive(PType::I64, nullability) => DType::Primitive(PType::F64, *nullability),
365 _ => unreachable!(),
366 };
367
368 Ok(Self {
369 dtype,
370 encoded,
371 exponents,
372 patches,
373 stats_set: Default::default(),
374 })
375 }
376
377 pub(crate) unsafe fn new_unchecked(
382 encoded: ArrayRef,
383 exponents: Exponents,
384 patches: Option<Patches>,
385 dtype: DType,
386 ) -> Self {
387 Self {
388 dtype,
389 encoded,
390 exponents,
391 patches,
392 stats_set: Default::default(),
393 }
394 }
395
396 pub fn ptype(&self) -> PType {
397 self.dtype.as_ptype()
398 }
399
400 pub fn encoded(&self) -> &ArrayRef {
401 &self.encoded
402 }
403
404 #[inline]
405 pub fn exponents(&self) -> Exponents {
406 self.exponents
407 }
408
409 pub fn patches(&self) -> Option<&Patches> {
410 self.patches.as_ref()
411 }
412
413 #[inline]
415 pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
416 (self.encoded, self.exponents, self.patches, self.dtype)
417 }
418}
419
420impl ValidityChild<ALPVTable> for ALPVTable {
421 fn validity_child(array: &ALPArray) -> &ArrayRef {
422 array.encoded()
423 }
424}
425
426impl BaseArrayVTable<ALPVTable> for ALPVTable {
427 fn len(array: &ALPArray) -> usize {
428 array.encoded.len()
429 }
430
431 fn dtype(array: &ALPArray) -> &DType {
432 &array.dtype
433 }
434
435 fn stats(array: &ALPArray) -> StatsSetRef<'_> {
436 array.stats_set.to_ref(array.as_ref())
437 }
438
439 fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
440 array.dtype.hash(state);
441 array.encoded.array_hash(state, precision);
442 array.exponents.hash(state);
443 array.patches.array_hash(state, precision);
444 }
445
446 fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
447 array.dtype == other.dtype
448 && array.encoded.array_eq(&other.encoded, precision)
449 && array.exponents == other.exponents
450 && array.patches.array_eq(&other.patches, precision)
451 }
452}
453
454impl VisitorVTable<ALPVTable> for ALPVTable {
455 fn visit_buffers(_array: &ALPArray, _visitor: &mut dyn ArrayBufferVisitor) {}
456
457 fn nbuffers(_array: &ALPArray) -> usize {
458 0
459 }
460
461 fn visit_children(array: &ALPArray, visitor: &mut dyn ArrayChildVisitor) {
462 visitor.visit_child("encoded", array.encoded());
463 if let Some(patches) = array.patches() {
464 visitor.visit_patches(patches);
465 }
466 }
467
468 fn nchildren(array: &ALPArray) -> usize {
469 1 + array
471 .patches()
472 .map_or(0, |p| 2 + p.chunk_offsets().is_some() as usize)
473 }
474}
475
476#[cfg(test)]
477mod tests {
478 use std::f64::consts::PI;
479 use std::sync::LazyLock;
480
481 use rstest::rstest;
482 use vortex_array::Canonical;
483 use vortex_array::IntoArray;
484 use vortex_array::ToCanonical;
485 use vortex_array::VortexSessionExecute;
486 use vortex_array::arrays::PrimitiveArray;
487 use vortex_array::assert_arrays_eq;
488 use vortex_array::session::ArraySession;
489 use vortex_array::vtable::ValidityHelper;
490 use vortex_session::VortexSession;
491
492 use super::*;
493 use crate::alp_encode;
494 use crate::decompress_into_array;
495
496 static SESSION: LazyLock<VortexSession> =
497 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
498
499 #[rstest]
500 #[case(0)]
501 #[case(1)]
502 #[case(100)]
503 #[case(1023)]
504 #[case(1024)]
505 #[case(1025)]
506 #[case(2047)]
507 #[case(2048)]
508 #[case(2049)]
509 fn test_execute_f32(#[case] size: usize) {
510 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
511 let encoded = alp_encode(&values, None).unwrap();
512
513 let result_canonical = {
514 let mut ctx = SESSION.create_execution_ctx();
515 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
516 };
517 let expected = decompress_into_array(encoded).unwrap();
519
520 assert_arrays_eq!(result_canonical.into_array(), expected);
521 }
522
523 #[rstest]
524 #[case(0)]
525 #[case(1)]
526 #[case(100)]
527 #[case(1023)]
528 #[case(1024)]
529 #[case(1025)]
530 #[case(2047)]
531 #[case(2048)]
532 #[case(2049)]
533 fn test_execute_f64(#[case] size: usize) {
534 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
535 let encoded = alp_encode(&values, None).unwrap();
536
537 let result_canonical = {
538 let mut ctx = SESSION.create_execution_ctx();
539 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
540 };
541 let expected = decompress_into_array(encoded).unwrap();
543
544 assert_arrays_eq!(result_canonical.into_array(), expected);
545 }
546
547 #[rstest]
548 #[case(100)]
549 #[case(1023)]
550 #[case(1024)]
551 #[case(1025)]
552 #[case(2047)]
553 #[case(2048)]
554 #[case(2049)]
555 fn test_execute_with_patches(#[case] size: usize) {
556 let values: Vec<f64> = (0..size)
557 .map(|i| match i % 4 {
558 0..=2 => 1.0,
559 _ => PI,
560 })
561 .collect();
562
563 let array = PrimitiveArray::from_iter(values);
564 let encoded = alp_encode(&array, None).unwrap();
565 assert!(encoded.patches().unwrap().array_len() > 0);
566
567 let result_canonical = {
568 let mut ctx = SESSION.create_execution_ctx();
569 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
570 };
571 let expected = decompress_into_array(encoded).unwrap();
573
574 assert_arrays_eq!(result_canonical.into_array(), expected);
575 }
576
577 #[rstest]
578 #[case(0)]
579 #[case(1)]
580 #[case(100)]
581 #[case(1023)]
582 #[case(1024)]
583 #[case(1025)]
584 #[case(2047)]
585 #[case(2048)]
586 #[case(2049)]
587 fn test_execute_with_validity(#[case] size: usize) {
588 let values: Vec<Option<f32>> = (0..size)
589 .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
590 .collect();
591
592 let array = PrimitiveArray::from_option_iter(values);
593 let encoded = alp_encode(&array, None).unwrap();
594
595 let result_canonical = {
596 let mut ctx = SESSION.create_execution_ctx();
597 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
598 };
599 let expected = decompress_into_array(encoded).unwrap();
601
602 assert_arrays_eq!(result_canonical.into_array(), expected);
603 }
604
605 #[rstest]
606 #[case(100)]
607 #[case(1023)]
608 #[case(1024)]
609 #[case(1025)]
610 #[case(2047)]
611 #[case(2048)]
612 #[case(2049)]
613 fn test_execute_with_patches_and_validity(#[case] size: usize) {
614 let values: Vec<Option<f64>> = (0..size)
615 .map(|idx| match idx % 3 {
616 0 => Some(1.0),
617 1 => None,
618 _ => Some(PI),
619 })
620 .collect();
621
622 let array = PrimitiveArray::from_option_iter(values);
623 let encoded = alp_encode(&array, None).unwrap();
624 assert!(encoded.patches().unwrap().array_len() > 0);
625
626 let result_canonical = {
627 let mut ctx = SESSION.create_execution_ctx();
628 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
629 };
630 let expected = decompress_into_array(encoded).unwrap();
632
633 assert_arrays_eq!(result_canonical.into_array(), expected);
634 }
635
636 #[rstest]
637 #[case(500, 100)]
638 #[case(1000, 200)]
639 #[case(2048, 512)]
640 fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
641 let values: Vec<Option<f64>> = (0..size)
642 .map(|i| {
643 if i % 5 == 0 {
644 None
645 } else if i % 4 == 3 {
646 Some(PI)
647 } else {
648 Some(1.0)
649 }
650 })
651 .collect();
652
653 let array = PrimitiveArray::from_option_iter(values.clone());
654 let encoded = alp_encode(&array, None).unwrap();
655
656 let slice_end = size - slice_start;
657 let slice_len = slice_end - slice_start;
658 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
659
660 let result_canonical = {
661 let mut ctx = SESSION.create_execution_ctx();
662 sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
663 };
664 let result_primitive = result_canonical.into_primitive();
665
666 for idx in 0..slice_len {
667 let expected_value = values[slice_start + idx];
668
669 let result_valid = result_primitive.validity().is_valid(idx).unwrap();
670 assert_eq!(
671 result_valid,
672 expected_value.is_some(),
673 "Validity mismatch at idx={idx}",
674 );
675
676 if let Some(expected_val) = expected_value {
677 let result_val = result_primitive.as_slice::<f64>()[idx];
678 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
679 }
680 }
681 }
682
683 #[rstest]
684 #[case(500, 100)]
685 #[case(1000, 200)]
686 #[case(2048, 512)]
687 fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
688 let values: Vec<Option<f64>> = (0..size)
689 .map(|i| {
690 if i % 5 == 0 {
691 None
692 } else if i % 4 == 3 {
693 Some(PI)
694 } else {
695 Some(1.0)
696 }
697 })
698 .collect();
699
700 let array = PrimitiveArray::from_option_iter(values.clone());
701 let encoded = alp_encode(&array, None).unwrap();
702
703 let slice_end = size - slice_start;
704 let slice_len = slice_end - slice_start;
705 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
706
707 let result_primitive = sliced_encoded.to_primitive();
708
709 for idx in 0..slice_len {
710 let expected_value = values[slice_start + idx];
711
712 let result_valid = result_primitive.validity_mask().unwrap().value(idx);
713 assert_eq!(
714 result_valid,
715 expected_value.is_some(),
716 "Validity mismatch at idx={idx}",
717 );
718
719 if let Some(expected_val) = expected_value {
720 let buf = result_primitive.to_buffer::<f64>();
721 let result_val = buf.as_slice()[idx];
722 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
723 }
724 }
725 }
726
727 #[test]
736 fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
737 let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
739 let original = PrimitiveArray::from_iter(values);
740
741 let normally_encoded = alp_encode(&original, None).unwrap();
743 assert!(
744 normally_encoded.patches().is_some(),
745 "Test requires patches to be present"
746 );
747
748 let original_patches = normally_encoded.patches().unwrap();
749 assert!(
750 original_patches.chunk_offsets().is_some(),
751 "Normal encoding should have chunk_offsets"
752 );
753
754 let patches_without_chunk_offsets = Patches::new(
756 original_patches.array_len(),
757 original_patches.offset(),
758 original_patches.indices().clone(),
759 original_patches.values().clone(),
760 None, )
762 .unwrap();
763
764 let alp_without_chunk_offsets = ALPArray::new(
766 normally_encoded.encoded().clone(),
767 normally_encoded.exponents(),
768 Some(patches_without_chunk_offsets),
769 );
770
771 let result_legacy = decompress_into_array(alp_without_chunk_offsets.clone()).unwrap();
773 let legacy_slice = result_legacy.as_slice::<f64>();
774
775 assert!(
777 (legacy_slice[2] - PI).abs() < 1e-10,
778 "Legacy path should have PI at index 2, got {}",
779 legacy_slice[2]
780 );
781
782 let result_execute = {
784 let mut ctx = SESSION.create_execution_ctx();
785 execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
786 };
787 let execute_slice = result_execute.as_slice::<f64>();
788
789 assert!(
791 (execute_slice[2] - PI).abs() < 1e-10,
792 "Execute path should have PI at index 2, but got {} (patches were dropped!)",
793 execute_slice[2]
794 );
795 }
796}