1use std::fmt::Debug;
5use std::hash::Hash;
6
7use vortex_array::ArrayEq;
8use vortex_array::ArrayHash;
9use vortex_array::ArrayRef;
10use vortex_array::DeserializeMetadata;
11use vortex_array::DynArray;
12use vortex_array::ExecutionCtx;
13use vortex_array::ExecutionStep;
14use vortex_array::IntoArray;
15use vortex_array::Precision;
16use vortex_array::ProstMetadata;
17use vortex_array::SerializeMetadata;
18use vortex_array::buffer::BufferHandle;
19use vortex_array::dtype::DType;
20use vortex_array::dtype::PType;
21use vortex_array::patches::Patches;
22use vortex_array::patches::PatchesMetadata;
23use vortex_array::serde::ArrayChildren;
24use vortex_array::stats::ArrayStats;
25use vortex_array::stats::StatsSetRef;
26use vortex_array::vtable;
27use vortex_array::vtable::ArrayId;
28use vortex_array::vtable::VTable;
29use vortex_array::vtable::ValidityChild;
30use vortex_array::vtable::ValidityVTableFromChild;
31use vortex_array::vtable::patches_child;
32use vortex_array::vtable::patches_child_name;
33use vortex_array::vtable::patches_nchildren;
34use vortex_error::VortexExpect;
35use vortex_error::VortexResult;
36use vortex_error::vortex_bail;
37use vortex_error::vortex_ensure;
38use vortex_error::vortex_err;
39use vortex_error::vortex_panic;
40use vortex_session::VortexSession;
41
42use crate::ALPFloat;
43use crate::alp::Exponents;
44use crate::alp::decompress::execute_decompress;
45use crate::alp::rules::PARENT_KERNELS;
46use crate::alp::rules::RULES;
47
48vtable!(ALP);
49
50impl VTable for ALPVTable {
51 type Array = ALPArray;
52
53 type Metadata = ProstMetadata<ALPMetadata>;
54 type OperationsVTable = Self;
55 type ValidityVTable = ValidityVTableFromChild;
56
57 fn id(_array: &Self::Array) -> ArrayId {
58 Self::ID
59 }
60
61 fn len(array: &ALPArray) -> usize {
62 array.encoded.len()
63 }
64
65 fn dtype(array: &ALPArray) -> &DType {
66 &array.dtype
67 }
68
69 fn stats(array: &ALPArray) -> StatsSetRef<'_> {
70 array.stats_set.to_ref(array.as_ref())
71 }
72
73 fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
74 array.dtype.hash(state);
75 array.encoded.array_hash(state, precision);
76 array.exponents.hash(state);
77 array.patches.array_hash(state, precision);
78 }
79
80 fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
81 array.dtype == other.dtype
82 && array.encoded.array_eq(&other.encoded, precision)
83 && array.exponents == other.exponents
84 && array.patches.array_eq(&other.patches, precision)
85 }
86
87 fn nbuffers(_array: &ALPArray) -> usize {
88 0
89 }
90
91 fn buffer(_array: &ALPArray, idx: usize) -> BufferHandle {
92 vortex_panic!("ALPArray buffer index {idx} out of bounds")
93 }
94
95 fn buffer_name(_array: &ALPArray, _idx: usize) -> Option<String> {
96 None
97 }
98
99 fn nchildren(array: &ALPArray) -> usize {
100 1 + array.patches().map_or(0, patches_nchildren)
101 }
102
103 fn child(array: &ALPArray, idx: usize) -> ArrayRef {
104 match idx {
105 0 => array.encoded().clone(),
106 _ => {
107 let patches = array
108 .patches()
109 .unwrap_or_else(|| vortex_panic!("ALPArray child index {idx} out of bounds"));
110 patches_child(patches, idx - 1)
111 }
112 }
113 }
114
115 fn child_name(array: &ALPArray, idx: usize) -> String {
116 match idx {
117 0 => "encoded".to_string(),
118 _ => {
119 if array.patches().is_none() {
120 vortex_panic!("ALPArray child_name index {idx} out of bounds");
121 }
122 patches_child_name(idx - 1).to_string()
123 }
124 }
125 }
126
127 fn metadata(array: &ALPArray) -> VortexResult<Self::Metadata> {
128 let exponents = array.exponents();
129 Ok(ProstMetadata(ALPMetadata {
130 exp_e: exponents.e as u32,
131 exp_f: exponents.f as u32,
132 patches: array
133 .patches()
134 .map(|p| p.to_metadata(array.len(), array.dtype()))
135 .transpose()?,
136 }))
137 }
138
139 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
140 Ok(Some(metadata.serialize()))
141 }
142
143 fn deserialize(
144 bytes: &[u8],
145 _dtype: &DType,
146 _len: usize,
147 _buffers: &[BufferHandle],
148 _session: &VortexSession,
149 ) -> VortexResult<Self::Metadata> {
150 Ok(ProstMetadata(
151 <ProstMetadata<ALPMetadata> as DeserializeMetadata>::deserialize(bytes)?,
152 ))
153 }
154
155 fn build(
156 dtype: &DType,
157 len: usize,
158 metadata: &Self::Metadata,
159 _buffers: &[BufferHandle],
160 children: &dyn ArrayChildren,
161 ) -> VortexResult<ALPArray> {
162 let encoded_ptype = match &dtype {
163 DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
164 DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
165 d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
166 };
167 let encoded = children.get(0, &encoded_ptype, len)?;
168
169 let patches = metadata
170 .patches
171 .map(|p| {
172 let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
173 let values = children.get(2, dtype, p.len()?)?;
174 let chunk_offsets = p
175 .chunk_offsets_dtype()?
176 .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
177 .transpose()?;
178
179 Patches::new(len, p.offset()?, indices, values, chunk_offsets)
180 })
181 .transpose()?;
182
183 ALPArray::try_new(
184 encoded,
185 Exponents {
186 e: u8::try_from(metadata.exp_e)?,
187 f: u8::try_from(metadata.exp_f)?,
188 },
189 patches,
190 )
191 }
192
193 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
194 let patches_info = array
196 .patches
197 .as_ref()
198 .map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
199
200 let expected_children = match &patches_info {
201 Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
202 None => 1,
203 };
204
205 vortex_ensure!(
206 children.len() == expected_children,
207 "ALPArray expects {} children, got {}",
208 expected_children,
209 children.len()
210 );
211
212 let mut children_iter = children.into_iter();
213 array.encoded = children_iter
214 .next()
215 .ok_or_else(|| vortex_err!("Expected encoded child"))?;
216
217 if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
218 let indices = children_iter
219 .next()
220 .ok_or_else(|| vortex_err!("Expected patch indices child"))?;
221 let values = children_iter
222 .next()
223 .ok_or_else(|| vortex_err!("Expected patch values child"))?;
224 let chunk_offsets = children_iter.next();
225
226 array.patches = Some(Patches::new(
227 array_len,
228 offset,
229 indices,
230 values,
231 chunk_offsets,
232 )?);
233 }
234
235 Ok(())
236 }
237
238 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionStep> {
239 Ok(ExecutionStep::Done(
241 execute_decompress(array.clone(), ctx)?.into_array(),
242 ))
243 }
244
245 fn reduce_parent(
246 array: &Self::Array,
247 parent: &ArrayRef,
248 child_idx: usize,
249 ) -> VortexResult<Option<ArrayRef>> {
250 RULES.evaluate(array, parent, child_idx)
251 }
252
253 fn execute_parent(
254 array: &Self::Array,
255 parent: &ArrayRef,
256 child_idx: usize,
257 ctx: &mut ExecutionCtx,
258 ) -> VortexResult<Option<ArrayRef>> {
259 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
260 }
261}
262
263#[derive(Clone, Debug)]
264pub struct ALPArray {
265 encoded: ArrayRef,
266 patches: Option<Patches>,
267 dtype: DType,
268 exponents: Exponents,
269 stats_set: ArrayStats,
270}
271
272#[derive(Debug)]
273pub struct ALPVTable;
274
275impl ALPVTable {
276 pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
277}
278
279#[derive(Clone, prost::Message)]
280pub struct ALPMetadata {
281 #[prost(uint32, tag = "1")]
282 pub(crate) exp_e: u32,
283 #[prost(uint32, tag = "2")]
284 pub(crate) exp_f: u32,
285 #[prost(message, optional, tag = "3")]
286 pub(crate) patches: Option<PatchesMetadata>,
287}
288
289impl ALPArray {
290 fn validate(
291 encoded: &ArrayRef,
292 exponents: Exponents,
293 patches: Option<&Patches>,
294 ) -> VortexResult<()> {
295 vortex_ensure!(
296 matches!(
297 encoded.dtype(),
298 DType::Primitive(PType::I32 | PType::I64, _)
299 ),
300 "ALP encoded ints have invalid DType {}",
301 encoded.dtype(),
302 );
303
304 let Exponents { e, f } = exponents;
307 match encoded.dtype().as_ptype() {
308 PType::I32 => {
309 vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
310 vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
311 if let Some(patches) = patches {
312 Self::validate_patches::<f32>(patches, encoded)?;
313 }
314 }
315 PType::I64 => {
316 vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
317 vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
318
319 if let Some(patches) = patches {
320 Self::validate_patches::<f64>(patches, encoded)?;
321 }
322 }
323 _ => unreachable!(),
324 }
325
326 if let Some(patches) = patches {
328 vortex_ensure!(
329 patches.array_len() == encoded.len(),
330 "patches array_len != encoded len: {} != {}",
331 patches.array_len(),
332 encoded.len()
333 );
334
335 }
337
338 Ok(())
339 }
340
341 fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
343 vortex_ensure!(
344 patches.array_len() == encoded.len(),
345 "patches array_len != encoded len: {} != {}",
346 patches.array_len(),
347 encoded.len()
348 );
349
350 let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
351 vortex_ensure!(
352 patches.dtype() == &expected_type,
353 "Expected patches type {expected_type}, actual {}",
354 patches.dtype(),
355 );
356
357 Ok(())
358 }
359}
360
361impl ALPArray {
362 pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> Self {
367 Self::try_new(encoded, exponents, patches).vortex_expect("ALPArray new")
368 }
369
370 pub fn try_new(
422 encoded: ArrayRef,
423 exponents: Exponents,
424 patches: Option<Patches>,
425 ) -> VortexResult<Self> {
426 Self::validate(&encoded, exponents, patches.as_ref())?;
427
428 let dtype = match encoded.dtype() {
429 DType::Primitive(PType::I32, nullability) => DType::Primitive(PType::F32, *nullability),
430 DType::Primitive(PType::I64, nullability) => DType::Primitive(PType::F64, *nullability),
431 _ => unreachable!(),
432 };
433
434 Ok(Self {
435 dtype,
436 encoded,
437 exponents,
438 patches,
439 stats_set: Default::default(),
440 })
441 }
442
443 pub(crate) unsafe fn new_unchecked(
448 encoded: ArrayRef,
449 exponents: Exponents,
450 patches: Option<Patches>,
451 dtype: DType,
452 ) -> Self {
453 Self {
454 dtype,
455 encoded,
456 exponents,
457 patches,
458 stats_set: Default::default(),
459 }
460 }
461
462 pub fn ptype(&self) -> PType {
463 self.dtype.as_ptype()
464 }
465
466 pub fn encoded(&self) -> &ArrayRef {
467 &self.encoded
468 }
469
470 #[inline]
471 pub fn exponents(&self) -> Exponents {
472 self.exponents
473 }
474
475 pub fn patches(&self) -> Option<&Patches> {
476 self.patches.as_ref()
477 }
478
479 #[inline]
481 pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
482 (self.encoded, self.exponents, self.patches, self.dtype)
483 }
484}
485
486impl ValidityChild<ALPVTable> for ALPVTable {
487 fn validity_child(array: &ALPArray) -> &ArrayRef {
488 array.encoded()
489 }
490}
491
492#[cfg(test)]
493mod tests {
494 use std::f64::consts::PI;
495 use std::sync::LazyLock;
496
497 use rstest::rstest;
498 use vortex_array::Canonical;
499 use vortex_array::IntoArray;
500 use vortex_array::LEGACY_SESSION;
501 use vortex_array::ToCanonical;
502 use vortex_array::VortexSessionExecute;
503 use vortex_array::arrays::PrimitiveArray;
504 use vortex_array::assert_arrays_eq;
505 use vortex_array::session::ArraySession;
506 use vortex_array::vtable::ValidityHelper;
507 use vortex_session::VortexSession;
508
509 use super::*;
510 use crate::alp_encode;
511 use crate::decompress_into_array;
512
513 static SESSION: LazyLock<VortexSession> =
514 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
515
516 #[rstest]
517 #[case(0)]
518 #[case(1)]
519 #[case(100)]
520 #[case(1023)]
521 #[case(1024)]
522 #[case(1025)]
523 #[case(2047)]
524 #[case(2048)]
525 #[case(2049)]
526 fn test_execute_f32(#[case] size: usize) {
527 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
528 let encoded = alp_encode(&values, None).unwrap();
529
530 let result_canonical = {
531 let mut ctx = SESSION.create_execution_ctx();
532 encoded
533 .clone()
534 .into_array()
535 .execute::<Canonical>(&mut ctx)
536 .unwrap()
537 };
538 let expected =
540 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
541
542 assert_arrays_eq!(result_canonical.into_array(), expected);
543 }
544
545 #[rstest]
546 #[case(0)]
547 #[case(1)]
548 #[case(100)]
549 #[case(1023)]
550 #[case(1024)]
551 #[case(1025)]
552 #[case(2047)]
553 #[case(2048)]
554 #[case(2049)]
555 fn test_execute_f64(#[case] size: usize) {
556 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
557 let encoded = alp_encode(&values, None).unwrap();
558
559 let result_canonical = {
560 let mut ctx = SESSION.create_execution_ctx();
561 encoded
562 .clone()
563 .into_array()
564 .execute::<Canonical>(&mut ctx)
565 .unwrap()
566 };
567 let expected =
569 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
570
571 assert_arrays_eq!(result_canonical.into_array(), expected);
572 }
573
574 #[rstest]
575 #[case(100)]
576 #[case(1023)]
577 #[case(1024)]
578 #[case(1025)]
579 #[case(2047)]
580 #[case(2048)]
581 #[case(2049)]
582 fn test_execute_with_patches(#[case] size: usize) {
583 let values: Vec<f64> = (0..size)
584 .map(|i| match i % 4 {
585 0..=2 => 1.0,
586 _ => PI,
587 })
588 .collect();
589
590 let array = PrimitiveArray::from_iter(values);
591 let encoded = alp_encode(&array, None).unwrap();
592 assert!(encoded.patches().unwrap().array_len() > 0);
593
594 let result_canonical = {
595 let mut ctx = SESSION.create_execution_ctx();
596 encoded
597 .clone()
598 .into_array()
599 .execute::<Canonical>(&mut ctx)
600 .unwrap()
601 };
602 let expected =
604 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
605
606 assert_arrays_eq!(result_canonical.into_array(), expected);
607 }
608
609 #[rstest]
610 #[case(0)]
611 #[case(1)]
612 #[case(100)]
613 #[case(1023)]
614 #[case(1024)]
615 #[case(1025)]
616 #[case(2047)]
617 #[case(2048)]
618 #[case(2049)]
619 fn test_execute_with_validity(#[case] size: usize) {
620 let values: Vec<Option<f32>> = (0..size)
621 .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
622 .collect();
623
624 let array = PrimitiveArray::from_option_iter(values);
625 let encoded = alp_encode(&array, None).unwrap();
626
627 let result_canonical = {
628 let mut ctx = SESSION.create_execution_ctx();
629 encoded
630 .clone()
631 .into_array()
632 .execute::<Canonical>(&mut ctx)
633 .unwrap()
634 };
635 let expected =
637 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
638
639 assert_arrays_eq!(result_canonical.into_array(), expected);
640 }
641
642 #[rstest]
643 #[case(100)]
644 #[case(1023)]
645 #[case(1024)]
646 #[case(1025)]
647 #[case(2047)]
648 #[case(2048)]
649 #[case(2049)]
650 fn test_execute_with_patches_and_validity(#[case] size: usize) {
651 let values: Vec<Option<f64>> = (0..size)
652 .map(|idx| match idx % 3 {
653 0 => Some(1.0),
654 1 => None,
655 _ => Some(PI),
656 })
657 .collect();
658
659 let array = PrimitiveArray::from_option_iter(values);
660 let encoded = alp_encode(&array, None).unwrap();
661 assert!(encoded.patches().unwrap().array_len() > 0);
662
663 let result_canonical = {
664 let mut ctx = SESSION.create_execution_ctx();
665 encoded
666 .clone()
667 .into_array()
668 .execute::<Canonical>(&mut ctx)
669 .unwrap()
670 };
671 let expected =
673 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
674
675 assert_arrays_eq!(result_canonical.into_array(), expected);
676 }
677
678 #[rstest]
679 #[case(500, 100)]
680 #[case(1000, 200)]
681 #[case(2048, 512)]
682 fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
683 let values: Vec<Option<f64>> = (0..size)
684 .map(|i| {
685 if i % 5 == 0 {
686 None
687 } else if i % 4 == 3 {
688 Some(PI)
689 } else {
690 Some(1.0)
691 }
692 })
693 .collect();
694
695 let array = PrimitiveArray::from_option_iter(values.clone());
696 let encoded = alp_encode(&array, None).unwrap();
697
698 let slice_end = size - slice_start;
699 let slice_len = slice_end - slice_start;
700 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
701
702 let result_canonical = {
703 let mut ctx = SESSION.create_execution_ctx();
704 sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
705 };
706 let result_primitive = result_canonical.into_primitive();
707
708 for idx in 0..slice_len {
709 let expected_value = values[slice_start + idx];
710
711 let result_valid = result_primitive.validity().is_valid(idx).unwrap();
712 assert_eq!(
713 result_valid,
714 expected_value.is_some(),
715 "Validity mismatch at idx={idx}",
716 );
717
718 if let Some(expected_val) = expected_value {
719 let result_val = result_primitive.as_slice::<f64>()[idx];
720 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
721 }
722 }
723 }
724
725 #[rstest]
726 #[case(500, 100)]
727 #[case(1000, 200)]
728 #[case(2048, 512)]
729 fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
730 let values: Vec<Option<f64>> = (0..size)
731 .map(|i| {
732 if i % 5 == 0 {
733 None
734 } else if i % 4 == 3 {
735 Some(PI)
736 } else {
737 Some(1.0)
738 }
739 })
740 .collect();
741
742 let array = PrimitiveArray::from_option_iter(values.clone());
743 let encoded = alp_encode(&array, None).unwrap();
744
745 let slice_end = size - slice_start;
746 let slice_len = slice_end - slice_start;
747 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
748
749 let result_primitive = sliced_encoded.to_primitive();
750
751 for idx in 0..slice_len {
752 let expected_value = values[slice_start + idx];
753
754 let result_valid = result_primitive.validity_mask().unwrap().value(idx);
755 assert_eq!(
756 result_valid,
757 expected_value.is_some(),
758 "Validity mismatch at idx={idx}",
759 );
760
761 if let Some(expected_val) = expected_value {
762 let buf = result_primitive.to_buffer::<f64>();
763 let result_val = buf.as_slice()[idx];
764 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
765 }
766 }
767 }
768
769 #[test]
778 fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
779 let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
781 let original = PrimitiveArray::from_iter(values);
782
783 let normally_encoded = alp_encode(&original, None).unwrap();
785 assert!(
786 normally_encoded.patches().is_some(),
787 "Test requires patches to be present"
788 );
789
790 let original_patches = normally_encoded.patches().unwrap();
791 assert!(
792 original_patches.chunk_offsets().is_some(),
793 "Normal encoding should have chunk_offsets"
794 );
795
796 let patches_without_chunk_offsets = Patches::new(
798 original_patches.array_len(),
799 original_patches.offset(),
800 original_patches.indices().clone(),
801 original_patches.values().clone(),
802 None, )
804 .unwrap();
805
806 let alp_without_chunk_offsets = ALPArray::new(
808 normally_encoded.encoded().clone(),
809 normally_encoded.exponents(),
810 Some(patches_without_chunk_offsets),
811 );
812
813 let result_legacy = decompress_into_array(
815 alp_without_chunk_offsets.clone(),
816 &mut LEGACY_SESSION.create_execution_ctx(),
817 )
818 .unwrap();
819 let legacy_slice = result_legacy.as_slice::<f64>();
820
821 assert!(
823 (legacy_slice[2] - PI).abs() < 1e-10,
824 "Legacy path should have PI at index 2, got {}",
825 legacy_slice[2]
826 );
827
828 let result_execute = {
830 let mut ctx = SESSION.create_execution_ctx();
831 execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
832 };
833 let execute_slice = result_execute.as_slice::<f64>();
834
835 assert!(
837 (execute_slice[2] - PI).abs() < 1e-10,
838 "Execute path should have PI at index 2, but got {} (patches were dropped!)",
839 execute_slice[2]
840 );
841 }
842}