1use std::fmt::Debug;
5use std::hash::Hash;
6use std::sync::Arc;
7
8use vortex_array::ArrayEq;
9use vortex_array::ArrayHash;
10use vortex_array::ArrayRef;
11use vortex_array::DeserializeMetadata;
12use vortex_array::DynArray;
13use vortex_array::ExecutionCtx;
14use vortex_array::ExecutionResult;
15use vortex_array::IntoArray;
16use vortex_array::Precision;
17use vortex_array::ProstMetadata;
18use vortex_array::SerializeMetadata;
19use vortex_array::buffer::BufferHandle;
20use vortex_array::dtype::DType;
21use vortex_array::dtype::PType;
22use vortex_array::patches::Patches;
23use vortex_array::patches::PatchesMetadata;
24use vortex_array::serde::ArrayChildren;
25use vortex_array::stats::ArrayStats;
26use vortex_array::stats::StatsSetRef;
27use vortex_array::vtable;
28use vortex_array::vtable::ArrayId;
29use vortex_array::vtable::VTable;
30use vortex_array::vtable::ValidityChild;
31use vortex_array::vtable::ValidityVTableFromChild;
32use vortex_array::vtable::patches_child;
33use vortex_array::vtable::patches_child_name;
34use vortex_array::vtable::patches_nchildren;
35use vortex_error::VortexExpect;
36use vortex_error::VortexResult;
37use vortex_error::vortex_bail;
38use vortex_error::vortex_ensure;
39use vortex_error::vortex_err;
40use vortex_error::vortex_panic;
41use vortex_session::VortexSession;
42
43use crate::ALPFloat;
44use crate::alp::Exponents;
45use crate::alp::decompress::execute_decompress;
46use crate::alp::rules::PARENT_KERNELS;
47use crate::alp::rules::RULES;
48
49vtable!(ALP);
50
51impl VTable for ALP {
52 type Array = ALPArray;
53
54 type Metadata = ProstMetadata<ALPMetadata>;
55 type OperationsVTable = Self;
56 type ValidityVTable = ValidityVTableFromChild;
57
58 fn vtable(_array: &Self::Array) -> &Self {
59 &ALP
60 }
61
62 fn id(&self) -> ArrayId {
63 Self::ID
64 }
65
66 fn len(array: &ALPArray) -> usize {
67 array.encoded.len()
68 }
69
70 fn dtype(array: &ALPArray) -> &DType {
71 &array.dtype
72 }
73
74 fn stats(array: &ALPArray) -> StatsSetRef<'_> {
75 array.stats_set.to_ref(array.as_ref())
76 }
77
78 fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
79 array.dtype.hash(state);
80 array.encoded.array_hash(state, precision);
81 array.exponents.hash(state);
82 array.patches.array_hash(state, precision);
83 }
84
85 fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
86 array.dtype == other.dtype
87 && array.encoded.array_eq(&other.encoded, precision)
88 && array.exponents == other.exponents
89 && array.patches.array_eq(&other.patches, precision)
90 }
91
92 fn nbuffers(_array: &ALPArray) -> usize {
93 0
94 }
95
96 fn buffer(_array: &ALPArray, idx: usize) -> BufferHandle {
97 vortex_panic!("ALPArray buffer index {idx} out of bounds")
98 }
99
100 fn buffer_name(_array: &ALPArray, _idx: usize) -> Option<String> {
101 None
102 }
103
104 fn nchildren(array: &ALPArray) -> usize {
105 1 + array.patches().map_or(0, patches_nchildren)
106 }
107
108 fn child(array: &ALPArray, idx: usize) -> ArrayRef {
109 match idx {
110 0 => array.encoded().clone(),
111 _ => {
112 let patches = array
113 .patches()
114 .unwrap_or_else(|| vortex_panic!("ALPArray child index {idx} out of bounds"));
115 patches_child(patches, idx - 1)
116 }
117 }
118 }
119
120 fn child_name(array: &ALPArray, idx: usize) -> String {
121 match idx {
122 0 => "encoded".to_string(),
123 _ => {
124 if array.patches().is_none() {
125 vortex_panic!("ALPArray child_name index {idx} out of bounds");
126 }
127 patches_child_name(idx - 1).to_string()
128 }
129 }
130 }
131
132 fn metadata(array: &ALPArray) -> VortexResult<Self::Metadata> {
133 let exponents = array.exponents();
134 Ok(ProstMetadata(ALPMetadata {
135 exp_e: exponents.e as u32,
136 exp_f: exponents.f as u32,
137 patches: array
138 .patches()
139 .map(|p| p.to_metadata(array.len(), array.dtype()))
140 .transpose()?,
141 }))
142 }
143
144 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
145 Ok(Some(metadata.serialize()))
146 }
147
148 fn deserialize(
149 bytes: &[u8],
150 _dtype: &DType,
151 _len: usize,
152 _buffers: &[BufferHandle],
153 _session: &VortexSession,
154 ) -> VortexResult<Self::Metadata> {
155 Ok(ProstMetadata(
156 <ProstMetadata<ALPMetadata> as DeserializeMetadata>::deserialize(bytes)?,
157 ))
158 }
159
160 fn build(
161 dtype: &DType,
162 len: usize,
163 metadata: &Self::Metadata,
164 _buffers: &[BufferHandle],
165 children: &dyn ArrayChildren,
166 ) -> VortexResult<ALPArray> {
167 let encoded_ptype = match &dtype {
168 DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
169 DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
170 d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
171 };
172 let encoded = children.get(0, &encoded_ptype, len)?;
173
174 let patches = metadata
175 .patches
176 .map(|p| {
177 let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
178 let values = children.get(2, dtype, p.len()?)?;
179 let chunk_offsets = p
180 .chunk_offsets_dtype()?
181 .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
182 .transpose()?;
183
184 Patches::new(len, p.offset()?, indices, values, chunk_offsets)
185 })
186 .transpose()?;
187
188 ALPArray::try_new(
189 encoded,
190 Exponents {
191 e: u8::try_from(metadata.exp_e)?,
192 f: u8::try_from(metadata.exp_f)?,
193 },
194 patches,
195 )
196 }
197
198 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
199 let patches_info = array
201 .patches
202 .as_ref()
203 .map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
204
205 let expected_children = match &patches_info {
206 Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
207 None => 1,
208 };
209
210 vortex_ensure!(
211 children.len() == expected_children,
212 "ALPArray expects {} children, got {}",
213 expected_children,
214 children.len()
215 );
216
217 let mut children_iter = children.into_iter();
218 array.encoded = children_iter
219 .next()
220 .ok_or_else(|| vortex_err!("Expected encoded child"))?;
221
222 if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
223 let indices = children_iter
224 .next()
225 .ok_or_else(|| vortex_err!("Expected patch indices child"))?;
226 let values = children_iter
227 .next()
228 .ok_or_else(|| vortex_err!("Expected patch values child"))?;
229 let chunk_offsets = children_iter.next();
230
231 array.patches = Some(Patches::new(
232 array_len,
233 offset,
234 indices,
235 values,
236 chunk_offsets,
237 )?);
238 }
239
240 Ok(())
241 }
242
243 fn execute(array: Arc<Self::Array>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
244 let array = Arc::try_unwrap(array).unwrap_or_else(|arc| (*arc).clone());
245 Ok(ExecutionResult::done(
246 execute_decompress(array, ctx)?.into_array(),
247 ))
248 }
249
250 fn reduce_parent(
251 array: &Self::Array,
252 parent: &ArrayRef,
253 child_idx: usize,
254 ) -> VortexResult<Option<ArrayRef>> {
255 RULES.evaluate(array, parent, child_idx)
256 }
257
258 fn execute_parent(
259 array: &Self::Array,
260 parent: &ArrayRef,
261 child_idx: usize,
262 ctx: &mut ExecutionCtx,
263 ) -> VortexResult<Option<ArrayRef>> {
264 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
265 }
266}
267
268#[derive(Clone, Debug)]
269pub struct ALPArray {
270 encoded: ArrayRef,
271 patches: Option<Patches>,
272 dtype: DType,
273 exponents: Exponents,
274 stats_set: ArrayStats,
275}
276
277#[derive(Clone, Debug)]
278pub struct ALP;
279
280impl ALP {
281 pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
282}
283
284#[derive(Clone, prost::Message)]
285pub struct ALPMetadata {
286 #[prost(uint32, tag = "1")]
287 pub(crate) exp_e: u32,
288 #[prost(uint32, tag = "2")]
289 pub(crate) exp_f: u32,
290 #[prost(message, optional, tag = "3")]
291 pub(crate) patches: Option<PatchesMetadata>,
292}
293
294impl ALPArray {
295 fn validate(
296 encoded: &ArrayRef,
297 exponents: Exponents,
298 patches: Option<&Patches>,
299 ) -> VortexResult<()> {
300 vortex_ensure!(
301 matches!(
302 encoded.dtype(),
303 DType::Primitive(PType::I32 | PType::I64, _)
304 ),
305 "ALP encoded ints have invalid DType {}",
306 encoded.dtype(),
307 );
308
309 let Exponents { e, f } = exponents;
312 match encoded.dtype().as_ptype() {
313 PType::I32 => {
314 vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
315 vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
316 if let Some(patches) = patches {
317 Self::validate_patches::<f32>(patches, encoded)?;
318 }
319 }
320 PType::I64 => {
321 vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
322 vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
323
324 if let Some(patches) = patches {
325 Self::validate_patches::<f64>(patches, encoded)?;
326 }
327 }
328 _ => unreachable!(),
329 }
330
331 if let Some(patches) = patches {
333 vortex_ensure!(
334 patches.array_len() == encoded.len(),
335 "patches array_len != encoded len: {} != {}",
336 patches.array_len(),
337 encoded.len()
338 );
339
340 }
342
343 Ok(())
344 }
345
346 fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
348 vortex_ensure!(
349 patches.array_len() == encoded.len(),
350 "patches array_len != encoded len: {} != {}",
351 patches.array_len(),
352 encoded.len()
353 );
354
355 let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
356 vortex_ensure!(
357 patches.dtype() == &expected_type,
358 "Expected patches type {expected_type}, actual {}",
359 patches.dtype(),
360 );
361
362 Ok(())
363 }
364}
365
366impl ALPArray {
367 pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> Self {
372 Self::try_new(encoded, exponents, patches).vortex_expect("ALPArray new")
373 }
374
375 pub fn try_new(
427 encoded: ArrayRef,
428 exponents: Exponents,
429 patches: Option<Patches>,
430 ) -> VortexResult<Self> {
431 Self::validate(&encoded, exponents, patches.as_ref())?;
432
433 let dtype = match encoded.dtype() {
434 DType::Primitive(PType::I32, nullability) => DType::Primitive(PType::F32, *nullability),
435 DType::Primitive(PType::I64, nullability) => DType::Primitive(PType::F64, *nullability),
436 _ => unreachable!(),
437 };
438
439 Ok(Self {
440 dtype,
441 encoded,
442 exponents,
443 patches,
444 stats_set: Default::default(),
445 })
446 }
447
448 pub(crate) unsafe fn new_unchecked(
453 encoded: ArrayRef,
454 exponents: Exponents,
455 patches: Option<Patches>,
456 dtype: DType,
457 ) -> Self {
458 Self {
459 dtype,
460 encoded,
461 exponents,
462 patches,
463 stats_set: Default::default(),
464 }
465 }
466
467 pub fn ptype(&self) -> PType {
468 self.dtype.as_ptype()
469 }
470
471 pub fn encoded(&self) -> &ArrayRef {
472 &self.encoded
473 }
474
475 #[inline]
476 pub fn exponents(&self) -> Exponents {
477 self.exponents
478 }
479
480 pub fn patches(&self) -> Option<&Patches> {
481 self.patches.as_ref()
482 }
483
484 #[inline]
486 pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
487 (self.encoded, self.exponents, self.patches, self.dtype)
488 }
489}
490
491impl ValidityChild<ALP> for ALP {
492 fn validity_child(array: &ALPArray) -> &ArrayRef {
493 array.encoded()
494 }
495}
496
497#[cfg(test)]
498mod tests {
499 use std::f64::consts::PI;
500 use std::sync::LazyLock;
501
502 use rstest::rstest;
503 use vortex_array::Canonical;
504 use vortex_array::IntoArray;
505 use vortex_array::LEGACY_SESSION;
506 use vortex_array::ToCanonical;
507 use vortex_array::VortexSessionExecute;
508 use vortex_array::arrays::PrimitiveArray;
509 use vortex_array::assert_arrays_eq;
510 use vortex_array::session::ArraySession;
511 use vortex_array::vtable::ValidityHelper;
512 use vortex_session::VortexSession;
513
514 use super::*;
515 use crate::alp_encode;
516 use crate::decompress_into_array;
517
518 static SESSION: LazyLock<VortexSession> =
519 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
520
521 #[rstest]
522 #[case(0)]
523 #[case(1)]
524 #[case(100)]
525 #[case(1023)]
526 #[case(1024)]
527 #[case(1025)]
528 #[case(2047)]
529 #[case(2048)]
530 #[case(2049)]
531 fn test_execute_f32(#[case] size: usize) {
532 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
533 let encoded = alp_encode(&values, None).unwrap();
534
535 let result_canonical = {
536 let mut ctx = SESSION.create_execution_ctx();
537 encoded
538 .clone()
539 .into_array()
540 .execute::<Canonical>(&mut ctx)
541 .unwrap()
542 };
543 let expected =
545 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
546
547 assert_arrays_eq!(result_canonical.into_array(), expected);
548 }
549
550 #[rstest]
551 #[case(0)]
552 #[case(1)]
553 #[case(100)]
554 #[case(1023)]
555 #[case(1024)]
556 #[case(1025)]
557 #[case(2047)]
558 #[case(2048)]
559 #[case(2049)]
560 fn test_execute_f64(#[case] size: usize) {
561 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
562 let encoded = alp_encode(&values, None).unwrap();
563
564 let result_canonical = {
565 let mut ctx = SESSION.create_execution_ctx();
566 encoded
567 .clone()
568 .into_array()
569 .execute::<Canonical>(&mut ctx)
570 .unwrap()
571 };
572 let expected =
574 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
575
576 assert_arrays_eq!(result_canonical.into_array(), expected);
577 }
578
579 #[rstest]
580 #[case(100)]
581 #[case(1023)]
582 #[case(1024)]
583 #[case(1025)]
584 #[case(2047)]
585 #[case(2048)]
586 #[case(2049)]
587 fn test_execute_with_patches(#[case] size: usize) {
588 let values: Vec<f64> = (0..size)
589 .map(|i| match i % 4 {
590 0..=2 => 1.0,
591 _ => PI,
592 })
593 .collect();
594
595 let array = PrimitiveArray::from_iter(values);
596 let encoded = alp_encode(&array, None).unwrap();
597 assert!(encoded.patches().unwrap().array_len() > 0);
598
599 let result_canonical = {
600 let mut ctx = SESSION.create_execution_ctx();
601 encoded
602 .clone()
603 .into_array()
604 .execute::<Canonical>(&mut ctx)
605 .unwrap()
606 };
607 let expected =
609 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
610
611 assert_arrays_eq!(result_canonical.into_array(), expected);
612 }
613
614 #[rstest]
615 #[case(0)]
616 #[case(1)]
617 #[case(100)]
618 #[case(1023)]
619 #[case(1024)]
620 #[case(1025)]
621 #[case(2047)]
622 #[case(2048)]
623 #[case(2049)]
624 fn test_execute_with_validity(#[case] size: usize) {
625 let values: Vec<Option<f32>> = (0..size)
626 .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
627 .collect();
628
629 let array = PrimitiveArray::from_option_iter(values);
630 let encoded = alp_encode(&array, None).unwrap();
631
632 let result_canonical = {
633 let mut ctx = SESSION.create_execution_ctx();
634 encoded
635 .clone()
636 .into_array()
637 .execute::<Canonical>(&mut ctx)
638 .unwrap()
639 };
640 let expected =
642 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
643
644 assert_arrays_eq!(result_canonical.into_array(), expected);
645 }
646
647 #[rstest]
648 #[case(100)]
649 #[case(1023)]
650 #[case(1024)]
651 #[case(1025)]
652 #[case(2047)]
653 #[case(2048)]
654 #[case(2049)]
655 fn test_execute_with_patches_and_validity(#[case] size: usize) {
656 let values: Vec<Option<f64>> = (0..size)
657 .map(|idx| match idx % 3 {
658 0 => Some(1.0),
659 1 => None,
660 _ => Some(PI),
661 })
662 .collect();
663
664 let array = PrimitiveArray::from_option_iter(values);
665 let encoded = alp_encode(&array, None).unwrap();
666 assert!(encoded.patches().unwrap().array_len() > 0);
667
668 let result_canonical = {
669 let mut ctx = SESSION.create_execution_ctx();
670 encoded
671 .clone()
672 .into_array()
673 .execute::<Canonical>(&mut ctx)
674 .unwrap()
675 };
676 let expected =
678 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
679
680 assert_arrays_eq!(result_canonical.into_array(), expected);
681 }
682
683 #[rstest]
684 #[case(500, 100)]
685 #[case(1000, 200)]
686 #[case(2048, 512)]
687 fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
688 let values: Vec<Option<f64>> = (0..size)
689 .map(|i| {
690 if i % 5 == 0 {
691 None
692 } else if i % 4 == 3 {
693 Some(PI)
694 } else {
695 Some(1.0)
696 }
697 })
698 .collect();
699
700 let array = PrimitiveArray::from_option_iter(values.clone());
701 let encoded = alp_encode(&array, None).unwrap();
702
703 let slice_end = size - slice_start;
704 let slice_len = slice_end - slice_start;
705 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
706
707 let result_canonical = {
708 let mut ctx = SESSION.create_execution_ctx();
709 sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
710 };
711 let result_primitive = result_canonical.into_primitive();
712
713 for idx in 0..slice_len {
714 let expected_value = values[slice_start + idx];
715
716 let result_valid = result_primitive.validity().is_valid(idx).unwrap();
717 assert_eq!(
718 result_valid,
719 expected_value.is_some(),
720 "Validity mismatch at idx={idx}",
721 );
722
723 if let Some(expected_val) = expected_value {
724 let result_val = result_primitive.as_slice::<f64>()[idx];
725 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
726 }
727 }
728 }
729
730 #[rstest]
731 #[case(500, 100)]
732 #[case(1000, 200)]
733 #[case(2048, 512)]
734 fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
735 let values: Vec<Option<f64>> = (0..size)
736 .map(|i| {
737 if i % 5 == 0 {
738 None
739 } else if i % 4 == 3 {
740 Some(PI)
741 } else {
742 Some(1.0)
743 }
744 })
745 .collect();
746
747 let array = PrimitiveArray::from_option_iter(values.clone());
748 let encoded = alp_encode(&array, None).unwrap();
749
750 let slice_end = size - slice_start;
751 let slice_len = slice_end - slice_start;
752 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
753
754 let result_primitive = sliced_encoded.to_primitive();
755
756 for idx in 0..slice_len {
757 let expected_value = values[slice_start + idx];
758
759 let result_valid = result_primitive.validity_mask().unwrap().value(idx);
760 assert_eq!(
761 result_valid,
762 expected_value.is_some(),
763 "Validity mismatch at idx={idx}",
764 );
765
766 if let Some(expected_val) = expected_value {
767 let buf = result_primitive.to_buffer::<f64>();
768 let result_val = buf.as_slice()[idx];
769 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
770 }
771 }
772 }
773
774 #[test]
783 fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
784 let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
786 let original = PrimitiveArray::from_iter(values);
787
788 let normally_encoded = alp_encode(&original, None).unwrap();
790 assert!(
791 normally_encoded.patches().is_some(),
792 "Test requires patches to be present"
793 );
794
795 let original_patches = normally_encoded.patches().unwrap();
796 assert!(
797 original_patches.chunk_offsets().is_some(),
798 "Normal encoding should have chunk_offsets"
799 );
800
801 let patches_without_chunk_offsets = Patches::new(
803 original_patches.array_len(),
804 original_patches.offset(),
805 original_patches.indices().clone(),
806 original_patches.values().clone(),
807 None, )
809 .unwrap();
810
811 let alp_without_chunk_offsets = ALPArray::new(
813 normally_encoded.encoded().clone(),
814 normally_encoded.exponents(),
815 Some(patches_without_chunk_offsets),
816 );
817
818 let result_legacy = decompress_into_array(
820 alp_without_chunk_offsets.clone(),
821 &mut LEGACY_SESSION.create_execution_ctx(),
822 )
823 .unwrap();
824 let legacy_slice = result_legacy.as_slice::<f64>();
825
826 assert!(
828 (legacy_slice[2] - PI).abs() < 1e-10,
829 "Legacy path should have PI at index 2, got {}",
830 legacy_slice[2]
831 );
832
833 let result_execute = {
835 let mut ctx = SESSION.create_execution_ctx();
836 execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
837 };
838 let execute_slice = result_execute.as_slice::<f64>();
839
840 assert!(
842 (execute_slice[2] - PI).abs() < 1e-10,
843 "Execute path should have PI at index 2, but got {} (patches were dropped!)",
844 execute_slice[2]
845 );
846 }
847}