1use std::fmt::Debug;
5use std::hash::Hash;
6
7use vortex_array::Array;
8use vortex_array::ArrayEq;
9use vortex_array::ArrayHash;
10use vortex_array::ArrayRef;
11use vortex_array::DeserializeMetadata;
12use vortex_array::ExecutionCtx;
13use vortex_array::IntoArray;
14use vortex_array::Precision;
15use vortex_array::ProstMetadata;
16use vortex_array::SerializeMetadata;
17use vortex_array::buffer::BufferHandle;
18use vortex_array::dtype::DType;
19use vortex_array::dtype::PType;
20use vortex_array::patches::Patches;
21use vortex_array::patches::PatchesMetadata;
22use vortex_array::serde::ArrayChildren;
23use vortex_array::stats::ArrayStats;
24use vortex_array::stats::StatsSetRef;
25use vortex_array::vtable;
26use vortex_array::vtable::ArrayId;
27use vortex_array::vtable::VTable;
28use vortex_array::vtable::ValidityChild;
29use vortex_array::vtable::ValidityVTableFromChild;
30use vortex_array::vtable::patches_child;
31use vortex_array::vtable::patches_child_name;
32use vortex_array::vtable::patches_nchildren;
33use vortex_error::VortexExpect;
34use vortex_error::VortexResult;
35use vortex_error::vortex_bail;
36use vortex_error::vortex_ensure;
37use vortex_error::vortex_err;
38use vortex_error::vortex_panic;
39use vortex_session::VortexSession;
40
41use crate::ALPFloat;
42use crate::alp::Exponents;
43use crate::alp::decompress::execute_decompress;
44use crate::alp::rules::PARENT_KERNELS;
45use crate::alp::rules::RULES;
46
47vtable!(ALP);
48
49impl VTable for ALPVTable {
50 type Array = ALPArray;
51
52 type Metadata = ProstMetadata<ALPMetadata>;
53 type OperationsVTable = Self;
54 type ValidityVTable = ValidityVTableFromChild;
55
56 fn id(_array: &Self::Array) -> ArrayId {
57 Self::ID
58 }
59
60 fn len(array: &ALPArray) -> usize {
61 array.encoded.len()
62 }
63
64 fn dtype(array: &ALPArray) -> &DType {
65 &array.dtype
66 }
67
68 fn stats(array: &ALPArray) -> StatsSetRef<'_> {
69 array.stats_set.to_ref(array.as_ref())
70 }
71
72 fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
73 array.dtype.hash(state);
74 array.encoded.array_hash(state, precision);
75 array.exponents.hash(state);
76 array.patches.array_hash(state, precision);
77 }
78
79 fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
80 array.dtype == other.dtype
81 && array.encoded.array_eq(&other.encoded, precision)
82 && array.exponents == other.exponents
83 && array.patches.array_eq(&other.patches, precision)
84 }
85
86 fn nbuffers(_array: &ALPArray) -> usize {
87 0
88 }
89
90 fn buffer(_array: &ALPArray, idx: usize) -> BufferHandle {
91 vortex_panic!("ALPArray buffer index {idx} out of bounds")
92 }
93
94 fn buffer_name(_array: &ALPArray, _idx: usize) -> Option<String> {
95 None
96 }
97
98 fn nchildren(array: &ALPArray) -> usize {
99 1 + array.patches().map_or(0, patches_nchildren)
100 }
101
102 fn child(array: &ALPArray, idx: usize) -> ArrayRef {
103 match idx {
104 0 => array.encoded().clone(),
105 _ => {
106 let patches = array
107 .patches()
108 .unwrap_or_else(|| vortex_panic!("ALPArray child index {idx} out of bounds"));
109 patches_child(patches, idx - 1)
110 }
111 }
112 }
113
114 fn child_name(array: &ALPArray, idx: usize) -> String {
115 match idx {
116 0 => "encoded".to_string(),
117 _ => {
118 if array.patches().is_none() {
119 vortex_panic!("ALPArray child_name index {idx} out of bounds");
120 }
121 patches_child_name(idx - 1).to_string()
122 }
123 }
124 }
125
126 fn metadata(array: &ALPArray) -> VortexResult<Self::Metadata> {
127 let exponents = array.exponents();
128 Ok(ProstMetadata(ALPMetadata {
129 exp_e: exponents.e as u32,
130 exp_f: exponents.f as u32,
131 patches: array
132 .patches()
133 .map(|p| p.to_metadata(array.len(), array.dtype()))
134 .transpose()?,
135 }))
136 }
137
138 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
139 Ok(Some(metadata.serialize()))
140 }
141
142 fn deserialize(
143 bytes: &[u8],
144 _dtype: &DType,
145 _len: usize,
146 _buffers: &[BufferHandle],
147 _session: &VortexSession,
148 ) -> VortexResult<Self::Metadata> {
149 Ok(ProstMetadata(
150 <ProstMetadata<ALPMetadata> as DeserializeMetadata>::deserialize(bytes)?,
151 ))
152 }
153
154 fn build(
155 dtype: &DType,
156 len: usize,
157 metadata: &Self::Metadata,
158 _buffers: &[BufferHandle],
159 children: &dyn ArrayChildren,
160 ) -> VortexResult<ALPArray> {
161 let encoded_ptype = match &dtype {
162 DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
163 DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
164 d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
165 };
166 let encoded = children.get(0, &encoded_ptype, len)?;
167
168 let patches = metadata
169 .patches
170 .map(|p| {
171 let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
172 let values = children.get(2, dtype, p.len()?)?;
173 let chunk_offsets = p
174 .chunk_offsets_dtype()?
175 .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
176 .transpose()?;
177
178 Patches::new(len, p.offset()?, indices, values, chunk_offsets)
179 })
180 .transpose()?;
181
182 ALPArray::try_new(
183 encoded,
184 Exponents {
185 e: u8::try_from(metadata.exp_e)?,
186 f: u8::try_from(metadata.exp_f)?,
187 },
188 patches,
189 )
190 }
191
192 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
193 let patches_info = array
195 .patches
196 .as_ref()
197 .map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
198
199 let expected_children = match &patches_info {
200 Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
201 None => 1,
202 };
203
204 vortex_ensure!(
205 children.len() == expected_children,
206 "ALPArray expects {} children, got {}",
207 expected_children,
208 children.len()
209 );
210
211 let mut children_iter = children.into_iter();
212 array.encoded = children_iter
213 .next()
214 .ok_or_else(|| vortex_err!("Expected encoded child"))?;
215
216 if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
217 let indices = children_iter
218 .next()
219 .ok_or_else(|| vortex_err!("Expected patch indices child"))?;
220 let values = children_iter
221 .next()
222 .ok_or_else(|| vortex_err!("Expected patch values child"))?;
223 let chunk_offsets = children_iter.next();
224
225 array.patches = Some(Patches::new(
226 array_len,
227 offset,
228 indices,
229 values,
230 chunk_offsets,
231 )?);
232 }
233
234 Ok(())
235 }
236
237 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
238 Ok(execute_decompress(array.clone(), ctx)?.into_array())
240 }
241
242 fn reduce_parent(
243 array: &Self::Array,
244 parent: &ArrayRef,
245 child_idx: usize,
246 ) -> VortexResult<Option<ArrayRef>> {
247 RULES.evaluate(array, parent, child_idx)
248 }
249
250 fn execute_parent(
251 array: &Self::Array,
252 parent: &ArrayRef,
253 child_idx: usize,
254 ctx: &mut ExecutionCtx,
255 ) -> VortexResult<Option<ArrayRef>> {
256 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
257 }
258}
259
260#[derive(Clone, Debug)]
261pub struct ALPArray {
262 encoded: ArrayRef,
263 patches: Option<Patches>,
264 dtype: DType,
265 exponents: Exponents,
266 stats_set: ArrayStats,
267}
268
269#[derive(Debug)]
270pub struct ALPVTable;
271
272impl ALPVTable {
273 pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
274}
275
276#[derive(Clone, prost::Message)]
277pub struct ALPMetadata {
278 #[prost(uint32, tag = "1")]
279 pub(crate) exp_e: u32,
280 #[prost(uint32, tag = "2")]
281 pub(crate) exp_f: u32,
282 #[prost(message, optional, tag = "3")]
283 pub(crate) patches: Option<PatchesMetadata>,
284}
285
286impl ALPArray {
287 fn validate(
288 encoded: &ArrayRef,
289 exponents: Exponents,
290 patches: Option<&Patches>,
291 ) -> VortexResult<()> {
292 vortex_ensure!(
293 matches!(
294 encoded.dtype(),
295 DType::Primitive(PType::I32 | PType::I64, _)
296 ),
297 "ALP encoded ints have invalid DType {}",
298 encoded.dtype(),
299 );
300
301 let Exponents { e, f } = exponents;
304 match encoded.dtype().as_ptype() {
305 PType::I32 => {
306 vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
307 vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
308 if let Some(patches) = patches {
309 Self::validate_patches::<f32>(patches, encoded)?;
310 }
311 }
312 PType::I64 => {
313 vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
314 vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
315
316 if let Some(patches) = patches {
317 Self::validate_patches::<f64>(patches, encoded)?;
318 }
319 }
320 _ => unreachable!(),
321 }
322
323 if let Some(patches) = patches {
325 vortex_ensure!(
326 patches.array_len() == encoded.len(),
327 "patches array_len != encoded len: {} != {}",
328 patches.array_len(),
329 encoded.len()
330 );
331
332 }
334
335 Ok(())
336 }
337
338 fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
340 vortex_ensure!(
341 patches.array_len() == encoded.len(),
342 "patches array_len != encoded len: {} != {}",
343 patches.array_len(),
344 encoded.len()
345 );
346
347 let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
348 vortex_ensure!(
349 patches.dtype() == &expected_type,
350 "Expected patches type {expected_type}, actual {}",
351 patches.dtype(),
352 );
353
354 Ok(())
355 }
356}
357
358impl ALPArray {
359 pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> Self {
364 Self::try_new(encoded, exponents, patches).vortex_expect("ALPArray new")
365 }
366
367 pub fn try_new(
419 encoded: ArrayRef,
420 exponents: Exponents,
421 patches: Option<Patches>,
422 ) -> VortexResult<Self> {
423 Self::validate(&encoded, exponents, patches.as_ref())?;
424
425 let dtype = match encoded.dtype() {
426 DType::Primitive(PType::I32, nullability) => DType::Primitive(PType::F32, *nullability),
427 DType::Primitive(PType::I64, nullability) => DType::Primitive(PType::F64, *nullability),
428 _ => unreachable!(),
429 };
430
431 Ok(Self {
432 dtype,
433 encoded,
434 exponents,
435 patches,
436 stats_set: Default::default(),
437 })
438 }
439
440 pub(crate) unsafe fn new_unchecked(
445 encoded: ArrayRef,
446 exponents: Exponents,
447 patches: Option<Patches>,
448 dtype: DType,
449 ) -> Self {
450 Self {
451 dtype,
452 encoded,
453 exponents,
454 patches,
455 stats_set: Default::default(),
456 }
457 }
458
459 pub fn ptype(&self) -> PType {
460 self.dtype.as_ptype()
461 }
462
463 pub fn encoded(&self) -> &ArrayRef {
464 &self.encoded
465 }
466
467 #[inline]
468 pub fn exponents(&self) -> Exponents {
469 self.exponents
470 }
471
472 pub fn patches(&self) -> Option<&Patches> {
473 self.patches.as_ref()
474 }
475
476 #[inline]
478 pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
479 (self.encoded, self.exponents, self.patches, self.dtype)
480 }
481}
482
483impl ValidityChild<ALPVTable> for ALPVTable {
484 fn validity_child(array: &ALPArray) -> &ArrayRef {
485 array.encoded()
486 }
487}
488
489#[cfg(test)]
490mod tests {
491 use std::f64::consts::PI;
492 use std::sync::LazyLock;
493
494 use rstest::rstest;
495 use vortex_array::Canonical;
496 use vortex_array::IntoArray;
497 use vortex_array::LEGACY_SESSION;
498 use vortex_array::ToCanonical;
499 use vortex_array::VortexSessionExecute;
500 use vortex_array::arrays::PrimitiveArray;
501 use vortex_array::assert_arrays_eq;
502 use vortex_array::session::ArraySession;
503 use vortex_array::vtable::ValidityHelper;
504 use vortex_session::VortexSession;
505
506 use super::*;
507 use crate::alp_encode;
508 use crate::decompress_into_array;
509
510 static SESSION: LazyLock<VortexSession> =
511 LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
512
513 #[rstest]
514 #[case(0)]
515 #[case(1)]
516 #[case(100)]
517 #[case(1023)]
518 #[case(1024)]
519 #[case(1025)]
520 #[case(2047)]
521 #[case(2048)]
522 #[case(2049)]
523 fn test_execute_f32(#[case] size: usize) {
524 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
525 let encoded = alp_encode(&values, None).unwrap();
526
527 let result_canonical = {
528 let mut ctx = SESSION.create_execution_ctx();
529 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
530 };
531 let expected =
533 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
534
535 assert_arrays_eq!(result_canonical.into_array(), expected);
536 }
537
538 #[rstest]
539 #[case(0)]
540 #[case(1)]
541 #[case(100)]
542 #[case(1023)]
543 #[case(1024)]
544 #[case(1025)]
545 #[case(2047)]
546 #[case(2048)]
547 #[case(2049)]
548 fn test_execute_f64(#[case] size: usize) {
549 let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
550 let encoded = alp_encode(&values, None).unwrap();
551
552 let result_canonical = {
553 let mut ctx = SESSION.create_execution_ctx();
554 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
555 };
556 let expected =
558 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
559
560 assert_arrays_eq!(result_canonical.into_array(), expected);
561 }
562
563 #[rstest]
564 #[case(100)]
565 #[case(1023)]
566 #[case(1024)]
567 #[case(1025)]
568 #[case(2047)]
569 #[case(2048)]
570 #[case(2049)]
571 fn test_execute_with_patches(#[case] size: usize) {
572 let values: Vec<f64> = (0..size)
573 .map(|i| match i % 4 {
574 0..=2 => 1.0,
575 _ => PI,
576 })
577 .collect();
578
579 let array = PrimitiveArray::from_iter(values);
580 let encoded = alp_encode(&array, None).unwrap();
581 assert!(encoded.patches().unwrap().array_len() > 0);
582
583 let result_canonical = {
584 let mut ctx = SESSION.create_execution_ctx();
585 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
586 };
587 let expected =
589 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
590
591 assert_arrays_eq!(result_canonical.into_array(), expected);
592 }
593
594 #[rstest]
595 #[case(0)]
596 #[case(1)]
597 #[case(100)]
598 #[case(1023)]
599 #[case(1024)]
600 #[case(1025)]
601 #[case(2047)]
602 #[case(2048)]
603 #[case(2049)]
604 fn test_execute_with_validity(#[case] size: usize) {
605 let values: Vec<Option<f32>> = (0..size)
606 .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
607 .collect();
608
609 let array = PrimitiveArray::from_option_iter(values);
610 let encoded = alp_encode(&array, None).unwrap();
611
612 let result_canonical = {
613 let mut ctx = SESSION.create_execution_ctx();
614 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
615 };
616 let expected =
618 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
619
620 assert_arrays_eq!(result_canonical.into_array(), expected);
621 }
622
623 #[rstest]
624 #[case(100)]
625 #[case(1023)]
626 #[case(1024)]
627 #[case(1025)]
628 #[case(2047)]
629 #[case(2048)]
630 #[case(2049)]
631 fn test_execute_with_patches_and_validity(#[case] size: usize) {
632 let values: Vec<Option<f64>> = (0..size)
633 .map(|idx| match idx % 3 {
634 0 => Some(1.0),
635 1 => None,
636 _ => Some(PI),
637 })
638 .collect();
639
640 let array = PrimitiveArray::from_option_iter(values);
641 let encoded = alp_encode(&array, None).unwrap();
642 assert!(encoded.patches().unwrap().array_len() > 0);
643
644 let result_canonical = {
645 let mut ctx = SESSION.create_execution_ctx();
646 encoded.to_array().execute::<Canonical>(&mut ctx).unwrap()
647 };
648 let expected =
650 decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
651
652 assert_arrays_eq!(result_canonical.into_array(), expected);
653 }
654
655 #[rstest]
656 #[case(500, 100)]
657 #[case(1000, 200)]
658 #[case(2048, 512)]
659 fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
660 let values: Vec<Option<f64>> = (0..size)
661 .map(|i| {
662 if i % 5 == 0 {
663 None
664 } else if i % 4 == 3 {
665 Some(PI)
666 } else {
667 Some(1.0)
668 }
669 })
670 .collect();
671
672 let array = PrimitiveArray::from_option_iter(values.clone());
673 let encoded = alp_encode(&array, None).unwrap();
674
675 let slice_end = size - slice_start;
676 let slice_len = slice_end - slice_start;
677 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
678
679 let result_canonical = {
680 let mut ctx = SESSION.create_execution_ctx();
681 sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
682 };
683 let result_primitive = result_canonical.into_primitive();
684
685 for idx in 0..slice_len {
686 let expected_value = values[slice_start + idx];
687
688 let result_valid = result_primitive.validity().is_valid(idx).unwrap();
689 assert_eq!(
690 result_valid,
691 expected_value.is_some(),
692 "Validity mismatch at idx={idx}",
693 );
694
695 if let Some(expected_val) = expected_value {
696 let result_val = result_primitive.as_slice::<f64>()[idx];
697 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
698 }
699 }
700 }
701
702 #[rstest]
703 #[case(500, 100)]
704 #[case(1000, 200)]
705 #[case(2048, 512)]
706 fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
707 let values: Vec<Option<f64>> = (0..size)
708 .map(|i| {
709 if i % 5 == 0 {
710 None
711 } else if i % 4 == 3 {
712 Some(PI)
713 } else {
714 Some(1.0)
715 }
716 })
717 .collect();
718
719 let array = PrimitiveArray::from_option_iter(values.clone());
720 let encoded = alp_encode(&array, None).unwrap();
721
722 let slice_end = size - slice_start;
723 let slice_len = slice_end - slice_start;
724 let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();
725
726 let result_primitive = sliced_encoded.to_primitive();
727
728 for idx in 0..slice_len {
729 let expected_value = values[slice_start + idx];
730
731 let result_valid = result_primitive.validity_mask().unwrap().value(idx);
732 assert_eq!(
733 result_valid,
734 expected_value.is_some(),
735 "Validity mismatch at idx={idx}",
736 );
737
738 if let Some(expected_val) = expected_value {
739 let buf = result_primitive.to_buffer::<f64>();
740 let result_val = buf.as_slice()[idx];
741 assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
742 }
743 }
744 }
745
746 #[test]
755 fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
756 let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
758 let original = PrimitiveArray::from_iter(values);
759
760 let normally_encoded = alp_encode(&original, None).unwrap();
762 assert!(
763 normally_encoded.patches().is_some(),
764 "Test requires patches to be present"
765 );
766
767 let original_patches = normally_encoded.patches().unwrap();
768 assert!(
769 original_patches.chunk_offsets().is_some(),
770 "Normal encoding should have chunk_offsets"
771 );
772
773 let patches_without_chunk_offsets = Patches::new(
775 original_patches.array_len(),
776 original_patches.offset(),
777 original_patches.indices().clone(),
778 original_patches.values().clone(),
779 None, )
781 .unwrap();
782
783 let alp_without_chunk_offsets = ALPArray::new(
785 normally_encoded.encoded().clone(),
786 normally_encoded.exponents(),
787 Some(patches_without_chunk_offsets),
788 );
789
790 let result_legacy = decompress_into_array(
792 alp_without_chunk_offsets.clone(),
793 &mut LEGACY_SESSION.create_execution_ctx(),
794 )
795 .unwrap();
796 let legacy_slice = result_legacy.as_slice::<f64>();
797
798 assert!(
800 (legacy_slice[2] - PI).abs() < 1e-10,
801 "Legacy path should have PI at index 2, got {}",
802 legacy_slice[2]
803 );
804
805 let result_execute = {
807 let mut ctx = SESSION.create_execution_ctx();
808 execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
809 };
810 let execute_slice = result_execute.as_slice::<f64>();
811
812 assert!(
814 (execute_slice[2] - PI).abs() < 1e-10,
815 "Execute path should have PI at index 2, but got {} (patches were dropped!)",
816 execute_slice[2]
817 );
818 }
819}