vortex_fastlanes/delta/array/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6
7use fastlanes::FastLanes;
8use vortex_array::ArrayRef;
9use vortex_array::TypedArrayRef;
10use vortex_array::dtype::PType;
11use vortex_array::match_each_unsigned_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_ensure;
15
16pub mod delta_compress;
17pub mod delta_decompress;
18
19/// The base values for each block of deltas.
20pub(super) const BASES_SLOT: usize = 0;
21/// The delta-encoded values relative to the base values.
22pub(super) const DELTAS_SLOT: usize = 1;
23pub(super) const NUM_SLOTS: usize = 2;
24pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["bases", "deltas"];
25
26/// A FastLanes-style delta-encoded array of primitive values.
27///
28/// A DeltaArray comprises a sequence of _chunks_ each representing exactly 1,024
29/// delta-encoded values. If the input array length is not a multiple of 1,024, the last chunk
30/// is padded with zeros to fill a complete 1,024-element chunk.
31///
32/// # Examples
33///
34/// ```
35/// use vortex_array::arrays::PrimitiveArray;
36/// use vortex_array::VortexSessionExecute;
37/// use vortex_array::session::ArraySession;
38/// use vortex_session::VortexSession;
39/// use vortex_fastlanes::Delta;
40///
41/// let session = VortexSession::empty().with::<ArraySession>();
42/// let primitive = PrimitiveArray::from_iter([1_u32, 2, 3, 5, 10, 11]);
43/// let array = Delta::try_from_primitive_array(&primitive, &mut session.create_execution_ctx()).unwrap();
44/// ```
45///
46/// Signed inputs are also supported; deltas across negative values are encoded by
47/// `wrapping_sub` and recovered by `wrapping_add` at decompress time:
48///
49/// ```
50/// use vortex_array::arrays::PrimitiveArray;
51/// use vortex_array::VortexSessionExecute;
52/// use vortex_array::session::ArraySession;
53/// use vortex_session::VortexSession;
54/// use vortex_fastlanes::Delta;
55///
56/// let session = VortexSession::empty().with::<ArraySession>();
57/// let primitive = PrimitiveArray::from_iter([-3_i32, -2, -1, 0, 1, 2]);
58/// let array = Delta::try_from_primitive_array(&primitive, &mut session.create_execution_ctx()).unwrap();
59/// ```
60///
61/// # Details
62///
63/// To facilitate slicing, this array accepts an `offset` and `logical_len`. The offset must be
64/// strictly less than 1,024 and the sum of `offset` and `logical_len` must not exceed the length of
65/// the `deltas` array. These values permit logical slicing without modifying any chunk containing a
66/// kept value. In particular, we may defer decompresison until the array is canonicalized or
67/// indexed. The `offset` is a physical offset into the first chunk, which necessarily contains
68/// 1,024 values. The `logical_len` is the number of logical values following the `offset`, which
69/// may be less than the number of physically stored values.
70///
71/// Each chunk is stored as a vector of bases and a vector of deltas. There are as many bases as
72/// there are _lanes_ of this type in a 1024-bit register. For example, for 64-bit values, there
73/// are 16 bases because there are 16 _lanes_. Each lane is a
74/// [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding) `1024 / bit_width` long vector
75/// of values. The deltas are stored in the
76/// [FastLanes](https://www.vldb.org/pvldb/vol16/p2132-afroozeh.pdf) order which splits the 1,024
77/// values into one contiguous sub-sequence per-lane, thus permitting delta encoding.
78///
79/// Note the validity is stored in the deltas array.
80#[derive(Clone, Debug)]
81pub struct DeltaData {
82 pub(super) offset: usize,
83}
84
85impl Display for DeltaData {
86 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
87 write!(f, "offset: {}", self.offset)
88 }
89}
90
91pub trait DeltaArrayExt: TypedArrayRef<crate::Delta> {
92 fn bases(&self) -> &ArrayRef {
93 self.as_ref().slots()[BASES_SLOT]
94 .as_ref()
95 .vortex_expect("DeltaArray bases slot")
96 }
97
98 fn deltas(&self) -> &ArrayRef {
99 self.as_ref().slots()[DELTAS_SLOT]
100 .as_ref()
101 .vortex_expect("DeltaArray deltas slot")
102 }
103
104 fn offset(&self) -> usize {
105 self.offset
106 }
107}
108
109impl<T: TypedArrayRef<crate::Delta>> DeltaArrayExt for T {}
110
111impl DeltaData {
112 pub fn try_new(offset: usize) -> VortexResult<Self> {
113 vortex_ensure!(offset < 1024, "offset must be less than 1024: {offset}");
114 Ok(Self { offset })
115 }
116}
117
118pub(crate) fn lane_count(ptype: PType) -> usize {
119 match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |T| { T::LANES })
120}