vortex_fastlanes/delta/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use fastlanes::FastLanes;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::stats::ArrayStats;
9use vortex_array::validity::Validity;
10use vortex_buffer::Buffer;
11use vortex_dtype::DType;
12use vortex_dtype::NativePType;
13use vortex_dtype::PType;
14use vortex_dtype::match_each_unsigned_integer_ptype;
15use vortex_error::VortexExpect as _;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18
19pub mod delta_compress;
20pub mod delta_decompress;
21
22/// A FastLanes-style delta-encoded array of primitive values.
23///
24/// A [`DeltaArray`] comprises a sequence of _chunks_ each representing 1,024 delta-encoded values,
25/// except the last chunk which may represent from one to 1,024 values.
26///
27/// # Examples
28///
29/// ```
30/// use vortex_fastlanes::DeltaArray;
31/// let array = DeltaArray::try_from_vec(vec![1_u32, 2, 3, 5, 10, 11]).unwrap();
32/// ```
33///
34/// # Details
35///
36/// To facilitate slicing, this array accepts an `offset` and `logical_len`. The offset must be
37/// strictly less than 1,024 and the sum of `offset` and `logical_len` must not exceed the length of
38/// the `deltas` array. These values permit logical slicing without modifying any chunk containing a
39/// kept value. In particular, we may defer decompresison until the array is canonicalized or
40/// indexed. The `offset` is a physical offset into the first chunk, which necessarily contains
41/// 1,024 values. The `logical_len` is the number of logical values following the `offset`, which
42/// may be less than the number of physically stored values.
43///
44/// Each chunk is stored as a vector of bases and a vector of deltas. If the chunk physically
45/// contains 1,024 values, then there are as many bases as there are _lanes_ of this type in a
46/// 1024-bit register. For example, for 64-bit values, there are 16 bases because there are 16
47/// _lanes_. Each lane is a [delta-encoding](https://en.wikipedia.org/wiki/Delta_encoding) `1024 /
48/// bit_width` long vector of values. The deltas are stored in the
49/// [FastLanes](https://www.vldb.org/pvldb/vol16/p2132-afroozeh.pdf) order which splits the 1,024
50/// values into one contiguous sub-sequence per-lane, thus permitting delta encoding.
51///
52/// If the chunk physically has fewer than 1,024 values, then it is stored as a traditional,
53/// non-SIMD-amenable, delta-encoded vector.
54///
55/// Note the validity is stored in the deltas array.
56#[derive(Clone, Debug)]
57pub struct DeltaArray {
58    pub(super) offset: usize,
59    pub(super) len: usize,
60    pub(super) dtype: DType,
61    pub(super) bases: ArrayRef,
62    pub(super) deltas: ArrayRef,
63    pub(super) stats_set: ArrayStats,
64}
65
66impl DeltaArray {
67    // TODO(ngates): remove constructing from vec
68    pub fn try_from_vec<T: NativePType>(vec: Vec<T>) -> VortexResult<Self> {
69        Self::try_from_primitive_array(&PrimitiveArray::new(
70            Buffer::copy_from(vec),
71            Validity::NonNullable,
72        ))
73    }
74
75    pub fn try_from_primitive_array(array: &PrimitiveArray) -> VortexResult<Self> {
76        let (bases, deltas) = delta_compress::delta_compress(array)?;
77
78        Self::try_from_delta_compress_parts(bases.into_array(), deltas.into_array())
79    }
80
81    /// Create a [`DeltaArray`] from the given `bases` and `deltas` arrays.
82    /// Note the `deltas` might be nullable
83    pub fn try_from_delta_compress_parts(bases: ArrayRef, deltas: ArrayRef) -> VortexResult<Self> {
84        let logical_len = deltas.len();
85        Self::try_new(bases, deltas, 0, logical_len)
86    }
87
88    pub fn try_new(
89        bases: ArrayRef,
90        deltas: ArrayRef,
91        offset: usize,
92        logical_len: usize,
93    ) -> VortexResult<Self> {
94        if offset >= 1024 {
95            vortex_bail!("offset must be less than 1024: {}", offset);
96        }
97        if offset + logical_len > deltas.len() {
98            vortex_bail!(
99                "offset + logical_len, {} + {}, must be less than or equal to the size of deltas: {}",
100                offset,
101                logical_len,
102                deltas.len()
103            )
104        }
105        if !bases.dtype().eq_ignore_nullability(deltas.dtype()) {
106            vortex_bail!(
107                "DeltaArray: bases and deltas must have the same dtype, got {:?} and {:?}",
108                bases.dtype(),
109                deltas.dtype()
110            );
111        }
112        let DType::Primitive(ptype, _) = bases.dtype().clone() else {
113            vortex_bail!(
114                "DeltaArray: dtype must be an integer, got {}",
115                bases.dtype()
116            );
117        };
118
119        if !ptype.is_int() {
120            vortex_bail!("DeltaArray: ptype must be an integer, got {}", ptype);
121        }
122
123        let lanes = lane_count(ptype);
124
125        if deltas.len().is_multiple_of(1024) != bases.len().is_multiple_of(lanes) {
126            vortex_bail!(
127                "deltas length ({}) is a multiple of 1024 iff bases length ({}) is a multiple of LANES ({})",
128                deltas.len(),
129                bases.len(),
130                lanes,
131            );
132        }
133
134        // SAFETY: validation done above
135        Ok(unsafe { Self::new_unchecked(bases, deltas, offset, logical_len) })
136    }
137
138    pub(crate) unsafe fn new_unchecked(
139        bases: ArrayRef,
140        deltas: ArrayRef,
141        offset: usize,
142        logical_len: usize,
143    ) -> Self {
144        Self {
145            offset,
146            len: logical_len,
147            dtype: bases.dtype().with_nullability(deltas.dtype().nullability()),
148            bases,
149            deltas,
150            stats_set: Default::default(),
151        }
152    }
153
154    #[inline]
155    pub fn bases(&self) -> &ArrayRef {
156        &self.bases
157    }
158
159    #[inline]
160    pub fn deltas(&self) -> &ArrayRef {
161        &self.deltas
162    }
163
164    #[inline]
165    pub(crate) fn lanes(&self) -> usize {
166        let ptype =
167            PType::try_from(self.dtype()).vortex_expect("DeltaArray DType must be primitive");
168        lane_count(ptype)
169    }
170
171    #[inline]
172    pub fn len(&self) -> usize {
173        self.len
174    }
175
176    #[inline]
177    pub fn is_empty(&self) -> bool {
178        self.len == 0
179    }
180
181    #[inline]
182    pub fn dtype(&self) -> &DType {
183        &self.dtype
184    }
185
186    #[inline]
187    /// The logical offset into the first chunk of [`Self::deltas`].
188    pub fn offset(&self) -> usize {
189        self.offset
190    }
191
192    #[inline]
193    pub(crate) fn bases_len(&self) -> usize {
194        self.bases.len()
195    }
196
197    #[inline]
198    pub(crate) fn deltas_len(&self) -> usize {
199        self.deltas.len()
200    }
201
202    #[inline]
203    pub(crate) fn stats_set(&self) -> &ArrayStats {
204        &self.stats_set
205    }
206}
207
208pub(crate) fn lane_count(ptype: PType) -> usize {
209    match_each_unsigned_integer_ptype!(ptype, |T| { T::LANES })
210}