Skip to main content

vortex_fastlanes/rle/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5use std::hash::Hasher;
6
7use prost::Message;
8use vortex_array::Array;
9use vortex_array::ArrayEq;
10use vortex_array::ArrayHash;
11use vortex_array::ArrayId;
12use vortex_array::ArrayParts;
13use vortex_array::ArrayRef;
14use vortex_array::ArrayView;
15use vortex_array::ExecutionCtx;
16use vortex_array::ExecutionResult;
17use vortex_array::IntoArray;
18use vortex_array::Precision;
19use vortex_array::arrays::Primitive;
20use vortex_array::buffer::BufferHandle;
21use vortex_array::dtype::DType;
22use vortex_array::dtype::Nullability;
23use vortex_array::dtype::PType;
24use vortex_array::serde::ArrayChildren;
25use vortex_array::vtable::VTable;
26use vortex_error::VortexExpect;
27use vortex_error::VortexResult;
28use vortex_error::vortex_ensure;
29use vortex_error::vortex_panic;
30use vortex_session::VortexSession;
31use vortex_session::registry::CachedId;
32
33use crate::RLEData;
34use crate::rle::array::INDICES_SLOT;
35use crate::rle::array::RLEArrayExt;
36use crate::rle::array::VALUES_IDX_OFFSETS_SLOT;
37use crate::rle::array::VALUES_SLOT;
38use crate::rle::array::rle_decompress::rle_decompress;
39use crate::rle::kernel::PARENT_KERNELS;
40use crate::rle::vtable::rules::RULES;
41
42mod operations;
43mod rules;
44mod validity;
45
46/// A [`RLE`]-encoded Vortex array.
47pub type RLEArray = Array<RLE>;
48
49#[derive(Clone, prost::Message)]
50pub struct RLEMetadata {
51    #[prost(uint64, tag = "1")]
52    pub values_len: u64,
53    #[prost(uint64, tag = "2")]
54    pub indices_len: u64,
55    #[prost(enumeration = "PType", tag = "3")]
56    pub indices_ptype: i32,
57    #[prost(uint64, tag = "4")]
58    pub values_idx_offsets_len: u64,
59    #[prost(enumeration = "PType", tag = "5")]
60    pub values_idx_offsets_ptype: i32,
61    #[prost(uint64, tag = "6", default = "0")]
62    pub offset: u64,
63}
64
65impl ArrayHash for RLEData {
66    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
67        self.offset.hash(state);
68    }
69}
70
71impl ArrayEq for RLEData {
72    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
73        self.offset == other.offset
74    }
75}
76
77impl VTable for RLE {
78    type ArrayData = RLEData;
79
80    type OperationsVTable = Self;
81    type ValidityVTable = Self;
82
83    fn id(&self) -> ArrayId {
84        static ID: CachedId = CachedId::new("fastlanes.rle");
85        *ID
86    }
87
88    fn validate(
89        &self,
90        data: &Self::ArrayData,
91        dtype: &DType,
92        len: usize,
93        slots: &[Option<ArrayRef>],
94    ) -> VortexResult<()> {
95        validate_parts(
96            slots[VALUES_SLOT]
97                .as_ref()
98                .vortex_expect("RLEArray values slot must be populated"),
99            slots[INDICES_SLOT]
100                .as_ref()
101                .vortex_expect("RLEArray indices slot must be populated"),
102            slots[VALUES_IDX_OFFSETS_SLOT]
103                .as_ref()
104                .vortex_expect("RLEArray values_idx_offsets slot must be populated"),
105            data.offset,
106            dtype,
107            len,
108        )
109    }
110
111    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
112        0
113    }
114
115    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
116        vortex_panic!("RLEArray buffer index {idx} out of bounds")
117    }
118
119    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
120        None
121    }
122
123    fn reduce_parent(
124        array: ArrayView<'_, Self>,
125        parent: &ArrayRef,
126        child_idx: usize,
127    ) -> VortexResult<Option<ArrayRef>> {
128        RULES.evaluate(array, parent, child_idx)
129    }
130
131    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
132        crate::rle::array::SLOT_NAMES[idx].to_string()
133    }
134
135    fn serialize(
136        array: ArrayView<'_, Self>,
137        _session: &VortexSession,
138    ) -> VortexResult<Option<Vec<u8>>> {
139        Ok(Some(
140            RLEMetadata {
141                values_len: array.values().len() as u64,
142                indices_len: array.indices().len() as u64,
143                indices_ptype: PType::try_from(array.indices().dtype())? as i32,
144                values_idx_offsets_len: array.values_idx_offsets().len() as u64,
145                values_idx_offsets_ptype: PType::try_from(array.values_idx_offsets().dtype())?
146                    as i32,
147                offset: array.offset() as u64,
148            }
149            .encode_to_vec(),
150        ))
151    }
152
153    fn deserialize(
154        &self,
155        dtype: &DType,
156        len: usize,
157        metadata: &[u8],
158        buffers: &[BufferHandle],
159        children: &dyn ArrayChildren,
160        _session: &VortexSession,
161    ) -> VortexResult<ArrayParts<Self>> {
162        vortex_ensure!(
163            buffers.is_empty(),
164            "RLEArray expects 0 buffers, got {}",
165            buffers.len()
166        );
167        let metadata = RLEMetadata::decode(metadata)?;
168        let values = children.get(
169            0,
170            &DType::Primitive(dtype.as_ptype(), Nullability::NonNullable),
171            usize::try_from(metadata.values_len)?,
172        )?;
173
174        let indices = children.get(
175            1,
176            &DType::Primitive(metadata.indices_ptype(), dtype.nullability()),
177            usize::try_from(metadata.indices_len)?,
178        )?;
179
180        let values_idx_offsets = children.get(
181            2,
182            &DType::Primitive(
183                metadata.values_idx_offsets_ptype(),
184                Nullability::NonNullable,
185            ),
186            usize::try_from(metadata.values_idx_offsets_len)?,
187        )?;
188
189        let slots = vec![Some(values), Some(indices), Some(values_idx_offsets)];
190        let data = RLEData::try_new(metadata.offset as usize)?;
191        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
192    }
193
194    fn execute_parent(
195        array: ArrayView<'_, Self>,
196        parent: &ArrayRef,
197        child_idx: usize,
198        ctx: &mut ExecutionCtx,
199    ) -> VortexResult<Option<ArrayRef>> {
200        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
201    }
202
203    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
204        Ok(ExecutionResult::done(
205            rle_decompress(&array, ctx)?.into_array(),
206        ))
207    }
208}
209
210#[derive(Clone, Debug)]
211pub struct RLE;
212
213impl RLE {
214    pub fn try_new(
215        values: ArrayRef,
216        indices: ArrayRef,
217        values_idx_offsets: ArrayRef,
218        offset: usize,
219        length: usize,
220    ) -> VortexResult<RLEArray> {
221        let dtype = DType::Primitive(values.dtype().as_ptype(), indices.dtype().nullability());
222        let slots = vec![Some(values), Some(indices), Some(values_idx_offsets)];
223        let data = RLEData::try_new(offset)?;
224        Array::try_from_parts(ArrayParts::new(RLE, dtype, length, data).with_slots(slots))
225    }
226
227    /// Create a new RLE array without validation.
228    ///
229    /// # Safety
230    /// See [`RLE::validate`] for preconditions.
231    pub unsafe fn new_unchecked(
232        values: ArrayRef,
233        indices: ArrayRef,
234        values_idx_offsets: ArrayRef,
235        offset: usize,
236        length: usize,
237    ) -> RLEArray {
238        let dtype = DType::Primitive(values.dtype().as_ptype(), indices.dtype().nullability());
239        let slots = vec![Some(values), Some(indices), Some(values_idx_offsets)];
240        let data = unsafe { RLEData::new_unchecked(offset) };
241        unsafe {
242            Array::from_parts_unchecked(ArrayParts::new(RLE, dtype, length, data).with_slots(slots))
243        }
244    }
245
246    /// Encode a primitive array using FastLanes RLE.
247    pub fn encode(array: ArrayView<'_, Primitive>) -> VortexResult<RLEArray> {
248        RLEData::encode(array)
249    }
250}
251
252fn validate_parts(
253    values: &ArrayRef,
254    indices: &ArrayRef,
255    values_idx_offsets: &ArrayRef,
256    offset: usize,
257    dtype: &DType,
258    length: usize,
259) -> VortexResult<()> {
260    vortex_ensure!(
261        matches!(
262            values.dtype(),
263            DType::Primitive(_, Nullability::NonNullable)
264        ),
265        "RLE values must be a non-nullable primitive type, got {}",
266        values.dtype()
267    );
268
269    vortex_ensure!(
270        matches!(indices.dtype().as_ptype(), PType::U8 | PType::U16),
271        "RLE indices must be u8 or u16, got {}",
272        indices.dtype()
273    );
274
275    vortex_ensure!(
276        values_idx_offsets.dtype().is_unsigned_int() && !values_idx_offsets.dtype().is_nullable(),
277        "RLE value idx offsets must be non-nullable unsigned integer, got {}",
278        values_idx_offsets.dtype()
279    );
280
281    vortex_ensure!(
282        indices.len().is_multiple_of(crate::FL_CHUNK_SIZE),
283        "RLE indices length must be a multiple of {}, got {}",
284        crate::FL_CHUNK_SIZE,
285        indices.len()
286    );
287
288    vortex_ensure!(
289        offset + length <= indices.len(),
290        "RLE offset + length, {offset} + {length}, must not exceed the indices length {}",
291        indices.len()
292    );
293
294    vortex_ensure!(
295        indices.len().div_ceil(crate::FL_CHUNK_SIZE) == values_idx_offsets.len(),
296        "RLE must have one value idx offset per chunk, got {}",
297        values_idx_offsets.len()
298    );
299
300    vortex_ensure!(
301        indices.len() >= values.len(),
302        "RLE must have at least as many indices as values, got {} indices and {} values",
303        indices.len(),
304        values.len()
305    );
306
307    let expected_dtype = DType::Primitive(values.dtype().as_ptype(), indices.dtype().nullability());
308    vortex_ensure!(
309        dtype == &expected_dtype,
310        "RLE dtype mismatch: expected {expected_dtype}, got {dtype}"
311    );
312
313    Ok(())
314}
315
316#[cfg(test)]
317mod tests {
318    use prost::Message;
319    use vortex_array::test_harness::check_metadata;
320
321    use super::RLEMetadata;
322
323    #[cfg_attr(miri, ignore)]
324    #[test]
325    fn test_rle_metadata() {
326        check_metadata(
327            "rle.metadata",
328            &RLEMetadata {
329                values_len: u64::MAX,
330                indices_len: u64::MAX,
331                indices_ptype: i32::MAX,
332                values_idx_offsets_len: u64::MAX,
333                values_idx_offsets_ptype: i32::MAX,
334                offset: u64::MAX,
335            }
336            .encode_to_vec(),
337        );
338    }
339}