1use vortex_array::Array;
5use vortex_array::ArrayRef;
6use vortex_array::stats::ArrayStats;
7use vortex_dtype::DType;
8use vortex_dtype::PType;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11
12use crate::FL_CHUNK_SIZE;
13
14pub mod rle_compress;
15pub mod rle_decompress;
16
17#[derive(Clone, Debug)]
18pub struct RLEArray {
19 pub(super) dtype: DType,
20 pub(super) values: ArrayRef,
22 pub(super) indices: ArrayRef,
24 pub(super) values_idx_offsets: ArrayRef,
34
35 pub(super) stats_set: ArrayStats,
36 pub(super) offset: usize,
38 pub(super) length: usize,
39}
40
41impl RLEArray {
42 fn validate(
43 values: &dyn Array,
44 indices: &dyn Array,
45 value_idx_offsets: &dyn Array,
46 offset: usize,
47 ) -> VortexResult<()> {
48 vortex_ensure!(
49 offset < 1024,
50 "Offset must be smaller than 1024, got {}",
51 offset
52 );
53
54 vortex_ensure!(
55 values.dtype().is_primitive(),
56 "RLE values must be a primitive type, got {}",
57 values.dtype()
58 );
59
60 vortex_ensure!(
61 matches!(indices.dtype().as_ptype(), PType::U8 | PType::U16),
62 "RLE indices must be u8 or u16, got {}",
63 indices.dtype()
64 );
65
66 vortex_ensure!(
67 value_idx_offsets.dtype().is_unsigned_int() && !value_idx_offsets.dtype().is_nullable(),
68 "RLE value idx offsets must be non-nullable unsigned integer, got {}",
69 value_idx_offsets.dtype()
70 );
71
72 vortex_ensure!(
73 indices.len().div_ceil(FL_CHUNK_SIZE) == value_idx_offsets.len(),
74 "RLE must have one value idx offset per chunk, got {}",
75 value_idx_offsets.len()
76 );
77
78 vortex_ensure!(
79 indices.len() >= values.len(),
80 "RLE must have at least as many indices as values, got {} indices and {} values",
81 indices.len(),
82 values.len()
83 );
84
85 Ok(())
86 }
87
88 pub fn try_new(
98 values: ArrayRef,
99 indices: ArrayRef,
100 values_idx_offsets: ArrayRef,
101 offset: usize,
102 length: usize,
103 ) -> VortexResult<Self> {
104 assert_eq!(indices.len() % FL_CHUNK_SIZE, 0);
105 Self::validate(&values, &indices, &values_idx_offsets, offset)?;
106
107 let dtype = DType::Primitive(values.dtype().as_ptype(), indices.dtype().nullability());
109
110 Ok(Self {
111 dtype,
112 values,
113 indices,
114 values_idx_offsets,
115 stats_set: ArrayStats::default(),
116 offset,
117 length,
118 })
119 }
120
121 pub unsafe fn new_unchecked(
131 values: ArrayRef,
132 indices: ArrayRef,
133 values_idx_offsets: ArrayRef,
134 dtype: DType,
135 offset: usize,
136 length: usize,
137 ) -> Self {
138 Self {
139 dtype,
140 values,
141 indices,
142 values_idx_offsets,
143 stats_set: ArrayStats::default(),
144 offset,
145 length,
146 }
147 }
148
149 #[inline]
150 pub fn len(&self) -> usize {
151 self.length
152 }
153
154 #[inline]
155 pub fn is_empty(&self) -> bool {
156 self.length == 0
157 }
158
159 #[inline]
160 pub fn dtype(&self) -> &DType {
161 &self.dtype
162 }
163
164 #[inline]
165 pub fn values(&self) -> &ArrayRef {
166 &self.values
167 }
168
169 #[inline]
170 pub fn indices(&self) -> &ArrayRef {
171 &self.indices
172 }
173
174 #[inline]
175 pub fn values_idx_offsets(&self) -> &ArrayRef {
176 &self.values_idx_offsets
177 }
178
179 #[expect(
185 clippy::expect_used,
186 reason = "expect is safe here as scalar_at returns a valid primitive"
187 )]
188 pub(crate) fn values_idx_offset(&self, chunk_idx: usize) -> usize {
189 self.values_idx_offsets
190 .scalar_at(chunk_idx)
191 .as_primitive()
192 .as_::<usize>()
193 .expect("index must be of type usize")
194 - self
195 .values_idx_offsets
196 .scalar_at(0)
197 .as_primitive()
198 .as_::<usize>()
199 .expect("index must be of type usize")
200 }
201
202 #[inline]
204 pub fn offset(&self) -> usize {
205 self.offset
206 }
207
208 #[inline]
209 pub(crate) fn stats_set(&self) -> &ArrayStats {
210 &self.stats_set
211 }
212}
213
214#[cfg(test)]
215mod tests {
216 use vortex_array::Array;
217 use vortex_array::ArrayContext;
218 use vortex_array::IntoArray;
219 use vortex_array::ToCanonical;
220 use vortex_array::arrays::PrimitiveArray;
221 use vortex_array::serde::ArrayParts;
222 use vortex_array::serde::SerializeOptions;
223 use vortex_array::validity::Validity;
224 use vortex_array::vtable::ArrayVTableExt;
225 use vortex_buffer::Buffer;
226 use vortex_buffer::ByteBufferMut;
227 use vortex_dtype::DType;
228 use vortex_dtype::Nullability;
229 use vortex_dtype::PType;
230
231 use crate::RLEArray;
232 use crate::RLEVTable;
233
234 #[test]
235 fn test_try_new() {
236 let values = PrimitiveArray::from_iter([10u32, 20, 30]).into_array();
237
238 let indices =
240 PrimitiveArray::from_iter([0u16, 0, 1, 1, 2].iter().cycle().take(1024).copied())
241 .into_array();
242 let values_idx_offsets = PrimitiveArray::from_iter([0u64]).into_array();
243 let rle_array = RLEArray::try_new(values, indices, values_idx_offsets, 0, 5).unwrap();
244
245 assert_eq!(rle_array.len(), 5);
246 assert_eq!(rle_array.values().len(), 3);
247 assert_eq!(rle_array.values().dtype().as_ptype(), PType::U32);
248 }
249
250 #[test]
251 fn test_try_new_with_validity() {
252 let values = PrimitiveArray::from_iter([10u32, 20]).into_array();
253 let values_idx_offsets = PrimitiveArray::from_iter([0u64]).into_array();
254
255 let indices_pattern = [0u16, 1, 0];
256 let validity_pattern = [true, false, true];
257
258 let indices_with_validity = PrimitiveArray::new(
260 indices_pattern
261 .iter()
262 .cycle()
263 .take(1024)
264 .copied()
265 .collect::<Buffer<u16>>(),
266 Validity::from_iter(validity_pattern.iter().cycle().take(1024).copied()),
267 )
268 .into_array();
269
270 let rle_array = RLEArray::try_new(
271 values.clone(),
272 indices_with_validity,
273 values_idx_offsets,
274 0,
275 3,
276 )
277 .unwrap();
278
279 assert_eq!(rle_array.len(), 3);
280 assert_eq!(rle_array.values().len(), 2);
281 assert!(rle_array.is_valid(0));
282 assert!(!rle_array.is_valid(1));
283 assert!(rle_array.is_valid(2));
284 }
285
286 #[test]
287 fn test_all_valid() {
288 let values = PrimitiveArray::from_iter([10u32, 20, 30]).into_array();
289 let values_idx_offsets = PrimitiveArray::from_iter([0u64]).into_array();
290
291 let indices_pattern = [0u16, 1, 2, 0, 1];
292 let validity_pattern = [true, true, true, false, false];
293
294 let indices_with_validity = PrimitiveArray::new(
296 indices_pattern
297 .iter()
298 .cycle()
299 .take(1024)
300 .copied()
301 .collect::<Buffer<u16>>(),
302 Validity::from_iter(validity_pattern.iter().cycle().take(1024).copied()),
303 )
304 .into_array();
305
306 let rle_array = RLEArray::try_new(
307 values.clone(),
308 indices_with_validity,
309 values_idx_offsets,
310 0,
311 5,
312 )
313 .unwrap();
314
315 let valid_slice = rle_array.slice(0..3);
316 assert!(valid_slice.all_valid());
317
318 let mixed_slice = rle_array.slice(1..5);
319 assert!(!mixed_slice.all_valid());
320 }
321
322 #[test]
323 fn test_all_invalid() {
324 let values = PrimitiveArray::from_iter([10u32, 20, 30]).into_array();
325 let values_idx_offsets = PrimitiveArray::from_iter([0u64]).into_array();
326
327 let indices_pattern = [0u16, 1, 2, 0, 1];
329 let validity_pattern = [true, true, false, false, false];
330
331 let indices_with_validity = PrimitiveArray::new(
332 indices_pattern
333 .iter()
334 .cycle()
335 .take(1024)
336 .copied()
337 .collect::<Buffer<u16>>(),
338 Validity::from_iter(validity_pattern.iter().cycle().take(1024).copied()),
339 )
340 .into_array();
341
342 let rle_array = RLEArray::try_new(
343 values.clone(),
344 indices_with_validity,
345 values_idx_offsets,
346 0,
347 5,
348 )
349 .unwrap();
350
351 let invalid_slice = rle_array.slice(2..5);
352 assert!(invalid_slice.all_invalid());
353
354 let mixed_slice = rle_array.slice(1..4);
355 assert!(!mixed_slice.all_invalid());
356 }
357
358 #[test]
359 fn test_validity_mask() {
360 let values = PrimitiveArray::from_iter([10u32, 20, 30]).into_array();
361 let values_idx_offsets = PrimitiveArray::from_iter([0u64]).into_array();
362
363 let indices_pattern = [0u16, 1, 2, 0];
365 let validity_pattern = [true, false, true, false];
366
367 let indices_with_validity = PrimitiveArray::new(
368 indices_pattern
369 .iter()
370 .cycle()
371 .take(1024)
372 .copied()
373 .collect::<Buffer<u16>>(),
374 Validity::from_iter(validity_pattern.iter().cycle().take(1024).copied()),
375 )
376 .into_array();
377
378 let rle_array = RLEArray::try_new(
379 values.clone(),
380 indices_with_validity,
381 values_idx_offsets,
382 0,
383 4,
384 )
385 .unwrap();
386
387 let sliced_array = rle_array.slice(1..4);
388 let validity_mask = sliced_array.validity_mask();
389
390 let expected_mask = Validity::from_iter([false, true, false]).to_mask(3);
391 assert_eq!(validity_mask.len(), expected_mask.len());
392 assert_eq!(validity_mask, expected_mask);
393 }
394
395 #[test]
396 fn test_try_new_empty() {
397 let values = PrimitiveArray::from_iter(Vec::<u32>::new()).into_array();
398 let indices = PrimitiveArray::from_iter(Vec::<u16>::new()).into_array();
399 let values_idx_offsets = PrimitiveArray::from_iter(Vec::<u64>::new()).into_array();
400 let rle_array = RLEArray::try_new(
401 values,
402 indices.clone(),
403 values_idx_offsets,
404 0,
405 indices.len(),
406 )
407 .unwrap();
408
409 assert_eq!(rle_array.len(), 0);
410 assert_eq!(rle_array.values().len(), 0);
411 }
412
413 #[test]
414 fn test_multi_chunk_two_chunks() {
415 let values = PrimitiveArray::from_iter([10u32, 20, 30, 40]).into_array();
416 let indices = PrimitiveArray::from_iter([0u16, 1].repeat(1024)).into_array();
417 let values_idx_offsets = PrimitiveArray::from_iter([0u64, 2]).into_array();
418 let rle_array = RLEArray::try_new(values, indices, values_idx_offsets, 0, 2048).unwrap();
419
420 assert_eq!(rle_array.len(), 2048);
421 assert_eq!(rle_array.values().len(), 4);
422
423 assert_eq!(rle_array.values_idx_offset(0), 0);
424 assert_eq!(rle_array.values_idx_offset(1), 2);
425 }
426
427 #[test]
428 fn test_rle_serialization() {
429 let primitive = PrimitiveArray::from_iter((0..2048).map(|i| (i / 100) as u32));
430 let rle_array = RLEArray::encode(&primitive).unwrap();
431 assert_eq!(rle_array.len(), 2048);
432
433 let original_data = rle_array.to_primitive();
434 let original_values = original_data.as_slice::<u32>();
435
436 let ctx = ArrayContext::empty().with(RLEVTable.as_vtable());
437 let serialized = rle_array
438 .to_array()
439 .serialize(&ctx, &SerializeOptions::default())
440 .unwrap();
441
442 let mut concat = ByteBufferMut::empty();
443 for buf in serialized {
444 concat.extend_from_slice(buf.as_ref());
445 }
446 let concat = concat.freeze();
447
448 let parts = ArrayParts::try_from(concat).unwrap();
449 let decoded = parts
450 .decode(
451 &ctx,
452 &DType::Primitive(PType::U32, Nullability::NonNullable),
453 2048,
454 )
455 .unwrap();
456
457 let decoded_data = decoded.to_primitive();
458 let decoded_values = decoded_data.as_slice::<u32>();
459
460 assert_eq!(original_values, decoded_values);
461 }
462
463 #[test]
464 fn test_rle_serialization_slice() {
465 let primitive = PrimitiveArray::from_iter((0..2048).map(|i| (i / 100) as u32));
466 let rle_array = RLEArray::encode(&primitive).unwrap();
467 let sliced = rle_array.slice(100..200);
468 assert_eq!(sliced.len(), 100);
469
470 let ctx = ArrayContext::empty().with(RLEVTable.as_vtable());
471 let serialized = sliced
472 .serialize(&ctx, &SerializeOptions::default())
473 .unwrap();
474
475 let mut concat = ByteBufferMut::empty();
476 for buf in serialized {
477 concat.extend_from_slice(buf.as_ref());
478 }
479 let concat = concat.freeze();
480
481 let parts = ArrayParts::try_from(concat).unwrap();
482 let decoded = parts.decode(&ctx, sliced.dtype(), sliced.len()).unwrap();
483
484 let original_data = sliced.to_primitive();
485 let decoded_data = decoded.to_primitive();
486
487 let original_values = original_data.as_slice::<u32>();
488 let decoded_values = decoded_data.as_slice::<u32>();
489
490 assert_eq!(original_values, decoded_values);
491 }
492}