1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::Canonical;
10use crate::ExecutionCtx;
11use crate::IntoArray;
12use crate::arrays::ConstantArray;
13use crate::arrays::ListViewArray;
14use crate::arrays::PrimitiveArray;
15use crate::arrays::listview::ListViewArrayExt;
16use crate::arrays::primitive::PrimitiveArrayExt;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::IntegerPType;
20use crate::dtype::Nullability;
21use crate::dtype::PType;
22use crate::match_each_integer_ptype;
23use crate::match_each_unsigned_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26use crate::validity::Validity;
27
28fn rebuilt_offset_ptype(offsets_ptype: PType) -> PType {
34 match offsets_ptype {
35 PType::U8 | PType::U16 | PType::U32 => PType::U32,
36 PType::U64 => PType::U64,
37 PType::I8 | PType::I16 | PType::I32 => PType::I32,
38 PType::I64 => PType::I64,
39 _ => unreachable!("invalid offsets PType"),
40 }
41}
42
43pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
52
53pub const DEFAULT_TRIM_ELEMENTS_THRESHOLD: f32 = 0.05;
62
63pub enum ListViewRebuildMode {
65 MakeZeroCopyToList,
73
74 TrimElements,
80
81 MakeExact,
88
89 OverlapCompression,
96}
97
98impl ListViewArray {
99 pub fn rebuild(
101 &self,
102 mode: ListViewRebuildMode,
103 ctx: &mut ExecutionCtx,
104 ) -> VortexResult<ListViewArray> {
105 if self.is_empty() {
106 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
108 }
109
110 match mode {
111 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(ctx),
112 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(ctx),
113 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(ctx),
114 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
115 }
116 }
117
118 fn rebuild_zero_copy_to_list(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
126 if self.is_zero_copy_to_list() {
127 return Ok(self.clone());
129 }
130
131 let offsets_ptype = self.offsets().dtype().as_ptype();
132 let sizes_ptype = self.sizes().dtype().as_ptype();
133
134 match_each_unsigned_integer_ptype!(sizes_ptype.to_unsigned(), |S| {
142 match offsets_ptype.to_unsigned() {
143 PType::U8 => self.naive_rebuild::<u8, u32, S>(ctx),
144 PType::U16 => self.naive_rebuild::<u16, u32, S>(ctx),
145 PType::U32 => self.naive_rebuild::<u32, u32, S>(ctx),
146 PType::U64 => self.naive_rebuild::<u64, u64, S>(ctx),
147 _ => unreachable!("invalid offsets PType"),
148 }
149 })
150 }
151
152 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
156 &self,
157 ctx: &mut ExecutionCtx,
158 ) -> VortexResult<ListViewArray> {
159 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
160 let sizes_canonical =
161 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
162 let total: u64 = sizes_canonical
163 .as_slice::<S>()
164 .iter()
165 .map(|s| (*s).as_() as u64)
166 .sum();
167 if Self::should_use_take(total, self.len()) {
168 self.rebuild_with_take::<O, NewOffset, S>(ctx)
169 } else {
170 self.rebuild_list_by_list::<O, NewOffset, S>(ctx)
171 }
172 }
173
174 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
182 if num_lists == 0 {
183 return true;
184 }
185 let avg = total_output_elements / num_lists as u64;
186 avg < 128
187 }
188
189 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
192 &self,
193 ctx: &mut ExecutionCtx,
194 ) -> VortexResult<ListViewArray> {
195 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
196 let size_ptype = self.sizes().dtype().as_ptype();
197
198 let offsets_canonical = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
199 let offsets_canonical =
200 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
201 let offsets_slice = offsets_canonical.as_slice::<O>();
202 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
203 let sizes_canonical =
204 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
205 let sizes_slice = sizes_canonical.as_slice::<S>();
206
207 let len = offsets_slice.len();
208
209 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
210 let mut new_sizes = BufferMut::<S>::with_capacity(len);
211 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
212
213 let validity = self.validity()?.execute_mask(len, ctx)?;
217
218 let mut n_elements = NewOffset::zero();
219 for index in 0..len {
220 if !validity.value(index) {
221 new_offsets.push(n_elements);
222 new_sizes.push(S::zero());
223 continue;
224 }
225
226 let offset = offsets_slice[index];
227 let size = sizes_slice[index];
228 let start = offset.as_();
229 let stop = start + size.as_();
230
231 new_offsets.push(n_elements);
232 new_sizes.push(size);
233 take_indices.extend(start as u64..stop as u64);
234 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
235 }
236
237 let elements = self.elements().take(take_indices.into_array())?;
238 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
240 .reinterpret_cast(new_offset_ptype)
241 .into_array();
242 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
243 .reinterpret_cast(size_ptype)
244 .into_array();
245
246 Ok(unsafe {
250 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
251 .with_zero_copy_to_list(true)
252 })
253 }
254
255 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
258 &self,
259 ctx: &mut ExecutionCtx,
260 ) -> VortexResult<ListViewArray> {
261 let element_dtype = self
262 .dtype()
263 .as_list_element_opt()
264 .vortex_expect("somehow had a canonical list that was not a list");
265
266 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
267 let size_ptype = self.sizes().dtype().as_ptype();
268
269 let offsets_canonical = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
270 let offsets_canonical =
271 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
272 let offsets_slice = offsets_canonical.as_slice::<O>();
273 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
274 let sizes_canonical =
275 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
276 let sizes_slice = sizes_canonical.as_slice::<S>();
277
278 let len = offsets_slice.len();
279
280 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
281 let mut new_sizes = BufferMut::<S>::with_capacity(len);
286
287 let elements_canonical = self
289 .elements()
290 .clone()
291 .execute::<Canonical>(ctx)?
292 .into_array();
293
294 let mut new_elements_builder =
297 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
298
299 let validity = self.validity()?.execute_mask(len, ctx)?;
301
302 let mut n_elements = NewOffset::zero();
303 for index in 0..len {
304 if !validity.value(index) {
305 new_offsets.push(n_elements);
308 new_sizes.push(S::zero());
309 continue;
310 }
311
312 let offset = offsets_slice[index];
313 let size = sizes_slice[index];
314
315 let start = offset.as_();
316 let stop = start + size.as_();
317
318 new_offsets.push(n_elements);
319 new_sizes.push(size);
320 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
321
322 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
323 }
324
325 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
327 .reinterpret_cast(new_offset_ptype)
328 .into_array();
329 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
330 .reinterpret_cast(size_ptype)
331 .into_array();
332 let elements = new_elements_builder.finish();
333
334 debug_assert_eq!(
335 n_elements.as_(),
336 elements.len(),
337 "The accumulated elements somehow had the wrong length"
338 );
339
340 Ok(unsafe {
349 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
350 .with_zero_copy_to_list(true)
351 })
352 }
353
354 fn rebuild_trim_elements(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
358 let (start, end) = self.referenced_element_bounds(ctx)?;
359
360 unsafe { self.trim_elements(start, end) }
362 }
363
364 pub unsafe fn trim_elements(&self, start: usize, end: usize) -> VortexResult<ListViewArray> {
373 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
374 let offset = <O as FromPrimitive>::from_usize(start)
375 .vortex_expect("unable to convert the min offset `start` into a `usize`");
376 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
377
378 self.offsets()
379 .clone()
380 .binary(
381 ConstantArray::new(scalar, self.offsets().len()).into_array(),
382 Operator::Sub,
383 )
384 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
385 });
386
387 let sliced_elements = self.elements().slice(start..end)?;
388
389 Ok(unsafe {
394 ListViewArray::new_unchecked(
395 sliced_elements,
396 adjusted_offsets,
397 self.sizes().clone(),
398 self.validity()?,
399 )
400 .with_zero_copy_to_list(self.is_zero_copy_to_list())
401 })
402 }
403
404 fn rebuild_make_exact(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
405 if self.is_zero_copy_to_list() {
406 self.rebuild_trim_elements(ctx)
407 } else {
408 self.rebuild_zero_copy_to_list(ctx)
411 }
412 }
413}
414
415#[cfg(test)]
416mod tests {
417 use vortex_buffer::BitBuffer;
418 use vortex_error::VortexResult;
419
420 use super::super::tests::common::SESSION;
421 use super::ListViewRebuildMode;
422 use crate::IntoArray;
423 use crate::VortexSessionExecute;
424 use crate::arrays::ListViewArray;
425 use crate::arrays::PrimitiveArray;
426 use crate::arrays::listview::ListViewArrayExt;
427 use crate::assert_arrays_eq;
428 use crate::dtype::Nullability;
429 use crate::validity::Validity;
430
431 #[test]
432 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
433 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
437 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
438 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
439
440 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
441
442 let mut ctx = SESSION.create_execution_ctx();
443 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
444
445 assert_eq!(flattened.elements().len(), 5);
448
449 assert_eq!(flattened.offset_at(0), 0);
451 assert_eq!(flattened.size_at(0), 3);
452 assert_eq!(flattened.offset_at(1), 3);
453 assert_eq!(flattened.size_at(1), 2);
454
455 assert_arrays_eq!(
457 flattened.list_elements_at(0)?,
458 PrimitiveArray::from_iter([1i32, 2, 3]),
459 &mut ctx
460 );
461
462 assert_arrays_eq!(
463 flattened.list_elements_at(1)?,
464 PrimitiveArray::from_iter([2i32, 3]),
465 &mut ctx
466 );
467 Ok(())
468 }
469
470 #[test]
471 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
472 use crate::arrays::BoolArray;
473
474 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
476 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
477 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
478 let validity = Validity::Array(
479 BoolArray::new(
480 BitBuffer::from(vec![true, false, true]),
481 Validity::NonNullable,
482 )
483 .into_array(),
484 );
485
486 let listview = ListViewArray::new(elements, offsets, sizes, validity);
487
488 let mut ctx = SESSION.create_execution_ctx();
489 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
490
491 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
493 assert!(flattened.validity()?.execute_is_valid(0, &mut ctx)?);
494 assert!(!flattened.validity()?.execute_is_valid(1, &mut ctx)?);
495 assert!(flattened.validity()?.execute_is_valid(2, &mut ctx)?);
496
497 assert_arrays_eq!(
499 flattened.list_elements_at(0)?,
500 PrimitiveArray::from_iter([1i32, 2]),
501 &mut ctx
502 );
503
504 assert_arrays_eq!(
505 flattened.list_elements_at(2)?,
506 PrimitiveArray::from_iter([3i32]),
507 &mut ctx
508 );
509 Ok(())
510 }
511
512 #[test]
513 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
514 let elements =
522 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
523 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
524 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
525
526 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
527
528 let mut ctx = SESSION.create_execution_ctx();
529 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
530
531 assert_eq!(trimmed.elements().len(), 5);
533
534 assert_eq!(trimmed.offset_at(0), 0);
536 assert_eq!(trimmed.size_at(0), 2);
537 assert_eq!(trimmed.offset_at(1), 3);
538 assert_eq!(trimmed.size_at(1), 2);
539
540 assert_arrays_eq!(
542 trimmed.list_elements_at(0)?,
543 PrimitiveArray::from_iter([1i32, 2]),
544 &mut ctx
545 );
546
547 assert_arrays_eq!(
548 trimmed.list_elements_at(1)?,
549 PrimitiveArray::from_iter([3i32, 4]),
550 &mut ctx
551 );
552
553 let all_elements = trimmed
555 .elements()
556 .clone()
557 .execute::<PrimitiveArray>(&mut ctx)?;
558 assert_eq!(all_elements.execute_scalar(2, &mut ctx)?, 97i32.into());
559 Ok(())
560 }
561
562 #[test]
563 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
564 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
570 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
571 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
572 let validity = Validity::from_iter(vec![true, true, false, false]);
573
574 let listview = ListViewArray::new(elements, offsets, sizes, validity);
575
576 let mut ctx = SESSION.create_execution_ctx();
578 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
579 assert!(rebuilt.is_zero_copy_to_list());
580
581 assert_eq!(rebuilt.offset_at(0), 0);
584 assert_eq!(rebuilt.offset_at(1), 2);
585 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
590 assert_eq!(rebuilt.size_at(1), 2);
591 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact, &mut ctx)?;
597
598 assert!(exact.is_valid(0, &mut ctx)?);
600 assert!(exact.is_valid(1, &mut ctx)?);
601 assert!(!exact.is_valid(2, &mut ctx)?);
602 assert!(!exact.is_valid(3, &mut ctx)?);
603
604 assert_arrays_eq!(
606 exact.list_elements_at(0)?,
607 PrimitiveArray::from_iter([1i32, 2]),
608 &mut ctx
609 );
610
611 assert_arrays_eq!(
612 exact.list_elements_at(1)?,
613 PrimitiveArray::from_iter([3i32, 4]),
614 &mut ctx
615 );
616 Ok(())
617 }
618
619 #[test]
622 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
623 let mut elems = vec![0i32; 70_005];
624 elems[70_000] = 10;
625 elems[70_001] = 20;
626 elems[70_002] = 30;
627 elems[70_003] = 40;
628 let elements = PrimitiveArray::from_iter(elems).into_array();
629 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
630 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
631
632 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
633 let mut ctx = SESSION.create_execution_ctx();
634 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
635 assert_arrays_eq!(
636 trimmed.list_elements_at(1)?,
637 PrimitiveArray::from_iter([30i32, 40]),
638 &mut ctx
639 );
640 Ok(())
641 }
642
643 #[test]
646 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
647 let mut elems = vec![0i32; 70_001];
648 elems[3] = 30;
649 elems[4] = 40;
650 let elements = PrimitiveArray::from_iter(elems).into_array();
651 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
652 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
653
654 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
655 let mut ctx = SESSION.create_execution_ctx();
656 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
657 assert_arrays_eq!(
658 trimmed.list_elements_at(1)?,
659 PrimitiveArray::from_iter([30i32, 40]),
660 &mut ctx
661 );
662 Ok(())
663 }
664
665 #[test]
668 fn test_rebuild_preserves_signed_offset_and_size_types() -> VortexResult<()> {
669 use crate::dtype::PType;
670
671 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
673 let offsets = PrimitiveArray::from_iter(vec![0i32, 1]).into_array();
674 let sizes = PrimitiveArray::from_iter(vec![3i16, 2]).into_array();
675
676 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
677 let mut ctx = SESSION.create_execution_ctx();
678 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
679
680 assert_arrays_eq!(
682 rebuilt.list_elements_at(0)?,
683 PrimitiveArray::from_iter([1i32, 2, 3]),
684 &mut ctx
685 );
686 assert_arrays_eq!(
687 rebuilt.list_elements_at(1)?,
688 PrimitiveArray::from_iter([2i32, 3]),
689 &mut ctx
690 );
691
692 assert_eq!(rebuilt.offsets().dtype().as_ptype(), PType::I32);
694 assert_eq!(rebuilt.sizes().dtype().as_ptype(), PType::I16);
695 Ok(())
696 }
697
698 #[test]
701 fn heuristic_zero_lists_uses_take() {
702 assert!(ListViewArray::should_use_take(0, 0));
703 }
704
705 #[test]
706 fn heuristic_small_lists_use_take() {
707 assert!(ListViewArray::should_use_take(127_000, 1_000));
709 assert!(!ListViewArray::should_use_take(128_000, 1_000));
711 }
712
713 #[test]
716 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
717 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
718 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
719 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
720
721 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
722 let mut ctx = SESSION.create_execution_ctx();
723 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
724
725 assert_arrays_eq!(trimmed, listview, &mut ctx);
727 Ok(())
728 }
729}