1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::Canonical;
10use crate::ExecutionCtx;
11use crate::IntoArray;
12use crate::arrays::ConstantArray;
13use crate::arrays::ListViewArray;
14use crate::arrays::PrimitiveArray;
15use crate::arrays::listview::ListViewArrayExt;
16use crate::arrays::primitive::PrimitiveArrayExt;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::IntegerPType;
20use crate::dtype::Nullability;
21use crate::dtype::PType;
22use crate::match_each_integer_ptype;
23use crate::match_each_unsigned_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26use crate::validity::Validity;
27
28fn rebuilt_offset_ptype(offsets_ptype: PType) -> PType {
34 match offsets_ptype {
35 PType::U8 | PType::U16 | PType::U32 => PType::U32,
36 PType::U64 => PType::U64,
37 PType::I8 | PType::I16 | PType::I32 => PType::I32,
38 PType::I64 => PType::I64,
39 _ => unreachable!("invalid offsets PType"),
40 }
41}
42
43pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
52
53pub const DEFAULT_TRIM_ELEMENTS_THRESHOLD: f32 = 0.05;
62
63pub enum ListViewRebuildMode {
65 MakeZeroCopyToList,
73
74 TrimElements,
80
81 MakeExact,
88
89 OverlapCompression,
96}
97
98impl ListViewArray {
99 pub fn rebuild(
101 &self,
102 mode: ListViewRebuildMode,
103 ctx: &mut ExecutionCtx,
104 ) -> VortexResult<ListViewArray> {
105 if self.is_empty() {
106 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
108 }
109
110 match mode {
111 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(ctx),
112 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(ctx),
113 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(ctx),
114 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
115 }
116 }
117
118 fn rebuild_zero_copy_to_list(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
126 if self.is_zero_copy_to_list() {
127 return Ok(self.clone());
129 }
130
131 let offsets_ptype = self.offsets().dtype().as_ptype();
132 let sizes_ptype = self.sizes().dtype().as_ptype();
133
134 match_each_unsigned_integer_ptype!(sizes_ptype.to_unsigned(), |S| {
142 match offsets_ptype.to_unsigned() {
143 PType::U8 => self.naive_rebuild::<u8, u32, S>(ctx),
144 PType::U16 => self.naive_rebuild::<u16, u32, S>(ctx),
145 PType::U32 => self.naive_rebuild::<u32, u32, S>(ctx),
146 PType::U64 => self.naive_rebuild::<u64, u64, S>(ctx),
147 _ => unreachable!("invalid offsets PType"),
148 }
149 })
150 }
151
152 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
156 &self,
157 ctx: &mut ExecutionCtx,
158 ) -> VortexResult<ListViewArray> {
159 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
160 let sizes_canonical =
161 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
162 let total: u64 = sizes_canonical
163 .as_slice::<S>()
164 .iter()
165 .map(|s| (*s).as_() as u64)
166 .sum();
167 if Self::should_use_take(total, self.len()) {
168 self.rebuild_with_take::<O, NewOffset, S>(ctx)
169 } else {
170 self.rebuild_list_by_list::<O, NewOffset, S>(ctx)
171 }
172 }
173
174 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
182 if num_lists == 0 {
183 return true;
184 }
185 let avg = total_output_elements / num_lists as u64;
186 avg < 128
187 }
188
189 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
192 &self,
193 ctx: &mut ExecutionCtx,
194 ) -> VortexResult<ListViewArray> {
195 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
196 let size_ptype = self.sizes().dtype().as_ptype();
197
198 let offsets_canonical = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
199 let offsets_canonical =
200 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
201 let offsets_slice = offsets_canonical.as_slice::<O>();
202 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
203 let sizes_canonical =
204 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
205 let sizes_slice = sizes_canonical.as_slice::<S>();
206
207 let len = offsets_slice.len();
208
209 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
210 let mut new_sizes = BufferMut::<S>::with_capacity(len);
211 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
212
213 let validity = self.validity()?.execute_mask(len, ctx)?;
217
218 let mut n_elements = NewOffset::zero();
219 for index in 0..len {
220 if !validity.value(index) {
221 new_offsets.push(n_elements);
222 new_sizes.push(S::zero());
223 continue;
224 }
225
226 let offset = offsets_slice[index];
227 let size = sizes_slice[index];
228 let start = offset.as_();
229 let stop = start + size.as_();
230
231 new_offsets.push(n_elements);
232 new_sizes.push(size);
233 take_indices.extend(start as u64..stop as u64);
234 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
235 }
236
237 let elements = self.elements().take(take_indices.into_array())?;
238 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
240 .reinterpret_cast(new_offset_ptype)
241 .into_array();
242 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
243 .reinterpret_cast(size_ptype)
244 .into_array();
245
246 Ok(unsafe {
250 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
251 .with_zero_copy_to_list(true)
252 })
253 }
254
255 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
258 &self,
259 ctx: &mut ExecutionCtx,
260 ) -> VortexResult<ListViewArray> {
261 let element_dtype = self
262 .dtype()
263 .as_list_element_opt()
264 .vortex_expect("somehow had a canonical list that was not a list");
265
266 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
267 let size_ptype = self.sizes().dtype().as_ptype();
268
269 let offsets_canonical = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
270 let offsets_canonical =
271 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
272 let offsets_slice = offsets_canonical.as_slice::<O>();
273 let sizes_canonical = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
274 let sizes_canonical =
275 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
276 let sizes_slice = sizes_canonical.as_slice::<S>();
277
278 let len = offsets_slice.len();
279
280 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
281 let mut new_sizes = BufferMut::<S>::with_capacity(len);
286
287 let elements_canonical = self
289 .elements()
290 .clone()
291 .execute::<Canonical>(ctx)?
292 .into_array();
293
294 let mut new_elements_builder =
297 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
298
299 let validity = self.validity()?.execute_mask(len, ctx)?;
301
302 let mut n_elements = NewOffset::zero();
303 for index in 0..len {
304 if !validity.value(index) {
305 new_offsets.push(n_elements);
308 new_sizes.push(S::zero());
309 continue;
310 }
311
312 let offset = offsets_slice[index];
313 let size = sizes_slice[index];
314
315 let start = offset.as_();
316 let stop = start + size.as_();
317
318 new_offsets.push(n_elements);
319 new_sizes.push(size);
320 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
321
322 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
323 }
324
325 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
327 .reinterpret_cast(new_offset_ptype)
328 .into_array();
329 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
330 .reinterpret_cast(size_ptype)
331 .into_array();
332 let elements = new_elements_builder.finish();
333
334 debug_assert_eq!(
335 n_elements.as_(),
336 elements.len(),
337 "The accumulated elements somehow had the wrong length"
338 );
339
340 Ok(unsafe {
349 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
350 .with_zero_copy_to_list(true)
351 })
352 }
353
354 fn rebuild_trim_elements(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
358 let (start, end) = self.referenced_element_bounds(ctx)?;
359
360 unsafe { self.trim_elements(start, end) }
362 }
363
364 pub unsafe fn trim_elements(&self, start: usize, end: usize) -> VortexResult<ListViewArray> {
373 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
374 let offset = <O as FromPrimitive>::from_usize(start)
375 .vortex_expect("unable to convert the min offset `start` into a `usize`");
376 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
377
378 self.offsets()
379 .clone()
380 .binary(
381 ConstantArray::new(scalar, self.offsets().len()).into_array(),
382 Operator::Sub,
383 )
384 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
385 });
386
387 let sliced_elements = self.elements().slice(start..end)?;
388
389 Ok(unsafe {
394 ListViewArray::new_unchecked(
395 sliced_elements,
396 adjusted_offsets,
397 self.sizes().clone(),
398 self.validity()?,
399 )
400 .with_zero_copy_to_list(self.is_zero_copy_to_list())
401 })
402 }
403
404 fn rebuild_make_exact(&self, ctx: &mut ExecutionCtx) -> VortexResult<ListViewArray> {
405 if self.is_zero_copy_to_list() {
406 self.rebuild_trim_elements(ctx)
407 } else {
408 self.rebuild_zero_copy_to_list(ctx)
411 }
412 }
413}
414
415#[cfg(test)]
416mod tests {
417 use vortex_buffer::BitBuffer;
418 use vortex_error::VortexResult;
419
420 use super::super::tests::common::SESSION;
421 use super::ListViewRebuildMode;
422 use crate::IntoArray;
423 use crate::VortexSessionExecute;
424 use crate::arrays::ListViewArray;
425 use crate::arrays::PrimitiveArray;
426 use crate::arrays::listview::ListViewArrayExt;
427 use crate::assert_arrays_eq;
428 use crate::dtype::Nullability;
429 use crate::validity::Validity;
430
431 #[test]
432 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
433 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
437 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
438 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
439
440 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
441
442 let mut ctx = SESSION.create_execution_ctx();
443 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
444
445 assert_eq!(flattened.elements().len(), 5);
448
449 assert_eq!(flattened.offset_at(0), 0);
451 assert_eq!(flattened.size_at(0), 3);
452 assert_eq!(flattened.offset_at(1), 3);
453 assert_eq!(flattened.size_at(1), 2);
454
455 assert_arrays_eq!(
457 flattened.list_elements_at(0)?,
458 PrimitiveArray::from_iter([1i32, 2, 3])
459 );
460
461 assert_arrays_eq!(
462 flattened.list_elements_at(1)?,
463 PrimitiveArray::from_iter([2i32, 3])
464 );
465 Ok(())
466 }
467
468 #[test]
469 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
470 use crate::arrays::BoolArray;
471
472 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
474 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
475 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
476 let validity = Validity::Array(
477 BoolArray::new(
478 BitBuffer::from(vec![true, false, true]),
479 Validity::NonNullable,
480 )
481 .into_array(),
482 );
483
484 let listview = ListViewArray::new(elements, offsets, sizes, validity);
485
486 let mut ctx = SESSION.create_execution_ctx();
487 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
488
489 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
491 assert!(flattened.validity()?.execute_is_valid(0, &mut ctx)?);
492 assert!(!flattened.validity()?.execute_is_valid(1, &mut ctx)?);
493 assert!(flattened.validity()?.execute_is_valid(2, &mut ctx)?);
494
495 assert_arrays_eq!(
497 flattened.list_elements_at(0)?,
498 PrimitiveArray::from_iter([1i32, 2])
499 );
500
501 assert_arrays_eq!(
502 flattened.list_elements_at(2)?,
503 PrimitiveArray::from_iter([3i32])
504 );
505 Ok(())
506 }
507
508 #[test]
509 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
510 let elements =
518 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
519 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
520 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
521
522 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
523
524 let mut ctx = SESSION.create_execution_ctx();
525 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
526
527 assert_eq!(trimmed.elements().len(), 5);
529
530 assert_eq!(trimmed.offset_at(0), 0);
532 assert_eq!(trimmed.size_at(0), 2);
533 assert_eq!(trimmed.offset_at(1), 3);
534 assert_eq!(trimmed.size_at(1), 2);
535
536 assert_arrays_eq!(
538 trimmed.list_elements_at(0)?,
539 PrimitiveArray::from_iter([1i32, 2])
540 );
541
542 assert_arrays_eq!(
543 trimmed.list_elements_at(1)?,
544 PrimitiveArray::from_iter([3i32, 4])
545 );
546
547 let all_elements = trimmed
549 .elements()
550 .clone()
551 .execute::<PrimitiveArray>(&mut ctx)?;
552 assert_eq!(all_elements.execute_scalar(2, &mut ctx)?, 97i32.into());
553 Ok(())
554 }
555
556 #[test]
557 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
558 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
564 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
565 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
566 let validity = Validity::from_iter(vec![true, true, false, false]);
567
568 let listview = ListViewArray::new(elements, offsets, sizes, validity);
569
570 let mut ctx = SESSION.create_execution_ctx();
572 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
573 assert!(rebuilt.is_zero_copy_to_list());
574
575 assert_eq!(rebuilt.offset_at(0), 0);
578 assert_eq!(rebuilt.offset_at(1), 2);
579 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
584 assert_eq!(rebuilt.size_at(1), 2);
585 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact, &mut ctx)?;
591
592 assert!(exact.is_valid(0, &mut ctx)?);
594 assert!(exact.is_valid(1, &mut ctx)?);
595 assert!(!exact.is_valid(2, &mut ctx)?);
596 assert!(!exact.is_valid(3, &mut ctx)?);
597
598 assert_arrays_eq!(
600 exact.list_elements_at(0)?,
601 PrimitiveArray::from_iter([1i32, 2])
602 );
603
604 assert_arrays_eq!(
605 exact.list_elements_at(1)?,
606 PrimitiveArray::from_iter([3i32, 4])
607 );
608 Ok(())
609 }
610
611 #[test]
614 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
615 let mut elems = vec![0i32; 70_005];
616 elems[70_000] = 10;
617 elems[70_001] = 20;
618 elems[70_002] = 30;
619 elems[70_003] = 40;
620 let elements = PrimitiveArray::from_iter(elems).into_array();
621 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
622 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
623
624 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
625 let mut ctx = SESSION.create_execution_ctx();
626 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
627 assert_arrays_eq!(
628 trimmed.list_elements_at(1)?,
629 PrimitiveArray::from_iter([30i32, 40])
630 );
631 Ok(())
632 }
633
634 #[test]
637 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
638 let mut elems = vec![0i32; 70_001];
639 elems[3] = 30;
640 elems[4] = 40;
641 let elements = PrimitiveArray::from_iter(elems).into_array();
642 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
643 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
644
645 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
646 let mut ctx = SESSION.create_execution_ctx();
647 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
648 assert_arrays_eq!(
649 trimmed.list_elements_at(1)?,
650 PrimitiveArray::from_iter([30i32, 40])
651 );
652 Ok(())
653 }
654
655 #[test]
658 fn test_rebuild_preserves_signed_offset_and_size_types() -> VortexResult<()> {
659 use crate::dtype::PType;
660
661 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
663 let offsets = PrimitiveArray::from_iter(vec![0i32, 1]).into_array();
664 let sizes = PrimitiveArray::from_iter(vec![3i16, 2]).into_array();
665
666 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
667 let mut ctx = SESSION.create_execution_ctx();
668 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList, &mut ctx)?;
669
670 assert_arrays_eq!(
672 rebuilt.list_elements_at(0)?,
673 PrimitiveArray::from_iter([1i32, 2, 3])
674 );
675 assert_arrays_eq!(
676 rebuilt.list_elements_at(1)?,
677 PrimitiveArray::from_iter([2i32, 3])
678 );
679
680 assert_eq!(rebuilt.offsets().dtype().as_ptype(), PType::I32);
682 assert_eq!(rebuilt.sizes().dtype().as_ptype(), PType::I16);
683 Ok(())
684 }
685
686 #[test]
689 fn heuristic_zero_lists_uses_take() {
690 assert!(ListViewArray::should_use_take(0, 0));
691 }
692
693 #[test]
694 fn heuristic_small_lists_use_take() {
695 assert!(ListViewArray::should_use_take(127_000, 1_000));
697 assert!(!ListViewArray::should_use_take(128_000, 1_000));
699 }
700
701 #[test]
704 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
705 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
706 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
707 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
708
709 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
710 let mut ctx = SESSION.create_execution_ctx();
711 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements, &mut ctx)?;
712
713 assert_arrays_eq!(trimmed, listview);
715 Ok(())
716 }
717}