1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11#[expect(deprecated)]
12use crate::ToCanonical as _;
13use crate::VortexSessionExecute;
14use crate::arrays::ConstantArray;
15use crate::arrays::ListViewArray;
16use crate::arrays::PrimitiveArray;
17use crate::arrays::listview::ListViewArrayExt;
18use crate::arrays::primitive::PrimitiveArrayExt;
19use crate::builders::builder_with_capacity;
20use crate::builtins::ArrayBuiltins;
21use crate::dtype::IntegerPType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::match_each_integer_ptype;
25use crate::match_each_unsigned_integer_ptype;
26use crate::scalar::Scalar;
27use crate::scalar_fn::fns::operators::Operator;
28use crate::validity::Validity;
29
30fn rebuilt_offset_ptype(offsets_ptype: PType) -> PType {
36 match offsets_ptype {
37 PType::U8 | PType::U16 | PType::U32 => PType::U32,
38 PType::U64 => PType::U64,
39 PType::I8 | PType::I16 | PType::I32 => PType::I32,
40 PType::I64 => PType::I64,
41 _ => unreachable!("invalid offsets PType"),
42 }
43}
44
45pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
54
55pub const DEFAULT_TRIM_ELEMENTS_THRESHOLD: f32 = 0.05;
64
65pub enum ListViewRebuildMode {
67 MakeZeroCopyToList,
75
76 TrimElements,
82
83 MakeExact,
90
91 OverlapCompression,
98}
99
100impl ListViewArray {
101 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
103 if self.is_empty() {
104 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
106 }
107
108 match mode {
109 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
110 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
111 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
112 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
113 }
114 }
115
116 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
124 if self.is_zero_copy_to_list() {
125 return Ok(self.clone());
127 }
128
129 let offsets_ptype = self.offsets().dtype().as_ptype();
130 let sizes_ptype = self.sizes().dtype().as_ptype();
131
132 match_each_unsigned_integer_ptype!(sizes_ptype.to_unsigned(), |S| {
140 match offsets_ptype.to_unsigned() {
141 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
142 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
143 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
144 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
145 _ => unreachable!("invalid offsets PType"),
146 }
147 })
148 }
149
150 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
154 &self,
155 ) -> VortexResult<ListViewArray> {
156 #[expect(deprecated)]
157 let sizes_canonical = self.sizes().to_primitive();
158 let sizes_canonical =
159 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
160 let total: u64 = sizes_canonical
161 .as_slice::<S>()
162 .iter()
163 .map(|s| (*s).as_() as u64)
164 .sum();
165 if Self::should_use_take(total, self.len()) {
166 self.rebuild_with_take::<O, NewOffset, S>()
167 } else {
168 self.rebuild_list_by_list::<O, NewOffset, S>()
169 }
170 }
171
172 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
180 if num_lists == 0 {
181 return true;
182 }
183 let avg = total_output_elements / num_lists as u64;
184 avg < 128
185 }
186
187 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
190 &self,
191 ) -> VortexResult<ListViewArray> {
192 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
193 let size_ptype = self.sizes().dtype().as_ptype();
194
195 #[expect(deprecated)]
196 let offsets_canonical = self.offsets().to_primitive();
197 let offsets_canonical =
198 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
199 let offsets_slice = offsets_canonical.as_slice::<O>();
200 #[expect(deprecated)]
201 let sizes_canonical = self.sizes().to_primitive();
202 let sizes_canonical =
203 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
204 let sizes_slice = sizes_canonical.as_slice::<S>();
205
206 let len = offsets_slice.len();
207
208 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
209 let mut new_sizes = BufferMut::<S>::with_capacity(len);
210 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
211
212 let mut n_elements = NewOffset::zero();
213 for index in 0..len {
214 if !self.validity()?.is_valid(index)? {
215 new_offsets.push(n_elements);
216 new_sizes.push(S::zero());
217 continue;
218 }
219
220 let offset = offsets_slice[index];
221 let size = sizes_slice[index];
222 let start = offset.as_();
223 let stop = start + size.as_();
224
225 new_offsets.push(n_elements);
226 new_sizes.push(size);
227 take_indices.extend(start as u64..stop as u64);
228 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
229 }
230
231 let elements = self.elements().take(take_indices.into_array())?;
232 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
234 .reinterpret_cast(new_offset_ptype)
235 .into_array();
236 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
237 .reinterpret_cast(size_ptype)
238 .into_array();
239
240 Ok(unsafe {
244 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
245 .with_zero_copy_to_list(true)
246 })
247 }
248
249 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
252 &self,
253 ) -> VortexResult<ListViewArray> {
254 let element_dtype = self
255 .dtype()
256 .as_list_element_opt()
257 .vortex_expect("somehow had a canonical list that was not a list");
258
259 let new_offset_ptype = rebuilt_offset_ptype(self.offsets().dtype().as_ptype());
260 let size_ptype = self.sizes().dtype().as_ptype();
261
262 #[expect(deprecated)]
263 let offsets_canonical = self.offsets().to_primitive();
264 let offsets_canonical =
265 offsets_canonical.reinterpret_cast(offsets_canonical.ptype().to_unsigned());
266 let offsets_slice = offsets_canonical.as_slice::<O>();
267 #[expect(deprecated)]
268 let sizes_canonical = self.sizes().to_primitive();
269 let sizes_canonical =
270 sizes_canonical.reinterpret_cast(sizes_canonical.ptype().to_unsigned());
271 let sizes_slice = sizes_canonical.as_slice::<S>();
272
273 let len = offsets_slice.len();
274
275 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
276 let mut new_sizes = BufferMut::<S>::with_capacity(len);
281
282 #[expect(deprecated)]
284 let elements_canonical = self
285 .elements()
286 .to_canonical()
287 .vortex_expect("canonicalize elements for rebuild")
288 .into_array();
289
290 let mut new_elements_builder =
293 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
294
295 let mut n_elements = NewOffset::zero();
296 for index in 0..len {
297 if !self.validity()?.is_valid(index)? {
298 new_offsets.push(n_elements);
301 new_sizes.push(S::zero());
302 continue;
303 }
304
305 let offset = offsets_slice[index];
306 let size = sizes_slice[index];
307
308 let start = offset.as_();
309 let stop = start + size.as_();
310
311 new_offsets.push(n_elements);
312 new_sizes.push(size);
313 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
314
315 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
316 }
317
318 let offsets = PrimitiveArray::new(new_offsets.freeze(), Validity::NonNullable)
320 .reinterpret_cast(new_offset_ptype)
321 .into_array();
322 let sizes = PrimitiveArray::new(new_sizes.freeze(), Validity::NonNullable)
323 .reinterpret_cast(size_ptype)
324 .into_array();
325 let elements = new_elements_builder.finish();
326
327 debug_assert_eq!(
328 n_elements.as_(),
329 elements.len(),
330 "The accumulated elements somehow had the wrong length"
331 );
332
333 Ok(unsafe {
342 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
343 .with_zero_copy_to_list(true)
344 })
345 }
346
347 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
351 let mut ctx = LEGACY_SESSION.create_execution_ctx();
352 let (start, end) = self.referenced_element_bounds(&mut ctx)?;
353
354 unsafe { self.trim_elements(start, end) }
356 }
357
358 pub unsafe fn trim_elements(&self, start: usize, end: usize) -> VortexResult<ListViewArray> {
367 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
368 let offset = <O as FromPrimitive>::from_usize(start)
369 .vortex_expect("unable to convert the min offset `start` into a `usize`");
370 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
371
372 self.offsets()
373 .clone()
374 .binary(
375 ConstantArray::new(scalar, self.offsets().len()).into_array(),
376 Operator::Sub,
377 )
378 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
379 });
380
381 let sliced_elements = self.elements().slice(start..end)?;
382
383 Ok(unsafe {
388 ListViewArray::new_unchecked(
389 sliced_elements,
390 adjusted_offsets,
391 self.sizes().clone(),
392 self.validity()?,
393 )
394 .with_zero_copy_to_list(self.is_zero_copy_to_list())
395 })
396 }
397
398 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
399 if self.is_zero_copy_to_list() {
400 self.rebuild_trim_elements()
401 } else {
402 self.rebuild_zero_copy_to_list()
405 }
406 }
407}
408
409#[cfg(test)]
410mod tests {
411 use vortex_buffer::BitBuffer;
412 use vortex_error::VortexResult;
413
414 use super::ListViewRebuildMode;
415 use crate::IntoArray;
416 use crate::LEGACY_SESSION;
417 #[expect(deprecated)]
418 use crate::ToCanonical as _;
419 use crate::VortexSessionExecute;
420 use crate::arrays::ListViewArray;
421 use crate::arrays::PrimitiveArray;
422 use crate::arrays::listview::ListViewArrayExt;
423 use crate::assert_arrays_eq;
424 use crate::dtype::Nullability;
425 use crate::validity::Validity;
426
427 #[test]
428 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
429 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
433 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
434 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
435
436 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
437
438 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
439
440 assert_eq!(flattened.elements().len(), 5);
443
444 assert_eq!(flattened.offset_at(0), 0);
446 assert_eq!(flattened.size_at(0), 3);
447 assert_eq!(flattened.offset_at(1), 3);
448 assert_eq!(flattened.size_at(1), 2);
449
450 assert_arrays_eq!(
452 flattened.list_elements_at(0)?,
453 PrimitiveArray::from_iter([1i32, 2, 3])
454 );
455
456 assert_arrays_eq!(
457 flattened.list_elements_at(1)?,
458 PrimitiveArray::from_iter([2i32, 3])
459 );
460 Ok(())
461 }
462
463 #[test]
464 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
465 use crate::arrays::BoolArray;
466
467 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
469 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
470 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
471 let validity = Validity::Array(
472 BoolArray::new(
473 BitBuffer::from(vec![true, false, true]),
474 Validity::NonNullable,
475 )
476 .into_array(),
477 );
478
479 let listview = ListViewArray::new(elements, offsets, sizes, validity);
480
481 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
482
483 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
485 assert!(flattened.validity()?.is_valid(0)?);
486 assert!(!flattened.validity()?.is_valid(1)?);
487 assert!(flattened.validity()?.is_valid(2)?);
488
489 assert_arrays_eq!(
491 flattened.list_elements_at(0)?,
492 PrimitiveArray::from_iter([1i32, 2])
493 );
494
495 assert_arrays_eq!(
496 flattened.list_elements_at(2)?,
497 PrimitiveArray::from_iter([3i32])
498 );
499 Ok(())
500 }
501
502 #[test]
503 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
504 let elements =
512 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
513 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
514 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
515
516 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
517
518 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
519
520 assert_eq!(trimmed.elements().len(), 5);
522
523 assert_eq!(trimmed.offset_at(0), 0);
525 assert_eq!(trimmed.size_at(0), 2);
526 assert_eq!(trimmed.offset_at(1), 3);
527 assert_eq!(trimmed.size_at(1), 2);
528
529 assert_arrays_eq!(
531 trimmed.list_elements_at(0)?,
532 PrimitiveArray::from_iter([1i32, 2])
533 );
534
535 assert_arrays_eq!(
536 trimmed.list_elements_at(1)?,
537 PrimitiveArray::from_iter([3i32, 4])
538 );
539
540 #[expect(deprecated)]
542 let all_elements = trimmed.elements().to_primitive();
543 assert_eq!(
544 all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
545 97i32.into()
546 );
547 Ok(())
548 }
549
550 #[test]
551 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
552 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
558 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
559 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
560 let validity = Validity::from_iter(vec![true, true, false, false]);
561
562 let listview = ListViewArray::new(elements, offsets, sizes, validity);
563
564 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
566 assert!(rebuilt.is_zero_copy_to_list());
567
568 assert_eq!(rebuilt.offset_at(0), 0);
571 assert_eq!(rebuilt.offset_at(1), 2);
572 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
577 assert_eq!(rebuilt.size_at(1), 2);
578 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
584
585 assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
587 assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
588 assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
589 assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
590
591 assert_arrays_eq!(
593 exact.list_elements_at(0)?,
594 PrimitiveArray::from_iter([1i32, 2])
595 );
596
597 assert_arrays_eq!(
598 exact.list_elements_at(1)?,
599 PrimitiveArray::from_iter([3i32, 4])
600 );
601 Ok(())
602 }
603
604 #[test]
607 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
608 let mut elems = vec![0i32; 70_005];
609 elems[70_000] = 10;
610 elems[70_001] = 20;
611 elems[70_002] = 30;
612 elems[70_003] = 40;
613 let elements = PrimitiveArray::from_iter(elems).into_array();
614 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
615 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
616
617 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
618 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
619 assert_arrays_eq!(
620 trimmed.list_elements_at(1)?,
621 PrimitiveArray::from_iter([30i32, 40])
622 );
623 Ok(())
624 }
625
626 #[test]
629 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
630 let mut elems = vec![0i32; 70_001];
631 elems[3] = 30;
632 elems[4] = 40;
633 let elements = PrimitiveArray::from_iter(elems).into_array();
634 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
635 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
636
637 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
638 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
639 assert_arrays_eq!(
640 trimmed.list_elements_at(1)?,
641 PrimitiveArray::from_iter([30i32, 40])
642 );
643 Ok(())
644 }
645
646 #[test]
649 fn test_rebuild_preserves_signed_offset_and_size_types() -> VortexResult<()> {
650 use crate::dtype::PType;
651
652 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
654 let offsets = PrimitiveArray::from_iter(vec![0i32, 1]).into_array();
655 let sizes = PrimitiveArray::from_iter(vec![3i16, 2]).into_array();
656
657 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
658 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
659
660 assert_arrays_eq!(
662 rebuilt.list_elements_at(0)?,
663 PrimitiveArray::from_iter([1i32, 2, 3])
664 );
665 assert_arrays_eq!(
666 rebuilt.list_elements_at(1)?,
667 PrimitiveArray::from_iter([2i32, 3])
668 );
669
670 assert_eq!(rebuilt.offsets().dtype().as_ptype(), PType::I32);
672 assert_eq!(rebuilt.sizes().dtype().as_ptype(), PType::I16);
673 Ok(())
674 }
675
676 #[test]
679 fn heuristic_zero_lists_uses_take() {
680 assert!(ListViewArray::should_use_take(0, 0));
681 }
682
683 #[test]
684 fn heuristic_small_lists_use_take() {
685 assert!(ListViewArray::should_use_take(127_000, 1_000));
687 assert!(!ListViewArray::should_use_take(128_000, 1_000));
689 }
690
691 #[test]
694 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
695 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
696 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
697 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
698
699 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
700 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
701
702 assert_arrays_eq!(trimmed, listview);
704 Ok(())
705 }
706}