1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11use crate::ToCanonical;
12use crate::VortexSessionExecute;
13use crate::aggregate_fn::fns::min_max::min_max;
14use crate::arrays::ConstantArray;
15use crate::arrays::ListViewArray;
16use crate::arrays::listview::ListViewArrayExt;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::DType;
20use crate::dtype::IntegerPType;
21use crate::dtype::Nullability;
22use crate::dtype::PType;
23use crate::match_each_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26
27pub enum ListViewRebuildMode {
29 MakeZeroCopyToList,
37
38 TrimElements,
41
42 MakeExact,
49
50 OverlapCompression,
57}
58
59impl ListViewArray {
60 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
62 if self.is_empty() {
63 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
65 }
66
67 match mode {
68 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
69 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
70 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
71 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
72 }
73 }
74
75 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
83 if self.is_zero_copy_to_list() {
84 return Ok(self.clone());
86 }
87
88 let offsets_ptype = self.offsets().dtype().as_ptype();
89 let sizes_ptype = self.sizes().dtype().as_ptype();
90
91 match_each_integer_ptype!(sizes_ptype, |S| {
99 match offsets_ptype {
100 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
101 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
102 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
103 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
104 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
105 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
106 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
107 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
108 _ => unreachable!("invalid offsets PType"),
109 }
110 })
111 }
112
113 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
117 &self,
118 ) -> VortexResult<ListViewArray> {
119 let sizes_canonical = self.sizes().to_primitive();
120 let total: u64 = sizes_canonical
121 .as_slice::<S>()
122 .iter()
123 .map(|s| (*s).as_() as u64)
124 .sum();
125 if Self::should_use_take(total, self.len()) {
126 self.rebuild_with_take::<O, NewOffset, S>()
127 } else {
128 self.rebuild_list_by_list::<O, NewOffset, S>()
129 }
130 }
131
132 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
140 if num_lists == 0 {
141 return true;
142 }
143 let avg = total_output_elements / num_lists as u64;
144 avg < 128
145 }
146
147 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
150 &self,
151 ) -> VortexResult<ListViewArray> {
152 let offsets_canonical = self.offsets().to_primitive();
153 let offsets_slice = offsets_canonical.as_slice::<O>();
154 let sizes_canonical = self.sizes().to_primitive();
155 let sizes_slice = sizes_canonical.as_slice::<S>();
156
157 let len = offsets_slice.len();
158
159 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
160 let mut new_sizes = BufferMut::<S>::with_capacity(len);
161 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
162
163 let mut n_elements = NewOffset::zero();
164 for index in 0..len {
165 if !self.validity()?.is_valid(index)? {
166 new_offsets.push(n_elements);
167 new_sizes.push(S::zero());
168 continue;
169 }
170
171 let offset = offsets_slice[index];
172 let size = sizes_slice[index];
173 let start = offset.as_();
174 let stop = start + size.as_();
175
176 new_offsets.push(n_elements);
177 new_sizes.push(size);
178 take_indices.extend(start as u64..stop as u64);
179 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
180 }
181
182 let elements = self.elements().take(take_indices.into_array())?;
183 let offsets = new_offsets.into_array();
184 let sizes = new_sizes.into_array();
185
186 Ok(unsafe {
190 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
191 .with_zero_copy_to_list(true)
192 })
193 }
194
195 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
198 &self,
199 ) -> VortexResult<ListViewArray> {
200 let element_dtype = self
201 .dtype()
202 .as_list_element_opt()
203 .vortex_expect("somehow had a canonical list that was not a list");
204
205 let offsets_canonical = self.offsets().to_primitive();
206 let offsets_slice = offsets_canonical.as_slice::<O>();
207 let sizes_canonical = self.sizes().to_primitive();
208 let sizes_slice = sizes_canonical.as_slice::<S>();
209
210 let len = offsets_slice.len();
211
212 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
213 let mut new_sizes = BufferMut::<S>::with_capacity(len);
218
219 let elements_canonical = self
221 .elements()
222 .to_canonical()
223 .vortex_expect("canonicalize elements for rebuild")
224 .into_array();
225
226 let mut new_elements_builder =
229 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
230
231 let mut n_elements = NewOffset::zero();
232 for index in 0..len {
233 if !self.validity()?.is_valid(index)? {
234 new_offsets.push(n_elements);
237 new_sizes.push(S::zero());
238 continue;
239 }
240
241 let offset = offsets_slice[index];
242 let size = sizes_slice[index];
243
244 let start = offset.as_();
245 let stop = start + size.as_();
246
247 new_offsets.push(n_elements);
248 new_sizes.push(size);
249 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
250
251 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
252 }
253
254 let offsets = new_offsets.into_array();
255 let sizes = new_sizes.into_array();
256 let elements = new_elements_builder.finish();
257
258 debug_assert_eq!(
259 n_elements.as_(),
260 elements.len(),
261 "The accumulated elements somehow had the wrong length"
262 );
263
264 Ok(unsafe {
273 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
274 .with_zero_copy_to_list(true)
275 })
276 }
277
278 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
282 let start = if self.is_zero_copy_to_list() {
283 self.offset_at(0)
287 } else {
288 let mut ctx = LEGACY_SESSION.create_execution_ctx();
289 self.offsets()
290 .statistics()
291 .compute_min(&mut ctx)
292 .vortex_expect(
293 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
294 )
295 };
296
297 let end = if self.is_zero_copy_to_list() {
298 let last_offset = self.offset_at(self.len() - 1);
301 let last_size = self.size_at(self.len() - 1);
302 last_offset + last_size
303 } else {
304 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
308 PType::U64
309 } else {
310 PType::I64
311 });
312 let offsets = self.offsets().cast(wide_dtype.clone())?;
313 let sizes = self.sizes().cast(wide_dtype)?;
314
315 let mut ctx = LEGACY_SESSION.create_execution_ctx();
316 let min_max = min_max(
317 &offsets
318 .binary(sizes, Operator::Add)
319 .vortex_expect("`offsets + sizes` somehow overflowed"),
320 &mut ctx,
321 )
322 .vortex_expect("Something went wrong while computing min and max")
323 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
324
325 min_max
326 .max
327 .as_primitive()
328 .as_::<usize>()
329 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
330 };
331
332 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
333 let offset = <O as FromPrimitive>::from_usize(start)
334 .vortex_expect("unable to convert the min offset `start` into a `usize`");
335 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
336
337 self.offsets()
338 .clone()
339 .binary(
340 ConstantArray::new(scalar, self.offsets().len()).into_array(),
341 Operator::Sub,
342 )
343 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
344 });
345
346 let sliced_elements = self.elements().slice(start..end)?;
347
348 Ok(unsafe {
353 ListViewArray::new_unchecked(
354 sliced_elements,
355 adjusted_offsets,
356 self.sizes().clone(),
357 self.validity()?,
358 )
359 .with_zero_copy_to_list(self.is_zero_copy_to_list())
360 })
361 }
362
363 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
364 if self.is_zero_copy_to_list() {
365 self.rebuild_trim_elements()
366 } else {
367 self.rebuild_zero_copy_to_list()
370 }
371 }
372}
373
374#[cfg(test)]
375mod tests {
376 use vortex_buffer::BitBuffer;
377 use vortex_error::VortexResult;
378
379 use super::ListViewRebuildMode;
380 use crate::IntoArray;
381 use crate::LEGACY_SESSION;
382 use crate::ToCanonical;
383 use crate::VortexSessionExecute;
384 use crate::arrays::ListViewArray;
385 use crate::arrays::PrimitiveArray;
386 use crate::arrays::listview::ListViewArrayExt;
387 use crate::assert_arrays_eq;
388 use crate::dtype::Nullability;
389 use crate::validity::Validity;
390
391 #[test]
392 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
393 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
397 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
398 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
399
400 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
401
402 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
403
404 assert_eq!(flattened.elements().len(), 5);
407
408 assert_eq!(flattened.offset_at(0), 0);
410 assert_eq!(flattened.size_at(0), 3);
411 assert_eq!(flattened.offset_at(1), 3);
412 assert_eq!(flattened.size_at(1), 2);
413
414 assert_arrays_eq!(
416 flattened.list_elements_at(0).unwrap(),
417 PrimitiveArray::from_iter([1i32, 2, 3])
418 );
419
420 assert_arrays_eq!(
421 flattened.list_elements_at(1).unwrap(),
422 PrimitiveArray::from_iter([2i32, 3])
423 );
424 Ok(())
425 }
426
427 #[test]
428 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
429 use crate::arrays::BoolArray;
430
431 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
433 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
434 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
435 let validity = Validity::Array(
436 BoolArray::new(
437 BitBuffer::from(vec![true, false, true]),
438 Validity::NonNullable,
439 )
440 .into_array(),
441 );
442
443 let listview = ListViewArray::new(elements, offsets, sizes, validity);
444
445 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
446
447 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
449 assert!(flattened.validity()?.is_valid(0).unwrap());
450 assert!(!flattened.validity()?.is_valid(1).unwrap());
451 assert!(flattened.validity()?.is_valid(2).unwrap());
452
453 assert_arrays_eq!(
455 flattened.list_elements_at(0).unwrap(),
456 PrimitiveArray::from_iter([1i32, 2])
457 );
458
459 assert_arrays_eq!(
460 flattened.list_elements_at(2).unwrap(),
461 PrimitiveArray::from_iter([3i32])
462 );
463 Ok(())
464 }
465
466 #[test]
467 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
468 let elements =
476 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
477 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
478 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
479
480 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
481
482 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
483
484 assert_eq!(trimmed.elements().len(), 5);
486
487 assert_eq!(trimmed.offset_at(0), 0);
489 assert_eq!(trimmed.size_at(0), 2);
490 assert_eq!(trimmed.offset_at(1), 3);
491 assert_eq!(trimmed.size_at(1), 2);
492
493 assert_arrays_eq!(
495 trimmed.list_elements_at(0).unwrap(),
496 PrimitiveArray::from_iter([1i32, 2])
497 );
498
499 assert_arrays_eq!(
500 trimmed.list_elements_at(1).unwrap(),
501 PrimitiveArray::from_iter([3i32, 4])
502 );
503
504 let all_elements = trimmed.elements().to_primitive();
506 assert_eq!(
507 all_elements
508 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
509 .unwrap(),
510 97i32.into()
511 );
512 Ok(())
513 }
514
515 #[test]
516 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
517 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
523 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
524 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
525 let validity = Validity::from_iter(vec![true, true, false, false]);
526
527 let listview = ListViewArray::new(elements, offsets, sizes, validity);
528
529 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
531 assert!(rebuilt.is_zero_copy_to_list());
532
533 assert_eq!(rebuilt.offset_at(0), 0);
536 assert_eq!(rebuilt.offset_at(1), 2);
537 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
542 assert_eq!(rebuilt.size_at(1), 2);
543 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
549
550 assert!(
552 exact
553 .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
554 .unwrap()
555 );
556 assert!(
557 exact
558 .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
559 .unwrap()
560 );
561 assert!(
562 !exact
563 .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
564 .unwrap()
565 );
566 assert!(
567 !exact
568 .is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())
569 .unwrap()
570 );
571
572 assert_arrays_eq!(
574 exact.list_elements_at(0).unwrap(),
575 PrimitiveArray::from_iter([1i32, 2])
576 );
577
578 assert_arrays_eq!(
579 exact.list_elements_at(1).unwrap(),
580 PrimitiveArray::from_iter([3i32, 4])
581 );
582 Ok(())
583 }
584
585 #[test]
588 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
589 let mut elems = vec![0i32; 70_005];
590 elems[70_000] = 10;
591 elems[70_001] = 20;
592 elems[70_002] = 30;
593 elems[70_003] = 40;
594 let elements = PrimitiveArray::from_iter(elems).into_array();
595 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
596 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
597
598 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
599 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
600 assert_arrays_eq!(
601 trimmed.list_elements_at(1).unwrap(),
602 PrimitiveArray::from_iter([30i32, 40])
603 );
604 Ok(())
605 }
606
607 #[test]
610 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
611 let mut elems = vec![0i32; 70_001];
612 elems[3] = 30;
613 elems[4] = 40;
614 let elements = PrimitiveArray::from_iter(elems).into_array();
615 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
616 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
617
618 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
619 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
620 assert_arrays_eq!(
621 trimmed.list_elements_at(1).unwrap(),
622 PrimitiveArray::from_iter([30i32, 40])
623 );
624 Ok(())
625 }
626
627 #[test]
630 fn heuristic_zero_lists_uses_take() {
631 assert!(ListViewArray::should_use_take(0, 0));
632 }
633
634 #[test]
635 fn heuristic_small_lists_use_take() {
636 assert!(ListViewArray::should_use_take(127_000, 1_000));
638 assert!(!ListViewArray::should_use_take(128_000, 1_000));
640 }
641
642 #[test]
645 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
646 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
647 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
648 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
649
650 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
651 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
652
653 assert_arrays_eq!(trimmed, listview);
655 Ok(())
656 }
657}