1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11#[expect(deprecated)]
12use crate::ToCanonical as _;
13use crate::VortexSessionExecute;
14use crate::aggregate_fn::fns::min_max::min_max;
15use crate::arrays::ConstantArray;
16use crate::arrays::ListViewArray;
17use crate::arrays::listview::ListViewArrayExt;
18use crate::builders::builder_with_capacity;
19use crate::builtins::ArrayBuiltins;
20use crate::dtype::DType;
21use crate::dtype::IntegerPType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::match_each_integer_ptype;
25use crate::scalar::Scalar;
26use crate::scalar_fn::fns::operators::Operator;
27
28pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
37
38pub enum ListViewRebuildMode {
40 MakeZeroCopyToList,
48
49 TrimElements,
52
53 MakeExact,
60
61 OverlapCompression,
68}
69
70impl ListViewArray {
71 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
73 if self.is_empty() {
74 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
76 }
77
78 match mode {
79 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
80 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
81 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
82 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
83 }
84 }
85
86 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
94 if self.is_zero_copy_to_list() {
95 return Ok(self.clone());
97 }
98
99 let offsets_ptype = self.offsets().dtype().as_ptype();
100 let sizes_ptype = self.sizes().dtype().as_ptype();
101
102 match_each_integer_ptype!(sizes_ptype, |S| {
110 match offsets_ptype {
111 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
112 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
113 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
114 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
115 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
116 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
117 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
118 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
119 _ => unreachable!("invalid offsets PType"),
120 }
121 })
122 }
123
124 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
128 &self,
129 ) -> VortexResult<ListViewArray> {
130 #[expect(deprecated)]
131 let sizes_canonical = self.sizes().to_primitive();
132 let total: u64 = sizes_canonical
133 .as_slice::<S>()
134 .iter()
135 .map(|s| (*s).as_() as u64)
136 .sum();
137 if Self::should_use_take(total, self.len()) {
138 self.rebuild_with_take::<O, NewOffset, S>()
139 } else {
140 self.rebuild_list_by_list::<O, NewOffset, S>()
141 }
142 }
143
144 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
152 if num_lists == 0 {
153 return true;
154 }
155 let avg = total_output_elements / num_lists as u64;
156 avg < 128
157 }
158
159 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
162 &self,
163 ) -> VortexResult<ListViewArray> {
164 #[expect(deprecated)]
165 let offsets_canonical = self.offsets().to_primitive();
166 let offsets_slice = offsets_canonical.as_slice::<O>();
167 #[expect(deprecated)]
168 let sizes_canonical = self.sizes().to_primitive();
169 let sizes_slice = sizes_canonical.as_slice::<S>();
170
171 let len = offsets_slice.len();
172
173 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
174 let mut new_sizes = BufferMut::<S>::with_capacity(len);
175 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
176
177 let mut n_elements = NewOffset::zero();
178 for index in 0..len {
179 if !self.validity()?.is_valid(index)? {
180 new_offsets.push(n_elements);
181 new_sizes.push(S::zero());
182 continue;
183 }
184
185 let offset = offsets_slice[index];
186 let size = sizes_slice[index];
187 let start = offset.as_();
188 let stop = start + size.as_();
189
190 new_offsets.push(n_elements);
191 new_sizes.push(size);
192 take_indices.extend(start as u64..stop as u64);
193 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
194 }
195
196 let elements = self.elements().take(take_indices.into_array())?;
197 let offsets = new_offsets.into_array();
198 let sizes = new_sizes.into_array();
199
200 Ok(unsafe {
204 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
205 .with_zero_copy_to_list(true)
206 })
207 }
208
209 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
212 &self,
213 ) -> VortexResult<ListViewArray> {
214 let element_dtype = self
215 .dtype()
216 .as_list_element_opt()
217 .vortex_expect("somehow had a canonical list that was not a list");
218
219 #[expect(deprecated)]
220 let offsets_canonical = self.offsets().to_primitive();
221 let offsets_slice = offsets_canonical.as_slice::<O>();
222 #[expect(deprecated)]
223 let sizes_canonical = self.sizes().to_primitive();
224 let sizes_slice = sizes_canonical.as_slice::<S>();
225
226 let len = offsets_slice.len();
227
228 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
229 let mut new_sizes = BufferMut::<S>::with_capacity(len);
234
235 #[expect(deprecated)]
237 let elements_canonical = self
238 .elements()
239 .to_canonical()
240 .vortex_expect("canonicalize elements for rebuild")
241 .into_array();
242
243 let mut new_elements_builder =
246 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
247
248 let mut n_elements = NewOffset::zero();
249 for index in 0..len {
250 if !self.validity()?.is_valid(index)? {
251 new_offsets.push(n_elements);
254 new_sizes.push(S::zero());
255 continue;
256 }
257
258 let offset = offsets_slice[index];
259 let size = sizes_slice[index];
260
261 let start = offset.as_();
262 let stop = start + size.as_();
263
264 new_offsets.push(n_elements);
265 new_sizes.push(size);
266 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
267
268 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
269 }
270
271 let offsets = new_offsets.into_array();
272 let sizes = new_sizes.into_array();
273 let elements = new_elements_builder.finish();
274
275 debug_assert_eq!(
276 n_elements.as_(),
277 elements.len(),
278 "The accumulated elements somehow had the wrong length"
279 );
280
281 Ok(unsafe {
290 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
291 .with_zero_copy_to_list(true)
292 })
293 }
294
295 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
299 let start = if self.is_zero_copy_to_list() {
300 self.offset_at(0)
304 } else {
305 let mut ctx = LEGACY_SESSION.create_execution_ctx();
306 self.offsets()
307 .statistics()
308 .compute_min(&mut ctx)
309 .vortex_expect(
310 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
311 )
312 };
313
314 let end = if self.is_zero_copy_to_list() {
315 let last_offset = self.offset_at(self.len() - 1);
318 let last_size = self.size_at(self.len() - 1);
319 last_offset + last_size
320 } else {
321 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
325 PType::U64
326 } else {
327 PType::I64
328 });
329 let offsets = self.offsets().cast(wide_dtype.clone())?;
330 let sizes = self.sizes().cast(wide_dtype)?;
331
332 let mut ctx = LEGACY_SESSION.create_execution_ctx();
333 let min_max = min_max(
334 &offsets
335 .binary(sizes, Operator::Add)
336 .vortex_expect("`offsets + sizes` somehow overflowed"),
337 &mut ctx,
338 )
339 .vortex_expect("Something went wrong while computing min and max")
340 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
341
342 min_max
343 .max
344 .as_primitive()
345 .as_::<usize>()
346 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
347 };
348
349 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
350 let offset = <O as FromPrimitive>::from_usize(start)
351 .vortex_expect("unable to convert the min offset `start` into a `usize`");
352 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
353
354 self.offsets()
355 .clone()
356 .binary(
357 ConstantArray::new(scalar, self.offsets().len()).into_array(),
358 Operator::Sub,
359 )
360 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
361 });
362
363 let sliced_elements = self.elements().slice(start..end)?;
364
365 Ok(unsafe {
370 ListViewArray::new_unchecked(
371 sliced_elements,
372 adjusted_offsets,
373 self.sizes().clone(),
374 self.validity()?,
375 )
376 .with_zero_copy_to_list(self.is_zero_copy_to_list())
377 })
378 }
379
380 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
381 if self.is_zero_copy_to_list() {
382 self.rebuild_trim_elements()
383 } else {
384 self.rebuild_zero_copy_to_list()
387 }
388 }
389}
390
391#[cfg(test)]
392mod tests {
393 use vortex_buffer::BitBuffer;
394 use vortex_error::VortexResult;
395
396 use super::ListViewRebuildMode;
397 use crate::IntoArray;
398 use crate::LEGACY_SESSION;
399 #[expect(deprecated)]
400 use crate::ToCanonical as _;
401 use crate::VortexSessionExecute;
402 use crate::arrays::ListViewArray;
403 use crate::arrays::PrimitiveArray;
404 use crate::arrays::listview::ListViewArrayExt;
405 use crate::assert_arrays_eq;
406 use crate::dtype::Nullability;
407 use crate::validity::Validity;
408
409 #[test]
410 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
411 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
415 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
416 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
417
418 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
419
420 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
421
422 assert_eq!(flattened.elements().len(), 5);
425
426 assert_eq!(flattened.offset_at(0), 0);
428 assert_eq!(flattened.size_at(0), 3);
429 assert_eq!(flattened.offset_at(1), 3);
430 assert_eq!(flattened.size_at(1), 2);
431
432 assert_arrays_eq!(
434 flattened.list_elements_at(0)?,
435 PrimitiveArray::from_iter([1i32, 2, 3])
436 );
437
438 assert_arrays_eq!(
439 flattened.list_elements_at(1)?,
440 PrimitiveArray::from_iter([2i32, 3])
441 );
442 Ok(())
443 }
444
445 #[test]
446 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
447 use crate::arrays::BoolArray;
448
449 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
451 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
452 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
453 let validity = Validity::Array(
454 BoolArray::new(
455 BitBuffer::from(vec![true, false, true]),
456 Validity::NonNullable,
457 )
458 .into_array(),
459 );
460
461 let listview = ListViewArray::new(elements, offsets, sizes, validity);
462
463 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
464
465 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
467 assert!(flattened.validity()?.is_valid(0)?);
468 assert!(!flattened.validity()?.is_valid(1)?);
469 assert!(flattened.validity()?.is_valid(2)?);
470
471 assert_arrays_eq!(
473 flattened.list_elements_at(0)?,
474 PrimitiveArray::from_iter([1i32, 2])
475 );
476
477 assert_arrays_eq!(
478 flattened.list_elements_at(2)?,
479 PrimitiveArray::from_iter([3i32])
480 );
481 Ok(())
482 }
483
484 #[test]
485 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
486 let elements =
494 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
495 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
496 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
497
498 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
499
500 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
501
502 assert_eq!(trimmed.elements().len(), 5);
504
505 assert_eq!(trimmed.offset_at(0), 0);
507 assert_eq!(trimmed.size_at(0), 2);
508 assert_eq!(trimmed.offset_at(1), 3);
509 assert_eq!(trimmed.size_at(1), 2);
510
511 assert_arrays_eq!(
513 trimmed.list_elements_at(0)?,
514 PrimitiveArray::from_iter([1i32, 2])
515 );
516
517 assert_arrays_eq!(
518 trimmed.list_elements_at(1)?,
519 PrimitiveArray::from_iter([3i32, 4])
520 );
521
522 #[expect(deprecated)]
524 let all_elements = trimmed.elements().to_primitive();
525 assert_eq!(
526 all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
527 97i32.into()
528 );
529 Ok(())
530 }
531
532 #[test]
533 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
534 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
540 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
541 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
542 let validity = Validity::from_iter(vec![true, true, false, false]);
543
544 let listview = ListViewArray::new(elements, offsets, sizes, validity);
545
546 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
548 assert!(rebuilt.is_zero_copy_to_list());
549
550 assert_eq!(rebuilt.offset_at(0), 0);
553 assert_eq!(rebuilt.offset_at(1), 2);
554 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
559 assert_eq!(rebuilt.size_at(1), 2);
560 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
566
567 assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
569 assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
570 assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
571 assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
572
573 assert_arrays_eq!(
575 exact.list_elements_at(0)?,
576 PrimitiveArray::from_iter([1i32, 2])
577 );
578
579 assert_arrays_eq!(
580 exact.list_elements_at(1)?,
581 PrimitiveArray::from_iter([3i32, 4])
582 );
583 Ok(())
584 }
585
586 #[test]
589 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
590 let mut elems = vec![0i32; 70_005];
591 elems[70_000] = 10;
592 elems[70_001] = 20;
593 elems[70_002] = 30;
594 elems[70_003] = 40;
595 let elements = PrimitiveArray::from_iter(elems).into_array();
596 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
597 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
598
599 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
600 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
601 assert_arrays_eq!(
602 trimmed.list_elements_at(1)?,
603 PrimitiveArray::from_iter([30i32, 40])
604 );
605 Ok(())
606 }
607
608 #[test]
611 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
612 let mut elems = vec![0i32; 70_001];
613 elems[3] = 30;
614 elems[4] = 40;
615 let elements = PrimitiveArray::from_iter(elems).into_array();
616 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
617 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
618
619 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
620 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
621 assert_arrays_eq!(
622 trimmed.list_elements_at(1)?,
623 PrimitiveArray::from_iter([30i32, 40])
624 );
625 Ok(())
626 }
627
628 #[test]
631 fn heuristic_zero_lists_uses_take() {
632 assert!(ListViewArray::should_use_take(0, 0));
633 }
634
635 #[test]
636 fn heuristic_small_lists_use_take() {
637 assert!(ListViewArray::should_use_take(127_000, 1_000));
639 assert!(!ListViewArray::should_use_take(128_000, 1_000));
641 }
642
643 #[test]
646 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
647 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
648 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
649 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
650
651 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
652 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
653
654 assert_arrays_eq!(trimmed, listview);
656 Ok(())
657 }
658}