1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11#[expect(deprecated)]
12use crate::ToCanonical as _;
13use crate::VortexSessionExecute;
14use crate::aggregate_fn::fns::min_max::min_max;
15use crate::arrays::ConstantArray;
16use crate::arrays::ListViewArray;
17use crate::arrays::listview::ListViewArrayExt;
18use crate::builders::builder_with_capacity;
19use crate::builtins::ArrayBuiltins;
20use crate::dtype::DType;
21use crate::dtype::IntegerPType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::match_each_integer_ptype;
25use crate::scalar::Scalar;
26use crate::scalar_fn::fns::operators::Operator;
27
28pub enum ListViewRebuildMode {
30 MakeZeroCopyToList,
38
39 TrimElements,
42
43 MakeExact,
50
51 OverlapCompression,
58}
59
60impl ListViewArray {
61 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
63 if self.is_empty() {
64 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
66 }
67
68 match mode {
69 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
70 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
71 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
72 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
73 }
74 }
75
76 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
84 if self.is_zero_copy_to_list() {
85 return Ok(self.clone());
87 }
88
89 let offsets_ptype = self.offsets().dtype().as_ptype();
90 let sizes_ptype = self.sizes().dtype().as_ptype();
91
92 match_each_integer_ptype!(sizes_ptype, |S| {
100 match offsets_ptype {
101 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
102 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
103 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
104 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
105 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
106 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
107 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
108 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
109 _ => unreachable!("invalid offsets PType"),
110 }
111 })
112 }
113
114 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
118 &self,
119 ) -> VortexResult<ListViewArray> {
120 #[expect(deprecated)]
121 let sizes_canonical = self.sizes().to_primitive();
122 let total: u64 = sizes_canonical
123 .as_slice::<S>()
124 .iter()
125 .map(|s| (*s).as_() as u64)
126 .sum();
127 if Self::should_use_take(total, self.len()) {
128 self.rebuild_with_take::<O, NewOffset, S>()
129 } else {
130 self.rebuild_list_by_list::<O, NewOffset, S>()
131 }
132 }
133
134 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
142 if num_lists == 0 {
143 return true;
144 }
145 let avg = total_output_elements / num_lists as u64;
146 avg < 128
147 }
148
149 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
152 &self,
153 ) -> VortexResult<ListViewArray> {
154 #[expect(deprecated)]
155 let offsets_canonical = self.offsets().to_primitive();
156 let offsets_slice = offsets_canonical.as_slice::<O>();
157 #[expect(deprecated)]
158 let sizes_canonical = self.sizes().to_primitive();
159 let sizes_slice = sizes_canonical.as_slice::<S>();
160
161 let len = offsets_slice.len();
162
163 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
164 let mut new_sizes = BufferMut::<S>::with_capacity(len);
165 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
166
167 let mut n_elements = NewOffset::zero();
168 for index in 0..len {
169 if !self.validity()?.is_valid(index)? {
170 new_offsets.push(n_elements);
171 new_sizes.push(S::zero());
172 continue;
173 }
174
175 let offset = offsets_slice[index];
176 let size = sizes_slice[index];
177 let start = offset.as_();
178 let stop = start + size.as_();
179
180 new_offsets.push(n_elements);
181 new_sizes.push(size);
182 take_indices.extend(start as u64..stop as u64);
183 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
184 }
185
186 let elements = self.elements().take(take_indices.into_array())?;
187 let offsets = new_offsets.into_array();
188 let sizes = new_sizes.into_array();
189
190 Ok(unsafe {
194 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
195 .with_zero_copy_to_list(true)
196 })
197 }
198
199 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
202 &self,
203 ) -> VortexResult<ListViewArray> {
204 let element_dtype = self
205 .dtype()
206 .as_list_element_opt()
207 .vortex_expect("somehow had a canonical list that was not a list");
208
209 #[expect(deprecated)]
210 let offsets_canonical = self.offsets().to_primitive();
211 let offsets_slice = offsets_canonical.as_slice::<O>();
212 #[expect(deprecated)]
213 let sizes_canonical = self.sizes().to_primitive();
214 let sizes_slice = sizes_canonical.as_slice::<S>();
215
216 let len = offsets_slice.len();
217
218 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
219 let mut new_sizes = BufferMut::<S>::with_capacity(len);
224
225 #[expect(deprecated)]
227 let elements_canonical = self
228 .elements()
229 .to_canonical()
230 .vortex_expect("canonicalize elements for rebuild")
231 .into_array();
232
233 let mut new_elements_builder =
236 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
237
238 let mut n_elements = NewOffset::zero();
239 for index in 0..len {
240 if !self.validity()?.is_valid(index)? {
241 new_offsets.push(n_elements);
244 new_sizes.push(S::zero());
245 continue;
246 }
247
248 let offset = offsets_slice[index];
249 let size = sizes_slice[index];
250
251 let start = offset.as_();
252 let stop = start + size.as_();
253
254 new_offsets.push(n_elements);
255 new_sizes.push(size);
256 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
257
258 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
259 }
260
261 let offsets = new_offsets.into_array();
262 let sizes = new_sizes.into_array();
263 let elements = new_elements_builder.finish();
264
265 debug_assert_eq!(
266 n_elements.as_(),
267 elements.len(),
268 "The accumulated elements somehow had the wrong length"
269 );
270
271 Ok(unsafe {
280 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
281 .with_zero_copy_to_list(true)
282 })
283 }
284
285 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
289 let start = if self.is_zero_copy_to_list() {
290 self.offset_at(0)
294 } else {
295 let mut ctx = LEGACY_SESSION.create_execution_ctx();
296 self.offsets()
297 .statistics()
298 .compute_min(&mut ctx)
299 .vortex_expect(
300 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
301 )
302 };
303
304 let end = if self.is_zero_copy_to_list() {
305 let last_offset = self.offset_at(self.len() - 1);
308 let last_size = self.size_at(self.len() - 1);
309 last_offset + last_size
310 } else {
311 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
315 PType::U64
316 } else {
317 PType::I64
318 });
319 let offsets = self.offsets().cast(wide_dtype.clone())?;
320 let sizes = self.sizes().cast(wide_dtype)?;
321
322 let mut ctx = LEGACY_SESSION.create_execution_ctx();
323 let min_max = min_max(
324 &offsets
325 .binary(sizes, Operator::Add)
326 .vortex_expect("`offsets + sizes` somehow overflowed"),
327 &mut ctx,
328 )
329 .vortex_expect("Something went wrong while computing min and max")
330 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
331
332 min_max
333 .max
334 .as_primitive()
335 .as_::<usize>()
336 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
337 };
338
339 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
340 let offset = <O as FromPrimitive>::from_usize(start)
341 .vortex_expect("unable to convert the min offset `start` into a `usize`");
342 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
343
344 self.offsets()
345 .clone()
346 .binary(
347 ConstantArray::new(scalar, self.offsets().len()).into_array(),
348 Operator::Sub,
349 )
350 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
351 });
352
353 let sliced_elements = self.elements().slice(start..end)?;
354
355 Ok(unsafe {
360 ListViewArray::new_unchecked(
361 sliced_elements,
362 adjusted_offsets,
363 self.sizes().clone(),
364 self.validity()?,
365 )
366 .with_zero_copy_to_list(self.is_zero_copy_to_list())
367 })
368 }
369
370 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
371 if self.is_zero_copy_to_list() {
372 self.rebuild_trim_elements()
373 } else {
374 self.rebuild_zero_copy_to_list()
377 }
378 }
379}
380
381#[cfg(test)]
382mod tests {
383 use vortex_buffer::BitBuffer;
384 use vortex_error::VortexResult;
385
386 use super::ListViewRebuildMode;
387 use crate::IntoArray;
388 use crate::LEGACY_SESSION;
389 #[expect(deprecated)]
390 use crate::ToCanonical as _;
391 use crate::VortexSessionExecute;
392 use crate::arrays::ListViewArray;
393 use crate::arrays::PrimitiveArray;
394 use crate::arrays::listview::ListViewArrayExt;
395 use crate::assert_arrays_eq;
396 use crate::dtype::Nullability;
397 use crate::validity::Validity;
398
399 #[test]
400 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
401 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
405 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
406 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
407
408 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
409
410 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
411
412 assert_eq!(flattened.elements().len(), 5);
415
416 assert_eq!(flattened.offset_at(0), 0);
418 assert_eq!(flattened.size_at(0), 3);
419 assert_eq!(flattened.offset_at(1), 3);
420 assert_eq!(flattened.size_at(1), 2);
421
422 assert_arrays_eq!(
424 flattened.list_elements_at(0)?,
425 PrimitiveArray::from_iter([1i32, 2, 3])
426 );
427
428 assert_arrays_eq!(
429 flattened.list_elements_at(1)?,
430 PrimitiveArray::from_iter([2i32, 3])
431 );
432 Ok(())
433 }
434
435 #[test]
436 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
437 use crate::arrays::BoolArray;
438
439 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
441 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
442 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
443 let validity = Validity::Array(
444 BoolArray::new(
445 BitBuffer::from(vec![true, false, true]),
446 Validity::NonNullable,
447 )
448 .into_array(),
449 );
450
451 let listview = ListViewArray::new(elements, offsets, sizes, validity);
452
453 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
454
455 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
457 assert!(flattened.validity()?.is_valid(0)?);
458 assert!(!flattened.validity()?.is_valid(1)?);
459 assert!(flattened.validity()?.is_valid(2)?);
460
461 assert_arrays_eq!(
463 flattened.list_elements_at(0)?,
464 PrimitiveArray::from_iter([1i32, 2])
465 );
466
467 assert_arrays_eq!(
468 flattened.list_elements_at(2)?,
469 PrimitiveArray::from_iter([3i32])
470 );
471 Ok(())
472 }
473
474 #[test]
475 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
476 let elements =
484 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
485 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
486 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
487
488 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
489
490 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
491
492 assert_eq!(trimmed.elements().len(), 5);
494
495 assert_eq!(trimmed.offset_at(0), 0);
497 assert_eq!(trimmed.size_at(0), 2);
498 assert_eq!(trimmed.offset_at(1), 3);
499 assert_eq!(trimmed.size_at(1), 2);
500
501 assert_arrays_eq!(
503 trimmed.list_elements_at(0)?,
504 PrimitiveArray::from_iter([1i32, 2])
505 );
506
507 assert_arrays_eq!(
508 trimmed.list_elements_at(1)?,
509 PrimitiveArray::from_iter([3i32, 4])
510 );
511
512 #[expect(deprecated)]
514 let all_elements = trimmed.elements().to_primitive();
515 assert_eq!(
516 all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
517 97i32.into()
518 );
519 Ok(())
520 }
521
522 #[test]
523 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
524 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
530 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
531 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
532 let validity = Validity::from_iter(vec![true, true, false, false]);
533
534 let listview = ListViewArray::new(elements, offsets, sizes, validity);
535
536 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
538 assert!(rebuilt.is_zero_copy_to_list());
539
540 assert_eq!(rebuilt.offset_at(0), 0);
543 assert_eq!(rebuilt.offset_at(1), 2);
544 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
549 assert_eq!(rebuilt.size_at(1), 2);
550 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
556
557 assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
559 assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
560 assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
561 assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
562
563 assert_arrays_eq!(
565 exact.list_elements_at(0)?,
566 PrimitiveArray::from_iter([1i32, 2])
567 );
568
569 assert_arrays_eq!(
570 exact.list_elements_at(1)?,
571 PrimitiveArray::from_iter([3i32, 4])
572 );
573 Ok(())
574 }
575
576 #[test]
579 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
580 let mut elems = vec![0i32; 70_005];
581 elems[70_000] = 10;
582 elems[70_001] = 20;
583 elems[70_002] = 30;
584 elems[70_003] = 40;
585 let elements = PrimitiveArray::from_iter(elems).into_array();
586 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
587 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
588
589 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
590 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
591 assert_arrays_eq!(
592 trimmed.list_elements_at(1)?,
593 PrimitiveArray::from_iter([30i32, 40])
594 );
595 Ok(())
596 }
597
598 #[test]
601 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
602 let mut elems = vec![0i32; 70_001];
603 elems[3] = 30;
604 elems[4] = 40;
605 let elements = PrimitiveArray::from_iter(elems).into_array();
606 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
607 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
608
609 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
610 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
611 assert_arrays_eq!(
612 trimmed.list_elements_at(1)?,
613 PrimitiveArray::from_iter([30i32, 40])
614 );
615 Ok(())
616 }
617
618 #[test]
621 fn heuristic_zero_lists_uses_take() {
622 assert!(ListViewArray::should_use_take(0, 0));
623 }
624
625 #[test]
626 fn heuristic_small_lists_use_take() {
627 assert!(ListViewArray::should_use_take(127_000, 1_000));
629 assert!(!ListViewArray::should_use_take(128_000, 1_000));
631 }
632
633 #[test]
636 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
637 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
638 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
639 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
640
641 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
642 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
643
644 assert_arrays_eq!(trimmed, listview);
646 Ok(())
647 }
648}