1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::DynArray;
10use crate::IntoArray;
11use crate::LEGACY_SESSION;
12use crate::ToCanonical;
13use crate::VortexSessionExecute;
14use crate::aggregate_fn::fns::min_max::min_max;
15use crate::arrays::ConstantArray;
16use crate::arrays::ListViewArray;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::DType;
20use crate::dtype::IntegerPType;
21use crate::dtype::Nullability;
22use crate::dtype::PType;
23use crate::match_each_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26use crate::vtable::ValidityHelper;
27
28pub enum ListViewRebuildMode {
30 MakeZeroCopyToList,
38
39 TrimElements,
42
43 MakeExact,
50
51 OverlapCompression,
58}
59
60impl ListViewArray {
61 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
63 if self.is_empty() {
64 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
66 }
67
68 match mode {
69 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
70 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
71 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
72 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
73 }
74 }
75
76 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
84 if self.is_zero_copy_to_list() {
85 return Ok(self.clone());
87 }
88
89 let offsets_ptype = self.offsets().dtype().as_ptype();
90 let sizes_ptype = self.sizes().dtype().as_ptype();
91
92 match_each_integer_ptype!(sizes_ptype, |S| {
100 match offsets_ptype {
101 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
102 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
103 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
104 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
105 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
106 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
107 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
108 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
109 _ => unreachable!("invalid offsets PType"),
110 }
111 })
112 }
113
114 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
118 &self,
119 ) -> VortexResult<ListViewArray> {
120 let sizes_canonical = self.sizes().to_primitive();
121 let total: u64 = sizes_canonical
122 .as_slice::<S>()
123 .iter()
124 .map(|s| (*s).as_() as u64)
125 .sum();
126 if Self::should_use_take(total, self.len()) {
127 self.rebuild_with_take::<O, NewOffset, S>()
128 } else {
129 self.rebuild_list_by_list::<O, NewOffset, S>()
130 }
131 }
132
133 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
141 if num_lists == 0 {
142 return true;
143 }
144 let avg = total_output_elements / num_lists as u64;
145 avg < 128
146 }
147
148 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
151 &self,
152 ) -> VortexResult<ListViewArray> {
153 let offsets_canonical = self.offsets().to_primitive();
154 let offsets_slice = offsets_canonical.as_slice::<O>();
155 let sizes_canonical = self.sizes().to_primitive();
156 let sizes_slice = sizes_canonical.as_slice::<S>();
157
158 let len = offsets_slice.len();
159
160 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
161 let mut new_sizes = BufferMut::<S>::with_capacity(len);
162 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
163
164 let mut n_elements = NewOffset::zero();
165 for index in 0..len {
166 if !self.is_valid(index)? {
167 new_offsets.push(n_elements);
168 new_sizes.push(S::zero());
169 continue;
170 }
171
172 let offset = offsets_slice[index];
173 let size = sizes_slice[index];
174 let start = offset.as_();
175 let stop = start + size.as_();
176
177 new_offsets.push(n_elements);
178 new_sizes.push(size);
179 take_indices.extend(start as u64..stop as u64);
180 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
181 }
182
183 let elements = self.elements().take(take_indices.into_array())?;
184 let offsets = new_offsets.into_array();
185 let sizes = new_sizes.into_array();
186
187 Ok(unsafe {
191 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
192 .with_zero_copy_to_list(true)
193 })
194 }
195
196 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
199 &self,
200 ) -> VortexResult<ListViewArray> {
201 let element_dtype = self
202 .dtype()
203 .as_list_element_opt()
204 .vortex_expect("somehow had a canonical list that was not a list");
205
206 let offsets_canonical = self.offsets().to_primitive();
207 let offsets_slice = offsets_canonical.as_slice::<O>();
208 let sizes_canonical = self.sizes().to_primitive();
209 let sizes_slice = sizes_canonical.as_slice::<S>();
210
211 let len = offsets_slice.len();
212
213 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
214 let mut new_sizes = BufferMut::<S>::with_capacity(len);
219
220 let elements_canonical = self
222 .elements()
223 .to_canonical()
224 .vortex_expect("canonicalize elements for rebuild")
225 .into_array();
226
227 let mut new_elements_builder =
230 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
231
232 let mut n_elements = NewOffset::zero();
233 for index in 0..len {
234 if !self.is_valid(index)? {
235 new_offsets.push(n_elements);
238 new_sizes.push(S::zero());
239 continue;
240 }
241
242 let offset = offsets_slice[index];
243 let size = sizes_slice[index];
244
245 let start = offset.as_();
246 let stop = start + size.as_();
247
248 new_offsets.push(n_elements);
249 new_sizes.push(size);
250 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
251
252 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
253 }
254
255 let offsets = new_offsets.into_array();
256 let sizes = new_sizes.into_array();
257 let elements = new_elements_builder.finish();
258
259 debug_assert_eq!(
260 n_elements.as_(),
261 elements.len(),
262 "The accumulated elements somehow had the wrong length"
263 );
264
265 Ok(unsafe {
274 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
275 .with_zero_copy_to_list(true)
276 })
277 }
278
279 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
283 let start = if self.is_zero_copy_to_list() {
284 self.offset_at(0)
288 } else {
289 self.offsets().statistics().compute_min().vortex_expect(
290 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
291 )
292 };
293
294 let end = if self.is_zero_copy_to_list() {
295 let last_offset = self.offset_at(self.len() - 1);
298 let last_size = self.size_at(self.len() - 1);
299 last_offset + last_size
300 } else {
301 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
305 PType::U64
306 } else {
307 PType::I64
308 });
309 let offsets = self.offsets().cast(wide_dtype.clone())?;
310 let sizes = self.sizes().cast(wide_dtype)?;
311
312 let mut ctx = LEGACY_SESSION.create_execution_ctx();
313 let min_max = min_max(
314 &offsets
315 .binary(sizes, Operator::Add)
316 .vortex_expect("`offsets + sizes` somehow overflowed"),
317 &mut ctx,
318 )
319 .vortex_expect("Something went wrong while computing min and max")
320 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
321
322 min_max
323 .max
324 .as_primitive()
325 .as_::<usize>()
326 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
327 };
328
329 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
330 let offset = <O as FromPrimitive>::from_usize(start)
331 .vortex_expect("unable to convert the min offset `start` into a `usize`");
332 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
333
334 self.offsets()
335 .to_array()
336 .binary(
337 ConstantArray::new(scalar, self.offsets().len()).into_array(),
338 Operator::Sub,
339 )
340 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
341 });
342
343 let sliced_elements = self.elements().slice(start..end)?;
344
345 Ok(unsafe {
350 ListViewArray::new_unchecked(
351 sliced_elements,
352 adjusted_offsets,
353 self.sizes().clone(),
354 self.validity().clone(),
355 )
356 .with_zero_copy_to_list(self.is_zero_copy_to_list())
357 })
358 }
359
360 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
361 if self.is_zero_copy_to_list() {
362 self.rebuild_trim_elements()
363 } else {
364 self.rebuild_zero_copy_to_list()
367 }
368 }
369}
370
371#[cfg(test)]
372#[allow(clippy::cast_possible_truncation)]
373mod tests {
374 use vortex_buffer::BitBuffer;
375 use vortex_error::VortexResult;
376
377 use super::ListViewRebuildMode;
378 use crate::IntoArray;
379 use crate::ToCanonical;
380 use crate::arrays::ListViewArray;
381 use crate::arrays::PrimitiveArray;
382 use crate::assert_arrays_eq;
383 use crate::dtype::Nullability;
384 use crate::validity::Validity;
385 use crate::vtable::ValidityHelper;
386
387 #[test]
388 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
389 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
393 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
394 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
395
396 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
397
398 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
399
400 assert_eq!(flattened.elements().len(), 5);
403
404 assert_eq!(flattened.offset_at(0), 0);
406 assert_eq!(flattened.size_at(0), 3);
407 assert_eq!(flattened.offset_at(1), 3);
408 assert_eq!(flattened.size_at(1), 2);
409
410 assert_arrays_eq!(
412 flattened.list_elements_at(0).unwrap(),
413 PrimitiveArray::from_iter([1i32, 2, 3])
414 );
415
416 assert_arrays_eq!(
417 flattened.list_elements_at(1).unwrap(),
418 PrimitiveArray::from_iter([2i32, 3])
419 );
420 Ok(())
421 }
422
423 #[test]
424 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
425 use crate::arrays::BoolArray;
426
427 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
429 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
430 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
431 let validity = Validity::Array(
432 BoolArray::new(
433 BitBuffer::from(vec![true, false, true]),
434 Validity::NonNullable,
435 )
436 .into_array(),
437 );
438
439 let listview = ListViewArray::new(elements, offsets, sizes, validity);
440
441 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
442
443 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
445 assert!(flattened.validity().is_valid(0).unwrap());
446 assert!(!flattened.validity().is_valid(1).unwrap());
447 assert!(flattened.validity().is_valid(2).unwrap());
448
449 assert_arrays_eq!(
451 flattened.list_elements_at(0).unwrap(),
452 PrimitiveArray::from_iter([1i32, 2])
453 );
454
455 assert_arrays_eq!(
456 flattened.list_elements_at(2).unwrap(),
457 PrimitiveArray::from_iter([3i32])
458 );
459 Ok(())
460 }
461
462 #[test]
463 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
464 let elements =
472 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
473 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
474 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
475
476 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
477
478 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
479
480 assert_eq!(trimmed.elements().len(), 5);
482
483 assert_eq!(trimmed.offset_at(0), 0);
485 assert_eq!(trimmed.size_at(0), 2);
486 assert_eq!(trimmed.offset_at(1), 3);
487 assert_eq!(trimmed.size_at(1), 2);
488
489 assert_arrays_eq!(
491 trimmed.list_elements_at(0).unwrap(),
492 PrimitiveArray::from_iter([1i32, 2])
493 );
494
495 assert_arrays_eq!(
496 trimmed.list_elements_at(1).unwrap(),
497 PrimitiveArray::from_iter([3i32, 4])
498 );
499
500 let all_elements = trimmed.elements().to_primitive();
502 assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
503 Ok(())
504 }
505
506 #[test]
507 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
508 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
514 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
515 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
516 let validity = Validity::from_iter(vec![true, true, false, false]);
517
518 let listview = ListViewArray::new(elements, offsets, sizes, validity);
519
520 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
522 assert!(rebuilt.is_zero_copy_to_list());
523
524 assert_eq!(rebuilt.offset_at(0), 0);
527 assert_eq!(rebuilt.offset_at(1), 2);
528 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
533 assert_eq!(rebuilt.size_at(1), 2);
534 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
540
541 assert!(exact.is_valid(0).unwrap());
543 assert!(exact.is_valid(1).unwrap());
544 assert!(!exact.is_valid(2).unwrap());
545 assert!(!exact.is_valid(3).unwrap());
546
547 assert_arrays_eq!(
549 exact.list_elements_at(0).unwrap(),
550 PrimitiveArray::from_iter([1i32, 2])
551 );
552
553 assert_arrays_eq!(
554 exact.list_elements_at(1).unwrap(),
555 PrimitiveArray::from_iter([3i32, 4])
556 );
557 Ok(())
558 }
559
560 #[test]
563 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
564 let mut elems = vec![0i32; 70_005];
565 elems[70_000] = 10;
566 elems[70_001] = 20;
567 elems[70_002] = 30;
568 elems[70_003] = 40;
569 let elements = PrimitiveArray::from_iter(elems).into_array();
570 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
571 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
572
573 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
574 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
575 assert_arrays_eq!(
576 trimmed.list_elements_at(1).unwrap(),
577 PrimitiveArray::from_iter([30i32, 40])
578 );
579 Ok(())
580 }
581
582 #[test]
585 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
586 let mut elems = vec![0i32; 70_001];
587 elems[3] = 30;
588 elems[4] = 40;
589 let elements = PrimitiveArray::from_iter(elems).into_array();
590 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
591 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
592
593 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
594 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
595 assert_arrays_eq!(
596 trimmed.list_elements_at(1).unwrap(),
597 PrimitiveArray::from_iter([30i32, 40])
598 );
599 Ok(())
600 }
601
602 #[test]
605 fn heuristic_zero_lists_uses_take() {
606 assert!(ListViewArray::should_use_take(0, 0));
607 }
608
609 #[test]
610 fn heuristic_small_lists_use_take() {
611 assert!(ListViewArray::should_use_take(127_000, 1_000));
613 assert!(!ListViewArray::should_use_take(128_000, 1_000));
615 }
616
617 #[test]
620 fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
621 let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
622 let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
623 let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
624
625 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
626 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
627
628 assert_arrays_eq!(trimmed, listview);
630 Ok(())
631 }
632}