1use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::DynArray;
10use crate::IntoArray;
11use crate::ToCanonical;
12use crate::arrays::ConstantArray;
13use crate::arrays::ListViewArray;
14use crate::builders::builder_with_capacity;
15use crate::builtins::ArrayBuiltins;
16use crate::compute;
17use crate::dtype::IntegerPType;
18use crate::dtype::Nullability;
19use crate::match_each_integer_ptype;
20use crate::scalar::Scalar;
21use crate::scalar_fn::fns::operators::Operator;
22use crate::vtable::ValidityHelper;
23
24pub enum ListViewRebuildMode {
26 MakeZeroCopyToList,
34
35 TrimElements,
38
39 MakeExact,
46
47 OverlapCompression,
54}
55
56impl ListViewArray {
57 pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
59 if self.is_empty() {
60 return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
62 }
63
64 match mode {
65 ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
66 ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
67 ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
68 ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
69 }
70 }
71
72 fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
80 if self.is_zero_copy_to_list() {
81 return Ok(self.clone());
83 }
84
85 let offsets_ptype = self.offsets().dtype().as_ptype();
86 let sizes_ptype = self.sizes().dtype().as_ptype();
87
88 match_each_integer_ptype!(sizes_ptype, |S| {
96 match offsets_ptype {
97 PType::U8 => self.naive_rebuild::<u8, u32, S>(),
98 PType::U16 => self.naive_rebuild::<u16, u32, S>(),
99 PType::U32 => self.naive_rebuild::<u32, u32, S>(),
100 PType::U64 => self.naive_rebuild::<u64, u64, S>(),
101 PType::I8 => self.naive_rebuild::<i8, i32, S>(),
102 PType::I16 => self.naive_rebuild::<i16, i32, S>(),
103 PType::I32 => self.naive_rebuild::<i32, i32, S>(),
104 PType::I64 => self.naive_rebuild::<i64, i64, S>(),
105 _ => unreachable!("invalid offsets PType"),
106 }
107 })
108 }
109
110 fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
114 &self,
115 ) -> VortexResult<ListViewArray> {
116 let sizes_canonical = self.sizes().to_primitive();
117 let total: u64 = sizes_canonical
118 .as_slice::<S>()
119 .iter()
120 .map(|s| (*s).as_() as u64)
121 .sum();
122 if Self::should_use_take(total, self.len()) {
123 self.rebuild_with_take::<O, NewOffset, S>()
124 } else {
125 self.rebuild_list_by_list::<O, NewOffset, S>()
126 }
127 }
128
129 fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
137 if num_lists == 0 {
138 return true;
139 }
140 let avg = total_output_elements / num_lists as u64;
141 avg < 128
142 }
143
144 fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
147 &self,
148 ) -> VortexResult<ListViewArray> {
149 let offsets_canonical = self.offsets().to_primitive();
150 let offsets_slice = offsets_canonical.as_slice::<O>();
151 let sizes_canonical = self.sizes().to_primitive();
152 let sizes_slice = sizes_canonical.as_slice::<S>();
153
154 let len = offsets_slice.len();
155
156 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
157 let mut new_sizes = BufferMut::<S>::with_capacity(len);
158 let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
159
160 let mut n_elements = NewOffset::zero();
161 for index in 0..len {
162 if !self.is_valid(index)? {
163 new_offsets.push(n_elements);
164 new_sizes.push(S::zero());
165 continue;
166 }
167
168 let offset = offsets_slice[index];
169 let size = sizes_slice[index];
170 let start = offset.as_();
171 let stop = start + size.as_();
172
173 new_offsets.push(n_elements);
174 new_sizes.push(size);
175 take_indices.extend(start as u64..stop as u64);
176 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
177 }
178
179 let elements = self.elements().take(take_indices.into_array())?;
180 let offsets = new_offsets.into_array();
181 let sizes = new_sizes.into_array();
182
183 Ok(unsafe {
187 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
188 .with_zero_copy_to_list(true)
189 })
190 }
191
192 fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
195 &self,
196 ) -> VortexResult<ListViewArray> {
197 let element_dtype = self
198 .dtype()
199 .as_list_element_opt()
200 .vortex_expect("somehow had a canonical list that was not a list");
201
202 let offsets_canonical = self.offsets().to_primitive();
203 let offsets_slice = offsets_canonical.as_slice::<O>();
204 let sizes_canonical = self.sizes().to_primitive();
205 let sizes_slice = sizes_canonical.as_slice::<S>();
206
207 let len = offsets_slice.len();
208
209 let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
210 let mut new_sizes = BufferMut::<S>::with_capacity(len);
215
216 let elements_canonical = self
218 .elements()
219 .to_canonical()
220 .vortex_expect("canonicalize elements for rebuild")
221 .into_array();
222
223 let mut new_elements_builder =
226 builder_with_capacity(element_dtype.as_ref(), self.elements().len());
227
228 let mut n_elements = NewOffset::zero();
229 for index in 0..len {
230 if !self.is_valid(index)? {
231 new_offsets.push(n_elements);
234 new_sizes.push(S::zero());
235 continue;
236 }
237
238 let offset = offsets_slice[index];
239 let size = sizes_slice[index];
240
241 let start = offset.as_();
242 let stop = start + size.as_();
243
244 new_offsets.push(n_elements);
245 new_sizes.push(size);
246 new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
247
248 n_elements += num_traits::cast(size).vortex_expect("Cast failed");
249 }
250
251 let offsets = new_offsets.into_array();
252 let sizes = new_sizes.into_array();
253 let elements = new_elements_builder.finish();
254
255 debug_assert_eq!(
256 n_elements.as_(),
257 elements.len(),
258 "The accumulated elements somehow had the wrong length"
259 );
260
261 Ok(unsafe {
270 ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
271 .with_zero_copy_to_list(true)
272 })
273 }
274
275 fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
279 let start = if self.is_zero_copy_to_list() {
280 self.offset_at(0)
284 } else {
285 self.offsets().statistics().compute_min().vortex_expect(
286 "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
287 )
288 };
289
290 let end = if self.is_zero_copy_to_list() {
291 let last_offset = self.offset_at(self.len() - 1);
294 let last_size = self.size_at(self.len() - 1);
295 last_offset + last_size
296 } else {
297 let (offsets, sizes) = if self.offsets().dtype().as_ptype().byte_width()
300 >= self.sizes().dtype().as_ptype().byte_width()
301 {
302 (
303 self.offsets().clone(),
304 self.sizes().cast(self.offsets().dtype().clone())?,
305 )
306 } else {
307 (
308 self.offsets().cast(self.sizes().dtype().clone())?,
309 self.sizes().clone(),
310 )
311 };
312
313 let min_max = compute::min_max(
314 &offsets
315 .binary(sizes, Operator::Add)
316 .vortex_expect("`offsets + sizes` somehow overflowed"),
317 )
318 .vortex_expect("Something went wrong while computing min and max")
319 .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
320
321 min_max
322 .max
323 .as_primitive()
324 .as_::<usize>()
325 .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
326 };
327
328 let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
329 let offset = <O as FromPrimitive>::from_usize(start)
330 .vortex_expect("unable to convert the min offset `start` into a `usize`");
331 let scalar = Scalar::primitive(offset, Nullability::NonNullable);
332
333 self.offsets()
334 .to_array()
335 .binary(
336 ConstantArray::new(scalar, self.offsets().len()).into_array(),
337 Operator::Sub,
338 )
339 .vortex_expect("was somehow unable to adjust offsets down by their minimum")
340 });
341
342 let sliced_elements = self.elements().slice(start..end)?;
343
344 Ok(unsafe {
349 ListViewArray::new_unchecked(
350 sliced_elements,
351 adjusted_offsets,
352 self.sizes().clone(),
353 self.validity().clone(),
354 )
355 .with_zero_copy_to_list(self.is_zero_copy_to_list())
356 })
357 }
358
359 fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
360 if self.is_zero_copy_to_list() {
361 self.rebuild_trim_elements()
362 } else {
363 self.rebuild_zero_copy_to_list()
366 }
367 }
368}
369
370#[cfg(test)]
371#[allow(clippy::cast_possible_truncation)]
372mod tests {
373 use vortex_buffer::BitBuffer;
374 use vortex_error::VortexResult;
375
376 use super::ListViewRebuildMode;
377 use crate::IntoArray;
378 use crate::ToCanonical;
379 use crate::arrays::ListViewArray;
380 use crate::arrays::PrimitiveArray;
381 use crate::assert_arrays_eq;
382 use crate::dtype::Nullability;
383 use crate::validity::Validity;
384 use crate::vtable::ValidityHelper;
385
386 #[test]
387 fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
388 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
392 let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
393 let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
394
395 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
396
397 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
398
399 assert_eq!(flattened.elements().len(), 5);
402
403 assert_eq!(flattened.offset_at(0), 0);
405 assert_eq!(flattened.size_at(0), 3);
406 assert_eq!(flattened.offset_at(1), 3);
407 assert_eq!(flattened.size_at(1), 2);
408
409 assert_arrays_eq!(
411 flattened.list_elements_at(0).unwrap(),
412 PrimitiveArray::from_iter([1i32, 2, 3])
413 );
414
415 assert_arrays_eq!(
416 flattened.list_elements_at(1).unwrap(),
417 PrimitiveArray::from_iter([2i32, 3])
418 );
419 Ok(())
420 }
421
422 #[test]
423 fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
424 use crate::arrays::BoolArray;
425
426 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
428 let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
429 let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
430 let validity = Validity::Array(
431 BoolArray::new(
432 BitBuffer::from(vec![true, false, true]),
433 Validity::NonNullable,
434 )
435 .into_array(),
436 );
437
438 let listview = ListViewArray::new(elements, offsets, sizes, validity);
439
440 let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
441
442 assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
444 assert!(flattened.validity().is_valid(0).unwrap());
445 assert!(!flattened.validity().is_valid(1).unwrap());
446 assert!(flattened.validity().is_valid(2).unwrap());
447
448 assert_arrays_eq!(
450 flattened.list_elements_at(0).unwrap(),
451 PrimitiveArray::from_iter([1i32, 2])
452 );
453
454 assert_arrays_eq!(
455 flattened.list_elements_at(2).unwrap(),
456 PrimitiveArray::from_iter([3i32])
457 );
458 Ok(())
459 }
460
461 #[test]
462 fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
463 let elements =
471 PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
472 let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
473 let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
474
475 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
476
477 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
478
479 assert_eq!(trimmed.elements().len(), 5);
481
482 assert_eq!(trimmed.offset_at(0), 0);
484 assert_eq!(trimmed.size_at(0), 2);
485 assert_eq!(trimmed.offset_at(1), 3);
486 assert_eq!(trimmed.size_at(1), 2);
487
488 assert_arrays_eq!(
490 trimmed.list_elements_at(0).unwrap(),
491 PrimitiveArray::from_iter([1i32, 2])
492 );
493
494 assert_arrays_eq!(
495 trimmed.list_elements_at(1).unwrap(),
496 PrimitiveArray::from_iter([3i32, 4])
497 );
498
499 let all_elements = trimmed.elements().to_primitive();
501 assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
502 Ok(())
503 }
504
505 #[test]
506 fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
507 let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
513 let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
514 let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
515 let validity = Validity::from_iter(vec![true, true, false, false]);
516
517 let listview = ListViewArray::new(elements, offsets, sizes, validity);
518
519 let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
521 assert!(rebuilt.is_zero_copy_to_list());
522
523 assert_eq!(rebuilt.offset_at(0), 0);
526 assert_eq!(rebuilt.offset_at(1), 2);
527 assert_eq!(rebuilt.offset_at(2), 4); assert_eq!(rebuilt.offset_at(3), 4); assert_eq!(rebuilt.size_at(0), 2);
532 assert_eq!(rebuilt.size_at(1), 2);
533 assert_eq!(rebuilt.size_at(2), 0); assert_eq!(rebuilt.size_at(3), 0); let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
539
540 assert!(exact.is_valid(0).unwrap());
542 assert!(exact.is_valid(1).unwrap());
543 assert!(!exact.is_valid(2).unwrap());
544 assert!(!exact.is_valid(3).unwrap());
545
546 assert_arrays_eq!(
548 exact.list_elements_at(0).unwrap(),
549 PrimitiveArray::from_iter([1i32, 2])
550 );
551
552 assert_arrays_eq!(
553 exact.list_elements_at(1).unwrap(),
554 PrimitiveArray::from_iter([3i32, 4])
555 );
556 Ok(())
557 }
558
559 #[test]
562 fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
563 let mut elems = vec![0i32; 70_005];
564 elems[70_000] = 10;
565 elems[70_001] = 20;
566 elems[70_002] = 30;
567 elems[70_003] = 40;
568 let elements = PrimitiveArray::from_iter(elems).into_array();
569 let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
570 let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
571
572 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
573 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
574 assert_arrays_eq!(
575 trimmed.list_elements_at(1).unwrap(),
576 PrimitiveArray::from_iter([30i32, 40])
577 );
578 Ok(())
579 }
580
581 #[test]
584 fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
585 let mut elems = vec![0i32; 70_001];
586 elems[3] = 30;
587 elems[4] = 40;
588 let elements = PrimitiveArray::from_iter(elems).into_array();
589 let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
590 let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
591
592 let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
593 let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
594 assert_arrays_eq!(
595 trimmed.list_elements_at(1).unwrap(),
596 PrimitiveArray::from_iter([30i32, 40])
597 );
598 Ok(())
599 }
600
601 #[test]
604 fn heuristic_zero_lists_uses_take() {
605 assert!(ListViewArray::should_use_take(0, 0));
606 }
607
608 #[test]
609 fn heuristic_small_lists_use_take() {
610 assert!(ListViewArray::should_use_take(127_000, 1_000));
612 assert!(!ListViewArray::should_use_take(128_000, 1_000));
614 }
615}