Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11use crate::ToCanonical;
12use crate::VortexSessionExecute;
13use crate::aggregate_fn::fns::min_max::min_max;
14use crate::arrays::ConstantArray;
15use crate::arrays::ListViewArray;
16use crate::arrays::listview::ListViewArrayExt;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::DType;
20use crate::dtype::IntegerPType;
21use crate::dtype::Nullability;
22use crate::dtype::PType;
23use crate::match_each_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26
27/// Modes for rebuilding a [`ListViewArray`].
28pub enum ListViewRebuildMode {
29    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
30    /// to a [`ListArray`].
31    ///
32    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
33    /// like a [`ListArray`] but with an additional `sizes` array.
34    ///
35    /// [`ListArray`]: crate::arrays::ListArray
36    MakeZeroCopyToList,
37
38    /// Removes any leading or trailing elements that are unused / not referenced by any views in
39    /// the [`ListViewArray`].
40    TrimElements,
41
42    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
43    ///
44    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
45    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
46    ///
47    /// [`ListArray`]: crate::arrays::ListArray
48    MakeExact,
49
50    // TODO(connor)[ListView]: Implement some version of this.
51    /// Finds the shortest packing / overlapping of list elements.
52    ///
53    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
54    /// this algorithm (but in all seriousness there are many approximate algorithms that could
55    /// work well here).
56    OverlapCompression,
57}
58
59impl ListViewArray {
60    /// Rebuilds the [`ListViewArray`] according to the specified mode.
61    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
62        if self.is_empty() {
63            // SAFETY: An empty array is trivially zero-copyable to a `ListArray`.
64            return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
65        }
66
67        match mode {
68            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
69            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
70            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
71            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
72        }
73    }
74
75    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
76    ///
77    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
78    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
79    /// a [`ListArray`].
80    ///
81    /// [`ListArray`]: crate::arrays::ListArray
82    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
83        if self.is_zero_copy_to_list() {
84            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
85            return Ok(self.clone());
86        }
87
88        let offsets_ptype = self.offsets().dtype().as_ptype();
89        let sizes_ptype = self.sizes().dtype().as_ptype();
90
91        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
92        // is that we want to pass data to systems that expect Arrow data.
93        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
94        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
95        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
96        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
97        // for sizes as well.
98        match_each_integer_ptype!(sizes_ptype, |S| {
99            match offsets_ptype {
100                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
101                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
102                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
103                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
104                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
105                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
106                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
107                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
108                _ => unreachable!("invalid offsets PType"),
109            }
110        })
111    }
112
113    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
114    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
115    /// list size.
116    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
117        &self,
118    ) -> VortexResult<ListViewArray> {
119        let sizes_canonical = self.sizes().to_primitive();
120        let total: u64 = sizes_canonical
121            .as_slice::<S>()
122            .iter()
123            .map(|s| (*s).as_() as u64)
124            .sum();
125        if Self::should_use_take(total, self.len()) {
126            self.rebuild_with_take::<O, NewOffset, S>()
127        } else {
128            self.rebuild_list_by_list::<O, NewOffset, S>()
129        }
130    }
131
132    /// Returns `true` when we are confident that `rebuild_with_take` will
133    /// outperform `rebuild_list_by_list`.
134    ///
135    /// Take is dramatically faster for small lists (often 10-100×) because it
136    /// avoids per-list builder overhead. LBL is the safer default for larger
137    /// lists since its sequential memcpy scales well. We only choose take when
138    /// the average list size is small enough that take clearly dominates.
139    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
140        if num_lists == 0 {
141            return true;
142        }
143        let avg = total_output_elements / num_lists as u64;
144        avg < 128
145    }
146
147    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
148    /// `BufferMut<u64>`, perform a single `take`.
149    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
150        &self,
151    ) -> VortexResult<ListViewArray> {
152        let offsets_canonical = self.offsets().to_primitive();
153        let offsets_slice = offsets_canonical.as_slice::<O>();
154        let sizes_canonical = self.sizes().to_primitive();
155        let sizes_slice = sizes_canonical.as_slice::<S>();
156
157        let len = offsets_slice.len();
158
159        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
160        let mut new_sizes = BufferMut::<S>::with_capacity(len);
161        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
162
163        let mut n_elements = NewOffset::zero();
164        for index in 0..len {
165            if !self.validity()?.is_valid(index)? {
166                new_offsets.push(n_elements);
167                new_sizes.push(S::zero());
168                continue;
169            }
170
171            let offset = offsets_slice[index];
172            let size = sizes_slice[index];
173            let start = offset.as_();
174            let stop = start + size.as_();
175
176            new_offsets.push(n_elements);
177            new_sizes.push(size);
178            take_indices.extend(start as u64..stop as u64);
179            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
180        }
181
182        let elements = self.elements().take(take_indices.into_array())?;
183        let offsets = new_offsets.into_array();
184        let sizes = new_sizes.into_array();
185
186        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
187        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
188        // array is preserved from the original.
189        Ok(unsafe {
190            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
191                .with_zero_copy_to_list(true)
192        })
193    }
194
195    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
196    /// the relevant range and `extend_from_array` into a typed builder.
197    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
198        &self,
199    ) -> VortexResult<ListViewArray> {
200        let element_dtype = self
201            .dtype()
202            .as_list_element_opt()
203            .vortex_expect("somehow had a canonical list that was not a list");
204
205        let offsets_canonical = self.offsets().to_primitive();
206        let offsets_slice = offsets_canonical.as_slice::<O>();
207        let sizes_canonical = self.sizes().to_primitive();
208        let sizes_slice = sizes_canonical.as_slice::<S>();
209
210        let len = offsets_slice.len();
211
212        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
213        // TODO(connor)[ListView]: Do we really need to do this?
214        // The only reason we need to rebuild the sizes here is that the validity may indicate that
215        // a list is null even though it has a non-zero size. This rebuild will set the size of all
216        // null lists to 0.
217        let mut new_sizes = BufferMut::<S>::with_capacity(len);
218
219        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
220        let elements_canonical = self
221            .elements()
222            .to_canonical()
223            .vortex_expect("canonicalize elements for rebuild")
224            .into_array();
225
226        // Note that we do not know what the exact capacity should be of the new elements since
227        // there could be overlaps in the existing `ListViewArray`.
228        let mut new_elements_builder =
229            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
230
231        let mut n_elements = NewOffset::zero();
232        for index in 0..len {
233            if !self.validity()?.is_valid(index)? {
234                // For NULL lists, place them after the previous item's data to maintain the
235                // no-overlap invariant for zero-copy to `ListArray` arrays.
236                new_offsets.push(n_elements);
237                new_sizes.push(S::zero());
238                continue;
239            }
240
241            let offset = offsets_slice[index];
242            let size = sizes_slice[index];
243
244            let start = offset.as_();
245            let stop = start + size.as_();
246
247            new_offsets.push(n_elements);
248            new_sizes.push(size);
249            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
250
251            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
252        }
253
254        let offsets = new_offsets.into_array();
255        let sizes = new_sizes.into_array();
256        let elements = new_elements_builder.finish();
257
258        debug_assert_eq!(
259            n_elements.as_(),
260            elements.len(),
261            "The accumulated elements somehow had the wrong length"
262        );
263
264        // SAFETY:
265        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
266        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
267        //   lists).
268        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
269        // - The validity array is preserved from the original array unchanged
270        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
271        //   and no overlaps.
272        Ok(unsafe {
273            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
274                .with_zero_copy_to_list(true)
275        })
276    }
277
278    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
279    /// elements, which is defined as a contiguous run of values in the `elements` array that are
280    /// not referecened by any views in the corresponding [`ListViewArray`].
281    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
282        let start = if self.is_zero_copy_to_list() {
283            // If offsets are sorted, then the minimum offset is the first offset.
284            // Note that even if the first view is null, offsets must always be valid, so it is
285            // completely fine for us to use this as a lower-bounded start of the `elements`.
286            self.offset_at(0)
287        } else {
288            self.offsets().statistics().compute_min().vortex_expect(
289                "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
290            )
291        };
292
293        let end = if self.is_zero_copy_to_list() {
294            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
295            // can just grab the last offset and last size.
296            let last_offset = self.offset_at(self.len() - 1);
297            let last_size = self.size_at(self.len() - 1);
298            last_offset + last_size
299        } else {
300            // Cast offsets and sizes to the widest integer type to prevent
301            // overflow when computing offsets + sizes. The end offset may not
302            // fit in the integer width otherwise.
303            let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
304                PType::U64
305            } else {
306                PType::I64
307            });
308            let offsets = self.offsets().cast(wide_dtype.clone())?;
309            let sizes = self.sizes().cast(wide_dtype)?;
310
311            let mut ctx = LEGACY_SESSION.create_execution_ctx();
312            let min_max = min_max(
313                &offsets
314                    .binary(sizes, Operator::Add)
315                    .vortex_expect("`offsets + sizes` somehow overflowed"),
316                &mut ctx,
317            )
318            .vortex_expect("Something went wrong while computing min and max")
319            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
320
321            min_max
322                .max
323                .as_primitive()
324                .as_::<usize>()
325                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
326        };
327
328        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
329            let offset = <O as FromPrimitive>::from_usize(start)
330                .vortex_expect("unable to convert the min offset `start` into a `usize`");
331            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
332
333            self.offsets()
334                .clone()
335                .binary(
336                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
337                    Operator::Sub,
338                )
339                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
340        });
341
342        let sliced_elements = self.elements().slice(start..end)?;
343
344        // SAFETY: The only thing we changed was the elements (which we verify through mins and
345        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
346        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
347        // change that property.
348        Ok(unsafe {
349            ListViewArray::new_unchecked(
350                sliced_elements,
351                adjusted_offsets,
352                self.sizes().clone(),
353                self.validity()?,
354            )
355            .with_zero_copy_to_list(self.is_zero_copy_to_list())
356        })
357    }
358
359    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
360        if self.is_zero_copy_to_list() {
361            self.rebuild_trim_elements()
362        } else {
363            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
364            // any leading and trailing garbage data.
365            self.rebuild_zero_copy_to_list()
366        }
367    }
368}
369
370#[cfg(test)]
371#[allow(clippy::cast_possible_truncation)]
372mod tests {
373    use vortex_buffer::BitBuffer;
374    use vortex_error::VortexResult;
375
376    use super::ListViewRebuildMode;
377    use crate::IntoArray;
378    use crate::ToCanonical;
379    use crate::arrays::ListViewArray;
380    use crate::arrays::PrimitiveArray;
381    use crate::arrays::listview::ListViewArrayExt;
382    use crate::assert_arrays_eq;
383    use crate::dtype::Nullability;
384    use crate::validity::Validity;
385
386    #[test]
387    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
388        // Create a list view with overlapping lists: [A, B, C]
389        // List 0: offset=0, size=3 -> [A, B, C]
390        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
391        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
392        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
393        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
394
395        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
396
397        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
398
399        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
400        // Lists should be sequential with no overlaps
401        assert_eq!(flattened.elements().len(), 5);
402
403        // Offsets should be sequential
404        assert_eq!(flattened.offset_at(0), 0);
405        assert_eq!(flattened.size_at(0), 3);
406        assert_eq!(flattened.offset_at(1), 3);
407        assert_eq!(flattened.size_at(1), 2);
408
409        // Verify the data is correct
410        assert_arrays_eq!(
411            flattened.list_elements_at(0).unwrap(),
412            PrimitiveArray::from_iter([1i32, 2, 3])
413        );
414
415        assert_arrays_eq!(
416            flattened.list_elements_at(1).unwrap(),
417            PrimitiveArray::from_iter([2i32, 3])
418        );
419        Ok(())
420    }
421
422    #[test]
423    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
424        use crate::arrays::BoolArray;
425
426        // Create a nullable list view with a null list
427        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
428        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
429        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
430        let validity = Validity::Array(
431            BoolArray::new(
432                BitBuffer::from(vec![true, false, true]),
433                Validity::NonNullable,
434            )
435            .into_array(),
436        );
437
438        let listview = ListViewArray::new(elements, offsets, sizes, validity);
439
440        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
441
442        // Verify nullability is preserved
443        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
444        assert!(flattened.validity()?.is_valid(0).unwrap());
445        assert!(!flattened.validity()?.is_valid(1).unwrap());
446        assert!(flattened.validity()?.is_valid(2).unwrap());
447
448        // Verify valid lists contain correct data
449        assert_arrays_eq!(
450            flattened.list_elements_at(0).unwrap(),
451            PrimitiveArray::from_iter([1i32, 2])
452        );
453
454        assert_arrays_eq!(
455            flattened.list_elements_at(2).unwrap(),
456            PrimitiveArray::from_iter([3i32])
457        );
458        Ok(())
459    }
460
461    #[test]
462    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
463        // Test trimming both leading and trailing unused elements while preserving gaps in the
464        // middle.
465        // Elements: [_, _, A, B, _, C, D, _, _]
466        //            0  1  2  3  4  5  6  7  8
467        // List 0: offset=2, size=2 -> [A, B]
468        // List 1: offset=5, size=2 -> [C, D]
469        // Should trim to: [A, B, _, C, D] with adjusted offsets.
470        let elements =
471            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
472        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
473        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
474
475        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
476
477        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
478
479        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
480        assert_eq!(trimmed.elements().len(), 5);
481
482        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
483        assert_eq!(trimmed.offset_at(0), 0);
484        assert_eq!(trimmed.size_at(0), 2);
485        assert_eq!(trimmed.offset_at(1), 3);
486        assert_eq!(trimmed.size_at(1), 2);
487
488        // Verify the data is correct.
489        assert_arrays_eq!(
490            trimmed.list_elements_at(0).unwrap(),
491            PrimitiveArray::from_iter([1i32, 2])
492        );
493
494        assert_arrays_eq!(
495            trimmed.list_elements_at(1).unwrap(),
496            PrimitiveArray::from_iter([3i32, 4])
497        );
498
499        // Note that element at index 2 (97) is preserved as a gap.
500        let all_elements = trimmed.elements().to_primitive();
501        assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
502        Ok(())
503    }
504
505    #[test]
506    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
507        // Regression test for issue #5412
508        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
509        // offsets for NULL items to maintain no-overlap invariant
510
511        // Create a ListViewArray with trailing NULLs
512        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
513        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
514        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
515        let validity = Validity::from_iter(vec![true, true, false, false]);
516
517        let listview = ListViewArray::new(elements, offsets, sizes, validity);
518
519        // First rebuild to make it zero-copy-to-list
520        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
521        assert!(rebuilt.is_zero_copy_to_list());
522
523        // Verify NULL items have correct offsets (should not reuse previous offsets)
524        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
525        assert_eq!(rebuilt.offset_at(0), 0);
526        assert_eq!(rebuilt.offset_at(1), 2);
527        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
528        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
529
530        // All sizes should be correct
531        assert_eq!(rebuilt.size_at(0), 2);
532        assert_eq!(rebuilt.size_at(1), 2);
533        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
534        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
535
536        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
537        // This should not panic (issue #5412)
538        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
539
540        // Verify the result is still valid
541        assert!(exact.is_valid(0).unwrap());
542        assert!(exact.is_valid(1).unwrap());
543        assert!(!exact.is_valid(2).unwrap());
544        assert!(!exact.is_valid(3).unwrap());
545
546        // Verify data is preserved
547        assert_arrays_eq!(
548            exact.list_elements_at(0).unwrap(),
549            PrimitiveArray::from_iter([1i32, 2])
550        );
551
552        assert_arrays_eq!(
553            exact.list_elements_at(1).unwrap(),
554            PrimitiveArray::from_iter([3i32, 4])
555        );
556        Ok(())
557    }
558
559    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
560    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
561    #[test]
562    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
563        let mut elems = vec![0i32; 70_005];
564        elems[70_000] = 10;
565        elems[70_001] = 20;
566        elems[70_002] = 30;
567        elems[70_003] = 40;
568        let elements = PrimitiveArray::from_iter(elems).into_array();
569        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
570        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
571
572        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
573        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
574        assert_arrays_eq!(
575            trimmed.list_elements_at(1).unwrap(),
576            PrimitiveArray::from_iter([30i32, 40])
577        );
578        Ok(())
579    }
580
581    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
582    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
583    #[test]
584    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
585        let mut elems = vec![0i32; 70_001];
586        elems[3] = 30;
587        elems[4] = 40;
588        let elements = PrimitiveArray::from_iter(elems).into_array();
589        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
590        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
591
592        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
593        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
594        assert_arrays_eq!(
595            trimmed.list_elements_at(1).unwrap(),
596            PrimitiveArray::from_iter([30i32, 40])
597        );
598        Ok(())
599    }
600
601    // ── should_use_take heuristic tests ────────────────────────────────────
602
603    #[test]
604    fn heuristic_zero_lists_uses_take() {
605        assert!(ListViewArray::should_use_take(0, 0));
606    }
607
608    #[test]
609    fn heuristic_small_lists_use_take() {
610        // avg = 127 → take
611        assert!(ListViewArray::should_use_take(127_000, 1_000));
612        // avg = 128 → LBL
613        assert!(!ListViewArray::should_use_take(128_000, 1_000));
614    }
615
616    /// Regression test for <https://github.com/vortex-data/vortex/issues/6973>.
617    /// Both offsets and sizes are u8, and offset + size exceeds u8::MAX.
618    #[test]
619    fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
620        let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
621        let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
622        let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
623
624        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
625        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
626
627        // min(offsets) = 0, so nothing to trim; output should equal input.
628        assert_arrays_eq!(trimmed, listview);
629        Ok(())
630    }
631}