Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::DynArray;
10use crate::IntoArray;
11use crate::ToCanonical;
12use crate::arrays::ConstantArray;
13use crate::arrays::ListViewArray;
14use crate::builders::builder_with_capacity;
15use crate::builtins::ArrayBuiltins;
16use crate::compute;
17use crate::dtype::IntegerPType;
18use crate::dtype::Nullability;
19use crate::match_each_integer_ptype;
20use crate::scalar::Scalar;
21use crate::scalar_fn::fns::operators::Operator;
22use crate::vtable::ValidityHelper;
23
24/// Modes for rebuilding a [`ListViewArray`].
25pub enum ListViewRebuildMode {
26    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
27    /// to a [`ListArray`].
28    ///
29    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
30    /// like a [`ListArray`] but with an additional `sizes` array.
31    ///
32    /// [`ListArray`]: crate::arrays::ListArray
33    MakeZeroCopyToList,
34
35    /// Removes any leading or trailing elements that are unused / not referenced by any views in
36    /// the [`ListViewArray`].
37    TrimElements,
38
39    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
40    ///
41    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
42    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
43    ///
44    /// [`ListArray`]: crate::arrays::ListArray
45    MakeExact,
46
47    // TODO(connor)[ListView]: Implement some version of this.
48    /// Finds the shortest packing / overlapping of list elements.
49    ///
50    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
51    /// this algorithm (but in all seriousness there are many approximate algorithms that could
52    /// work well here).
53    OverlapCompression,
54}
55
56impl ListViewArray {
57    /// Rebuilds the [`ListViewArray`] according to the specified mode.
58    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
59        if self.is_empty() {
60            return Ok(self.clone());
61        }
62
63        match mode {
64            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
65            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
66            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
67            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
68        }
69    }
70
71    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
72    ///
73    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
74    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
75    /// a [`ListArray`].
76    ///
77    /// [`ListArray`]: crate::arrays::ListArray
78    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
79        if self.is_zero_copy_to_list() {
80            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
81            return Ok(self.clone());
82        }
83
84        let offsets_ptype = self.offsets().dtype().as_ptype();
85        let sizes_ptype = self.sizes().dtype().as_ptype();
86
87        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
88        // is that we want to pass data to systems that expect Arrow data.
89        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
90        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
91        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
92        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
93        // for sizes as well.
94        match_each_integer_ptype!(sizes_ptype, |S| {
95            match offsets_ptype {
96                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
97                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
98                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
99                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
100                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
101                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
102                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
103                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
104                _ => unreachable!("invalid offsets PType"),
105            }
106        })
107    }
108
109    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
110    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
111    /// list size.
112    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
113        &self,
114    ) -> VortexResult<ListViewArray> {
115        let sizes_canonical = self.sizes().to_primitive();
116        let total: u64 = sizes_canonical
117            .as_slice::<S>()
118            .iter()
119            .map(|s| (*s).as_() as u64)
120            .sum();
121        if Self::should_use_take(total, self.len()) {
122            self.rebuild_with_take::<O, NewOffset, S>()
123        } else {
124            self.rebuild_list_by_list::<O, NewOffset, S>()
125        }
126    }
127
128    /// Returns `true` when we are confident that `rebuild_with_take` will
129    /// outperform `rebuild_list_by_list`.
130    ///
131    /// Take is dramatically faster for small lists (often 10-100×) because it
132    /// avoids per-list builder overhead. LBL is the safer default for larger
133    /// lists since its sequential memcpy scales well. We only choose take when
134    /// the average list size is small enough that take clearly dominates.
135    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
136        if num_lists == 0 {
137            return true;
138        }
139        let avg = total_output_elements / num_lists as u64;
140        avg < 128
141    }
142
143    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
144    /// `BufferMut<u64>`, perform a single `take`.
145    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
146        &self,
147    ) -> VortexResult<ListViewArray> {
148        let offsets_canonical = self.offsets().to_primitive();
149        let offsets_slice = offsets_canonical.as_slice::<O>();
150        let sizes_canonical = self.sizes().to_primitive();
151        let sizes_slice = sizes_canonical.as_slice::<S>();
152
153        let len = offsets_slice.len();
154
155        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
156        let mut new_sizes = BufferMut::<S>::with_capacity(len);
157        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
158
159        let mut n_elements = NewOffset::zero();
160        for index in 0..len {
161            if !self.is_valid(index)? {
162                new_offsets.push(n_elements);
163                new_sizes.push(S::zero());
164                continue;
165            }
166
167            let offset = offsets_slice[index];
168            let size = sizes_slice[index];
169            let start = offset.as_();
170            let stop = start + size.as_();
171
172            new_offsets.push(n_elements);
173            new_sizes.push(size);
174            take_indices.extend(start as u64..stop as u64);
175            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
176        }
177
178        let elements = self.elements().take(take_indices.into_array())?;
179        let offsets = new_offsets.into_array();
180        let sizes = new_sizes.into_array();
181
182        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
183        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
184        // array is preserved from the original.
185        Ok(unsafe {
186            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
187                .with_zero_copy_to_list(true)
188        })
189    }
190
191    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
192    /// the relevant range and `extend_from_array` into a typed builder.
193    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
194        &self,
195    ) -> VortexResult<ListViewArray> {
196        let element_dtype = self
197            .dtype()
198            .as_list_element_opt()
199            .vortex_expect("somehow had a canonical list that was not a list");
200
201        let offsets_canonical = self.offsets().to_primitive();
202        let offsets_slice = offsets_canonical.as_slice::<O>();
203        let sizes_canonical = self.sizes().to_primitive();
204        let sizes_slice = sizes_canonical.as_slice::<S>();
205
206        let len = offsets_slice.len();
207
208        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
209        // TODO(connor)[ListView]: Do we really need to do this?
210        // The only reason we need to rebuild the sizes here is that the validity may indicate that
211        // a list is null even though it has a non-zero size. This rebuild will set the size of all
212        // null lists to 0.
213        let mut new_sizes = BufferMut::<S>::with_capacity(len);
214
215        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
216        let elements_canonical = self
217            .elements()
218            .to_canonical()
219            .vortex_expect("canonicalize elements for rebuild")
220            .into_array();
221
222        // Note that we do not know what the exact capacity should be of the new elements since
223        // there could be overlaps in the existing `ListViewArray`.
224        let mut new_elements_builder =
225            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
226
227        let mut n_elements = NewOffset::zero();
228        for index in 0..len {
229            if !self.is_valid(index)? {
230                // For NULL lists, place them after the previous item's data to maintain the
231                // no-overlap invariant for zero-copy to `ListArray` arrays.
232                new_offsets.push(n_elements);
233                new_sizes.push(S::zero());
234                continue;
235            }
236
237            let offset = offsets_slice[index];
238            let size = sizes_slice[index];
239
240            let start = offset.as_();
241            let stop = start + size.as_();
242
243            new_offsets.push(n_elements);
244            new_sizes.push(size);
245            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
246
247            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
248        }
249
250        let offsets = new_offsets.into_array();
251        let sizes = new_sizes.into_array();
252        let elements = new_elements_builder.finish();
253
254        debug_assert_eq!(
255            n_elements.as_(),
256            elements.len(),
257            "The accumulated elements somehow had the wrong length"
258        );
259
260        // SAFETY:
261        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
262        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
263        //   lists).
264        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
265        // - The validity array is preserved from the original array unchanged
266        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
267        //   and no overlaps.
268        Ok(unsafe {
269            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
270                .with_zero_copy_to_list(true)
271        })
272    }
273
274    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
275    /// elements, which is defined as a contiguous run of values in the `elements` array that are
276    /// not referecened by any views in the corresponding [`ListViewArray`].
277    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
278        let start = if self.is_zero_copy_to_list() {
279            // If offsets are sorted, then the minimum offset is the first offset.
280            // Note that even if the first view is null, offsets must always be valid, so it is
281            // completely fine for us to use this as a lower-bounded start of the `elements`.
282            self.offset_at(0)
283        } else {
284            self.offsets().statistics().compute_min().vortex_expect(
285                "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
286            )
287        };
288
289        let end = if self.is_zero_copy_to_list() {
290            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
291            // can just grab the last offset and last size.
292            let last_offset = self.offset_at(self.len() - 1);
293            let last_size = self.size_at(self.len() - 1);
294            last_offset + last_size
295        } else {
296            // Offsets and sizes can have different primitive types (e.g. u32 vs u16).
297            // Cast the narrower to the wider since arithmetic requires identical operand types.
298            let (offsets, sizes) = if self.offsets().dtype().as_ptype().byte_width()
299                >= self.sizes().dtype().as_ptype().byte_width()
300            {
301                (
302                    self.offsets().clone(),
303                    self.sizes().cast(self.offsets().dtype().clone())?,
304                )
305            } else {
306                (
307                    self.offsets().cast(self.sizes().dtype().clone())?,
308                    self.sizes().clone(),
309                )
310            };
311
312            let min_max = compute::min_max(
313                &offsets
314                    .binary(sizes, Operator::Add)
315                    .vortex_expect("`offsets + sizes` somehow overflowed"),
316            )
317            .vortex_expect("Something went wrong while computing min and max")
318            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
319
320            min_max
321                .max
322                .as_primitive()
323                .as_::<usize>()
324                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
325        };
326
327        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
328            let offset = <O as FromPrimitive>::from_usize(start)
329                .vortex_expect("unable to convert the min offset `start` into a `usize`");
330            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
331
332            self.offsets()
333                .to_array()
334                .binary(
335                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
336                    Operator::Sub,
337                )
338                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
339        });
340
341        let sliced_elements = self.elements().slice(start..end)?;
342
343        // SAFETY: The only thing we changed was the elements (which we verify through mins and
344        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
345        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
346        // change that property.
347        Ok(unsafe {
348            ListViewArray::new_unchecked(
349                sliced_elements,
350                adjusted_offsets,
351                self.sizes().clone(),
352                self.validity().clone(),
353            )
354            .with_zero_copy_to_list(self.is_zero_copy_to_list())
355        })
356    }
357
358    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
359        if self.is_zero_copy_to_list() {
360            self.rebuild_trim_elements()
361        } else {
362            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
363            // any leading and trailing garbage data.
364            self.rebuild_zero_copy_to_list()
365        }
366    }
367}
368
369#[cfg(test)]
370#[allow(clippy::cast_possible_truncation)]
371mod tests {
372    use vortex_buffer::BitBuffer;
373    use vortex_error::VortexResult;
374
375    use super::ListViewRebuildMode;
376    use crate::IntoArray;
377    use crate::ToCanonical;
378    use crate::arrays::ListViewArray;
379    use crate::arrays::PrimitiveArray;
380    use crate::assert_arrays_eq;
381    use crate::dtype::Nullability;
382    use crate::validity::Validity;
383    use crate::vtable::ValidityHelper;
384
385    #[test]
386    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
387        // Create a list view with overlapping lists: [A, B, C]
388        // List 0: offset=0, size=3 -> [A, B, C]
389        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
390        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
391        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
392        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
393
394        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
395
396        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
397
398        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
399        // Lists should be sequential with no overlaps
400        assert_eq!(flattened.elements().len(), 5);
401
402        // Offsets should be sequential
403        assert_eq!(flattened.offset_at(0), 0);
404        assert_eq!(flattened.size_at(0), 3);
405        assert_eq!(flattened.offset_at(1), 3);
406        assert_eq!(flattened.size_at(1), 2);
407
408        // Verify the data is correct
409        assert_arrays_eq!(
410            flattened.list_elements_at(0).unwrap(),
411            PrimitiveArray::from_iter([1i32, 2, 3])
412        );
413
414        assert_arrays_eq!(
415            flattened.list_elements_at(1).unwrap(),
416            PrimitiveArray::from_iter([2i32, 3])
417        );
418        Ok(())
419    }
420
421    #[test]
422    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
423        use crate::arrays::BoolArray;
424
425        // Create a nullable list view with a null list
426        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
427        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
428        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
429        let validity = Validity::Array(
430            BoolArray::new(
431                BitBuffer::from(vec![true, false, true]),
432                Validity::NonNullable,
433            )
434            .into_array(),
435        );
436
437        let listview = ListViewArray::new(elements, offsets, sizes, validity);
438
439        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
440
441        // Verify nullability is preserved
442        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
443        assert!(flattened.validity().is_valid(0).unwrap());
444        assert!(!flattened.validity().is_valid(1).unwrap());
445        assert!(flattened.validity().is_valid(2).unwrap());
446
447        // Verify valid lists contain correct data
448        assert_arrays_eq!(
449            flattened.list_elements_at(0).unwrap(),
450            PrimitiveArray::from_iter([1i32, 2])
451        );
452
453        assert_arrays_eq!(
454            flattened.list_elements_at(2).unwrap(),
455            PrimitiveArray::from_iter([3i32])
456        );
457        Ok(())
458    }
459
460    #[test]
461    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
462        // Test trimming both leading and trailing unused elements while preserving gaps in the
463        // middle.
464        // Elements: [_, _, A, B, _, C, D, _, _]
465        //            0  1  2  3  4  5  6  7  8
466        // List 0: offset=2, size=2 -> [A, B]
467        // List 1: offset=5, size=2 -> [C, D]
468        // Should trim to: [A, B, _, C, D] with adjusted offsets.
469        let elements =
470            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
471        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
472        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
473
474        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
475
476        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
477
478        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
479        assert_eq!(trimmed.elements().len(), 5);
480
481        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
482        assert_eq!(trimmed.offset_at(0), 0);
483        assert_eq!(trimmed.size_at(0), 2);
484        assert_eq!(trimmed.offset_at(1), 3);
485        assert_eq!(trimmed.size_at(1), 2);
486
487        // Verify the data is correct.
488        assert_arrays_eq!(
489            trimmed.list_elements_at(0).unwrap(),
490            PrimitiveArray::from_iter([1i32, 2])
491        );
492
493        assert_arrays_eq!(
494            trimmed.list_elements_at(1).unwrap(),
495            PrimitiveArray::from_iter([3i32, 4])
496        );
497
498        // Note that element at index 2 (97) is preserved as a gap.
499        let all_elements = trimmed.elements().to_primitive();
500        assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
501        Ok(())
502    }
503
504    #[test]
505    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
506        // Regression test for issue #5412
507        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
508        // offsets for NULL items to maintain no-overlap invariant
509
510        // Create a ListViewArray with trailing NULLs
511        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
512        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
513        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
514        let validity = Validity::from_iter(vec![true, true, false, false]);
515
516        let listview = ListViewArray::new(elements, offsets, sizes, validity);
517
518        // First rebuild to make it zero-copy-to-list
519        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
520        assert!(rebuilt.is_zero_copy_to_list());
521
522        // Verify NULL items have correct offsets (should not reuse previous offsets)
523        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
524        assert_eq!(rebuilt.offset_at(0), 0);
525        assert_eq!(rebuilt.offset_at(1), 2);
526        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
527        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
528
529        // All sizes should be correct
530        assert_eq!(rebuilt.size_at(0), 2);
531        assert_eq!(rebuilt.size_at(1), 2);
532        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
533        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
534
535        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
536        // This should not panic (issue #5412)
537        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
538
539        // Verify the result is still valid
540        assert!(exact.is_valid(0).unwrap());
541        assert!(exact.is_valid(1).unwrap());
542        assert!(!exact.is_valid(2).unwrap());
543        assert!(!exact.is_valid(3).unwrap());
544
545        // Verify data is preserved
546        assert_arrays_eq!(
547            exact.list_elements_at(0).unwrap(),
548            PrimitiveArray::from_iter([1i32, 2])
549        );
550
551        assert_arrays_eq!(
552            exact.list_elements_at(1).unwrap(),
553            PrimitiveArray::from_iter([3i32, 4])
554        );
555        Ok(())
556    }
557
558    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
559    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
560    #[test]
561    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
562        let mut elems = vec![0i32; 70_005];
563        elems[70_000] = 10;
564        elems[70_001] = 20;
565        elems[70_002] = 30;
566        elems[70_003] = 40;
567        let elements = PrimitiveArray::from_iter(elems).into_array();
568        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
569        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
570
571        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
572        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
573        assert_arrays_eq!(
574            trimmed.list_elements_at(1).unwrap(),
575            PrimitiveArray::from_iter([30i32, 40])
576        );
577        Ok(())
578    }
579
580    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
581    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
582    #[test]
583    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
584        let mut elems = vec![0i32; 70_001];
585        elems[3] = 30;
586        elems[4] = 40;
587        let elements = PrimitiveArray::from_iter(elems).into_array();
588        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
589        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
590
591        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
592        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
593        assert_arrays_eq!(
594            trimmed.list_elements_at(1).unwrap(),
595            PrimitiveArray::from_iter([30i32, 40])
596        );
597        Ok(())
598    }
599
600    // ── should_use_take heuristic tests ────────────────────────────────────
601
602    #[test]
603    fn heuristic_zero_lists_uses_take() {
604        assert!(ListViewArray::should_use_take(0, 0));
605    }
606
607    #[test]
608    fn heuristic_small_lists_use_take() {
609        // avg = 127 → take
610        assert!(ListViewArray::should_use_take(127_000, 1_000));
611        // avg = 128 → LBL
612        assert!(!ListViewArray::should_use_take(128_000, 1_000));
613    }
614}