Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::DynArray;
10use crate::IntoArray;
11use crate::ToCanonical;
12use crate::arrays::ConstantArray;
13use crate::arrays::ListViewArray;
14use crate::builders::builder_with_capacity;
15use crate::builtins::ArrayBuiltins;
16use crate::compute;
17use crate::dtype::IntegerPType;
18use crate::dtype::Nullability;
19use crate::match_each_integer_ptype;
20use crate::scalar::Scalar;
21use crate::scalar_fn::fns::operators::Operator;
22use crate::vtable::ValidityHelper;
23
24/// Modes for rebuilding a [`ListViewArray`].
25pub enum ListViewRebuildMode {
26    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
27    /// to a [`ListArray`].
28    ///
29    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
30    /// like a [`ListArray`] but with an additional `sizes` array.
31    ///
32    /// [`ListArray`]: crate::arrays::ListArray
33    MakeZeroCopyToList,
34
35    /// Removes any leading or trailing elements that are unused / not referenced by any views in
36    /// the [`ListViewArray`].
37    TrimElements,
38
39    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
40    ///
41    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
42    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
43    ///
44    /// [`ListArray`]: crate::arrays::ListArray
45    MakeExact,
46
47    // TODO(connor)[ListView]: Implement some version of this.
48    /// Finds the shortest packing / overlapping of list elements.
49    ///
50    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
51    /// this algorithm (but in all seriousness there are many approximate algorithms that could
52    /// work well here).
53    OverlapCompression,
54}
55
56impl ListViewArray {
57    /// Rebuilds the [`ListViewArray`] according to the specified mode.
58    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
59        if self.is_empty() {
60            // SAFETY: An empty array is trivially zero-copyable to a `ListArray`.
61            return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
62        }
63
64        match mode {
65            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
66            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
67            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
68            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
69        }
70    }
71
72    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
73    ///
74    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
75    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
76    /// a [`ListArray`].
77    ///
78    /// [`ListArray`]: crate::arrays::ListArray
79    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
80        if self.is_zero_copy_to_list() {
81            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
82            return Ok(self.clone());
83        }
84
85        let offsets_ptype = self.offsets().dtype().as_ptype();
86        let sizes_ptype = self.sizes().dtype().as_ptype();
87
88        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
89        // is that we want to pass data to systems that expect Arrow data.
90        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
91        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
92        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
93        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
94        // for sizes as well.
95        match_each_integer_ptype!(sizes_ptype, |S| {
96            match offsets_ptype {
97                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
98                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
99                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
100                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
101                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
102                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
103                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
104                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
105                _ => unreachable!("invalid offsets PType"),
106            }
107        })
108    }
109
110    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
111    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
112    /// list size.
113    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
114        &self,
115    ) -> VortexResult<ListViewArray> {
116        let sizes_canonical = self.sizes().to_primitive();
117        let total: u64 = sizes_canonical
118            .as_slice::<S>()
119            .iter()
120            .map(|s| (*s).as_() as u64)
121            .sum();
122        if Self::should_use_take(total, self.len()) {
123            self.rebuild_with_take::<O, NewOffset, S>()
124        } else {
125            self.rebuild_list_by_list::<O, NewOffset, S>()
126        }
127    }
128
129    /// Returns `true` when we are confident that `rebuild_with_take` will
130    /// outperform `rebuild_list_by_list`.
131    ///
132    /// Take is dramatically faster for small lists (often 10-100×) because it
133    /// avoids per-list builder overhead. LBL is the safer default for larger
134    /// lists since its sequential memcpy scales well. We only choose take when
135    /// the average list size is small enough that take clearly dominates.
136    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
137        if num_lists == 0 {
138            return true;
139        }
140        let avg = total_output_elements / num_lists as u64;
141        avg < 128
142    }
143
144    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
145    /// `BufferMut<u64>`, perform a single `take`.
146    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
147        &self,
148    ) -> VortexResult<ListViewArray> {
149        let offsets_canonical = self.offsets().to_primitive();
150        let offsets_slice = offsets_canonical.as_slice::<O>();
151        let sizes_canonical = self.sizes().to_primitive();
152        let sizes_slice = sizes_canonical.as_slice::<S>();
153
154        let len = offsets_slice.len();
155
156        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
157        let mut new_sizes = BufferMut::<S>::with_capacity(len);
158        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
159
160        let mut n_elements = NewOffset::zero();
161        for index in 0..len {
162            if !self.is_valid(index)? {
163                new_offsets.push(n_elements);
164                new_sizes.push(S::zero());
165                continue;
166            }
167
168            let offset = offsets_slice[index];
169            let size = sizes_slice[index];
170            let start = offset.as_();
171            let stop = start + size.as_();
172
173            new_offsets.push(n_elements);
174            new_sizes.push(size);
175            take_indices.extend(start as u64..stop as u64);
176            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
177        }
178
179        let elements = self.elements().take(take_indices.into_array())?;
180        let offsets = new_offsets.into_array();
181        let sizes = new_sizes.into_array();
182
183        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
184        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
185        // array is preserved from the original.
186        Ok(unsafe {
187            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
188                .with_zero_copy_to_list(true)
189        })
190    }
191
192    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
193    /// the relevant range and `extend_from_array` into a typed builder.
194    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
195        &self,
196    ) -> VortexResult<ListViewArray> {
197        let element_dtype = self
198            .dtype()
199            .as_list_element_opt()
200            .vortex_expect("somehow had a canonical list that was not a list");
201
202        let offsets_canonical = self.offsets().to_primitive();
203        let offsets_slice = offsets_canonical.as_slice::<O>();
204        let sizes_canonical = self.sizes().to_primitive();
205        let sizes_slice = sizes_canonical.as_slice::<S>();
206
207        let len = offsets_slice.len();
208
209        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
210        // TODO(connor)[ListView]: Do we really need to do this?
211        // The only reason we need to rebuild the sizes here is that the validity may indicate that
212        // a list is null even though it has a non-zero size. This rebuild will set the size of all
213        // null lists to 0.
214        let mut new_sizes = BufferMut::<S>::with_capacity(len);
215
216        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
217        let elements_canonical = self
218            .elements()
219            .to_canonical()
220            .vortex_expect("canonicalize elements for rebuild")
221            .into_array();
222
223        // Note that we do not know what the exact capacity should be of the new elements since
224        // there could be overlaps in the existing `ListViewArray`.
225        let mut new_elements_builder =
226            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
227
228        let mut n_elements = NewOffset::zero();
229        for index in 0..len {
230            if !self.is_valid(index)? {
231                // For NULL lists, place them after the previous item's data to maintain the
232                // no-overlap invariant for zero-copy to `ListArray` arrays.
233                new_offsets.push(n_elements);
234                new_sizes.push(S::zero());
235                continue;
236            }
237
238            let offset = offsets_slice[index];
239            let size = sizes_slice[index];
240
241            let start = offset.as_();
242            let stop = start + size.as_();
243
244            new_offsets.push(n_elements);
245            new_sizes.push(size);
246            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
247
248            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
249        }
250
251        let offsets = new_offsets.into_array();
252        let sizes = new_sizes.into_array();
253        let elements = new_elements_builder.finish();
254
255        debug_assert_eq!(
256            n_elements.as_(),
257            elements.len(),
258            "The accumulated elements somehow had the wrong length"
259        );
260
261        // SAFETY:
262        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
263        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
264        //   lists).
265        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
266        // - The validity array is preserved from the original array unchanged
267        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
268        //   and no overlaps.
269        Ok(unsafe {
270            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity.clone())
271                .with_zero_copy_to_list(true)
272        })
273    }
274
275    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
276    /// elements, which is defined as a contiguous run of values in the `elements` array that are
277    /// not referecened by any views in the corresponding [`ListViewArray`].
278    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
279        let start = if self.is_zero_copy_to_list() {
280            // If offsets are sorted, then the minimum offset is the first offset.
281            // Note that even if the first view is null, offsets must always be valid, so it is
282            // completely fine for us to use this as a lower-bounded start of the `elements`.
283            self.offset_at(0)
284        } else {
285            self.offsets().statistics().compute_min().vortex_expect(
286                "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
287            )
288        };
289
290        let end = if self.is_zero_copy_to_list() {
291            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
292            // can just grab the last offset and last size.
293            let last_offset = self.offset_at(self.len() - 1);
294            let last_size = self.size_at(self.len() - 1);
295            last_offset + last_size
296        } else {
297            // Offsets and sizes can have different primitive types (e.g. u32 vs u16).
298            // Cast the narrower to the wider since arithmetic requires identical operand types.
299            let (offsets, sizes) = if self.offsets().dtype().as_ptype().byte_width()
300                >= self.sizes().dtype().as_ptype().byte_width()
301            {
302                (
303                    self.offsets().clone(),
304                    self.sizes().cast(self.offsets().dtype().clone())?,
305                )
306            } else {
307                (
308                    self.offsets().cast(self.sizes().dtype().clone())?,
309                    self.sizes().clone(),
310                )
311            };
312
313            let min_max = compute::min_max(
314                &offsets
315                    .binary(sizes, Operator::Add)
316                    .vortex_expect("`offsets + sizes` somehow overflowed"),
317            )
318            .vortex_expect("Something went wrong while computing min and max")
319            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
320
321            min_max
322                .max
323                .as_primitive()
324                .as_::<usize>()
325                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
326        };
327
328        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
329            let offset = <O as FromPrimitive>::from_usize(start)
330                .vortex_expect("unable to convert the min offset `start` into a `usize`");
331            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
332
333            self.offsets()
334                .to_array()
335                .binary(
336                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
337                    Operator::Sub,
338                )
339                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
340        });
341
342        let sliced_elements = self.elements().slice(start..end)?;
343
344        // SAFETY: The only thing we changed was the elements (which we verify through mins and
345        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
346        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
347        // change that property.
348        Ok(unsafe {
349            ListViewArray::new_unchecked(
350                sliced_elements,
351                adjusted_offsets,
352                self.sizes().clone(),
353                self.validity().clone(),
354            )
355            .with_zero_copy_to_list(self.is_zero_copy_to_list())
356        })
357    }
358
359    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
360        if self.is_zero_copy_to_list() {
361            self.rebuild_trim_elements()
362        } else {
363            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
364            // any leading and trailing garbage data.
365            self.rebuild_zero_copy_to_list()
366        }
367    }
368}
369
370#[cfg(test)]
371#[allow(clippy::cast_possible_truncation)]
372mod tests {
373    use vortex_buffer::BitBuffer;
374    use vortex_error::VortexResult;
375
376    use super::ListViewRebuildMode;
377    use crate::IntoArray;
378    use crate::ToCanonical;
379    use crate::arrays::ListViewArray;
380    use crate::arrays::PrimitiveArray;
381    use crate::assert_arrays_eq;
382    use crate::dtype::Nullability;
383    use crate::validity::Validity;
384    use crate::vtable::ValidityHelper;
385
386    #[test]
387    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
388        // Create a list view with overlapping lists: [A, B, C]
389        // List 0: offset=0, size=3 -> [A, B, C]
390        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
391        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
392        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
393        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
394
395        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
396
397        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
398
399        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
400        // Lists should be sequential with no overlaps
401        assert_eq!(flattened.elements().len(), 5);
402
403        // Offsets should be sequential
404        assert_eq!(flattened.offset_at(0), 0);
405        assert_eq!(flattened.size_at(0), 3);
406        assert_eq!(flattened.offset_at(1), 3);
407        assert_eq!(flattened.size_at(1), 2);
408
409        // Verify the data is correct
410        assert_arrays_eq!(
411            flattened.list_elements_at(0).unwrap(),
412            PrimitiveArray::from_iter([1i32, 2, 3])
413        );
414
415        assert_arrays_eq!(
416            flattened.list_elements_at(1).unwrap(),
417            PrimitiveArray::from_iter([2i32, 3])
418        );
419        Ok(())
420    }
421
422    #[test]
423    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
424        use crate::arrays::BoolArray;
425
426        // Create a nullable list view with a null list
427        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
428        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
429        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
430        let validity = Validity::Array(
431            BoolArray::new(
432                BitBuffer::from(vec![true, false, true]),
433                Validity::NonNullable,
434            )
435            .into_array(),
436        );
437
438        let listview = ListViewArray::new(elements, offsets, sizes, validity);
439
440        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
441
442        // Verify nullability is preserved
443        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
444        assert!(flattened.validity().is_valid(0).unwrap());
445        assert!(!flattened.validity().is_valid(1).unwrap());
446        assert!(flattened.validity().is_valid(2).unwrap());
447
448        // Verify valid lists contain correct data
449        assert_arrays_eq!(
450            flattened.list_elements_at(0).unwrap(),
451            PrimitiveArray::from_iter([1i32, 2])
452        );
453
454        assert_arrays_eq!(
455            flattened.list_elements_at(2).unwrap(),
456            PrimitiveArray::from_iter([3i32])
457        );
458        Ok(())
459    }
460
461    #[test]
462    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
463        // Test trimming both leading and trailing unused elements while preserving gaps in the
464        // middle.
465        // Elements: [_, _, A, B, _, C, D, _, _]
466        //            0  1  2  3  4  5  6  7  8
467        // List 0: offset=2, size=2 -> [A, B]
468        // List 1: offset=5, size=2 -> [C, D]
469        // Should trim to: [A, B, _, C, D] with adjusted offsets.
470        let elements =
471            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
472        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
473        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
474
475        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
476
477        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
478
479        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
480        assert_eq!(trimmed.elements().len(), 5);
481
482        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
483        assert_eq!(trimmed.offset_at(0), 0);
484        assert_eq!(trimmed.size_at(0), 2);
485        assert_eq!(trimmed.offset_at(1), 3);
486        assert_eq!(trimmed.size_at(1), 2);
487
488        // Verify the data is correct.
489        assert_arrays_eq!(
490            trimmed.list_elements_at(0).unwrap(),
491            PrimitiveArray::from_iter([1i32, 2])
492        );
493
494        assert_arrays_eq!(
495            trimmed.list_elements_at(1).unwrap(),
496            PrimitiveArray::from_iter([3i32, 4])
497        );
498
499        // Note that element at index 2 (97) is preserved as a gap.
500        let all_elements = trimmed.elements().to_primitive();
501        assert_eq!(all_elements.scalar_at(2).unwrap(), 97i32.into());
502        Ok(())
503    }
504
505    #[test]
506    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
507        // Regression test for issue #5412
508        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
509        // offsets for NULL items to maintain no-overlap invariant
510
511        // Create a ListViewArray with trailing NULLs
512        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
513        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
514        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
515        let validity = Validity::from_iter(vec![true, true, false, false]);
516
517        let listview = ListViewArray::new(elements, offsets, sizes, validity);
518
519        // First rebuild to make it zero-copy-to-list
520        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
521        assert!(rebuilt.is_zero_copy_to_list());
522
523        // Verify NULL items have correct offsets (should not reuse previous offsets)
524        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
525        assert_eq!(rebuilt.offset_at(0), 0);
526        assert_eq!(rebuilt.offset_at(1), 2);
527        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
528        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
529
530        // All sizes should be correct
531        assert_eq!(rebuilt.size_at(0), 2);
532        assert_eq!(rebuilt.size_at(1), 2);
533        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
534        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
535
536        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
537        // This should not panic (issue #5412)
538        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
539
540        // Verify the result is still valid
541        assert!(exact.is_valid(0).unwrap());
542        assert!(exact.is_valid(1).unwrap());
543        assert!(!exact.is_valid(2).unwrap());
544        assert!(!exact.is_valid(3).unwrap());
545
546        // Verify data is preserved
547        assert_arrays_eq!(
548            exact.list_elements_at(0).unwrap(),
549            PrimitiveArray::from_iter([1i32, 2])
550        );
551
552        assert_arrays_eq!(
553            exact.list_elements_at(1).unwrap(),
554            PrimitiveArray::from_iter([3i32, 4])
555        );
556        Ok(())
557    }
558
559    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
560    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
561    #[test]
562    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
563        let mut elems = vec![0i32; 70_005];
564        elems[70_000] = 10;
565        elems[70_001] = 20;
566        elems[70_002] = 30;
567        elems[70_003] = 40;
568        let elements = PrimitiveArray::from_iter(elems).into_array();
569        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
570        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
571
572        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
573        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
574        assert_arrays_eq!(
575            trimmed.list_elements_at(1).unwrap(),
576            PrimitiveArray::from_iter([30i32, 40])
577        );
578        Ok(())
579    }
580
581    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
582    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
583    #[test]
584    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
585        let mut elems = vec![0i32; 70_001];
586        elems[3] = 30;
587        elems[4] = 40;
588        let elements = PrimitiveArray::from_iter(elems).into_array();
589        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
590        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
591
592        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
593        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
594        assert_arrays_eq!(
595            trimmed.list_elements_at(1).unwrap(),
596            PrimitiveArray::from_iter([30i32, 40])
597        );
598        Ok(())
599    }
600
601    // ── should_use_take heuristic tests ────────────────────────────────────
602
603    #[test]
604    fn heuristic_zero_lists_uses_take() {
605        assert!(ListViewArray::should_use_take(0, 0));
606    }
607
608    #[test]
609    fn heuristic_small_lists_use_take() {
610        // avg = 127 → take
611        assert!(ListViewArray::should_use_take(127_000, 1_000));
612        // avg = 128 → LBL
613        assert!(!ListViewArray::should_use_take(128_000, 1_000));
614    }
615}