Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11#[expect(deprecated)]
12use crate::ToCanonical as _;
13use crate::VortexSessionExecute;
14use crate::aggregate_fn::fns::min_max::min_max;
15use crate::arrays::ConstantArray;
16use crate::arrays::ListViewArray;
17use crate::arrays::listview::ListViewArrayExt;
18use crate::builders::builder_with_capacity;
19use crate::builtins::ArrayBuiltins;
20use crate::dtype::DType;
21use crate::dtype::IntegerPType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::match_each_integer_ptype;
25use crate::scalar::Scalar;
26use crate::scalar_fn::fns::operators::Operator;
27
28/// Modes for rebuilding a [`ListViewArray`].
29pub enum ListViewRebuildMode {
30    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
31    /// to a [`ListArray`].
32    ///
33    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
34    /// like a [`ListArray`] but with an additional `sizes` array.
35    ///
36    /// [`ListArray`]: crate::arrays::ListArray
37    MakeZeroCopyToList,
38
39    /// Removes any leading or trailing elements that are unused / not referenced by any views in
40    /// the [`ListViewArray`].
41    TrimElements,
42
43    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
44    ///
45    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
46    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
47    ///
48    /// [`ListArray`]: crate::arrays::ListArray
49    MakeExact,
50
51    // TODO(connor)[ListView]: Implement some version of this.
52    /// Finds the shortest packing / overlapping of list elements.
53    ///
54    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
55    /// this algorithm (but in all seriousness there are many approximate algorithms that could
56    /// work well here).
57    OverlapCompression,
58}
59
60impl ListViewArray {
61    /// Rebuilds the [`ListViewArray`] according to the specified mode.
62    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
63        if self.is_empty() {
64            // SAFETY: An empty array is trivially zero-copyable to a `ListArray`.
65            return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
66        }
67
68        match mode {
69            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
70            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
71            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
72            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
73        }
74    }
75
76    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
77    ///
78    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
79    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
80    /// a [`ListArray`].
81    ///
82    /// [`ListArray`]: crate::arrays::ListArray
83    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
84        if self.is_zero_copy_to_list() {
85            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
86            return Ok(self.clone());
87        }
88
89        let offsets_ptype = self.offsets().dtype().as_ptype();
90        let sizes_ptype = self.sizes().dtype().as_ptype();
91
92        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
93        // is that we want to pass data to systems that expect Arrow data.
94        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
95        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
96        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
97        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
98        // for sizes as well.
99        match_each_integer_ptype!(sizes_ptype, |S| {
100            match offsets_ptype {
101                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
102                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
103                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
104                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
105                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
106                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
107                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
108                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
109                _ => unreachable!("invalid offsets PType"),
110            }
111        })
112    }
113
114    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
115    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
116    /// list size.
117    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
118        &self,
119    ) -> VortexResult<ListViewArray> {
120        #[expect(deprecated)]
121        let sizes_canonical = self.sizes().to_primitive();
122        let total: u64 = sizes_canonical
123            .as_slice::<S>()
124            .iter()
125            .map(|s| (*s).as_() as u64)
126            .sum();
127        if Self::should_use_take(total, self.len()) {
128            self.rebuild_with_take::<O, NewOffset, S>()
129        } else {
130            self.rebuild_list_by_list::<O, NewOffset, S>()
131        }
132    }
133
134    /// Returns `true` when we are confident that `rebuild_with_take` will
135    /// outperform `rebuild_list_by_list`.
136    ///
137    /// Take is dramatically faster for small lists (often 10-100×) because it
138    /// avoids per-list builder overhead. LBL is the safer default for larger
139    /// lists since its sequential memcpy scales well. We only choose take when
140    /// the average list size is small enough that take clearly dominates.
141    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
142        if num_lists == 0 {
143            return true;
144        }
145        let avg = total_output_elements / num_lists as u64;
146        avg < 128
147    }
148
149    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
150    /// `BufferMut<u64>`, perform a single `take`.
151    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
152        &self,
153    ) -> VortexResult<ListViewArray> {
154        #[expect(deprecated)]
155        let offsets_canonical = self.offsets().to_primitive();
156        let offsets_slice = offsets_canonical.as_slice::<O>();
157        #[expect(deprecated)]
158        let sizes_canonical = self.sizes().to_primitive();
159        let sizes_slice = sizes_canonical.as_slice::<S>();
160
161        let len = offsets_slice.len();
162
163        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
164        let mut new_sizes = BufferMut::<S>::with_capacity(len);
165        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
166
167        let mut n_elements = NewOffset::zero();
168        for index in 0..len {
169            if !self.validity()?.is_valid(index)? {
170                new_offsets.push(n_elements);
171                new_sizes.push(S::zero());
172                continue;
173            }
174
175            let offset = offsets_slice[index];
176            let size = sizes_slice[index];
177            let start = offset.as_();
178            let stop = start + size.as_();
179
180            new_offsets.push(n_elements);
181            new_sizes.push(size);
182            take_indices.extend(start as u64..stop as u64);
183            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
184        }
185
186        let elements = self.elements().take(take_indices.into_array())?;
187        let offsets = new_offsets.into_array();
188        let sizes = new_sizes.into_array();
189
190        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
191        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
192        // array is preserved from the original.
193        Ok(unsafe {
194            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
195                .with_zero_copy_to_list(true)
196        })
197    }
198
199    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
200    /// the relevant range and `extend_from_array` into a typed builder.
201    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
202        &self,
203    ) -> VortexResult<ListViewArray> {
204        let element_dtype = self
205            .dtype()
206            .as_list_element_opt()
207            .vortex_expect("somehow had a canonical list that was not a list");
208
209        #[expect(deprecated)]
210        let offsets_canonical = self.offsets().to_primitive();
211        let offsets_slice = offsets_canonical.as_slice::<O>();
212        #[expect(deprecated)]
213        let sizes_canonical = self.sizes().to_primitive();
214        let sizes_slice = sizes_canonical.as_slice::<S>();
215
216        let len = offsets_slice.len();
217
218        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
219        // TODO(connor)[ListView]: Do we really need to do this?
220        // The only reason we need to rebuild the sizes here is that the validity may indicate that
221        // a list is null even though it has a non-zero size. This rebuild will set the size of all
222        // null lists to 0.
223        let mut new_sizes = BufferMut::<S>::with_capacity(len);
224
225        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
226        #[expect(deprecated)]
227        let elements_canonical = self
228            .elements()
229            .to_canonical()
230            .vortex_expect("canonicalize elements for rebuild")
231            .into_array();
232
233        // Note that we do not know what the exact capacity should be of the new elements since
234        // there could be overlaps in the existing `ListViewArray`.
235        let mut new_elements_builder =
236            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
237
238        let mut n_elements = NewOffset::zero();
239        for index in 0..len {
240            if !self.validity()?.is_valid(index)? {
241                // For NULL lists, place them after the previous item's data to maintain the
242                // no-overlap invariant for zero-copy to `ListArray` arrays.
243                new_offsets.push(n_elements);
244                new_sizes.push(S::zero());
245                continue;
246            }
247
248            let offset = offsets_slice[index];
249            let size = sizes_slice[index];
250
251            let start = offset.as_();
252            let stop = start + size.as_();
253
254            new_offsets.push(n_elements);
255            new_sizes.push(size);
256            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
257
258            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
259        }
260
261        let offsets = new_offsets.into_array();
262        let sizes = new_sizes.into_array();
263        let elements = new_elements_builder.finish();
264
265        debug_assert_eq!(
266            n_elements.as_(),
267            elements.len(),
268            "The accumulated elements somehow had the wrong length"
269        );
270
271        // SAFETY:
272        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
273        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
274        //   lists).
275        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
276        // - The validity array is preserved from the original array unchanged
277        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
278        //   and no overlaps.
279        Ok(unsafe {
280            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
281                .with_zero_copy_to_list(true)
282        })
283    }
284
285    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
286    /// elements, which is defined as a contiguous run of values in the `elements` array that are
287    /// not referecened by any views in the corresponding [`ListViewArray`].
288    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
289        let start = if self.is_zero_copy_to_list() {
290            // If offsets are sorted, then the minimum offset is the first offset.
291            // Note that even if the first view is null, offsets must always be valid, so it is
292            // completely fine for us to use this as a lower-bounded start of the `elements`.
293            self.offset_at(0)
294        } else {
295            let mut ctx = LEGACY_SESSION.create_execution_ctx();
296            self.offsets()
297                .statistics()
298                .compute_min(&mut ctx)
299                .vortex_expect(
300                    "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
301                )
302        };
303
304        let end = if self.is_zero_copy_to_list() {
305            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
306            // can just grab the last offset and last size.
307            let last_offset = self.offset_at(self.len() - 1);
308            let last_size = self.size_at(self.len() - 1);
309            last_offset + last_size
310        } else {
311            // Cast offsets and sizes to the widest integer type to prevent
312            // overflow when computing offsets + sizes. The end offset may not
313            // fit in the integer width otherwise.
314            let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
315                PType::U64
316            } else {
317                PType::I64
318            });
319            let offsets = self.offsets().cast(wide_dtype.clone())?;
320            let sizes = self.sizes().cast(wide_dtype)?;
321
322            let mut ctx = LEGACY_SESSION.create_execution_ctx();
323            let min_max = min_max(
324                &offsets
325                    .binary(sizes, Operator::Add)
326                    .vortex_expect("`offsets + sizes` somehow overflowed"),
327                &mut ctx,
328            )
329            .vortex_expect("Something went wrong while computing min and max")
330            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
331
332            min_max
333                .max
334                .as_primitive()
335                .as_::<usize>()
336                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
337        };
338
339        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
340            let offset = <O as FromPrimitive>::from_usize(start)
341                .vortex_expect("unable to convert the min offset `start` into a `usize`");
342            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
343
344            self.offsets()
345                .clone()
346                .binary(
347                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
348                    Operator::Sub,
349                )
350                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
351        });
352
353        let sliced_elements = self.elements().slice(start..end)?;
354
355        // SAFETY: The only thing we changed was the elements (which we verify through mins and
356        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
357        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
358        // change that property.
359        Ok(unsafe {
360            ListViewArray::new_unchecked(
361                sliced_elements,
362                adjusted_offsets,
363                self.sizes().clone(),
364                self.validity()?,
365            )
366            .with_zero_copy_to_list(self.is_zero_copy_to_list())
367        })
368    }
369
370    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
371        if self.is_zero_copy_to_list() {
372            self.rebuild_trim_elements()
373        } else {
374            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
375            // any leading and trailing garbage data.
376            self.rebuild_zero_copy_to_list()
377        }
378    }
379}
380
381#[cfg(test)]
382mod tests {
383    use vortex_buffer::BitBuffer;
384    use vortex_error::VortexResult;
385
386    use super::ListViewRebuildMode;
387    use crate::IntoArray;
388    use crate::LEGACY_SESSION;
389    #[expect(deprecated)]
390    use crate::ToCanonical as _;
391    use crate::VortexSessionExecute;
392    use crate::arrays::ListViewArray;
393    use crate::arrays::PrimitiveArray;
394    use crate::arrays::listview::ListViewArrayExt;
395    use crate::assert_arrays_eq;
396    use crate::dtype::Nullability;
397    use crate::validity::Validity;
398
399    #[test]
400    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
401        // Create a list view with overlapping lists: [A, B, C]
402        // List 0: offset=0, size=3 -> [A, B, C]
403        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
404        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
405        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
406        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
407
408        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
409
410        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
411
412        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
413        // Lists should be sequential with no overlaps
414        assert_eq!(flattened.elements().len(), 5);
415
416        // Offsets should be sequential
417        assert_eq!(flattened.offset_at(0), 0);
418        assert_eq!(flattened.size_at(0), 3);
419        assert_eq!(flattened.offset_at(1), 3);
420        assert_eq!(flattened.size_at(1), 2);
421
422        // Verify the data is correct
423        assert_arrays_eq!(
424            flattened.list_elements_at(0)?,
425            PrimitiveArray::from_iter([1i32, 2, 3])
426        );
427
428        assert_arrays_eq!(
429            flattened.list_elements_at(1)?,
430            PrimitiveArray::from_iter([2i32, 3])
431        );
432        Ok(())
433    }
434
435    #[test]
436    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
437        use crate::arrays::BoolArray;
438
439        // Create a nullable list view with a null list
440        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
441        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
442        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
443        let validity = Validity::Array(
444            BoolArray::new(
445                BitBuffer::from(vec![true, false, true]),
446                Validity::NonNullable,
447            )
448            .into_array(),
449        );
450
451        let listview = ListViewArray::new(elements, offsets, sizes, validity);
452
453        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
454
455        // Verify nullability is preserved
456        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
457        assert!(flattened.validity()?.is_valid(0)?);
458        assert!(!flattened.validity()?.is_valid(1)?);
459        assert!(flattened.validity()?.is_valid(2)?);
460
461        // Verify valid lists contain correct data
462        assert_arrays_eq!(
463            flattened.list_elements_at(0)?,
464            PrimitiveArray::from_iter([1i32, 2])
465        );
466
467        assert_arrays_eq!(
468            flattened.list_elements_at(2)?,
469            PrimitiveArray::from_iter([3i32])
470        );
471        Ok(())
472    }
473
474    #[test]
475    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
476        // Test trimming both leading and trailing unused elements while preserving gaps in the
477        // middle.
478        // Elements: [_, _, A, B, _, C, D, _, _]
479        //            0  1  2  3  4  5  6  7  8
480        // List 0: offset=2, size=2 -> [A, B]
481        // List 1: offset=5, size=2 -> [C, D]
482        // Should trim to: [A, B, _, C, D] with adjusted offsets.
483        let elements =
484            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
485        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
486        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
487
488        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
489
490        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
491
492        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
493        assert_eq!(trimmed.elements().len(), 5);
494
495        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
496        assert_eq!(trimmed.offset_at(0), 0);
497        assert_eq!(trimmed.size_at(0), 2);
498        assert_eq!(trimmed.offset_at(1), 3);
499        assert_eq!(trimmed.size_at(1), 2);
500
501        // Verify the data is correct.
502        assert_arrays_eq!(
503            trimmed.list_elements_at(0)?,
504            PrimitiveArray::from_iter([1i32, 2])
505        );
506
507        assert_arrays_eq!(
508            trimmed.list_elements_at(1)?,
509            PrimitiveArray::from_iter([3i32, 4])
510        );
511
512        // Note that element at index 2 (97) is preserved as a gap.
513        #[expect(deprecated)]
514        let all_elements = trimmed.elements().to_primitive();
515        assert_eq!(
516            all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
517            97i32.into()
518        );
519        Ok(())
520    }
521
522    #[test]
523    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
524        // Regression test for issue #5412
525        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
526        // offsets for NULL items to maintain no-overlap invariant
527
528        // Create a ListViewArray with trailing NULLs
529        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
530        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
531        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
532        let validity = Validity::from_iter(vec![true, true, false, false]);
533
534        let listview = ListViewArray::new(elements, offsets, sizes, validity);
535
536        // First rebuild to make it zero-copy-to-list
537        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
538        assert!(rebuilt.is_zero_copy_to_list());
539
540        // Verify NULL items have correct offsets (should not reuse previous offsets)
541        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
542        assert_eq!(rebuilt.offset_at(0), 0);
543        assert_eq!(rebuilt.offset_at(1), 2);
544        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
545        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
546
547        // All sizes should be correct
548        assert_eq!(rebuilt.size_at(0), 2);
549        assert_eq!(rebuilt.size_at(1), 2);
550        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
551        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
552
553        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
554        // This should not panic (issue #5412)
555        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
556
557        // Verify the result is still valid
558        assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
559        assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
560        assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
561        assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
562
563        // Verify data is preserved
564        assert_arrays_eq!(
565            exact.list_elements_at(0)?,
566            PrimitiveArray::from_iter([1i32, 2])
567        );
568
569        assert_arrays_eq!(
570            exact.list_elements_at(1)?,
571            PrimitiveArray::from_iter([3i32, 4])
572        );
573        Ok(())
574    }
575
576    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
577    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
578    #[test]
579    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
580        let mut elems = vec![0i32; 70_005];
581        elems[70_000] = 10;
582        elems[70_001] = 20;
583        elems[70_002] = 30;
584        elems[70_003] = 40;
585        let elements = PrimitiveArray::from_iter(elems).into_array();
586        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
587        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
588
589        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
590        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
591        assert_arrays_eq!(
592            trimmed.list_elements_at(1)?,
593            PrimitiveArray::from_iter([30i32, 40])
594        );
595        Ok(())
596    }
597
598    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
599    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
600    #[test]
601    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
602        let mut elems = vec![0i32; 70_001];
603        elems[3] = 30;
604        elems[4] = 40;
605        let elements = PrimitiveArray::from_iter(elems).into_array();
606        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
607        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
608
609        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
610        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
611        assert_arrays_eq!(
612            trimmed.list_elements_at(1)?,
613            PrimitiveArray::from_iter([30i32, 40])
614        );
615        Ok(())
616    }
617
618    // ── should_use_take heuristic tests ────────────────────────────────────
619
620    #[test]
621    fn heuristic_zero_lists_uses_take() {
622        assert!(ListViewArray::should_use_take(0, 0));
623    }
624
625    #[test]
626    fn heuristic_small_lists_use_take() {
627        // avg = 127 → take
628        assert!(ListViewArray::should_use_take(127_000, 1_000));
629        // avg = 128 → LBL
630        assert!(!ListViewArray::should_use_take(128_000, 1_000));
631    }
632
633    /// Regression test for <https://github.com/vortex-data/vortex/issues/6973>.
634    /// Both offsets and sizes are u8, and offset + size exceeds u8::MAX.
635    #[test]
636    fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
637        let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
638        let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
639        let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
640
641        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
642        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
643
644        // min(offsets) = 0, so nothing to trim; output should equal input.
645        assert_arrays_eq!(trimmed, listview);
646        Ok(())
647    }
648}