Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11#[expect(deprecated)]
12use crate::ToCanonical as _;
13use crate::VortexSessionExecute;
14use crate::aggregate_fn::fns::min_max::min_max;
15use crate::arrays::ConstantArray;
16use crate::arrays::ListViewArray;
17use crate::arrays::listview::ListViewArrayExt;
18use crate::builders::builder_with_capacity;
19use crate::builtins::ArrayBuiltins;
20use crate::dtype::DType;
21use crate::dtype::IntegerPType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::match_each_integer_ptype;
25use crate::scalar::Scalar;
26use crate::scalar_fn::fns::operators::Operator;
27
28/// Density threshold to decide whether to rebuild a sparse `ListViewArray`.
29///
30/// A `ListViewArray` can accumulate unreferenced bytes in its `elements` buffer after
31/// metadata-only operations like `take` and `filter`. When density (referenced fraction of `elements`)
32/// falls below this threshold, the benefits of a rebuild may outweigh its cost.
33///
34/// This is a somewhat arbitrary rule-of-thumb and may be suboptimal depending on different use cases and
35/// list element dtypes.
36pub const DEFAULT_REBUILD_DENSITY_THRESHOLD: f32 = 0.1;
37
38/// Modes for rebuilding a [`ListViewArray`].
39pub enum ListViewRebuildMode {
40    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
41    /// to a [`ListArray`].
42    ///
43    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
44    /// like a [`ListArray`] but with an additional `sizes` array.
45    ///
46    /// [`ListArray`]: crate::arrays::ListArray
47    MakeZeroCopyToList,
48
49    /// Removes any leading or trailing elements that are unused / not referenced by any views in
50    /// the [`ListViewArray`].
51    TrimElements,
52
53    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
54    ///
55    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
56    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
57    ///
58    /// [`ListArray`]: crate::arrays::ListArray
59    MakeExact,
60
61    // TODO(connor)[ListView]: Implement some version of this.
62    /// Finds the shortest packing / overlapping of list elements.
63    ///
64    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
65    /// this algorithm (but in all seriousness there are many approximate algorithms that could
66    /// work well here).
67    OverlapCompression,
68}
69
70impl ListViewArray {
71    /// Rebuilds the [`ListViewArray`] according to the specified mode.
72    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
73        if self.is_empty() {
74            // SAFETY: An empty array is trivially zero-copyable to a `ListArray`.
75            return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
76        }
77
78        match mode {
79            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
80            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
81            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
82            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
83        }
84    }
85
86    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
87    ///
88    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
89    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
90    /// a [`ListArray`].
91    ///
92    /// [`ListArray`]: crate::arrays::ListArray
93    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
94        if self.is_zero_copy_to_list() {
95            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
96            return Ok(self.clone());
97        }
98
99        let offsets_ptype = self.offsets().dtype().as_ptype();
100        let sizes_ptype = self.sizes().dtype().as_ptype();
101
102        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
103        // is that we want to pass data to systems that expect Arrow data.
104        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
105        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
106        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
107        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
108        // for sizes as well.
109        match_each_integer_ptype!(sizes_ptype, |S| {
110            match offsets_ptype {
111                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
112                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
113                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
114                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
115                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
116                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
117                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
118                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
119                _ => unreachable!("invalid offsets PType"),
120            }
121        })
122    }
123
124    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
125    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
126    /// list size.
127    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
128        &self,
129    ) -> VortexResult<ListViewArray> {
130        #[expect(deprecated)]
131        let sizes_canonical = self.sizes().to_primitive();
132        let total: u64 = sizes_canonical
133            .as_slice::<S>()
134            .iter()
135            .map(|s| (*s).as_() as u64)
136            .sum();
137        if Self::should_use_take(total, self.len()) {
138            self.rebuild_with_take::<O, NewOffset, S>()
139        } else {
140            self.rebuild_list_by_list::<O, NewOffset, S>()
141        }
142    }
143
144    /// Returns `true` when we are confident that `rebuild_with_take` will
145    /// outperform `rebuild_list_by_list`.
146    ///
147    /// Take is dramatically faster for small lists (often 10-100×) because it
148    /// avoids per-list builder overhead. LBL is the safer default for larger
149    /// lists since its sequential memcpy scales well. We only choose take when
150    /// the average list size is small enough that take clearly dominates.
151    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
152        if num_lists == 0 {
153            return true;
154        }
155        let avg = total_output_elements / num_lists as u64;
156        avg < 128
157    }
158
159    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
160    /// `BufferMut<u64>`, perform a single `take`.
161    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
162        &self,
163    ) -> VortexResult<ListViewArray> {
164        #[expect(deprecated)]
165        let offsets_canonical = self.offsets().to_primitive();
166        let offsets_slice = offsets_canonical.as_slice::<O>();
167        #[expect(deprecated)]
168        let sizes_canonical = self.sizes().to_primitive();
169        let sizes_slice = sizes_canonical.as_slice::<S>();
170
171        let len = offsets_slice.len();
172
173        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
174        let mut new_sizes = BufferMut::<S>::with_capacity(len);
175        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
176
177        let mut n_elements = NewOffset::zero();
178        for index in 0..len {
179            if !self.validity()?.is_valid(index)? {
180                new_offsets.push(n_elements);
181                new_sizes.push(S::zero());
182                continue;
183            }
184
185            let offset = offsets_slice[index];
186            let size = sizes_slice[index];
187            let start = offset.as_();
188            let stop = start + size.as_();
189
190            new_offsets.push(n_elements);
191            new_sizes.push(size);
192            take_indices.extend(start as u64..stop as u64);
193            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
194        }
195
196        let elements = self.elements().take(take_indices.into_array())?;
197        let offsets = new_offsets.into_array();
198        let sizes = new_sizes.into_array();
199
200        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
201        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
202        // array is preserved from the original.
203        Ok(unsafe {
204            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
205                .with_zero_copy_to_list(true)
206        })
207    }
208
209    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
210    /// the relevant range and `extend_from_array` into a typed builder.
211    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
212        &self,
213    ) -> VortexResult<ListViewArray> {
214        let element_dtype = self
215            .dtype()
216            .as_list_element_opt()
217            .vortex_expect("somehow had a canonical list that was not a list");
218
219        #[expect(deprecated)]
220        let offsets_canonical = self.offsets().to_primitive();
221        let offsets_slice = offsets_canonical.as_slice::<O>();
222        #[expect(deprecated)]
223        let sizes_canonical = self.sizes().to_primitive();
224        let sizes_slice = sizes_canonical.as_slice::<S>();
225
226        let len = offsets_slice.len();
227
228        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
229        // TODO(connor)[ListView]: Do we really need to do this?
230        // The only reason we need to rebuild the sizes here is that the validity may indicate that
231        // a list is null even though it has a non-zero size. This rebuild will set the size of all
232        // null lists to 0.
233        let mut new_sizes = BufferMut::<S>::with_capacity(len);
234
235        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
236        #[expect(deprecated)]
237        let elements_canonical = self
238            .elements()
239            .to_canonical()
240            .vortex_expect("canonicalize elements for rebuild")
241            .into_array();
242
243        // Note that we do not know what the exact capacity should be of the new elements since
244        // there could be overlaps in the existing `ListViewArray`.
245        let mut new_elements_builder =
246            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
247
248        let mut n_elements = NewOffset::zero();
249        for index in 0..len {
250            if !self.validity()?.is_valid(index)? {
251                // For NULL lists, place them after the previous item's data to maintain the
252                // no-overlap invariant for zero-copy to `ListArray` arrays.
253                new_offsets.push(n_elements);
254                new_sizes.push(S::zero());
255                continue;
256            }
257
258            let offset = offsets_slice[index];
259            let size = sizes_slice[index];
260
261            let start = offset.as_();
262            let stop = start + size.as_();
263
264            new_offsets.push(n_elements);
265            new_sizes.push(size);
266            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
267
268            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
269        }
270
271        let offsets = new_offsets.into_array();
272        let sizes = new_sizes.into_array();
273        let elements = new_elements_builder.finish();
274
275        debug_assert_eq!(
276            n_elements.as_(),
277            elements.len(),
278            "The accumulated elements somehow had the wrong length"
279        );
280
281        // SAFETY:
282        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
283        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
284        //   lists).
285        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
286        // - The validity array is preserved from the original array unchanged
287        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
288        //   and no overlaps.
289        Ok(unsafe {
290            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
291                .with_zero_copy_to_list(true)
292        })
293    }
294
295    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
296    /// elements, which is defined as a contiguous run of values in the `elements` array that are
297    /// not referecened by any views in the corresponding [`ListViewArray`].
298    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
299        let start = if self.is_zero_copy_to_list() {
300            // If offsets are sorted, then the minimum offset is the first offset.
301            // Note that even if the first view is null, offsets must always be valid, so it is
302            // completely fine for us to use this as a lower-bounded start of the `elements`.
303            self.offset_at(0)
304        } else {
305            let mut ctx = LEGACY_SESSION.create_execution_ctx();
306            self.offsets()
307                .statistics()
308                .compute_min(&mut ctx)
309                .vortex_expect(
310                    "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
311                )
312        };
313
314        let end = if self.is_zero_copy_to_list() {
315            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
316            // can just grab the last offset and last size.
317            let last_offset = self.offset_at(self.len() - 1);
318            let last_size = self.size_at(self.len() - 1);
319            last_offset + last_size
320        } else {
321            // Cast offsets and sizes to the widest integer type to prevent
322            // overflow when computing offsets + sizes. The end offset may not
323            // fit in the integer width otherwise.
324            let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
325                PType::U64
326            } else {
327                PType::I64
328            });
329            let offsets = self.offsets().cast(wide_dtype.clone())?;
330            let sizes = self.sizes().cast(wide_dtype)?;
331
332            let mut ctx = LEGACY_SESSION.create_execution_ctx();
333            let min_max = min_max(
334                &offsets
335                    .binary(sizes, Operator::Add)
336                    .vortex_expect("`offsets + sizes` somehow overflowed"),
337                &mut ctx,
338            )
339            .vortex_expect("Something went wrong while computing min and max")
340            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
341
342            min_max
343                .max
344                .as_primitive()
345                .as_::<usize>()
346                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
347        };
348
349        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
350            let offset = <O as FromPrimitive>::from_usize(start)
351                .vortex_expect("unable to convert the min offset `start` into a `usize`");
352            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
353
354            self.offsets()
355                .clone()
356                .binary(
357                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
358                    Operator::Sub,
359                )
360                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
361        });
362
363        let sliced_elements = self.elements().slice(start..end)?;
364
365        // SAFETY: The only thing we changed was the elements (which we verify through mins and
366        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
367        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
368        // change that property.
369        Ok(unsafe {
370            ListViewArray::new_unchecked(
371                sliced_elements,
372                adjusted_offsets,
373                self.sizes().clone(),
374                self.validity()?,
375            )
376            .with_zero_copy_to_list(self.is_zero_copy_to_list())
377        })
378    }
379
380    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
381        if self.is_zero_copy_to_list() {
382            self.rebuild_trim_elements()
383        } else {
384            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
385            // any leading and trailing garbage data.
386            self.rebuild_zero_copy_to_list()
387        }
388    }
389}
390
391#[cfg(test)]
392mod tests {
393    use vortex_buffer::BitBuffer;
394    use vortex_error::VortexResult;
395
396    use super::ListViewRebuildMode;
397    use crate::IntoArray;
398    use crate::LEGACY_SESSION;
399    #[expect(deprecated)]
400    use crate::ToCanonical as _;
401    use crate::VortexSessionExecute;
402    use crate::arrays::ListViewArray;
403    use crate::arrays::PrimitiveArray;
404    use crate::arrays::listview::ListViewArrayExt;
405    use crate::assert_arrays_eq;
406    use crate::dtype::Nullability;
407    use crate::validity::Validity;
408
409    #[test]
410    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
411        // Create a list view with overlapping lists: [A, B, C]
412        // List 0: offset=0, size=3 -> [A, B, C]
413        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
414        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
415        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
416        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
417
418        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
419
420        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
421
422        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
423        // Lists should be sequential with no overlaps
424        assert_eq!(flattened.elements().len(), 5);
425
426        // Offsets should be sequential
427        assert_eq!(flattened.offset_at(0), 0);
428        assert_eq!(flattened.size_at(0), 3);
429        assert_eq!(flattened.offset_at(1), 3);
430        assert_eq!(flattened.size_at(1), 2);
431
432        // Verify the data is correct
433        assert_arrays_eq!(
434            flattened.list_elements_at(0)?,
435            PrimitiveArray::from_iter([1i32, 2, 3])
436        );
437
438        assert_arrays_eq!(
439            flattened.list_elements_at(1)?,
440            PrimitiveArray::from_iter([2i32, 3])
441        );
442        Ok(())
443    }
444
445    #[test]
446    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
447        use crate::arrays::BoolArray;
448
449        // Create a nullable list view with a null list
450        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
451        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
452        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
453        let validity = Validity::Array(
454            BoolArray::new(
455                BitBuffer::from(vec![true, false, true]),
456                Validity::NonNullable,
457            )
458            .into_array(),
459        );
460
461        let listview = ListViewArray::new(elements, offsets, sizes, validity);
462
463        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
464
465        // Verify nullability is preserved
466        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
467        assert!(flattened.validity()?.is_valid(0)?);
468        assert!(!flattened.validity()?.is_valid(1)?);
469        assert!(flattened.validity()?.is_valid(2)?);
470
471        // Verify valid lists contain correct data
472        assert_arrays_eq!(
473            flattened.list_elements_at(0)?,
474            PrimitiveArray::from_iter([1i32, 2])
475        );
476
477        assert_arrays_eq!(
478            flattened.list_elements_at(2)?,
479            PrimitiveArray::from_iter([3i32])
480        );
481        Ok(())
482    }
483
484    #[test]
485    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
486        // Test trimming both leading and trailing unused elements while preserving gaps in the
487        // middle.
488        // Elements: [_, _, A, B, _, C, D, _, _]
489        //            0  1  2  3  4  5  6  7  8
490        // List 0: offset=2, size=2 -> [A, B]
491        // List 1: offset=5, size=2 -> [C, D]
492        // Should trim to: [A, B, _, C, D] with adjusted offsets.
493        let elements =
494            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
495        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
496        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
497
498        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
499
500        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
501
502        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
503        assert_eq!(trimmed.elements().len(), 5);
504
505        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
506        assert_eq!(trimmed.offset_at(0), 0);
507        assert_eq!(trimmed.size_at(0), 2);
508        assert_eq!(trimmed.offset_at(1), 3);
509        assert_eq!(trimmed.size_at(1), 2);
510
511        // Verify the data is correct.
512        assert_arrays_eq!(
513            trimmed.list_elements_at(0)?,
514            PrimitiveArray::from_iter([1i32, 2])
515        );
516
517        assert_arrays_eq!(
518            trimmed.list_elements_at(1)?,
519            PrimitiveArray::from_iter([3i32, 4])
520        );
521
522        // Note that element at index 2 (97) is preserved as a gap.
523        #[expect(deprecated)]
524        let all_elements = trimmed.elements().to_primitive();
525        assert_eq!(
526            all_elements.execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())?,
527            97i32.into()
528        );
529        Ok(())
530    }
531
532    #[test]
533    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
534        // Regression test for issue #5412
535        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
536        // offsets for NULL items to maintain no-overlap invariant
537
538        // Create a ListViewArray with trailing NULLs
539        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
540        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
541        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
542        let validity = Validity::from_iter(vec![true, true, false, false]);
543
544        let listview = ListViewArray::new(elements, offsets, sizes, validity);
545
546        // First rebuild to make it zero-copy-to-list
547        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
548        assert!(rebuilt.is_zero_copy_to_list());
549
550        // Verify NULL items have correct offsets (should not reuse previous offsets)
551        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
552        assert_eq!(rebuilt.offset_at(0), 0);
553        assert_eq!(rebuilt.offset_at(1), 2);
554        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
555        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
556
557        // All sizes should be correct
558        assert_eq!(rebuilt.size_at(0), 2);
559        assert_eq!(rebuilt.size_at(1), 2);
560        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
561        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
562
563        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
564        // This should not panic (issue #5412)
565        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
566
567        // Verify the result is still valid
568        assert!(exact.is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())?);
569        assert!(exact.is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())?);
570        assert!(!exact.is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())?);
571        assert!(!exact.is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())?);
572
573        // Verify data is preserved
574        assert_arrays_eq!(
575            exact.list_elements_at(0)?,
576            PrimitiveArray::from_iter([1i32, 2])
577        );
578
579        assert_arrays_eq!(
580            exact.list_elements_at(1)?,
581            PrimitiveArray::from_iter([3i32, 4])
582        );
583        Ok(())
584    }
585
586    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
587    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
588    #[test]
589    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
590        let mut elems = vec![0i32; 70_005];
591        elems[70_000] = 10;
592        elems[70_001] = 20;
593        elems[70_002] = 30;
594        elems[70_003] = 40;
595        let elements = PrimitiveArray::from_iter(elems).into_array();
596        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
597        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
598
599        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
600        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
601        assert_arrays_eq!(
602            trimmed.list_elements_at(1)?,
603            PrimitiveArray::from_iter([30i32, 40])
604        );
605        Ok(())
606    }
607
608    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
609    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
610    #[test]
611    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
612        let mut elems = vec![0i32; 70_001];
613        elems[3] = 30;
614        elems[4] = 40;
615        let elements = PrimitiveArray::from_iter(elems).into_array();
616        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
617        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
618
619        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
620        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
621        assert_arrays_eq!(
622            trimmed.list_elements_at(1)?,
623            PrimitiveArray::from_iter([30i32, 40])
624        );
625        Ok(())
626    }
627
628    // ── should_use_take heuristic tests ────────────────────────────────────
629
630    #[test]
631    fn heuristic_zero_lists_uses_take() {
632        assert!(ListViewArray::should_use_take(0, 0));
633    }
634
635    #[test]
636    fn heuristic_small_lists_use_take() {
637        // avg = 127 → take
638        assert!(ListViewArray::should_use_take(127_000, 1_000));
639        // avg = 128 → LBL
640        assert!(!ListViewArray::should_use_take(128_000, 1_000));
641    }
642
643    /// Regression test for <https://github.com/vortex-data/vortex/issues/6973>.
644    /// Both offsets and sizes are u8, and offset + size exceeds u8::MAX.
645    #[test]
646    fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
647        let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
648        let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
649        let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
650
651        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
652        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
653
654        // min(offsets) = 0, so nothing to trim; output should equal input.
655        assert_arrays_eq!(trimmed, listview);
656        Ok(())
657    }
658}