Skip to main content

vortex_array/arrays/listview/
rebuild.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::FromPrimitive;
5use vortex_buffer::BufferMut;
6use vortex_error::VortexExpect;
7use vortex_error::VortexResult;
8
9use crate::IntoArray;
10use crate::LEGACY_SESSION;
11use crate::ToCanonical;
12use crate::VortexSessionExecute;
13use crate::aggregate_fn::fns::min_max::min_max;
14use crate::arrays::ConstantArray;
15use crate::arrays::ListViewArray;
16use crate::arrays::listview::ListViewArrayExt;
17use crate::builders::builder_with_capacity;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::DType;
20use crate::dtype::IntegerPType;
21use crate::dtype::Nullability;
22use crate::dtype::PType;
23use crate::match_each_integer_ptype;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26
27/// Modes for rebuilding a [`ListViewArray`].
28pub enum ListViewRebuildMode {
29    /// Removes all unused data and flattens out all list data, such that the array is zero-copyable
30    /// to a [`ListArray`].
31    ///
32    /// This mode will deduplicate all overlapping list views, such that the [`ListViewArray`] looks
33    /// like a [`ListArray`] but with an additional `sizes` array.
34    ///
35    /// [`ListArray`]: crate::arrays::ListArray
36    MakeZeroCopyToList,
37
38    /// Removes any leading or trailing elements that are unused / not referenced by any views in
39    /// the [`ListViewArray`].
40    TrimElements,
41
42    /// Equivalent to `MakeZeroCopyToList` plus `TrimElements`.
43    ///
44    /// This is useful when concatenating multiple [`ListViewArray`]s together to create a new
45    /// [`ListViewArray`] that is also zero-copy to a [`ListArray`].
46    ///
47    /// [`ListArray`]: crate::arrays::ListArray
48    MakeExact,
49
50    // TODO(connor)[ListView]: Implement some version of this.
51    /// Finds the shortest packing / overlapping of list elements.
52    ///
53    /// This problem is known to be NP-hard, so maybe when someone proves that P=NP we can implement
54    /// this algorithm (but in all seriousness there are many approximate algorithms that could
55    /// work well here).
56    OverlapCompression,
57}
58
59impl ListViewArray {
60    /// Rebuilds the [`ListViewArray`] according to the specified mode.
61    pub fn rebuild(&self, mode: ListViewRebuildMode) -> VortexResult<ListViewArray> {
62        if self.is_empty() {
63            // SAFETY: An empty array is trivially zero-copyable to a `ListArray`.
64            return Ok(unsafe { self.clone().with_zero_copy_to_list(true) });
65        }
66
67        match mode {
68            ListViewRebuildMode::MakeZeroCopyToList => self.rebuild_zero_copy_to_list(),
69            ListViewRebuildMode::TrimElements => self.rebuild_trim_elements(),
70            ListViewRebuildMode::MakeExact => self.rebuild_make_exact(),
71            ListViewRebuildMode::OverlapCompression => unimplemented!("Does P=NP?"),
72        }
73    }
74
75    /// Rebuilds a [`ListViewArray`], removing all data overlaps and creating a flattened layout.
76    ///
77    /// This is useful when the `elements` child array of the [`ListViewArray`] might have
78    /// overlapping, duplicate, and garbage data, and we want to have fully sequential data like
79    /// a [`ListArray`].
80    ///
81    /// [`ListArray`]: crate::arrays::ListArray
82    fn rebuild_zero_copy_to_list(&self) -> VortexResult<ListViewArray> {
83        if self.is_zero_copy_to_list() {
84            // Note that since everything in `ListViewArray` is `Arc`ed, this is quite cheap.
85            return Ok(self.clone());
86        }
87
88        let offsets_ptype = self.offsets().dtype().as_ptype();
89        let sizes_ptype = self.sizes().dtype().as_ptype();
90
91        // One of the main purposes behind adding this "zero-copyable to `ListArray`" optimization
92        // is that we want to pass data to systems that expect Arrow data.
93        // The arrow specification only allows for `i32` and `i64` offset and sizes types, so in
94        // order to also make `ListView` zero-copyable to **Arrow**'s `ListArray` (not just Vortex's
95        // `ListArray`), we rebuild the offsets as 32-bit or 64-bit integer types.
96        // TODO(connor)[ListView]: This is true for `sizes` as well, we should do this conversion
97        // for sizes as well.
98        match_each_integer_ptype!(sizes_ptype, |S| {
99            match offsets_ptype {
100                PType::U8 => self.naive_rebuild::<u8, u32, S>(),
101                PType::U16 => self.naive_rebuild::<u16, u32, S>(),
102                PType::U32 => self.naive_rebuild::<u32, u32, S>(),
103                PType::U64 => self.naive_rebuild::<u64, u64, S>(),
104                PType::I8 => self.naive_rebuild::<i8, i32, S>(),
105                PType::I16 => self.naive_rebuild::<i16, i32, S>(),
106                PType::I32 => self.naive_rebuild::<i32, i32, S>(),
107                PType::I64 => self.naive_rebuild::<i64, i64, S>(),
108                _ => unreachable!("invalid offsets PType"),
109            }
110        })
111    }
112
113    /// Picks between [`rebuild_with_take`](Self::rebuild_with_take) and
114    /// [`rebuild_list_by_list`](Self::rebuild_list_by_list) based on element dtype and average
115    /// list size.
116    fn naive_rebuild<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
117        &self,
118    ) -> VortexResult<ListViewArray> {
119        let sizes_canonical = self.sizes().to_primitive();
120        let total: u64 = sizes_canonical
121            .as_slice::<S>()
122            .iter()
123            .map(|s| (*s).as_() as u64)
124            .sum();
125        if Self::should_use_take(total, self.len()) {
126            self.rebuild_with_take::<O, NewOffset, S>()
127        } else {
128            self.rebuild_list_by_list::<O, NewOffset, S>()
129        }
130    }
131
132    /// Returns `true` when we are confident that `rebuild_with_take` will
133    /// outperform `rebuild_list_by_list`.
134    ///
135    /// Take is dramatically faster for small lists (often 10-100×) because it
136    /// avoids per-list builder overhead. LBL is the safer default for larger
137    /// lists since its sequential memcpy scales well. We only choose take when
138    /// the average list size is small enough that take clearly dominates.
139    fn should_use_take(total_output_elements: u64, num_lists: usize) -> bool {
140        if num_lists == 0 {
141            return true;
142        }
143        let avg = total_output_elements / num_lists as u64;
144        avg < 128
145    }
146
147    /// Rebuilds elements using a single bulk `take`: collect all element indices into a flat
148    /// `BufferMut<u64>`, perform a single `take`.
149    fn rebuild_with_take<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
150        &self,
151    ) -> VortexResult<ListViewArray> {
152        let offsets_canonical = self.offsets().to_primitive();
153        let offsets_slice = offsets_canonical.as_slice::<O>();
154        let sizes_canonical = self.sizes().to_primitive();
155        let sizes_slice = sizes_canonical.as_slice::<S>();
156
157        let len = offsets_slice.len();
158
159        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
160        let mut new_sizes = BufferMut::<S>::with_capacity(len);
161        let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());
162
163        let mut n_elements = NewOffset::zero();
164        for index in 0..len {
165            if !self.validity()?.is_valid(index)? {
166                new_offsets.push(n_elements);
167                new_sizes.push(S::zero());
168                continue;
169            }
170
171            let offset = offsets_slice[index];
172            let size = sizes_slice[index];
173            let start = offset.as_();
174            let stop = start + size.as_();
175
176            new_offsets.push(n_elements);
177            new_sizes.push(size);
178            take_indices.extend(start as u64..stop as u64);
179            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
180        }
181
182        let elements = self.elements().take(take_indices.into_array())?;
183        let offsets = new_offsets.into_array();
184        let sizes = new_sizes.into_array();
185
186        // SAFETY: same invariants as `rebuild_list_by_list` — offsets are sequential and
187        // non-overlapping, all (offset, size) pairs reference valid elements, and the validity
188        // array is preserved from the original.
189        Ok(unsafe {
190            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
191                .with_zero_copy_to_list(true)
192        })
193    }
194
195    /// Rebuilds elements list-by-list: canonicalize elements upfront, then for each list `slice`
196    /// the relevant range and `extend_from_array` into a typed builder.
197    fn rebuild_list_by_list<O: IntegerPType, NewOffset: IntegerPType, S: IntegerPType>(
198        &self,
199    ) -> VortexResult<ListViewArray> {
200        let element_dtype = self
201            .dtype()
202            .as_list_element_opt()
203            .vortex_expect("somehow had a canonical list that was not a list");
204
205        let offsets_canonical = self.offsets().to_primitive();
206        let offsets_slice = offsets_canonical.as_slice::<O>();
207        let sizes_canonical = self.sizes().to_primitive();
208        let sizes_slice = sizes_canonical.as_slice::<S>();
209
210        let len = offsets_slice.len();
211
212        let mut new_offsets = BufferMut::<NewOffset>::with_capacity(len);
213        // TODO(connor)[ListView]: Do we really need to do this?
214        // The only reason we need to rebuild the sizes here is that the validity may indicate that
215        // a list is null even though it has a non-zero size. This rebuild will set the size of all
216        // null lists to 0.
217        let mut new_sizes = BufferMut::<S>::with_capacity(len);
218
219        // Canonicalize the elements up front as we will be slicing the elements quite a lot.
220        let elements_canonical = self
221            .elements()
222            .to_canonical()
223            .vortex_expect("canonicalize elements for rebuild")
224            .into_array();
225
226        // Note that we do not know what the exact capacity should be of the new elements since
227        // there could be overlaps in the existing `ListViewArray`.
228        let mut new_elements_builder =
229            builder_with_capacity(element_dtype.as_ref(), self.elements().len());
230
231        let mut n_elements = NewOffset::zero();
232        for index in 0..len {
233            if !self.validity()?.is_valid(index)? {
234                // For NULL lists, place them after the previous item's data to maintain the
235                // no-overlap invariant for zero-copy to `ListArray` arrays.
236                new_offsets.push(n_elements);
237                new_sizes.push(S::zero());
238                continue;
239            }
240
241            let offset = offsets_slice[index];
242            let size = sizes_slice[index];
243
244            let start = offset.as_();
245            let stop = start + size.as_();
246
247            new_offsets.push(n_elements);
248            new_sizes.push(size);
249            new_elements_builder.extend_from_array(&elements_canonical.slice(start..stop)?);
250
251            n_elements += num_traits::cast(size).vortex_expect("Cast failed");
252        }
253
254        let offsets = new_offsets.into_array();
255        let sizes = new_sizes.into_array();
256        let elements = new_elements_builder.finish();
257
258        debug_assert_eq!(
259            n_elements.as_(),
260            elements.len(),
261            "The accumulated elements somehow had the wrong length"
262        );
263
264        // SAFETY:
265        // - All offsets are sequential and non-overlapping (`n_elements` tracks running total).
266        // - Each `offset[i] + size[i]` equals `offset[i+1]` for all valid indices (including null
267        //   lists).
268        // - All elements referenced by (offset, size) pairs exist within the new `elements` array.
269        // - The validity array is preserved from the original array unchanged
270        // - The array satisfies the zero-copy-to-list property by having sorted offsets, no gaps,
271        //   and no overlaps.
272        Ok(unsafe {
273            ListViewArray::new_unchecked(elements, offsets, sizes, self.validity()?)
274                .with_zero_copy_to_list(true)
275        })
276    }
277
278    /// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing
279    /// elements, which is defined as a contiguous run of values in the `elements` array that are
280    /// not referecened by any views in the corresponding [`ListViewArray`].
281    fn rebuild_trim_elements(&self) -> VortexResult<ListViewArray> {
282        let start = if self.is_zero_copy_to_list() {
283            // If offsets are sorted, then the minimum offset is the first offset.
284            // Note that even if the first view is null, offsets must always be valid, so it is
285            // completely fine for us to use this as a lower-bounded start of the `elements`.
286            self.offset_at(0)
287        } else {
288            let mut ctx = LEGACY_SESSION.create_execution_ctx();
289            self.offsets()
290                .statistics()
291                .compute_min(&mut ctx)
292                .vortex_expect(
293                    "[ListViewArray::rebuild]: `offsets` must report min statistic that is a `usize`",
294                )
295        };
296
297        let end = if self.is_zero_copy_to_list() {
298            // If offsets are sorted and there are no overlaps (views are always "increasing"), we
299            // can just grab the last offset and last size.
300            let last_offset = self.offset_at(self.len() - 1);
301            let last_size = self.size_at(self.len() - 1);
302            last_offset + last_size
303        } else {
304            // Cast offsets and sizes to the widest integer type to prevent
305            // overflow when computing offsets + sizes. The end offset may not
306            // fit in the integer width otherwise.
307            let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
308                PType::U64
309            } else {
310                PType::I64
311            });
312            let offsets = self.offsets().cast(wide_dtype.clone())?;
313            let sizes = self.sizes().cast(wide_dtype)?;
314
315            let mut ctx = LEGACY_SESSION.create_execution_ctx();
316            let min_max = min_max(
317                &offsets
318                    .binary(sizes, Operator::Add)
319                    .vortex_expect("`offsets + sizes` somehow overflowed"),
320                &mut ctx,
321            )
322            .vortex_expect("Something went wrong while computing min and max")
323            .vortex_expect("We checked that the array was not empty in the top-level `rebuild`");
324
325            min_max
326                .max
327                .as_primitive()
328                .as_::<usize>()
329                .vortex_expect("unable to interpret the max `offset + size` as a `usize`")
330        };
331
332        let adjusted_offsets = match_each_integer_ptype!(self.offsets().dtype().as_ptype(), |O| {
333            let offset = <O as FromPrimitive>::from_usize(start)
334                .vortex_expect("unable to convert the min offset `start` into a `usize`");
335            let scalar = Scalar::primitive(offset, Nullability::NonNullable);
336
337            self.offsets()
338                .clone()
339                .binary(
340                    ConstantArray::new(scalar, self.offsets().len()).into_array(),
341                    Operator::Sub,
342                )
343                .vortex_expect("was somehow unable to adjust offsets down by their minimum")
344        });
345
346        let sliced_elements = self.elements().slice(start..end)?;
347
348        // SAFETY: The only thing we changed was the elements (which we verify through mins and
349        // maxes that all adjusted offsets + sizes are within the correct bounds), so the parameters
350        // are valid. And if the original array was zero-copyable to list, trimming elements doesn't
351        // change that property.
352        Ok(unsafe {
353            ListViewArray::new_unchecked(
354                sliced_elements,
355                adjusted_offsets,
356                self.sizes().clone(),
357                self.validity()?,
358            )
359            .with_zero_copy_to_list(self.is_zero_copy_to_list())
360        })
361    }
362
363    fn rebuild_make_exact(&self) -> VortexResult<ListViewArray> {
364        if self.is_zero_copy_to_list() {
365            self.rebuild_trim_elements()
366        } else {
367            // When we completely rebuild the `ListViewArray`, we get the benefit that we also trim
368            // any leading and trailing garbage data.
369            self.rebuild_zero_copy_to_list()
370        }
371    }
372}
373
374#[cfg(test)]
375mod tests {
376    use vortex_buffer::BitBuffer;
377    use vortex_error::VortexResult;
378
379    use super::ListViewRebuildMode;
380    use crate::IntoArray;
381    use crate::LEGACY_SESSION;
382    use crate::ToCanonical;
383    use crate::VortexSessionExecute;
384    use crate::arrays::ListViewArray;
385    use crate::arrays::PrimitiveArray;
386    use crate::arrays::listview::ListViewArrayExt;
387    use crate::assert_arrays_eq;
388    use crate::dtype::Nullability;
389    use crate::validity::Validity;
390
391    #[test]
392    fn test_rebuild_flatten_removes_overlaps() -> VortexResult<()> {
393        // Create a list view with overlapping lists: [A, B, C]
394        // List 0: offset=0, size=3 -> [A, B, C]
395        // List 1: offset=1, size=2 -> [B, C] (overlaps with List 0)
396        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
397        let offsets = PrimitiveArray::from_iter(vec![0u32, 1]).into_array();
398        let sizes = PrimitiveArray::from_iter(vec![3u32, 2]).into_array();
399
400        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
401
402        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
403
404        // After flatten: elements should be [A, B, C, B, C] = [1, 2, 3, 2, 3]
405        // Lists should be sequential with no overlaps
406        assert_eq!(flattened.elements().len(), 5);
407
408        // Offsets should be sequential
409        assert_eq!(flattened.offset_at(0), 0);
410        assert_eq!(flattened.size_at(0), 3);
411        assert_eq!(flattened.offset_at(1), 3);
412        assert_eq!(flattened.size_at(1), 2);
413
414        // Verify the data is correct
415        assert_arrays_eq!(
416            flattened.list_elements_at(0).unwrap(),
417            PrimitiveArray::from_iter([1i32, 2, 3])
418        );
419
420        assert_arrays_eq!(
421            flattened.list_elements_at(1).unwrap(),
422            PrimitiveArray::from_iter([2i32, 3])
423        );
424        Ok(())
425    }
426
427    #[test]
428    fn test_rebuild_flatten_with_nullable() -> VortexResult<()> {
429        use crate::arrays::BoolArray;
430
431        // Create a nullable list view with a null list
432        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3]).into_array();
433        let offsets = PrimitiveArray::from_iter(vec![0u32, 1, 2]).into_array();
434        let sizes = PrimitiveArray::from_iter(vec![2u32, 1, 1]).into_array();
435        let validity = Validity::Array(
436            BoolArray::new(
437                BitBuffer::from(vec![true, false, true]),
438                Validity::NonNullable,
439            )
440            .into_array(),
441        );
442
443        let listview = ListViewArray::new(elements, offsets, sizes, validity);
444
445        let flattened = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
446
447        // Verify nullability is preserved
448        assert_eq!(flattened.dtype().nullability(), Nullability::Nullable);
449        assert!(flattened.validity()?.is_valid(0).unwrap());
450        assert!(!flattened.validity()?.is_valid(1).unwrap());
451        assert!(flattened.validity()?.is_valid(2).unwrap());
452
453        // Verify valid lists contain correct data
454        assert_arrays_eq!(
455            flattened.list_elements_at(0).unwrap(),
456            PrimitiveArray::from_iter([1i32, 2])
457        );
458
459        assert_arrays_eq!(
460            flattened.list_elements_at(2).unwrap(),
461            PrimitiveArray::from_iter([3i32])
462        );
463        Ok(())
464    }
465
466    #[test]
467    fn test_rebuild_trim_elements_basic() -> VortexResult<()> {
468        // Test trimming both leading and trailing unused elements while preserving gaps in the
469        // middle.
470        // Elements: [_, _, A, B, _, C, D, _, _]
471        //            0  1  2  3  4  5  6  7  8
472        // List 0: offset=2, size=2 -> [A, B]
473        // List 1: offset=5, size=2 -> [C, D]
474        // Should trim to: [A, B, _, C, D] with adjusted offsets.
475        let elements =
476            PrimitiveArray::from_iter(vec![99i32, 98, 1, 2, 97, 3, 4, 96, 95]).into_array();
477        let offsets = PrimitiveArray::from_iter(vec![2u32, 5]).into_array();
478        let sizes = PrimitiveArray::from_iter(vec![2u32, 2]).into_array();
479
480        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
481
482        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
483
484        // After trimming: elements should be [A, B, _, C, D] = [1, 2, 97, 3, 4].
485        assert_eq!(trimmed.elements().len(), 5);
486
487        // Offsets should be adjusted: old offset 2 -> new offset 0, old offset 5 -> new offset 3.
488        assert_eq!(trimmed.offset_at(0), 0);
489        assert_eq!(trimmed.size_at(0), 2);
490        assert_eq!(trimmed.offset_at(1), 3);
491        assert_eq!(trimmed.size_at(1), 2);
492
493        // Verify the data is correct.
494        assert_arrays_eq!(
495            trimmed.list_elements_at(0).unwrap(),
496            PrimitiveArray::from_iter([1i32, 2])
497        );
498
499        assert_arrays_eq!(
500            trimmed.list_elements_at(1).unwrap(),
501            PrimitiveArray::from_iter([3i32, 4])
502        );
503
504        // Note that element at index 2 (97) is preserved as a gap.
505        let all_elements = trimmed.elements().to_primitive();
506        assert_eq!(
507            all_elements
508                .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
509                .unwrap(),
510            97i32.into()
511        );
512        Ok(())
513    }
514
515    #[test]
516    fn test_rebuild_with_trailing_nulls_regression() -> VortexResult<()> {
517        // Regression test for issue #5412
518        // Tests that zero-copy-to-list arrays with trailing NULLs correctly calculate
519        // offsets for NULL items to maintain no-overlap invariant
520
521        // Create a ListViewArray with trailing NULLs
522        let elements = PrimitiveArray::from_iter(vec![1i32, 2, 3, 4]).into_array();
523        let offsets = PrimitiveArray::from_iter(vec![0u32, 2, 0, 0]).into_array();
524        let sizes = PrimitiveArray::from_iter(vec![2u32, 2, 0, 0]).into_array();
525        let validity = Validity::from_iter(vec![true, true, false, false]);
526
527        let listview = ListViewArray::new(elements, offsets, sizes, validity);
528
529        // First rebuild to make it zero-copy-to-list
530        let rebuilt = listview.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?;
531        assert!(rebuilt.is_zero_copy_to_list());
532
533        // Verify NULL items have correct offsets (should not reuse previous offsets)
534        // After rebuild: offsets should be [0, 2, 4, 4] for zero-copy-to-list
535        assert_eq!(rebuilt.offset_at(0), 0);
536        assert_eq!(rebuilt.offset_at(1), 2);
537        assert_eq!(rebuilt.offset_at(2), 4); // NULL should be at position 4
538        assert_eq!(rebuilt.offset_at(3), 4); // Second NULL also at position 4
539
540        // All sizes should be correct
541        assert_eq!(rebuilt.size_at(0), 2);
542        assert_eq!(rebuilt.size_at(1), 2);
543        assert_eq!(rebuilt.size_at(2), 0); // NULL has size 0
544        assert_eq!(rebuilt.size_at(3), 0); // NULL has size 0
545
546        // Now rebuild with MakeExact (which calls naive_rebuild then trim_elements)
547        // This should not panic (issue #5412)
548        let exact = rebuilt.rebuild(ListViewRebuildMode::MakeExact)?;
549
550        // Verify the result is still valid
551        assert!(
552            exact
553                .is_valid(0, &mut LEGACY_SESSION.create_execution_ctx())
554                .unwrap()
555        );
556        assert!(
557            exact
558                .is_valid(1, &mut LEGACY_SESSION.create_execution_ctx())
559                .unwrap()
560        );
561        assert!(
562            !exact
563                .is_valid(2, &mut LEGACY_SESSION.create_execution_ctx())
564                .unwrap()
565        );
566        assert!(
567            !exact
568                .is_valid(3, &mut LEGACY_SESSION.create_execution_ctx())
569                .unwrap()
570        );
571
572        // Verify data is preserved
573        assert_arrays_eq!(
574            exact.list_elements_at(0).unwrap(),
575            PrimitiveArray::from_iter([1i32, 2])
576        );
577
578        assert_arrays_eq!(
579            exact.list_elements_at(1).unwrap(),
580            PrimitiveArray::from_iter([3i32, 4])
581        );
582        Ok(())
583    }
584
585    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
586    /// u32 offsets exceed u16::MAX, so u16 sizes are widened to u32 for the add.
587    #[test]
588    fn test_rebuild_trim_elements_offsets_wider_than_sizes() -> VortexResult<()> {
589        let mut elems = vec![0i32; 70_005];
590        elems[70_000] = 10;
591        elems[70_001] = 20;
592        elems[70_002] = 30;
593        elems[70_003] = 40;
594        let elements = PrimitiveArray::from_iter(elems).into_array();
595        let offsets = PrimitiveArray::from_iter(vec![70_000u32, 70_002]).into_array();
596        let sizes = PrimitiveArray::from_iter(vec![2u16, 2]).into_array();
597
598        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
599        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
600        assert_arrays_eq!(
601            trimmed.list_elements_at(1).unwrap(),
602            PrimitiveArray::from_iter([30i32, 40])
603        );
604        Ok(())
605    }
606
607    /// Regression test for <https://github.com/vortex-data/vortex/issues/6773>.
608    /// u32 sizes exceed u16::MAX, so u16 offsets are widened to u32 for the add.
609    #[test]
610    fn test_rebuild_trim_elements_sizes_wider_than_offsets() -> VortexResult<()> {
611        let mut elems = vec![0i32; 70_001];
612        elems[3] = 30;
613        elems[4] = 40;
614        let elements = PrimitiveArray::from_iter(elems).into_array();
615        let offsets = PrimitiveArray::from_iter(vec![1u16, 3]).into_array();
616        let sizes = PrimitiveArray::from_iter(vec![70_000u32, 2]).into_array();
617
618        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
619        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
620        assert_arrays_eq!(
621            trimmed.list_elements_at(1).unwrap(),
622            PrimitiveArray::from_iter([30i32, 40])
623        );
624        Ok(())
625    }
626
627    // ── should_use_take heuristic tests ────────────────────────────────────
628
629    #[test]
630    fn heuristic_zero_lists_uses_take() {
631        assert!(ListViewArray::should_use_take(0, 0));
632    }
633
634    #[test]
635    fn heuristic_small_lists_use_take() {
636        // avg = 127 → take
637        assert!(ListViewArray::should_use_take(127_000, 1_000));
638        // avg = 128 → LBL
639        assert!(!ListViewArray::should_use_take(128_000, 1_000));
640    }
641
642    /// Regression test for <https://github.com/vortex-data/vortex/issues/6973>.
643    /// Both offsets and sizes are u8, and offset + size exceeds u8::MAX.
644    #[test]
645    fn test_rebuild_trim_elements_sum_overflows_type() -> VortexResult<()> {
646        let elements = PrimitiveArray::from_iter(vec![0i32; 261]).into_array();
647        let offsets = PrimitiveArray::from_iter(vec![215u8, 0]).into_array();
648        let sizes = PrimitiveArray::from_iter(vec![46u8, 10]).into_array();
649
650        let listview = ListViewArray::new(elements, offsets, sizes, Validity::NonNullable);
651        let trimmed = listview.rebuild(ListViewRebuildMode::TrimElements)?;
652
653        // min(offsets) = 0, so nothing to trim; output should equal input.
654        assert_arrays_eq!(trimmed, listview);
655        Ok(())
656    }
657}