vortex_vector/listview/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`ListViewVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use super::ListViewScalar;
16use super::ListViewVectorMut;
17use crate::Vector;
18use crate::match_each_integer_pvector;
19use crate::match_each_integer_pvector_pair;
20use crate::primitive::PVector;
21use crate::primitive::PrimitiveVector;
22use crate::vector_ops::VectorMutOps;
23use crate::vector_ops::VectorOps;
24
25/// A vector of variable-width lists.
26///
27/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
28/// child `elements` vector.
29///
30/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
31/// fully cover the `elements` vector. This means that multiple views can be pointing to the same
32/// elements.
33///
34/// # Structure
35///
36/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
37/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
38///   vector.
39/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
40/// - `validity`: A [`Mask`] indicating which lists are null.
41#[derive(Debug, Clone)]
42pub struct ListViewVector {
43    /// The child vector of elements.
44    pub(super) elements: Arc<Vector>,
45
46    /// Offsets for each list into the elements vector.
47    ///
48    /// Offsets are always integers, and always non-negative (even if the type is signed).
49    pub(super) offsets: PrimitiveVector,
50
51    /// Sizes (lengths) of each list.
52    ///
53    /// Sizes are always integers, and always non-negative (even if the type is signed).
54    pub(super) sizes: PrimitiveVector,
55
56    /// The validity mask (where `true` represents a list is **not** null).
57    ///
58    /// Note that the `elements` vector will have its own internal validity, denoting if individual
59    /// list elements are null.
60    pub(super) validity: Mask,
61
62    /// The length of the vector (which is the same as the length of the validity mask).
63    ///
64    /// This is stored here as a convenience, as the validity also tracks this information.
65    pub(super) len: usize,
66}
67
68impl PartialEq for ListViewVector {
69    fn eq(&self, other: &Self) -> bool {
70        if self.len != other.len {
71            return false;
72        }
73        if self.validity != other.validity {
74            return false;
75        }
76        if self.elements.len() != other.elements.len() {
77            return false;
78        }
79
80        // Offsets and sizes must have matching types, then compare within the match
81        match_each_integer_pvector_pair!(
82            (&self.offsets, &other.offsets),
83            |self_offsets, other_offsets| {
84                match_each_integer_pvector_pair!(
85                    (&self.sizes, &other.sizes),
86                    |self_sizes, other_sizes| {
87                        listview_eq_impl(
88                            self.len,
89                            &self.validity,
90                            self.elements.as_ref(),
91                            other.elements.as_ref(),
92                            self_offsets,
93                            other_offsets,
94                            self_sizes,
95                            other_sizes,
96                        )
97                    },
98                    { false } // Size types don't match
99                )
100            },
101            { false } // Offset types don't match
102        )
103    }
104}
105
106/// Helper function for ListViewVector equality comparison.
107#[expect(clippy::too_many_arguments)]
108fn listview_eq_impl<O, S>(
109    len: usize,
110    validity: &Mask,
111    self_elements: &Vector,
112    other_elements: &Vector,
113    self_offsets: &PVector<O>,
114    other_offsets: &PVector<O>,
115    self_sizes: &PVector<S>,
116    other_sizes: &PVector<S>,
117) -> bool
118where
119    O: vortex_dtype::NativePType + Copy,
120    S: vortex_dtype::NativePType + Copy,
121    usize: TryFrom<O> + TryFrom<S>,
122{
123    // Fast path: if all lists are invalid, elements don't matter
124    if validity.all_false() {
125        return true;
126    }
127
128    // Fast path: if all lists are valid, compare elements directly
129    if validity.all_true() {
130        return self_elements == other_elements
131            && self_offsets == other_offsets
132            && self_sizes == other_sizes;
133    }
134
135    // Build element-level mask using Vec<bool> to handle overlapping slices correctly
136    let elem_len = self_elements.len();
137    let mut element_valid = vec![false; elem_len];
138    for i in 0..len {
139        if validity.value(i) {
140            let offset = self_offsets
141                .get_as::<usize>(i)
142                .vortex_expect("offset is valid and fits in usize");
143            let size = self_sizes
144                .get_as::<usize>(i)
145                .vortex_expect("size is valid and fits in usize");
146            for j in offset..(offset + size).min(elem_len) {
147                element_valid[j] = true;
148            }
149        }
150    }
151    let element_mask = Mask::from_buffer(vortex_buffer::BitBuffer::from(element_valid));
152
153    // Clone elements and apply the element-level mask
154    let mut self_elems = self_elements.clone();
155    let mut other_elems = other_elements.clone();
156    self_elems.mask_validity(&element_mask);
157    other_elems.mask_validity(&element_mask);
158
159    if self_elems != other_elems {
160        return false;
161    }
162
163    // Compare offsets and sizes at valid positions
164    (0..len).all(|i| {
165        !validity.value(i)
166            || (self_offsets.get_as::<usize>(i) == other_offsets.get_as::<usize>(i)
167                && self_sizes.get_as::<usize>(i) == other_sizes.get_as::<usize>(i))
168    })
169}
170
171impl ListViewVector {
172    /// Creates a new [`ListViewVector`] from its components.
173    ///
174    /// # Panics
175    ///
176    /// Panics if:
177    ///
178    /// - `offsets` or `sizes` contain nulls values.
179    /// - `offsets`, `sizes`, and `validity` do not all have the same length
180    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
181    ///   would cause overflow)
182    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
183    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
184    ///   array).
185    pub fn new(
186        elements: Arc<Vector>,
187        offsets: PrimitiveVector,
188        sizes: PrimitiveVector,
189        validity: Mask,
190    ) -> Self {
191        Self::try_new(elements, offsets, sizes, validity)
192            .vortex_expect("Invalid ListViewVector construction")
193    }
194
195    /// Attempts to create a new [`ListViewVector`] from its components.
196    ///
197    /// # Errors
198    ///
199    /// Returns an error if:
200    ///
201    /// - `offsets` or `sizes` contain nulls values.
202    /// - `offsets`, `sizes`, and `validity` do not all have the same length
203    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
204    ///   would cause overflow)
205    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
206    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
207    ///   array).
208    pub fn try_new(
209        elements: Arc<Vector>,
210        offsets: PrimitiveVector,
211        sizes: PrimitiveVector,
212        validity: Mask,
213    ) -> VortexResult<Self> {
214        let len = validity.len();
215
216        vortex_ensure!(
217            offsets.len() == len,
218            "Offsets length {} does not match validity length {len}",
219            offsets.len(),
220        );
221        vortex_ensure!(
222            sizes.len() == len,
223            "Sizes length {} does not match validity length {len}",
224            sizes.len(),
225        );
226
227        vortex_ensure!(
228            offsets.validity().all_true(),
229            "Offsets vector must not contain null values"
230        );
231        vortex_ensure!(
232            sizes.validity().all_true(),
233            "Sizes vector must not contain null values"
234        );
235
236        let offsets_width = offsets.ptype().byte_width();
237        let sizes_width = sizes.ptype().byte_width();
238        vortex_ensure!(
239            sizes_width <= offsets_width,
240            "Sizes integer width {sizes_width} must be \
241                    <= offsets integer width {offsets_width} to prevent overflow",
242        );
243
244        // Check that each `offsets[i] + sizes[i] <= elements.len()`.
245        validate_views_bound(elements.len(), &offsets, &sizes)?;
246
247        Ok(Self {
248            elements,
249            offsets,
250            sizes,
251            validity,
252            len,
253        })
254    }
255
256    /// Creates a new [`ListViewVector`] without validation.
257    ///
258    /// # Safety
259    ///
260    /// The caller must ensure all of the following invariants are satisfied:
261    ///
262    /// - `offsets` and `sizes` must be non-nullable integer vectors.
263    /// - `offsets`, `sizes`, and `validity` must have the same length.
264    /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
265    /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
266    ///   (even if the corresponding view is defined as null by the validity array).
267    pub unsafe fn new_unchecked(
268        elements: Arc<Vector>,
269        offsets: PrimitiveVector,
270        sizes: PrimitiveVector,
271        validity: Mask,
272    ) -> Self {
273        let len = validity.len();
274
275        if cfg!(debug_assertions) {
276            Self::new(elements, offsets, sizes, validity)
277        } else {
278            Self {
279                elements,
280                offsets,
281                sizes,
282                validity,
283                len,
284            }
285        }
286    }
287
288    /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
289    /// sizes, and validity).
290    pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
291        (self.elements, self.offsets, self.sizes, self.validity)
292    }
293
294    /// Returns a reference to the `elements` vector.
295    #[inline]
296    pub fn elements(&self) -> &Arc<Vector> {
297        &self.elements
298    }
299
300    /// Returns a reference to the integer `offsets` vector.
301    #[inline]
302    pub fn offsets(&self) -> &PrimitiveVector {
303        &self.offsets
304    }
305
306    /// Returns a reference to the integer `sizes` vector.
307    #[inline]
308    pub fn sizes(&self) -> &PrimitiveVector {
309        &self.sizes
310    }
311}
312
313impl VectorOps for ListViewVector {
314    type Mutable = ListViewVectorMut;
315    type Scalar = ListViewScalar;
316
317    fn len(&self) -> usize {
318        self.len
319    }
320
321    fn validity(&self) -> &Mask {
322        &self.validity
323    }
324
325    fn mask_validity(&mut self, mask: &Mask) {
326        self.validity = self.validity.bitand(mask);
327    }
328
329    fn scalar_at(&self, index: usize) -> ListViewScalar {
330        assert!(index < self.len());
331        ListViewScalar::new(self.slice(index..index + 1))
332    }
333
334    fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {
335        let offsets = self.offsets.slice(range.clone());
336        let sizes = self.sizes.slice(range);
337        // SAFETY: offsets/sizes combined still point at valid elements
338        unsafe {
339            Self::new_unchecked(
340                self.elements().clone(),
341                offsets,
342                sizes,
343                self.validity().clone(),
344            )
345        }
346    }
347
348    fn clear(&mut self) {
349        self.offsets.clear();
350        self.sizes.clear();
351        Arc::make_mut(&mut self.elements).clear();
352        self.validity.clear();
353        self.len = 0;
354    }
355
356    fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
357        // Try to unwrap the `Arc`.
358        let elements = match Arc::try_unwrap(self.elements) {
359            Ok(elements) => elements,
360            Err(elements) => return Err(Self { elements, ..self }),
361        };
362
363        // Try to make the validity mutable.
364        let validity = match self.validity.try_into_mut() {
365            Ok(v) => v,
366            Err(validity) => {
367                return Err(Self {
368                    elements: Arc::new(elements),
369                    validity,
370                    ..self
371                });
372            }
373        };
374
375        // Try to make the offsets mutable.
376        let offsets = match self.offsets.try_into_mut() {
377            Ok(mutable_offsets) => mutable_offsets,
378            Err(offsets) => {
379                return Err(Self {
380                    offsets,
381                    sizes: self.sizes,
382                    elements: Arc::new(elements),
383                    validity: validity.freeze(),
384                    len: self.len,
385                });
386            }
387        };
388
389        // Try to make the sizes mutable.
390        let sizes = match self.sizes.try_into_mut() {
391            Ok(mutable_sizes) => mutable_sizes,
392            Err(sizes) => {
393                return Err(Self {
394                    offsets: offsets.freeze(),
395                    sizes,
396                    elements: Arc::new(elements),
397                    validity: validity.freeze(),
398                    len: self.len,
399                });
400            }
401        };
402
403        // Try to make the elements mutable.
404        match elements.try_into_mut() {
405            Ok(mut_elements) => Ok(ListViewVectorMut {
406                offsets,
407                sizes,
408                elements: Box::new(mut_elements),
409                validity,
410                len: self.len,
411            }),
412            Err(elements) => Err(Self {
413                offsets: offsets.freeze(),
414                sizes: sizes.freeze(),
415                elements: Arc::new(elements),
416                validity: validity.freeze(),
417                len: self.len,
418            }),
419        }
420    }
421
422    fn into_mut(self) -> ListViewVectorMut {
423        let len = self.len;
424        let validity = self.validity.into_mut();
425        let offsets = self.offsets.into_mut();
426        let sizes = self.sizes.into_mut();
427
428        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
429        // just a **different** reference count increment).
430        let elements = Arc::try_unwrap(self.elements).unwrap_or_else(|arc| (*arc).clone());
431
432        ListViewVectorMut {
433            offsets,
434            sizes,
435            elements: Box::new(elements.into_mut()),
436            validity,
437            len,
438        }
439    }
440}
441
442// TODO(connor): It would be better to separate everything inside the macros into its own function,
443// but that would require adding another macro that sets a type `$type` to be used by the caller.
444/// Checks that all views are `<= elements_len`.
445#[expect(
446    clippy::cognitive_complexity,
447    reason = "complexity from nested match_each_* macros"
448)]
449#[allow(clippy::cast_possible_truncation)] // casts inside macro
450fn validate_views_bound(
451    elements_len: usize,
452    offsets: &PrimitiveVector,
453    sizes: &PrimitiveVector,
454) -> VortexResult<()> {
455    let len = offsets.len();
456
457    match_each_integer_pvector!(&offsets, |offsets_vector| {
458        match_each_integer_pvector!(&sizes, |sizes_vector| {
459            let offsets_slice = offsets_vector.as_ref();
460            let sizes_slice = sizes_vector.as_ref();
461
462            for i in 0..len {
463                let offset = offsets_slice[i] as usize;
464                let size = sizes_slice[i] as usize;
465                vortex_ensure!(offset + size <= elements_len);
466            }
467        });
468    });
469
470    Ok(())
471}
472
473#[cfg(test)]
474mod tests {
475    use std::sync::Arc;
476
477    use vortex_buffer::Buffer;
478    use vortex_mask::Mask;
479
480    use super::*;
481    use crate::primitive::PVector;
482
483    /// Helper to create a ListViewVector with i32 elements and u32 offsets/sizes
484    fn make_listview(
485        elements: Vec<i32>,
486        offsets: Vec<u32>,
487        sizes: Vec<u32>,
488        validity: Mask,
489    ) -> ListViewVector {
490        let elem_validity = Mask::new_true(elements.len());
491        let elements = PVector::new(Buffer::from(elements), elem_validity);
492        let offsets_len = offsets.len();
493        let sizes_len = sizes.len();
494        let offsets = PVector::new(Buffer::from(offsets), Mask::new_true(offsets_len));
495        let sizes = PVector::new(Buffer::from(sizes), Mask::new_true(sizes_len));
496        ListViewVector::try_new(
497            Arc::new(Vector::from(elements)),
498            PrimitiveVector::from(offsets),
499            PrimitiveVector::from(sizes),
500            validity,
501        )
502        .unwrap()
503    }
504
505    #[test]
506    fn test_listview_eq_all_valid() {
507        // All lists valid - direct element comparison
508        let v1 = make_listview(
509            vec![1, 2, 3, 4, 5],
510            vec![0, 2, 3],
511            vec![2, 1, 2],
512            Mask::new_true(3),
513        );
514        let v2 = make_listview(
515            vec![1, 2, 3, 4, 5],
516            vec![0, 2, 3],
517            vec![2, 1, 2],
518            Mask::new_true(3),
519        );
520        assert_eq!(v1, v2);
521
522        // Different elements should not be equal
523        let v3 = make_listview(
524            vec![1, 2, 99, 4, 5],
525            vec![0, 2, 3],
526            vec![2, 1, 2],
527            Mask::new_true(3),
528        );
529        assert_ne!(v1, v3);
530    }
531
532    #[test]
533    fn test_listview_eq_all_invalid() {
534        // All lists invalid - elements don't matter
535        let v1 = make_listview(
536            vec![1, 2, 3, 4, 5],
537            vec![0, 2, 3],
538            vec![2, 1, 2],
539            Mask::new_false(3),
540        );
541        let v2 = make_listview(
542            vec![99, 99, 99, 99, 99],
543            vec![0, 2, 3],
544            vec![2, 1, 2],
545            Mask::new_false(3),
546        );
547        assert_eq!(v1, v2);
548    }
549
550    #[test]
551    fn test_listview_eq_mixed_validity() {
552        // Lists: [1,2], null, [4,5]
553        // Elements at positions 2 (index of null list's elements) don't matter
554        let validity = Mask::from_indices(3, vec![0, 2]);
555
556        let v1 = make_listview(
557            vec![1, 2, 3, 4, 5],
558            vec![0, 2, 3],
559            vec![2, 1, 2],
560            validity.clone(),
561        );
562        let v2 = make_listview(
563            vec![1, 2, 3, 4, 5],
564            vec![0, 2, 3],
565            vec![2, 1, 2],
566            validity.clone(),
567        );
568        assert_eq!(v1, v2);
569
570        // Element at position 2 is only used by the invalid list - should still be equal
571        let v3 = make_listview(
572            vec![1, 2, 99, 4, 5],
573            vec![0, 2, 3],
574            vec![2, 1, 2],
575            validity.clone(),
576        );
577        assert_eq!(v1, v3, "Invalid list's elements should be ignored");
578
579        // Element at position 0 is used by valid list 0 - should NOT be equal
580        let v4 = make_listview(vec![99, 2, 3, 4, 5], vec![0, 2, 3], vec![2, 1, 2], validity);
581        assert_ne!(v1, v4, "Valid list's elements must match");
582    }
583
584    #[test]
585    fn test_listview_eq_overlapping_slices() {
586        // Overlapping ranges: list0=[0..3], list1=[1..4] (overlapping at positions 1,2)
587        // This tests that the Vec<bool> approach handles overlaps correctly
588        let v1 = make_listview(vec![1, 2, 3, 4], vec![0, 1], vec![3, 3], Mask::new_true(2));
589        let v2 = make_listview(vec![1, 2, 3, 4], vec![0, 1], vec![3, 3], Mask::new_true(2));
590        assert_eq!(v1, v2);
591
592        // Different element in overlapping region
593        let v3 = make_listview(vec![1, 99, 3, 4], vec![0, 1], vec![3, 3], Mask::new_true(2));
594        assert_ne!(v1, v3);
595    }
596
597    #[test]
598    fn test_listview_eq_overlapping_with_invalid() {
599        // list0=[0..3] valid, list1=[1..4] invalid
600        // Positions 1,2 are in overlap but list1 is invalid, so only list0's view matters
601        let validity = Mask::from_indices(2, vec![0]); // only list 0 is valid
602
603        let v1 = make_listview(vec![1, 2, 3, 4], vec![0, 1], vec![3, 3], validity.clone());
604
605        // Element at position 3 is only used by invalid list1 - can differ
606        let v2 = make_listview(vec![1, 2, 3, 99], vec![0, 1], vec![3, 3], validity.clone());
607        assert_eq!(v1, v2, "Element used only by invalid list can differ");
608
609        // Element at position 2 is used by valid list0 - must match
610        let v3 = make_listview(vec![1, 2, 99, 4], vec![0, 1], vec![3, 3], validity);
611        assert_ne!(v1, v3, "Element used by valid list must match");
612    }
613
614    #[test]
615    fn test_listview_eq_different_offsets_sizes() {
616        // Same elements but different offsets at valid positions
617        let v1 = make_listview(vec![1, 2, 3, 4], vec![0, 2], vec![2, 2], Mask::new_true(2));
618        let v2 = make_listview(
619            vec![1, 2, 3, 4],
620            vec![0, 1], // different offset for list1
621            vec![2, 2],
622            Mask::new_true(2),
623        );
624        assert_ne!(v1, v2, "Different offsets at valid positions");
625
626        // Different sizes at valid positions
627        let v3 = make_listview(
628            vec![1, 2, 3, 4],
629            vec![0, 2],
630            vec![2, 1], // different size for list1
631            Mask::new_true(2),
632        );
633        assert_ne!(v1, v3, "Different sizes at valid positions");
634    }
635
636    #[test]
637    fn test_listview_eq_different_validity() {
638        let v1 = make_listview(vec![1, 2, 3, 4], vec![0, 2], vec![2, 2], Mask::new_true(2));
639        let v2 = make_listview(
640            vec![1, 2, 3, 4],
641            vec![0, 2],
642            vec![2, 2],
643            Mask::from_indices(2, vec![0]), // only first list valid
644        );
645        assert_ne!(v1, v2, "Different validity patterns");
646    }
647
648    #[test]
649    fn test_listview_eq_empty() {
650        let v1 = make_listview(vec![], vec![], vec![], Mask::new_true(0));
651        let v2 = make_listview(vec![], vec![], vec![], Mask::new_true(0));
652        assert_eq!(v1, v2);
653    }
654}