vortex_vector/listview/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`ListViewVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use super::ListViewScalar;
17use super::ListViewVectorMut;
18use crate::Vector;
19use crate::match_each_integer_pvector;
20use crate::primitive::PrimitiveVector;
21use crate::vector_ops::VectorMutOps;
22use crate::vector_ops::VectorOps;
23
24/// A vector of variable-width lists.
25///
26/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
27/// child `elements` vector.
28///
29/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
30/// fully cover the `elements` vector. This means that multiple views can be pointing to the same
31/// elements.
32///
33/// # Structure
34///
35/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
36/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
37///   vector.
38/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
39/// - `validity`: A [`Mask`] indicating which lists are null.
40#[derive(Debug, Clone)]
41pub struct ListViewVector {
42    /// The child vector of elements.
43    pub(super) elements: Arc<Vector>,
44
45    /// Offsets for each list into the elements vector.
46    ///
47    /// Offsets are always integers, and always non-negative (even if the type is signed).
48    pub(super) offsets: PrimitiveVector,
49
50    /// Sizes (lengths) of each list.
51    ///
52    /// Sizes are always integers, and always non-negative (even if the type is signed).
53    pub(super) sizes: PrimitiveVector,
54
55    /// The validity mask (where `true` represents a list is **not** null).
56    ///
57    /// Note that the `elements` vector will have its own internal validity, denoting if individual
58    /// list elements are null.
59    pub(super) validity: Mask,
60
61    /// The length of the vector (which is the same as the length of the validity mask).
62    ///
63    /// This is stored here as a convenience, as the validity also tracks this information.
64    pub(super) len: usize,
65}
66
67impl ListViewVector {
68    /// Creates a new [`ListViewVector`] from its components.
69    ///
70    /// # Panics
71    ///
72    /// Panics if:
73    ///
74    /// - `offsets` or `sizes` contain nulls values.
75    /// - `offsets`, `sizes`, and `validity` do not all have the same length
76    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
77    ///   would cause overflow)
78    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
79    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
80    ///   array).
81    pub fn new(
82        elements: Arc<Vector>,
83        offsets: PrimitiveVector,
84        sizes: PrimitiveVector,
85        validity: Mask,
86    ) -> Self {
87        Self::try_new(elements, offsets, sizes, validity)
88            .vortex_expect("Invalid ListViewVector construction")
89    }
90
91    /// Attempts to create a new [`ListViewVector`] from its components.
92    ///
93    /// # Errors
94    ///
95    /// Returns an error if:
96    ///
97    /// - `offsets` or `sizes` contain nulls values.
98    /// - `offsets`, `sizes`, and `validity` do not all have the same length
99    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
100    ///   would cause overflow)
101    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
102    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
103    ///   array).
104    pub fn try_new(
105        elements: Arc<Vector>,
106        offsets: PrimitiveVector,
107        sizes: PrimitiveVector,
108        validity: Mask,
109    ) -> VortexResult<Self> {
110        let len = validity.len();
111
112        vortex_ensure!(
113            offsets.len() == len,
114            "Offsets length {} does not match validity length {len}",
115            offsets.len(),
116        );
117        vortex_ensure!(
118            sizes.len() == len,
119            "Sizes length {} does not match validity length {len}",
120            sizes.len(),
121        );
122
123        vortex_ensure!(
124            offsets.validity().all_true(),
125            "Offsets vector must not contain null values"
126        );
127        vortex_ensure!(
128            sizes.validity().all_true(),
129            "Sizes vector must not contain null values"
130        );
131
132        let offsets_width = offsets.ptype().byte_width();
133        let sizes_width = sizes.ptype().byte_width();
134        vortex_ensure!(
135            sizes_width <= offsets_width,
136            "Sizes integer width {sizes_width} must be \
137                    <= offsets integer width {offsets_width} to prevent overflow",
138        );
139
140        // Check that each `offsets[i] + sizes[i] <= elements.len()`.
141        validate_views_bound(elements.len(), &offsets, &sizes)?;
142
143        Ok(Self {
144            elements,
145            offsets,
146            sizes,
147            validity,
148            len,
149        })
150    }
151
152    /// Creates a new [`ListViewVector`] without validation.
153    ///
154    /// # Safety
155    ///
156    /// The caller must ensure all of the following invariants are satisfied:
157    ///
158    /// - `offsets` and `sizes` must be non-nullable integer vectors.
159    /// - `offsets`, `sizes`, and `validity` must have the same length.
160    /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
161    /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
162    ///   (even if the corresponding view is defined as null by the validity array).
163    pub unsafe fn new_unchecked(
164        elements: Arc<Vector>,
165        offsets: PrimitiveVector,
166        sizes: PrimitiveVector,
167        validity: Mask,
168    ) -> Self {
169        let len = validity.len();
170
171        if cfg!(debug_assertions) {
172            Self::new(elements, offsets, sizes, validity)
173        } else {
174            Self {
175                elements,
176                offsets,
177                sizes,
178                validity,
179                len,
180            }
181        }
182    }
183
184    /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
185    /// sizes, and validity).
186    pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
187        (self.elements, self.offsets, self.sizes, self.validity)
188    }
189
190    /// Returns a reference to the `elements` vector.
191    #[inline]
192    pub fn elements(&self) -> &Arc<Vector> {
193        &self.elements
194    }
195
196    /// Returns a reference to the integer `offsets` vector.
197    #[inline]
198    pub fn offsets(&self) -> &PrimitiveVector {
199        &self.offsets
200    }
201
202    /// Returns a reference to the integer `sizes` vector.
203    #[inline]
204    pub fn sizes(&self) -> &PrimitiveVector {
205        &self.sizes
206    }
207}
208
209impl VectorOps for ListViewVector {
210    type Mutable = ListViewVectorMut;
211    type Scalar = ListViewScalar;
212
213    fn len(&self) -> usize {
214        self.len
215    }
216
217    fn validity(&self) -> &Mask {
218        &self.validity
219    }
220
221    fn mask_validity(&mut self, mask: &Mask) {
222        self.validity = self.validity.bitand(mask);
223    }
224
225    fn scalar_at(&self, index: usize) -> ListViewScalar {
226        assert!(index < self.len());
227        ListViewScalar::new(self.slice(index..index + 1))
228    }
229
230    fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
231        todo!()
232    }
233
234    fn clear(&mut self) {
235        self.offsets.clear();
236        self.sizes.clear();
237        Arc::make_mut(&mut self.elements).clear();
238        self.validity.clear();
239        self.len = 0;
240    }
241
242    fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
243        // Try to unwrap the `Arc`.
244        let elements = match Arc::try_unwrap(self.elements) {
245            Ok(elements) => elements,
246            Err(elements) => return Err(Self { elements, ..self }),
247        };
248
249        // Try to make the validity mutable.
250        let validity = match self.validity.try_into_mut() {
251            Ok(v) => v,
252            Err(validity) => {
253                return Err(Self {
254                    elements: Arc::new(elements),
255                    validity,
256                    ..self
257                });
258            }
259        };
260
261        // Try to make the offsets mutable.
262        let offsets = match self.offsets.try_into_mut() {
263            Ok(mutable_offsets) => mutable_offsets,
264            Err(offsets) => {
265                return Err(Self {
266                    offsets,
267                    sizes: self.sizes,
268                    elements: Arc::new(elements),
269                    validity: validity.freeze(),
270                    len: self.len,
271                });
272            }
273        };
274
275        // Try to make the sizes mutable.
276        let sizes = match self.sizes.try_into_mut() {
277            Ok(mutable_sizes) => mutable_sizes,
278            Err(sizes) => {
279                return Err(Self {
280                    offsets: offsets.freeze(),
281                    sizes,
282                    elements: Arc::new(elements),
283                    validity: validity.freeze(),
284                    len: self.len,
285                });
286            }
287        };
288
289        // Try to make the elements mutable.
290        match elements.try_into_mut() {
291            Ok(mut_elements) => Ok(ListViewVectorMut {
292                offsets,
293                sizes,
294                elements: Box::new(mut_elements),
295                validity,
296                len: self.len,
297            }),
298            Err(elements) => Err(Self {
299                offsets: offsets.freeze(),
300                sizes: sizes.freeze(),
301                elements: Arc::new(elements),
302                validity: validity.freeze(),
303                len: self.len,
304            }),
305        }
306    }
307
308    fn into_mut(self) -> ListViewVectorMut {
309        let len = self.len;
310        let validity = self.validity.into_mut();
311        let offsets = self.offsets.into_mut();
312        let sizes = self.sizes.into_mut();
313
314        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
315        // just a **different** reference count increment).
316        let elements = Arc::try_unwrap(self.elements).unwrap_or_else(|arc| (*arc).clone());
317
318        ListViewVectorMut {
319            offsets,
320            sizes,
321            elements: Box::new(elements.into_mut()),
322            validity,
323            len,
324        }
325    }
326}
327
328// TODO(connor): It would be better to separate everything inside the macros into its own function,
329// but that would require adding another macro that sets a type `$type` to be used by the caller.
330/// Checks that all views are `<= elements_len`.
331#[expect(
332    clippy::cognitive_complexity,
333    reason = "complexity from nested match_each_* macros"
334)]
335#[allow(clippy::cast_possible_truncation)] // casts inside macro
336fn validate_views_bound(
337    elements_len: usize,
338    offsets: &PrimitiveVector,
339    sizes: &PrimitiveVector,
340) -> VortexResult<()> {
341    let len = offsets.len();
342
343    match_each_integer_pvector!(&offsets, |offsets_vector| {
344        match_each_integer_pvector!(&sizes, |sizes_vector| {
345            let offsets_slice = offsets_vector.as_ref();
346            let sizes_slice = sizes_vector.as_ref();
347
348            for i in 0..len {
349                let offset = offsets_slice[i] as usize;
350                let size = sizes_slice[i] as usize;
351                vortex_ensure!(offset + size <= elements_len);
352            }
353        });
354    });
355
356    Ok(())
357}