vortex_vector/listview/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`ListViewVector`].
5
6use std::fmt::Debug;
7use std::ops::RangeBounds;
8use std::sync::Arc;
9
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use super::{ListViewScalar, ListViewVectorMut};
14use crate::primitive::PrimitiveVector;
15use crate::vector_ops::{VectorMutOps, VectorOps};
16use crate::{Scalar, Vector, match_each_integer_pvector};
17
18/// A vector of variable-width lists.
19///
20/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
21/// child `elements` vector.
22///
23/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
24/// fully cover the `elements` vector. This means that multiple views can be pointing to the same
25/// elements.
26///
27/// # Structure
28///
29/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
30/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
31///   vector.
32/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
33/// - `validity`: A [`Mask`] indicating which lists are null.
34#[derive(Debug, Clone)]
35pub struct ListViewVector {
36    /// The child vector of elements.
37    pub(super) elements: Arc<Vector>,
38
39    /// Offsets for each list into the elements vector.
40    ///
41    /// Offsets are always integers, and always non-negative (even if the type is signed).
42    pub(super) offsets: PrimitiveVector,
43
44    /// Sizes (lengths) of each list.
45    ///
46    /// Sizes are always integers, and always non-negative (even if the type is signed).
47    pub(super) sizes: PrimitiveVector,
48
49    /// The validity mask (where `true` represents a list is **not** null).
50    ///
51    /// Note that the `elements` vector will have its own internal validity, denoting if individual
52    /// list elements are null.
53    pub(super) validity: Mask,
54
55    /// The length of the vector (which is the same as the length of the validity mask).
56    ///
57    /// This is stored here as a convenience, as the validity also tracks this information.
58    pub(super) len: usize,
59}
60
61impl ListViewVector {
62    /// Creates a new [`ListViewVector`] from its components.
63    ///
64    /// # Panics
65    ///
66    /// Panics if:
67    ///
68    /// - `offsets` or `sizes` contain nulls values.
69    /// - `offsets`, `sizes`, and `validity` do not all have the same length
70    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
71    ///   would cause overflow)
72    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
73    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
74    ///   array).
75    pub fn new(
76        elements: Arc<Vector>,
77        offsets: PrimitiveVector,
78        sizes: PrimitiveVector,
79        validity: Mask,
80    ) -> Self {
81        Self::try_new(elements, offsets, sizes, validity)
82            .vortex_expect("Invalid ListViewVector construction")
83    }
84
85    /// Attempts to create a new [`ListViewVector`] from its components.
86    ///
87    /// # Errors
88    ///
89    /// Returns an error if:
90    ///
91    /// - `offsets` or `sizes` contain nulls values.
92    /// - `offsets`, `sizes`, and `validity` do not all have the same length
93    /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
94    ///   would cause overflow)
95    /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
96    ///   `elements.len()` (even if the corresponding view is defined as null by the validity
97    ///   array).
98    pub fn try_new(
99        elements: Arc<Vector>,
100        offsets: PrimitiveVector,
101        sizes: PrimitiveVector,
102        validity: Mask,
103    ) -> VortexResult<Self> {
104        let len = validity.len();
105
106        vortex_ensure!(
107            offsets.len() == len,
108            "Offsets length {} does not match validity length {len}",
109            offsets.len(),
110        );
111        vortex_ensure!(
112            sizes.len() == len,
113            "Sizes length {} does not match validity length {len}",
114            sizes.len(),
115        );
116
117        vortex_ensure!(
118            offsets.validity().all_true(),
119            "Offsets vector must not contain null values"
120        );
121        vortex_ensure!(
122            sizes.validity().all_true(),
123            "Sizes vector must not contain null values"
124        );
125
126        let offsets_width = offsets.ptype().byte_width();
127        let sizes_width = sizes.ptype().byte_width();
128        vortex_ensure!(
129            sizes_width <= offsets_width,
130            "Sizes integer width {sizes_width} must be \
131                    <= offsets integer width {offsets_width} to prevent overflow",
132        );
133
134        // Check that each `offsets[i] + sizes[i] <= elements.len()`.
135        validate_views_bound(elements.len(), &offsets, &sizes)?;
136
137        Ok(Self {
138            elements,
139            offsets,
140            sizes,
141            validity,
142            len,
143        })
144    }
145
146    /// Creates a new [`ListViewVector`] without validation.
147    ///
148    /// # Safety
149    ///
150    /// The caller must ensure all of the following invariants are satisfied:
151    ///
152    /// - `offsets` and `sizes` must be non-nullable integer vectors.
153    /// - `offsets`, `sizes`, and `validity` must have the same length.
154    /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
155    /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
156    ///   (even if the corresponding view is defined as null by the validity array).
157    pub unsafe fn new_unchecked(
158        elements: Arc<Vector>,
159        offsets: PrimitiveVector,
160        sizes: PrimitiveVector,
161        validity: Mask,
162    ) -> Self {
163        let len = validity.len();
164
165        if cfg!(debug_assertions) {
166            Self::new(elements, offsets, sizes, validity)
167        } else {
168            Self {
169                elements,
170                offsets,
171                sizes,
172                validity,
173                len,
174            }
175        }
176    }
177
178    /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
179    /// sizes, and validity).
180    pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
181        (self.elements, self.offsets, self.sizes, self.validity)
182    }
183
184    /// Returns a reference to the `elements` vector.
185    #[inline]
186    pub fn elements(&self) -> &Arc<Vector> {
187        &self.elements
188    }
189
190    /// Returns a reference to the integer `offsets` vector.
191    #[inline]
192    pub fn offsets(&self) -> &PrimitiveVector {
193        &self.offsets
194    }
195
196    /// Returns a reference to the integer `sizes` vector.
197    #[inline]
198    pub fn sizes(&self) -> &PrimitiveVector {
199        &self.sizes
200    }
201}
202
203impl VectorOps for ListViewVector {
204    type Mutable = ListViewVectorMut;
205
206    fn len(&self) -> usize {
207        self.len
208    }
209
210    fn validity(&self) -> &Mask {
211        &self.validity
212    }
213
214    fn scalar_at(&self, index: usize) -> Scalar {
215        assert!(index < self.len());
216        ListViewScalar::new(self.slice(index..index + 1)).into()
217    }
218
219    fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
220        todo!()
221    }
222
223    fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
224        // Try to unwrap the `Arc`.
225        let elements = match Arc::try_unwrap(self.elements) {
226            Ok(elements) => elements,
227            Err(elements) => return Err(Self { elements, ..self }),
228        };
229
230        // Try to make the validity mutable.
231        let validity = match self.validity.try_into_mut() {
232            Ok(v) => v,
233            Err(validity) => {
234                return Err(Self {
235                    elements: Arc::new(elements),
236                    validity,
237                    ..self
238                });
239            }
240        };
241
242        // Try to make the offsets mutable.
243        let offsets = match self.offsets.try_into_mut() {
244            Ok(mutable_offsets) => mutable_offsets,
245            Err(offsets) => {
246                return Err(Self {
247                    offsets,
248                    sizes: self.sizes,
249                    elements: Arc::new(elements),
250                    validity: validity.freeze(),
251                    len: self.len,
252                });
253            }
254        };
255
256        // Try to make the sizes mutable.
257        let sizes = match self.sizes.try_into_mut() {
258            Ok(mutable_sizes) => mutable_sizes,
259            Err(sizes) => {
260                return Err(Self {
261                    offsets: offsets.freeze(),
262                    sizes,
263                    elements: Arc::new(elements),
264                    validity: validity.freeze(),
265                    len: self.len,
266                });
267            }
268        };
269
270        // Try to make the elements mutable.
271        match elements.try_into_mut() {
272            Ok(mut_elements) => Ok(ListViewVectorMut {
273                offsets,
274                sizes,
275                elements: Box::new(mut_elements),
276                validity,
277                len: self.len,
278            }),
279            Err(elements) => Err(Self {
280                offsets: offsets.freeze(),
281                sizes: sizes.freeze(),
282                elements: Arc::new(elements),
283                validity: validity.freeze(),
284                len: self.len,
285            }),
286        }
287    }
288
289    fn into_mut(self) -> ListViewVectorMut {
290        let len = self.len;
291        let validity = self.validity.into_mut();
292        let offsets = self.offsets.into_mut();
293        let sizes = self.sizes.into_mut();
294
295        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
296        // just a **different** reference count increment).
297        let elements = Arc::try_unwrap(self.elements).unwrap_or_else(|arc| (*arc).clone());
298
299        ListViewVectorMut {
300            offsets,
301            sizes,
302            elements: Box::new(elements.into_mut()),
303            validity,
304            len,
305        }
306    }
307}
308
309// TODO(connor): It would be better to separate everything inside the macros into its own function,
310// but that would require adding another macro that sets a type `$type` to be used by the caller.
311/// Checks that all views are `<= elements_len`.
312#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)]
313fn validate_views_bound(
314    elements_len: usize,
315    offsets: &PrimitiveVector,
316    sizes: &PrimitiveVector,
317) -> VortexResult<()> {
318    let len = offsets.len();
319
320    match_each_integer_pvector!(&offsets, |offsets_vector| {
321        match_each_integer_pvector!(&sizes, |sizes_vector| {
322            let offsets_slice = offsets_vector.as_ref();
323            let sizes_slice = sizes_vector.as_ref();
324
325            for i in 0..len {
326                let offset = offsets_slice[i] as usize;
327                let size = sizes_slice[i] as usize;
328                vortex_ensure!(offset + size <= elements_len);
329            }
330        });
331    });
332
333    Ok(())
334}