vortex_vector/listview/vector.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`ListViewVector`].
5
6use std::fmt::Debug;
7use std::ops::RangeBounds;
8use std::sync::Arc;
9
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use super::{ListViewScalar, ListViewVectorMut};
14use crate::primitive::PrimitiveVector;
15use crate::vector_ops::{VectorMutOps, VectorOps};
16use crate::{Scalar, Vector, match_each_integer_pvector};
17
18/// A vector of variable-width lists.
19///
20/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
21/// child `elements` vector.
22///
23/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
24/// fully cover the `elements` vector. This means that multiple views can be pointing to the same
25/// elements.
26///
27/// # Structure
28///
29/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
30/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
31/// vector.
32/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
33/// - `validity`: A [`Mask`] indicating which lists are null.
34#[derive(Debug, Clone)]
35pub struct ListViewVector {
36 /// The child vector of elements.
37 pub(super) elements: Arc<Vector>,
38
39 /// Offsets for each list into the elements vector.
40 ///
41 /// Offsets are always integers, and always non-negative (even if the type is signed).
42 pub(super) offsets: PrimitiveVector,
43
44 /// Sizes (lengths) of each list.
45 ///
46 /// Sizes are always integers, and always non-negative (even if the type is signed).
47 pub(super) sizes: PrimitiveVector,
48
49 /// The validity mask (where `true` represents a list is **not** null).
50 ///
51 /// Note that the `elements` vector will have its own internal validity, denoting if individual
52 /// list elements are null.
53 pub(super) validity: Mask,
54
55 /// The length of the vector (which is the same as the length of the validity mask).
56 ///
57 /// This is stored here as a convenience, as the validity also tracks this information.
58 pub(super) len: usize,
59}
60
61impl ListViewVector {
62 /// Creates a new [`ListViewVector`] from its components.
63 ///
64 /// # Panics
65 ///
66 /// Panics if:
67 ///
68 /// - `offsets` or `sizes` contain nulls values.
69 /// - `offsets`, `sizes`, and `validity` do not all have the same length
70 /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
71 /// would cause overflow)
72 /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
73 /// `elements.len()` (even if the corresponding view is defined as null by the validity
74 /// array).
75 pub fn new(
76 elements: Arc<Vector>,
77 offsets: PrimitiveVector,
78 sizes: PrimitiveVector,
79 validity: Mask,
80 ) -> Self {
81 Self::try_new(elements, offsets, sizes, validity)
82 .vortex_expect("Invalid ListViewVector construction")
83 }
84
85 /// Attempts to create a new [`ListViewVector`] from its components.
86 ///
87 /// # Errors
88 ///
89 /// Returns an error if:
90 ///
91 /// - `offsets` or `sizes` contain nulls values.
92 /// - `offsets`, `sizes`, and `validity` do not all have the same length
93 /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
94 /// would cause overflow)
95 /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
96 /// `elements.len()` (even if the corresponding view is defined as null by the validity
97 /// array).
98 pub fn try_new(
99 elements: Arc<Vector>,
100 offsets: PrimitiveVector,
101 sizes: PrimitiveVector,
102 validity: Mask,
103 ) -> VortexResult<Self> {
104 let len = validity.len();
105
106 vortex_ensure!(
107 offsets.len() == len,
108 "Offsets length {} does not match validity length {len}",
109 offsets.len(),
110 );
111 vortex_ensure!(
112 sizes.len() == len,
113 "Sizes length {} does not match validity length {len}",
114 sizes.len(),
115 );
116
117 vortex_ensure!(
118 offsets.validity().all_true(),
119 "Offsets vector must not contain null values"
120 );
121 vortex_ensure!(
122 sizes.validity().all_true(),
123 "Sizes vector must not contain null values"
124 );
125
126 let offsets_width = offsets.ptype().byte_width();
127 let sizes_width = sizes.ptype().byte_width();
128 vortex_ensure!(
129 sizes_width <= offsets_width,
130 "Sizes integer width {sizes_width} must be \
131 <= offsets integer width {offsets_width} to prevent overflow",
132 );
133
134 // Check that each `offsets[i] + sizes[i] <= elements.len()`.
135 validate_views_bound(elements.len(), &offsets, &sizes)?;
136
137 Ok(Self {
138 elements,
139 offsets,
140 sizes,
141 validity,
142 len,
143 })
144 }
145
146 /// Creates a new [`ListViewVector`] without validation.
147 ///
148 /// # Safety
149 ///
150 /// The caller must ensure all of the following invariants are satisfied:
151 ///
152 /// - `offsets` and `sizes` must be non-nullable integer vectors.
153 /// - `offsets`, `sizes`, and `validity` must have the same length.
154 /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
155 /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
156 /// (even if the corresponding view is defined as null by the validity array).
157 pub unsafe fn new_unchecked(
158 elements: Arc<Vector>,
159 offsets: PrimitiveVector,
160 sizes: PrimitiveVector,
161 validity: Mask,
162 ) -> Self {
163 let len = validity.len();
164
165 if cfg!(debug_assertions) {
166 Self::new(elements, offsets, sizes, validity)
167 } else {
168 Self {
169 elements,
170 offsets,
171 sizes,
172 validity,
173 len,
174 }
175 }
176 }
177
178 /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
179 /// sizes, and validity).
180 pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
181 (self.elements, self.offsets, self.sizes, self.validity)
182 }
183
184 /// Returns a reference to the `elements` vector.
185 #[inline]
186 pub fn elements(&self) -> &Arc<Vector> {
187 &self.elements
188 }
189
190 /// Returns a reference to the integer `offsets` vector.
191 #[inline]
192 pub fn offsets(&self) -> &PrimitiveVector {
193 &self.offsets
194 }
195
196 /// Returns a reference to the integer `sizes` vector.
197 #[inline]
198 pub fn sizes(&self) -> &PrimitiveVector {
199 &self.sizes
200 }
201}
202
203impl VectorOps for ListViewVector {
204 type Mutable = ListViewVectorMut;
205
206 fn len(&self) -> usize {
207 self.len
208 }
209
210 fn validity(&self) -> &Mask {
211 &self.validity
212 }
213
214 fn scalar_at(&self, index: usize) -> Scalar {
215 assert!(index < self.len());
216 ListViewScalar::new(self.slice(index..index + 1)).into()
217 }
218
219 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
220 todo!()
221 }
222
223 fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
224 // Try to unwrap the `Arc`.
225 let elements = match Arc::try_unwrap(self.elements) {
226 Ok(elements) => elements,
227 Err(elements) => return Err(Self { elements, ..self }),
228 };
229
230 // Try to make the validity mutable.
231 let validity = match self.validity.try_into_mut() {
232 Ok(v) => v,
233 Err(validity) => {
234 return Err(Self {
235 elements: Arc::new(elements),
236 validity,
237 ..self
238 });
239 }
240 };
241
242 // Try to make the offsets mutable.
243 let offsets = match self.offsets.try_into_mut() {
244 Ok(mutable_offsets) => mutable_offsets,
245 Err(offsets) => {
246 return Err(Self {
247 offsets,
248 sizes: self.sizes,
249 elements: Arc::new(elements),
250 validity: validity.freeze(),
251 len: self.len,
252 });
253 }
254 };
255
256 // Try to make the sizes mutable.
257 let sizes = match self.sizes.try_into_mut() {
258 Ok(mutable_sizes) => mutable_sizes,
259 Err(sizes) => {
260 return Err(Self {
261 offsets: offsets.freeze(),
262 sizes,
263 elements: Arc::new(elements),
264 validity: validity.freeze(),
265 len: self.len,
266 });
267 }
268 };
269
270 // Try to make the elements mutable.
271 match elements.try_into_mut() {
272 Ok(mut_elements) => Ok(ListViewVectorMut {
273 offsets,
274 sizes,
275 elements: Box::new(mut_elements),
276 validity,
277 len: self.len,
278 }),
279 Err(elements) => Err(Self {
280 offsets: offsets.freeze(),
281 sizes: sizes.freeze(),
282 elements: Arc::new(elements),
283 validity: validity.freeze(),
284 len: self.len,
285 }),
286 }
287 }
288
289 fn into_mut(self) -> ListViewVectorMut {
290 let len = self.len;
291 let validity = self.validity.into_mut();
292 let offsets = self.offsets.into_mut();
293 let sizes = self.sizes.into_mut();
294
295 // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
296 // just a **different** reference count increment).
297 let elements = Arc::try_unwrap(self.elements).unwrap_or_else(|arc| (*arc).clone());
298
299 ListViewVectorMut {
300 offsets,
301 sizes,
302 elements: Box::new(elements.into_mut()),
303 validity,
304 len,
305 }
306 }
307}
308
309// TODO(connor): It would be better to separate everything inside the macros into its own function,
310// but that would require adding another macro that sets a type `$type` to be used by the caller.
311/// Checks that all views are `<= elements_len`.
312#[allow(clippy::cognitive_complexity, clippy::cast_possible_truncation)]
313fn validate_views_bound(
314 elements_len: usize,
315 offsets: &PrimitiveVector,
316 sizes: &PrimitiveVector,
317) -> VortexResult<()> {
318 let len = offsets.len();
319
320 match_each_integer_pvector!(&offsets, |offsets_vector| {
321 match_each_integer_pvector!(&sizes, |sizes_vector| {
322 let offsets_slice = offsets_vector.as_ref();
323 let sizes_slice = sizes_vector.as_ref();
324
325 for i in 0..len {
326 let offset = offsets_slice[i] as usize;
327 let size = sizes_slice[i] as usize;
328 vortex_ensure!(offset + size <= elements_len);
329 }
330 });
331 });
332
333 Ok(())
334}