vortex_vector/listview/vector.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`ListViewVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use super::ListViewScalar;
17use super::ListViewVectorMut;
18use crate::Vector;
19use crate::match_each_integer_pvector;
20use crate::primitive::PrimitiveVector;
21use crate::vector_ops::VectorMutOps;
22use crate::vector_ops::VectorOps;
23
24/// A vector of variable-width lists.
25///
26/// Each list is defined by 2 integers: an offset and a size (a "list view"), which point into a
27/// child `elements` vector.
28///
29/// Note that the list views **do not** need to be sorted, nor do they have to be contiguous or
30/// fully cover the `elements` vector. This means that multiple views can be pointing to the same
31/// elements.
32///
33/// # Structure
34///
35/// - `elements`: The child vector of all list elements, stored as an [`Arc<Vector>`].
36/// - `offsets`: A [`PrimitiveVector`] containing the starting offset of each list in the `elements`
37/// vector.
38/// - `sizes`: A [`PrimitiveVector`] containing the size (number of elements) of each list.
39/// - `validity`: A [`Mask`] indicating which lists are null.
40#[derive(Debug, Clone)]
41pub struct ListViewVector {
42 /// The child vector of elements.
43 pub(super) elements: Arc<Vector>,
44
45 /// Offsets for each list into the elements vector.
46 ///
47 /// Offsets are always integers, and always non-negative (even if the type is signed).
48 pub(super) offsets: PrimitiveVector,
49
50 /// Sizes (lengths) of each list.
51 ///
52 /// Sizes are always integers, and always non-negative (even if the type is signed).
53 pub(super) sizes: PrimitiveVector,
54
55 /// The validity mask (where `true` represents a list is **not** null).
56 ///
57 /// Note that the `elements` vector will have its own internal validity, denoting if individual
58 /// list elements are null.
59 pub(super) validity: Mask,
60
61 /// The length of the vector (which is the same as the length of the validity mask).
62 ///
63 /// This is stored here as a convenience, as the validity also tracks this information.
64 pub(super) len: usize,
65}
66
67impl ListViewVector {
68 /// Creates a new [`ListViewVector`] from its components.
69 ///
70 /// # Panics
71 ///
72 /// Panics if:
73 ///
74 /// - `offsets` or `sizes` contain nulls values.
75 /// - `offsets`, `sizes`, and `validity` do not all have the same length
76 /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
77 /// would cause overflow)
78 /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
79 /// `elements.len()` (even if the corresponding view is defined as null by the validity
80 /// array).
81 pub fn new(
82 elements: Arc<Vector>,
83 offsets: PrimitiveVector,
84 sizes: PrimitiveVector,
85 validity: Mask,
86 ) -> Self {
87 Self::try_new(elements, offsets, sizes, validity)
88 .vortex_expect("Invalid ListViewVector construction")
89 }
90
91 /// Attempts to create a new [`ListViewVector`] from its components.
92 ///
93 /// # Errors
94 ///
95 /// Returns an error if:
96 ///
97 /// - `offsets` or `sizes` contain nulls values.
98 /// - `offsets`, `sizes`, and `validity` do not all have the same length
99 /// - The `sizes` integer width is not less than or equal to the `offsets` integer width (this
100 /// would cause overflow)
101 /// - For any `i`, `offsets[i] + sizes[i]` causes an overflow or is greater than
102 /// `elements.len()` (even if the corresponding view is defined as null by the validity
103 /// array).
104 pub fn try_new(
105 elements: Arc<Vector>,
106 offsets: PrimitiveVector,
107 sizes: PrimitiveVector,
108 validity: Mask,
109 ) -> VortexResult<Self> {
110 let len = validity.len();
111
112 vortex_ensure!(
113 offsets.len() == len,
114 "Offsets length {} does not match validity length {len}",
115 offsets.len(),
116 );
117 vortex_ensure!(
118 sizes.len() == len,
119 "Sizes length {} does not match validity length {len}",
120 sizes.len(),
121 );
122
123 vortex_ensure!(
124 offsets.validity().all_true(),
125 "Offsets vector must not contain null values"
126 );
127 vortex_ensure!(
128 sizes.validity().all_true(),
129 "Sizes vector must not contain null values"
130 );
131
132 let offsets_width = offsets.ptype().byte_width();
133 let sizes_width = sizes.ptype().byte_width();
134 vortex_ensure!(
135 sizes_width <= offsets_width,
136 "Sizes integer width {sizes_width} must be \
137 <= offsets integer width {offsets_width} to prevent overflow",
138 );
139
140 // Check that each `offsets[i] + sizes[i] <= elements.len()`.
141 validate_views_bound(elements.len(), &offsets, &sizes)?;
142
143 Ok(Self {
144 elements,
145 offsets,
146 sizes,
147 validity,
148 len,
149 })
150 }
151
152 /// Creates a new [`ListViewVector`] without validation.
153 ///
154 /// # Safety
155 ///
156 /// The caller must ensure all of the following invariants are satisfied:
157 ///
158 /// - `offsets` and `sizes` must be non-nullable integer vectors.
159 /// - `offsets`, `sizes`, and `validity` must have the same length.
160 /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
161 /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
162 /// (even if the corresponding view is defined as null by the validity array).
163 pub unsafe fn new_unchecked(
164 elements: Arc<Vector>,
165 offsets: PrimitiveVector,
166 sizes: PrimitiveVector,
167 validity: Mask,
168 ) -> Self {
169 let len = validity.len();
170
171 if cfg!(debug_assertions) {
172 Self::new(elements, offsets, sizes, validity)
173 } else {
174 Self {
175 elements,
176 offsets,
177 sizes,
178 validity,
179 len,
180 }
181 }
182 }
183
184 /// Decomposes the [`ListViewVector`] into its constituent parts (child elements, offsets,
185 /// sizes, and validity).
186 pub fn into_parts(self) -> (Arc<Vector>, PrimitiveVector, PrimitiveVector, Mask) {
187 (self.elements, self.offsets, self.sizes, self.validity)
188 }
189
190 /// Returns a reference to the `elements` vector.
191 #[inline]
192 pub fn elements(&self) -> &Arc<Vector> {
193 &self.elements
194 }
195
196 /// Returns a reference to the integer `offsets` vector.
197 #[inline]
198 pub fn offsets(&self) -> &PrimitiveVector {
199 &self.offsets
200 }
201
202 /// Returns a reference to the integer `sizes` vector.
203 #[inline]
204 pub fn sizes(&self) -> &PrimitiveVector {
205 &self.sizes
206 }
207}
208
209impl VectorOps for ListViewVector {
210 type Mutable = ListViewVectorMut;
211 type Scalar = ListViewScalar;
212
213 fn len(&self) -> usize {
214 self.len
215 }
216
217 fn validity(&self) -> &Mask {
218 &self.validity
219 }
220
221 fn mask_validity(&mut self, mask: &Mask) {
222 self.validity = self.validity.bitand(mask);
223 }
224
225 fn scalar_at(&self, index: usize) -> ListViewScalar {
226 assert!(index < self.len());
227 ListViewScalar::new(self.slice(index..index + 1))
228 }
229
230 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
231 todo!()
232 }
233
234 fn clear(&mut self) {
235 self.offsets.clear();
236 self.sizes.clear();
237 Arc::make_mut(&mut self.elements).clear();
238 self.validity.clear();
239 self.len = 0;
240 }
241
242 fn try_into_mut(self) -> Result<ListViewVectorMut, Self> {
243 // Try to unwrap the `Arc`.
244 let elements = match Arc::try_unwrap(self.elements) {
245 Ok(elements) => elements,
246 Err(elements) => return Err(Self { elements, ..self }),
247 };
248
249 // Try to make the validity mutable.
250 let validity = match self.validity.try_into_mut() {
251 Ok(v) => v,
252 Err(validity) => {
253 return Err(Self {
254 elements: Arc::new(elements),
255 validity,
256 ..self
257 });
258 }
259 };
260
261 // Try to make the offsets mutable.
262 let offsets = match self.offsets.try_into_mut() {
263 Ok(mutable_offsets) => mutable_offsets,
264 Err(offsets) => {
265 return Err(Self {
266 offsets,
267 sizes: self.sizes,
268 elements: Arc::new(elements),
269 validity: validity.freeze(),
270 len: self.len,
271 });
272 }
273 };
274
275 // Try to make the sizes mutable.
276 let sizes = match self.sizes.try_into_mut() {
277 Ok(mutable_sizes) => mutable_sizes,
278 Err(sizes) => {
279 return Err(Self {
280 offsets: offsets.freeze(),
281 sizes,
282 elements: Arc::new(elements),
283 validity: validity.freeze(),
284 len: self.len,
285 });
286 }
287 };
288
289 // Try to make the elements mutable.
290 match elements.try_into_mut() {
291 Ok(mut_elements) => Ok(ListViewVectorMut {
292 offsets,
293 sizes,
294 elements: Box::new(mut_elements),
295 validity,
296 len: self.len,
297 }),
298 Err(elements) => Err(Self {
299 offsets: offsets.freeze(),
300 sizes: sizes.freeze(),
301 elements: Arc::new(elements),
302 validity: validity.freeze(),
303 len: self.len,
304 }),
305 }
306 }
307
308 fn into_mut(self) -> ListViewVectorMut {
309 let len = self.len;
310 let validity = self.validity.into_mut();
311 let offsets = self.offsets.into_mut();
312 let sizes = self.sizes.into_mut();
313
314 // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
315 // just a **different** reference count increment).
316 let elements = Arc::try_unwrap(self.elements).unwrap_or_else(|arc| (*arc).clone());
317
318 ListViewVectorMut {
319 offsets,
320 sizes,
321 elements: Box::new(elements.into_mut()),
322 validity,
323 len,
324 }
325 }
326}
327
328// TODO(connor): It would be better to separate everything inside the macros into its own function,
329// but that would require adding another macro that sets a type `$type` to be used by the caller.
330/// Checks that all views are `<= elements_len`.
331#[expect(
332 clippy::cognitive_complexity,
333 reason = "complexity from nested match_each_* macros"
334)]
335#[allow(clippy::cast_possible_truncation)] // casts inside macro
336fn validate_views_bound(
337 elements_len: usize,
338 offsets: &PrimitiveVector,
339 sizes: &PrimitiveVector,
340) -> VortexResult<()> {
341 let len = offsets.len();
342
343 match_each_integer_pvector!(&offsets, |offsets_vector| {
344 match_each_integer_pvector!(&sizes, |sizes_vector| {
345 let offsets_slice = offsets_vector.as_ref();
346 let sizes_slice = sizes_vector.as_ref();
347
348 for i in 0..len {
349 let offset = offsets_slice[i] as usize;
350 let size = sizes_slice[i] as usize;
351 vortex_ensure!(offset + size <= elements_len);
352 }
353 });
354 });
355
356 Ok(())
357}