vortex_vector/struct_/vector.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`StructVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use crate::Vector;
17use crate::VectorMutOps;
18use crate::VectorOps;
19use crate::struct_::StructScalar;
20use crate::struct_::StructVectorMut;
21
22/// An immutable vector of struct values.
23///
24/// Struct values are stored column-wise in the vector, so values in the same field are stored next
25/// to each other (rather than values in the same struct stored next to each other).
26#[derive(Debug, Clone)]
27pub struct StructVector {
28 /// The fields of the `StructVector`, each stored column-wise as a [`Vector`].
29 ///
30 /// We store these as an [`Arc<Box<_>>`] because we need to call [`try_unwrap()`] in our
31 /// [`try_into_mut()`] implementation, and since slices are unsized it is not implemented for
32 /// [`Arc<[Vector]>`].
33 ///
34 /// [`try_unwrap()`]: Arc::try_unwrap
35 /// [`try_into_mut()`]: Self::try_into_mut
36 pub(super) fields: Arc<Box<[Vector]>>,
37
38 /// The validity mask (where `true` represents an element is **not** null).
39 pub(super) validity: Mask,
40
41 /// The length of the vector (which is the same as all field vectors).
42 ///
43 /// This is stored here as a convenience, as the validity also tracks this information.
44 pub(super) len: usize,
45}
46
47impl StructVector {
48 /// Creates a new [`StructVector`] from the given fields and validity mask.
49 ///
50 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
51 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
52 ///
53 /// # Panics
54 ///
55 /// Panics if:
56 ///
57 /// - Any field vector has a length that does not match the length of other fields.
58 /// - The validity mask length does not match the field length.
59 pub fn new(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
60 Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`")
61 }
62
63 /// Tries to create a new [`StructVector`] from the given fields and validity mask.
64 ///
65 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
66 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
67 ///
68 /// # Errors
69 ///
70 /// Returns an error if:
71 ///
72 /// - Any field vector has a length that does not match the length of other fields.
73 /// - The validity mask length does not match the field length.
74 pub fn try_new(fields: Arc<Box<[Vector]>>, validity: Mask) -> VortexResult<Self> {
75 let len = validity.len();
76
77 // Validate that all fields have the correct length.
78 for (i, field) in fields.iter().enumerate() {
79 vortex_ensure!(
80 field.len() == len,
81 "Field {} has length {} but expected length {}",
82 i,
83 field.len(),
84 len
85 );
86 }
87
88 Ok(Self {
89 fields,
90 validity,
91 len,
92 })
93 }
94
95 /// Creates a new [`StructVector`] from the given fields and validity mask without validation.
96 ///
97 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
98 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
99 ///
100 /// # Safety
101 ///
102 /// The caller must ensure that:
103 ///
104 /// - All field vectors have the same length.
105 /// - The validity mask has a length equal to the field length.
106 pub unsafe fn new_unchecked(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
107 let len = validity.len();
108
109 if cfg!(debug_assertions) {
110 Self::new(fields, validity)
111 } else {
112 Self {
113 fields,
114 validity,
115 len,
116 }
117 }
118 }
119
120 /// Decomposes the struct vector into its constituent parts (fields and validity).
121 pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
122 (self.fields, self.validity)
123 }
124
125 /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].
126 pub fn fields(&self) -> &Arc<Box<[Vector]>> {
127 &self.fields
128 }
129}
130
131impl VectorOps for StructVector {
132 type Mutable = StructVectorMut;
133 type Scalar = StructScalar;
134
135 fn len(&self) -> usize {
136 self.len
137 }
138
139 fn validity(&self) -> &Mask {
140 &self.validity
141 }
142
143 fn mask_validity(&mut self, mask: &Mask) {
144 self.validity = self.validity.bitand(mask);
145 }
146
147 fn scalar_at(&self, index: usize) -> StructScalar {
148 assert!(index < self.len());
149 StructScalar::new(self.slice(index..index + 1))
150 }
151
152 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
153 todo!()
154 }
155
156 fn clear(&mut self) {
157 self.len = 0;
158 self.validity.clear();
159 Arc::make_mut(&mut self.fields)
160 .iter_mut()
161 .for_each(|f| f.clear());
162 }
163
164 fn try_into_mut(self) -> Result<StructVectorMut, Self> {
165 let len = self.len;
166
167 let fields = match Arc::try_unwrap(self.fields) {
168 Ok(fields) => fields,
169 Err(fields) => return Err(Self { fields, ..self }),
170 };
171
172 let validity = match self.validity.try_into_mut() {
173 Ok(validity) => validity,
174 Err(validity) => {
175 return Err(Self {
176 fields: Arc::new(fields),
177 validity,
178 len,
179 });
180 }
181 };
182
183 // Convert all the remaining fields to mutable, if possible.
184 let mut mutable_fields = Vec::with_capacity(fields.len());
185 let mut fields_iter = fields.into_iter();
186
187 while let Some(field) = fields_iter.next() {
188 match field.try_into_mut() {
189 Ok(mutable_field) => {
190 // We were able to take ownership of the field vector, so add it and keep going.
191 mutable_fields.push(mutable_field);
192 }
193 Err(immutable_field) => {
194 // We were unable to take ownership, so we must re-freeze all of the fields
195 // vectors we took ownership over and reconstruct the original `StructVector`.
196 let mut all_fields: Vec<Vector> = mutable_fields
197 .into_iter()
198 .map(|mut_field| mut_field.freeze())
199 .collect();
200
201 all_fields.push(immutable_field);
202 all_fields.extend(fields_iter);
203
204 return Err(Self {
205 fields: Arc::new(all_fields.into_boxed_slice()),
206 len: self.len,
207 validity: validity.freeze(),
208 });
209 }
210 }
211 }
212
213 Ok(StructVectorMut {
214 fields: mutable_fields.into_boxed_slice(),
215 len: self.len,
216 validity,
217 })
218 }
219
220 fn into_mut(self) -> StructVectorMut {
221 let len = self.len;
222 let validity = self.validity.into_mut();
223
224 // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
225 // just a **different** reference count increment).
226 let fields = Arc::try_unwrap(self.fields).unwrap_or_else(|arc| (*arc).clone());
227
228 let mutable_fields: Box<[_]> = fields
229 .into_vec()
230 .into_iter()
231 .map(|field| field.into_mut())
232 .collect();
233
234 StructVectorMut {
235 fields: mutable_fields,
236 len,
237 validity,
238 }
239 }
240}