vortex_vector/struct_/vector.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`StructVector`].
5
6use std::fmt::Debug;
7use std::ops::RangeBounds;
8use std::sync::Arc;
9
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use crate::struct_::{StructScalar, StructVectorMut};
14use crate::{Scalar, Vector, VectorMutOps, VectorOps};
15
16/// An immutable vector of struct values.
17///
18/// Struct values are stored column-wise in the vector, so values in the same field are stored next
19/// to each other (rather than values in the same struct stored next to each other).
20#[derive(Debug, Clone)]
21pub struct StructVector {
22 /// The fields of the `StructVector`, each stored column-wise as a [`Vector`].
23 ///
24 /// We store these as an [`Arc<Box<_>>`] because we need to call [`try_unwrap()`] in our
25 /// [`try_into_mut()`] implementation, and since slices are unsized it is not implemented for
26 /// [`Arc<[Vector]>`].
27 ///
28 /// [`try_unwrap()`]: Arc::try_unwrap
29 /// [`try_into_mut()`]: Self::try_into_mut
30 pub(super) fields: Arc<Box<[Vector]>>,
31
32 /// The validity mask (where `true` represents an element is **not** null).
33 pub(super) validity: Mask,
34
35 /// The length of the vector (which is the same as all field vectors).
36 ///
37 /// This is stored here as a convenience, as the validity also tracks this information.
38 pub(super) len: usize,
39}
40
41impl StructVector {
42 /// Creates a new [`StructVector`] from the given fields and validity mask.
43 ///
44 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
45 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
46 ///
47 /// # Panics
48 ///
49 /// Panics if:
50 ///
51 /// - Any field vector has a length that does not match the length of other fields.
52 /// - The validity mask length does not match the field length.
53 pub fn new(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
54 Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`")
55 }
56
57 /// Tries to create a new [`StructVector`] from the given fields and validity mask.
58 ///
59 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
60 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
61 ///
62 /// # Errors
63 ///
64 /// Returns an error if:
65 ///
66 /// - Any field vector has a length that does not match the length of other fields.
67 /// - The validity mask length does not match the field length.
68 pub fn try_new(fields: Arc<Box<[Vector]>>, validity: Mask) -> VortexResult<Self> {
69 let len = validity.len();
70
71 // Validate that all fields have the correct length.
72 for (i, field) in fields.iter().enumerate() {
73 vortex_ensure!(
74 field.len() == len,
75 "Field {} has length {} but expected length {}",
76 i,
77 field.len(),
78 len
79 );
80 }
81
82 Ok(Self {
83 fields,
84 validity,
85 len,
86 })
87 }
88
89 /// Creates a new [`StructVector`] from the given fields and validity mask without validation.
90 ///
91 /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
92 /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
93 ///
94 /// # Safety
95 ///
96 /// The caller must ensure that:
97 ///
98 /// - All field vectors have the same length.
99 /// - The validity mask has a length equal to the field length.
100 pub unsafe fn new_unchecked(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
101 let len = validity.len();
102
103 if cfg!(debug_assertions) {
104 Self::new(fields, validity)
105 } else {
106 Self {
107 fields,
108 validity,
109 len,
110 }
111 }
112 }
113
114 /// Decomposes the struct vector into its constituent parts (fields and validity).
115 pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
116 (self.fields, self.validity)
117 }
118
119 /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].
120 pub fn fields(&self) -> &Arc<Box<[Vector]>> {
121 &self.fields
122 }
123}
124
125impl VectorOps for StructVector {
126 type Mutable = StructVectorMut;
127
128 fn len(&self) -> usize {
129 self.len
130 }
131
132 fn validity(&self) -> &Mask {
133 &self.validity
134 }
135
136 fn scalar_at(&self, index: usize) -> Scalar {
137 assert!(index < self.len());
138 StructScalar::new(self.slice(index..index + 1)).into()
139 }
140
141 fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
142 todo!()
143 }
144
145 fn try_into_mut(self) -> Result<StructVectorMut, Self> {
146 let len = self.len;
147
148 let fields = match Arc::try_unwrap(self.fields) {
149 Ok(fields) => fields,
150 Err(fields) => return Err(Self { fields, ..self }),
151 };
152
153 let validity = match self.validity.try_into_mut() {
154 Ok(validity) => validity,
155 Err(validity) => {
156 return Err(Self {
157 fields: Arc::new(fields),
158 validity,
159 len,
160 });
161 }
162 };
163
164 // Convert all the remaining fields to mutable, if possible.
165 let mut mutable_fields = Vec::with_capacity(fields.len());
166 let mut fields_iter = fields.into_iter();
167
168 while let Some(field) = fields_iter.next() {
169 match field.try_into_mut() {
170 Ok(mutable_field) => {
171 // We were able to take ownership of the field vector, so add it and keep going.
172 mutable_fields.push(mutable_field);
173 }
174 Err(immutable_field) => {
175 // We were unable to take ownership, so we must re-freeze all of the fields
176 // vectors we took ownership over and reconstruct the original `StructVector`.
177 let mut all_fields: Vec<Vector> = mutable_fields
178 .into_iter()
179 .map(|mut_field| mut_field.freeze())
180 .collect();
181
182 all_fields.push(immutable_field);
183 all_fields.extend(fields_iter);
184
185 return Err(Self {
186 fields: Arc::new(all_fields.into_boxed_slice()),
187 len: self.len,
188 validity: validity.freeze(),
189 });
190 }
191 }
192 }
193
194 Ok(StructVectorMut {
195 fields: mutable_fields.into_boxed_slice(),
196 len: self.len,
197 validity,
198 })
199 }
200
201 fn into_mut(self) -> StructVectorMut {
202 let len = self.len;
203 let validity = self.validity.into_mut();
204
205 // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
206 // just a **different** reference count increment).
207 let fields = Arc::try_unwrap(self.fields).unwrap_or_else(|arc| (*arc).clone());
208
209 let mutable_fields: Box<[_]> = fields
210 .into_vec()
211 .into_iter()
212 .map(|field| field.into_mut())
213 .collect();
214
215 StructVectorMut {
216 fields: mutable_fields,
217 len,
218 validity,
219 }
220 }
221}