vortex_vector/struct_/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`StructVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_mask::Mask;
15
16use crate::Vector;
17use crate::VectorMutOps;
18use crate::VectorOps;
19use crate::struct_::StructScalar;
20use crate::struct_::StructVectorMut;
21
/// An immutable vector of struct values.
///
/// Struct values are stored column-wise in the vector, so values in the same field are stored next
/// to each other (rather than values in the same struct stored next to each other).
///
/// The checked constructor [`try_new()`](Self::try_new) takes the vector length from the validity
/// mask and requires every field vector to have that same length.
#[derive(Debug, Clone)]
pub struct StructVector {
    /// The fields of the `StructVector`, each stored column-wise as a [`Vector`].
    ///
    /// We store these as an [`Arc<Box<_>>`] because we need to call [`try_unwrap()`] in our
    /// [`try_into_mut()`] implementation, and since slices are unsized it is not implemented for
    /// [`Arc<[Vector]>`].
    ///
    /// [`try_unwrap()`]: Arc::try_unwrap
    /// [`try_into_mut()`]: Self::try_into_mut
    pub(super) fields: Arc<Box<[Vector]>>,

    /// The validity mask (where `true` represents an element is **not** null).
    ///
    /// This mask applies to the struct values themselves; each field vector is a [`Vector`] of its
    /// own and is stored separately in `fields`.
    pub(super) validity: Mask,

    /// The length of the vector (which is the same as all field vectors).
    ///
    /// This is stored here as a convenience, as the validity also tracks this information. The
    /// constructors in this file always initialize it from `validity.len()`.
    pub(super) len: usize,
}
46
47impl StructVector {
48    /// Creates a new [`StructVector`] from the given fields and validity mask.
49    ///
50    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
51    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
52    ///
53    /// # Panics
54    ///
55    /// Panics if:
56    ///
57    /// - Any field vector has a length that does not match the length of other fields.
58    /// - The validity mask length does not match the field length.
59    pub fn new(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
60        Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`")
61    }
62
63    /// Tries to create a new [`StructVector`] from the given fields and validity mask.
64    ///
65    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
66    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
67    ///
68    /// # Errors
69    ///
70    /// Returns an error if:
71    ///
72    /// - Any field vector has a length that does not match the length of other fields.
73    /// - The validity mask length does not match the field length.
74    pub fn try_new(fields: Arc<Box<[Vector]>>, validity: Mask) -> VortexResult<Self> {
75        let len = validity.len();
76
77        // Validate that all fields have the correct length.
78        for (i, field) in fields.iter().enumerate() {
79            vortex_ensure!(
80                field.len() == len,
81                "Field {} has length {} but expected length {}",
82                i,
83                field.len(),
84                len
85            );
86        }
87
88        Ok(Self {
89            fields,
90            validity,
91            len,
92        })
93    }
94
95    /// Creates a new [`StructVector`] from the given fields and validity mask without validation.
96    ///
97    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
98    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
99    ///
100    /// # Safety
101    ///
102    /// The caller must ensure that:
103    ///
104    /// - All field vectors have the same length.
105    /// - The validity mask has a length equal to the field length.
106    pub unsafe fn new_unchecked(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
107        let len = validity.len();
108
109        if cfg!(debug_assertions) {
110            Self::new(fields, validity)
111        } else {
112            Self {
113                fields,
114                validity,
115                len,
116            }
117        }
118    }
119
120    /// Decomposes the struct vector into its constituent parts (fields and validity).
121    pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
122        (self.fields, self.validity)
123    }
124
125    /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].
126    pub fn fields(&self) -> &Arc<Box<[Vector]>> {
127        &self.fields
128    }
129}
130
131impl VectorOps for StructVector {
132    type Mutable = StructVectorMut;
133    type Scalar = StructScalar;
134
135    fn len(&self) -> usize {
136        self.len
137    }
138
139    fn validity(&self) -> &Mask {
140        &self.validity
141    }
142
143    fn mask_validity(&mut self, mask: &Mask) {
144        self.validity = self.validity.bitand(mask);
145    }
146
147    fn scalar_at(&self, index: usize) -> StructScalar {
148        assert!(index < self.len());
149        StructScalar::new(self.slice(index..index + 1))
150    }
151
152    fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
153        todo!()
154    }
155
156    fn clear(&mut self) {
157        self.len = 0;
158        self.validity.clear();
159        Arc::make_mut(&mut self.fields)
160            .iter_mut()
161            .for_each(|f| f.clear());
162    }
163
164    fn try_into_mut(self) -> Result<StructVectorMut, Self> {
165        let len = self.len;
166
167        let fields = match Arc::try_unwrap(self.fields) {
168            Ok(fields) => fields,
169            Err(fields) => return Err(Self { fields, ..self }),
170        };
171
172        let validity = match self.validity.try_into_mut() {
173            Ok(validity) => validity,
174            Err(validity) => {
175                return Err(Self {
176                    fields: Arc::new(fields),
177                    validity,
178                    len,
179                });
180            }
181        };
182
183        // Convert all the remaining fields to mutable, if possible.
184        let mut mutable_fields = Vec::with_capacity(fields.len());
185        let mut fields_iter = fields.into_iter();
186
187        while let Some(field) = fields_iter.next() {
188            match field.try_into_mut() {
189                Ok(mutable_field) => {
190                    // We were able to take ownership of the field vector, so add it and keep going.
191                    mutable_fields.push(mutable_field);
192                }
193                Err(immutable_field) => {
194                    // We were unable to take ownership, so we must re-freeze all of the fields
195                    // vectors we took ownership over and reconstruct the original `StructVector`.
196                    let mut all_fields: Vec<Vector> = mutable_fields
197                        .into_iter()
198                        .map(|mut_field| mut_field.freeze())
199                        .collect();
200
201                    all_fields.push(immutable_field);
202                    all_fields.extend(fields_iter);
203
204                    return Err(Self {
205                        fields: Arc::new(all_fields.into_boxed_slice()),
206                        len: self.len,
207                        validity: validity.freeze(),
208                    });
209                }
210            }
211        }
212
213        Ok(StructVectorMut {
214            fields: mutable_fields.into_boxed_slice(),
215            len: self.len,
216            validity,
217        })
218    }
219
220    fn into_mut(self) -> StructVectorMut {
221        let len = self.len;
222        let validity = self.validity.into_mut();
223
224        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
225        // just a **different** reference count increment).
226        let fields = Arc::try_unwrap(self.fields).unwrap_or_else(|arc| (*arc).clone());
227
228        let mutable_fields: Box<[_]> = fields
229            .into_vec()
230            .into_iter()
231            .map(|field| field.into_mut())
232            .collect();
233
234        StructVectorMut {
235            fields: mutable_fields,
236            len,
237            validity,
238        }
239    }
240}