vortex_vector/struct_/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`StructVector`].
5
6use std::fmt::Debug;
7use std::ops::RangeBounds;
8use std::sync::Arc;
9
10use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
11use vortex_mask::Mask;
12
13use crate::struct_::{StructScalar, StructVectorMut};
14use crate::{Scalar, Vector, VectorMutOps, VectorOps};
15
16/// An immutable vector of struct values.
17///
18/// Struct values are stored column-wise in the vector, so values in the same field are stored next
19/// to each other (rather than values in the same struct stored next to each other).
20#[derive(Debug, Clone)]
21pub struct StructVector {
22    /// The fields of the `StructVector`, each stored column-wise as a [`Vector`].
23    ///
24    /// We store these as an [`Arc<Box<_>>`] because we need to call [`try_unwrap()`] in our
25    /// [`try_into_mut()`] implementation, and since slices are unsized it is not implemented for
26    /// [`Arc<[Vector]>`].
27    ///
28    /// [`try_unwrap()`]: Arc::try_unwrap
29    /// [`try_into_mut()`]: Self::try_into_mut
30    pub(super) fields: Arc<Box<[Vector]>>,
31
32    /// The validity mask (where `true` represents an element is **not** null).
33    pub(super) validity: Mask,
34
35    /// The length of the vector (which is the same as all field vectors).
36    ///
37    /// This is stored here as a convenience, as the validity also tracks this information.
38    pub(super) len: usize,
39}
40
41impl StructVector {
42    /// Creates a new [`StructVector`] from the given fields and validity mask.
43    ///
44    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
45    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
46    ///
47    /// # Panics
48    ///
49    /// Panics if:
50    ///
51    /// - Any field vector has a length that does not match the length of other fields.
52    /// - The validity mask length does not match the field length.
53    pub fn new(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
54        Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`")
55    }
56
57    /// Tries to create a new [`StructVector`] from the given fields and validity mask.
58    ///
59    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
60    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
61    ///
62    /// # Errors
63    ///
64    /// Returns an error if:
65    ///
66    /// - Any field vector has a length that does not match the length of other fields.
67    /// - The validity mask length does not match the field length.
68    pub fn try_new(fields: Arc<Box<[Vector]>>, validity: Mask) -> VortexResult<Self> {
69        let len = validity.len();
70
71        // Validate that all fields have the correct length.
72        for (i, field) in fields.iter().enumerate() {
73            vortex_ensure!(
74                field.len() == len,
75                "Field {} has length {} but expected length {}",
76                i,
77                field.len(),
78                len
79            );
80        }
81
82        Ok(Self {
83            fields,
84            validity,
85            len,
86        })
87    }
88
89    /// Creates a new [`StructVector`] from the given fields and validity mask without validation.
90    ///
91    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
92    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
93    ///
94    /// # Safety
95    ///
96    /// The caller must ensure that:
97    ///
98    /// - All field vectors have the same length.
99    /// - The validity mask has a length equal to the field length.
100    pub unsafe fn new_unchecked(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
101        let len = validity.len();
102
103        if cfg!(debug_assertions) {
104            Self::new(fields, validity)
105        } else {
106            Self {
107                fields,
108                validity,
109                len,
110            }
111        }
112    }
113
114    /// Decomposes the struct vector into its constituent parts (fields and validity).
115    pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
116        (self.fields, self.validity)
117    }
118
119    /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].
120    pub fn fields(&self) -> &Arc<Box<[Vector]>> {
121        &self.fields
122    }
123}
124
125impl VectorOps for StructVector {
126    type Mutable = StructVectorMut;
127
128    fn len(&self) -> usize {
129        self.len
130    }
131
132    fn validity(&self) -> &Mask {
133        &self.validity
134    }
135
136    fn scalar_at(&self, index: usize) -> Scalar {
137        assert!(index < self.len());
138        StructScalar::new(self.slice(index..index + 1)).into()
139    }
140
141    fn slice(&self, _range: impl RangeBounds<usize> + Clone + Debug) -> Self {
142        todo!()
143    }
144
145    fn try_into_mut(self) -> Result<StructVectorMut, Self> {
146        let len = self.len;
147
148        let fields = match Arc::try_unwrap(self.fields) {
149            Ok(fields) => fields,
150            Err(fields) => return Err(Self { fields, ..self }),
151        };
152
153        let validity = match self.validity.try_into_mut() {
154            Ok(validity) => validity,
155            Err(validity) => {
156                return Err(Self {
157                    fields: Arc::new(fields),
158                    validity,
159                    len,
160                });
161            }
162        };
163
164        // Convert all the remaining fields to mutable, if possible.
165        let mut mutable_fields = Vec::with_capacity(fields.len());
166        let mut fields_iter = fields.into_iter();
167
168        while let Some(field) = fields_iter.next() {
169            match field.try_into_mut() {
170                Ok(mutable_field) => {
171                    // We were able to take ownership of the field vector, so add it and keep going.
172                    mutable_fields.push(mutable_field);
173                }
174                Err(immutable_field) => {
175                    // We were unable to take ownership, so we must re-freeze all of the fields
176                    // vectors we took ownership over and reconstruct the original `StructVector`.
177                    let mut all_fields: Vec<Vector> = mutable_fields
178                        .into_iter()
179                        .map(|mut_field| mut_field.freeze())
180                        .collect();
181
182                    all_fields.push(immutable_field);
183                    all_fields.extend(fields_iter);
184
185                    return Err(Self {
186                        fields: Arc::new(all_fields.into_boxed_slice()),
187                        len: self.len,
188                        validity: validity.freeze(),
189                    });
190                }
191            }
192        }
193
194        Ok(StructVectorMut {
195            fields: mutable_fields.into_boxed_slice(),
196            len: self.len,
197            validity,
198        })
199    }
200
201    fn into_mut(self) -> StructVectorMut {
202        let len = self.len;
203        let validity = self.validity.into_mut();
204
205        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
206        // just a **different** reference count increment).
207        let fields = Arc::try_unwrap(self.fields).unwrap_or_else(|arc| (*arc).clone());
208
209        let mutable_fields: Box<[_]> = fields
210            .into_vec()
211            .into_iter()
212            .map(|field| field.into_mut())
213            .collect();
214
215        StructVectorMut {
216            fields: mutable_fields,
217            len,
218            validity,
219        }
220    }
221}