vortex_vector/struct_/
vector.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Definition and implementation of [`StructVector`].
5
6use std::fmt::Debug;
7use std::ops::BitAnd;
8use std::ops::RangeBounds;
9use std::sync::Arc;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_mask::Mask;
14
15use crate::Vector;
16use crate::VectorMutOps;
17use crate::VectorOps;
18use crate::struct_::StructScalar;
19use crate::struct_::StructVectorMut;
20
/// An immutable vector of struct values.
///
/// Struct values are stored column-wise in the vector, so values in the same field are stored next
/// to each other (rather than values in the same struct stored next to each other).
///
/// The struct carries its own validity mask in addition to whatever validity each field vector
/// tracks on its own.
#[derive(Debug, Clone)]
pub struct StructVector {
    /// The fields of the `StructVector`, each stored column-wise as a [`Vector`].
    ///
    /// These are stored as an `Arc<Box<[Vector]>>` rather than an `Arc<[Vector]>` because the
    /// [`try_into_mut()`] implementation needs to call [`try_unwrap()`], which is only available
    /// for sized payloads (and slices are unsized).
    ///
    /// [`try_unwrap()`]: Arc::try_unwrap
    /// [`try_into_mut()`]: Self::try_into_mut
    pub(super) fields: Arc<Box<[Vector]>>,

    /// The validity mask of the struct values themselves, where `true` means the element is
    /// **not** null.
    pub(super) validity: Mask,

    /// The length of the vector (which is the same as the length of every field vector).
    ///
    /// This is stored here as a convenience, as the validity mask also tracks this information.
    pub(super) len: usize,
}
45
46impl PartialEq for StructVector {
47    fn eq(&self, other: &Self) -> bool {
48        if self.len != other.len {
49            return false;
50        }
51        // Number of fields must match
52        if self.fields.len() != other.fields.len() {
53            return false;
54        }
55        // Validity patterns must match
56        if self.validity != other.validity {
57            return false;
58        }
59        // For each field pair: clone the fields, call mask_validity(&combined_mask) on both clones
60        // where combined_mask = self.validity AND other.validity, then compare with ==
61        let combined_mask = self.validity.bitand(&other.validity);
62
63        // Each field must match with the combined mask applied
64        for (self_field, other_field) in self.fields.iter().zip(other.fields.iter()) {
65            let mut self_field_masked = self_field.clone();
66            let mut other_field_masked = other_field.clone();
67            self_field_masked.mask_validity(&combined_mask);
68            other_field_masked.mask_validity(&combined_mask);
69
70            if self_field_masked != other_field_masked {
71                return false;
72            }
73        }
74        true
75    }
76}
77
78impl StructVector {
79    /// Creates a new [`StructVector`] from the given fields and validity mask.
80    ///
81    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
82    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
83    ///
84    /// # Panics
85    ///
86    /// Panics if:
87    ///
88    /// - Any field vector has a length that does not match the length of other fields.
89    /// - The validity mask length does not match the field length.
90    pub fn new(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
91        Self::try_new(fields, validity).vortex_expect("Failed to create `StructVector`")
92    }
93
94    /// Tries to create a new [`StructVector`] from the given fields and validity mask.
95    ///
96    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
97    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
98    ///
99    /// # Errors
100    ///
101    /// Returns an error if:
102    ///
103    /// - Any field vector has a length that does not match the length of other fields.
104    /// - The validity mask length does not match the field length.
105    pub fn try_new(fields: Arc<Box<[Vector]>>, validity: Mask) -> VortexResult<Self> {
106        let len = validity.len();
107
108        // Validate that all fields have the correct length.
109        for (i, field) in fields.iter().enumerate() {
110            vortex_ensure!(
111                field.len() == len,
112                "Field {} has length {} but expected length {}",
113                i,
114                field.len(),
115                len
116            );
117        }
118
119        Ok(Self {
120            fields,
121            validity,
122            len,
123        })
124    }
125
126    /// Creates a new [`StructVector`] from the given fields and validity mask without validation.
127    ///
128    /// Note that we take [`Arc<Box<[_]>>`] in order to enable easier conversion to
129    /// [`StructVectorMut`] via [`try_into_mut()`](Self::try_into_mut).
130    ///
131    /// # Safety
132    ///
133    /// The caller must ensure that:
134    ///
135    /// - All field vectors have the same length.
136    /// - The validity mask has a length equal to the field length.
137    pub unsafe fn new_unchecked(fields: Arc<Box<[Vector]>>, validity: Mask) -> Self {
138        let len = validity.len();
139
140        if cfg!(debug_assertions) {
141            Self::new(fields, validity)
142        } else {
143            Self {
144                fields,
145                validity,
146                len,
147            }
148        }
149    }
150
151    /// Decomposes the struct vector into its constituent parts (fields and validity).
152    pub fn into_parts(self) -> (Arc<Box<[Vector]>>, Mask) {
153        (self.fields, self.validity)
154    }
155
156    /// Returns the fields of the `StructVector`, each stored column-wise as a [`Vector`].
157    pub fn fields(&self) -> &Arc<Box<[Vector]>> {
158        &self.fields
159    }
160}
161
// Marker impl declaring that the hand-written `PartialEq` for `StructVector` is a full
// equivalence relation.
// NOTE(review): this presumably relies on `Vector` and `Mask` equality being reflexive for every
// field type — confirm against their `PartialEq` implementations.
impl Eq for StructVector {}
163
164impl VectorOps for StructVector {
165    type Mutable = StructVectorMut;
166    type Scalar = StructScalar;
167
168    fn len(&self) -> usize {
169        self.len
170    }
171
172    fn validity(&self) -> &Mask {
173        &self.validity
174    }
175
176    fn mask_validity(&mut self, mask: &Mask) {
177        self.validity = self.validity.bitand(mask);
178    }
179
180    fn scalar_at(&self, index: usize) -> StructScalar {
181        assert!(index < self.len());
182        StructScalar::new(self.slice(index..index + 1))
183    }
184
185    fn slice(&self, range: impl RangeBounds<usize> + Clone + Debug) -> Self {
186        let sliced_fields: Box<[_]> = self
187            .fields
188            .iter()
189            .map(|field| field.slice(range.clone()))
190            .collect();
191
192        let sliced_validity = self.validity.slice(range);
193        let len = sliced_validity.len();
194
195        StructVector {
196            fields: Arc::new(sliced_fields),
197            validity: sliced_validity,
198            len,
199        }
200    }
201
202    fn clear(&mut self) {
203        self.len = 0;
204        self.validity.clear();
205        Arc::make_mut(&mut self.fields)
206            .iter_mut()
207            .for_each(|f| f.clear());
208    }
209
210    fn try_into_mut(self) -> Result<StructVectorMut, Self> {
211        let len = self.len;
212
213        let fields = match Arc::try_unwrap(self.fields) {
214            Ok(fields) => fields,
215            Err(fields) => return Err(Self { fields, ..self }),
216        };
217
218        let validity = match self.validity.try_into_mut() {
219            Ok(validity) => validity,
220            Err(validity) => {
221                return Err(Self {
222                    fields: Arc::new(fields),
223                    validity,
224                    len,
225                });
226            }
227        };
228
229        // Convert all the remaining fields to mutable, if possible.
230        let mut mutable_fields = Vec::with_capacity(fields.len());
231        let mut fields_iter = fields.into_iter();
232
233        while let Some(field) = fields_iter.next() {
234            match field.try_into_mut() {
235                Ok(mutable_field) => {
236                    // We were able to take ownership of the field vector, so add it and keep going.
237                    mutable_fields.push(mutable_field);
238                }
239                Err(immutable_field) => {
240                    // We were unable to take ownership, so we must re-freeze all of the fields
241                    // vectors we took ownership over and reconstruct the original `StructVector`.
242                    let mut all_fields: Vec<Vector> = mutable_fields
243                        .into_iter()
244                        .map(|mut_field| mut_field.freeze())
245                        .collect();
246
247                    all_fields.push(immutable_field);
248                    all_fields.extend(fields_iter);
249
250                    return Err(Self {
251                        fields: Arc::new(all_fields.into_boxed_slice()),
252                        len: self.len,
253                        validity: validity.freeze(),
254                    });
255                }
256            }
257        }
258
259        Ok(StructVectorMut {
260            fields: mutable_fields.into_boxed_slice(),
261            len: self.len,
262            validity,
263        })
264    }
265
266    fn into_mut(self) -> StructVectorMut {
267        let len = self.len;
268        let validity = self.validity.into_mut();
269
270        // If someone else has a strong reference to the `Arc`, clone the underlying data (which is
271        // just a **different** reference count increment).
272        let fields = Arc::try_unwrap(self.fields).unwrap_or_else(|arc| (*arc).clone());
273
274        let mutable_fields: Box<[_]> = fields
275            .into_vec()
276            .into_iter()
277            .map(|field| field.into_mut())
278            .collect();
279
280        StructVectorMut {
281            fields: mutable_fields,
282            len,
283            validity,
284        }
285    }
286}
287
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_mask::Mask;

    use super::*;
    use crate::bool::BoolVectorMut;
    use crate::null::NullVector;
    use crate::primitive::PVectorMut;

    /// Builds a frozen `i32` field vector holding `values`.
    fn i32_column<const N: usize>(values: [i32; N]) -> Vector {
        PVectorMut::<i32>::from_iter(values).freeze().into()
    }

    /// Builds a frozen boolean field vector holding `values`.
    fn bool_column<const N: usize>(values: [bool; N]) -> Vector {
        BoolVectorMut::from_iter(values).freeze().into()
    }

    /// Builds a `StructVector` from the given field vectors and validity mask.
    fn struct_of<const N: usize>(columns: [Vector; N], validity: Mask) -> StructVector {
        let boxed: Box<[Vector]> = Box::new(columns);
        StructVector::new(Arc::new(boxed), validity)
    }

    #[test]
    fn test_struct_vector_eq_identical() {
        // Two struct vectors built from the same data must compare equal.
        let build = || {
            struct_of(
                [
                    NullVector::new(3).into(),
                    bool_column([true, false, true]),
                    i32_column([10, 20, 30]),
                ],
                Mask::AllTrue(3),
            )
        };

        let v1 = build();
        let v2 = build();

        assert_eq!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_different_length() {
        // A length mismatch alone makes the vectors unequal.
        let v1 = struct_of([i32_column([10, 20, 30])], Mask::AllTrue(3));
        let v2 = struct_of([i32_column([10, 20])], Mask::AllTrue(2));

        assert_ne!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_different_field_count() {
        // A different number of fields makes the vectors unequal.
        let v1 = struct_of(
            [i32_column([10, 20, 30]), bool_column([true, false, true])],
            Mask::AllTrue(3),
        );
        let v2 = struct_of([i32_column([10, 20, 30])], Mask::AllTrue(3));

        assert_ne!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_different_validity() {
        // Same field data, but different validity patterns: not equal.
        let v1 = struct_of([i32_column([10, 20, 30])], Mask::AllTrue(3));
        let v2 = struct_of(
            [i32_column([10, 20, 30])],
            Mask::from_iter([true, false, true]),
        );

        assert_ne!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_different_field_values() {
        // Same shape and validity, but a differing value at a valid position: not equal.
        let v1 = struct_of([i32_column([10, 20, 30])], Mask::AllTrue(3));
        let v2 = struct_of([i32_column([10, 99, 30])], Mask::AllTrue(3));

        assert_ne!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_ignores_invalid_positions() {
        // Values at invalid positions must not influence equality, provided the validity
        // patterns match and the values at valid positions agree.
        //
        // validity = [true, false, true] means position 1 is invalid.
        let validity = Mask::from_iter([true, false, true]);

        let v1 = struct_of([i32_column([10, 20, 30])], validity.clone());

        // Differs from `v1` only at position 1, which is invalid.
        let v2 = struct_of([i32_column([10, 99, 30])], validity);

        assert_eq!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_combined_mask_applied() {
        // The combined mask (self.validity AND other.validity) is applied to every field.
        // Both vectors share the same validity, so the combined mask equals that validity.
        //
        // validity = [true, false, true, false, true] means positions 1 and 3 are invalid.
        let validity = Mask::from_iter([true, false, true, false, true]);

        let v1 = struct_of(
            [
                i32_column([1, 2, 3, 4, 5]),
                bool_column([true, true, true, true, true]),
            ],
            validity.clone(),
        );

        // Both fields differ from `v1` at the invalid positions (1 and 3) only.
        let v2 = struct_of(
            [
                i32_column([1, 999, 3, 888, 5]),
                bool_column([true, false, true, false, true]),
            ],
            validity,
        );

        assert_eq!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_nested() {
        // Equality must also work when a field is itself a struct vector.
        let inner1 = struct_of([bool_column([true, false, true])], Mask::AllTrue(3));
        let inner2 = struct_of([bool_column([true, false, true])], Mask::AllTrue(3));

        let v1 = struct_of([inner1.into()], Mask::AllTrue(3));
        let v2 = struct_of([inner2.into()], Mask::AllTrue(3));

        assert_eq!(v1, v2);
    }

    #[test]
    fn test_struct_vector_eq_empty() {
        // Two struct vectors with no fields and no rows are equal.
        let v1 = struct_of([], Mask::AllTrue(0));
        let v2 = struct_of([], Mask::AllTrue(0));

        assert_eq!(v1, v2);
    }
}