Skip to main content

vortex_array/arrays/variant/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod vtable;
5
6pub(crate) mod compute;
7
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11
12pub use self::vtable::Variant;
13pub use self::vtable::VariantArray;
14
15pub(crate) fn initialize(session: &vortex_session::VortexSession) {
16    vtable::initialize(session);
17}
18
19use crate::ArrayRef;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::EmptyArrayData;
23use crate::array::TypedArrayRef;
24use crate::dtype::DType;
25
26pub(super) const CORE_STORAGE_SLOT: usize = 0;
27pub(super) const SHREDDED_SLOT: usize = 1;
28pub(super) const NUM_SLOTS: usize = 2;
29pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["core_storage", "shredded"];
30
31/// Accessors for canonical variant storage.
32///
33/// A canonical variant array keeps the full variant value for every row in `core_storage` and may
34/// carry a row-aligned, storage-agnostic `shredded` typed tree for selected paths.
35///
36/// `core_storage` is a logical `DType::Variant` array, not a specific physical encoding: it may be
37/// chunked, constant, or otherwise encoded. Callers must use normal array operations instead of
38/// assuming a particular slot layout. The shredded child may have any dtype; its dtype is recorded
39/// during serialization and validated by normal child deserialization.
40pub trait VariantArrayExt: TypedArrayRef<Variant> {
41    /// Returns the logical variant storage that preserves the full value for every row.
42    fn core_storage(&self) -> &ArrayRef {
43        self.as_ref().slots()[CORE_STORAGE_SLOT]
44            .as_ref()
45            .vortex_expect("validated variant core_storage slot")
46    }
47
48    /// Returns the optional row-aligned typed shredded tree for selected variant paths.
49    /// This functions returns `Some` only if the array was canonicalized and the shredded data
50    /// was pulled out of the underlying variant storage.
51    fn shredded(&self) -> Option<&ArrayRef> {
52        self.as_ref().slots()[SHREDDED_SLOT].as_ref()
53    }
54}
55impl<T: TypedArrayRef<Variant>> VariantArrayExt for T {}
56
57impl Array<Variant> {
58    /// Creates a new `VariantArray` with logical variant core storage and optional shredded storage.
59    ///
60    /// `core_storage` must have `DType::Variant`, but it may use any Variant-typed physical
61    /// encoding. See [`VariantArrayExt`] for the higher-level storage contract.
62    ///
63    /// `shredded`, when present, must be row-aligned with `core_storage` and stores typed values for
64    /// selected variant paths.
65    pub fn try_new(core_storage: ArrayRef, shredded: Option<ArrayRef>) -> VortexResult<Self> {
66        let dtype = core_storage.dtype().clone();
67        vortex_ensure!(
68            matches!(dtype, DType::Variant(_)),
69            "VariantArray core_storage dtype must be Variant, found {dtype}"
70        );
71        let len = core_storage.len();
72        let stats = core_storage.statistics().to_owned();
73        Ok(Array::try_from_parts(
74            ArrayParts::new(Variant, dtype, len, EmptyArrayData)
75                .with_slots(vec![Some(core_storage), shredded].into()),
76        )?
77        .with_stats_set(stats))
78    }
79}
80
81#[cfg(test)]
82mod tests {
83    use vortex_buffer::buffer;
84    use vortex_error::VortexResult;
85    use vortex_error::vortex_err;
86    use vortex_mask::Mask;
87
88    use crate::ArrayRef;
89    use crate::Canonical;
90    use crate::IntoArray;
91    use crate::VortexSessionExecute;
92    use crate::array_session;
93    use crate::arrays::BoolArray;
94    use crate::arrays::ChunkedArray;
95    use crate::arrays::ConstantArray;
96    use crate::arrays::PrimitiveArray;
97    use crate::arrays::StructArray;
98    use crate::arrays::VariantArray;
99    use crate::arrays::variant::VariantArrayExt;
100    use crate::assert_arrays_eq;
101    use crate::builtins::ArrayBuiltins;
102    use crate::dtype::DType;
103    use crate::dtype::Nullability;
104    use crate::dtype::PType;
105    use crate::expr::root;
106    use crate::expr::variant_get;
107    use crate::scalar::Scalar;
108    use crate::scalar_fn::fns::variant_get::VariantPath;
109
110    fn core_storage(len: usize) -> ArrayRef {
111        ConstantArray::new(
112            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
113            len,
114        )
115        .into_array()
116    }
117
118    fn row_storage(values: impl IntoIterator<Item = i32>) -> VortexResult<ArrayRef> {
119        let chunks = values
120            .into_iter()
121            .map(|value| {
122                ConstantArray::new(
123                    Scalar::variant(Scalar::primitive(value, Nullability::NonNullable)),
124                    1,
125                )
126                .into_array()
127            })
128            .collect();
129
130        Ok(ChunkedArray::try_new(chunks, DType::Variant(Nullability::NonNullable))?.into_array())
131    }
132
133    fn variant_with_shredded(
134        core_values: impl IntoIterator<Item = i32>,
135        shredded_values: impl IntoIterator<Item = i32>,
136    ) -> VortexResult<VariantArray> {
137        VariantArray::try_new(
138            row_storage(core_values)?,
139            Some(PrimitiveArray::from_iter(shredded_values).into_array()),
140        )
141    }
142
143    fn execute_variant(array: ArrayRef) -> VortexResult<VariantArray> {
144        let mut ctx = array_session().create_execution_ctx();
145        let Canonical::Variant(variant) = array.execute::<Canonical>(&mut ctx)? else {
146            return Err(vortex_err!("expected canonical variant array"));
147        };
148        Ok(variant)
149    }
150
151    fn assert_variant_rows(
152        array: &VariantArray,
153        expected_core: &[Option<i32>],
154        expected_shredded: &[Option<i32>],
155    ) -> VortexResult<()> {
156        assert_variant_core_rows(array, expected_core)?;
157        assert_eq!(array.len(), expected_shredded.len());
158
159        let shredded = array
160            .shredded()
161            .ok_or_else(|| vortex_err!("expected shredded child"))?;
162        let mut ctx = array_session().create_execution_ctx();
163        let shredded = shredded.clone().execute::<PrimitiveArray>(&mut ctx)?;
164        let expected_shredded_array = if let Some(values) = expected_shredded
165            .iter()
166            .copied()
167            .collect::<Option<Vec<_>>>()
168        {
169            PrimitiveArray::from_iter(values)
170        } else {
171            PrimitiveArray::from_option_iter(expected_shredded.iter().copied())
172        };
173        assert_arrays_eq!(shredded, expected_shredded_array, &mut ctx);
174
175        Ok(())
176    }
177
178    fn assert_variant_core_rows(
179        array: &VariantArray,
180        expected_core: &[Option<i32>],
181    ) -> VortexResult<()> {
182        assert_eq!(array.len(), expected_core.len());
183
184        let mut ctx = array_session().create_execution_ctx();
185        for (idx, expected) in expected_core.iter().enumerate() {
186            let scalar = array.core_storage().execute_scalar(idx, &mut ctx)?;
187            let variant = scalar.as_variant();
188            match expected {
189                Some(expected) => {
190                    let value = variant
191                        .value()
192                        .ok_or_else(|| vortex_err!("expected non-null variant row"))?;
193                    assert_eq!(value.as_primitive().typed_value::<i32>(), Some(*expected));
194                }
195                None => assert!(variant.is_null()),
196            }
197        }
198
199        Ok(())
200    }
201
202    #[test]
203    fn try_new_exposes_core_storage_without_shredded() -> VortexResult<()> {
204        let core_storage = core_storage(2);
205
206        let variant = VariantArray::try_new(core_storage.clone(), None)?;
207
208        assert_eq!(variant.dtype(), core_storage.dtype());
209        assert_eq!(variant.len(), 2);
210        assert_eq!(variant.core_storage().dtype(), core_storage.dtype());
211        assert!(variant.shredded().is_none());
212
213        Ok(())
214    }
215
216    #[test]
217    fn try_new_exposes_core_storage_and_shredded() -> VortexResult<()> {
218        let core_storage = core_storage(3);
219        let shredded = buffer![10i32, 20, 30].into_array();
220
221        let variant = VariantArray::try_new(core_storage.clone(), Some(shredded.clone()))?;
222
223        assert_eq!(variant.dtype(), &DType::Variant(Nullability::NonNullable));
224        assert_eq!(variant.len(), 3);
225        assert_eq!(variant.core_storage().dtype(), core_storage.dtype());
226        assert_eq!(variant.core_storage().len(), core_storage.len());
227        assert_eq!(
228            variant.shredded().map(|child| child.dtype()),
229            Some(shredded.dtype())
230        );
231        assert_eq!(
232            variant.shredded().map(|child| child.len()),
233            Some(shredded.len())
234        );
235        assert_eq!(variant.as_ref().slot_name(0), "core_storage");
236        assert_eq!(variant.as_ref().slot_name(1), "shredded");
237
238        Ok(())
239    }
240
241    #[test]
242    fn try_new_rejects_non_variant_core_storage() {
243        let core_storage = PrimitiveArray::from_iter([1i32, 2, 3]).into_array();
244
245        assert!(VariantArray::try_new(core_storage, None).is_err());
246    }
247
248    #[test]
249    fn try_new_rejects_shredded_length_mismatch() {
250        let core_storage = core_storage(3);
251        let shredded = buffer![10i32, 20].into_array();
252
253        assert!(VariantArray::try_new(core_storage, Some(shredded)).is_err());
254    }
255
256    #[test]
257    fn scalar_at_merges_shredded_with_core_storage() -> VortexResult<()> {
258        let dtype = DType::Variant(Nullability::Nullable);
259        let core_chunks = [Some(1i32), None, Some(3)]
260            .into_iter()
261            .map(|value| {
262                let scalar = match value {
263                    Some(value) => {
264                        Scalar::variant(Scalar::primitive(value, Nullability::NonNullable))
265                            .cast(&dtype)?
266                    }
267                    None => Scalar::null(dtype.clone()),
268                };
269                Ok(ConstantArray::new(scalar, 1).into_array())
270            })
271            .collect::<VortexResult<Vec<_>>>()?;
272        let core_storage = ChunkedArray::try_new(core_chunks, dtype)?.into_array();
273        let shredded = PrimitiveArray::from_option_iter([Some(10i32), Some(20), None]).into_array();
274        let variant = VariantArray::try_new(core_storage, Some(shredded))?;
275
276        let mut ctx = array_session().create_execution_ctx();
277        for (idx, expected) in [Some(10i32), None, Some(3)].into_iter().enumerate() {
278            let scalar = variant.execute_scalar(idx, &mut ctx)?;
279            let variant = scalar.as_variant();
280            match expected {
281                Some(expected) => {
282                    let value = variant
283                        .value()
284                        .ok_or_else(|| vortex_err!("expected non-null variant row"))?;
285                    assert_eq!(value.as_primitive().typed_value::<i32>(), Some(expected));
286                }
287                None => assert!(variant.is_null()),
288            }
289        }
290
291        Ok(())
292    }
293
294    #[test]
295    fn slice_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
296        let variant = variant_with_shredded(0..5, 10..15)?;
297
298        let sliced = execute_variant(variant.into_array().slice(1..4)?)?;
299
300        assert_variant_rows(
301            &sliced,
302            &[Some(1), Some(2), Some(3)],
303            &[Some(11), Some(12), Some(13)],
304        )
305    }
306
307    #[test]
308    fn filter_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
309        let variant = variant_with_shredded(0..5, 10..15)?;
310
311        let filtered = execute_variant(
312            variant
313                .into_array()
314                .filter(Mask::from_iter([true, false, true, false, true]))?,
315        )?;
316
317        assert_variant_rows(
318            &filtered,
319            &[Some(0), Some(2), Some(4)],
320            &[Some(10), Some(12), Some(14)],
321        )
322    }
323
324    #[test]
325    fn take_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
326        let variant = variant_with_shredded(0..5, 10..15)?;
327
328        let taken = execute_variant(
329            variant
330                .into_array()
331                .take(buffer![4u64, 1, 3].into_array())?,
332        )?;
333
334        assert_variant_rows(
335            &taken,
336            &[Some(4), Some(1), Some(3)],
337            &[Some(14), Some(11), Some(13)],
338        )
339    }
340
341    #[test]
342    fn mask_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
343        let variant = variant_with_shredded(0..5, 10..15)?;
344        let mask = BoolArray::from_iter([true, false, true, false, true]).into_array();
345
346        let masked = execute_variant(variant.into_array().mask(mask)?)?;
347
348        assert_variant_rows(
349            &masked,
350            &[Some(0), None, Some(2), None, Some(4)],
351            &[Some(10), None, Some(12), None, Some(14)],
352        )
353    }
354
355    #[test]
356    fn mask_preserves_chunked_core_storage_validity() -> VortexResult<()> {
357        let dtype = DType::Variant(Nullability::Nullable);
358        let core_chunks = [Some(1i32), None, Some(3), Some(4)]
359            .into_iter()
360            .map(|value| {
361                let scalar = match value {
362                    Some(value) => {
363                        Scalar::variant(Scalar::primitive(value, Nullability::NonNullable))
364                            .cast(&dtype)?
365                    }
366                    None => Scalar::null(dtype.clone()),
367                };
368                Ok(ConstantArray::new(scalar, 1).into_array())
369            })
370            .collect::<VortexResult<Vec<_>>>()?;
371        let core_storage = ChunkedArray::try_new(core_chunks, dtype)?.into_array();
372        let variant = VariantArray::try_new(core_storage, None)?;
373        let mask = BoolArray::from_iter([true, true, false, true]).into_array();
374
375        let masked = execute_variant(variant.into_array().mask(mask)?)?;
376
377        assert_variant_core_rows(&masked, &[Some(1), None, None, Some(4)])
378    }
379
380    #[test]
381    fn variant_get_keeps_valid_shredded_rows_for_matching_dtype() -> VortexResult<()> {
382        let mut ctx = array_session().create_execution_ctx();
383        let core_storage = row_storage([1, 2, 3])?;
384        let shredded = StructArray::try_from_iter([(
385            "a",
386            PrimitiveArray::from_iter([10i32, 20, 30]).into_array(),
387        )])?;
388        let variant = VariantArray::try_new(core_storage, Some(shredded.into_array()))?;
389        let expr = variant_get(
390            root(),
391            VariantPath::field("a"),
392            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
393        );
394
395        let result = variant
396            .into_array()
397            .apply(&expr)?
398            .execute::<PrimitiveArray>(&mut array_session().create_execution_ctx())?;
399
400        assert_arrays_eq!(
401            result,
402            PrimitiveArray::from_option_iter([Some(10i32), Some(20), Some(30)]),
403            &mut ctx
404        );
405        Ok(())
406    }
407
408    #[test]
409    fn variant_get_treats_value_and_typed_value_as_logical_field_names() -> VortexResult<()> {
410        let mut ctx = array_session().create_execution_ctx();
411        let core_storage = row_storage([1, 2, 3])?;
412        let shredded = StructArray::try_from_iter([
413            (
414                "value",
415                PrimitiveArray::from_iter([10i32, 20, 30]).into_array(),
416            ),
417            (
418                "typed_value",
419                PrimitiveArray::from_iter([40i32, 50, 60]).into_array(),
420            ),
421        ])?;
422        let variant = VariantArray::try_new(core_storage, Some(shredded.into_array()))?;
423
424        let value_expr = variant_get(
425            root(),
426            VariantPath::field("value"),
427            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
428        );
429        let value_result = variant
430            .clone()
431            .into_array()
432            .apply(&value_expr)?
433            .execute::<PrimitiveArray>(&mut array_session().create_execution_ctx())?;
434        assert_arrays_eq!(
435            value_result,
436            PrimitiveArray::from_option_iter([Some(10i32), Some(20), Some(30)]),
437            &mut ctx
438        );
439
440        let typed_value_expr = variant_get(
441            root(),
442            VariantPath::field("typed_value"),
443            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
444        );
445        let typed_value_result = variant
446            .into_array()
447            .apply(&typed_value_expr)?
448            .execute::<PrimitiveArray>(&mut array_session().create_execution_ctx())?;
449        assert_arrays_eq!(
450            typed_value_result,
451            PrimitiveArray::from_option_iter([Some(40i32), Some(50), Some(60)]),
452            &mut ctx
453        );
454        Ok(())
455    }
456}