Skip to main content

vortex_array/arrays/variant/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod vtable;
5
6pub(crate) mod compute;
7
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_ensure;
11
12pub use self::vtable::Variant;
13pub use self::vtable::VariantArray;
14use crate::ArrayRef;
15use crate::array::Array;
16use crate::array::ArrayParts;
17use crate::array::EmptyArrayData;
18use crate::array::TypedArrayRef;
19use crate::dtype::DType;
20
21pub(super) const CORE_STORAGE_SLOT: usize = 0;
22pub(super) const SHREDDED_SLOT: usize = 1;
23pub(super) const NUM_SLOTS: usize = 2;
24pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["core_storage", "shredded"];
25
26/// Accessors for canonical variant storage.
27///
28/// A canonical variant array keeps the full variant value for every row in `core_storage` and may
29/// carry a row-aligned, storage-agnostic `shredded` typed tree for selected paths.
30///
31/// `core_storage` is a logical `DType::Variant` array, not a specific physical encoding: it may be
32/// chunked, constant, or otherwise encoded. Callers must use normal array operations instead of
33/// assuming a particular slot layout. The shredded child may have any dtype; its dtype is recorded
34/// during serialization and validated by normal child deserialization.
35pub trait VariantArrayExt: TypedArrayRef<Variant> {
36    /// Returns the logical variant storage that preserves the full value for every row.
37    fn core_storage(&self) -> &ArrayRef {
38        self.as_ref().slots()[CORE_STORAGE_SLOT]
39            .as_ref()
40            .vortex_expect("validated variant core_storage slot")
41    }
42
43    /// Returns the optional row-aligned typed shredded tree for selected variant paths.
44    /// This functions returns `Some` only if the array was canonicalized and the shredded data
45    /// was pulled out of the underlying variant storage.
46    fn shredded(&self) -> Option<&ArrayRef> {
47        self.as_ref().slots()[SHREDDED_SLOT].as_ref()
48    }
49}
50impl<T: TypedArrayRef<Variant>> VariantArrayExt for T {}
51
52impl Array<Variant> {
53    /// Creates a new `VariantArray` with logical variant core storage and optional shredded storage.
54    ///
55    /// `core_storage` must have `DType::Variant`, but it may use any Variant-typed physical
56    /// encoding. See [`VariantArrayExt`] for the higher-level storage contract.
57    ///
58    /// `shredded`, when present, must be row-aligned with `core_storage` and stores typed values for
59    /// selected variant paths.
60    pub fn try_new(core_storage: ArrayRef, shredded: Option<ArrayRef>) -> VortexResult<Self> {
61        let dtype = core_storage.dtype().clone();
62        vortex_ensure!(
63            matches!(dtype, DType::Variant(_)),
64            "VariantArray core_storage dtype must be Variant, found {dtype}"
65        );
66        let len = core_storage.len();
67        let stats = core_storage.statistics().to_owned();
68        Ok(Array::try_from_parts(
69            ArrayParts::new(Variant, dtype, len, EmptyArrayData)
70                .with_slots(vec![Some(core_storage), shredded].into()),
71        )?
72        .with_stats_set(stats))
73    }
74}
75
76#[cfg(test)]
77mod tests {
78    use vortex_buffer::buffer;
79    use vortex_error::VortexResult;
80    use vortex_error::vortex_err;
81    use vortex_mask::Mask;
82
83    use crate::ArrayRef;
84    use crate::Canonical;
85    use crate::IntoArray;
86    use crate::LEGACY_SESSION;
87    use crate::VortexSessionExecute;
88    use crate::arrays::BoolArray;
89    use crate::arrays::ChunkedArray;
90    use crate::arrays::ConstantArray;
91    use crate::arrays::PrimitiveArray;
92    use crate::arrays::StructArray;
93    use crate::arrays::VariantArray;
94    use crate::arrays::variant::VariantArrayExt;
95    use crate::assert_arrays_eq;
96    use crate::builtins::ArrayBuiltins;
97    use crate::dtype::DType;
98    use crate::dtype::Nullability;
99    use crate::dtype::PType;
100    use crate::expr::root;
101    use crate::expr::variant_get;
102    use crate::scalar::Scalar;
103    use crate::scalar_fn::fns::variant_get::VariantPath;
104
105    fn core_storage(len: usize) -> ArrayRef {
106        ConstantArray::new(
107            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
108            len,
109        )
110        .into_array()
111    }
112
113    fn row_storage(values: impl IntoIterator<Item = i32>) -> VortexResult<ArrayRef> {
114        let chunks = values
115            .into_iter()
116            .map(|value| {
117                ConstantArray::new(
118                    Scalar::variant(Scalar::primitive(value, Nullability::NonNullable)),
119                    1,
120                )
121                .into_array()
122            })
123            .collect();
124
125        Ok(ChunkedArray::try_new(chunks, DType::Variant(Nullability::NonNullable))?.into_array())
126    }
127
128    fn variant_with_shredded(
129        core_values: impl IntoIterator<Item = i32>,
130        shredded_values: impl IntoIterator<Item = i32>,
131    ) -> VortexResult<VariantArray> {
132        VariantArray::try_new(
133            row_storage(core_values)?,
134            Some(PrimitiveArray::from_iter(shredded_values).into_array()),
135        )
136    }
137
138    fn execute_variant(array: ArrayRef) -> VortexResult<VariantArray> {
139        let mut ctx = LEGACY_SESSION.create_execution_ctx();
140        let Canonical::Variant(variant) = array.execute::<Canonical>(&mut ctx)? else {
141            return Err(vortex_err!("expected canonical variant array"));
142        };
143        Ok(variant)
144    }
145
146    fn assert_variant_rows(
147        array: &VariantArray,
148        expected_core: &[Option<i32>],
149        expected_shredded: &[Option<i32>],
150    ) -> VortexResult<()> {
151        assert_variant_core_rows(array, expected_core)?;
152        assert_eq!(array.len(), expected_shredded.len());
153
154        let shredded = array
155            .shredded()
156            .ok_or_else(|| vortex_err!("expected shredded child"))?;
157        let mut ctx = LEGACY_SESSION.create_execution_ctx();
158        let shredded = shredded.clone().execute::<PrimitiveArray>(&mut ctx)?;
159        let expected_shredded_array = if let Some(values) = expected_shredded
160            .iter()
161            .copied()
162            .collect::<Option<Vec<_>>>()
163        {
164            PrimitiveArray::from_iter(values)
165        } else {
166            PrimitiveArray::from_option_iter(expected_shredded.iter().copied())
167        };
168        assert_arrays_eq!(shredded, expected_shredded_array);
169
170        Ok(())
171    }
172
173    fn assert_variant_core_rows(
174        array: &VariantArray,
175        expected_core: &[Option<i32>],
176    ) -> VortexResult<()> {
177        assert_eq!(array.len(), expected_core.len());
178
179        let mut ctx = LEGACY_SESSION.create_execution_ctx();
180        for (idx, expected) in expected_core.iter().enumerate() {
181            let scalar = array.core_storage().execute_scalar(idx, &mut ctx)?;
182            let variant = scalar.as_variant();
183            match expected {
184                Some(expected) => {
185                    let value = variant
186                        .value()
187                        .ok_or_else(|| vortex_err!("expected non-null variant row"))?;
188                    assert_eq!(value.as_primitive().typed_value::<i32>(), Some(*expected));
189                }
190                None => assert!(variant.is_null()),
191            }
192        }
193
194        Ok(())
195    }
196
197    #[test]
198    fn try_new_exposes_core_storage_without_shredded() -> VortexResult<()> {
199        let core_storage = core_storage(2);
200
201        let variant = VariantArray::try_new(core_storage.clone(), None)?;
202
203        assert_eq!(variant.dtype(), core_storage.dtype());
204        assert_eq!(variant.len(), 2);
205        assert_eq!(variant.core_storage().dtype(), core_storage.dtype());
206        assert!(variant.shredded().is_none());
207
208        Ok(())
209    }
210
211    #[test]
212    fn try_new_exposes_core_storage_and_shredded() -> VortexResult<()> {
213        let core_storage = core_storage(3);
214        let shredded = buffer![10i32, 20, 30].into_array();
215
216        let variant = VariantArray::try_new(core_storage.clone(), Some(shredded.clone()))?;
217
218        assert_eq!(variant.dtype(), &DType::Variant(Nullability::NonNullable));
219        assert_eq!(variant.len(), 3);
220        assert_eq!(variant.core_storage().dtype(), core_storage.dtype());
221        assert_eq!(variant.core_storage().len(), core_storage.len());
222        assert_eq!(
223            variant.shredded().map(|child| child.dtype()),
224            Some(shredded.dtype())
225        );
226        assert_eq!(
227            variant.shredded().map(|child| child.len()),
228            Some(shredded.len())
229        );
230        assert_eq!(variant.as_ref().slot_name(0), "core_storage");
231        assert_eq!(variant.as_ref().slot_name(1), "shredded");
232
233        Ok(())
234    }
235
236    #[test]
237    fn try_new_rejects_non_variant_core_storage() {
238        let core_storage = PrimitiveArray::from_iter([1i32, 2, 3]).into_array();
239
240        assert!(VariantArray::try_new(core_storage, None).is_err());
241    }
242
243    #[test]
244    fn try_new_rejects_shredded_length_mismatch() {
245        let core_storage = core_storage(3);
246        let shredded = buffer![10i32, 20].into_array();
247
248        assert!(VariantArray::try_new(core_storage, Some(shredded)).is_err());
249    }
250
251    #[test]
252    fn scalar_at_merges_shredded_with_core_storage() -> VortexResult<()> {
253        let dtype = DType::Variant(Nullability::Nullable);
254        let core_chunks = [Some(1i32), None, Some(3)]
255            .into_iter()
256            .map(|value| {
257                let scalar = match value {
258                    Some(value) => {
259                        Scalar::variant(Scalar::primitive(value, Nullability::NonNullable))
260                            .cast(&dtype)?
261                    }
262                    None => Scalar::null(dtype.clone()),
263                };
264                Ok(ConstantArray::new(scalar, 1).into_array())
265            })
266            .collect::<VortexResult<Vec<_>>>()?;
267        let core_storage = ChunkedArray::try_new(core_chunks, dtype)?.into_array();
268        let shredded = PrimitiveArray::from_option_iter([Some(10i32), Some(20), None]).into_array();
269        let variant = VariantArray::try_new(core_storage, Some(shredded))?;
270
271        let mut ctx = LEGACY_SESSION.create_execution_ctx();
272        for (idx, expected) in [Some(10i32), None, Some(3)].into_iter().enumerate() {
273            let scalar = variant.execute_scalar(idx, &mut ctx)?;
274            let variant = scalar.as_variant();
275            match expected {
276                Some(expected) => {
277                    let value = variant
278                        .value()
279                        .ok_or_else(|| vortex_err!("expected non-null variant row"))?;
280                    assert_eq!(value.as_primitive().typed_value::<i32>(), Some(expected));
281                }
282                None => assert!(variant.is_null()),
283            }
284        }
285
286        Ok(())
287    }
288
289    #[test]
290    fn slice_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
291        let variant = variant_with_shredded(0..5, 10..15)?;
292
293        let sliced = execute_variant(variant.into_array().slice(1..4)?)?;
294
295        assert_variant_rows(
296            &sliced,
297            &[Some(1), Some(2), Some(3)],
298            &[Some(11), Some(12), Some(13)],
299        )
300    }
301
302    #[test]
303    fn filter_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
304        let variant = variant_with_shredded(0..5, 10..15)?;
305
306        let filtered = execute_variant(
307            variant
308                .into_array()
309                .filter(Mask::from_iter([true, false, true, false, true]))?,
310        )?;
311
312        assert_variant_rows(
313            &filtered,
314            &[Some(0), Some(2), Some(4)],
315            &[Some(10), Some(12), Some(14)],
316        )
317    }
318
319    #[test]
320    fn take_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
321        let variant = variant_with_shredded(0..5, 10..15)?;
322
323        let taken = execute_variant(
324            variant
325                .into_array()
326                .take(buffer![4u64, 1, 3].into_array())?,
327        )?;
328
329        assert_variant_rows(
330            &taken,
331            &[Some(4), Some(1), Some(3)],
332            &[Some(14), Some(11), Some(13)],
333        )
334    }
335
336    #[test]
337    fn mask_preserves_core_storage_and_shredded_rows() -> VortexResult<()> {
338        let variant = variant_with_shredded(0..5, 10..15)?;
339        let mask = BoolArray::from_iter([true, false, true, false, true]).into_array();
340
341        let masked = execute_variant(variant.into_array().mask(mask)?)?;
342
343        assert_variant_rows(
344            &masked,
345            &[Some(0), None, Some(2), None, Some(4)],
346            &[Some(10), None, Some(12), None, Some(14)],
347        )
348    }
349
350    #[test]
351    fn mask_preserves_chunked_core_storage_validity() -> VortexResult<()> {
352        let dtype = DType::Variant(Nullability::Nullable);
353        let core_chunks = [Some(1i32), None, Some(3), Some(4)]
354            .into_iter()
355            .map(|value| {
356                let scalar = match value {
357                    Some(value) => {
358                        Scalar::variant(Scalar::primitive(value, Nullability::NonNullable))
359                            .cast(&dtype)?
360                    }
361                    None => Scalar::null(dtype.clone()),
362                };
363                Ok(ConstantArray::new(scalar, 1).into_array())
364            })
365            .collect::<VortexResult<Vec<_>>>()?;
366        let core_storage = ChunkedArray::try_new(core_chunks, dtype)?.into_array();
367        let variant = VariantArray::try_new(core_storage, None)?;
368        let mask = BoolArray::from_iter([true, true, false, true]).into_array();
369
370        let masked = execute_variant(variant.into_array().mask(mask)?)?;
371
372        assert_variant_core_rows(&masked, &[Some(1), None, None, Some(4)])
373    }
374
375    #[test]
376    fn variant_get_keeps_valid_shredded_rows_for_matching_dtype() -> VortexResult<()> {
377        let core_storage = row_storage([1, 2, 3])?;
378        let shredded = StructArray::try_from_iter([(
379            "a",
380            PrimitiveArray::from_iter([10i32, 20, 30]).into_array(),
381        )])?;
382        let variant = VariantArray::try_new(core_storage, Some(shredded.into_array()))?;
383        let expr = variant_get(
384            root(),
385            VariantPath::field("a"),
386            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
387        );
388
389        let result = variant
390            .into_array()
391            .apply(&expr)?
392            .execute::<PrimitiveArray>(&mut LEGACY_SESSION.create_execution_ctx())?;
393
394        assert_arrays_eq!(
395            result,
396            PrimitiveArray::from_option_iter([Some(10i32), Some(20), Some(30)])
397        );
398        Ok(())
399    }
400
401    #[test]
402    fn variant_get_treats_value_and_typed_value_as_logical_field_names() -> VortexResult<()> {
403        let core_storage = row_storage([1, 2, 3])?;
404        let shredded = StructArray::try_from_iter([
405            (
406                "value",
407                PrimitiveArray::from_iter([10i32, 20, 30]).into_array(),
408            ),
409            (
410                "typed_value",
411                PrimitiveArray::from_iter([40i32, 50, 60]).into_array(),
412            ),
413        ])?;
414        let variant = VariantArray::try_new(core_storage, Some(shredded.into_array()))?;
415
416        let value_expr = variant_get(
417            root(),
418            VariantPath::field("value"),
419            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
420        );
421        let value_result = variant
422            .clone()
423            .into_array()
424            .apply(&value_expr)?
425            .execute::<PrimitiveArray>(&mut LEGACY_SESSION.create_execution_ctx())?;
426        assert_arrays_eq!(
427            value_result,
428            PrimitiveArray::from_option_iter([Some(10i32), Some(20), Some(30)])
429        );
430
431        let typed_value_expr = variant_get(
432            root(),
433            VariantPath::field("typed_value"),
434            Some(DType::Primitive(PType::I32, Nullability::NonNullable)),
435        );
436        let typed_value_result = variant
437            .into_array()
438            .apply(&typed_value_expr)?
439            .execute::<PrimitiveArray>(&mut LEGACY_SESSION.create_execution_ctx())?;
440        assert_arrays_eq!(
441            typed_value_result,
442            PrimitiveArray::from_option_iter([Some(40i32), Some(50), Some(60)])
443        );
444        Ok(())
445    }
446}