vortex_array/arrays/struct_/compute/
to_arrow.rs

1use std::sync::Arc;
2
3use arrow_array::{ArrayRef, StructArray as ArrowStructArray};
4use arrow_schema::{DataType, Field, Fields};
5use itertools::Itertools;
6use vortex_error::{VortexResult, vortex_bail};
7
8use crate::Array;
9use crate::arrays::{StructArray, StructEncoding};
10use crate::compute::{ToArrowFn, to_arrow};
11use crate::variants::StructArrayTrait;
12
13impl ToArrowFn<&StructArray> for StructEncoding {
14    fn to_arrow(
15        &self,
16        array: &StructArray,
17        data_type: &DataType,
18    ) -> VortexResult<Option<ArrayRef>> {
19        let target_fields = match data_type {
20            DataType::Struct(fields) => fields,
21            _ => vortex_bail!("Unsupported data type: {data_type}"),
22        };
23
24        let field_arrays = target_fields
25            .iter()
26            .zip_eq(array.fields())
27            .map(|(field, arr)| {
28                // We check that the Vortex array nullability is compatible with the field
29                // nullability. In other words, make sure we don't return any nulls for a
30                // non-nullable field.
31                if arr.dtype().is_nullable() && !field.is_nullable() && !arr.all_valid()? {
32                    vortex_bail!(
33                        "Field {} is non-nullable but has nulls {}",
34                        field,
35                        arr.tree_display()
36                    );
37                }
38
39                to_arrow(arr, field.data_type()).map_err(|err| {
40                    err.with_context(format!("Failed to canonicalize field {}", field))
41                })
42            })
43            .collect::<VortexResult<Vec<_>>>()?;
44
45        let nulls = array.validity_mask()?.to_null_buffer();
46
47        if field_arrays.is_empty() {
48            Ok(Some(Arc::new(ArrowStructArray::new_empty_fields(
49                array.len(),
50                nulls,
51            ))))
52        } else {
53            let arrow_fields = array
54                .names()
55                .iter()
56                .zip(field_arrays.iter())
57                .zip(target_fields.iter())
58                .map(|((name, field_array), target_field)| {
59                    Field::new(
60                        &**name,
61                        field_array.data_type().clone(),
62                        target_field.is_nullable(),
63                    )
64                })
65                .map(Arc::new)
66                .collect::<Fields>();
67
68            Ok(Some(Arc::new(ArrowStructArray::try_new(
69                arrow_fields,
70                field_arrays,
71                nulls,
72            )?)))
73        }
74    }
75}
76
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_dtype::FieldNames;

    use super::*;
    use crate::arrays::PrimitiveArray;
    use crate::arrow::IntoArrowArray;
    use crate::validity::Validity;

    /// Arrow struct type holding a single non-nullable `Int64` field named `xs`.
    fn non_nullable_xs_struct() -> DataType {
        let xs_field = Field::new("xs", DataType::Int64, false);
        DataType::Struct(Fields::from(vec![xs_field]))
    }

    /// A child built with `Validity::AllValid` converts into a non-nullable Arrow field.
    #[test]
    fn nullable_non_null_to_arrow() {
        let column = PrimitiveArray::new(buffer![0i64, 1, 2, 3, 4], Validity::AllValid);
        let names = FieldNames::from(["xs".into()]);

        let vortex_struct =
            StructArray::try_new(names, vec![column.into_array()], 5, Validity::AllValid).unwrap();

        let target = non_nullable_xs_struct();
        vortex_struct.into_array().into_arrow(&target).unwrap();
    }

    /// A child that contains a null must be rejected when the target field is non-nullable.
    #[test]
    fn nullable_with_nulls_to_arrow() {
        let values = vec![Some(0_i64), Some(1), Some(2), None, Some(3)];
        let column = PrimitiveArray::from_option_iter(values);
        let names = FieldNames::from(["xs".into()]);

        let vortex_struct =
            StructArray::try_new(names, vec![column.into_array()], 5, Validity::AllValid).unwrap();

        let target = non_nullable_xs_struct();
        let outcome = vortex_struct.into_array().into_arrow(&target);
        assert!(outcome.is_err());
    }
}