vortex_array/arrow/
record_batch.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::RecordBatch;
5use arrow_array::cast::AsArray;
6use arrow_schema::DataType;
7use arrow_schema::Schema;
8use vortex_error::VortexError;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_ensure;
12
13use crate::Array;
14use crate::Canonical;
15use crate::arrays::StructArray;
16use crate::arrow::compute::to_arrow;
17use crate::arrow::compute::to_arrow_preferred;
18
19impl TryFrom<&dyn Array> for RecordBatch {
20    type Error = VortexError;
21
22    fn try_from(value: &dyn Array) -> VortexResult<Self> {
23        let Canonical::Struct(struct_array) = value.to_canonical() else {
24            vortex_bail!("RecordBatch can only be constructed from ")
25        };
26
27        vortex_ensure!(
28            struct_array.all_valid(),
29            "RecordBatch can only be constructed from StructArray with no nulls"
30        );
31
32        let array_ref = to_arrow_preferred(struct_array.as_ref())?;
33        Ok(RecordBatch::from(array_ref.as_struct()))
34    }
35}
36
37impl StructArray {
38    pub fn into_record_batch_with_schema(
39        self,
40        schema: impl AsRef<Schema>,
41    ) -> VortexResult<RecordBatch> {
42        let data_type = DataType::Struct(schema.as_ref().fields.clone());
43        let array_ref = to_arrow(self.as_ref(), &data_type)?;
44        Ok(RecordBatch::from(array_ref.as_struct()))
45    }
46}
47
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_schema::DataType;
    use arrow_schema::Field;
    use arrow_schema::FieldRef;
    use arrow_schema::Schema;
    use vortex_dtype::DType;
    use vortex_dtype::Nullability;
    use vortex_dtype::PType;
    use vortex_scalar::Scalar;

    use crate::arrays::StructArray;
    use crate::builders::ArrayBuilder;
    use crate::builders::ListBuilder;

    /// Converting with an explicit schema must produce the Arrow type named in that schema.
    #[test]
    fn test_into_rb_with_schema() {
        let element_dtype = Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable));
        let mut list_builder = ListBuilder::<u32>::new(element_dtype, Nullability::Nullable);

        // Three rows: a populated list, a null, and one appended via `append_zero`.
        let first_row = Scalar::list(
            list_builder.element_dtype().clone(),
            vec![1i32.into(), 2i32.into(), 3i32.into()],
            Nullability::Nullable,
        );
        list_builder.append_scalar(&first_row).unwrap();
        list_builder.append_null();
        list_builder.append_zero();

        let lists = list_builder.finish();
        let struct_array = StructArray::from_fields(&[("xs", lists)]).unwrap();

        // Explicitly request a conversion to LargeListView type instead of the preferred type.
        let requested_type =
            DataType::LargeListView(FieldRef::new(Field::new_list_field(DataType::Int32, false)));
        let requested_schema = Arc::new(Schema::new(vec![Field::new(
            "xs",
            requested_type.clone(),
            true,
        )]));

        let batch = struct_array
            .into_record_batch_with_schema(requested_schema)
            .unwrap();

        let column = batch.column(0);
        assert_eq!(column.data_type(), &requested_type);
    }
}
99}