Skip to main content

vortex_array/arrow/
record_batch.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::RecordBatch;
5use arrow_array::cast::AsArray;
6use arrow_schema::DataType;
7use arrow_schema::Schema;
8use vortex_error::VortexError;
9use vortex_error::VortexResult;
10use vortex_error::vortex_bail;
11use vortex_error::vortex_ensure;
12
13use crate::ArrayRef;
14use crate::Canonical;
15use crate::LEGACY_SESSION;
16use crate::VortexSessionExecute;
17use crate::array::IntoArray;
18use crate::arrays::StructArray;
19use crate::arrow::ArrowArrayExecutor;
20use crate::validity::Validity;
21
22// deprecated(note = "Use ArrowArrayExecutor::execute_record_batch instead")
23impl TryFrom<&ArrayRef> for RecordBatch {
24    type Error = VortexError;
25
26    fn try_from(value: &ArrayRef) -> VortexResult<Self> {
27        let Canonical::Struct(struct_array) = value.to_canonical()? else {
28            vortex_bail!("RecordBatch can only be constructed from ")
29        };
30
31        vortex_ensure!(
32            matches!(struct_array.validity()?, Validity::AllValid),
33            "RecordBatch can only be constructed from StructArray with no nulls"
34        );
35
36        let data_type = struct_array.dtype().to_arrow_dtype()?;
37        let array_ref = struct_array
38            .into_array()
39            .execute_arrow(Some(&data_type), &mut LEGACY_SESSION.create_execution_ctx())?;
40        Ok(RecordBatch::from(array_ref.as_struct()))
41    }
42}
43
44impl StructArray {
45    pub fn into_record_batch_with_schema(
46        self,
47        schema: impl AsRef<Schema>,
48    ) -> VortexResult<RecordBatch> {
49        let data_type = DataType::Struct(schema.as_ref().fields.clone());
50        let array_ref = self
51            .into_array()
52            .execute_arrow(Some(&data_type), &mut LEGACY_SESSION.create_execution_ctx())?;
53        Ok(RecordBatch::from(array_ref.as_struct()))
54    }
55}
56
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_schema::DataType;
    use arrow_schema::Field;
    use arrow_schema::FieldRef;
    use arrow_schema::Schema;

    use crate::arrow::record_batch::StructArray;
    use crate::builders::ArrayBuilder;
    use crate::builders::ListBuilder;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::scalar::Scalar;

    /// Checks that the Arrow type requested through the schema is the type of the
    /// resulting record batch column.
    #[test]
    fn test_into_rb_with_schema() {
        let mut list_builder = ListBuilder::<u32>::new(
            Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)),
            Nullability::Nullable,
        );

        // Three rows: a populated list, a null, and a zero value.
        let element_dtype = list_builder.element_dtype().clone();
        let first_row = Scalar::list(
            element_dtype,
            vec![1i32.into(), 2i32.into(), 3i32.into()],
            Nullability::Nullable,
        );
        list_builder.append_scalar(&first_row).unwrap();
        list_builder.append_null();
        list_builder.append_zero();

        let list_column = list_builder.finish();
        let struct_array = StructArray::from_fields(&[("xs", list_column)]).unwrap();

        // Explicitly request a conversion to LargeListView type instead of the preferred type.
        let requested_type =
            DataType::LargeListView(FieldRef::new(Field::new_list_field(DataType::Int32, false)));
        let arrow_schema = Arc::new(Schema::new(vec![Field::new(
            "xs",
            requested_type.clone(),
            true,
        )]));

        let batch = struct_array
            .into_record_batch_with_schema(arrow_schema)
            .unwrap();

        let exported_column = batch.column(0);
        assert_eq!(exported_column.data_type(), &requested_type);
    }
}