Skip to main content

vortex_array/arrow/
datum.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::Array as ArrowArray;
5use arrow_array::ArrayRef as ArrowArrayRef;
6use arrow_array::Datum as ArrowDatum;
7use arrow_schema::DataType;
8use vortex_error::VortexExpect;
9use vortex_error::VortexResult;
10use vortex_error::vortex_panic;
11
12use crate::ArrayRef;
13use crate::IntoArray;
14use crate::LEGACY_SESSION;
15use crate::VortexSessionExecute;
16use crate::arrays::Constant;
17use crate::arrays::ConstantArray;
18use crate::arrow::FromArrowArray;
19use crate::arrow::IntoArrowArray;
20
21/// A wrapper around a generic Arrow array that can be used as a Datum in Arrow compute.
22#[derive(Debug)]
23pub struct Datum {
24    array: ArrowArrayRef,
25    is_scalar: bool,
26}
27
28impl Datum {
29    /// Create a new [`Datum`] from an [`ArrayRef`], which can then be passed to Arrow compute.
30    pub fn try_new(array: &ArrayRef) -> VortexResult<Self> {
31        if array.is::<Constant>() {
32            Ok(Self {
33                array: array.slice(0..1)?.into_arrow_preferred()?,
34                is_scalar: true,
35            })
36        } else {
37            Ok(Self {
38                array: array.clone().into_arrow_preferred()?,
39                is_scalar: false,
40            })
41        }
42    }
43
44    /// Create a new [`Datum`] from an `DynArray`, which can then be passed to Arrow compute.
45    /// This not try and convert the array to a scalar if it is constant.
46    pub fn try_new_array(array: &ArrayRef) -> VortexResult<Self> {
47        Ok(Self {
48            array: array.clone().into_arrow_preferred()?,
49            is_scalar: false,
50        })
51    }
52
53    pub fn try_new_with_target_datatype(
54        array: &ArrayRef,
55        target_datatype: &DataType,
56    ) -> VortexResult<Self> {
57        if array.is::<Constant>() {
58            Ok(Self {
59                array: array.slice(0..1)?.into_arrow(target_datatype)?,
60                is_scalar: true,
61            })
62        } else {
63            Ok(Self {
64                array: array.clone().into_arrow(target_datatype)?,
65                is_scalar: false,
66            })
67        }
68    }
69
70    pub fn data_type(&self) -> &DataType {
71        self.array.data_type()
72    }
73}
74
75impl ArrowDatum for Datum {
76    fn get(&self) -> (&dyn ArrowArray, bool) {
77        (&self.array, self.is_scalar)
78    }
79}
80
81/// Convert an Arrow array to an Array with a specific length.
82/// This is useful for compute functions that delegate to Arrow using [Datum],
83/// which will return a scalar (length 1 Arrow array) if the input array is constant.
84///
85/// # Error
86///
87/// The provided array must have length
88pub fn from_arrow_array_with_len<A>(array: A, len: usize, nullable: bool) -> VortexResult<ArrayRef>
89where
90    ArrayRef: FromArrowArray<A>,
91{
92    let array = ArrayRef::from_arrow(array, nullable)?;
93    if array.len() == len {
94        return Ok(array);
95    }
96
97    if array.len() != 1 {
98        vortex_panic!(
99            "Array length mismatch, expected {} got {} for encoding {}",
100            len,
101            array.len(),
102            array.encoding_id()
103        );
104    }
105
106    Ok(ConstantArray::new(
107        array
108            .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
109            .vortex_expect("array of length 1 must support execute_scalar(0)"),
110        len,
111    )
112    .into_array())
113}