vortex_array/arrow/
datum.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_array::Array as ArrowArray;
5use arrow_array::ArrayRef as ArrowArrayRef;
6use arrow_array::Datum as ArrowDatum;
7use arrow_schema::DataType;
8use vortex_error::VortexResult;
9use vortex_error::vortex_panic;
10
11use crate::Array;
12use crate::ArrayRef;
13use crate::IntoArray;
14use crate::arrays::ConstantArray;
15use crate::arrow::FromArrowArray;
16use crate::arrow::IntoArrowArray;
17
18/// A wrapper around a generic Arrow array that can be used as a Datum in Arrow compute.
19#[derive(Debug)]
20pub struct Datum {
21    array: ArrowArrayRef,
22    is_scalar: bool,
23}
24
25impl Datum {
26    /// Create a new [`Datum`] from an [`ArrayRef`], which can then be passed to Arrow compute.
27    pub fn try_new(array: &dyn Array) -> VortexResult<Self> {
28        if array.is_constant() {
29            Ok(Self {
30                array: array.slice(0..1).into_arrow_preferred()?,
31                is_scalar: true,
32            })
33        } else {
34            Ok(Self {
35                array: array.to_array().into_arrow_preferred()?,
36                is_scalar: false,
37            })
38        }
39    }
40
41    /// Create a new [`Datum`] from an [`Array`], which can then be passed to Arrow compute.
42    /// This not try and convert the array to a scalar if it is constant.
43    pub fn try_new_array(array: &dyn Array) -> VortexResult<Self> {
44        Ok(Self {
45            array: array.to_array().into_arrow_preferred()?,
46            is_scalar: false,
47        })
48    }
49
50    pub fn try_new_with_target_datatype(
51        array: &dyn Array,
52        target_datatype: &DataType,
53    ) -> VortexResult<Self> {
54        if array.is_constant() {
55            Ok(Self {
56                array: array.slice(0..1).into_arrow(target_datatype)?,
57                is_scalar: true,
58            })
59        } else {
60            Ok(Self {
61                array: array.to_array().into_arrow(target_datatype)?,
62                is_scalar: false,
63            })
64        }
65    }
66
67    pub fn data_type(&self) -> &DataType {
68        self.array.data_type()
69    }
70}
71
72impl ArrowDatum for Datum {
73    fn get(&self) -> (&dyn ArrowArray, bool) {
74        (&self.array, self.is_scalar)
75    }
76}
77
78/// Convert an Arrow array to an Array with a specific length.
79/// This is useful for compute functions that delegate to Arrow using [Datum],
80/// which will return a scalar (length 1 Arrow array) if the input array is constant.
81///
82/// # Error
83///
84/// The provided array must have length
85pub fn from_arrow_array_with_len<A>(array: A, len: usize, nullable: bool) -> ArrayRef
86where
87    ArrayRef: FromArrowArray<A>,
88{
89    let array = ArrayRef::from_arrow(array, nullable);
90    if array.len() == len {
91        return array;
92    }
93
94    if array.len() != 1 {
95        vortex_panic!(
96            "Array length mismatch, expected {} got {} for encoding {}",
97            len,
98            array.len(),
99            array.encoding_id()
100        );
101    }
102
103    ConstantArray::new(array.scalar_at(0), len).into_array()
104}