// vortex_array/compute/to_arrow.rs

use arrow_array::{Array as ArrowArray, ArrayRef};
use arrow_schema::DataType;
use vortex_error::{vortex_err, VortexError, VortexExpect, VortexResult};

use crate::arrow::infer_data_type;
use crate::encoding::Encoding;
use crate::{Array, IntoArray, IntoCanonical};

/// Compute function trait for converting an array of type `A` into an Arrow array.
///
/// Implementors may override [`ToArrowFn::preferred_arrow_data_type`] to advertise a
/// non-default Arrow [`DataType`], and must provide [`ToArrowFn::to_arrow`] to perform
/// (or decline) the actual conversion.
pub trait ToArrowFn<A> {
    /// Return the preferred Arrow [`DataType`] of the encoding, or `None` if the canonical
    /// [`DataType`] for the array's Vortex [`vortex_dtype::DType`] should be used.
    ///
    /// The default implementation expresses no preference and returns `Ok(None)`.
    fn preferred_arrow_data_type(&self, _array: &A) -> VortexResult<Option<DataType>> {
        Ok(None)
    }

    /// Convert the array to an Arrow array of the given type.
    ///
    /// Implementations can return `Ok(None)` if the conversion cannot be specialized by this
    /// encoding. In this case, the default conversion via `into_canonical` will be used.
    fn to_arrow(&self, array: &A, data_type: &DataType) -> VortexResult<Option<ArrayRef>>;
}

/// Blanket adapter that lets any encoding implementing [`ToArrowFn`] for its own typed
/// array also be used as a [`ToArrowFn`] over the type-erased [`Array`].
///
/// Each method downcasts the erased array with `try_downcast_ref::<E>()` and then forwards
/// to the encoding-specific implementation. A failed downcast is returned to the caller as
/// an error without invoking the typed implementation.
impl<E: Encoding> ToArrowFn<Array> for E
where
    E: ToArrowFn<E::Array>,
    for<'a> &'a E::Array: TryFrom<&'a Array, Error = VortexError>,
{
    /// Downcast `array` to `E::Array` and ask the typed implementation for its preference.
    fn preferred_arrow_data_type(&self, array: &Array) -> VortexResult<Option<DataType>> {
        array
            .try_downcast_ref::<E>()
            .and_then(|(typed, enc)| ToArrowFn::preferred_arrow_data_type(enc, typed))
    }

    /// Downcast `array` to `E::Array` and delegate the conversion to the typed implementation.
    fn to_arrow(&self, array: &Array, data_type: &DataType) -> VortexResult<Option<ArrayRef>> {
        array
            .try_downcast_ref::<E>()
            .and_then(|(typed, enc)| ToArrowFn::to_arrow(enc, typed, data_type))
    }
}

/// Determine the Arrow [`DataType`] that `array` would prefer to be converted into.
///
/// If the array's encoding exposes a [`ToArrowFn`] and that function reports a preference,
/// the preference is returned. Otherwise the type is inferred from the array's Vortex dtype
/// via [`infer_data_type`].
///
/// # Errors
///
/// Propagates any error from the encoding's `preferred_arrow_data_type` or from
/// [`infer_data_type`].
pub fn preferred_arrow_data_type<A: AsRef<Array>>(array: A) -> VortexResult<DataType> {
    let array = array.as_ref();

    // Ask the encoding (if it provides a conversion function) whether it has a preference.
    let preference = match array.vtable().to_arrow_fn() {
        Some(to_arrow_fn) => to_arrow_fn.preferred_arrow_data_type(array)?,
        None => None,
    };

    match preference {
        Some(data_type) => Ok(data_type),
        // No preference expressed: derive the default from the Vortex dtype.
        None => infer_data_type(array.dtype()),
    }
}

/// Convert `array` into an Arrow array of the requested `data_type`.
///
/// The array's own encoding is tried first. If it has no [`ToArrowFn`], or its
/// [`ToArrowFn`] declines by returning `None`, the array is canonicalized and the canonical
/// encoding's [`ToArrowFn`] performs the conversion.
///
/// # Errors
///
/// Returns an error if either conversion attempt or the canonicalization fails, or if the
/// canonical encoding also declines to produce the requested `data_type`.
///
/// # Panics
///
/// Panics if the encoding-specific conversion yields an Arrow array whose data type differs
/// from `data_type`. The `vortex_expect` on the canonical path presumably also panics when a
/// canonical encoding lacks a [`ToArrowFn`] (confirm against `VortexExpect`).
pub fn to_arrow<A: AsRef<Array>>(array: A, data_type: &DataType) -> VortexResult<ArrayRef> {
    let array = array.as_ref();

    // First attempt: let the array's own encoding perform a specialized conversion.
    let specialized = match array.vtable().to_arrow_fn() {
        Some(to_arrow_fn) => to_arrow_fn.to_arrow(array, data_type)?,
        None => None,
    };
    if let Some(arrow) = specialized {
        assert_eq!(
            arrow.data_type(),
            data_type,
            "ToArrowFn returned wrong data type"
        );
        return Ok(arrow);
    }

    // Second attempt: canonicalize, then convert through the canonical encoding.
    let canonical = array.clone().into_canonical()?.into_array();
    let converted = canonical
        .vtable()
        .to_arrow_fn()
        .vortex_expect("Canonical encodings must implement ToArrowFn")
        .to_arrow(&canonical, data_type)?;

    match converted {
        Some(arrow) => Ok(arrow),
        None => Err(vortex_err!(
            "Failed to convert array {} to Arrow {}",
            canonical.encoding(),
            data_type
        )),
    }
}