/// Returns early from the enclosing function with the error built by
/// `unhandled_arrow_conversion_dtype_pair_err` for the given Polars dtype / Arrow field pair.
///
/// The first argument is the Polars dtype expression and the second the Arrow field expression;
/// both are forwarded unchanged to the error constructor.
macro_rules! bail_unhandled_arrow_conversion_dtype_pair {
    ($pl_dtype:expr, $arrow_field:expr) => {{
        let conversion_err =
            $crate::series::arrow_export::unhandled_arrow_conversion_dtype_pair_err(
                $pl_dtype,
                $arrow_field,
            );
        return Err(conversion_err);
    }};
}
#[cfg(feature = "dtype-categorical")]
pub mod categorical;
use std::borrow::Cow;
use std::sync::Arc;
use polars_compute::cast::cast_unchecked;
use polars_error::{PolarsError, PolarsResult, polars_ensure, polars_err};
use crate::prelude::{
Array, ArrayRef, ArrowDataType, ArrowField, BinaryViewArray, CompatLevel, DataType, ListArray,
PlSmallStr, PrimitiveArray, Series,
};
/// Builds the `InvalidOperation` error reported when no conversion exists from a Polars dtype to
/// the requested Arrow field.
///
/// Both arguments are rendered with their `Debug` representation inside the message.
fn unhandled_arrow_conversion_dtype_pair_err(
    input_pl_dtype: &DataType,
    output_arrow_field: &ArrowField,
) -> PolarsError {
    // The format string captures the parameters by name, so the names must stay as they are.
    let conversion_err: PolarsError = polars_err!(
        InvalidOperation:
        "to_arrow() conversion failed: cannot convert \
        ({input_pl_dtype:?}) to ({output_arrow_field:?})",
    );
    conversion_err
}
/// Downcasts a `dyn Array` to `PrimitiveArray<$native>`, clones it, relabels the clone with the
/// given Arrow dtype via `.to(..)` and boxes the result.
///
/// Panics (through `unwrap`) when the array is not a `PrimitiveArray<$native>`.
macro_rules! primitive_to_boxed_with_logical {
    ($source:expr, $native:ty, $target_dtype:expr) => {{
        let physical = $source
            .as_any()
            .downcast_ref::<PrimitiveArray<$native>>()
            .unwrap();
        physical.clone().to($target_dtype).to_boxed()
    }};
}
fn ensure_no_nulls(array: &dyn Array) -> PolarsResult<()> {
polars_ensure!(
!array.has_nulls(),
SchemaMismatch:
"to_arrow() conversion failed: nullable is false but array contained {} NULLs (arrow dtype: {:?})",
array.null_count(), array.dtype(),
);
Ok(())
}
impl Series {
    /// Exports the chunk at `chunk_idx` as an Arrow array.
    ///
    /// The target Arrow field is derived from this series' own dtype and name using
    /// `compat_level`, and attaching Polars field metadata is skipped.
    ///
    /// # Panics
    /// Panics if the conversion returns an error, or if `chunk_idx` is out of range.
    pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef {
        let default_field = self
            .dtype()
            .to_arrow_field(self.name().clone(), compat_level);

        self.to_arrow_with_field(chunk_idx, Cow::Owned(default_field), true)
            .unwrap()
    }

    /// Exports the chunk at `chunk_idx` as an Arrow array shaped after `output_arrow_field`.
    ///
    /// `skip_attach_pl_metadata` is forwarded to the [`ToArrowConverter`] that performs the
    /// conversion.
    ///
    /// # Errors
    /// Propagates any error returned by [`ToArrowConverter::array_to_arrow`].
    ///
    /// # Panics
    /// Panics if `chunk_idx` is out of range.
    pub fn to_arrow_with_field<'a>(
        &self,
        chunk_idx: usize,
        output_arrow_field: Cow<'a, ArrowField>,
        skip_attach_pl_metadata: bool,
    ) -> PolarsResult<ArrayRef> {
        // The categorical converter is initialized from this series' dtype before the
        // conversion runs.
        #[cfg(feature = "dtype-categorical")]
        let categorical_converter = {
            let mut cat_converter =
                crate::series::arrow_export::categorical::CategoricalToArrowConverter {
                    converters: Default::default(),
                    persist_remap: false,
                };
            cat_converter.initialize(self.dtype());
            cat_converter
        };

        let mut converter = ToArrowConverter {
            skip_attach_pl_metadata,
            #[cfg(feature = "dtype-categorical")]
            categorical_converter,
        };

        let chunk = self.chunks().get(chunk_idx).unwrap();
        converter.array_to_arrow(chunk.as_ref(), self.dtype(), output_arrow_field)
    }
}
/// Converts Polars physical arrays into Arrow arrays that match a requested Arrow field.
pub struct ToArrowConverter {
/// When `true`, `attach_pl_field_metadata` returns immediately and nested Arrow fields are
/// left without additional Polars metadata.
pub skip_attach_pl_metadata: bool,
/// Converter that `Categorical` / `Enum` arrays are delegated to; only present when the
/// `dtype-categorical` feature is enabled.
#[cfg(feature = "dtype-categorical")]
pub categorical_converter:
crate::series::arrow_export::categorical::CategoricalToArrowConverter,
}
impl ToArrowConverter {
    /// Converts `array`, a physical chunk of a column with Polars dtype `dtype`, into an Arrow
    /// array matching `arrow_field`.
    ///
    /// # Errors
    /// Returns an error when the conversion fails, or when `arrow_field` is declared
    /// non-nullable while `array` contains NULLs.
    pub fn array_to_arrow<'a>(
        &mut self,
        array: &dyn Array,
        dtype: &DataType,
        arrow_field: Cow<'a, ArrowField>,
    ) -> PolarsResult<Box<dyn Array>> {
        // Read the flag first: `arrow_field` is moved into the conversion below.
        let nullable = arrow_field.is_nullable;
        let out = self.array_to_arrow_impl(array, dtype, arrow_field)?;
        if !nullable {
            ensure_no_nulls(array)?
        }
        Ok(out)
    }

    /// Dispatches on the `(polars dtype, arrow dtype)` pair and performs the conversion.
    ///
    /// Nested types (struct, list, fixed-size list, extension) recurse through
    /// [`Self::array_to_arrow`] so that nullability is validated at every level.
    ///
    /// # Errors
    /// Returns `InvalidOperation` when the pair is not supported, `SchemaMismatch` when struct
    /// field names or fixed-size list widths disagree, and propagates errors from the
    /// underlying cast / construction routines.
    ///
    /// # Panics
    /// Panics if `array` is not of the physical array type implied by `polars_dtype`
    /// (the downcasts are unwrapped).
    fn array_to_arrow_impl<'a>(
        &mut self,
        array: &dyn Array,
        polars_dtype: &DataType,
        arrow_field: Cow<'a, ArrowField>,
    ) -> PolarsResult<Box<dyn Array>> {
        Ok(match (polars_dtype, arrow_field.dtype()) {
            #[cfg(feature = "dtype-struct")]
            (DataType::Struct(struct_fields), ArrowDataType::Struct(arrow_struct_fields)) => {
                use arrow::array::StructArray;

                let arr: &StructArray = array.as_any().downcast_ref().unwrap();

                // The requested struct must have the same field names, in the same order, as
                // the source array.
                polars_ensure!(
                    arrow_struct_fields.len() == arr.fields().len()
                        && arrow_struct_fields
                            .iter()
                            .zip(arr.fields())
                            .all(|(l, r)| l.name() == r.name()),
                    SchemaMismatch:
                    "to_arrow() conversion failed: struct field names mismatch: {:?} != expected: {:?}",
                    arrow_field.dtype(), arr.dtype()
                );

                // Take ownership of the dtype so the child fields can be mutated in place.
                let mut arrow_dtype = to_owned_dtype(arrow_field);
                let ArrowDataType::Struct(arrow_struct_fields) = &mut arrow_dtype else {
                    unreachable!()
                };

                self.attach_pl_field_metadata(
                    struct_fields
                        .iter()
                        .map(|x| x.dtype())
                        .zip(arrow_struct_fields.iter_mut()),
                );

                let values: Vec<ArrayRef> = arr
                    .values()
                    .iter()
                    .zip(struct_fields.iter())
                    .zip(arrow_struct_fields.iter())
                    .map(|((values, pl_field), arrow_field)| {
                        self.array_to_arrow(
                            values.as_ref(),
                            pl_field.dtype(),
                            Cow::Borrowed(arrow_field),
                        )
                    })
                    .collect::<PolarsResult<_>>()?;

                let arr =
                    StructArray::try_new(arrow_dtype, arr.len(), values, arr.validity().cloned())?;
                Box::new(arr)
            },
            (DataType::List(item_dtype), ArrowDataType::LargeList(_)) => {
                let arr: &ListArray<i64> = array.as_any().downcast_ref().unwrap();

                let mut arrow_dtype = to_owned_dtype(arrow_field);
                let ArrowDataType::LargeList(arrow_item_field) = &mut arrow_dtype else {
                    unreachable!()
                };

                self.attach_pl_field_metadata(std::iter::once((
                    item_dtype.as_ref(),
                    arrow_item_field.as_mut(),
                )));

                let new_values = self.array_to_arrow(
                    arr.values().as_ref(),
                    item_dtype,
                    Cow::Borrowed(arrow_item_field.as_ref()),
                )?;

                // Offsets and validity are reused; only the values are converted.
                let arr = ListArray::<i64>::new(
                    arrow_dtype,
                    arr.offsets().clone(),
                    new_values,
                    arr.validity().cloned(),
                );
                Box::new(arr)
            },
            #[cfg(feature = "dtype-array")]
            (DataType::Array(item_dtype, width), ArrowDataType::FixedSizeList(_, arrow_width)) => {
                use arrow::array::FixedSizeListArray;

                let arr: &FixedSizeListArray = array.as_any().downcast_ref().unwrap();

                polars_ensure!(
                    *arrow_width == *width,
                    SchemaMismatch:
                    "to_arrow() conversion failed: fixed-size list width mismatch \
                    ({arrow_width:?} != expected: {width:?})"
                );

                let mut arrow_dtype = to_owned_dtype(arrow_field);
                let ArrowDataType::FixedSizeList(arrow_item_field, _) = &mut arrow_dtype else {
                    unreachable!()
                };

                self.attach_pl_field_metadata(std::iter::once((
                    item_dtype.as_ref(),
                    arrow_item_field.as_mut(),
                )));

                let new_values = self.array_to_arrow(
                    arr.values().as_ref(),
                    item_dtype,
                    Cow::Borrowed(arrow_item_field.as_ref()),
                )?;

                let arr = FixedSizeListArray::new(
                    arrow_dtype,
                    arr.len(),
                    new_values,
                    arr.validity().cloned(),
                );
                Box::new(arr)
            },
            #[cfg(feature = "dtype-categorical")]
            (DataType::Categorical(_, _) | DataType::Enum(_, _), _) => {
                // Categorical / Enum export is handled entirely by the dedicated converter.
                self.categorical_converter.array_to_arrow(
                    array,
                    polars_dtype,
                    arrow_field.as_ref(),
                )?
            },
            #[cfg(feature = "dtype-date")]
            (DataType::Date, ArrowDataType::Date32) => {
                primitive_to_boxed_with_logical!(array, i32, ArrowDataType::Date32)
            },
            #[cfg(feature = "dtype-datetime")]
            (DataType::Datetime(tu, tz), ArrowDataType::Timestamp(atu, atz)) => {
                use crate::prelude::TimeZone;

                // Time unit and time zone must both agree before the values are relabeled.
                let matching = atu == &tu.to_arrow()
                    && TimeZone::eq_none_as_utc(
                        TimeZone::opt_try_new(atz.clone())?.as_ref(),
                        tz.as_ref(),
                    );

                if !matching {
                    bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                }

                primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
            },
            #[cfg(feature = "dtype-duration")]
            (DataType::Duration(tu), ArrowDataType::Duration(atu)) => {
                let matching = atu == &tu.to_arrow();

                if !matching {
                    bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                }

                primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
            },
            #[cfg(feature = "dtype-time")]
            (DataType::Time, ArrowDataType::Time64(crate::prelude::ArrowTimeUnit::Nanosecond)) => {
                primitive_to_boxed_with_logical!(array, i64, to_owned_dtype(arrow_field))
            },
            #[cfg(feature = "dtype-time")]
            (DataType::Time, ArrowDataType::Time64(crate::prelude::ArrowTimeUnit::Microsecond)) => {
                use polars_compute::cast::time64ns_to_time64us;

                let array: &PrimitiveArray<i64> = array.as_any().downcast_ref().unwrap();
                time64ns_to_time64us(array).boxed()
            },
            #[cfg(feature = "dtype-decimal")]
            (DataType::Decimal(prec, scale), ArrowDataType::Decimal(a_prec, a_scale)) => {
                let matching = *a_prec == *prec && *a_scale == *scale;

                if !matching {
                    bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                }

                primitive_to_boxed_with_logical!(array, i128, to_owned_dtype(arrow_field))
            },
            #[cfg(feature = "object")]
            (DataType::Object(_), ArrowDataType::FixedSizeBinary(8)) => {
                use crate::chunked_array::object::builder::object_series_to_arrow_array;

                // NOTE(review): the unchecked constructor presumably requires `array` to be a
                // valid chunk for `polars_dtype` — confirm against the callers.
                let out = object_series_to_arrow_array(&unsafe {
                    Series::from_chunks_and_dtype_unchecked(
                        PlSmallStr::EMPTY,
                        vec![array.to_boxed()],
                        polars_dtype,
                    )
                });
                assert_eq!(out.dtype(), &ArrowDataType::FixedSizeBinary(8));
                out
            },
            (DataType::String, ArrowDataType::Utf8View) => array.to_boxed(),
            (DataType::String, ArrowDataType::LargeUtf8) => {
                cast_unchecked(array, &ArrowDataType::LargeUtf8).unwrap()
            },
            (DataType::Binary, ArrowDataType::BinaryView) => array.to_boxed(),
            (DataType::Binary, ArrowDataType::LargeBinary) => {
                cast_unchecked(array, &ArrowDataType::LargeBinary).unwrap()
            },
            (DataType::Binary, ArrowDataType::FixedSizeBinary(row_width)) => {
                use polars_compute::cast::binview_to_fixed_binary;

                let array: &BinaryViewArray = array.as_any().downcast_ref().unwrap();
                binview_to_fixed_binary(array, *row_width)?.boxed()
            },
            (DataType::Binary, ArrowDataType::Extension(_)) => {
                let arrow_dtype = to_owned_dtype(arrow_field);
                let ArrowDataType::Extension(ext_type) = &arrow_dtype else {
                    unreachable!()
                };

                // Convert to the extension's storage type first, then relabel the result with
                // the extension dtype.
                let storage_field =
                    ArrowField::new(ext_type.name.clone(), ext_type.inner.clone(), true);

                let mut arr =
                    self.array_to_arrow(array, &DataType::Binary, Cow::Owned(storage_field))?;
                *arr.dtype_mut() = arrow_dtype;
                // `arr` is already an owned box; returning it avoids a redundant clone.
                arr
            },
            #[cfg(feature = "dtype-extension")]
            (
                DataType::Extension(pl_ext_type, storage_dtype),
                ArrowDataType::Extension(arrow_ext_type),
            ) => {
                use arrow::datatypes::ExtensionType;

                let ExtensionType {
                    name,
                    inner: _,
                    metadata,
                } = arrow_ext_type.as_ref();

                if name != pl_ext_type.name().as_ref() {
                    bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                }

                // Missing and empty metadata are treated as equivalent; anything else must
                // match exactly.
                match (
                    metadata.as_deref(),
                    pl_ext_type.serialize_metadata().as_deref(),
                ) {
                    (Some("") | None, Some("") | None) => {},
                    (l, r) => {
                        if l != r {
                            bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                        }
                    },
                };

                let arrow_dtype = to_owned_dtype(arrow_field);
                let ArrowDataType::Extension(arrow_ext_type) = &arrow_dtype else {
                    unreachable!()
                };

                let storage_arrow_field = ArrowField::new(
                    arrow_ext_type.name.clone(),
                    arrow_ext_type.inner.clone(),
                    true,
                );

                let mut arr =
                    self.array_to_arrow(array, storage_dtype, Cow::Owned(storage_arrow_field))?;
                *arr.dtype_mut() = arrow_dtype;
                arr
            },
            (pl_dtype, arrow_dtype) => {
                // Only non-logical dtypes whose physical Arrow dtype already equals the
                // requested one can be passed through unchanged. A logical dtype reaching this
                // arm has no conversion to the requested Arrow dtype, so it is reported as an
                // error rather than a panic: the requested field is caller-supplied.
                if array.dtype() != arrow_dtype || pl_dtype.is_logical() {
                    bail_unhandled_arrow_conversion_dtype_pair!(polars_dtype, &arrow_field)
                }

                array.to_boxed()
            },
        })
    }

    /// Merges the metadata produced by `DataType::to_arrow_field_metadata` into each paired
    /// Arrow field, unless `skip_attach_pl_metadata` is set.
    ///
    /// Keys already present on the Arrow field are kept as they are; only missing keys are
    /// added. Categorical / Enum dtypes are skipped when the Arrow field is not a dictionary.
    #[inline]
    fn attach_pl_field_metadata<'a, 'b, I>(&self, iter: I)
    where
        I: IntoIterator<Item = (&'a DataType, &'b mut ArrowField)>,
    {
        if self.skip_attach_pl_metadata {
            return;
        }

        inner(&mut iter.into_iter());

        // The loop lives in a non-generic helper taking `&mut dyn Iterator`, so its body is
        // not duplicated for every iterator type `I`.
        #[inline(never)]
        fn inner(iter: &mut dyn Iterator<Item = (&DataType, &mut ArrowField)>) {
            for (pl_dtype, arrow_field) in iter {
                match pl_dtype {
                    #[cfg(feature = "dtype-categorical")]
                    DataType::Categorical(..) | DataType::Enum(..)
                        if !matches!(arrow_field.dtype(), ArrowDataType::Dictionary(..)) =>
                    {
                        continue;
                    },
                    _ => {},
                }

                let mut pl_md = pl_dtype.to_arrow_field_metadata();

                // No existing metadata: adopt the Polars metadata wholesale (`take` leaves
                // `None` behind, so the merge below becomes a no-op).
                if arrow_field.metadata.is_none() {
                    arrow_field.metadata = pl_md.take().map(|x| x.into());
                }

                // Existing metadata: insert only the keys that are not already present.
                if let Some(pl_md) = pl_md
                    && let Some(md) = arrow_field.metadata.as_mut()
                {
                    for (k, v) in pl_md {
                        if !md.contains_key(&k) {
                            Arc::make_mut(md).insert(k, v);
                        }
                    }
                }
            }
        }
    }
}
/// Extracts an owned `ArrowDataType` from a possibly-borrowed field.
///
/// An owned field gives up its dtype by move; a borrowed field has only its dtype cloned.
fn to_owned_dtype(field: Cow<ArrowField>) -> ArrowDataType {
    let owned_field = match field {
        Cow::Owned(owned) => owned,
        Cow::Borrowed(borrowed) => return borrowed.dtype().clone(),
    };
    owned_field.dtype
}