use arrow::array::{Array, PrimitiveArray};
use arrow::ffi;
use arrow::ffi::{ArrowArray, ArrowArrayStream, ArrowArrayStreamReader, ArrowSchema};
use polars::prelude::*;
use polars_ffi::version_0::SeriesExport;
use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedBytes;
use pyo3::types::{PyCapsule, PyTuple, PyType};
use super::PySeries;
use crate::error::PyPolarsErr;
/// Checks that `capsule` carries exactly the name `expected_name`.
///
/// # Errors
/// Returns a `PyValueError` when the capsule has no name, or when its name differs
/// from `expected_name`. Any error raised while reading the name is propagated.
fn validate_pycapsule_name(capsule: &Bound<PyCapsule>, expected_name: &str) -> PyResult<()> {
    // This helper is shared by the schema, array and stream importers, so the
    // "missing name" message reports the expected name instead of assuming a schema.
    let Some(capsule_name) = capsule.name()? else {
        return Err(PyValueError::new_err(format!(
            "Expected PyCapsule to have name '{expected_name}' set."
        )));
    };

    // SAFETY: NOTE(review) assumes the name pointer stays valid while `capsule` is
    // borrowed here — confirm against the pyo3 `as_cstr` contract.
    let capsule_name = unsafe { capsule_name.as_cstr() };
    if capsule_name.to_str() != Ok(expected_name) {
        return Err(PyValueError::new_err(format!(
            "Expected name '{expected_name}' in PyCapsule, instead got '{capsule_name:?}'"
        )));
    }

    Ok(())
}
/// Invokes `ob.__arrow_c_array__()` and returns the two capsules it yields,
/// in the order `(schema, array)`.
///
/// # Errors
/// * `PyValueError` when `ob` has no `__arrow_c_array__` attribute.
/// * `PyTypeError` when the call does not return a tuple.
/// * Any error from the call itself, from indexing the tuple, or from casting
///   either element to a `PyCapsule` is propagated.
pub(crate) fn call_arrow_c_array<'py>(
    ob: &Bound<'py, PyAny>,
) -> PyResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> {
    let has_dunder = ob.hasattr("__arrow_c_array__")?;
    if !has_dunder {
        return Err(PyValueError::new_err(
            "Expected an object with dunder __arrow_c_array__",
        ));
    }

    let returned = ob.getattr("__arrow_c_array__")?.call0()?;
    if returned.is_instance_of::<PyTuple>() {
        // Element 0 is the schema capsule, element 1 the array capsule.
        let schema_capsule: Bound<'py, PyCapsule> = returned.get_item(0)?.cast_into()?;
        let array_capsule: Bound<'py, PyCapsule> = returned.get_item(1)?.cast_into()?;
        Ok((schema_capsule, array_capsule))
    } else {
        Err(PyTypeError::new_err(
            "Expected __arrow_c_array__ to return a tuple.",
        ))
    }
}
pub(crate) fn import_array_pycapsules(
schema_capsule: &Bound<PyCapsule>,
array_capsule: &Bound<PyCapsule>,
) -> PyResult<(arrow::datatypes::Field, Box<dyn Array>)> {
let field = import_schema_pycapsule(schema_capsule)?;
validate_pycapsule_name(array_capsule, "arrow_array")?;
unsafe {
#[allow(deprecated)]
let array_ptr = std::ptr::replace(array_capsule.pointer() as _, ArrowArray::empty());
let array = ffi::import_array_from_c(array_ptr, field.dtype().clone()).unwrap();
Ok((field, array))
}
}
pub(crate) fn import_schema_pycapsule(
schema_capsule: &Bound<PyCapsule>,
) -> PyResult<arrow::datatypes::Field> {
validate_pycapsule_name(schema_capsule, "arrow_schema")?;
unsafe {
#[allow(deprecated)]
let schema_ptr = schema_capsule.reference::<ArrowSchema>();
let field = ffi::import_field_from_c(schema_ptr).unwrap();
Ok(field)
}
}
/// Invokes `ob.__arrow_c_stream__()` and returns the result cast to a `PyCapsule`.
///
/// # Errors
/// * `PyValueError` when `ob` has no `__arrow_c_stream__` attribute.
/// * Any error from the call itself or from the cast to `PyCapsule` is propagated.
fn call_arrow_c_stream<'py>(ob: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyCapsule>> {
    if ob.hasattr("__arrow_c_stream__")? {
        let returned = ob.getattr("__arrow_c_stream__")?.call0()?;
        let stream_capsule: Bound<'py, PyCapsule> = returned.cast_into()?;
        Ok(stream_capsule)
    } else {
        Err(PyValueError::new_err(
            "Expected an object with dunder __arrow_c_stream__",
        ))
    }
}
/// Builds a `PySeries` from a PyCapsule named `"arrow_array_stream"`.
///
/// Every array produced by the stream is collected. If the stream yields none, an
/// empty series with the stream field's name and dtype is returned; otherwise the
/// series is built from the stream field and the collected arrays.
///
/// # Errors
/// * `PyValueError` when the capsule name is wrong or the stream reader cannot be
///   created.
/// * A `PyPolarsErr`-derived error when an array fails to be read or the series
///   cannot be built.
pub(crate) fn import_stream_pycapsule(capsule: &Bound<PyCapsule>) -> PyResult<PySeries> {
    validate_pycapsule_name(capsule, "arrow_array_stream")?;

    // Move the stream struct out of the capsule, leaving an empty one behind, and
    // hand the boxed struct to the reader.
    let reader = unsafe {
        #[allow(deprecated)]
        let raw_stream = std::ptr::replace(capsule.pointer() as _, ArrowArrayStream::empty());
        ArrowArrayStreamReader::try_new(Box::new(raw_stream))
    };
    let mut reader = reader.map_err(|err| PyValueError::new_err(err.to_string()))?;

    // Pull arrays until the stream is exhausted, stopping at the first failure.
    let mut chunks: Vec<Box<dyn Array>> = Vec::new();
    loop {
        match unsafe { reader.next() } {
            Some(Ok(chunk)) => chunks.push(chunk),
            Some(Err(err)) => return Err(PyPolarsErr::from(err).into()),
            None => break,
        }
    }

    let field = reader.field();
    let series = if !chunks.is_empty() {
        Series::try_from((field, chunks)).map_err(PyPolarsErr::from)?
    } else {
        // No chunks were produced: derive the dtype from the stream's field.
        let dtype = DataType::from_arrow_field(field);
        Series::new_empty(field.name.clone(), &dtype)
    };

    Ok(PySeries::new(series))
}
/// Decodes a two's-complement big-endian integer of at most 16 bytes into an `i128`.
///
/// Inputs shorter than 16 bytes are sign-extended, so a minimal-width negative
/// value such as `[0xFF]` decodes to `-1`. An empty slice decodes to `0`.
/// Returns `None` when `be_bytes` is longer than 16 bytes.
fn decode_iceberg_decimal_be(be_bytes: &[u8]) -> Option<i128> {
    const WIDTH: usize = 16;

    let pad = WIDTH.checked_sub(be_bytes.len())?;
    // The most significant bit of the first (most significant) byte is the sign.
    let fill: u8 = if be_bytes.first().is_some_and(|b| b & 0x80 != 0) {
        0xFF
    } else {
        0x00
    };

    let mut buf = [fill; WIDTH];
    buf[pad..].copy_from_slice(be_bytes);
    Some(i128::from_be_bytes(buf))
}

#[pymethods]
impl PySeries {
    /// Build a series from an object implementing `__arrow_c_array__`.
    ///
    /// Raises an error if the object lacks the dunder, if the returned capsules
    /// are invalid, or if the imported array cannot be converted into a series.
    #[classmethod]
    pub fn from_arrow_c_array(_cls: &Bound<PyType>, ob: &Bound<'_, PyAny>) -> PyResult<Self> {
        let (schema_capsule, array_capsule) = call_arrow_c_array(ob)?;
        let (field, array) = import_array_pycapsules(&schema_capsule, &array_capsule)?;
        // The array comes from an arbitrary producer: report conversion failures
        // as Python exceptions instead of panicking.
        let s = Series::try_from((&field, array)).map_err(PyPolarsErr::from)?;
        Ok(PySeries::new(s))
    }

    /// Build a series from an object implementing `__arrow_c_stream__`.
    #[classmethod]
    pub fn from_arrow_c_stream(_cls: &Bound<PyType>, ob: &Bound<'_, PyAny>) -> PyResult<Self> {
        let capsule = call_arrow_c_stream(ob)?;
        import_stream_pycapsule(&capsule)
    }

    /// Import a series from a `SeriesExport` located at the address `location`.
    ///
    /// # Safety
    /// `location` is reinterpreted as `*mut SeriesExport` and read from, so it must
    /// be the address of a valid, initialized `SeriesExport`.
    /// NOTE(review): the read moves the export out bitwise; presumably the caller
    /// must not use or release it again afterwards — confirm against `polars_ffi`.
    #[classmethod]
    pub unsafe fn _import(_cls: &Bound<PyType>, location: usize) -> PyResult<Self> {
        let location = location as *mut SeriesExport;

        let series = unsafe {
            let export = location.read();
            polars_ffi::version_0::import_series(export).map_err(PyPolarsErr::from)?
        };
        Ok(PySeries::from(series))
    }

    /// Build a decimal series from Iceberg's binary decimal representation.
    ///
    /// Each element of `bytes_list` is extracted as optional bytes holding the
    /// unscaled value as two's-complement big-endian binary. Values narrower than
    /// 16 bytes are sign-extended.
    ///
    /// Raises `ValueError` when `precision` is too large for the decimal limit to
    /// be computed, when an element is longer than 16 bytes, or when a decoded
    /// value has more digits than `precision` allows.
    #[staticmethod]
    pub fn _import_decimal_from_iceberg_binary_repr(
        bytes_list: &Bound<PyAny>,
        precision: usize,
        scale: usize,
    ) -> PyResult<Self> {
        // Largest magnitude representable with `precision` digits: 10^precision - 1.
        // Computed with checked arithmetic so an oversized precision raises an
        // error instead of overflowing or panicking.
        let max_abs_decimal_value: u128 = u32::try_from(precision)
            .ok()
            .and_then(|digits| 10_u128.checked_pow(digits))
            .map(|limit| limit - 1)
            .ok_or_else(|| {
                PyValueError::new_err(format!(
                    "decimal precision is out of range: {precision}"
                ))
            })?;

        let out: Vec<i128> = bytes_list
            .try_iter()?
            .map(|bytes| -> PyResult<i128> {
                let be_bytes: Option<PyBackedBytes> = bytes?.extract()?;

                let value = match be_bytes.as_deref() {
                    Some(be_bytes) => decode_iceberg_decimal_be(be_bytes).ok_or_else(|| {
                        PyValueError::new_err(format!(
                            "iceberg binary data for decimal exceeded 16 bytes: {}",
                            be_bytes.len()
                        ))
                    })?,
                    // NOTE(review): a `None` entry is stored as 0 rather than as a
                    // null, matching the previous behavior — confirm this is intended.
                    None => 0,
                };

                // `unsigned_abs` cannot overflow, unlike `abs` on `i128::MIN`.
                if value.unsigned_abs() > max_abs_decimal_value {
                    return Err(PyValueError::new_err(format!(
                        "iceberg decoded value for decimal exceeded precision: \
                         value: {value}, precision: {precision}",
                    )));
                }

                Ok(value)
            })
            .collect::<PyResult<_>>()?;

        // SAFETY: the single chunk is a primitive `i128` array and every value was
        // checked against `precision` above. NOTE(review): assumes `i128` is the
        // physical representation expected for `DataType::Decimal` — confirm.
        Ok(PySeries::from(unsafe {
            Series::from_chunks_and_dtype_unchecked(
                PlSmallStr::EMPTY,
                vec![PrimitiveArray::<i128>::from_vec(out).boxed()],
                &DataType::Decimal(precision, scale),
            )
        }))
    }
}