polars_python/series/
import.rs

1use arrow::array::Array;
2use arrow::ffi;
3use arrow::ffi::{ArrowArray, ArrowArrayStream, ArrowArrayStreamReader, ArrowSchema};
4use polars::prelude::*;
5use polars_ffi::version_0::SeriesExport;
6use pyo3::exceptions::{PyTypeError, PyValueError};
7use pyo3::prelude::*;
8use pyo3::types::{PyCapsule, PyTuple, PyType};
9
10use super::PySeries;
11use crate::error::PyPolarsErr;
12
13/// Validate PyCapsule has provided name
14fn validate_pycapsule_name(capsule: &Bound<PyCapsule>, expected_name: &str) -> PyResult<()> {
15    let capsule_name = capsule.name()?;
16    if let Some(capsule_name) = capsule_name {
17        let capsule_name = capsule_name.to_str()?;
18        if capsule_name != expected_name {
19            return Err(PyValueError::new_err(format!(
20                "Expected name '{expected_name}' in PyCapsule, instead got '{capsule_name}'"
21            )));
22        }
23    } else {
24        return Err(PyValueError::new_err(
25            "Expected schema PyCapsule to have name set.",
26        ));
27    }
28
29    Ok(())
30}
31
32/// Import `__arrow_c_array__` across Python boundary
33pub(crate) fn call_arrow_c_array<'py>(
34    ob: &Bound<'py, PyAny>,
35) -> PyResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> {
36    if !ob.hasattr("__arrow_c_array__")? {
37        return Err(PyValueError::new_err(
38            "Expected an object with dunder __arrow_c_array__",
39        ));
40    }
41
42    let tuple = ob.getattr("__arrow_c_array__")?.call0()?;
43    if !tuple.is_instance_of::<PyTuple>() {
44        return Err(PyTypeError::new_err(
45            "Expected __arrow_c_array__ to return a tuple.",
46        ));
47    }
48
49    let schema_capsule = tuple.get_item(0)?.downcast_into()?;
50    let array_capsule = tuple.get_item(1)?.downcast_into()?;
51    Ok((schema_capsule, array_capsule))
52}
53
54pub(crate) fn import_array_pycapsules(
55    schema_capsule: &Bound<PyCapsule>,
56    array_capsule: &Bound<PyCapsule>,
57) -> PyResult<(arrow::datatypes::Field, Box<dyn Array>)> {
58    validate_pycapsule_name(schema_capsule, "arrow_schema")?;
59    validate_pycapsule_name(array_capsule, "arrow_array")?;
60
61    // # Safety
62    // schema_capsule holds a valid C ArrowSchema pointer, as defined by the Arrow PyCapsule
63    // Interface
64    // array_capsule holds a valid C ArrowArray pointer, as defined by the Arrow PyCapsule
65    // Interface
66    let (field, array) = unsafe {
67        let schema_ptr = schema_capsule.reference::<ArrowSchema>();
68        let array_ptr = std::ptr::replace(array_capsule.pointer() as _, ArrowArray::empty());
69
70        let field = ffi::import_field_from_c(schema_ptr).unwrap();
71        let array = ffi::import_array_from_c(array_ptr, field.dtype().clone()).unwrap();
72        (field, array)
73    };
74
75    Ok((field, array))
76}
77
78/// Import `__arrow_c_stream__` across Python boundary.
79fn call_arrow_c_stream<'py>(ob: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyCapsule>> {
80    if !ob.hasattr("__arrow_c_stream__")? {
81        return Err(PyValueError::new_err(
82            "Expected an object with dunder __arrow_c_stream__",
83        ));
84    }
85
86    let capsule = ob.getattr("__arrow_c_stream__")?.call0()?.downcast_into()?;
87    Ok(capsule)
88}
89
90pub(crate) fn import_stream_pycapsule(capsule: &Bound<PyCapsule>) -> PyResult<PySeries> {
91    validate_pycapsule_name(capsule, "arrow_array_stream")?;
92
93    // # Safety
94    // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
95    // Interface
96    let mut stream = unsafe {
97        // Takes ownership of the pointed to ArrowArrayStream
98        // This acts to move the data out of the capsule pointer, setting the release callback to NULL
99        let stream_ptr = Box::new(std::ptr::replace(
100            capsule.pointer() as _,
101            ArrowArrayStream::empty(),
102        ));
103        ArrowArrayStreamReader::try_new(stream_ptr)
104            .map_err(|err| PyValueError::new_err(err.to_string()))?
105    };
106
107    let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
108    while let Some(array) = unsafe { stream.next() } {
109        produced_arrays.push(array.unwrap());
110    }
111
112    // Series::try_from fails for an empty vec of chunks
113    let s = if produced_arrays.is_empty() {
114        let polars_dt = DataType::from_arrow_field(stream.field());
115        Series::new_empty(stream.field().name.clone(), &polars_dt)
116    } else {
117        Series::try_from((stream.field(), produced_arrays)).unwrap()
118    };
119    Ok(PySeries::new(s))
120}
121#[pymethods]
122impl PySeries {
123    #[classmethod]
124    pub fn from_arrow_c_array(_cls: &Bound<PyType>, ob: &Bound<'_, PyAny>) -> PyResult<Self> {
125        let (schema_capsule, array_capsule) = call_arrow_c_array(ob)?;
126        let (field, array) = import_array_pycapsules(&schema_capsule, &array_capsule)?;
127        let s = Series::try_from((&field, array)).unwrap();
128        Ok(PySeries::new(s))
129    }
130
131    #[classmethod]
132    pub fn from_arrow_c_stream(_cls: &Bound<PyType>, ob: &Bound<'_, PyAny>) -> PyResult<Self> {
133        let capsule = call_arrow_c_stream(ob)?;
134        import_stream_pycapsule(&capsule)
135    }
136
137    #[classmethod]
138    /// Import a series via polars-ffi
139    /// Takes ownership of the [`SeriesExport`] at [`location`]
140    /// # Safety
141    /// [`location`] should be the address of an allocated and initialized [`SeriesExport`]
142    pub unsafe fn _import(_cls: &Bound<PyType>, location: usize) -> PyResult<Self> {
143        let location = location as *mut SeriesExport;
144
145        // # Safety
146        // `location` should be valid for reading
147        let series = unsafe {
148            let export = location.read();
149            polars_ffi::version_0::import_series(export).map_err(PyPolarsErr::from)?
150        };
151        Ok(PySeries { series })
152    }
153}