pyo3_arrow/
export.rs

1//! Wrappers around objects defined in this crate to simplify returning data to `arro3-core`.
2//!
3//! By default, if you return something like a `PyArray` from your Python function, it will work
4//! because `PyArray` implements `#[pyclass]`, but it will statically link the private methods
5//! defined on `PyArray` in your given version of `pyo3-arrow`.
6//!
7//! This isn't ideal for a few reasons. For one, this means that the actual classes returned from
8//! multiple packages will be _different_. This also means that any updates in the latest `arro3`
9//! version won't be reflected in your exported classes.
10//!
11//! Instead, because Arrow is an ABI-stable format, it's easy to _dynamically_ link the data. So we
12//! can pass Arrow data at runtime to whatever version of `arro3-core` the user has in their Python
13//! environment.
14//!
//! Because each of the objects in this module implements [`IntoPyObject`], you can return these
16//! objects directly.
17//!
18//! ```notest
//! /// A function whose return value is automatically converted to an `arro3.core.Array`.
20//! #[pyfunction]
21//! fn my_function() -> pyo3_arrow::export::Arro3Array {
22//!     todo!()
23//! }
24//! ```
25//!
26//! Note that this means you must require `arro3-core` as a Python dependency in the
27//! `pyproject.toml` of your Rust-Python library.
28
29use std::sync::Arc;
30
31use arrow_array::{RecordBatch, RecordBatchReader};
32use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef};
33use pyo3::intern;
34use pyo3::prelude::*;
35use pyo3::types::PyTuple;
36
37use crate::ffi::{to_array_pycapsules, to_schema_pycapsule};
38use crate::{
39    PyArray, PyChunkedArray, PyDataType, PyField, PyRecordBatch, PyRecordBatchReader, PyScalar,
40    PySchema, PyTable,
41};
42
43/// A wrapper around a [PyArray] that implements [IntoPyObject] to convert to a runtime-available
44/// `arro3.core.Array`.
45///
46/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
47/// `arro3.core.Array` and not the one statically linked from Rust.
48#[derive(Debug)]
49pub struct Arro3Array(PyArray);
50
51impl From<PyArray> for Arro3Array {
52    fn from(value: PyArray) -> Self {
53        Self(value)
54    }
55}
56
57impl<'py> IntoPyObject<'py> for Arro3Array {
58    type Target = PyAny;
59    type Output = Bound<'py, PyAny>;
60    type Error = PyErr;
61
62    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
63        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
64        arro3_mod.getattr(intern!(py, "Array"))?.call_method1(
65            intern!(py, "from_arrow_pycapsule"),
66            to_array_pycapsules(py, self.0.field().clone(), &self.0.array(), None)?,
67        )
68    }
69}
70
71/// A wrapper around a [PyChunkedArray] that implements [IntoPyObject] to convert to a
72/// runtime-available `arro3.core.ChunkedArray`.
73///
74/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
75/// `arro3.core.ChunkedArray` and not the one statically linked from Rust.
76#[derive(Debug)]
77pub struct Arro3ChunkedArray(PyChunkedArray);
78
79impl From<PyChunkedArray> for Arro3ChunkedArray {
80    fn from(value: PyChunkedArray) -> Self {
81        Self(value)
82    }
83}
84
85impl<'py> IntoPyObject<'py> for Arro3ChunkedArray {
86    type Target = PyAny;
87    type Output = Bound<'py, PyAny>;
88    type Error = PyErr;
89
90    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
91        let capsule = PyChunkedArray::to_stream_pycapsule(
92            py,
93            self.0.chunks().to_vec(),
94            self.0.field().clone(),
95            None,
96        )?;
97
98        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
99        arro3_mod
100            .getattr(intern!(py, "ChunkedArray"))?
101            .call_method1(
102                intern!(py, "from_arrow_pycapsule"),
103                PyTuple::new(py, vec![capsule])?,
104            )
105    }
106}
107
108/// A wrapper around a [PyField] that implements [IntoPyObject] to convert to a runtime-available
109/// `arro3.core.Field`.
110///
111/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
112/// `arro3.core.Field` and not the one statically linked from Rust.
113#[derive(Debug)]
114pub struct Arro3Field(PyField);
115
116impl From<PyField> for Arro3Field {
117    fn from(value: PyField) -> Self {
118        Self(value)
119    }
120}
121
122impl From<FieldRef> for Arro3Field {
123    fn from(value: FieldRef) -> Self {
124        Self(value.into())
125    }
126}
127
128impl From<&Field> for Arro3Field {
129    fn from(value: &Field) -> Self {
130        Self(Arc::new(value.clone()).into())
131    }
132}
133
134impl<'py> IntoPyObject<'py> for Arro3Field {
135    type Target = PyAny;
136    type Output = Bound<'py, PyAny>;
137    type Error = PyErr;
138
139    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
140        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
141        arro3_mod.getattr(intern!(py, "Field"))?.call_method1(
142            intern!(py, "from_arrow_pycapsule"),
143            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
144        )
145    }
146}
147
148/// A wrapper around a [PyDataType] that implements [IntoPyObject] to convert to a
149/// runtime-available `arro3.core.DataType`.
150///
151/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
152/// `arro3.core.DataType` and not the one statically linked from Rust.
153#[derive(Debug)]
154pub struct Arro3DataType(PyDataType);
155
156impl From<PyDataType> for Arro3DataType {
157    fn from(value: PyDataType) -> Self {
158        Self(value)
159    }
160}
161
162impl From<DataType> for Arro3DataType {
163    fn from(value: DataType) -> Self {
164        Self(PyDataType::new(value))
165    }
166}
167
168impl<'py> IntoPyObject<'py> for Arro3DataType {
169    type Target = PyAny;
170    type Output = Bound<'py, PyAny>;
171    type Error = PyErr;
172
173    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
174        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
175        arro3_mod.getattr(intern!(py, "DataType"))?.call_method1(
176            intern!(py, "from_arrow_pycapsule"),
177            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
178        )
179    }
180}
181
182/// A wrapper around a [PyRecordBatch] that implements [IntoPyObject] to convert to a
183/// runtime-available `arro3.core.RecordBatch`.
184///
185/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
186/// `arro3.core.RecordBatch` and not the one statically linked from Rust.
187#[derive(Debug)]
188pub struct Arro3RecordBatch(PyRecordBatch);
189
190impl From<PyRecordBatch> for Arro3RecordBatch {
191    fn from(value: PyRecordBatch) -> Self {
192        Self(value)
193    }
194}
195
196impl From<RecordBatch> for Arro3RecordBatch {
197    fn from(value: RecordBatch) -> Self {
198        Self(value.into())
199    }
200}
201
202impl<'py> IntoPyObject<'py> for Arro3RecordBatch {
203    type Target = PyAny;
204    type Output = Bound<'py, PyAny>;
205    type Error = PyErr;
206
207    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
208        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
209        let capsules = PyRecordBatch::to_array_pycapsules(py, self.0.into_inner(), None)?;
210        arro3_mod
211            .getattr(intern!(py, "RecordBatch"))?
212            .call_method1(intern!(py, "from_arrow_pycapsule"), capsules)
213    }
214}
215
216/// A wrapper around a [PyRecordBatchReader] that implements [IntoPyObject] to convert to a
217/// runtime-available `arro3.core.RecordBatchReader`.
218///
219/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
220/// `arro3.core.RecordBatchReader` and not the one statically linked from Rust.
221pub struct Arro3RecordBatchReader(PyRecordBatchReader);
222
223impl From<PyRecordBatchReader> for Arro3RecordBatchReader {
224    fn from(value: PyRecordBatchReader) -> Self {
225        Self(value)
226    }
227}
228
229impl From<Box<dyn RecordBatchReader + Send>> for Arro3RecordBatchReader {
230    fn from(value: Box<dyn RecordBatchReader + Send>) -> Self {
231        Self(PyRecordBatchReader::new(value))
232    }
233}
234
235impl<'py> IntoPyObject<'py> for Arro3RecordBatchReader {
236    type Target = PyAny;
237    type Output = Bound<'py, PyAny>;
238    type Error = PyErr;
239
240    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
241        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
242        let capsule = PyRecordBatchReader::to_stream_pycapsule(py, self.0.into_reader()?, None)?;
243        arro3_mod
244            .getattr(intern!(py, "RecordBatchReader"))?
245            .call_method1(
246                intern!(py, "from_arrow_pycapsule"),
247                PyTuple::new(py, vec![capsule])?,
248            )
249    }
250}
251
252/// A wrapper around a [PyScalar] that implements [IntoPyObject] to convert to a
253/// runtime-available `arro3.core.Scalar`.
254///
255/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
256/// `arro3.core.Scalar` and not the one statically linked from Rust.
257#[derive(Debug)]
258pub struct Arro3Scalar(PyScalar);
259
260impl From<PyScalar> for Arro3Scalar {
261    fn from(value: PyScalar) -> Self {
262        Self(value)
263    }
264}
265
266impl<'py> IntoPyObject<'py> for Arro3Scalar {
267    type Target = PyAny;
268    type Output = Bound<'py, PyAny>;
269    type Error = PyErr;
270
271    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
272        let capsules = to_array_pycapsules(py, self.0.field().clone(), &self.0.array(), None)?;
273
274        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
275        arro3_mod
276            .getattr(intern!(py, "Scalar"))?
277            .call_method1(intern!(py, "from_arrow_pycapsule"), capsules)
278    }
279}
280
281/// A wrapper around a [PySchema] that implements [IntoPyObject] to convert to a
282/// runtime-available `arro3.core.Schema`.
283///
284/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
285/// `arro3.core.Schema` and not the one statically linked from Rust.
286#[derive(Debug)]
287pub struct Arro3Schema(PySchema);
288
289impl From<PySchema> for Arro3Schema {
290    fn from(value: PySchema) -> Self {
291        Self(value)
292    }
293}
294
295impl From<SchemaRef> for Arro3Schema {
296    fn from(value: SchemaRef) -> Self {
297        Self(PySchema::new(value))
298    }
299}
300
301impl From<Schema> for Arro3Schema {
302    fn from(value: Schema) -> Self {
303        Self(PySchema::new(Arc::new(value)))
304    }
305}
306
307impl<'py> IntoPyObject<'py> for Arro3Schema {
308    type Target = PyAny;
309    type Output = Bound<'py, PyAny>;
310    type Error = PyErr;
311
312    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
313        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
314        arro3_mod.getattr(intern!(py, "Schema"))?.call_method1(
315            intern!(py, "from_arrow_pycapsule"),
316            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
317        )
318    }
319}
320
321/// A wrapper around a [PyTable] that implements [IntoPyObject] to convert to a
322/// runtime-available `arro3.core.Table`.
323///
324/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
325/// `arro3.core.Table` and not the one statically linked from Rust.
326#[derive(Debug)]
327pub struct Arro3Table(PyTable);
328
329impl From<PyTable> for Arro3Table {
330    fn from(value: PyTable) -> Self {
331        Self(value)
332    }
333}
334
335impl<'py> IntoPyObject<'py> for Arro3Table {
336    type Target = PyAny;
337    type Output = Bound<'py, PyAny>;
338    type Error = PyErr;
339
340    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
341        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
342        let (batches, schema) = self.0.into_inner();
343        let capsule = PyTable::to_stream_pycapsule(py, batches, schema, None)?;
344        arro3_mod.getattr(intern!(py, "Table"))?.call_method1(
345            intern!(py, "from_arrow_pycapsule"),
346            PyTuple::new(py, vec![capsule])?,
347        )
348    }
349}