pyo3_arrow/
export.rs

//! Wrappers around objects defined in this crate to simplify returning data to `arro3-core`.
//!
//! By default, if you return something like a `PyArray` from your Python function, it will work
//! because `PyArray` is a `#[pyclass]`, but it will statically link the private methods
//! defined on `PyArray` in your given version of `pyo3-arrow`.
//!
//! This isn't ideal for a few reasons. For one, this means that the actual classes returned from
//! multiple packages will be _different_. This also means that any updates in the latest `arro3`
//! version won't be reflected in your exported classes.
//!
//! Instead, because Arrow is an ABI-stable format, it's easy to _dynamically_ link the data. So we
//! can pass Arrow data at runtime to whatever version of `arro3-core` the user has in their Python
//! environment.
//!
//! Because each of the objects in this module implements [`IntoPyObject`], you can return these
//! objects directly.
//!
//! ```notest
//! /// A function whose return value is automatically converted to an `arro3.core.Array`.
//! #[pyfunction]
//! fn my_function() -> pyo3_arrow::export::Arro3Array {
//!     todo!()
//! }
//! ```
//!
//! Note that this means you must require `arro3-core` as a Python dependency in the
//! `pyproject.toml` of your Rust-Python library.
28
29use std::sync::Arc;
30
31use arrow_array::{ArrayRef, RecordBatch, RecordBatchReader};
32use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef};
33use pyo3::intern;
34use pyo3::prelude::*;
35use pyo3::types::PyTuple;
36
37use crate::ffi::{to_array_pycapsules, to_schema_pycapsule, to_stream_pycapsule, ArrayReader};
38use crate::{
39    PyArray, PyArrayReader, PyChunkedArray, PyDataType, PyField, PyRecordBatch,
40    PyRecordBatchReader, PyScalar, PySchema, PyTable,
41};
42
43/// A wrapper around a [PyArray] that implements [IntoPyObject] to convert to a runtime-available
44/// `arro3.core.Array`.
45///
46/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
47/// `arro3.core.Array` and not the one statically linked from Rust.
48#[derive(Debug)]
49pub struct Arro3Array(PyArray);
50
51impl From<PyArray> for Arro3Array {
52    fn from(value: PyArray) -> Self {
53        Self(value)
54    }
55}
56
57impl From<ArrayRef> for Arro3Array {
58    fn from(value: ArrayRef) -> Self {
59        Self(value.into())
60    }
61}
62
63impl<'py> IntoPyObject<'py> for Arro3Array {
64    type Target = PyAny;
65    type Output = Bound<'py, PyAny>;
66    type Error = PyErr;
67
68    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
69        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
70        arro3_mod.getattr(intern!(py, "Array"))?.call_method1(
71            intern!(py, "from_arrow_pycapsule"),
72            to_array_pycapsules(py, self.0.field().clone(), &self.0.array(), None)?,
73        )
74    }
75}
76
77/// A wrapper around a [PyArrayReader] that implements [IntoPyObject] to convert to a
78/// runtime-available `arro3.core.ArrayReader`.
79///
80/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
81/// `arro3.core.ArrayReader` and not the one statically linked from Rust.
82pub struct Arro3ArrayReader(PyArrayReader);
83
84impl From<PyArrayReader> for Arro3ArrayReader {
85    fn from(value: PyArrayReader) -> Self {
86        Self(value)
87    }
88}
89
90impl From<Box<dyn ArrayReader + Send>> for Arro3ArrayReader {
91    fn from(value: Box<dyn ArrayReader + Send>) -> Self {
92        Self(value.into())
93    }
94}
95
96impl<'py> IntoPyObject<'py> for Arro3ArrayReader {
97    type Target = PyAny;
98    type Output = Bound<'py, PyAny>;
99    type Error = PyErr;
100
101    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
102        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
103        let capsule = to_stream_pycapsule(py, self.0.into_reader()?, None)?;
104
105        arro3_mod.getattr(intern!(py, "ArrayReader"))?.call_method1(
106            intern!(py, "from_arrow_pycapsule"),
107            PyTuple::new(py, [capsule])?,
108        )
109    }
110}
111
112/// A wrapper around a [PyChunkedArray] that implements [IntoPyObject] to convert to a
113/// runtime-available `arro3.core.ChunkedArray`.
114///
115/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
116/// `arro3.core.ChunkedArray` and not the one statically linked from Rust.
117#[derive(Debug)]
118pub struct Arro3ChunkedArray(PyChunkedArray);
119
120impl From<PyChunkedArray> for Arro3ChunkedArray {
121    fn from(value: PyChunkedArray) -> Self {
122        Self(value)
123    }
124}
125
126impl<'py> IntoPyObject<'py> for Arro3ChunkedArray {
127    type Target = PyAny;
128    type Output = Bound<'py, PyAny>;
129    type Error = PyErr;
130
131    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
132        let capsule = PyChunkedArray::to_stream_pycapsule(
133            py,
134            self.0.chunks().to_vec(),
135            self.0.field().clone(),
136            None,
137        )?;
138
139        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
140        arro3_mod
141            .getattr(intern!(py, "ChunkedArray"))?
142            .call_method1(
143                intern!(py, "from_arrow_pycapsule"),
144                PyTuple::new(py, vec![capsule])?,
145            )
146    }
147}
148
149/// A wrapper around a [PyField] that implements [IntoPyObject] to convert to a runtime-available
150/// `arro3.core.Field`.
151///
152/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
153/// `arro3.core.Field` and not the one statically linked from Rust.
154#[derive(Debug)]
155pub struct Arro3Field(PyField);
156
157impl From<PyField> for Arro3Field {
158    fn from(value: PyField) -> Self {
159        Self(value)
160    }
161}
162
163impl From<FieldRef> for Arro3Field {
164    fn from(value: FieldRef) -> Self {
165        Self(value.into())
166    }
167}
168
169impl From<&Field> for Arro3Field {
170    fn from(value: &Field) -> Self {
171        Self(Arc::new(value.clone()).into())
172    }
173}
174
175impl<'py> IntoPyObject<'py> for Arro3Field {
176    type Target = PyAny;
177    type Output = Bound<'py, PyAny>;
178    type Error = PyErr;
179
180    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
181        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
182        arro3_mod.getattr(intern!(py, "Field"))?.call_method1(
183            intern!(py, "from_arrow_pycapsule"),
184            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
185        )
186    }
187}
188
189/// A wrapper around a [PyDataType] that implements [IntoPyObject] to convert to a
190/// runtime-available `arro3.core.DataType`.
191///
192/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
193/// `arro3.core.DataType` and not the one statically linked from Rust.
194#[derive(Debug)]
195pub struct Arro3DataType(PyDataType);
196
197impl From<PyDataType> for Arro3DataType {
198    fn from(value: PyDataType) -> Self {
199        Self(value)
200    }
201}
202
203impl From<DataType> for Arro3DataType {
204    fn from(value: DataType) -> Self {
205        Self(PyDataType::new(value))
206    }
207}
208
209impl<'py> IntoPyObject<'py> for Arro3DataType {
210    type Target = PyAny;
211    type Output = Bound<'py, PyAny>;
212    type Error = PyErr;
213
214    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
215        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
216        arro3_mod.getattr(intern!(py, "DataType"))?.call_method1(
217            intern!(py, "from_arrow_pycapsule"),
218            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
219        )
220    }
221}
222
223/// A wrapper around a [PyRecordBatch] that implements [IntoPyObject] to convert to a
224/// runtime-available `arro3.core.RecordBatch`.
225///
226/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
227/// `arro3.core.RecordBatch` and not the one statically linked from Rust.
228#[derive(Debug)]
229pub struct Arro3RecordBatch(PyRecordBatch);
230
231impl From<PyRecordBatch> for Arro3RecordBatch {
232    fn from(value: PyRecordBatch) -> Self {
233        Self(value)
234    }
235}
236
237impl From<RecordBatch> for Arro3RecordBatch {
238    fn from(value: RecordBatch) -> Self {
239        Self(value.into())
240    }
241}
242
243impl<'py> IntoPyObject<'py> for Arro3RecordBatch {
244    type Target = PyAny;
245    type Output = Bound<'py, PyAny>;
246    type Error = PyErr;
247
248    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
249        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
250        let capsules = PyRecordBatch::to_array_pycapsules(py, self.0.into_inner(), None)?;
251        arro3_mod
252            .getattr(intern!(py, "RecordBatch"))?
253            .call_method1(intern!(py, "from_arrow_pycapsule"), capsules)
254    }
255}
256
257/// A wrapper around a [PyRecordBatchReader] that implements [IntoPyObject] to convert to a
258/// runtime-available `arro3.core.RecordBatchReader`.
259///
260/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
261/// `arro3.core.RecordBatchReader` and not the one statically linked from Rust.
262pub struct Arro3RecordBatchReader(PyRecordBatchReader);
263
264impl From<PyRecordBatchReader> for Arro3RecordBatchReader {
265    fn from(value: PyRecordBatchReader) -> Self {
266        Self(value)
267    }
268}
269
270impl From<Box<dyn RecordBatchReader + Send>> for Arro3RecordBatchReader {
271    fn from(value: Box<dyn RecordBatchReader + Send>) -> Self {
272        Self(PyRecordBatchReader::new(value))
273    }
274}
275
276impl<'py> IntoPyObject<'py> for Arro3RecordBatchReader {
277    type Target = PyAny;
278    type Output = Bound<'py, PyAny>;
279    type Error = PyErr;
280
281    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
282        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
283        let capsule = PyRecordBatchReader::to_stream_pycapsule(py, self.0.into_reader()?, None)?;
284        arro3_mod
285            .getattr(intern!(py, "RecordBatchReader"))?
286            .call_method1(
287                intern!(py, "from_arrow_pycapsule"),
288                PyTuple::new(py, vec![capsule])?,
289            )
290    }
291}
292
293/// A wrapper around a [PyScalar] that implements [IntoPyObject] to convert to a
294/// runtime-available `arro3.core.Scalar`.
295///
296/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
297/// `arro3.core.Scalar` and not the one statically linked from Rust.
298#[derive(Debug)]
299pub struct Arro3Scalar(PyScalar);
300
301impl From<PyScalar> for Arro3Scalar {
302    fn from(value: PyScalar) -> Self {
303        Self(value)
304    }
305}
306
307impl<'py> IntoPyObject<'py> for Arro3Scalar {
308    type Target = PyAny;
309    type Output = Bound<'py, PyAny>;
310    type Error = PyErr;
311
312    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
313        let capsules = to_array_pycapsules(py, self.0.field().clone(), &self.0.array(), None)?;
314
315        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
316        arro3_mod
317            .getattr(intern!(py, "Scalar"))?
318            .call_method1(intern!(py, "from_arrow_pycapsule"), capsules)
319    }
320}
321
322/// A wrapper around a [PySchema] that implements [IntoPyObject] to convert to a
323/// runtime-available `arro3.core.Schema`.
324///
325/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
326/// `arro3.core.Schema` and not the one statically linked from Rust.
327#[derive(Debug)]
328pub struct Arro3Schema(PySchema);
329
330impl From<PySchema> for Arro3Schema {
331    fn from(value: PySchema) -> Self {
332        Self(value)
333    }
334}
335
336impl From<SchemaRef> for Arro3Schema {
337    fn from(value: SchemaRef) -> Self {
338        Self(PySchema::new(value))
339    }
340}
341
342impl From<Schema> for Arro3Schema {
343    fn from(value: Schema) -> Self {
344        Self(PySchema::new(Arc::new(value)))
345    }
346}
347
348impl<'py> IntoPyObject<'py> for Arro3Schema {
349    type Target = PyAny;
350    type Output = Bound<'py, PyAny>;
351    type Error = PyErr;
352
353    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
354        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
355        arro3_mod.getattr(intern!(py, "Schema"))?.call_method1(
356            intern!(py, "from_arrow_pycapsule"),
357            PyTuple::new(py, vec![to_schema_pycapsule(py, self.0.as_ref())?])?,
358        )
359    }
360}
361
362/// A wrapper around a [PyTable] that implements [IntoPyObject] to convert to a
363/// runtime-available `arro3.core.Table`.
364///
365/// This ensures that we return data with the **user's** runtime-provided (dynamically-linked)
366/// `arro3.core.Table` and not the one statically linked from Rust.
367#[derive(Debug)]
368pub struct Arro3Table(PyTable);
369
370impl Arro3Table {
371    pub(crate) fn into_inner(self) -> PyTable {
372        self.0
373    }
374}
375
376impl From<PyTable> for Arro3Table {
377    fn from(value: PyTable) -> Self {
378        Self(value)
379    }
380}
381
382impl<'py> IntoPyObject<'py> for Arro3Table {
383    type Target = PyAny;
384    type Output = Bound<'py, PyAny>;
385    type Error = PyErr;
386
387    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
388        let arro3_mod = py.import(intern!(py, "arro3.core"))?;
389        let (batches, schema) = self.0.into_inner();
390        let capsule = PyTable::to_stream_pycapsule(py, batches, schema, None)?;
391        arro3_mod.getattr(intern!(py, "Table"))?.call_method1(
392            intern!(py, "from_arrow_pycapsule"),
393            PyTuple::new(py, vec![capsule])?,
394        )
395    }
396}