Skip to main content

pyo3_geoarrow/
input.rs

1//! Input types for accepting GeoArrow data from Python.
2//!
3//! This module provides utilities for accepting GeoArrow data in various forms from Python,
4//! automatically handling both arrays and streams through the Arrow C Data Interface.
5
6use geoarrow_array::{GeoArrowArrayIterator, GeoArrowArrayReader};
7use geoarrow_schema::GeoArrowType;
8use geoarrow_schema::error::GeoArrowResult;
9use pyo3::exceptions::PyValueError;
10use pyo3::intern;
11use pyo3::prelude::*;
12
13use crate::{PyGeoArray, PyGeoArrayReader, PyGeoArrowResult, PyGeoChunkedArray};
14
15/// An enum over [`PyGeoArray`] and [`PyGeoArrayReader`], used when a function accepts either
16/// Arrow object as input.
17///
18/// This type automatically extracts from Python objects that implement either:
19///
20/// - `__arrow_c_array__` (for single arrays)
21/// - `__arrow_c_stream__` (for array streams)
22pub enum AnyGeoArray {
23    /// A single Array, held in a [PyGeoArray].
24    Array(PyGeoArray),
25    /// A stream of possibly multiple Arrays, held in a [PyGeoArrayReader].
26    Stream(PyGeoArrayReader),
27}
28
29impl AnyGeoArray {
30    /// Consume this and convert it into a [PyGeoChunkedArray].
31    ///
32    /// All arrays from the stream will be materialized in memory.
33    pub fn into_chunked_array(self) -> PyGeoArrowResult<PyGeoChunkedArray> {
34        let reader = self.into_reader()?;
35        let data_type = reader.data_type();
36        let chunks = reader.collect::<GeoArrowResult<Vec<_>>>()?;
37        Ok(PyGeoChunkedArray::try_new(chunks, data_type)?)
38    }
39
40    /// Convert this into a [GeoArrow array reader][GeoArrowArrayReader].
41    pub fn into_reader(self) -> PyResult<Box<dyn GeoArrowArrayReader + Send>> {
42        match self {
43            Self::Array(array) => {
44                let geo_array = array.into_inner();
45                let data_type = geo_array.data_type();
46                Ok(Box::new(GeoArrowArrayIterator::new(
47                    vec![Ok(geo_array)],
48                    data_type,
49                )))
50            }
51            Self::Stream(stream) => stream.into_reader(),
52        }
53    }
54
55    /// Get the GeoArrow data type of this array or stream.
56    pub fn data_type(&self) -> GeoArrowType {
57        match self {
58            Self::Array(array) => array.inner().data_type(),
59            Self::Stream(reader) => reader.data_type().clone(),
60        }
61    }
62}
63
64impl<'py> FromPyObject<'_, 'py> for AnyGeoArray {
65    type Error = PyErr;
66
67    fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
68        // First extract infallibly if __arrow_c_array__ method is present, so that any exception
69        // in that gets propagated. Also check if PyArray extract works so that Buffer Protocol
70        // conversion still works.
71        // Do the same for __arrow_c_stream__ and PyArrayReader below.
72        if ob.hasattr(intern!(ob.py(), "__arrow_c_array__"))? {
73            Ok(Self::Array(ob.extract()?))
74        } else if ob.hasattr(intern!(ob.py(), "__arrow_c_stream__"))? {
75            Ok(Self::Stream(ob.extract()?))
76        } else {
77            Err(PyValueError::new_err(
78                "Expected object with __arrow_c_array__ or __arrow_c_stream__ method.",
79            ))
80        }
81    }
82}