1use std::sync::Arc;
2
3use arrow_schema::ArrowError;
4use geoarrow_array::GeoArrowArray;
5use geoarrow_array::array::from_arrow_array;
6use geoarrow_cast::downcast::NativeType;
7use geoarrow_schema::{
8 BoxType, GeoArrowType, GeometryCollectionType, LineStringType, MultiLineStringType,
9 MultiPointType, MultiPolygonType, PointType, PolygonType,
10};
11use pyo3::exceptions::{PyIndexError, PyTypeError};
12use pyo3::intern;
13use pyo3::prelude::*;
14use pyo3::types::{PyCapsule, PyTuple, PyType};
15use pyo3_arrow::ffi::{ArrayIterator, to_stream_pycapsule};
16use pyo3_arrow::input::AnyArray;
17use pyo3_arrow::{PyArrayReader, PyChunkedArray};
18
19use crate::data_type::PyGeoType;
20use crate::error::{PyGeoArrowError, PyGeoArrowResult};
21use crate::input::AnyGeoArray;
22use crate::scalar::PyGeoScalar;
23use crate::utils::text_repr::text_repr;
24use crate::{PyCoordType, PyGeoArray};
25
/// A chunked GeoArrow array, exposed to Python as `geoarrow.rust.core.GeoChunkedArray`.
///
/// Stores an ordered list of GeoArrow array chunks together with the GeoArrow data type
/// recorded for them.
#[pyclass(
    module = "geoarrow.rust.core",
    name = "GeoChunkedArray",
    subclass,
    frozen
)]
pub struct PyGeoChunkedArray {
    // The individual chunks, in order.
    chunks: Vec<Arc<dyn GeoArrowArray>>,
    // Data type recorded for the array; `try_new` checks every chunk against it.
    data_type: GeoArrowType,
}
39
40impl PyGeoChunkedArray {
41 pub fn try_new(chunks: Vec<Arc<dyn GeoArrowArray>>, data_type: GeoArrowType) -> PyResult<Self> {
43 if !chunks.iter().all(|chunk| chunk.data_type() == data_type) {
44 return Err(PyTypeError::new_err("All chunks must have same data type"));
45 }
46
47 Ok(Self { chunks, data_type })
48 }
49
50 pub fn from_arrow_pycapsule(capsule: &Bound<PyCapsule>) -> PyGeoArrowResult<Self> {
52 PyChunkedArray::from_arrow_pycapsule(capsule)?.try_into()
53 }
54
55 pub fn to_geoarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
59 let geoarrow_mod = py.import(intern!(py, "geoarrow.rust.core"))?;
60 geoarrow_mod
61 .getattr(intern!(py, "GeoChunkedArray"))?
62 .call_method1(
63 intern!(py, "from_arrow_pycapsule"),
64 PyTuple::new(py, vec![self.__arrow_c_stream__(py, None)?])?,
65 )
66 }
67
68 pub fn from_arrays(chunks: Vec<Arc<dyn GeoArrowArray>>) -> PyGeoArrowResult<Self> {
71 if chunks.is_empty() {
72 return Err(ArrowError::SchemaError(
73 "Cannot infer data type from empty Vec<Arc<dyn GeoArrowArray>>".to_string(),
74 )
75 .into());
76 }
77
78 if !chunks
79 .windows(2)
80 .all(|w| w[0].data_type() == w[1].data_type())
81 {
82 return Err(ArrowError::SchemaError("Mismatched data types".to_string()).into());
83 }
84
85 let data_type = chunks[0].data_type();
86 Ok(Self::try_new(chunks, data_type)?)
87 }
88
89 pub fn into_inner(self) -> (Vec<Arc<dyn GeoArrowArray>>, GeoArrowType) {
91 (self.chunks, self.data_type)
92 }
93}
94
95#[pymethods]
96impl PyGeoChunkedArray {
97 #[new]
98 #[pyo3(signature = (arrays, r#type=None))]
99 fn init(
100 py: Python,
101 arrays: &Bound<PyAny>,
102 r#type: Option<PyGeoType>,
103 ) -> PyGeoArrowResult<Self> {
104 if arrays.hasattr(intern!(py, "__arrow_c_array__"))?
105 || arrays.hasattr(intern!(py, "__arrow_c_stream__"))?
106 {
107 Ok(arrays.extract::<AnyGeoArray>()?.into_chunked_array()?)
108 } else if let Ok(geo_arrays) = arrays.extract::<Vec<PyGeoArray>>() {
109 let geo_arrays = geo_arrays
110 .into_iter()
111 .map(|arr| arr.into_inner())
112 .collect::<Vec<_>>();
113
114 if !geo_arrays
115 .windows(2)
116 .all(|w| w[0].data_type() == w[1].data_type())
117 {
118 return Err(PyTypeError::new_err(
119 "Cannot create a ChunkedArray with differing data types.",
120 )
121 .into());
122 }
123
124 let geo_type = r#type
125 .map(|py_data_type| py_data_type.into_inner())
126 .unwrap_or_else(|| geo_arrays[0].data_type());
127
128 Ok(Self::try_new(geo_arrays, geo_type)?)
129 } else {
130 Err(
131 PyTypeError::new_err("Expected ChunkedArray-like input or sequence of arrays.")
132 .into(),
133 )
134 }
135 }
136
137 #[pyo3(signature = (requested_schema=None))]
138 fn __arrow_c_stream__<'py>(
139 &self,
140 py: Python<'py>,
141 requested_schema: Option<Bound<'py, PyCapsule>>,
142 ) -> PyResult<Bound<'py, PyCapsule>> {
143 let field = Arc::new(self.data_type.to_field("", true));
144 let arrow_chunks = self
145 .chunks
146 .iter()
147 .map(|x| x.to_array_ref())
148 .collect::<Vec<_>>();
149
150 let array_reader = Box::new(ArrayIterator::new(arrow_chunks.into_iter().map(Ok), field));
151 Ok(to_stream_pycapsule(py, array_reader, requested_schema)?)
152 }
153
154 fn __eq__(&self, other: &Bound<PyAny>) -> bool {
156 if let Ok(other) = other.extract::<Self>() {
158 self.data_type == other.data_type
159 && self.chunks.len() == other.chunks.len()
160 && self
161 .chunks
162 .iter()
163 .zip(other.chunks)
164 .all(|(left, right)| left.to_array_ref() == right.to_array_ref())
165 } else {
166 false
167 }
168 }
169
170 fn __getitem__(&self, i: isize) -> PyGeoArrowResult<PyGeoScalar> {
171 let mut i = if i < 0 {
173 let i = self.__len__() as isize + i;
174 if i < 0 {
175 return Err(PyIndexError::new_err("Index out of range").into());
176 }
177 i as usize
178 } else {
179 i as usize
180 };
181 if i >= self.__len__() {
182 return Err(PyIndexError::new_err("Index out of range").into());
183 }
184
185 for chunk in self.chunks() {
186 if i < chunk.inner().len() {
187 return PyGeoScalar::try_new(chunk.inner().slice(i, 1));
188 }
189 i -= chunk.inner().len();
190 }
191 unreachable!("index in range but past end of last chunk")
192 }
193
194 fn __len__(&self) -> usize {
195 self.chunks.iter().fold(0, |acc, arr| acc + arr.len())
196 }
197
198 fn __repr__(&self) -> String {
199 format!("GeoChunkedArray({})", text_repr(&self.data_type))
200 }
201
202 #[classmethod]
203 fn from_arrow(_cls: &Bound<PyType>, data: Self) -> Self {
204 data
205 }
206
207 #[classmethod]
208 #[pyo3(name = "from_arrow_pycapsule")]
209 fn from_arrow_pycapsule_py(
210 _cls: &Bound<PyType>,
211 capsule: &Bound<PyCapsule>,
212 ) -> PyGeoArrowResult<Self> {
213 Self::from_arrow_pycapsule(capsule)
214 }
215
216 #[getter]
217 fn null_count(&self) -> usize {
218 self.chunks
219 .iter()
220 .map(|chunk| chunk.logical_null_count())
221 .sum()
222 }
223
224 #[getter]
225 fn num_chunks(&self) -> usize {
226 self.chunks.len()
227 }
228
229 fn chunk(&self, i: usize) -> PyGeoArray {
230 PyGeoArray::new(self.chunks[i].clone())
231 }
232
233 fn chunks(&self) -> Vec<PyGeoArray> {
234 self.chunks
235 .iter()
236 .map(|chunk| PyGeoArray::new(chunk.clone()))
237 .collect()
238 }
239
240 #[pyo3(signature = (to_type, /))]
241 fn cast(&self, to_type: PyGeoType) -> PyGeoArrowResult<Self> {
242 let casted = self
243 .chunks
244 .iter()
245 .map(|chunk| geoarrow_cast::cast::cast(chunk.as_ref(), to_type.as_ref()))
246 .collect::<Result<Vec<_>, _>>()?;
247
248 Self::from_arrays(casted)
249 }
250
251 #[pyo3(
252 signature = (*, coord_type = PyCoordType::Separated),
253 text_signature = "(*, coord_type='separated')"
254 )]
255 fn downcast(&self, coord_type: PyCoordType) -> PyGeoArrowResult<Self> {
256 if let Some((native_type, dim)) =
257 geoarrow_cast::downcast::infer_downcast_type(self.chunks.iter().map(|x| x.as_ref()))?
258 {
259 let metadata = self.data_type.metadata().clone();
260 let coord_type = coord_type.into();
261 let to_type = match native_type {
262 NativeType::Point => PointType::new(dim, metadata)
263 .with_coord_type(coord_type)
264 .into(),
265 NativeType::LineString => LineStringType::new(dim, metadata)
266 .with_coord_type(coord_type)
267 .into(),
268 NativeType::Polygon => PolygonType::new(dim, metadata)
269 .with_coord_type(coord_type)
270 .into(),
271 NativeType::MultiPoint => MultiPointType::new(dim, metadata)
272 .with_coord_type(coord_type)
273 .into(),
274 NativeType::MultiLineString => MultiLineStringType::new(dim, metadata)
275 .with_coord_type(coord_type)
276 .into(),
277 NativeType::MultiPolygon => MultiPolygonType::new(dim, metadata)
278 .with_coord_type(coord_type)
279 .into(),
280 NativeType::GeometryCollection => GeometryCollectionType::new(dim, metadata)
281 .with_coord_type(coord_type)
282 .into(),
283 NativeType::Rect => BoxType::new(dim, metadata).into(),
284 };
285 self.cast(PyGeoType::new(to_type))
286 } else {
287 Ok(Self::try_new(self.chunks.clone(), self.data_type.clone())?)
288 }
289 }
290
291 #[getter]
292 fn r#type(&self) -> PyGeoType {
293 self.data_type.clone().into()
294 }
295}
296
297impl<'py> FromPyObject<'_, 'py> for PyGeoChunkedArray {
298 type Error = PyErr;
299
300 fn extract(ob: Borrowed<'_, 'py, PyAny>) -> PyResult<Self> {
301 let chunked_array = ob.extract::<AnyArray>()?.into_chunked_array()?;
302 chunked_array.try_into().map_err(PyErr::from)
303 }
304}
305
306impl TryFrom<PyChunkedArray> for PyGeoChunkedArray {
307 type Error = PyGeoArrowError;
308
309 fn try_from(value: PyChunkedArray) -> Result<Self, Self::Error> {
310 let (chunks, field) = value.into_inner();
311 let geo_chunks = chunks
312 .iter()
313 .map(|array| from_arrow_array(&array, &field))
314 .collect::<Result<Vec<_>, _>>()?;
315 let geo_data_type = GeoArrowType::try_from(field.as_ref())?;
316 Ok(Self {
317 chunks: geo_chunks,
318 data_type: geo_data_type,
319 })
320 }
321}
322
323impl TryFrom<PyArrayReader> for PyGeoChunkedArray {
324 type Error = PyGeoArrowError;
325
326 fn try_from(value: PyArrayReader) -> Result<Self, Self::Error> {
327 value.into_chunked_array()?.try_into()
328 }
329}