1use std::fmt::Display;
2use std::sync::Arc;
3
4use arrow_array::types::{
5 Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
6 UInt64Type, UInt8Type,
7};
8use arrow_array::{
9 Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Datum, LargeBinaryArray,
10 LargeStringArray, PrimitiveArray, StringArray, StringViewArray,
11};
12use arrow_cast::cast;
13use arrow_schema::{ArrowError, DataType, Field, FieldRef};
14use arrow_select::concat::concat;
15use arrow_select::take::take;
16use numpy::PyUntypedArray;
17use pyo3::exceptions::{PyIndexError, PyNotImplementedError, PyValueError};
18use pyo3::prelude::*;
19use pyo3::types::{PyCapsule, PyTuple, PyType};
20use pyo3::{intern, IntoPyObjectExt};
21
22#[cfg(feature = "buffer_protocol")]
23use crate::buffer::AnyBufferProtocol;
24use crate::error::PyArrowResult;
25use crate::export::{Arro3Array, Arro3DataType, Arro3Field};
26use crate::ffi::from_python::utils::import_array_pycapsules;
27use crate::ffi::to_python::nanoarrow::to_nanoarrow_array;
28use crate::ffi::{to_array_pycapsules, to_schema_pycapsule};
29use crate::input::AnyArray;
30use crate::interop::numpy::from_numpy::from_numpy;
31use crate::interop::numpy::to_numpy::to_numpy;
32use crate::scalar::PyScalar;
33use crate::{PyDataType, PyField};
34
35#[derive(Debug)]
45#[pyclass(module = "arro3.core._core", name = "Array", subclass, frozen)]
46pub struct PyArray {
47 array: ArrayRef,
48 field: FieldRef,
49}
50
51impl PyArray {
52 pub fn new(array: ArrayRef, field: FieldRef) -> Self {
56 Self::try_new(array, field).unwrap()
57 }
58
59 pub fn try_new(array: ArrayRef, field: FieldRef) -> Result<Self, ArrowError> {
61 if array.data_type() != field.data_type() {
65 return Err(ArrowError::SchemaError(
66 format!("Array DataType must match Field DataType. Array DataType is {}; field DataType is {}", array.data_type(), field.data_type())
67 ));
68 }
69 Ok(Self { array, field })
70 }
71
72 pub fn from_array_ref(array: ArrayRef) -> Self {
74 let field = Field::new("", array.data_type().clone(), true);
75 Self::new(array, Arc::new(field))
76 }
77
78 pub fn from_arrow_pycapsule(
80 schema_capsule: &Bound<PyCapsule>,
81 array_capsule: &Bound<PyCapsule>,
82 ) -> PyResult<Self> {
83 let (array, field, _data_len) = import_array_pycapsules(schema_capsule, array_capsule)?;
84 Ok(Self::new(array, Arc::new(field)))
85 }
86
87 pub fn array(&self) -> &ArrayRef {
89 &self.array
90 }
91
92 pub fn field(&self) -> &FieldRef {
94 &self.field
95 }
96
97 pub fn into_inner(self) -> (ArrayRef, FieldRef) {
99 (self.array, self.field)
100 }
101
102 pub fn to_arro3<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
106 let arro3_mod = py.import(intern!(py, "arro3.core"))?;
107 arro3_mod.getattr(intern!(py, "Array"))?.call_method1(
108 intern!(py, "from_arrow_pycapsule"),
109 self.__arrow_c_array__(py, None)?,
110 )
111 }
112
113 pub fn into_arro3(self, py: Python) -> PyResult<Bound<PyAny>> {
117 let arro3_mod = py.import(intern!(py, "arro3.core"))?;
118 let array_capsules = to_array_pycapsules(py, self.field.clone(), &self.array, None)?;
119 arro3_mod
120 .getattr(intern!(py, "Array"))?
121 .call_method1(intern!(py, "from_arrow_pycapsule"), array_capsules)
122 }
123
124 pub fn to_nanoarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
126 to_nanoarrow_array(py, self.__arrow_c_array__(py, None)?)
127 }
128
129 pub fn to_pyarrow<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
133 let pyarrow_mod = py.import(intern!(py, "pyarrow"))?;
134 let cloned = Self::new(self.array.clone(), self.field.clone());
135 pyarrow_mod
136 .getattr(intern!(py, "array"))?
137 .call1(PyTuple::new(py, vec![cloned.into_pyobject(py)?])?)
138 }
139}
140
141impl From<ArrayRef> for PyArray {
142 fn from(value: ArrayRef) -> Self {
143 Self::from_array_ref(value)
144 }
145}
146
147impl AsRef<ArrayRef> for PyArray {
148 fn as_ref(&self) -> &ArrayRef {
149 &self.array
150 }
151}
152
153impl Display for PyArray {
154 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
155 write!(f, "arro3.core.Array<")?;
156 self.array.data_type().fmt(f)?;
157 writeln!(f, ">")?;
158 Ok(())
159 }
160}
161
162impl Datum for PyArray {
163 fn get(&self) -> (&dyn Array, bool) {
164 (self.array.as_ref(), false)
165 }
166}
167
168#[pymethods]
169impl PyArray {
170 #[new]
171 #[pyo3(signature = (obj, /, r#type = None, *))]
172 pub(crate) fn init(obj: &Bound<PyAny>, r#type: Option<PyField>) -> PyResult<Self> {
173 if let Ok(data) = obj.extract::<PyArray>() {
174 return Ok(data);
175 }
176
177 macro_rules! impl_primitive {
178 ($rust_type:ty, $arrow_type:ty) => {{
179 let values: Vec<Option<$rust_type>> = obj.extract()?;
180 Arc::new(PrimitiveArray::<$arrow_type>::from(values))
181 }};
182 }
183
184 let field = r#type
185 .ok_or(PyValueError::new_err(
186 "type must be passed for non-Arrow input",
187 ))?
188 .into_inner();
189 let array: ArrayRef = match field.data_type() {
190 DataType::Float32 => impl_primitive!(f32, Float32Type),
191 DataType::Float64 => impl_primitive!(f64, Float64Type),
192 DataType::UInt8 => impl_primitive!(u8, UInt8Type),
193 DataType::UInt16 => impl_primitive!(u16, UInt16Type),
194 DataType::UInt32 => impl_primitive!(u32, UInt32Type),
195 DataType::UInt64 => impl_primitive!(u64, UInt64Type),
196 DataType::Int8 => impl_primitive!(i8, Int8Type),
197 DataType::Int16 => impl_primitive!(i16, Int16Type),
198 DataType::Int32 => impl_primitive!(i32, Int32Type),
199 DataType::Int64 => impl_primitive!(i64, Int64Type),
200 DataType::Boolean => {
201 let values: Vec<Option<bool>> = obj.extract()?;
202 Arc::new(BooleanArray::from(values))
203 }
204 DataType::Binary => {
205 let values: Vec<Option<Vec<u8>>> = obj.extract()?;
206 let slices = values
207 .iter()
208 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_slice()))
209 .collect::<Vec<_>>();
210 Arc::new(BinaryArray::from(slices))
211 }
212 DataType::LargeBinary => {
213 let values: Vec<Option<Vec<u8>>> = obj.extract()?;
214 let slices = values
215 .iter()
216 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_slice()))
217 .collect::<Vec<_>>();
218 Arc::new(LargeBinaryArray::from(slices))
219 }
220 DataType::BinaryView => {
221 let values: Vec<Option<Vec<u8>>> = obj.extract()?;
222 let slices = values
223 .iter()
224 .map(|maybe_vec| maybe_vec.as_ref().map(|vec| vec.as_slice()))
225 .collect::<Vec<_>>();
226 Arc::new(BinaryViewArray::from(slices))
227 }
228 DataType::Utf8 => {
229 let values: Vec<Option<String>> = obj.extract()?;
230 Arc::new(StringArray::from(values))
231 }
232 DataType::LargeUtf8 => {
233 let values: Vec<Option<String>> = obj.extract()?;
234 Arc::new(LargeStringArray::from(values))
235 }
236 DataType::Utf8View => {
237 let values: Vec<Option<String>> = obj.extract()?;
238 Arc::new(StringViewArray::from(values))
239 }
240 dt => {
241 return Err(PyNotImplementedError::new_err(format!(
242 "Array constructor for {dt} not yet implemented."
243 )))
244 }
245 };
246 Ok(Self::new(array, field))
247 }
248
/// Return the values buffer of this array.
///
/// Only Int64 arrays are supported. Any other data type raises
/// `NotImplementedError` instead of panicking.
#[cfg(feature = "buffer_protocol")]
fn buffer(&self) -> PyResult<crate::buffer::PyArrowBuffer> {
    use arrow_array::cast::AsArray;

    match self.array.data_type() {
        DataType::Int64 => {
            let values = self.array.as_primitive::<Int64Type>().values();
            Ok(crate::buffer::PyArrowBuffer::new(values.inner().clone()))
        }
        dt => Err(PyNotImplementedError::new_err(format!(
            "buffer() for {dt} not yet implemented."
        ))),
    }
}
263
264 #[pyo3(signature = (dtype=None, copy=None))]
265 #[allow(unused_variables)]
266 fn __array__<'py>(
267 &'py self,
268 py: Python<'py>,
269 dtype: Option<Bound<'py, PyAny>>,
270 copy: Option<Bound<'py, PyAny>>,
271 ) -> PyResult<Bound<'py, PyAny>> {
272 to_numpy(py, &self.array)
273 }
274
275 #[pyo3(signature = (requested_schema=None))]
276 fn __arrow_c_array__<'py>(
277 &'py self,
278 py: Python<'py>,
279 requested_schema: Option<Bound<'py, PyCapsule>>,
280 ) -> PyArrowResult<Bound<'py, PyTuple>> {
281 to_array_pycapsules(py, self.field.clone(), &self.array, requested_schema)
282 }
283
284 fn __arrow_c_schema__<'py>(&'py self, py: Python<'py>) -> PyArrowResult<Bound<'py, PyCapsule>> {
285 to_schema_pycapsule(py, self.field.as_ref())
286 }
287
288 fn __eq__(&self, other: &PyArray) -> bool {
289 self.array.as_ref() == other.array.as_ref() && self.field == other.field
290 }
291
292 fn __getitem__(&self, i: isize) -> PyArrowResult<PyScalar> {
293 let i = if i < 0 {
295 let i = self.array.len() as isize + i;
296 if i < 0 {
297 return Err(PyIndexError::new_err("Index out of range").into());
298 }
299 i as usize
300 } else {
301 i as usize
302 };
303 if i >= self.array.len() {
304 return Err(PyIndexError::new_err("Index out of range").into());
305 }
306 PyScalar::try_new(self.array.slice(i, 1), self.field.clone())
307 }
308
309 fn __len__(&self) -> usize {
310 self.array.len()
311 }
312
313 fn __repr__(&self) -> String {
314 self.to_string()
315 }
316
317 #[classmethod]
318 fn from_arrow(_cls: &Bound<PyType>, input: AnyArray) -> PyArrowResult<Self> {
319 match input {
320 AnyArray::Array(array) => Ok(array),
321 AnyArray::Stream(stream) => {
322 let chunked_array = stream.into_chunked_array()?;
323 let (chunks, field) = chunked_array.into_inner();
324 let chunk_refs = chunks.iter().map(|arr| arr.as_ref()).collect::<Vec<_>>();
325 let concatted = concat(chunk_refs.as_slice())?;
326 Ok(Self::new(concatted, field))
327 }
328 }
329 }
330
331 #[classmethod]
332 #[pyo3(name = "from_arrow_pycapsule")]
333 fn from_arrow_pycapsule_py(
334 _cls: &Bound<PyType>,
335 schema_capsule: &Bound<PyCapsule>,
336 array_capsule: &Bound<PyCapsule>,
337 ) -> PyResult<Self> {
338 Self::from_arrow_pycapsule(schema_capsule, array_capsule)
339 }
340
/// Construct an Array from an object exposing the buffer protocol.
///
/// The conversion itself is the `TryInto` implementation on
/// `AnyBufferProtocol`.
#[cfg(feature = "buffer_protocol")]
#[classmethod]
fn from_buffer(_cls: &Bound<PyType>, buffer: AnyBufferProtocol) -> PyArrowResult<Self> {
    let converted: PyArrowResult<Self> = buffer.try_into();
    converted
}
347
348 #[classmethod]
349 fn from_numpy(
350 _cls: &Bound<PyType>,
351 py: Python,
352 array: Bound<'_, PyAny>,
353 ) -> PyArrowResult<Self> {
354 let mut numpy_array = array;
355 if numpy_array.hasattr("__array__")? {
356 numpy_array = numpy_array.call_method0("__array__")?;
357 };
358
359 #[cfg(feature = "buffer_protocol")]
361 if let Ok(buf) = numpy_array.extract::<AnyBufferProtocol>() {
362 return buf.try_into();
363 }
364
365 let numpy_array: Bound<PyUntypedArray> = FromPyObject::extract_bound(&numpy_array)?;
366 let arrow_array = from_numpy(py, &numpy_array)?;
367 Ok(Self::from_array_ref(arrow_array))
368 }
369
370 fn cast(&self, target_type: PyField) -> PyArrowResult<Arro3Array> {
371 let new_field = target_type.into_inner();
372 let new_array = cast(self.as_ref(), new_field.data_type())?;
373 Ok(PyArray::new(new_array, new_field).into())
374 }
375
376 #[getter]
377 #[pyo3(name = "field")]
378 fn py_field(&self) -> Arro3Field {
379 PyField::new(self.field.clone()).into()
380 }
381
382 #[getter]
383 fn nbytes(&self) -> usize {
384 self.array.get_array_memory_size()
385 }
386
387 #[getter]
388 fn null_count(&self) -> usize {
389 self.array.null_count()
390 }
391
392 #[pyo3(signature = (offset=0, length=None))]
393 fn slice(&self, offset: usize, length: Option<usize>) -> Arro3Array {
394 let length = length.unwrap_or_else(|| self.array.len() - offset);
395 let new_array = self.array.slice(offset, length);
396 PyArray::new(new_array, self.field().clone()).into()
397 }
398
399 fn take(&self, indices: PyArray) -> PyArrowResult<Arro3Array> {
400 let new_array = take(self.as_ref(), indices.as_ref(), None)?;
401 Ok(PyArray::new(new_array, self.field.clone()).into())
402 }
403
404 fn to_numpy<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
405 self.__array__(py, None, None)
406 }
407
408 fn to_pylist(&self, py: Python) -> PyResult<PyObject> {
409 let mut scalars = Vec::with_capacity(self.array.len());
410 for i in 0..self.array.len() {
411 let scalar =
412 unsafe { PyScalar::new_unchecked(self.array.slice(i, 1), self.field.clone()) };
413 scalars.push(scalar.as_py(py)?);
414 }
415 scalars.into_py_any(py)
416 }
417
418 #[getter]
419 fn r#type(&self) -> Arro3DataType {
420 PyDataType::new(self.field.data_type().clone()).into()
421 }
422}